抓取词典数据的问题

用浏览器自动化(Playwright)爬取动态网站,被识别为爬虫的概率应该会低一些吧?我尝试了一下,为什么 page.goto 会报 Timeout(超时)?

address.txt内容如下:

como
que
de una
niña
bebe
¿O me equivoco?

Python 源代码如下:

# -*- coding: utf-8 -*-

import os.path
import time
from os import path
from urllib.parse import quote

from playwright.sync_api import TimeoutError
from playwright.sync_api import sync_playwright

# Crawl one SpanishDict translation page per entry of address.txt and save the
# rendered HTML of each page to "<entry>.html" in the current directory.
# Entries whose output file already exists are skipped, so the script can be
# re-run to pick up the entries that timed out on a previous run.
with sync_playwright() as p:
    browser = p.chromium.launch()

    # Emulate a high-DPI screen. 2 is the scale factor: 1 is enough for plain
    # crawling, 2 gives sharper screenshots for an image-based dictionary.
    context = browser.new_context(device_scale_factor=2)

    page = context.new_page()

    # Use a 50 s navigation timeout instead of the 30 s default. This setting
    # is loop-invariant, so it is applied once rather than on every entry.
    page.set_default_navigation_timeout(50000)

    # The word list holds non-ASCII Spanish text, so decode it explicitly
    # instead of relying on the platform default encoding ("utf-8-sig" also
    # tolerates an optional BOM). The `with` closes the file when done.
    with open("address.txt", encoding="utf-8-sig") as entries:
        for i, line in enumerate(entries):
            # Strip BEFORE deriving the file name; otherwise the trailing
            # newline of the text line ends up inside the file name.
            line = line.strip()
            if not line:
                # A blank line would request the bare /translate/ URL.
                continue

            filename = line + ".html"  # output file name for this entry

            # Skip entries that were already downloaded.
            if path.exists(filename):
                continue

            try:
                # Percent-encode the entry as a single path segment: spaces
                # are not valid in a URL and a literal "?" would start the
                # query string, truncating entries such as "¿O me equivoco?".
                page.goto(
                    'https://www.spanishdict.com/translate/'
                    + quote(line, safe='')
                )
            except TimeoutError:
                # Report the entry and move on; it is retried on the next run
                # because no output file was written for it.
                print('current: ', i, line, '[timeout]')
                continue

            # Wait 2 seconds so dynamically rendered content has time to load.
            time.sleep(2)

            # Read the rendered page and report its length.
            content = page.content()
            print('current: ', i, line, len(content))

            # Save the page. Write UTF-8 explicitly so non-ASCII content does
            # not fail on platforms whose default encoding is not UTF-8.
            with open(filename, "w", encoding="utf-8") as f:
                f.write(content)

            # Save a full-page screenshot for a quick visual check. The same
            # file is overwritten on every entry.
            page.screenshot(path="screenshot.png", full_page=True)
            # Optionally screenshot only a given selector, which is handy for
            # building an image dictionary when the page text is obfuscated:
            # elem = page.query_selector(".mtb")
            # elem.screenshot(path="mtb.png")

    browser.close()