from lxml import etree
from selenium import webdriver
from selenium.webdriver import ActionChains, ChromeOptions
import time


def get_Html(url):
    """Request the page at url, drag the slider captcha, and return the parsed HTML."""
    chrome_options = ChromeOptions()
    # Hide the most common automation fingerprints
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        driver.maximize_window()
        time.sleep(1)
        # Grab the slider handle and start the drag
        source = driver.find_element_by_id('nc_1_n1z')
        action = ActionChains(driver)
        action.click_and_hold(source).perform()
        distance = 300
        tracks = get_tracks(distance)  # get_tracks() is a separate helper that builds the drag offsets (only used in the commented-out loop below)
        # for i in tracks:
        #     action.move_by_offset(xoffset=i, yoffset=0).pause(0.1).perform()
        # time.sleep(0.5)
        # Currently dragging in fixed 60px steps instead of the generated track
        i = 0
        while i <= distance:
            action.move_by_offset(xoffset=60, yoffset=0).pause(0.1).perform()
            i += 60
        action.release().perform()
    finally:
        driver.get(driver.current_url)
        time.sleep(1)
        # driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        # driver.implicitly_wait(20)
        # Scroll the footer into view so lazy-loaded content renders, then parse the page
        target = driver.find_element_by_css_selector(".footer")
        driver.execute_script("arguments[0].scrollIntoView();", target)
        source = driver.page_source
        html1 = etree.HTML(source)
        return html1
After the slider verification has been triggered many times it starts failing, yet dragging it by hand still passes every time. window.navigator.webdriver shows false in the browser console. From searching online, one possibility is that the site intercepts a "signature string" inside the Chrome driver binary (on Windows, the matching version of chromedriver.exe) and concludes that the drag is performed by a crawler.
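For reference, one extra masking step that is often combined with the ChromeOptions flags above is injecting a script before every page load through the Chrome DevTools Protocol. This is only a hedged sketch (execute_cdp_cmd is available on Chrome-based drivers in recent Selenium releases), and whether it helps depends on what the site actually fingerprints:

# Hedged sketch: force navigator.webdriver to read as undefined on every new page,
# in addition to the flags already set on chrome_options above.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)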
But I am not sure whether the cause is actually something else. Any advice would be appreciated.
It may be related to the mouse trajectory.
Record the manual drag with a mouse macro (record a good number of them), then have the program replay the recorded paths and timings with a little randomization when it drags the slider. If verification still fails after that, the site probably stacks several different anti-bot checks.
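For illustration, a minimal sketch of that idea. The humanlike_tracks helper below is a made-up stand-in that synthesizes uneven, jittered offsets; a real implementation would instead feed in the offsets and timings captured by the mouse macro. drag_slider would replace the fixed 60px while loop in get_Html.

import random
from selenium.webdriver import ActionChains

def humanlike_tracks(distance, steps=30):
    """Split `distance` into uneven integer offsets so the drag is not a constant 60px step."""
    offsets, moved = [], 0
    for i in range(steps, 0, -1):
        remaining = distance - moved
        step = min(remaining, int(remaining / i * random.uniform(0.8, 1.6)))
        offsets.append(step)
        moved += step
    if moved < distance:
        offsets.append(distance - moved)  # make the total land exactly on the target
    return offsets

def drag_slider(driver, handle, distance):
    """Drag the slider handle along a jittered path with variable pauses, then release."""
    action = ActionChains(driver)
    action.click_and_hold(handle)
    for dx in humanlike_tracks(distance):
        action.move_by_offset(xoffset=dx, yoffset=random.randint(-2, 2))
        action.pause(random.uniform(0.01, 0.05))
    action.pause(random.uniform(0.2, 0.5)).release()
    action.perform()

In get_Html this would be called as drag_slider(driver, source, distance) in place of the while loop.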