官方文档:https://selenium-python.readthedocs.io/
中文文档:https://python-selenium-zh.readthedocs.io/zh_CN/latest/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build the Chrome launch options for this session.
options = Options()
# Override the User-Agent string.
options.add_argument('user-agent="MQQBrowser/26 Mozilla/5.0"')
# Chrome expects "width,height" for --window-size (a comma, not "x").
options.add_argument('--window-size=1920,1080')
# Run without a visible window.
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Route traffic through a local proxy.
options.add_argument("--proxy-server=127.0.0.1:8888")
options.add_argument('--no-sandbox')
# Fixed: the original called `option.add_argument`, an undefined name.
options.add_argument("--disable-javascript")
# Do not load images.
options.add_argument('blink-settings=imagesEnabled=false')

# Point Selenium at a specific Chrome binary. `binary_location` is the
# supported way to do this; "--chrome-executable" is not a Chrome switch.
chrome_path = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
options.binary_location = chrome_path

# Keep the browser profile (cache, cookies, ...) in a dedicated directory.
options.add_argument("--user-data-dir=./sele_cache/kuaishou")

# Reduce automation fingerprints.
# Fixed: the original used the undefined name `chromeOptions` here.
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")

browser = webdriver.Chrome(options=options)
browser.maximize_window()
html = browser.page_source
browser.save_screenshot("headless.png")
|
驱动火狐
driver下载:https://github.com/mozilla/geckodriver/releases
指定路径:
1 2 3 4
# Start Firefox with explicit paths to geckodriver and to the Firefox binary.
geckodriver_path = "C:\\Users\\dell\\Desktop\\geckodriver.exe"
firefox_path = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"
self.browser = webdriver.Firefox(
    executable_path=geckodriver_path,
    firefox_binary=firefox_path,
)
|
添加 / 获取 / 清空 cookies
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# Cookies to inject into the current browser session.
# NOTE(review): Selenium's add_cookie documents "name" and "value" as required
# keys; these dicts carry no "name" — confirm the names before running.
cookies = [
    {
        "domain": ".jd.com",
        "expirationDate": 1624175025,
        "hostOnly": False,
        "value": "122270672.15941099761991128995462.1594109976.1608619478.1608622027.9",
    },
    {
        "domain": ".jd.com",
        "expirationDate": 1608624825,
        "hostOnly": False,
        "value": "122270672.9.15941099761991128995462|9.1608622027",
    },
]

# Add each cookie in turn.
for ck in cookies:
    browser.add_cookie(ck)

# Read back every cookie of the session (this rebinds `cookies`).
cookies = browser.get_cookies()

# Remove all cookies from the session.
browser.delete_all_cookies()
|
使用代理,带鉴权
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType

# Placeholders: fill in the real proxy endpoint and credentials.
proxy_ip = "IP地址:端口号"
proxy_username = "用户名"
proxy_password = "密码"

# Manual proxy configuration shared by HTTP and HTTPS traffic.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = proxy_ip
proxy.ssl_proxy = proxy_ip

# Fixed: the original called proxy.add_argument(f"--proxy-auth=..."), but
# Proxy has no add_argument method, so that line raised AttributeError.
# NOTE(review): the Proxy settings used here carry no HTTP proxy credentials;
# proxy_username / proxy_password must be supplied by another mechanism
# (for example a local forwarding proxy that adds the auth header) — confirm
# which approach fits your setup.

driver = webdriver.Firefox(proxy=proxy)
|
改变WebDriver参数
1 2 3 4 5 6 7 8
# Register a script that runs before any page script on every new document
# and makes navigator.webdriver read as undefined.
# Fixed: the original descriptor was {webdriver:false}, which is not a valid
# property-descriptor key; an explicit getter states the intent.
browser.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
        """
    },
)
|
执行JavaScript
1 2
# Scroll to the top of the page, then to the bottom, by running JavaScript.
for script in (
    'window.scrollTo(0,0);',
    'window.scrollTo(0,document.body.scrollHeight)',
):
    browser.execute_script(script)
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
import time

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# 1. Fixed sleep: always blocks for the full 10 seconds.
time.sleep(10)

# 2. Implicit wait: element lookups retry for up to 30 seconds.
browser.implicitly_wait(30)

# 3. Explicit wait.
#    10  - maximum timeout in seconds
#    0.5 - polling interval in seconds
wait = WebDriverWait(browser, 10, 0.5)

# Fixed: the original had an unbalanced parenthesis and passed `message` to
# presence_of_element_located; `message` is an argument of `until`.
element = wait.until(
    EC.presence_of_element_located((By.ID, "kw")),
    message="",
)

# The same wait with a custom condition.
# Fixed: the lambda parameter was misspelled `diver`, so `driver` in the body
# referred to an outer name instead of the instance passed in by `until`.
wait.until(lambda driver: driver.find_element(By.ID, 'kw'))
|
切换到iframe
1 2 3 4 5 6 7 8 9 10
from selenium.webdriver.common.by import By

# Enter an iframe, either by its name/id or by its index.
# NOTE(review): these look like two alternatives; executed back to back the
# second call looks up frame 0 inside "iframe_name" — confirm which is wanted.
driver.switch_to.frame("iframe_name")
driver.switch_to.frame(0)

# Interact with an element inside the iframe.
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3.
element = driver.find_element(By.CSS_SELECTOR, "css_selector")
element.click()

# Return to the top-level document.
driver.switch_to.default_content()
|
切换到 alert 弹窗
1 2
# Switch to the currently open alert dialog and confirm it.
# `switch_to.alert` replaces the switch_to_alert() method, which was
# deprecated and later removed from Selenium.
alert = browser.switch_to.alert
alert.accept()
|
滑块实战(阿里滑块)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
import time

from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Fill in the user name. `info` is expected to be defined by the caller.
# The original mixed `driver` and `browser`; this snippet now uses `browser`
# throughout.
un = browser.find_element(By.ID, "userName")
un.send_keys(info["user"])

# Width of the slider track.
block = browser.find_element(By.XPATH, "//div[@class='scale_text slidetounlock']")
b_width = block.size["width"]

# Width of the slider handle.
# Fixed: the original read `border.size`, but `border` is never defined.
slider = browser.find_element(By.XPATH, "//span[@class='nc_iconfont btn_slide']")
s_width = slider.size["width"]

# Distance the handle has to travel, turned into a list of x offsets by
# get_track (defined elsewhere).
dist = b_width - s_width
track_lst = get_track(dist)

# Press the handle, move it step by step, then release it.
ActionChains(browser).click_and_hold(slider).perform()
for x in track_lst:
    # Fixed: the original passed the undefined name `x_o` and never called
    # perform(), so no movement was sent to the browser.
    ActionChains(browser).move_by_offset(xoffset=x, yoffset=0).perform()
    time.sleep(0.01)
ActionChains(browser).release().perform()

# Submit the verification.
browser.find_element(By.XPATH, "//button[@id='verify']").click()
|
stealth.min.js 指纹隐藏
1 2 3 4 5 6
# Load stealth.min.js and register it to be evaluated on every new document.
with open("stealth.min.js", "r") as script_file:
    js_code = script_file.read()

driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": js_code},
)
|
使用stealth前后对比:
Chrome版本报错:This version of ChromeDriver only supports Chrome version
参考:https://stackoverflow.com/questions/60296873/sessionnotcreatedexception-message-session-not-created-this-version-of-chrome
1 2 3
from webdriver_manager.chrome import ChromeDriverManager

# Pass the result of ChromeDriverManager().install() to webdriver.Chrome.
# NOTE(review): install() presumably returns the path of a chromedriver that
# matches the local Chrome — confirm against the webdriver_manager docs.
driver_path = ChromeDriverManager().install()
browser = webdriver.Chrome(driver_path)
|
可以对照着优化浏览器,防检测
会直观地显示各项参数是否通过了检测
执行请求
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
# Issue an XMLHttpRequest from inside the page and hand the response text
# back to Python. The script returns a Promise.
# NOTE(review): this relies on the WebDriver "Execute Script" command waiting
# for a returned Promise to settle — confirm for the driver in use.
# Fixed: the script is laid out on separate lines again; collapsed onto one
# line, the `//` comment swallowed the rest of the JavaScript.
response = self.browser.execute_script('''
    function queryData(url) {
        // NOTE: `url` is not used; the request target is e.url below.
        var p = new Promise(function(resolve, reject) {
            var e = {
                "url": "",
                "method": "GET"
            };
            var h = new XMLHttpRequest;
            h.open(e.method, e.url, true);
            h.setRequestHeader("accept", "application/json, */*");
            // h.setRequestHeader("salute-by","lx");
            h.onreadystatechange = function() {
                if (h.readyState !== 4) {
                    return;
                }
                if (h.status === 200) {
                    resolve(h.responseText);
                } else {
                    // Settle the promise on failure so the caller does not
                    // block until the script timeout.
                    reject(new Error("request failed with status " + h.status));
                }
            };
            h.send(null);
        });
        return p;
    }
    var p1 = queryData('lx');
    const result_json = Promise.all([p1]).then(function(result) {
        console.log("对象" + result[0])
        return result[0]
    })
    return result_json
''')
|