官方文档:https://selenium-python.readthedocs.io/
中文文档:https://python-selenium-zh.readthedocs.io/zh_CN/latest/

Options 初始化配置(https://zhuanlan.zhihu.com/p/60852696)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build the Chrome launch configuration; every setting goes on this one object.
options = Options()
# Override the User-Agent. The argument reaches Chrome verbatim (no shell is
# involved), so the value must not be wrapped in extra quotes.
options.add_argument("--user-agent=MQQBrowser/26 Mozilla/5.0")
# Window size, written as "width,height".
options.add_argument("--window-size=1920,1080")
# Run without a visible browser window.
options.add_argument("--headless")
# Disable the GPU (the original note says Google's docs suggest this to avoid a bug).
options.add_argument("--disable-gpu")
# Send traffic through a proxy.
options.add_argument("--proxy-server=127.0.0.1:8888")
# Turn off the Chrome sandbox (original note: "run with the highest privileges").
options.add_argument("--no-sandbox")
# Disable JavaScript.
# NOTE(review): recent Chrome builds reportedly ignore this switch; confirm, and
# consider a content-settings preference instead if it has no effect.
options.add_argument("--disable-javascript")
# Do not load images, to speed up page loads.
options.add_argument("--blink-settings=imagesEnabled=false")
# Point Selenium at a specific chrome.exe.
chrome_path = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
options.binary_location = chrome_path
# Reuse a profile/cache directory; the original note warns that pages may fail
# to load when this is enabled.
options.add_argument("--user-data-dir=./sele_cache/kuaishou")
# Hide the "controlled by automated test software" hints.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_argument("--disable-blink-features=AutomationControlled")

browser = webdriver.Chrome(options=options)
browser.maximize_window()  # maximize the window
html = browser.page_source  # source of the current page
browser.save_screenshot("headless.png")  # take a screenshot

驱动火狐

driver下载:https://github.com/mozilla/geckodriver/releases

指定路径:

1
2
3
4
# Locations of the geckodriver executable and of the Firefox binary itself.
geckodriver_path = "C:\\Users\\dell\\Desktop\\geckodriver.exe"
firefox_path = "C:\\Program Files\\Mozilla Firefox\\firefox.exe"

# Start Firefox with both paths given explicitly.
# NOTE(review): these keyword arguments look like the Selenium 3 signature;
# confirm they are accepted by the Selenium version in use.
self.browser = webdriver.Firefox(
    executable_path=geckodriver_path,
    firefox_binary=firefox_path,
)

添加 / 获取 / 清空 cookies

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Cookies to inject into the session.
# NOTE(review): Selenium documents "name" and "value" as required keys for
# add_cookie(); these entries carry no "name", so it presumably has to be
# filled in before this runs - verify against the real cookie export.
cookies = [
    {
        "domain": ".jd.com",
        "expirationDate": 1624175025,
        "hostOnly": False,
        "value": "122270672.15941099761991128995462.1594109976.1608619478.1608622027.9",
    },
    {
        "domain": ".jd.com",
        "expirationDate": 1608624825,
        "hostOnly": False,
        "value": "122270672.9.15941099761991128995462|9.1608622027",
    },
]

# Add every cookie to the current browser session.
for ck in cookies:
    browser.add_cookie(ck)

# Read back all cookies of the session (rebinds `cookies`), then clear them.
cookies = browser.get_cookies()
browser.delete_all_cookies()

使用代理,带鉴权

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType

# Proxy address and credentials (placeholders to be filled in).
proxy_ip = "IP地址:端口号"
proxy_username = "用户名"
proxy_password = "密码"

# Describe a manually configured proxy used for both HTTP and HTTPS traffic.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = proxy_ip
proxy.ssl_proxy = proxy_ip

# NOTE(review): the original called proxy.add_argument("--proxy-auth=..."), but
# Proxy has no add_argument method, so that line raised AttributeError. The
# credentials above are therefore not applied here; authenticating against an
# HTTP proxy needs a separate mechanism - TODO decide which one to use.

# Attach the proxy through the Firefox options and start the browser.
from selenium.webdriver.firefox.options import Options as FirefoxOptions

firefox_options = FirefoxOptions()
firefox_options.proxy = proxy
driver = webdriver.Firefox(options=firefox_options)

执行cdp命令(Chrome DevTools Protocol)

改变WebDriver参数

1
2
3
4
5
6
7
8
# Register a script that is evaluated in every new document, so that
# navigator.webdriver reads as undefined on pages opened afterwards.
# The descriptor uses an explicit getter; the original passed an unrecognised
# "webdriver" key instead of a getter or a value.
browser.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
        """
    },
)

执行JavaScript

1
2
# JavaScript snippets for the two scroll positions.
scroll_to_top_js = 'window.scrollTo(0,0);'
scroll_to_bottom_js = 'window.scrollTo(0,document.body.scrollHeight)'

# Move the scroll bar to the top first, then down to the bottom of the page.
for scroll_js in (scroll_to_top_js, scroll_to_bottom_js):
    browser.execute_script(scroll_js)

显式 / 隐式等待(https://www.cnblogs.com/feng0815/p/13888703.html)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import time

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Forced wait: always blocks for the full 10 seconds.
time.sleep(10)

# Implicit wait: element lookups keep polling for up to 30 seconds before
# giving up when the element is not immediately present.
browser.implicitly_wait(30)

# Explicit wait: 10 is the maximum timeout in seconds, 0.5 the polling interval.
wait = WebDriverWait(browser, 10, 0.5)

# Wait until the element with id="kw" is present in the DOM.
# `message` belongs to until(), not to the expected condition.
element = wait.until(
    EC.presence_of_element_located((By.ID, "kw")),
    message="",
)

# The same wait written with an anonymous function; until() passes the driver
# it was constructed with to the callable.
wait.until(lambda drv: drv.find_element(By.ID, "kw"))

切换到iframe

1
2
3
4
5
6
7
8
9
10
from selenium.webdriver.common.by import By

# Switch into the iframe by its name. switch_to.frame() also takes an index
# (the original showed frame(0) as an alternative); the two calls must not be
# run back to back, because the second would then be resolved inside the
# frame selected by the first.
driver.switch_to.frame("iframe_name")

# Locate an element inside the iframe and act on it.
element = driver.find_element(By.CSS_SELECTOR, "css_selector")
element.click()

# Return to the top-level document.
driver.switch_to.default_content()

切换到 alert 按钮

1
2
# Switch to the currently open alert dialog and confirm it.
# switch_to.alert is a property; it replaces the legacy switch_to_alert() method.
alert = browser.switch_to.alert
alert.accept()

滑块实战(阿里滑块)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from selenium.webdriver import ActionChains

import time

from selenium.webdriver.common.by import By

# Type the user name into the text box.
un = browser.find_element(By.ID, "userName")
un.send_keys(info["user"])

# Slider background (the track).
block = browser.find_element(By.XPATH, "//div[@class='scale_text slidetounlock']")
b_width = block.size["width"]
# Slider handle.
slider = browser.find_element(By.XPATH, "//span[@class='nc_iconfont btn_slide']")
s_width = slider.size["width"]
# Build the movement track for the distance the handle has to travel.
# NOTE(review): get_track is defined elsewhere; presumably it returns a list
# of per-step x offsets - confirm.
dist = b_width - s_width
track_lst = get_track(dist)
# Press and hold the handle, drag it step by step, then release.
ActionChains(browser).click_and_hold(slider).perform()
for x in track_lst:
    # Each step needs perform(); without it the move is only queued.
    ActionChains(browser).move_by_offset(xoffset=x, yoffset=0).perform()
    time.sleep(0.01)

ActionChains(browser).release().perform()

# Click the login button.
browser.find_element(By.XPATH, "//button[@id='verify']").click()

stealth.min.js 指纹隐藏

1
2
3
4
5
6
# Read the stealth.min.js source; the encoding is given explicitly so the
# result does not depend on the platform's default encoding.
with open("stealth.min.js", "r", encoding="utf-8") as f:
    js_code = f.read()

# Have Chrome evaluate that script in every new document.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument", {"source": js_code}
)

使用stealth前后对比:

Chrome版本报错:This version of ChromeDriver only supports Chrome version

参考:https://stackoverflow.com/questions/60296873/sessionnotcreatedexception-message-session-not-created-this-version-of-chrome

1
2
3
from webdriver_manager.chrome import ChromeDriverManager

# Ask webdriver_manager for a chromedriver and pass what install() returns as
# the first positional argument of webdriver.Chrome.
# NOTE(review): presumably install() returns the driver's file path - confirm,
# and check that the installed Selenium version still accepts it positionally.
driver_path = ChromeDriverManager().install()
browser = webdriver.Chrome(driver_path)

浏览器参数检测(https://infosimples.github.io/detect-headless/)

可以对照着优化浏览器,防检测

Antibot指纹检测(https://bot.sannysoft.com/)

会直观地显示各项参数是否通过了检测

执行请求

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Issue a GET request from inside the page with XMLHttpRequest and return the
# response body to Python.
# - queryData() now uses its `url` argument; the original opened a hard-coded
#   empty URL and ignored the parameter.
# - The promise is rejected on any non-200 result, so a failed request no
#   longer leaves it pending forever.
# NOTE(review): 'lx' is the placeholder argument carried over from the original
# snippet; replace it with the real request URL.
# NOTE(review): this relies on execute_script waiting for the returned promise
# to settle - confirm for the driver version in use.
response = self.browser.execute_script('''
    function queryData(url) {
        return new Promise(function(resolve, reject) {
            var h = new XMLHttpRequest();
            h.open("GET", url, true);
            h.setRequestHeader("accept", "application/json, */*");
            // h.setRequestHeader("salute-by","lx");
            h.onreadystatechange = function() {
                if (h.readyState !== 4) {
                    return;
                }
                if (h.status === 200) {
                    resolve(h.responseText);
                } else {
                    reject(new Error("request failed with status " + h.status));
                }
            };
            h.send(null);
        });
    }
    return queryData('lx').then(function(body) {
        console.log("对象" + body);
        return body;
    });
''')