浏览器安装:playwright install
使用无痕浏览器
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
# Launch a visible Chromium window with the usual automation hints removed.
browser = pw.chromium.launch(
    headless=False,
    args=[
        "--start-maximized",
        "--disable-features=AutomationControlled",
        # This switch takes a feature list; passed bare (as before) it names
        # no Blink feature to disable, so AutomationControlled stayed enabled.
        "--disable-blink-features=AutomationControlled",
    ],
    # Remove Playwright's default --enable-automation switch from the command line.
    ignore_default_args=["--enable-automation"],
)
# Each new_context() is an isolated, incognito-like session.
# no_viewport=True disables the fixed viewport so the page follows the window size.
context = browser.new_context(
    no_viewport=True,
    ignore_https_errors=True,
)
page = context.new_page()
# Runs before any page script: make navigator.webdriver read as undefined.
code = (
    """Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});"""
)
page.add_init_script(code)
|
使用代理(另一种方式)
1 2 3 4 5 6 7 8 9 10
# Attach to an already-running browser through its CDP WebSocket endpoint
# instead of launching a new one.
browser = pw.chromium.connect_over_cdp(
    endpoint_url="ws://localhost:8080/?stealth",
)
# The proxy is configured per context; username/password are left empty here.
context = browser.new_context(
    proxy=dict(
        server="http://175.6.136.136:20000",
        username="",
        password="",
    ),
    no_viewport=True,
    ignore_https_errors=True,
)
|
使用浏览器缓存
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# launch_persistent_context() starts the browser with an on-disk profile
# (user_data_dir), so cache, cookies and logins survive between runs.
# NOTE: the value stored in self.browser is a BrowserContext, not a Browser.
self.browser = self.pw.chromium.launch_persistent_context(
    headless=False,
    # Drive the locally installed Chrome rather than the bundled Chromium.
    executable_path="C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
    args=[
        "--start-maximized",
        "--disable-features=AutomationControlled",
        # This switch takes a feature list; passed bare (as before) it names
        # no Blink feature to disable, so AutomationControlled stayed enabled.
        "--disable-blink-features=AutomationControlled",
    ],
    # Remove Playwright's default --enable-automation switch from the command line.
    ignore_default_args=["--enable-automation"],
    user_data_dir="C:\\Users\\14276\\Desktop\\btConfig",
    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    no_viewport=True,
    ignore_https_errors=True,
)
# NOTE(review): a persistent context may already contain an initial page;
# new_page() opens an additional one.
self.page = self.browser.new_page()
|
基本用法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
# --- Navigate, switch to the e-mail login tab and type the user name ---
self.page.goto(self.home_url)
self.page.click("div.switch-switch.email")
self.page.wait_for_timeout(1000)
# Clear the field first, then type key by key with a per-key delay (ms).
self.page.fill('input[fieldname="item_label_email"]', "")
self.page.type(
    'input[fieldname="item_label_email"]',
    self.username,
    delay=random.randint(100, 150),
)

# --- All frames currently attached to the page ---
frames = self.page.frames

# --- Element lookup by CSS selector (element handle) ---
ele = self.page.query_selector(slider_css)

# --- Element lookup via a locator, then read its text ---
ele = self.page.locator("div.ecom-dropdown-trigger>span:first-of-type")
self.shop_name = ele.text_content()

# --- Position of the captcha slider image ---
slider = self.frame.locator("img#captcha-verify_img_slide")
cdn = slider.bounding_box()
# The 27 + 4 offset is presumably tuned to this captcha widget - verify.
length = cdn["x"] + 27 + 4
high = cdn["y"]

# --- Drag the slider: press, move to the start point, replay the track, release ---
self.frame.hover("img#captcha-verify_img_slide")
self.page.mouse.down()
self.page.mouse.move(length, high)
for step in self.get_track(real_length):
    length += step
    self.page.mouse.move(length, high)
    # Short random pause between the individual moves.
    self.page.wait_for_timeout(random.randint(10, 15))
self.page.mouse.up()
|
拦截请求
1 2 3 4 5 6
def handle_request(route, request):
    """Print the URL of matching requests, then let every request proceed.

    Args:
        route: The Playwright route object for the intercepted request.
        request: The intercepted request; only its ``url`` is inspected.
    """
    if "strategy_market" in request.url:
        print(request.url)
    # Resume unconditionally: a routed request that is never continued,
    # fulfilled or aborted stays pending.
    route.continue_()


# Register the handler for every URL. This must run after the function
# definition above, otherwise the name does not exist yet.
page.route('**/*', handle_request)
|
发起请求
参考文章:《浏览器中 XMLHttpRequest 和 Fetch 的区别》(掘金)。文章前言摘录:随着前后端分离模式的流行,在项目开发过程中必不可少的一项就是……
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
def rpc_get_code(item_url, item_body):
    """Build the source of an async JS function that GETs a JSON resource.

    The returned string is an ``async () => {...}`` expression that issues
    ``GET <item_url>?<item_body>`` through ``XMLHttpRequest`` and resolves
    with the parsed JSON response. It rejects with
    ``{status, statusText}`` on a non-2xx status, a network error, or a
    response body that is not valid JSON.

    Args:
        item_url: Request URL without the query string.
        item_body: Already-encoded query string (for example ``"a=1&b=2"``).

    Returns:
        The JavaScript source as a ``str``.

    Raises:
        TypeError: If an argument cannot be serialised by ``json.dumps``.
    """
    import json
    import re

    exe_js_code = """
async () => {
    async function getData() {
        // 定义请求的URL和参数
        const url = __ITEM_URL__;
        const params = __ITEM_BODY__;
        const fullUrl = `${url}?${params}`;
        const xhr = new XMLHttpRequest();

        // 初始化一个Promise来处理异步请求
        return new Promise((resolve, reject) => {
            // 配置请求类型为GET
            xhr.open('GET', fullUrl, true);

            // 设置请求完成的回调函数
            xhr.onload = function () {
                if (xhr.status >= 200 && xhr.status < 300) {
                    // 请求成功,将响应数据传递给resolve
                    try {
                        const data = JSON.parse(xhr.responseText);
                        resolve(data); // 将JSON对象传递给resolve
                    } catch (err) {
                        reject({ status: xhr.status, statusText: String(err) });
                    }
                } else {
                    // 请求失败,将错误传递给reject
                    reject({ status: xhr.status, statusText: xhr.statusText });
                }
            };

            // 设置请求失败的回调函数
            xhr.onerror = function () {
                reject({ status: xhr.status, statusText: xhr.statusText });
            };
            xhr.send();
        });
    };
    var res = await getData()
    return res
}
"""
    # json.dumps yields a correctly escaped JS string literal, so quotes or
    # backslashes in the inputs cannot break out of the generated code.
    literals = {
        "__ITEM_URL__": json.dumps(item_url),
        "__ITEM_BODY__": json.dumps(item_body),
    }
    # One pass over the template: substituted text is never re-scanned, so a
    # query string that happens to contain a placeholder name stays intact.
    new_js = re.sub(
        r"__ITEM_(?:URL|BODY)__",
        lambda match: literals[match.group(0)],
        exe_js_code,
    )
    return new_js
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
def rpc_post_code(item_url, item_body):
    """Build the source of an async JS function that POSTs a JSON body.

    The returned string is an ``async () => {...}`` expression that sends
    ``item_body`` (serialised with ``JSON.stringify``) to ``item_url`` via
    ``fetch`` and resolves with the parsed JSON response. Network errors
    and invalid JSON responses reject the promise instead of leaving it
    pending.

    Args:
        item_url: Request URL.
        item_body: JSON-serialisable payload.

    Returns:
        The JavaScript source as a ``str``.

    Raises:
        TypeError: If ``item_body`` is not JSON-serialisable.
    """
    import json
    import re

    exe_js_code = """
async () => {
    async function postData() {
        const body = JSON.stringify(__ITEM_BODY__);
        const url = __ITEM_URL__;
        const response = await fetch(url, {
            method: 'POST',
            body: body
        });
        return response.json();
    };
    var res = await postData()
    return res
}
"""
    # The payload is embedded as a JSON literal and the URL as an escaped
    # JS string literal.
    literals = {
        "__ITEM_URL__": json.dumps(item_url),
        "__ITEM_BODY__": json.dumps(item_body),
    }
    # One pass over the template: text inside the payload is never re-scanned,
    # so a body that mentions a placeholder name is not rewritten.
    new_js = re.sub(
        r"__ITEM_(?:URL|BODY)__",
        lambda match: literals[match.group(0)],
        exe_js_code,
    )
    return new_js
|
开启stealth模式
pip install playwright_stealth
1 2 3
# Requires the third-party package installed with `pip install playwright_stealth`.
from playwright_stealth import stealth_sync

# Apply the stealth patches to the (synchronous API) page.
stealth_sync(self.page)
|
Cookie 注入
1 2 3 4 5 6 7 8
# Turn the {name: value} mapping into the list of cookie dicts expected by
# BrowserContext.add_cookies(). The loop variables are now the names the
# dict expression actually uses; previously it referenced undefined names.
ck_dct = [
    {"name": name.strip(), "value": value.strip(), "domain": ".jd.com", "path": "/"}
    for name, value in cookies.items()
]
with sync_playwright() as pw:
    browser = pw.chromium.launch()
    context = browser.new_context()
    # Cookies are set on the context, so every page opened from it sends them.
    context.add_cookies(ck_dct)
|
Cookie 获取
1 2
# Serialise the context's cookies as "name=value;" pairs in one string.
# Built in a single join so the snippet does not depend on a pre-existing
# `cookie` variable; each pair keeps its trailing ";".
cookie = "".join(
    f"{ck['name']}={ck['value']};" for ck in self.page.context.cookies()
)
|
贝塞尔曲线轨迹
From:https://pypi.org/project/python-ghost-cursor/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
from python_ghost_cursor import path as slide_path

# Start and end points of the mouse movement.
start = {"x": 0, "y": 0}
end = {"x": 40, "y": 20}

# Produces the list of intermediate points between the two positions.
slide_path(start, end)
# Sample output recorded from one interactive run:
# [{'x': 0, 'y': 0},
#  {'x': 1.7205508520070967, 'y': 0},
#  {'x': 3.280150746033031, 'y': 0},
#  {'x': 4.762752498693698, 'y': 0},
#  {'x': 6.252308926604997, 'y': 0},
#  {'x': 7.832772846382823, 'y': 0},
#  {'x': 9.588097074643073, 'y': 0},
#  {'x': 11.602234428001648, 'y': 0},
#  {'x': 13.959137723074438, 'y': 0},
#  {'x': 16.742759776477342, 'y': 0},
#  {'x': 20.03705340482626, 'y': 0},
#  {'x': 23.92597142473709, 'y': 1.0269500874731925},
#  {'x': 28.493466652825727, 'y': 6.196217858339223},
#  {'x': 33.82349190570806, 'y': 12.498489441945145},
#  {'x': 40.0, 'y': 20.0}]
|