断点调试的过程就不赘述了,直接抠代码:在源码中搜索 __g._encrypt 即可定位到加密入口

image.png

还是按以前一样,修改函数名,运行看看

image.png

代码可以运行,但每次的结果都不一样。这里就不逐步校验了,直接说结论:这个结果是错误的,和网页上生成的值不一致 🥶🥶🥶

补充了 atob 函数之后,代码仍然只能在浏览器环境中运行,在 Node.js 环境中运行不了,看来代码里做了环境检测

参考大神文章:https://blog.csdn.net/zjq592767809/article/details/126512798

  • 产生的是随机结果,可以尝试 hook 随机数或时间戳,把结果固定下来再做对比

    Math.random = function () {
        return 0.50;
    };

算法的分析还原过程,大佬的博客已经写得很清楚了,我这里只补充一下搜索接口 /api/v4/search_v3 的注意点

注意点 一

先请求搜索主页面,从返回的 Cookie 中获取 _xsrf 参数

image.png

注意点 二

访问 /udid 接口时,请求头要带上 x-xsrftoken 参数(值就是上一步拿到的 _xsrf),之后再从 Cookie 中读取 d_c0

image.png

如果缺少上面这两个步骤,请求就会触发知乎的验证码机制

x96.py

# -*- coding: utf-8 -*-
# time: 2022/9/13 11:26
# author: Chen

import ctypes
import struct


class x_zse_96_V3(object):
    """Pure-Python encoder/decoder for the payload of the ``x-zse-96`` header.

    The payload ``bytes([seed, device]) + md5_bytes`` is padded to a multiple
    of 16 bytes, run block by block through a 32-round transform over four
    32-bit words (``g_r``), with each block XOR-ed with the previous output
    (``g_x``), and finally serialised back-to-front with a custom 6-bit
    alphabet. ``b64decode`` reverses every step.

    All arithmetic helpers emulate 32-bit integer semantics via ``ctypes`` so
    that the results stay inside the range ``struct.pack(">i", ...)`` accepts.
    """

    # XOR mask applied (together with the constant 42) to the first block.
    local_48 = [48, 53, 57, 48, 53, 51, 102, 55, 100, 49, 53, 101, 48, 49, 100, 55]
    # Output alphabet. It holds 65 characters but only indices 0-63 are ever
    # selected by the encoder (every index is masked with ``& 63``).
    local_55 = "6fpLRqJO8M/c3jnYxFkUVC4ZIG12SiH=5v0mXDazWBTsuw7QetbKdoPyAl+hN9rgE"
    # "zk": 32 round keys, "zb": 256-entry byte substitution table.
    # "zm" is not referenced anywhere in this class.
    h = {
        "zk": [1170614578, 1024848638, 1413669199, -343334464, -766094290, -1373058082, -143119608, -297228157,
               1933479194, -971186181, -406453910, 460404854, -547427574, -1891326262, -1679095901, 2119585428,
               -2029270069, 2035090028, -1521520070, -5587175, -77751101, -2094365853, -1243052806, 1579901135,
               1321810770, 456816404, -1391643889, -229302305, 330002838, -788960546, 363569021, -1947871109],
        "zb": [20, 223, 245, 7, 248, 2, 194, 209, 87, 6, 227, 253, 240, 128, 222, 91, 237, 9, 125, 157, 230, 93, 252,
               205, 90, 79, 144, 199, 159, 197, 186, 167, 39, 37, 156, 198, 38, 42, 43, 168, 217, 153, 15, 103, 80, 189,
               71, 191, 97, 84, 247, 95, 36, 69, 14, 35, 12, 171, 28, 114, 178, 148, 86, 182, 32, 83, 158, 109, 22, 255,
               94, 238, 151, 85, 77, 124, 254, 18, 4, 26, 123, 176, 232, 193, 131, 172, 143, 142, 150, 30, 10, 146, 162,
               62, 224, 218, 196, 229, 1, 192, 213, 27, 110, 56, 231, 180, 138, 107, 242, 187, 54, 120, 19, 44, 117,
               228, 215, 203, 53, 239, 251, 127, 81, 11, 133, 96, 204, 132, 41, 115, 73, 55, 249, 147, 102, 48, 122,
               145, 106, 118, 74, 190, 29, 16, 174, 5, 177, 129, 63, 113, 99, 31, 161, 76, 246, 34, 211, 13, 60, 68,
               207, 160, 65, 111, 82, 165, 67, 169, 225, 57, 112, 244, 155, 51, 236, 200, 233, 58, 61, 47, 100, 137,
               185, 64, 17, 70, 234, 163, 219, 108, 170, 166, 59, 149, 52, 105, 24, 212, 78, 173, 45, 0, 116, 226, 119,
               136, 206, 135, 175, 195, 25, 92, 121, 208, 126, 139, 3, 75, 141, 21, 130, 98, 241, 40, 154, 66, 184, 49,
               181, 46, 243, 88, 101, 183, 8, 23, 72, 188, 104, 179, 210, 134, 250, 201, 164, 89, 216, 202, 220, 50,
               221, 152, 140, 33, 235, 214],
        "zm": [120, 50, 98, 101, 99, 98, 119, 100, 103, 107, 99, 119, 97, 99, 110, 111]
    }

    @staticmethod
    def pad(data_to_pad):
        """Append 1-16 bytes, each equal to the pad length, to reach a multiple of 16."""
        padding_len = 16 - len(data_to_pad) % 16
        return data_to_pad + bytes([padding_len]) * padding_len

    @staticmethod
    def unpad(padded_data):
        """Strip the padding added by :meth:`pad`.

        Raises:
            ValueError: if the data is empty or its last byte is not a pad
                length that :meth:`pad` could have produced. (A pad byte of 0
                would otherwise silently yield ``b""`` via ``data[:-0]``.)
        """
        if not padded_data:
            raise ValueError("cannot unpad empty data")
        padding_len = padded_data[-1]
        if not 1 <= padding_len <= min(16, len(padded_data)):
            raise ValueError(f"invalid padding length: {padding_len}")
        return padded_data[:-padding_len]

    @staticmethod
    def left_shift(x, y):
        """Shift ``x`` left by ``y % 32`` bits, wrapping to a signed 32-bit int."""
        x, y = ctypes.c_int32(x).value, y % 32
        return ctypes.c_int32(x << y).value

    @staticmethod
    def Unsigned_right_shift(x, y):
        """Shift ``x``, reinterpreted as unsigned 32-bit, right by ``y % 32`` bits."""
        x, y = ctypes.c_uint32(x).value, y % 32
        return ctypes.c_uint32(x >> y).value

    @classmethod
    def Q(cls, e, t):
        """Rotate the 32-bit value ``e`` left by ``t`` bits."""
        return cls.left_shift((4294967295 & e), t) | cls.Unsigned_right_shift(e, 32 - t)

    @classmethod
    def G(cls, e):
        """Substitute each byte of ``e`` through ``h['zb']``, then mix with rotations."""
        sbox = cls.h['zb']
        substituted = bytes(sbox[b] for b in struct.pack(">i", e))
        r = struct.unpack(">i", substituted)[0]
        return r ^ cls.Q(r, 2) ^ cls.Q(r, 10) ^ cls.Q(r, 18) ^ cls.Q(r, 24)

    @classmethod
    def g_r(cls, e):
        """Transform one 16-byte block; returns the result as a list of 16 ints."""
        n = list(struct.unpack(">iiii", bytes(e)))
        round_keys = cls.h['zk']
        for r in range(32):
            n.append(n[r] ^ cls.G(n[r + 1] ^ n[r + 2] ^ n[r + 3] ^ round_keys[r]))
        # The last four words are emitted in reverse order.
        return list(struct.pack(">iiii", n[35], n[34], n[33], n[32]))

    @classmethod
    def re_g_r(cls, e):
        """Invert :meth:`g_r` for one 16-byte block."""
        n = [0] * 32 + list(struct.unpack(">iiii", bytes(e)))[::-1]
        round_keys = cls.h['zk']
        # Walk the rounds backwards, recovering one earlier word per step.
        for r in range(31, -1, -1):
            n[r] = cls.G(n[r + 1] ^ n[r + 2] ^ n[r + 3] ^ round_keys[r]) ^ n[r + 4]
        return list(struct.pack(">iiii", n[0], n[1], n[2], n[3]))

    @classmethod
    def g_x(cls, e, t):
        """Transform ``e`` in 16-byte blocks, XOR-ing each with the previous output.

        ``t`` is the 16-byte value XOR-ed into the first block.
        """
        n = []
        for start in range(0, len(e), 16):
            o = e[start:start + 16]
            t = cls.g_r([o[c] ^ t[c] for c in range(16)])
            n += t
        return n

    @classmethod
    def re_g_x(cls, e, t):
        """Invert :meth:`g_x`; ``t`` is the value that preceded the first block."""
        n = []
        for start in range(0, len(e), 16):
            o = e[start:start + 16]
            a = cls.re_g_r(o)
            n += [a[c] ^ t[c] for c in range(16)]
            t = o
        return n

    @classmethod
    def b64encode(cls, md5_bytes: bytes, device: int = 0, seed: int = 63) -> str:
        """Encode ``md5_bytes`` into the string that follows ``2.0_`` in the header.

        Args:
            md5_bytes: the digest to sign (the callers in this file pass the
                32-character hex digest as bytes).
            device: second payload byte. Original author's note: 0 means the
                environment check passed.
            seed: first payload byte (the "random number" in the original note).

        Returns:
            The encoded text, four characters per three payload bytes.

        NOTE(review): bytes are consumed in groups of three from the end, so
        the padded payload length should be a multiple of 3 (48 bytes for a
        32-byte digest). Other lengths are not handled specially.
        """
        payload = cls.pad(bytes([seed, device]) + md5_bytes)
        first_block = [payload[i] ^ cls.local_48[i] ^ 42 for i in range(16)]
        head = cls.g_r(first_block)
        body = cls.g_x(payload[16:], head)
        cipher = head + body

        chars = []
        counter = 0
        for pos in range(len(cipher) - 1, 0, -3):
            group = 0
            for offset in range(3):
                # The mask is 58 for every fourth byte and 0 otherwise.
                mask = cls.Unsigned_right_shift(58, 8 * (counter % 4)) & 255
                counter += 1
                group |= (cipher[pos - offset] ^ mask) << (8 * offset)
            chars.extend(cls.local_55[(group >> shift) & 63] for shift in (0, 6, 12, 18))
        return "".join(chars)

    @classmethod
    def b64decode(cls, x_zse_96: str) -> dict:
        """Decode a string produced by :meth:`b64encode`.

        Returns:
            A dict with keys ``'seed'``, ``'device'`` (ints) and
            ``'md5_bytes'`` (bytes).

        Raises:
            ValueError: if a character is outside the alphabet or the decoded
                padding is invalid.
            IndexError: if the length is not a multiple of four.
        """
        counter = 0
        reversed_bytes = []
        for pos in range(0, len(x_zse_96), 4):
            group = 0
            for k in (3, 2, 1, 0):
                group += cls.local_55.index(x_zse_96[pos + k]) << (6 * k)
            for k in range(3):
                mask = cls.Unsigned_right_shift(58, 8 * (counter % 4))
                counter += 1
                reversed_bytes.append(((group >> (8 * k)) & 255) ^ mask)
        # The encoder walked the cipher bytes back-to-front; undo that here.
        head, body = reversed_bytes[-16:][::-1], reversed_bytes[:-16][::-1]
        tail_plain = cls.re_g_x(body, head)
        first_block = cls.re_g_r(head)
        head_plain = [first_block[i] ^ cls.local_48[i] ^ 42 for i in range(16)]
        payload = cls.unpad(bytes(head_plain + tail_plain))
        return {
            'seed': payload[0],
            'device': payload[1],
            'md5_bytes': payload[2:]
        }


def x_zse_96_b64encode(md5_bytes: bytes):
    """Encode ``md5_bytes`` with the fixed payload prefix ``bytes([63, 0])``.

    This is a thin wrapper around ``x_zse_96_V3.b64encode``. The tables and
    helper functions that used to be copied into this function were identical
    to the class's, so the work is delegated instead of duplicated.

    Args:
        md5_bytes: the digest to encode, as bytes.

    Returns:
        The encoded string (the part that follows ``2.0_`` in the header).
    """
    # Original author's note on these two bytes: "random number; 0 means the
    # environment check passed". 63 is the first byte, 0 the second.
    return x_zse_96_V3.b64encode(md5_bytes, device=0, seed=63)


def get_x96(e):
    """Encode the hex digest string ``e`` and return the resulting text.

    Example input: "6315b498d0299f51f115a987e6a65ae8".
    """
    return x_zse_96_b64encode(e.encode())

spider.py

# -*- coding: utf-8 -*-
# time: 2022/9/13 11:47
# author: Chen

import time
from hashlib import md5
from pprint import pprint
from urllib.parse import quote, urlparse

import requests
from w3lib.url import add_or_replace_parameters

from x96 import get_x96

# One shared HTTP session: cookies set by earlier responses (_xsrf, d_c0) are
# read back from it by the functions below.
session = requests.session()
session.headers.clear() # drop the defaults so the python-requests User-Agent is never sent

# NOTE(review): all traffic is routed through a local proxy on port 8888 and
# TLS certificate verification is switched off. This looks like a debugging
# setup for a local traffic-capture tool; confirm before running it anywhere
# else, since verify=False disables protection against interception.
FD_PROXIES = {"http": "http://127.0.0.1:8888", "https": "http://127.0.0.1:8888"}
session.proxies.update(FD_PROXIES)
session.verify = False

# Browser-like headers sent with every request made through the session.
default_headers = {
"Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
}
session.headers.update(default_headers)

# Extra headers merged into the /udid and search API requests. The "x-zse-93"
# value is the same string that get_x_zse_96 puts at the start of the text it
# hashes.
zhihu_headers = {
"Connection": "keep-alive",
"x-zse-93": "101_3_3.0",
"x-ab-param": "",
"x-api-version": "3.0.91",
"sec-ch-ua-mobile": "?0",
"x-requested-with": "fetch",
"x-app-za": "OS=Web",
"sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
"sec-ch-ua-platform": '"Windows"',
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Dest": "empty",
}


def get_x_zse_96(url, params, d_c0):
    """Compute the encoded signature for a request to ``url`` with ``params``.

    The text that gets hashed is "101_3_3.0", the request's "path?query" and
    the ``d_c0`` cookie value, joined with "+". Its MD5 hex digest is then
    handed to ``get_x96``, whose result is returned.
    """
    full_url = add_or_replace_parameters(url, params)
    parsed = urlparse(full_url)
    path_and_query = "?".join((parsed.path, parsed.query))
    plain_text = "+".join(("101_3_3.0", path_and_query, d_c0))
    digest = md5(plain_text.encode()).hexdigest()
    return get_x96(digest)


def zhihu_init():
    """Prime the session cookies and return the ``d_c0`` cookie value.

    Steps: GET the search page (module-level ``search_url``), read the
    ``_xsrf`` cookie, POST to /udid with that value in the ``x-xsrftoken``
    header, then read ``d_c0`` from the session's cookies.
    """
    session.get(search_url)

    extra_headers = {
        "x-xsrftoken": session.cookies.get("_xsrf"),
        "Referer": search_url,
    }
    request_headers = dict(zhihu_headers)
    request_headers.update(extra_headers)

    session.post("https://www.zhihu.com/udid", headers=request_headers)
    return session.cookies.get("d_c0")


def main(d_c0: str, keyword: str, page: int):
    """Fetch one page of search results for ``keyword`` and pretty-print the JSON.

    Args:
        d_c0: value of the ``d_c0`` cookie, used when signing the request.
        keyword: search text.
        page: zero-based page index; each page covers 20 results.
    """
    api_url = "https://www.zhihu.com/api/v4/search_v3"
    start = 20 * page
    # The same dict is used for signing and for the actual request, so the
    # key order below must stay as it is.
    query_params = {
        "gk_version": "gz-gaokao",
        "t": "general",
        "q": keyword,
        "correction": "1",
        "offset": start,  # page position
        "limit": "20",
        "filter_fields": "",
        "lc_idx": start,  # kept equal to "offset"
        "show_all_topics": "0",
        "search_source": "Normal",
        # Optional filter parameters (disabled):
        # "vertical": "answer",
        # "sort": "created_time",
        # "time_interval": "a_week",
    }

    signature = get_x_zse_96(api_url, query_params, d_c0)
    request_headers = dict(zhihu_headers)
    request_headers.update({
        "Referer": search_url,
        "x-zse-96": f"2.0_{signature}",
    })
    response = session.get(api_url, params=query_params, headers=request_headers)
    pprint(response.json())


if __name__ == "__main__":
    # search_url is read as a module-level global by zhihu_init() and main().
    kw = "戒赌"
    search_url = "https://www.zhihu.com/search?type=content&q=" + quote(kw)

    # Obtain the cookies once, then fetch the first three result pages with a
    # short pause before each request.
    d_c0 = zhihu_init()
    for page in range(3):
        time.sleep(0.5)
        main(d_c0, kw, page)