def _column_number(css_data, col_md):
    """Decode the single number displayed by one column.

    Returns an ``int`` when a ``content`` rule supplies the number, a ``str``
    of re-ordered digits otherwise, or ``None`` when the column yields no
    digits at all.
    """
    placed_digits = []
    for index, div in enumerate(col_md.css("div>div")):
        rule = css_data.get(div.attrib.get("class"))
        if rule:
            content = rule.get("content")
            if content:
                # The rule's ``content`` value is the whole number. Return it
                # alone so digits collected from earlier divs of this column
                # are not emitted as a second, bogus number.
                return int(content.strip('"'))
            if rule.get("opacity"):
                # Divs whose class has an ``opacity`` rule are left out.
                continue
            left = rule.get("left")
            # ``left`` is an offset in "em" units (e.g. "-1em") that shifts the
            # digit relative to its position in the markup; no ``left`` means
            # the digit stays where it is.
            position = index + (int(left.strip("em")) if left else 0)
        else:
            # Normal tag: no relevant rule, the digit keeps its markup position.
            position = index
        digit = div.css("div::text").get()
        if digit is not None:
            placed_digits.append((digit, position))

    if not placed_digits:
        return None
    # sort() is stable, so digits with equal positions keep markup order.
    placed_digits.sort(key=lambda pair: pair[1])
    return "".join(digit for digit, _ in placed_digits)


def get_real_nums(css_data, div_lst):
    """Recover the displayed numbers from columns using the CSS rules.

    Args:
        css_data: Mapping of class name to a dict of style properties
            (``content``, ``left``, ``opacity``), as produced by ``parse_css``.
        div_lst: Iterable of column selectors; each must support
            ``.css("div>div")`` yielding the per-digit divs.

    Returns:
        A list with at most one entry per column, in column order. An entry is
        an ``int`` when it came from a ``content`` rule and a ``str`` of digits
        when it was assembled from individual divs (kept as-is for backward
        compatibility). Columns that yield no digits contribute nothing.
    """
    real_nums = []
    for col_md in div_lst:
        number = _column_number(css_data, col_md)
        if number is not None:
            real_nums.append(number)
    return real_nums
def parse_css(text):
    """Parse CSS text into a mapping of class name to style properties.

    Every non-blank line must be one rule made of four whitespace-separated
    tokens: ``<selector> { <property>:<value> }``. Only the ``content``,
    ``left`` and ``opacity`` properties are kept; other properties are ignored.

    Args:
        text: CSS text with one rule per line.

    Returns:
        A plain ``dict`` mapping the class name (selector with ``:before`` and
        leading dots removed) to a ``dict`` of property name to the raw value
        string exactly as written (e.g. ``'-1em'`` or ``'"123"'``).

    Raises:
        ValueError: If a non-blank line does not have the four-token shape or
            its declaration contains no colon.
    """
    wanted = ("content", "left", "opacity")
    css_data = {}
    for line in text.splitlines():
        # split() with no argument tolerates indentation and repeated spaces.
        tokens = line.split()
        if not tokens:
            # A blank line holds no rule.
            continue
        selector, _, declaration, _ = tokens
        # Split on the first colon only, so a value containing ":" stays intact.
        prop, value = declaration.split(":", 1)
        if prop in wanted:
            name = selector.replace(":before", "").lstrip(".")
            css_data.setdefault(name, {})[prop] = value
    return css_data
def main():
    """Log in, fetch pages 1 through 10 of the level, and print each page's numbers."""
    session = glidedsky_login(EMAIL, PASSWORD)
    for index in range(1, 11):
        page_url = LEVEL_URL + f'?page={index}'
        response = session.get(page_url)
        document = Selector(response.text)

        # The first <style> element's text is fed to parse_css.
        raw_css = dom_style = document.css("style::text").get()
        style_rules = parse_css(raw_css.strip())

        columns = document.css('div.col-md-1')
        print(get_real_nums(style_rules, columns))