Python 编程技巧（五）

IP代理地址显示

# Ask public "what is my IP" services which address our requests come from.
url = "https://ifconfig.me/ip"
# A timeout keeps the call from hanging forever if the service is unreachable.
requests.get(url, timeout=10).text
# Example output: '112.96.242.221'

url = "https://myip.ipip.net/"
requests.get(url, timeout=10).text
# Example output: '当前 IP：112.96.242.221  来自于：中国 广东 广州  联通\n'

无穷大 float("inf")

SSE协议（Server-Sent Events、服务器发送事件）

# Consume a Server-Sent Events stream: each payload arrives on a line of the
# form "data: <json>".
response = requests.get(url, params=params, headers=headers, stream=True)
response.raise_for_status()
for line in response.iter_lines():
    if not line:
        # Blank lines only separate events.
        continue
    decoded_line = line.decode("utf-8")
    if not decoded_line.startswith("data:"):
        # Skip non-payload lines (e.g. "event:", "id:", ":" comments) instead
        # of feeding them to json.loads.
        continue
    # Cut the literal "data:" prefix. str.lstrip("data: ") would instead strip
    # any run of the characters d/a/t/:/space, which can eat into the payload.
    decoded_line = decoded_line[len("data:"):].lstrip(" ")
    data = json.loads(decoded_line)
    for de in data["data"]["details"]:
        # Each detail is a comma-separated record with five fields; the first
        # three are printed below as time, price and traded amount.
        ts, jg, cj, _, _ = de.split(",")
        print(f"时间：{ts} 成交：{cj} 价格：{jg}")

局部符号表 locals()

def my_function():
    """Print this function's local symbol table as returned by ``locals()``."""
    a, b = 10, 20
    # locals() returns a dict mapping local variable names to their values.
    print(locals())


my_function()

>>> {'a': 10, 'b': 20}

列表字典排序

from operator import itemgetter

# Sample data: a list of dictionaries that share the same keys.
list_of_dicts = [
    {'name': 'Alice', 'age': 25},
    {'name': 'Bob', 'age': 20},
    {'name': 'Charlie', 'age': 30},
]

# Sort by the 'age' field. sorted() returns a new list and leaves
# list_of_dicts unchanged.
sorted_list = sorted(list_of_dicts, key=itemgetter('age'))

dateparser 自动解析时间

转换百分比

def format_percentage(ratio):
    """Format a ratio as a whole-number percentage string.

    Args:
        ratio: A number where 1 means 100%.

    Returns:
        The ratio multiplied by 100, rounded to zero decimal places and
        followed by ``%`` (for example ``0.25`` gives ``"25%"``).
    """
    return f"{ratio * 100:.0f}%"

读取 JSON Lines（.jsonl）文件

# A JSON Lines file holds one JSON value per line, roughly like this:
# {}
# {}
# {}

with open("spus.jsonl", encoding="utf8") as fp:
    # Iterating the Reader yields the decoded items of the file one by one.
    for item in jsonlines.Reader(fp):
        print(item)

openpyxl

读取表格

workbook = load_workbook(file_path)
sheet = workbook.active  # the workbook's active worksheet
# Collect the rows into a fresh list. The original snippet appended to `data`
# without ever creating it, which raises NameError when run on its own.
data = list(
    sheet.iter_rows(
        values_only=True,  # return only the cell values of each row
        min_row=2,  # start at the second row, skipping the header
    )
)

写入表格

# Create a new workbook and take its active worksheet.
workbook = Workbook()
sheet = workbook.active
# Append each entry of `data` as one row, in order.
for row in data:
    sheet.append(row)
# Save the workbook to the file "cxs.xlsx".
workbook.save("cxs.xlsx")

单元格颜色

# 选择单元格
# Select the cell to inspect. `sheet` is the worksheet variable used by the
# snippets above; the original referred to an undefined `ws`.
cell_ref = 'C1'
cell = sheet[cell_ref]
# Get the cell's fill object.
fill = cell.fill
# Check the fill type before reading a colour from it.
if fill.fill_type == 'solid':
    # Read the fill colour.
    color = fill.start_color
    # Report the cell actually inspected (the original message said "A1").
    print(f"The color of cell {cell_ref} is {color.rgb}")
else:
    print(f"Cell {cell_ref} does not have a solid fill color.")

zipfile

解压缩

# Extract every member of the archive `zip_file` into the directory `file_path`.
with zipfile.ZipFile(zip_file, "r") as zip_ref:
    zip_ref.extractall(file_path)

pytesseract ocr的使用

安装：pip install pytesseract

exe安装：https://github.com/UB-Mannheim/tesseract/wiki

安装完成后，把 .\Tesseract-OCR 目录添加到 PATH 环境变量

中文训练集：https://github.com/tesseract-ocr/tessdata/blob/main/chi_sim.traineddata

下载完成后，放到 .\Tesseract-OCR\tessdata

# If Tesseract is not on PATH, point pytesseract at the executable explicitly:
# pytesseract.pytesseract.tesseract_cmd = "C:\\Users\\14276\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe"

# Open each image in a `with` block so its file handle is closed after OCR.
with Image.open("cxs.png") as image:
    print(pytesseract.image_to_string(image))
with Image.open("0_big.png") as image:
    # lang='chi_sim' selects the chi_sim (Simplified Chinese) trained data.
    print(pytesseract.image_to_string(image, lang='chi_sim'))

moviepy

提取视频音频

from moviepy.editor import *

def split_audio(mkv_file_path, mp3_file_path):
    """Extract the audio track of a video file and write it to an audio file.

    Args:
        mkv_file_path: Path of the source video file.
        mp3_file_path: Path of the audio file to write.

    Raises:
        ValueError: If the video clip has no audio track.
    """
    video = VideoFileClip(mkv_file_path)
    try:
        audio = video.audio
        if audio is None:
            # Fail with a clear message rather than an AttributeError on
            # None.write_audiofile.
            raise ValueError(f"no audio track found in {mkv_file_path!r}")
        audio.write_audiofile(mp3_file_path)
    finally:
        # Always release the resources held by the clip, even on failure.
        video.close()

# Example usage
mkv_file_path = "Survivorman.S01E06.720p.HDTV.AC3.x264-NTb.mkv"
mp3_file_path = "06.mp3"
split_audio(mkv_file_path, mp3_file_path)

supervisor

参考：https://mp.weixin.qq.com/s/zqSyqCD-e3SeLc0tvFRk8A

IP代理地址显示

无穷大 float("inf")

SSE协议（Server-Sent Events、服务器发送事件）

局部符号表 locals()

列表字典排序

dateparser 自动解析时间

转换百分比

读取 jsonline文件

openpyxl

读取表格

写入表格

单元格颜色

zipfile

解压缩

pytesseract ocr的使用

moviepy

提取视频音频

supervisor

用于部署持久化进程