IP代理地址显示

1
2
3
4
5
6
7
# Ask ifconfig.me for the public IP this machine's requests go out from
# (useful for checking which IP a proxy exposes).
url = "https://ifconfig.me/ip"
# A timeout is set because requests otherwise waits indefinitely on a dead endpoint.
requests.get(url, timeout=10).text
'112.96.242.221'  # sample output

# myip.ipip.net returns the IP together with location / carrier text.
url = "https://myip.ipip.net/"
requests.get(url, timeout=10).text
'当前 IP:112.96.242.221 来自于:中国 广东 广州 联通\n'  # sample output

无穷大 float("inf")

局部符号表 locals()

1
2
3
4
5
6
7
8
def my_function():
    """Print this function's local symbol table as returned by ``locals()``."""
    a = 10
    b = 20
    print(locals())  # show the local variables of the current function

my_function()

输出:{'a': 10, 'b': 20}

列表字典排序

1
2
3
4
5
6
7
8
9
from operator import itemgetter

# A list that holds several dicts.
list_of_dicts = [
    {'name': 'Alice', 'age': 25},
    {'name': 'Bob', 'age': 20},
    {'name': 'Charlie', 'age': 30},
]

# Sort ascending by the 'age' field. sorted() builds a new list, so
# list_of_dicts keeps its original order.
sorted_list = sorted(list_of_dicts, key=itemgetter('age'))

dateparser 自动解析时间

转换百分比

1
2
def format_percentage(ratio):
    """Return *ratio* as a whole-number percentage string.

    The ratio is multiplied by 100 and formatted with zero decimal places,
    e.g. ``0.25`` becomes ``"25%"``.
    """
    return f"{ratio * 100:.0f}%"

读取 jsonline文件

1
2
3
4
5
6
7
8
9
10
"""
大概长这样
{}
{}
{}
"""

# Iterate over the JSON Lines file and print each decoded record in turn.
with open("spus.jsonl", encoding="utf8") as fp:
    for item in jsonlines.Reader(fp):
        print(item)

openpyxl

读取表格

1
2
3
4
5
6
7
# Read every data row of the active worksheet into `data`.
workbook = load_workbook(file_path)
sheet = workbook.active  # the workbook's active worksheet
data = []  # collected rows; initialised here so the snippet runs on its own
for row in sheet.iter_rows(
    values_only=True,  # yield only each row's cell values
    min_row=2,  # start at the second row, skipping the header
):
    data.append(row)

写入表格

1
2
3
4
5
# Create a new workbook, append each row of `data` to its active sheet,
# then save it to disk.
workbook = Workbook()
sheet = workbook.active
for row in data:
    sheet.append(row)
workbook.save("cxs.xlsx")

单元格颜色

1
2
3
4
5
6
7
8
9
10
11
# Report the fill colour of one worksheet cell.
# The reference is kept in a variable so the printed messages always name the
# cell that was actually inspected (they previously said "A1" while reading C1).
cell_ref = 'C1'
# Select the cell.
cell = ws[cell_ref]
# Get its fill object.
fill = cell.fill
# Check the fill type; only a 'solid' fill is reported as having a colour.
if fill.fill_type == 'solid':
    # Read the fill colour.
    color = fill.start_color
    print(f"The color of cell {cell_ref} is {color.rgb}")
else:
    print(f"Cell {cell_ref} does not have a solid fill color.")

zipfile

解压缩

1
2
# Extract every member of the archive `zip_file` into the directory `file_path`.
with zipfile.ZipFile(zip_file, "r") as zip_ref:
    zip_ref.extractall(file_path)

pytesseract ocr的使用

安装:pip install pytesseract

exe安装:https://github.com/UB-Mannheim/tesseract/wiki

安装完成后,把 Tesseract-OCR 的安装目录(tesseract.exe 所在目录)添加到 PATH 环境变量

中文训练集:https://github.com/tesseract-ocr/tessdata/blob/main/chi_sim.traineddata

下载完成后,放到 .\Tesseract-OCR\tessdata

1
2
3
4
5
# If Tesseract is not on the PATH, point pytesseract at the executable explicitly:
# pytesseract.pytesseract.tesseract_cmd = "C:\\Users\\14276\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe"

# OCR with the default language. The `with` block closes the image file
# once the text has been extracted.
with Image.open("cxs.png") as image:
    print(pytesseract.image_to_string(image))
# OCR with the Simplified Chinese language data (chi_sim).
with Image.open("0_big.png") as image:
    print(pytesseract.image_to_string(image, lang='chi_sim'))

moviepy

提取视频音频

1
2
3
4
5
6
7
8
9
10
11
from moviepy.editor import *

def split_audio(mkv_file_path, mp3_file_path):
    """Extract the audio track of a video file and write it to an audio file.

    Args:
        mkv_file_path: Path of the source video file.
        mp3_file_path: Path of the audio file to write.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(mkv_file_path)
    try:
        audio = video.audio
        if audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"no audio track found in {mkv_file_path!r}")
        audio.write_audiofile(mp3_file_path)
    finally:
        # Always release the resources held by the clip, even on failure.
        video.close()

# Example usage: extract the audio track of one video file into "06.mp3".
mkv_file_path = "Survivorman.S01E06.720p.HDTV.AC3.x264-NTb.mkv"
mp3_file_path = "06.mp3"
split_audio(mkv_file_path, mp3_file_path)

supervisor

参考:https://mp.weixin.qq.com/s/zqSyqCD-e3SeLc0tvFRk8A

supervisor 是进程管理工具,用于部署并守护需要长期运行的常驻进程(进程异常退出后可自动重启)