Python编程-常见库的使用

psutil 进程

import os

import psutil

# Walk every running process and read its basic identity.
for proc in psutil.process_iter():
    proc.name()  # name() is a method and must be called
    proc.pid  # pid is a plain attribute

# Terminate processes
os.system("taskkill /f /im chrome.exe")  # Windows: by image name
os.popen('taskkill.exe /pid:8888')  # Windows: by pid
os.system(f"kill -9 {proc.pid}")  # Linux: by pid

pandas

读取csv文件

import pandas as pd

# The file contains Chinese text, hence GBK; otherwise the default encoding is fine
df = pd.read_csv("安卓.csv", encoding="GBK")
# Number of rows
max_lines = len(df.index)
# Values of the first row
first_line_data = df.loc[0].values

datetime

当前时间

In [8]: from datetime import datetime

In [9]: n = datetime.now()
In [10]: n
Out[10]: datetime.datetime(2022, 6, 7, 9, 10, 9, 977949)
In [11]: n.day
Out[11]: 7
In [12]: n.hour
Out[12]: 9
In [13]: n.timestamp()
Out[13]: 1654564209.977949
 
# 星期二
In [4]: _now = datetime.now()
In [5]: _now.weekday()
Out[5]: 1

昨天日期

from datetime import datetime, timedelta

# Timestamp of yesterday at 00:00 local time.
# Subtracting a timedelta (rather than using `day - 1`) also works on the
# first day of a month, where `day - 1` is 0 and datetime() raises ValueError.
_now = datetime.now()
yesterday_ts = (
    datetime(year=_now.year, month=_now.month, day=_now.day) - timedelta(days=1)
).timestamp()

# Formatted date string
yes_day = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

时间差

In [15]: n
Out[15]: datetime.datetime(2022, 6, 7, 9, 10, 9, 977949)
In [17]: n - timedelta(days=5, hours=8, minutes=17)
Out[17]: datetime.datetime(2022, 6, 2, 0, 53, 9, 977949)

解析字符串

In [4]: datetime.strptime('2022-07-15', '%Y-%m-%d')
Out[4]: datetime.datetime(2022, 7, 15, 0, 0)

解析13位时间戳

In [4]: ts = 1653187235001

In [5]: from datetime import datetime

In [6]: datetime.fromtimestamp(ts / 1000).strftime('%Y-%m-%d %H:%M:%S')
Out[6]: '2022-05-22 10:40:35'

获取0点时间戳

from datetime import datetime, timedelta

# Midnight (00:00 local time) 180 days before 2023-09-12, as an integer Unix
# timestamp. Shifting the datetime by a timedelta keeps the result on local
# midnight even when a DST change falls inside the window; subtracting
# 86400 * 180 seconds from the timestamp would drift by the DST offset.
begin_ts = int(
    (datetime.strptime("2023-09-12", "%Y-%m-%d") - timedelta(days=180)).timestamp()
)

格式化输出

In [26]: n.strftime("%Y年%m月%d日 %H时%M分%S秒 ".encode("unicode_escape").decode()).encode().decode("unicode_escape")
Out[26]: '2022年06月07日 09时10分09秒 '

In [28]: n.strftime("%Y-%m-%d %H:%M")
Out[28]: '2022-06-07 09:10'

# 当天日期
In [3]: import datetime
In [4]: str(datetime.date.today())
Out[4]: '2024-10-30'

opencc

简繁体转换

from opencc import OpenCC

t = "繁體中文"  # sample traditional-Chinese input
t2s_con = OpenCC("t2s")  # traditional -> simplified
s = t2s_con.convert(t)

xmltodict

import xmltodict

def read_xml():
    """
    Stream-parse an XML file with xmltodict.

    Passing ``item_depth`` and ``item_callback`` makes ``xmltodict.parse``
    hand each element at depth 2 to ``handle_item`` instead of building the
    whole document in memory.
    """
    xml_file = "C:\\Users\\duoyi\\Desktop\\维基.xml"
    # Open through a context manager so the handle is closed even if parsing
    # raises; a bare open() passed inline is never closed explicitly.
    with open(xml_file, "rb") as xml_fp:
        xmltodict.parse(
            xml_input=xml_fp, item_depth=2, item_callback=handle_item
        )

    # A bz2-compressed dump can be streamed the same way:
    # gz_file = "D:\\迅雷下载\\zhwiki-latest-pages-articles.xml.bz2"
    # xmltodict.parse(BZ2File(gz_file), item_depth=2, item_callback=handle_item)
       
def handle_item(_, item):
    """
    Streaming callback for ``xmltodict.parse``.

    Args:
        _: Path of the current element (unused).
        item: The parsed element; its "title" entry is read here.

    Returns:
        True, so that parsing continues. xmltodict aborts with
        ``ParsingInterrupted`` as soon as the callback returns a falsy value,
        which an implicit ``None`` return would do after the first item.
    """
    title = item.get("title")  # placeholder for real per-item processing
    return True

csv

写入

import csv

with open('xxx.csv', 'w', newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # Header row first
    first_row = [header1, header2, header3]
    writer.writerow(first_row)
    # row_list is a list of rows: [[...], [...]]
    writer.writerows(row_list)

# 避免显示科学计算，精度丢失
writer.writerow([tradeId + "\t"])
# 解决中文乱码问题
encoding = "utf-8-sig"

逐行读取

import csv

with open("京东聊天.csv", "r") as fp:
    reader = csv.reader(fp)
    # Pull rows one at a time so a single undecodable row can be skipped
    # without aborting the whole read.
    while True:
        try:
            row = next(reader)
        except StopIteration:
            # End of file
            break
        except UnicodeDecodeError:
            # Skip the row that could not be decoded
            continue
        print(row)

检测文件编码

import chardet

# chardet.detect() needs raw bytes, so the file is opened in binary mode;
# text mode would decode first and hand detect() a str.
with open("京东聊天.csv", "rb") as fp:
    result = chardet.detect(fp.read())["encoding"]

转字典

csv_file = "vd_ingredient_20241226.csv"
dict_list = []

# DictReader yields one dict per data row, keyed by the header line;
# collect them all into dict_list.
with open(csv_file, encoding="utf8") as file:
    dict_list.extend(csv.DictReader(file))

opencv

安装：pip install opencv-python

图片展示

import cv2


def img_show(path):
    """
    Show an image in a resizable window until any key is pressed.

    Args:
        path: Image file path. Per the original note it must not contain
            Chinese characters.

    Raises:
        ValueError: If the image cannot be read from ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image: {path!r}")
    window_name = "Image"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)  # same value as the literal 0
    cv2.resizeWindow(window_name, width=img.shape[1], height=img.shape[0])
    cv2.imshow(window_name, img)
    cv2.waitKey(0)  # delay in milliseconds; 0 waits until a key is pressed
    cv2.destroyAllWindows()


img_show("C:\\Users\\Desktop\\my_blog_files\\home_index.png")

# 图片存储
cv2.imwrite('nologo.png', img)

PIL

图像处理

from PIL import Image

# Open an existing image
img = Image.open(path)
# Create a new image
img = Image.new(mode="RGB", size=(width, height))

# Check the image type: GIF | PNG | JPEG
if img.format == "PNG":
    pass

# Read the pixel value at a coordinate
img.getpixel((0, 0))
# Set the pixel value at a coordinate
img.putpixel((0, 0), (r, g, b))

img.convert('1')  # 1-bit black and white
img.convert('L')  # grayscale
img.convert('P')  # 8-bit colour
img.convert('RGBA')  # 32-bit colour
img.convert('CMYK')  # four-colour print mode
img.convert('F')  # 32-bit floating-point grayscale

生成PDF

# Decode every raw image and convert it to RGB.
image_list = [Image.open(BytesIO(content)).convert("RGB") for content in content_lst]

pdf_path = "C:\\Users\\Desktop\\xxx.pdf"
# The first image drives the save; the remaining ones are appended to it.
img1, image_list = image_list[0], image_list[1:]
img1.save(pdf_path, save_all=True, append_images=image_list)

图片缩放

image = Image.open(path)
image_w, image_h = image.size
# Shrink to half size; pass whole pixel counts rather than floats.
image.thumbnail((image_w // 2, image_h // 2))

按比例裁剪

image = Image.open("small.png")
width, height = image.size
# Crop box: from 175/370 of the width to the right edge, over the full height.
box = (width * 175 / 370, 0, width, height)
captcha = image.crop(box)
captcha.save("small.png")

截屏

img = ImageGrab.grab(bbox=(0, 212, 1300, 444))
img.save("cxs.png")

拼接

new_img = Image.new("RGBA", (width, height))
new_up_img = Image.new("RGBA", (width, height // 2))
new_but_img = Image.new("RGBA", (width, height // 2))
new_img.paste(new_up_img, (0, 0))  # 上半图
new_img.paste(new_but_img, (0, height // 2))  # 下半图
new_img.save(img_name)

collections

namedtuple，tuple子类，可命名元组

In [43]: User = namedtuple('User', ['name','age','height'])

In [44]: user = User(name='cxs', age=25, height=165)
In [45]: user._asdict()
Out[45]: OrderedDict([('name', 'cxs'), ('age', 25), ('height', 165)])

In [47]: clw = User._make(["clw", 29, 156])
In [48]: clw._asdict()
Out[48]: OrderedDict([('name', 'clw'), ('age', 29), ('height', 156)])

In [49]: user.name
Out[49]: 'cxs'

defaultdict

In [56]: d = defaultdict(int)
In [57]: d['cxs']
Out[57]: 0

In [58]: d = defaultdict(str)
In [59]: d['cxs']
Out[59]: ''

In [60]: d = defaultdict(list)
In [61]: d['cxs']
Out[61]: []

In [62]: d = defaultdict(dict)
In [63]: d['cxs']
Out[63]: {}

# 只在第一层赋值
In [72]: d["cxs"]["abc"]
KeyError: 'abc'

# 或者自定义function
In [66]: d = defaultdict(lambda : "cxs")
In [67]: d['cxs']
Out[67]: 'cxs'

字典也有类似功能

In [73]: cxs = {"name": "cxs"}

In [74]: cxs.setdefault("name", 123)  # 如果key存在，则不改变
Out[74]: 'cxs'
In [75]: cxs
Out[75]: {'name': 'cxs'}

In [76]: cxs.setdefault("age", 123)  # 如果key不存在，则改变
Out[76]: 123
In [77]: cxs
Out[77]: {'name': 'cxs', 'age': 123}

In [78]: cxs.get("gender", "man")  # key不存在，返回默认值
Out[78]: 'man'

区别于 getattr，只能用于获取属性值

In [79]: clw
Out[79]: User(name='clw', age=29, height=156)

In [81]: getattr(clw, "gender", "woman")
Out[81]: 'woman'

Counter ，统计元素出现次数

In [83]: counter = Counter("fdkjfdkjfkjf")

In [84]: counter
Out[84]: Counter({'f': 4, 'd': 2, 'k': 3, 'j': 3})

In [85]: counter.update("aberhj")
In [86]: counter
Out[86]:
Counter({'f': 4,
         'd': 2,
         'k': 3,
         'j': 4,
         'a': 1,
         'b': 1,
         'e': 1,
         'r': 1,
         'h': 1})

In [87]: counter.most_common(2)  # top n 问题
Out[87]: [('f', 4), ('j', 4)]

ChainMap ，链接dict

m1 = {'Type': 'admin', 'codeID': '00001'}
m2 = {'name': 'woodname','codeID': '00002'}
m = ChainMap(m1, m2, ...)

In [90]: m
Out[90]: ChainMap({'Type': 'admin', 'codeID': '00001'}, {'name': 'woodname', 'codeID': '00002'})

In [91]: m.maps
Out[91]: [{'Type': 'admin', 'codeID': '00001'}, {'name': 'woodname', 'codeID': '00002'}]

for i in m.items():
    print(i)
# 输出：
# ('name', 'woodname')
# ('codeID', '00001')
# ('Type', 'admin')

In [93]: m['name']
Out[93]: 'woodname'
In [94]: m['codeID']  # 当key重复时以最前一个为准
Out[94]: '00001'
In [95]: m.maps[1]["codeID"]
Out[95]: '00002'

# 新增map
m3 = {'data': '888'}
m = m.new_child(m3)

sqlite3

import sqlite3

# Connecting creates the database file if it does not exist yet
conn = sqlite3.connect("xxx.db")
cursor = conn.cursor()

# Initialise the table; "if not exists" keeps the script re-runnable
cursor.execute(
    "create table if not exists videos (href varchar(100) primary key)"
)

# Query: bind values with "?" placeholders instead of formatting them into
# the SQL text, which avoids quoting bugs and SQL injection
href = "zic"
cursor.execute("select * from videos where href = ?", (href,))
result = cursor.fetchall()

# Insert a row (ignored when the primary key already exists)
cursor.execute("insert or ignore into videos (href) values (?)", (href,))

# Commit the transaction, otherwise the insert is not persisted
conn.commit()

cursor.close()
conn.close()

logging

标准头部

在init文件夹中初始化

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)

xlsxwriter（python写入Excel表格）

初始化

book = xlsxwriter.Workbook(
    filename="筛选.xlsx",
    options={
        "strings_to_urls": True,  # 超链接自动转换
        "strings_to_numbers": True,
        "default_format_properties": {
            "font_name": "微软雅黑",  # 字体. 默认值 "Arial"
            "font_size": 12,  # 字号. 默认值 11
            "align": "center",
            "valign": "vcenter",
        },
    },
)

工作簿

sheet = book.add_worksheet(name='')

# 设置每列格式
sheet.set_column(0, 10, width=25, cell_format=header_format)
# 写入单个数据
sheet.write(row=0, col=0, data='cxs')
# 写入多个数据
sheet.write_row(row=0, col=0, data=["cxs", 24])
# 单元格合并
sheet.merge_range(first_row, first_col, last_row, last_col, data)

自定义格式

header_format = book.add_format(
    {
        "bg_color": "#98F5FF", 
        "bold": True
    }
)

文件保存

book.close()

string

# 列出所有英文字母
In [3]: string.ascii_letters
Out[3]: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
# 所有数字
In [4]: string.digits
Out[4]: '0123456789'
# 所有标点符号
In [9]: string.punctuation
Out[9]: '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
# 所有空格
In [11]: string.whitespace
Out[11]: ' \t\n\r\x0b\x0c'

APScheduler 定时任务

参考：https://www.cnblogs.com/zhaoyingjie/p/9664081.html

from apscheduler.schedulers.twisted import TwistedScheduler

scheduler = TwistedScheduler()

# Interval trigger: run every 30 seconds
scheduler.add_job(func, trigger="interval", seconds=30)
# Cron trigger: run every day at 04:15
scheduler.add_job(func, "cron", args=("xxx",), hour="4", minute="15")
# Cron trigger: "*/2" on the second field fires every 2 seconds
# (not every half second)
scheduler.add_job(func, "cron", second="*/2")
# Date trigger: run once at the given point in time
scheduler.add_job(func, "date", args=("tieba_user",), run_date=next_time)

scheduler.start()

异步任务

import asyncio

from apscheduler.schedulers.asyncio import AsyncIOScheduler

scheduler = AsyncIOScheduler()
# Cron trigger: run dy_login at minute 0 of every hour from 00:00 through 08:00
scheduler.add_job(dy_login, 'cron', hour='0-8', minute=0)
scheduler.start()
# Keep the event loop running so the scheduled jobs can fire
asyncio.get_event_loop().run_forever()