psutil 进程

Python常用库之psutil使用指南 - 知乎

1
2
3
4
5
6
7
8
9
10
import os
import psutil

# Walk every running process. name() is a method and must be called;
# pid is a plain attribute.
for proc in psutil.process_iter():
    proc.name()
    proc.pid

# 终止进程
os.system("taskkill /f /im chrome.exe") # windows
os.popen('taskkill.exe /pid:8888') # pid
os.system(f"kill -9 {proc.pid}") # linux

pandas

读取csv文件

1
2
3
4
5
6
7
import pandas as pd

df = pd.read_csv("安卓.csv", encoding="latin-1")
# 行数
max_lines = df.shape[0]
# 第一行数据
first_line_data = df.loc[0].values

datetime

当前时间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
In [8]: from datetime import datetime

In [9]: n = datetime.now()
In [10]: n
Out[10]: datetime.datetime(2022, 6, 7, 9, 10, 9, 977949)
In [11]: n.day
Out[11]: 7
In [12]: n.hour
Out[12]: 9
In [13]: n.timestamp()
Out[13]: 1654564209.977949

# 星期二
In [4]: _now = datetime.now()
In [5]: _now.weekday()
Out[5]: 1

昨天日期

1
2
3
4
5
6
from datetime import datetime, timedelta

# Timestamp of yesterday at 00:00. Subtract a timedelta from today's midnight
# instead of building datetime(day=today - 1), which raises ValueError on the
# first day of a month.
_now = datetime.now()
today_midnight = datetime(year=_now.year, month=_now.month, day=_now.day)
yesterday_ts = (today_midnight - timedelta(days=1)).timestamp()

# Yesterday's date formatted as YYYY-MM-DD
yes_day = (_now - timedelta(days=1)).strftime("%Y-%m-%d")

时间差

1
2
3
4
In [15]: n
Out[15]: datetime.datetime(2022, 6, 7, 9, 10, 9, 977949)
In [17]: n - timedelta(days=5, hours=8, minutes=17)
Out[17]: datetime.datetime(2022, 6, 2, 0, 53, 9, 977949)

解析字符串

1
2
In [4]: datetime.strptime('2022-07-15', '%Y-%m-%d')
Out[4]: datetime.datetime(2022, 7, 15, 0, 0)

解析13位时间戳

1
2
3
4
5
6
In [4]: ts = 1653187235001

In [5]: from datetime import datetime

In [6]: datetime.fromtimestamp(ts / 1000).strftime('%Y-%m-%d %H:%M:%S')
Out[6]: '2022-05-22 10:40:35'

获取昨天日期

1
(datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')

获取0点时间戳

1
2
3
begin_ts = (
int(datetime.strptime("2023-09-12", "%Y-%m-%d").timestamp()) - 86400 * 180
)

格式化输出

1
2
3
4
5
In [26]: n.strftime("%Y年%m月%d日 %H时%M分%S秒 ".encode("unicode_escape").decode()).encode().decode("unicode_escape")
Out[26]: '2022年06月07日 09时10分09秒 '

In [28]: n.strftime("%Y-%m-%d %H:%M")
Out[28]: '2022-06-07 09:10'

opencc

简繁体转换

1
2
3
from opencc import OpenCC
t2s_con = OpenCC("t2s") # traditional -> simplified
s = t2s_con.convert(t)

xmltodict

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import xmltodict

def read_xml():
    """
    Stream-parse an XML file with xmltodict.

    The file is opened in binary mode and handed to ``xmltodict.parse`` with
    ``item_depth=2`` and ``item_callback=handle_item``, so each element at
    depth 2 is passed to the callback. The callback must return a truthy
    value for parsing to continue.
    """
    xml_file = "C:\\Users\\duoyi\\Desktop\\维基.xml"
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(xml_file, "rb") as fp:
        xmltodict.parse(xml_input=fp, item_depth=2, item_callback=handle_item)

    # A bz2-compressed dump can be streamed the same way via bz2.BZ2File:
    # gz_file = "D:\\迅雷下载\\zhwiki-latest-pages-articles.xml.bz2"
    # xmltodict.parse(BZ2File(gz_file), item_depth=2, item_callback=handle_item)

def handle_item(_, item):
    """
    Streaming callback for ``xmltodict.parse``.

    Args:
        _: path to the current element (unused).
        item: the parsed element; read here with ``.get``.

    Returns:
        True, so that xmltodict keeps parsing. A falsy return value
        (including the implicit None) makes xmltodict abort with
        ParsingInterrupted after the first item.
    """
    title = item.get("title")  # process the extracted title here
    return True

csv

写入

1
2
3
4
5
6
7
8
9
10
11
12
import csv

# If Chinese text looks garbled when the file is opened in Excel,
# use encoding="utf-8-sig" instead of "utf-8".
with open('xxx.csv', 'w', newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    first_row = [header1, header2, header3]
    writer.writerow(first_row)
    # row_list is a list of rows: [[...], [...]]
    writer.writerows(row_list)

    # Append a tab to a long numeric ID to avoid scientific-notation display
    # and the resulting loss of precision.
    writer.writerow([tradeId + "\t"])

逐行读取

1
2
3
4
5
6
7
8
9
import csv

with open("京东聊天.csv", "r", newline="") as fp:
    reader = csv.reader(fp)
    while True:
        try:
            row = next(reader)
        except StopIteration:
            # No more rows: end of file reached.
            break
        except UnicodeDecodeError:
            # Skip input that cannot be decoded and keep reading.
            continue
        print(row)

检测文件编码

1
2
3
4
import chardet

# chardet.detect() works on raw bytes, so open the file in binary mode;
# text mode would try to decode the content before detection can run.
with open("京东聊天.csv", "rb") as fp:
    result = chardet.detect(fp.read())["encoding"]

opencv

图片展示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import cv2


def img_show(path):
    """
    Show an image in a resizable window; press any key to close it.

    Args:
        path: image file path; it must not contain Chinese characters.

    Raises:
        FileNotFoundError: if cv2.imread could not load the image.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"cv2.imread could not load image: {path}")
    window_name = "Image"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)  # same flag value as 0
    cv2.resizeWindow(window_name, width=img.shape[1], height=img.shape[0])
    cv2.imshow(window_name, img)
    cv2.waitKey(0)  # delay in milliseconds; 0 waits for a key press
    cv2.destroyAllWindows()


img_show("C:\\Users\\Desktop\\my_blog_files\\home_index.png")

# 图片存储
cv2.imwrite('nologo.png', img)

PIL

图像处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from PIL import Image

# Open an existing image
img = Image.open(path)
# Create a new image
img = Image.new(mode="RGB", size=(width, height))

# Check the image file format: GIF | PNG | JPEG
if img.format == "PNG":
    pass

# Read the pixel value at a coordinate
img.getpixel((0, 0))
# Set the pixel value at a coordinate
img.putpixel((0, 0), (r, g, b))

img.convert('1')  # 1-bit black and white
img.convert('L')  # grayscale
img.convert('P')  # 8-bit palette color
img.convert('RGBA')  # 32-bit color with alpha
img.convert('CMYK')  # four-color print separation
img.convert('F')  # 32-bit floating-point grayscale

生成PDF

1
2
3
4
5
6
7
8
image_list = []
for content in content_lst:
img = Image.open(BytesIO(content))
image_list.append(img.convert("RGB"))

pdf_path = "C:\\Users\\Desktop\\xxx.pdf"
img1 = image_list.pop(0)
img1.save(pdf_path, save_all=True, append_images=image_list)

图片缩放

1
2
3
# Image.open() requires the file to open; `path` is the image file path.
image = Image.open(path)
image_w, image_h = image.size
# Shrink the image in place to half its size, using whole-pixel dimensions.
image.thumbnail((image_w // 2, image_h // 2))

按比例裁剪

1
2
3
4
5
6
7
8
# Crop the part of small.png to the right of x = width * 175 / 370 (full
# height) and write the result back over the same file.
image = Image.open("small.png")
width, height = image.size
crop_box = (width * 175 / 370, 0, width, height)  # (left, top, right, bottom)
captcha = image.crop(crop_box)
captcha.save("small.png")

截屏

1
2
img = ImageGrab.grab(bbox=(0, 212, 1300, 444))
img.save("cxs.png")

collections

  • namedtuple,tuple子类,可命名元组

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    In [43]: User = namedtuple('User', ['name','age','height'])

    In [44]: user = User(name='cxs', age=25, height=165)
    In [45]: user._asdict()
    Out[45]: OrderedDict([('name', 'cxs'), ('age', 25), ('height', 165)])

    In [47]: clw = User._make(["clw", 29, 156])
    In [48]: clw._asdict()
    Out[48]: OrderedDict([('name', 'clw'), ('age', 29), ('height', 156)])

    In [49]: user.name
    Out[49]: 'cxs'
  • defaultdict

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    In [56]: d = defaultdict(int)
    In [57]: d['cxs']
    Out[57]: 0

    In [58]: d = defaultdict(str)
    In [59]: d['cxs']
    Out[59]: ''

    In [60]: d = defaultdict(list)
    In [61]: d['cxs']
    Out[61]: []

    In [62]: d = defaultdict(dict)
    In [63]: d['cxs']
    Out[63]: {}

    # 只在第一层赋值
    In [72]: d["cxs"]["abc"]
    KeyError: 'abc'

    # 或者自定义function
    In [66]: d = defaultdict(lambda : "cxs")
    In [67]: d['cxs']
    Out[67]: 'cxs'

    字典也有类似功能

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    In [73]: cxs = {"name": "cxs"}

    In [74]: cxs.setdefault("name", 123) # 如果key存在,则不改变
    Out[74]: 'cxs'
    In [75]: cxs
    Out[75]: {'name': 'cxs'}

    In [76]: cxs.setdefault("age", 123) # 如果key不存在,则改变
    Out[76]: 123
    In [77]: cxs
    Out[77]: {'name': 'cxs', 'age': 123}

    In [78]: cxs.get("gender", "man") # key不存在,返回默认值
    Out[78]: 'man'

    区别于 getattr,只能用于获取属性值

    1
    2
    3
    4
    5
    In [79]: clw
    Out[79]: User(name='clw', age=29, height=156)

    In [81]: getattr(clw, "gender", "woman")
    Out[81]: 'woman'
  • Counter ,统计元素出现次数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    In [83]: counter = Counter("fdkjfdkjfkjf")

    In [84]: counter
    Out[84]: Counter({'f': 4, 'd': 2, 'k': 3, 'j': 3})

    In [85]: counter.update("aberhj")
    In [86]: counter
    Out[86]:
    Counter({'f': 4,
    'd': 2,
    'k': 3,
    'j': 4,
    'a': 1,
    'b': 1,
    'e': 1,
    'r': 1,
    'h': 1})

    In [87]: counter.most_common(2) # top n 问题
    Out[87]: [('f', 4), ('j', 4)]
  • ChainMap ,链接dict

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    m1 = {'Type': 'admin', 'codeID': '00001'}
    m2 = {'name': 'woodname','codeID': '00002'}
    m = ChainMap(m1, m2, ...)

    In [90]: m
    Out[90]: ChainMap({'Type': 'admin', 'codeID': '00001'}, {'name': 'woodname', 'codeID': '00002'})

    In [91]: m.maps
    Out[91]: [{'Type': 'admin', 'codeID': '00001'}, {'name': 'woodname', 'codeID': '00002'}]

    for i in m.items():
    print(i)
    # 输出:
    # ('name', 'woodname')
    # ('codeID', '00001')
    # ('Type', 'admin')

    In [93]: m['name']
    Out[93]: 'woodname'
    In [94]: m['codeID'] # 当key重复时以最前一个为准
    Out[94]: '00001'
    In [95]: m.maps[1]["codeID"]
    Out[95]: '00002'

    # 新增map
    m3 = {'data': '888'}
    m = m.new_child(m3)

sqlite3

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import sqlite3

# Connecting creates the database file if it does not exist yet.
conn = sqlite3.connect("xxx.db")
cursor = conn.cursor()

# Create the table ("if not exists" keeps the snippet re-runnable).
cursor.execute("create table if not exists videos (href varchar(100) primary key)")

# Insert a row. Bind values with "?" placeholders instead of building the SQL
# text with f-strings, which breaks on quotes and allows SQL injection.
href = "zifc"
cursor.execute("insert or ignore into videos (href) values (?)", (href,))

# Commit the transaction, otherwise the insert is not persisted.
conn.commit()

# Query
cursor.execute("select * from videos where href = ?", (href,))
result = cursor.fetchall()

cursor.close()
conn.close()

logging

标准头部

在init文件夹中初始化

1
2
3
4
5
6
import logging
import sys

# Configure the root logger: INFO and above go to stdout, each record
# prefixed with timestamp, source path, line number and level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s",
    stream=sys.stdout,
)
# Module-level logger named after the current module.
logger = logging.getLogger(__name__)

xlsxwriter(python写入Excel表格)

初始化

1
2
3
4
5
6
7
8
9
10
11
12
13
book = xlsxwriter.Workbook(
filename="筛选.xlsx",
options={
"strings_to_urls": True, # 超链接自动转换
"strings_to_numbers": True,
"default_format_properties": {
"font_name": "微软雅黑", # 字体. 默认值 "Arial"
"font_size": 12, # 字号. 默认值 11
"align": "center",
"valign": "vcenter",
},
},
)

工作簿

1
2
3
4
5
6
7
8
9
10
sheet = book.add_worksheet(name='')

# Set width and format for columns 0 through 10
sheet.set_column(0, 10, width=25, cell_format=header_format)
# Write a single cell. write() is declared as write(row, col, *args), so the
# value cannot be passed as a `data=` keyword; pass it positionally.
sheet.write(0, 0, 'cxs')
# Write several cells along a row, starting at (row, col)
sheet.write_row(row=0, col=0, data=["cxs", 24])
# Merge a range of cells and write data into it
sheet.merge_range(first_row, first_col, last_row, last_col, data)

自定义格式

1
2
3
4
5
6
header_format = book.add_format(
{
"bg_color": "#98F5FF",
"bold": True
}
)

文件保存

1
book.close()

string

1
2
3
4
5
6
7
8
9
10
11
12
# 列出所有英文字母
In [3]: string.ascii_letters
Out[3]: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
# 所有数字
In [4]: string.digits
Out[4]: '0123456789'
# 所有标点符号
In [9]: string.punctuation
Out[9]: '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
# 所有空白字符
In [11]: string.whitespace
Out[11]: ' \t\n\r\x0b\x0c'

ApsScheduler 定时任务

参考:https://www.cnblogs.com/zhaoyingjie/p/9664081.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
from apscheduler.schedulers.twisted import TwistedScheduler

scheduler = TwistedScheduler()

# Fixed interval: run every 30 seconds
scheduler.add_job(func, trigger="interval", seconds=30)
# Daily at a fixed time (04:15)
scheduler.add_job(func, "cron", args=("xxx",), hour="4", minute="15")
# Cron with second="*/2" fires every 2 seconds (not every half second)
scheduler.add_job(func, "cron", second="*/2")
# One-off run at a specific point in time
scheduler.add_job(func, "date", args=("tieba_user",), run_date=next_time)

scheduler.start()

异步任务

1
2
3
4
5
6
import asyncio

from apscheduler.schedulers.asyncio import AsyncIOScheduler

scheduler = AsyncIOScheduler()
# Cron trigger: minute 0 of every hour from 0 through 8
scheduler.add_job(dy_login, 'cron', hour='0-8', minute=0)
scheduler.start()
# Keep the event loop running so the scheduled jobs can fire.
asyncio.get_event_loop().run_forever()