1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
| ''' 创建连接 ''' mongo_con = pymongo.MongoClient() cxs_col = mongo_con.get_database("spider").get_collection("baikecitiao_cxs")
>>> from pymongo import IndexModel, ASCENDING, DESCENDING >>> index1 = IndexModel([("hello", DESCENDING), ... ("world", ASCENDING)], name="hello_world") >>> index2 = IndexModel([("goodbye", DESCENDING)], unique=True) >>> db.test.create_indexes([index1, index2]) >>> db.test.create_index([("world", ASCENDING)])
index_info = self.collection_order.index_information() if "tradeId_-1" in index_info: pass
''' 查询条件 '''
col.find({}, {"html": 0})
col.find({}, {"html": 1})
{"_id": {"$gt": start_id}}
{"user_id": {"$in": [a, b, c]}}
{"appendReview": {"$ne": []}}
{"$or": [{"category": "30011"}, {"user": "cxs"}, {"age": "25"}]}
{_field: {"$regex": "2022-08-29"}}
{"data.product_item": {"$size": 2}}
{"data.product_item.1": {"$exists": True}}
''' 相关操作 ''' for doc in col.find({}).limit(100).skip(100).sort('age', pymongo.ASCENDING): pass
doc = col.find_one({"_id": doc["_id"]})
col.update_one( filter={"_id": doc["_id"]}, update={"$set": {"html": html}}, upsert=True, )
result = collection.delete_one(filter=_query) result.deleted_count
''' 批量拉取数据 ''' fetch_all = collection.find({}) for doc in fetch_all.batch_size(10): pass
_count = collection.count_documents(filter={})
''' bulk_write -> Send a batch of write operations to the server 减少网络IO ''' >>> from pymongo import InsertOne, DeleteOne, ReplaceOne >>> requests = [InsertOne({'y': 1}), DeleteOne({'x': 1}), ReplaceOne({'w': 1}, {'z': 1}, upsert=True)] >>> result = db.test.bulk_write(requests)
''' aggregate 管道查询 '''
$match 匹配
$exists 字段是否存在
$sample 随机取样
pipeline = [
    {"$match": {"name": {"$regex": "cxs"}, "uploaded": {"$exists": False}}},
    {"$sample": {"size": size}},
]
for doc in col.aggregate(pipeline):
    doc
col.aggregate(pipeline).next()
|