解锁并提取Linux客户端微信数据库 (vibe coded)
1# -*- coding: utf-8 -*-#
2# -------------------------------------------------------------------------------
3# Name: Favorite.py
4# Description: 负责处理wx收藏数据库
5# Author: xaoyaoo
6# Date: 2024/05/18
7# -------------------------------------------------------------------------------
8from collections import defaultdict
9
10from .db_base import DatabaseBase
11from .utils import timestamp2str, xml2dict
12
13
14# * FavItems:收藏的消息条目列表
15# * FavDataItem:收藏的具体数据。大概可以确定以下两点
16# * 即使只是简单收藏一篇公众号文章也会在 FavDataItem 中有一个对应的记录
17# * 对于收藏的合并转发类型的消息,合并转发中的每一条消息在 FavDataItem 中都是一个独立的记录
18# * FavTags:为收藏内容添加的标签
19
20
class FavoriteHandler(DatabaseBase):
    """Query helper for the WeChat favorites database.

    Reads the ``FavItems``, ``FavDataItem``, ``FavTagDatas`` and
    ``FavBindTagDatas`` tables through the ``tables_exist`` / ``execute``
    helpers inherited from ``DatabaseBase``.
    """

    _class_name = "Favorite"
    Favorite_required_tables = ["FavItems", "FavDataItem", "FavTagDatas", "FavBindTagDatas"]

    def get_tags(self, LocalID=None):
        """Return the tags stored in ``FavTagDatas``.

        :param LocalID: LocalID of a single tag to look up; ``None`` (the
            default) selects every tag.
        :return: ``{LocalID: TagName}``. Empty when the table is missing or
            the query yields no rows.
        """
        if not self.tables_exist("FavTagDatas"):
            return {}
        if LocalID is None:
            sql = "select LocalID, TagName from FavTagDatas order by ServerSeq"
        else:
            # The value is embedded in a single-quoted SQL literal, so any
            # single quote inside it must be doubled; otherwise a crafted
            # value could terminate the literal and alter the statement.
            safe_id = str(LocalID).replace("'", "''")
            sql = "select LocalID, TagName from FavTagDatas where LocalID = '%s' order by ServerSeq " % safe_id
        # get_favorite() treats a None result from execute() as "no rows";
        # do the same here instead of failing while iterating None.
        rows = self.execute(sql) or []  # e.g. [(1, '程序语言类'), (2, '账单')]
        return {row[0]: row[1] for row in rows}

    def get_FavBindTags(self):
        """Return the tag names bound to each favorite item.

        :return: ``[(FavLocalID, TagName), ...]`` with duplicate pairs removed
            by the query. Empty when either tag table is missing or the
            query yields no rows.
        """
        # Same guard get_tags() applies before touching the tag tables.
        if not self.tables_exist(["FavBindTagDatas", "FavTagDatas"]):
            return []
        sql = ("select DISTINCT A.FavLocalID, B.TagName "
               "from FavBindTagDatas A, FavTagDatas B where A.TagLocalID = B.LocalID")
        return self.execute(sql) or []

    def get_favorite(self):
        """Return every favorite item together with its data rows and tags.

        Each returned dict holds the ``FavItems`` columns listed in
        ``FavItemsFields`` plus three derived keys:

        * ``TypeName`` - result of ``Favorite_type_converter(Type)``
        * ``FavData``  - list of ``FavDataItem`` rows (as dicts) whose
          ``FavLocalID`` matches the item, in ``RecId`` order
        * ``Tags``     - list of tag names bound to the item

        ``UpdateTime`` is passed through ``timestamp2str`` and ``XmlBuf``
        through ``xml2dict``. Top-level ``None`` values are replaced by ``""``.

        :return: list of dicts ordered by ``UpdateTime`` descending, or
            ``False`` when the required tables are missing or ``FavItems``
            has no rows.
        """
        # Column name -> human-readable description. Only the keys (and
        # their order) are used to build the SELECT lists below.
        FavItemsFields = {
            "FavLocalID": "本地收藏ID",
            "SvrFavId": "服务器收藏ID",
            "SourceId": "源ID",
            "Type": "类型",
            "SourceType": "源类型",
            "LocalStatus": "本地状态",
            "Flag": "标记",
            "Status": "状态",
            "FromUser": "源用户",
            "RealChatName": "实际聊天名称",
            "SearchKey": "搜索关键字",
            "UpdateTime": "更新时间",
            "reseverd0": "预留字段0",
            "XmlBuf": "XML缓冲区"
        }
        FavDataItemFields = {
            "FavLocalID": "本地收藏ID",
            "Type": "类型",
            "DataId": "数据ID",
            "HtmlId": "HTML ID",
            "Datasourceid": "数据源ID",
            "Datastatus": "数据状态",
            "Datafmt": "数据格式",
            "Datatitle": "数据标题",
            "Datadesc": "数据描述",
            "Thumbfullmd5": "缩略图全MD5",
            "Thumbhead256md5": "缩略图头256MD5",
            "Thumbfullsize": "缩略图全尺寸",
            "fullmd5": "全MD5",
            "head256md5": "头256MD5",
            "fullsize": "全尺寸",
            "cdn_thumburl": "CDN缩略图URL",
            "cdn_thumbkey": "CDN缩略图KEY",
            "thumb_width": "缩略图宽度",
            "thumb_height": "缩略图高度",
            "cdn_dataurl": "CDN数据URL",
            "cdn_datakey": "CDN数据KEY",
            "cdn_encryver": "CDN加密版本",
            "duration": "时长",
            "stream_weburl": "流媒体WEB URL",
            "stream_dataurl": "流媒体数据URL",
            "stream_lowbandurl": "流媒体低带宽URL",
            "sourcethumbpath": "源缩略图路径",
            "sourcedatapath": "源数据路径",
            "stream_videoid": "流媒体视频ID",
            "Rerserved1": "保留字段1",
            "Rerserved2": "保留字段2",
            "Rerserved3": "保留字段3",
            "Rerserved4": "保留字段4",
            "Rerserved5": "保留字段5",
            "Rerserved6": "保留字段6",
            "Rerserved7": "保留字段7"
        }

        if not self.tables_exist(["FavItems", "FavDataItem"]):
            return False

        item_columns = list(FavItemsFields)
        data_columns = list(FavDataItemFields)
        sql1 = "select " + ",".join(item_columns) + " from FavItems order by UpdateTime desc"
        sql2 = "select " + ",".join(data_columns) + " from FavDataItem B order by B.RecId asc"

        FavItemsList = self.execute(sql1)
        if not FavItemsList:
            # None (query failed) or no favorites at all.
            return False
        FavDataItemList = self.execute(sql2) or []

        # Group the data rows by FavLocalID (first selected column),
        # appending in place so the grouping stays linear in the row count.
        FavDataDict = {}
        for row in FavDataItemList:
            FavDataDict.setdefault(row[0], []).append(dict(zip(data_columns, row)))

        # Group the bound tag names by FavLocalID.
        FavTagsDict = {}
        for FavLocalID, TagName in self.get_FavBindTags() or []:
            FavTagsDict.setdefault(FavLocalID, []).append(TagName)

        # Assembled with plain Python so the result does not depend on an
        # optional third-party package being installed.
        rdata = []
        for row in FavItemsList:
            record = dict(zip(item_columns, row))
            record["UpdateTime"] = timestamp2str(record["UpdateTime"])
            record["XmlBuf"] = xml2dict(record["XmlBuf"])
            record["TypeName"] = Favorite_type_converter(record["Type"])
            record["FavData"] = FavDataDict.get(record["FavLocalID"], [])
            record["Tags"] = FavTagsDict.get(record["FavLocalID"], [])
            # Callers have always received "" rather than None for missing
            # top-level values; keep that shape.
            rdata.append({key: ("" if value is None else value) for key, value in record.items()})
        return rdata
157
158
# Favorite type ID -> display name. Built once at import time and only ever
# read, so lookups of unknown IDs cannot add entries to it.
_FAVORITE_TYPE_NAMES = {
    1: "文本",  # text (tested)
    2: "图片",  # image (tested)
    3: "语音",  # voice
    4: "视频",  # video (tested)
    5: "链接",  # link (tested)
    6: "位置",  # location
    7: "小程序",  # mini program
    8: "文件",  # file (tested)
    14: "聊天记录",  # chat history (tested)
    16: "群聊视频",  # video from a group chat (unconfirmed)
    18: "笔记",  # note (tested)
}


def Favorite_type_converter(type_id_or_name: "int | str"):
    """Convert between a favorite type ID and its display name.

    * ID (``int``)   => name (``str``); unknown IDs give ``"未知"``.
    * name (``str``) => ID (``int``); unknown names give ``(0, 0)``.

    :param type_id_or_name: favorite type ID or type name
    :return: the type name for an ID, or the type ID for a name
    :raises ValueError: if the argument is neither ``int`` nor ``str``
    """
    if isinstance(type_id_or_name, int):
        return _FAVORITE_TYPE_NAMES.get(type_id_or_name, "未知")
    if isinstance(type_id_or_name, str):
        for type_id, type_name in _FAVORITE_TYPE_NAMES.items():
            if type_name == type_id_or_name:
                return type_id
        # NOTE(review): a tuple is returned here although known names map to
        # a plain int ID. Kept unchanged because callers may compare against
        # this exact value - confirm before changing it.
        return (0, 0)
    raise ValueError("Invalid input type")
186 raise ValueError("Invalid input type")