Skip to content

Commit 6aa5b32

Browse files
committed
fix(chat): 修复群聊置顶系统消息显示名称与导出文案
- 解析 ChatRoomTopMsg 文本/XML 系统消息，识别置顶与取消置顶操作
- 优先使用备注名/联系人名替换 wxid，避免实时消息、历史消息、搜索结果显示原始账号
- 导出 JSON/TXT/HTML 时复用同一套系统消息名称解析逻辑
- 补充系统消息解析与实时消息展示测试，覆盖多种消息载荷格式
1 parent 3cd5fed commit 6aa5b32

5 files changed

Lines changed: 470 additions & 54 deletions

File tree

src/wechat_decrypt_tool/chat_export_service.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from dataclasses import dataclass, field
2020
from datetime import datetime
2121
from pathlib import Path
22-
from typing import Any, Iterable, Literal, Optional
22+
from typing import Any, Callable, Iterable, Literal, Optional
2323
from urllib.parse import urljoin, urlparse
2424

2525
import requests
@@ -3386,6 +3386,7 @@ def _parse_message_for_export(
33863386
resource_conn: Optional[sqlite3.Connection],
33873387
resource_chat_id: Optional[int],
33883388
sender_alias: str = "",
3389+
resolve_display_name: Optional[Callable[[str], str]] = None,
33893390
) -> dict[str, Any]:
33903391
raw_text = row.raw_text or ""
33913392
sender_username = str(row.sender_username or "").strip()
@@ -3449,7 +3450,18 @@ def _parse_message_for_export(
34493450

34503451
if local_type == 10000:
34513452
render_type = "system"
3452-
content_text = _parse_system_message_content(raw_text)
3453+
system_display_name_resolver = None
3454+
if resolve_display_name is not None:
3455+
def system_display_name_resolver(username: str, fallback_display_name: str) -> str:
3456+
resolved = str(resolve_display_name(username) or "").strip()
3457+
if resolved and resolved != username:
3458+
return resolved
3459+
fallback = str(fallback_display_name or "").strip()
3460+
return fallback or resolved or username
3461+
content_text = _parse_system_message_content(
3462+
raw_text,
3463+
resolve_display_name=system_display_name_resolver,
3464+
)
34533465
elif local_type == 49:
34543466
parsed = _parse_app_message(raw_text)
34553467
render_type = str(parsed.get("renderType") or "text")
@@ -3923,6 +3935,7 @@ def lookup_alias(username: str) -> str:
39233935
resource_conn=resource_conn,
39243936
resource_chat_id=resource_chat_id,
39253937
sender_alias=sender_alias,
3938+
resolve_display_name=resolve_display_name,
39263939
)
39273940
if not _is_render_type_selected(msg.get("renderType"), want_types):
39283941
continue
@@ -4101,6 +4114,7 @@ def lookup_alias(username: str) -> str:
41014114
resource_conn=resource_conn,
41024115
resource_chat_id=resource_chat_id,
41034116
sender_alias=sender_alias,
4117+
resolve_display_name=resolve_display_name,
41044118
)
41054119
if not _is_render_type_selected(msg.get("renderType"), want_types):
41064120
continue
@@ -4859,6 +4873,7 @@ def _mark_exported() -> None:
48594873
resource_conn=resource_conn,
48604874
resource_chat_id=resource_chat_id,
48614875
sender_alias="",
4876+
resolve_display_name=resolve_display_name,
48624877
)
48634878
if not _is_render_type_selected(msg.get("renderType"), want_types):
48644879
continue

src/wechat_decrypt_tool/chat_helpers.py

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from collections import Counter
88
from datetime import datetime
99
from pathlib import Path
10-
from typing import Any, Optional
10+
from typing import Any, Callable, Optional
1111
from urllib.parse import parse_qs, quote, urlparse
1212

1313
from fastapi import HTTPException
@@ -787,7 +787,112 @@ def _to_float(value: Any) -> Optional[float]:
787787
}
788788

789789

790-
def _parse_system_message_content(raw_text: str) -> str:
790+
def _extract_chatroom_top_message_metadata(raw_text: str) -> dict[str, str]:
791+
text = str(raw_text or "").strip()
792+
if not text:
793+
return {}
794+
795+
lower_text = text.lower()
796+
if "<mmchatroomtopmsg" in lower_text or "<sysmsg" in lower_text:
797+
chatroom_id = str(_extract_xml_tag_text(text, "chatroomname") or "").strip()
798+
operation = str(_extract_xml_tag_text(text, "op") or "").strip()
799+
operator_username = str(_extract_xml_tag_text(text, "username") or "").strip()
800+
operator_display_name = str(_extract_xml_tag_text(text, "nickname") or "").strip()
801+
if chatroom_id.endswith("@chatroom") and operation in {"1", "2"} and operator_username:
802+
return {
803+
"operation": operation,
804+
"operatorUsername": operator_username,
805+
"operatorDisplayName": operator_display_name,
806+
}
807+
808+
def _is_int_token(value: str) -> bool:
809+
candidate = str(value or "").strip()
810+
if not candidate:
811+
return False
812+
if candidate[0] in {"+", "-"}:
813+
candidate = candidate[1:]
814+
return candidate.isdigit()
815+
816+
normalized = re.sub(r"<!--\s*ChatRoomTopMsgRequest\s*-->", " ", text, flags=re.IGNORECASE)
817+
normalized = re.sub(r"<!--\s*ChatRoomTopMsgResponse\s*-->", " ", normalized, flags=re.IGNORECASE)
818+
normalized = re.sub(r"\s+", " ", normalized).strip()
819+
if not normalized:
820+
return {}
821+
822+
parts = normalized.split(" ")
823+
has_markers = ("chatroomtopmsgrequest" in lower_text) or ("chatroomtopmsgresponse" in lower_text)
824+
if len(parts) < 5:
825+
return {}
826+
827+
chatroom_id = str(parts[0] or "").strip()
828+
operation = str(parts[1] or "").strip()
829+
if not chatroom_id.endswith("@chatroom"):
830+
return {}
831+
if operation not in {"1", "2"}:
832+
return {}
833+
834+
if not has_markers:
835+
if len(parts) < 6:
836+
return {}
837+
if not _is_int_token(parts[2]) or not _is_int_token(parts[3]) or not _is_int_token(parts[5]):
838+
return {}
839+
840+
operator_username = str(parts[4] or "").strip()
841+
if not operator_username:
842+
return {}
843+
844+
operator_display_name = ""
845+
if len(parts) >= 6 and _is_int_token(parts[5]):
846+
response_tokens = parts[6:]
847+
if len(response_tokens) >= 2 and _is_int_token(response_tokens[-1]):
848+
response_tokens = response_tokens[:-1]
849+
operator_display_name = " ".join(response_tokens).strip()
850+
851+
return {
852+
"operation": operation,
853+
"operatorUsername": operator_username,
854+
"operatorDisplayName": operator_display_name,
855+
}
856+
857+
858+
def _parse_chatroom_top_message(
    raw_text: str,
    resolve_display_name: Optional[Callable[[str, str], str]] = None,
) -> str:
    """Render a chatroom pin/unpin system message as display text.

    Args:
        raw_text: Raw system-message payload, handed to
            ``_extract_chatroom_top_message_metadata``.
        resolve_display_name: Optional callback invoked as
            ``resolve_display_name(username, fallback_display_name)``. A
            non-empty result replaces the display name found in the payload;
            any exception it raises is treated as "no result".

    Returns:
        ``"<name><action>"`` for a recognised payload, otherwise ``""``. The
        name falls back to the operator's username and finally to ``"有人"``.
    """
    metadata = _extract_chatroom_top_message_metadata(raw_text)
    if not metadata:
        return ""

    op_code = str(metadata.get("operation") or "").strip()
    username = str(metadata.get("operatorUsername") or "").strip()
    shown_name = str(metadata.get("operatorDisplayName") or "").strip()

    if username and resolve_display_name is not None:
        # Best-effort lookup: a failing resolver must not break rendering.
        try:
            looked_up = str(resolve_display_name(username, shown_name) or "").strip()
        except Exception:
            looked_up = ""
        shown_name = looked_up or shown_name

    shown_name = shown_name or username or "有人"

    if op_code == "1":
        suffix = "置顶了一条消息"
    elif op_code == "2":
        suffix = "移除了一条置顶消息"
    else:
        return ""

    return f"{shown_name}{suffix}"
890+
891+
892+
def _parse_system_message_content(
893+
raw_text: str,
894+
resolve_display_name: Optional[Callable[[str, str], str]] = None,
895+
) -> str:
791896
text = str(raw_text or "").strip()
792897
if not text:
793898
return "[系统消息]"
@@ -801,12 +906,17 @@ def _clean_system_text(value: str) -> str:
801906
if nested_content:
802907
candidate = nested_content
803908

909+
candidate = re.sub(r"<!--.*?-->", " ", candidate, flags=re.IGNORECASE | re.DOTALL)
804910
candidate = re.sub(r"<!\[CDATA\[", "", candidate, flags=re.IGNORECASE)
805911
candidate = re.sub(r"\]\]>", "", candidate)
806912
candidate = re.sub(r"</?[_a-zA-Z0-9]+[^>]*>", "", candidate)
807913
candidate = re.sub(r"\s+", " ", candidate).strip()
808914
return candidate
809915

916+
top_message_text = _parse_chatroom_top_message(text, resolve_display_name=resolve_display_name)
917+
if top_message_text:
918+
return top_message_text
919+
810920
if "revokemsg" in text.lower():
811921
replace_msg = _extract_xml_tag_text(text, "replacemsg")
812922
cleaned_replace_msg = _clean_system_text(replace_msg)
@@ -2334,4 +2444,5 @@ def _row_to_search_hit(
23342444
"locationLng": location_lng,
23352445
"locationPoiname": location_poiname,
23362446
"locationLabel": location_label,
2447+
"_rawText": raw_text if local_type in (10000, 266287972401) else "",
23372448
}

0 commit comments

Comments
 (0)