Skip to content

Commit 8ee949f

Browse files
committed
improvement(voice): 语音优先转 MP3,并完善导出/接口兜底
- 新增 _convert_silk_to_browser_audio:优先 MP3(有 ffmpeg),否则 WAV,最后回退 SILK - /chat/voice 返回浏览器可播格式,并补充 Content-Disposition 文件名后缀 - HTML 导出按实际格式写入语音资源;音频缺失时仍保留语音气泡结构 - 测试覆盖 MP3 优先、缺失音频兜底,并在用例结束 logging.shutdown()
1 parent f783919 commit 8ee949f

5 files changed

Lines changed: 330 additions & 50 deletions

File tree

src/wechat_decrypt_tool/chat_export_service.py

Lines changed: 98 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
)
5454
from .logging_config import get_logger
5555
from .media_helpers import (
56-
_convert_silk_to_wav,
56+
_convert_silk_to_browser_audio,
5757
_detect_image_media_type,
5858
_fallback_search_media_by_file_id,
5959
_read_and_maybe_decrypt_media,
@@ -121,9 +121,10 @@ def _resolve_ui_public_dir() -> Optional[Path]:
121121
if ui_dir_env:
122122
candidates.append(Path(ui_dir_env))
123123

124-
# Repo default: `frontend/.output/public` after `npm --prefix frontend run generate`.
124+
# Repo defaults: generated Nuxt output or checked-in desktop UI assets.
125125
repo_root = Path(__file__).resolve().parents[2]
126126
candidates.append(repo_root / "frontend" / ".output" / "public")
127+
candidates.append(repo_root / "desktop" / "resources" / "ui")
127128

128129
for p in candidates:
129130
try:
@@ -622,6 +623,68 @@ def _download_remote_image_to_zip(
622623
.wce-audio-actions a { font-size: 0.75rem; color: #07c160; text-decoration: none; }
623624
.wce-audio-actions a:hover { text-decoration: underline; }
624625
626+
/* Voice message fallback styles (keep close to `frontend/pages/chat/[[username]].vue`). */
627+
.wechat-voice-wrapper { display: flex; width: 100%; position: relative; }
628+
.wechat-voice-bubble {
629+
border-radius: var(--message-radius);
630+
position: relative;
631+
transition: opacity 0.15s ease;
632+
min-width: 80px;
633+
max-width: 200px;
634+
cursor: pointer;
635+
}
636+
.wechat-voice-bubble:hover { opacity: 0.85; }
637+
.wechat-voice-bubble:active { opacity: 0.7; }
638+
.wechat-voice-sent { background: #95EC69; }
639+
.wechat-voice-sent::after {
640+
content: '';
641+
position: absolute;
642+
top: 50%;
643+
right: -4px;
644+
transform: translateY(-50%) rotate(45deg);
645+
width: 10px;
646+
height: 10px;
647+
background: #95EC69;
648+
border-radius: 2px;
649+
}
650+
.wechat-voice-received { background: #fff; }
651+
.wechat-voice-received::before {
652+
content: '';
653+
position: absolute;
654+
top: 50%;
655+
left: -4px;
656+
transform: translateY(-50%) rotate(45deg);
657+
width: 10px;
658+
height: 10px;
659+
background: #fff;
660+
border-radius: 2px;
661+
}
662+
.wechat-voice-content { display: flex; align-items: center; padding: 8px 12px; gap: 8px; }
663+
.wechat-voice-icon { width: 18px; height: 18px; flex-shrink: 0; color: #1a1a1a; }
664+
.wechat-quote-voice-icon { width: 14px; height: 14px; color: inherit; }
665+
.voice-icon-sent { transform: scaleX(-1); }
666+
.wechat-voice-icon.voice-playing .voice-wave-2 { animation: voice-wave-2 1s infinite; }
667+
.wechat-voice-icon.voice-playing .voice-wave-3 { animation: voice-wave-3 1s infinite; }
668+
@keyframes voice-wave-2 {
669+
0%, 33% { opacity: 0; }
670+
34%, 100% { opacity: 1; }
671+
}
672+
@keyframes voice-wave-3 {
673+
0%, 66% { opacity: 0; }
674+
67%, 100% { opacity: 1; }
675+
}
676+
.wechat-voice-duration { font-size: 14px; color: #1a1a1a; }
677+
.wechat-voice-unread {
678+
position: absolute;
679+
top: 50%;
680+
right: -20px;
681+
transform: translateY(-50%);
682+
width: 8px;
683+
height: 8px;
684+
border-radius: 50%;
685+
background: #e75e58;
686+
}
687+
625688
/* Index page helpers. */
626689
.wce-index { min-height: 100vh; background: #EDEDED; }
627690
.wce-index-container { max-width: 880px; margin: 0 auto; padding: 24px; }
@@ -4958,40 +5021,38 @@ def _mark_exported() -> None:
49585021
tw.write(f' <div class="{esc_attr(bubble_base_cls + " " + bubble_dir_cls)}">{render_text_with_emojis(msg.get("content") or "")}</div>\n')
49595022
elif rt == "voice":
49605023
voice = offline_path(msg, "voice")
5024+
duration_ms = msg.get("voiceLength")
5025+
width = get_voice_width(duration_ms)
5026+
seconds = get_voice_duration_in_seconds(duration_ms)
5027+
voice_dir_cls = "wechat-voice-sent" if is_sent else "wechat-voice-received"
5028+
content_dir_cls = " flex-row-reverse" if is_sent else ""
5029+
icon_dir_cls = "voice-icon-sent" if is_sent else "voice-icon-received"
5030+
voice_id = str(msg.get("id") or "").strip()
5031+
5032+
tw.write(' <div class="wechat-voice-wrapper">\n')
5033+
tw.write(
5034+
f' <div class="wechat-voice-bubble msg-radius {esc_attr(voice_dir_cls)}" style="width: {esc_attr(width)}" data-voice-id="{esc_attr(voice_id)}">\n'
5035+
)
5036+
tw.write(f' <div class="wechat-voice-content{esc_attr(content_dir_cls)}">\n')
5037+
tw.write(
5038+
f' <svg class="wechat-voice-icon {esc_attr(icon_dir_cls)}" viewBox="0 0 32 32" fill="currentColor">\n'
5039+
)
5040+
tw.write(
5041+
' <path d="M10.24 11.616l-4.224 4.192 4.224 4.192c1.088-1.056 1.76-2.56 1.76-4.192s-0.672-3.136-1.76-4.192z"></path>\n'
5042+
)
5043+
tw.write(
5044+
' <path class="voice-wave-2" d="M15.199 6.721l-1.791 1.76c1.856 1.888 3.008 4.48 3.008 7.328s-1.152 5.44-3.008 7.328l1.791 1.76c2.336-2.304 3.809-5.536 3.809-9.088s-1.473-6.784-3.809-9.088z"></path>\n'
5045+
)
5046+
tw.write(
5047+
' <path class="voice-wave-3" d="M20.129 1.793l-1.762 1.76c3.104 3.168 5.025 7.488 5.025 12.256s-1.921 9.088-5.025 12.256l1.762 1.76c3.648-3.616 5.887-8.544 5.887-14.016s-2.239-10.432-5.887-14.016z"></path>\n'
5048+
)
5049+
tw.write(" </svg>\n")
5050+
tw.write(f' <span class="wechat-voice-duration">{esc_text(seconds)}"</span>\n')
5051+
tw.write(" </div>\n")
5052+
tw.write(" </div>\n")
49615053
if voice:
4962-
duration_ms = msg.get("voiceLength")
4963-
width = get_voice_width(duration_ms)
4964-
seconds = get_voice_duration_in_seconds(duration_ms)
4965-
voice_dir_cls = "wechat-voice-sent" if is_sent else "wechat-voice-received"
4966-
content_dir_cls = " flex-row-reverse" if is_sent else ""
4967-
icon_dir_cls = "voice-icon-sent" if is_sent else "voice-icon-received"
4968-
voice_id = str(msg.get("id") or "").strip()
4969-
4970-
tw.write(' <div class="wechat-voice-wrapper">\n')
4971-
tw.write(
4972-
f' <div class="wechat-voice-bubble msg-radius {esc_attr(voice_dir_cls)}" style="width: {esc_attr(width)}" data-voice-id="{esc_attr(voice_id)}">\n'
4973-
)
4974-
tw.write(f' <div class="wechat-voice-content{esc_attr(content_dir_cls)}">\n')
4975-
tw.write(
4976-
f' <svg class="wechat-voice-icon {esc_attr(icon_dir_cls)}" viewBox="0 0 32 32" fill="currentColor">\n'
4977-
)
4978-
tw.write(
4979-
' <path d="M10.24 11.616l-4.224 4.192 4.224 4.192c1.088-1.056 1.76-2.56 1.76-4.192s-0.672-3.136-1.76-4.192z"></path>\n'
4980-
)
4981-
tw.write(
4982-
' <path class="voice-wave-2" d="M15.199 6.721l-1.791 1.76c1.856 1.888 3.008 4.48 3.008 7.328s-1.152 5.44-3.008 7.328l1.791 1.76c2.336-2.304 3.809-5.536 3.809-9.088s-1.473-6.784-3.809-9.088z"></path>\n'
4983-
)
4984-
tw.write(
4985-
' <path class="voice-wave-3" d="M20.129 1.793l-1.762 1.76c3.104 3.168 5.025 7.488 5.025 12.256s-1.921 9.088-5.025 12.256l1.762 1.76c3.648-3.616 5.887-8.544 5.887-14.016s-2.239-10.432-5.887-14.016z"></path>\n'
4986-
)
4987-
tw.write(" </svg>\n")
4988-
tw.write(f' <span class="wechat-voice-duration">{esc_text(seconds)}"</span>\n')
4989-
tw.write(" </div>\n")
4990-
tw.write(" </div>\n")
49915054
tw.write(f' <audio src="{esc_attr(voice)}" preload="none" class="hidden"></audio>\n')
4992-
tw.write(" </div>\n")
4993-
else:
4994-
tw.write(f' <div class="{esc_attr(bubble_base_cls + " " + bubble_dir_cls)}">{render_text_with_emojis(msg.get("content") or "")}</div>\n')
5055+
tw.write(" </div>\n")
49955056
elif rt == "file":
49965057
fsrc = offline_path(msg, "file")
49975058
title = str(msg.get("title") or msg.get("content") or "文件").strip()
@@ -5982,13 +6043,9 @@ def _materialize_voice(
59826043
if not isinstance(data, (bytes, bytearray)):
59836044
data = bytes(data)
59846045

5985-
wav = _convert_silk_to_wav(data)
5986-
if wav != data and wav[:4] == b"RIFF":
5987-
ext = "wav"
5988-
payload = wav
5989-
else:
5990-
ext = "silk"
5991-
payload = data
6046+
payload, ext, _media_type = _convert_silk_to_browser_audio(data, preferred_format="mp3")
6047+
if not payload:
6048+
return "", False
59926049

59936050
arc = f"media/voices/voice_{int(server_id)}.{ext}"
59946051
zf.writestr(arc, payload)

src/wechat_decrypt_tool/media_helpers.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,114 @@ def _convert_silk_to_wav(silk_data: bytes) -> bytes:
19641964
return silk_data
19651965

19661966

1967+
def _looks_like_mp3(data: bytes) -> bool:
    """Return True when *data* begins like an MP3 stream.

    Two signatures are accepted: a leading ``ID3`` tag, or a first byte of
    0xFF followed by a byte whose top three bits are all set.
    """
    if not data:
        return False

    # Tagged files start with the literal "ID3" marker.
    if data.startswith(b"ID3"):
        return True

    # The sync check needs two bytes to inspect.
    if len(data) < 2:
        return False

    lead, follow = data[0], data[1]
    return lead == 0xFF and (follow & 0xE0) == 0xE0
1973+
1974+
1975+
@lru_cache(maxsize=1)
def _find_ffmpeg_executable() -> str:
    """Locate the ffmpeg binary and return its path, or "" if none is found.

    The ``WECHAT_TOOL_FFMPEG`` environment variable is consulted first: its
    value is tried as a command resolvable via ``shutil.which`` and then as a
    plain file path (with ``~`` expanded). If that yields nothing, ``ffmpeg``
    is looked up on PATH. The result is memoized by ``lru_cache``, so the
    lookup is not repeated on later calls unless the cache is cleared.
    """
    import shutil

    override = str(os.environ.get("WECHAT_TOOL_FFMPEG") or "").strip()
    if override:
        on_path = shutil.which(override)
        if on_path:
            return on_path

        # Not resolvable as a command; accept it if it names an existing file.
        explicit = Path(override).expanduser()
        if explicit.is_file():
            return str(explicit)

    discovered = shutil.which("ffmpeg")
    return discovered if discovered else ""
1989+
1990+
1991+
def _convert_wav_to_mp3(wav_data: bytes, *, timeout: float = 30.0) -> bytes:
    """Transcode WAV bytes to MP3 by invoking ffmpeg.

    Args:
        wav_data: Audio bytes; must start with the ``RIFF`` magic.
        timeout: Maximum number of seconds to wait for ffmpeg. When it is
            exceeded the child process is killed and the conversion is
            treated as failed.

    Returns:
        The MP3 bytes on success, or ``b""`` when the input is not RIFF data,
        no ffmpeg executable is found, ffmpeg fails or times out, or the
        output does not look like MP3. This function never raises; failures
        are logged as warnings.
    """
    import subprocess
    import tempfile

    if not wav_data or not wav_data.startswith(b"RIFF"):
        return b""

    ffmpeg_exe = _find_ffmpeg_executable()
    if not ffmpeg_exe:
        return b""

    try:
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir)
            wav_path = tmp_path / "voice.wav"
            mp3_path = tmp_path / "voice.mp3"
            wav_path.write_bytes(wav_data)

            proc = subprocess.run(
                [
                    ffmpeg_exe,
                    "-y",
                    "-hide_banner",
                    "-loglevel",
                    "error",
                    "-i",
                    str(wav_path),
                    "-vn",
                    "-codec:a",
                    "libmp3lame",
                    "-q:a",
                    "4",
                    str(mp3_path),
                ],
                check=False,
                capture_output=True,
                # Do not let ffmpeg inherit (and possibly read from) the
                # parent's stdin.
                stdin=subprocess.DEVNULL,
                # Bound the wait so a stuck ffmpeg cannot block the caller
                # forever; TimeoutExpired is handled by the except below.
                timeout=timeout,
            )
            if proc.returncode != 0 or not mp3_path.exists():
                err = proc.stderr.decode("utf-8", errors="ignore").strip()
                # Always leave a trace, even when ffmpeg printed nothing.
                reason = err or f"ffmpeg exited with code {proc.returncode}"
                logger.warning(f"WAV to MP3 conversion failed: {reason}")
                return b""

            mp3_data = mp3_path.read_bytes()
            if _looks_like_mp3(mp3_data):
                return mp3_data
    except Exception as e:
        # Best-effort conversion: callers fall back to another format.
        logger.warning(f"WAV to MP3 conversion failed: {e}")

    return b""
2041+
2042+
2043+
def _convert_silk_to_browser_audio(
    silk_data: bytes,
    *,
    preferred_format: str = "mp3",
) -> tuple[bytes, str, str]:
    """Turn a voice payload into something a browser can play.

    Returns a ``(payload, ext, media_type)`` triple, chosen as follows:

    * empty input          -> ``(b"", "silk", "audio/silk")``
    * input already MP3    -> returned unchanged as ``mp3`` / ``audio/mpeg``
    * WAV obtained (input is RIFF, or SILK decoding produced RIFF):
        - MP3 when ``preferred_format`` is "mp3" and transcoding succeeds
        - otherwise the WAV bytes as ``wav`` / ``audio/wav``
    * anything else        -> the original bytes as ``silk`` / ``audio/silk``
    """
    raw = bytes(silk_data or b"")
    if not raw:
        return b"", "silk", "audio/silk"

    if _looks_like_mp3(raw):
        return raw, "mp3", "audio/mpeg"

    # Input that is already RIFF skips the SILK decoder.
    if raw.startswith(b"RIFF"):
        wav_bytes = raw
    else:
        wav_bytes = _convert_silk_to_wav(raw)

    # No WAV could be obtained: hand back the untouched input.
    if not wav_bytes.startswith(b"RIFF"):
        return raw, "silk", "audio/silk"

    wants_mp3 = str(preferred_format or "").strip().lower() == "mp3"
    if wants_mp3:
        encoded = _convert_wav_to_mp3(wav_bytes)
        if encoded:
            return encoded, "mp3", "audio/mpeg"

    return wav_bytes, "wav", "audio/wav"
2073+
2074+
19672075
def _resolve_media_path_for_kind(
19682076
account_dir: Path,
19692077
kind: str,

src/wechat_decrypt_tool/routers/chat_media.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
)
3434
from ..logging_config import get_logger
3535
from ..media_helpers import (
36-
_convert_silk_to_wav,
36+
_convert_silk_to_browser_audio,
3737
_decrypt_emoticon_aes_cbc,
3838
_detect_image_extension,
3939
_detect_image_media_type,
@@ -1762,12 +1762,12 @@ async def get_chat_voice(server_id: int, account: Optional[str] = None):
17621762
if not isinstance(data, (bytes, bytearray)):
17631763
data = bytes(data)
17641764

1765-
# Try to convert SILK to WAV for browser playback
1766-
wav_data = _convert_silk_to_wav(data)
1767-
if wav_data != data:
1765+
payload, ext, media_type = _convert_silk_to_browser_audio(data, preferred_format="mp3")
1766+
if payload and ext != "silk":
17681767
return Response(
1769-
content=wav_data,
1770-
media_type="audio/wav",
1768+
content=payload,
1769+
media_type=media_type,
1770+
headers={"Content-Disposition": f"inline; filename=voice_{int(server_id)}.{ext}"},
17711771
)
17721772

17731773
# Fallback to raw SILK if conversion fails
@@ -1821,11 +1821,16 @@ async def open_chat_media_folder(
18211821
if not isinstance(data, (bytes, bytearray)):
18221822
data = bytes(data)
18231823

1824+
payload, ext, _media_type = _convert_silk_to_browser_audio(data, preferred_format="mp3")
1825+
if not payload:
1826+
payload = data
1827+
ext = "silk"
1828+
18241829
export_dir = account_dir / "_exports"
18251830
export_dir.mkdir(parents=True, exist_ok=True)
1826-
p = export_dir / f"voice_{int(server_id)}.silk"
1831+
p = export_dir / f"voice_{int(server_id)}.{ext}"
18271832
try:
1828-
p.write_bytes(data)
1833+
p.write_bytes(payload)
18291834
except Exception as e:
18301835
raise HTTPException(status_code=500, detail=f"Failed to export voice: {e}")
18311836
else:

0 commit comments

Comments
 (0)