-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
97 lines (79 loc) · 3.36 KB
/
Copy pathmain.py
File metadata and controls
97 lines (79 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""中文 LLM 教学路由器 - 主入口"""
import yaml
from pathlib import Path
from src.core.simple_classifier import classify, classify_with_llm, SubjectType, SUBJECT_CONFIG, SubjectCategory, get_category
from src.core.model_router import ModelRouter
from src.core.narrator import generate_script, generate_svg_animation, generate_highlight_html, parse_solution
from src.services.deepseek_client import DeepSeekClient
from src.services.qwen_client import QwenClient
from src.services.minimax_tts import MinimaxTTS
def load_config(path: str = "config.yaml") -> dict:
    """Load the YAML configuration file at ``path``.

    Args:
        path: Location of the YAML file. Defaults to ``config.yaml``, resolved
            against the current working directory.

    Returns:
        The parsed YAML document. An empty file yields an empty dict so that
        callers always receive something they can index or call ``.get`` on.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # safe_load returns None for an empty document; normalise that case so the
    # declared ``dict`` return type holds and callers get a KeyError for a
    # missing key instead of "'NoneType' object is not subscriptable".
    return {} if data is None else data
def process(input_path: str, output_dir: str = "output"):
    """Process one question end to end and write the results to ``output_dir``.

    Pipeline: image or text input -> classification -> solving -> narration
    script -> speech audio + animation.

    Args:
        input_path: Path to the question. A path ending in ``.png``, ``.jpg``
            or ``.jpeg`` (case-insensitive) is passed to ``qwen.ocr``; any
            other path is read as UTF-8 text.
        output_dir: Directory that receives ``audio.mp3``, ``solution.md`` and
            either ``animation.svg`` or ``animation.html``. It is created,
            including any missing parent directories, when absent.

    Returns:
        A dict with the keys ``"solution"`` (the value returned by
        ``router.solve``), ``"audio"`` and ``"animation"`` (the output file
        paths as strings).

    Raises:
        KeyError: If the configuration lacks ``dashscope_api_key``,
            ``deepseek_api_key``, ``minimax_api_key`` or ``minimax_group_id``.
    """
    cfg = load_config()
    out_dir = Path(output_dir)
    # parents=True so a nested output directory such as "runs/001" works.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Read the key once; it is needed both for the client and for step 2.
    dashscope_key = cfg["dashscope_api_key"]
    qwen = QwenClient(dashscope_key)
    deepseek = DeepSeekClient(cfg["deepseek_api_key"])
    router = ModelRouter(deepseek, qwen)
    tts = MinimaxTTS(cfg["minimax_api_key"], cfg["minimax_group_id"])

    # 1. OCR (only when the input is an image)
    if input_path.lower().endswith((".png", ".jpg", ".jpeg")):
        print("[OCR] 正在识别图片...")
        question = qwen.ocr(input_path)
        print(f"[OCR] 识别结果:\n{question}\n")
    else:
        with open(input_path, "r", encoding="utf-8") as f:
            question = f.read()

    # 2. Classification: use the LLM-backed classifier when a key is
    # configured, otherwise fall back to classify(), which takes no key.
    if dashscope_key:
        subject = classify_with_llm(question, dashscope_key)
    else:
        subject = classify(question)
    subject_cfg = SUBJECT_CONFIG[subject]
    label = subject_cfg["label"]
    print(f"[分类] 判定为: {label}")

    # 3. Solving
    print("[求解] 调用模型中...")
    solution = router.solve(question, subject)
    parsed = parse_solution(solution)
    print("[求解] 完成\n")
    if parsed["thinking"]:
        print(f"[思路]\n{parsed['thinking']}\n")
    if parsed["answer"]:
        print(f"[答案]\n{parsed['answer']}\n")

    # 4. Narration script
    script = generate_script(solution, subject)

    # 5. TTS
    voice = subject_cfg["voice"]
    audio_path = str(out_dir / "audio.mp3")
    print("[TTS] 生成语音中...")
    tts.synthesize(script, voice=voice, output_path=audio_path)
    print(f"[TTS] 已保存: {audio_path}")

    # 6. Animation: SVG when the subject asks for it, highlighted HTML otherwise
    anim_type = subject_cfg["anim_type"]
    if anim_type == "svg":
        svg = generate_svg_animation(solution)
        anim_path = str(out_dir / "animation.svg")
        Path(anim_path).write_text(svg, encoding="utf-8")
        print(f"[动画] SVG 已保存: {anim_path}")
    else:
        html = generate_highlight_html(solution)
        anim_path = str(out_dir / "animation.html")
        Path(anim_path).write_text(html, encoding="utf-8")
        print(f"[动画] HTML 已保存: {anim_path}")

    # Save the solution text alongside the other outputs
    sol_path = str(out_dir / "solution.md")
    Path(sol_path).write_text(solution, encoding="utf-8")
    print(f"[结果] 已保存: {sol_path}")

    return {"solution": solution, "audio": audio_path, "animation": anim_path}
if __name__ == "__main__":
    # Command-line entry point: expects the question file (image or text) as
    # the first positional argument.
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        # No input path given: show the usage line and exit with a failure code.
        print("用法: python main.py <图片或文本文件路径>")
        sys.exit(1)

    process(cli_args[0])