# easyTTS/easyTTS.py (kyinwind/easyTTS)
import os
import re
from pydub import AudioSegment
from docx import Document
from tkinter import Tk
import tkinter as tk
from tkinter.filedialog import askdirectory
from tkinter.filedialog import askopenfilename, askdirectory
from tkinter import messagebox
from gradio_client import Client, file
from gradio_client import handle_file
import sys

# Shared gradio client used by the synthesis code; None until init_client() assigns it.
client = None

def split_text(text, max_length=1000):
    """
    Split a long text into chunks of at most ``max_length`` characters.

    Each chunk ends just after the last Chinese full stop ("。") found within
    the first ``max_length`` characters of the remaining text. When that window
    contains no full stop, the chunk is a hard cut of exactly ``max_length``
    characters.

    :param text: text to split
    :param max_length: maximum number of characters per chunk (must be >= 1)
    :return: list of chunks in original order; concatenating them gives ``text``
    :raises ValueError: if ``max_length`` is smaller than 1
    """
    if max_length < 1:
        # A non-positive limit can never be satisfied and would loop forever.
        raise ValueError("max_length must be at least 1")

    paragraphs = []
    while len(text) > max_length:
        split_index = text.rfind("。", 0, max_length)  # prefer to cut at a full stop
        if split_index == -1:
            # No full stop in the window: hard cut, never exceeding max_length.
            cut = max_length
        else:
            # Keep the full stop at the end of the chunk it terminates.
            cut = split_index + 1
        paragraphs.append(text[:cut])
        text = text[cut:]
    if text:
        paragraphs.append(text)
    return paragraphs

def synthesize_text_to_mp3(text, output_path, speed="0.8", cankao_file="", cankao_txt=""):
    """
    Synthesize ``text`` through the gradio ``/inference`` endpoint and save it as MP3.

    The first element of the endpoint's result is treated as the path of a WAV
    file, which is loaded and re-exported as MP3 to ``output_path``.

    :param text: text to synthesize
    :param output_path: destination path of the MP3 file
    :param speed: accepted for interface compatibility but not forwarded; the
        request always sends ``speed_factor=1``
    :param cankao_file: path of the reference audio, sent as ``ref_audio_path``
    :param cankao_txt: transcript of the reference audio, sent as ``prompt_text``
    :raises RuntimeError: if no connection to the inference server is available
    """
    global client
    # Reuse the existing connection; only connect when there is none yet,
    # instead of opening a new connection for every paragraph.
    if client is None:
        init_client()
    if client is None:
        # init_client() reports the failure but does not raise; stop here with a
        # clear error rather than failing on ``None.predict``.
        raise RuntimeError("无法连接到服务器，无法合成语音")

    # Wrap the reference audio path the way gradio_client expects file inputs.
    ref_audio_path = handle_file(cankao_file)
    result = client.predict(
        text=text,
        text_lang="中文",
        ref_audio_path=ref_audio_path,
        aux_ref_audio_paths=[],
        prompt_text=cankao_txt,
        prompt_lang="中文",
        top_k=5,
        top_p=1,
        temperature=1,
        text_split_method="凑四句一切",
        batch_size=20,
        speed_factor=1,
        ref_text_free=False,
        split_bucket=True,
        fragment_interval=0.3,
        seed=-1,
        keep_random=True,
        parallel_infer=True,
        repetition_penalty=1.35,
        api_name="/inference"
    )
    print(result)
    print(output_path)
    # Load the generated WAV and convert it to MP3.
    wav_file_path = result[0]
    audio = AudioSegment.from_wav(wav_file_path)
    audio.export(output_path, format="mp3")
def merge_audio_files(file_list, output_file):
    """
    Concatenate audio files in order and export the result as a 128 kbps MP3.

    No explicit frame-rate, channel or sample-width conversion is applied to
    the combined audio before export.

    :param file_list: non-empty sequence of audio file paths, in playback order
    :param output_file: path of the merged MP3 file to write
    :raises ValueError: if ``file_list`` is empty
    """
    if not file_list:
        # Nothing to merge; fail with a clear message instead of an IndexError.
        raise ValueError("file_list must contain at least one audio file")

    # Start from the first file and append the remaining ones in order.
    combined_audio = AudioSegment.from_file(file_list[0])
    for audiofile in file_list[1:]:
        combined_audio += AudioSegment.from_file(audiofile)

    # Export as MP3 with a fixed bitrate.
    combined_audio.export(output_file, format="mp3", bitrate="128k")
    print(f"音频已成功合并并保存到: {output_file}")

def extract_number(file_name):
    """
    Return the first run of digits in ``file_name`` as an int.

    Names without any digit yield ``float('inf')`` so that they sort last.
    """
    digits = re.search(r'\d+', file_name)
    if digits is None:
        return float('inf')
    return int(digits.group())

def process_txt_files_in_directory(input_directory, output_directory, speed="0.8", cankao_file="", cankao_txt=""):
    """
    Generate one MP3 file for every ``.txt`` file in ``input_directory``.

    Files are processed in the order given by ``extract_number``. Each text is
    split into chunks, every chunk is synthesized into a temporary
    ``<name>_partN.mp3`` file, and the parts are merged into ``<name>.mp3`` in
    ``output_directory``. A text file whose MP3 already exists and is non-empty
    is skipped, as is a text file with no non-whitespace content.

    :param input_directory: directory containing the ``.txt`` files
    :param output_directory: directory receiving the MP3 files (created if missing)
    :param speed: passed through to ``synthesize_text_to_mp3``
    :param cankao_file: reference audio path, passed through to the synthesizer
    :param cankao_txt: reference transcript, passed through to the synthesizer
    """
    os.makedirs(output_directory, exist_ok=True)

    for file_name in sorted(os.listdir(input_directory), key=extract_number):
        if not file_name.endswith(".txt"):
            continue

        input_file_path = os.path.join(input_directory, file_name)
        # Strip only the extension; str.replace would also remove ".txt"
        # occurring elsewhere in the name.
        base_name = os.path.splitext(file_name)[0]
        final_output_file = os.path.join(output_directory, f"{base_name}.mp3")

        # Skip files that already have a non-empty MP3.
        if os.path.exists(final_output_file) and os.path.getsize(final_output_file) > 0:
            print(f"文件已存在且非空，跳过生成: {final_output_file}")
            continue

        print(f"<------------开始处理文件: {final_output_file}")
        with open(input_file_path, "r", encoding="utf-8") as txt_file:
            text = txt_file.read()

        if not text.strip():
            # No content means no parts, and merging an empty list would fail.
            print(f"文本内容为空，跳过生成: {input_file_path}")
            continue

        paragraphs = split_text(text, max_length=1500)
        temp_files = []
        try:
            # Synthesize one temporary MP3 per chunk.
            for i, paragraph in enumerate(paragraphs):
                temp_file = os.path.join(output_directory, f"{base_name}_part{i + 1}.mp3")
                # Register the path first so a partially written part is
                # cleaned up as well if synthesis fails.
                temp_files.append(temp_file)
                synthesize_text_to_mp3(paragraph, temp_file, speed=speed, cankao_file=cankao_file, cankao_txt=cankao_txt)

            merge_audio_files(temp_files, final_output_file)
        finally:
            # Remove the temporary parts whether or not processing succeeded.
            for temp_file in temp_files:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    print(f"已删除临时文件: {temp_file}")
        print(f"MP3 文件生成完成: {final_output_file}------------>")

# Button handler: batch-convert a directory of .txt files to MP3
def createmp3():
    """
    Ask the user for a directory and convert every ``.txt`` file in it to MP3.

    The reference audio ``cankao/cankao.wav`` and its transcript
    ``cankao/cankao.txt`` are looked up next to this script. The MP3 files are
    written into the selected directory itself. Nothing is processed when the
    dialog is cancelled or a reference file is missing.
    """
    # Temporary hidden root so the dialog does not bring up an empty window;
    # destroyed afterwards so repeated calls do not accumulate Tk instances.
    helper_root = Tk()
    helper_root.withdraw()
    try:
        input_dir = askdirectory(title="请选择包含 .txt 文件的目录")
    finally:
        helper_root.destroy()

    # Check the selection before touching the reference files.
    if not input_dir:
        print("未选择任何目录，程序退出。")
        return

    # The reference files live in the "cankao" folder next to this script.
    script_directory = os.path.dirname(os.path.abspath(__file__))
    print("脚本所在目录:", script_directory)
    cankao_dir = os.path.join(script_directory, "cankao")
    cankao_file = os.path.join(cankao_dir, "cankao.wav")
    cankao_txtfile = os.path.join(cankao_dir, "cankao.txt")

    missing = [path for path in (cankao_file, cankao_txtfile) if not os.path.isfile(path)]
    if missing:
        print("缺少参考文件:", "、".join(missing))
        return

    # Read the transcript of the reference audio.
    with open(cankao_txtfile, "r", encoding="utf-8") as txt_file:
        cankao_text = txt_file.read()
    print("参考音频文件:", cankao_file)
    print("参考文本内容:", cankao_text)
    print(f"选择的目录: {input_dir}")

    # Fixed speed value handed to the processing function.
    speed = "0.8"

    # Output goes into the input directory itself.
    output_dir = input_dir
    process_txt_files_in_directory(input_dir, output_dir, speed=speed, cankao_file=cankao_file, cankao_txt=cankao_text)
    print(f"所有文件已处理，MP3 文件保存在: {output_dir}")


def split_word_by_heading(docx_file, output_folder):
    """
    Split a Word document into TXT files, one per heading section.

    A section is written out whenever a "Heading 1" paragraph follows an
    earlier heading, and whenever a "Heading 2" paragraph follows an earlier
    "Heading 2". Text between a "Heading 1" and its first "Heading 2" stays in
    that first "Heading 2" section, and text before the first heading is carried
    into the first section written. "<" and ">" are removed from body text.
    Sections are numbered from 1 in document order.

    :param docx_file: path of the Word document (.docx)
    :param output_folder: folder receiving the TXT files
    :return: a completion message containing ``output_folder``
    """
    document = Document(docx_file)
    current_heading1 = None
    current_heading2 = None
    current_content = []
    heading_counter = 1  # running number used as file-name prefix

    for paragraph in document.paragraphs:
        style_name = paragraph.style.name
        if style_name == 'Heading 1':
            # A new level-1 heading closes whatever section is open.
            if current_heading2 or current_heading1:
                save_to_txt(heading_counter, current_heading1, current_heading2, current_content, output_folder)
                current_content = []
                heading_counter += 1
            current_heading1 = paragraph.text.strip()
            current_heading2 = None
        elif style_name == 'Heading 2':
            # Only a previous level-2 heading is closed here; text directly
            # under the level-1 heading is kept for this section.
            if current_heading2:
                save_to_txt(heading_counter, current_heading1, current_heading2, current_content, output_folder)
                current_content = []
                heading_counter += 1
            current_heading2 = paragraph.text.strip()
        else:
            current_content.append(paragraph.text.strip().replace("<", "").replace(">", ""))

    # Write the last open section. Checking both levels keeps the final section
    # of a document that only uses "Heading 2" from being dropped.
    if current_heading1 or current_heading2:
        save_to_txt(heading_counter, current_heading1, current_heading2, current_content, output_folder)
    return "文件已经处理完成，保存在: " + output_folder


def save_to_txt(counter, heading1, heading2, content, output_folder):
    """
    Write one section to a TXT file named after its number and headings.

    The file name is "<counter>、<heading1>-<heading2>.txt", or
    "<counter>、<heading1>.txt" without a second heading, with spaces and "/"
    replaced by "_". The first line of the file is the heading text
    ("<heading1>、<heading2>" or "<heading1>"), followed by a blank line and the
    content lines. A missing or empty heading is left out of both the file name
    and the title line.

    :param counter: running number of the section
    :param heading1: level-1 heading text (may be None)
    :param heading2: level-2 heading text (may be None)
    :param content: list of content lines
    :param output_folder: folder receiving the TXT file (created if missing)
    """
    os.makedirs(output_folder, exist_ok=True)

    # Use only the headings that are actually present, so a missing one does
    # not show up as the literal text "None".
    headings = [heading for heading in (heading1, heading2) if heading]
    name_part = "-".join(headings)
    title_line = "、".join(headings)

    filename = f"{counter}、{name_part}.txt".replace(" ", "_").replace("/", "_")
    filepath = os.path.join(output_folder, filename)

    with open(filepath, 'w', encoding='utf-8') as f:
        # Heading text first, then a blank line, then the body.
        f.write(title_line + "\n\n")
        f.write("\n".join(content))


def createtxtbyword():
    """
    Ask the user for a .docx file and split it into TXT files by heading.

    The TXT files are written into the directory that contains the selected
    document. Nothing happens when the dialog is cancelled.
    """
    # Temporary hidden root so the dialog does not bring up an empty window;
    # destroyed afterwards so repeated calls do not accumulate Tk instances.
    helper_root = Tk()
    helper_root.withdraw()
    try:
        docx_path = askopenfilename(filetypes=[("Word Documents", "*.docx")], title="选择一个Word文档")
    finally:
        helper_root.destroy()

    if not docx_path:
        print("未选择文件，程序退出。")
        return

    # Write the TXT files next to the source document.
    output_dir = os.path.dirname(docx_path)
    split_word_by_heading(docx_path, output_dir)
    print(f"所有文件已保存到目录：{output_dir}")

from gradio_client import Client  # 确保导入正确

def init_client():
    """
    Connect to the gradio server at http://localhost:9872/ and store the
    connection in the module-level ``client``.

    A connection failure is shown in an error dialog and is not re-raised;
    ``client`` is left unchanged in that case.
    """
    global client
    try:
        client = Client("http://localhost:9872/")
    except Exception as err:
        messagebox.showerror("错误", f"无法连接到服务器: {err}")

# Script entry point: optional command-line call, then the GUI
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        print("sys.argv为:", sys.argv)
        print("参数长度为len(sys.argv):", len(sys.argv))
        funcname = sys.argv[1]
        print("funcname为:", funcname)
        if funcname == "split_word_by_heading":
            if len(sys.argv) < 3:
                # The document argument is required for this command.
                print("缺少文档路径参数")
            else:
                from urllib.parse import unquote, urlparse

                document_path = sys.argv[2]  # document location passed by the caller
                parsed_url = urlparse(document_path)
                if parsed_url.scheme == "file":
                    # file:// URLs are percent-encoded; decode them so paths with
                    # spaces or non-ASCII characters resolve on disk.
                    file_path = unquote(parsed_url.path)
                else:
                    # Not a file URL: use the argument as a plain path.
                    file_path = document_path
                print("文件路径：", file_path)
                # Write the TXT files next to the source document.
                output_dir = os.path.dirname(file_path)
                result = split_word_by_heading(file_path, output_dir)
                print("调用结果:", result)  # reported to the caller via standard output
        # NOTE(review): execution continues into the GUI below even after a
        # command-line call; confirm whether that is intended.

    root = tk.Tk()
    root.geometry("500x600")  # window size 500x600
    root.eval('tk::PlaceWindow . center')  # centre the window on screen
    root.title("EasyTTS")

    # Buttons bound to the two workflows.
    button1 = tk.Button(root, text="1、拆分word文档", command=createtxtbyword)
    button1.pack(pady=50)

    button2 = tk.Button(root, text="2、生成mp3文件", command=createmp3)
    button2.pack(pady=50)
    # Text box that shows the log output.
    # NOTE(review): per the tkinter docs, Text height/width are in lines and
    # characters, so these values far exceed the window; confirm the intent.
    log_text = tk.Text(root, height=200, width=350)
    log_text.pack(pady=10)

    class TextRedirector:
        """File-like object that appends everything written to it to a Text widget."""

        def __init__(self, widget, tag="stdout"):
            self.widget = widget
            self.tag = tag

        def write(self, text):
            self.widget.insert(tk.END, text)
            self.widget.see(tk.END)  # keep the newest output visible

        def flush(self):
            # Nothing is buffered, so there is nothing to flush.
            pass

    # Send standard output and standard error to the log text box.
    sys.stdout = TextRedirector(log_text)
    sys.stderr = TextRedirector(log_text)

    init_client()  # connect to the inference server
    # Run the GUI event loop.
    root.mainloop()