Refactor TTS handling in MessageBubble; improve thread cleanup and add volume control in OfflineTTSWorker; update settings.json for offline STT model path

MadickAngeCesar · MadickAngeCesar · commit 847f49ab69c9 · 2025-03-03T06:42:17.000+01:00
diff --git a/app/chatbot.py b/app/chatbot.py
@@ -512,6 +512,9 @@ def _handle_speak(self):
             return
             
         try:
+            # Note: This method handles Text-to-Speech (TTS), not Speech-to-Text (STT)
+            # For offline STT implementation, see the VoiceWorker.record() method
+            
             # Cancel any existing TTS operation
             if self.tts_thread is not None and self.tts_thread.isRunning():
                 if hasattr(self, 'tts_worker') and hasattr(self.tts_worker, 'cancel'):
@@ -573,7 +576,7 @@ def _handle_speak(self):
                 "TTS Error", 
                 f"Error initializing text-to-speech: {str(e)}\n\nPlease check the settings."
             )
-    
+
     def _play_audio(self, audio_path, status_msg=None):
         """Play the generated audio."""
         try:
@@ -621,7 +624,7 @@ def _play_audio(self, audio_path, status_msg=None):
                 layout.addWidget(status_label)
                 
                 # Connect player signals
-                player.errorOccurred.connect(lambda error, errorString: 
+                player.errorOccurred.connect(lambda _error, errorString: 
                     status_label.setText(f"Player error: {errorString}"))
                 
                 player.playbackStateChanged.connect(lambda state: 
@@ -699,7 +702,7 @@ def _play_audio(self, audio_path, status_msg=None):
             
         except Exception as e:
             self.chat_window.update_status(f"Error playing audio: {str(e)}")
-        
+     
     def _export_audio(self, audio_path):
         """Export the generated audio file."""
         if not audio_path or not os.path.exists(audio_path):
@@ -781,19 +784,26 @@ def _on_tts_thread_finished(self):
         """Handle cleanup when TTS thread finishes."""
         if hasattr(self, 'tts_worker'):
             del self.tts_worker
-            
-    def closeEvent(self, event):
-        """Properly clean up threads before the widget is closed."""
+        self.tts_thread = None
+        
+    def __del__(self):
+        """Destructor to ensure threads are cleaned up when object is garbage collected."""
         self._cleanup_threads()
-        super().closeEvent(event)
         
     def _cleanup_threads(self):
         """Ensure all threads are properly terminated before object destruction."""
-        if self.tts_thread is not None and self.tts_thread.isRunning():
-            if hasattr(self, 'tts_worker') and hasattr(self.tts_worker, 'cancel'):
-                self.tts_worker.cancel()
+        try:
+            if hasattr(self, 'tts_thread') and self.tts_thread is not None and self.tts_thread.isRunning():
+                if hasattr(self, 'tts_worker') and hasattr(self.tts_worker, 'cancel'):
+                    self.tts_worker.cancel()
                 self.tts_thread.quit()
-                self.tts_thread.wait(1000)  # Wait up to 1 second for thread to finish
+                # Use longer timeout and check if thread actually finished
+                if not self.tts_thread.wait(3000):  # Wait up to 3 seconds for thread to finish
+                    self.tts_thread.terminate()  # Force termination if necessary
+                    self.tts_thread.wait()  # Wait for termination to complete
+        except RuntimeError:
+            # The underlying Qt thread object may already be deleted; there is
+            # nothing left to stop, so do not touch self.tts_thread again here.
+            pass
 
 class VoiceWorker(QObject):
     """Worker class for voice recording and speech recognition"""
diff --git a/app/tts_worker.py b/app/tts_worker.py
@@ -1,13 +1,11 @@
 from PyQt6.QtCore import QObject, pyqtSignal
 import os
 import tempfile
-import time
-import wave
-import array
 import pyttsx3  # For offline TTS
-import requests # For online TTS
-import json
-import io
+# The following imports are kept for the OnlineTTSWorker implementation
+# Will be used when the API implementation is completed
+import requests  # For online TTS - unused for now but kept for future implementation  # noqa
+import json      # For online TTS - unused for now but kept for future implementation  # noqa
 
 class TTSWorkerBase(QObject):
     # Base class remains unchanged
@@ -30,12 +28,12 @@ def cancel(self):
 class OfflineTTSWorker(TTSWorkerBase):
     """Worker for offline text-to-speech processing"""
     
-    def __init__(self, text, model_path, speech_rate=1.0):
-        super().__init__(text, None, speech_rate)
-        self.model_path = model_path
+    def __init__(self, text, voice_id=None, speech_rate=1.0, volume=1.0):
+        super().__init__(text, voice_id, speech_rate)
+        self.volume = volume
         
     def generate_speech(self):
-        """Generate speech using offline TTS model"""
+        """Generate speech using offline TTS engine"""
         try:
             # Create temporary file for audio output
             temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
@@ -44,16 +42,30 @@ def generate_speech(self):
             
             self.progress.emit(10)
             
-            # Use pyttsx3 for offline TTS
+            # Initialize pyttsx3 engine
             engine = pyttsx3.init()
-            engine.setProperty('rate', int(engine.getProperty('rate') * self.speech_rate))
-            
-            # Set voice if available
-            voices = engine.getProperty('voices')
-            if voices and len(voices) > 0:
-                engine.setProperty('voice', voices[0].id)
             
-            self.progress.emit(30)
+            # Configure voice properties
+            engine.setProperty('rate', int(engine.getProperty('rate') * self.speech_rate))
+            engine.setProperty('volume', self.volume)
+            
+            # Set specific voice if requested
+            if self.voice_id:
+                voices = engine.getProperty('voices')
+                for voice in voices:
+                    if self.voice_id.lower() in voice.id.lower():
+                        engine.setProperty('voice', voice.id)
+                        break
+            # Otherwise use default voice
+            else:
+                voices = engine.getProperty('voices')
+                if voices:
+                    engine.setProperty('voice', voices[0].id)
+            
+            self.progress.emit(40)
+            
+            if self.is_cancelled:
+                raise Exception("TTS generation cancelled")
             
             # Save to file
             engine.save_to_file(self.text, output_path)
@@ -63,6 +75,11 @@ def generate_speech(self):
             # Wait for file generation to complete
             engine.runAndWait()
             
+            if self.is_cancelled:
+                if os.path.exists(output_path):
+                    os.remove(output_path)
+                raise Exception("TTS generation cancelled")
+            
             self.progress.emit(100)
             self.speech_ready.emit(output_path)
             
@@ -87,21 +104,23 @@ def generate_speech(self):
             
             self.progress.emit(10)
             
-            # Example using a generic TTS service API (replace with your preferred service)
             # This is a placeholder - you'll need to implement the specific API calls
             # for your chosen service (Google, AWS, Azure, etc.)
             
-            headers = {
+            # These variables are prepared for future API implementation
+            # and will be used when the API call is uncommented
+            headers = {  # Unused for now - will be used with actual API implementation  # noqa
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {self.api_key}"
             }
             
-            data = {
+            data = {  # Unused for now - will be used with actual API implementation  # noqa
                 "text": self.text,
                 "voice": self.voice_id,
                 "rate": self.speech_rate
             }
             
+            
             self.progress.emit(30)
             
             # This is a placeholder for the API call
diff --git a/chat_history.db b/chat_history.db
diff --git a/models/tiny-whisper.bin b/models/tiny-whisper.bin
@@ -0,0 +1 @@
+MOCK_MODEL_DATA
diff --git a/settings.json b/settings.json
@@ -1,16 +1,16 @@
 {
-    "theme": "dark",
-    "default_model": "llama3.2:1b",
-    "streaming": false,
-    "text_to_speech": true,
-    "system_prompt": "",
-    "max_history": 50,
-    "font_size": 16,
-    "api_url": "http://localhost:11434",
-    "temperature": 100,
-    "user_name": "Madick Ange C\u00e9sar",
-    "language": "en",
-    "use_offline_voice": false,
-    "offline_stt_model": "",
-    "offline_tts_model": ""
+  "theme": "dark",
+  "default_model": "llama3.2:1b",
+  "streaming": false,
+  "text_to_speech": true,
+  "system_prompt": "",
+  "max_history": 50,
+  "font_size": 16,
+  "api_url": "http://localhost:11434",
+  "temperature": 100,
+  "user_name": "Madick Ange C\u00e9sar",
+  "language": "en",
+  "use_offline_voice": false,
+  "offline_stt_model": "C:\\Users\\user\\Desktop\\Projects\\3- In Progress\\chatbot\\app\\../models\\tiny-whisper.bin",
+  "offline_tts_model": ""
 }