@@ -1018,6 +1018,14 @@ private void HandleRecognizedText(string rawText, float confidence, string sourc
10181018 MarkRecognizingState ( ) ;
10191019
10201020 VoiceRuntimeLog . Info ( $ "Recognized({ source } ) conf={ confidence : F2} text={ text } ") ;
1021+ VoiceListenerStatusCenter . PublishRecognition ( new VoiceRecognitionRecord
1022+ {
1023+ Text = text ,
1024+ Confidence = ClampConfidence ( confidence ) ,
1025+ Source = source ,
1026+ CapturedAtUtc = DateTime . UtcNow ,
1027+ AudioPcm16Mono = SnapshotSpeechPcm ( )
1028+ } ) ;
10211029
10221030 bool isSystemFallback = _classicFallbackEnabled && string . Equals ( source , "system-speech" , StringComparison . OrdinalIgnoreCase ) ;
10231031 double taskLikelihood = _intentRecognizer . ScoreTaskLikelihood ( text ) ;
@@ -1059,6 +1067,17 @@ private void HandleRecognizedText(string rawText, float confidence, string sourc
10591067 TryExtractConversationTasksAsync ( ) ;
10601068 }
10611069
/// <summary>
/// Takes a point-in-time copy of the bytes currently held in the speech buffer.
/// </summary>
/// <returns>
/// A newly allocated array with the buffered bytes, or <c>null</c> when the
/// buffer holds nothing.
/// </returns>
private byte[] SnapshotSpeechPcm()
{
    // Hold the buffer's monitor for the whole read so the emptiness check and
    // the copy observe the same contents.
    lock (_speechBuffer)
    {
        return _speechBuffer.Count > 0 ? _speechBuffer.ToArray() : null;
    }
}
1080+
10621081 private bool IsDuplicateRecognition ( string text )
10631082 {
10641083 if ( string . IsNullOrWhiteSpace ( _lastRecognizedText ) )
@@ -1250,16 +1269,13 @@ private void CaptureSpeechBufferIfNeeded(byte[] buffer, int bytesRecorded)
12501269 if ( ! _isRecording || buffer == null || bytesRecorded <= 0 )
12511270 return ;
12521271
1253- if ( _speakerVerifyEnabled )
1272+ lock ( _speechBuffer )
12541273 {
1255- lock ( _speechBuffer )
1274+ int maxBytes = ( int ) ( 16000 * 2 * 20 ) ; // ~20s
1275+ int copy = Math . Min ( bytesRecorded , maxBytes - _speechBuffer . Count ) ;
1276+ if ( copy > 0 )
12561277 {
1257- int maxBytes = ( int ) ( 16000 * 2 * 6 ) ; // ~6s
1258- int copy = Math . Min ( bytesRecorded , maxBytes - _speechBuffer . Count ) ;
1259- if ( copy > 0 )
1260- {
1261- for ( int i = 0 ; i < copy ; i ++ ) _speechBuffer . Add ( buffer [ i ] ) ;
1262- }
1278+ for ( int i = 0 ; i < copy ; i ++ ) _speechBuffer . Add ( buffer [ i ] ) ;
12631279 }
12641280 }
12651281
@@ -1280,6 +1296,13 @@ private void CaptureSpeechBufferIfNeeded(byte[] buffer, int bytesRecorded)
12801296 private void EvaluateCompletedSegment ( WaveFormat format )
12811297 {
12821298 EvaluateSpeakerSegment ( format ) ;
1299+ if ( ! _speakerVerifyEnabled )
1300+ {
1301+ lock ( _speechBuffer )
1302+ {
1303+ _speechBuffer . Clear ( ) ;
1304+ }
1305+ }
12831306 if ( _funAsrEnabled )
12841307 {
12851308 TryRecognizeSegmentWithFunAsr ( format ) ;
0 commit comments