@@ -19,6 +19,7 @@ import (
1919 "charm.land/lipgloss/v2"
2020
2121 "github.com/docker/cagent/pkg/app"
22+ "github.com/docker/cagent/pkg/audio/transcribe"
2223 "github.com/docker/cagent/pkg/history"
2324 "github.com/docker/cagent/pkg/runtime"
2425 "github.com/docker/cagent/pkg/session"
@@ -87,6 +88,10 @@ type appModel struct {
8788 statusBar statusbar.StatusBar
8889 completions completion.Manager
8990
91+ // Speech-to-text
92+ transcriber * transcribe.Transcriber
93+ transcriptCh chan string // bridges transcriber goroutine → Bubble Tea event loop
94+
9095 // Working state indicator (resize handle spinner)
9196 workingSpinner spinner.Spinner
9297
@@ -181,6 +186,7 @@ func New(ctx context.Context, spawner SessionSpawner, initialApp *app.App, initi
181186 notification : notification .New (),
182187 dialogMgr : dialog .New (),
183188 completions : completion .New (),
189+ transcriber : transcribe .New (os .Getenv ("OPENAI_API_KEY" )),
184190 workingSpinner : spinner .New (spinner .ModeSpinnerOnly , styles .SpinnerDotsHighlightStyle ),
185191 focusedPanel : PanelEditor ,
186192 editorLines : 3 ,
@@ -790,10 +796,18 @@ func (m *appModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
790796 // --- Speech-to-text ---
791797
792798 case messages.StartSpeakMsg :
793- return m , notification .InfoCmd ("Speech-to-text is not yet supported" )
799+ if ! m .transcriber .IsSupported () {
800+ return m , notification .InfoCmd ("Speech-to-text is only supported on macOS" )
801+ }
802+ return m .handleStartSpeak ()
794803
795- case messages.StopSpeakMsg , messages.SpeakTranscriptMsg :
796- return m , nil
804+ case messages.StopSpeakMsg :
805+ return m .handleStopSpeak ()
806+
807+ case messages.SpeakTranscriptMsg :
808+ m .editor .InsertText (msg .Delta )
809+ cmd := m .waitForTranscript ()
810+ return m , cmd
797811
798812 // --- MCP prompts ---
799813
@@ -1461,6 +1475,19 @@ func (m *appModel) Bindings() []key.Binding {
14611475
14621476// handleKeyPress handles all keyboard input with proper priority routing.
14631477func (m * appModel ) handleKeyPress (msg tea.KeyPressMsg ) (tea.Model , tea.Cmd ) {
1478+ // While transcription is running, Enter stops it and sends the editor content; Escape only stops it.
1479+ if m .transcriber .IsRunning () {
1480+ switch msg .String () {
1481+ case "enter" :
1482+ model , cmd := m .handleStopSpeak ()
1483+ sendCmd := m .editor .SendContent ()
1484+ return model , tea .Batch (cmd , sendCmd )
1485+
1486+ case "esc" :
1487+ return m .handleStopSpeak ()
1488+ }
1489+ }
1490+
14641491 // Dialog gets priority when open
14651492 if m .dialogMgr .Open () {
14661493 u , cmd := m .dialogMgr .Update (msg )
@@ -1978,6 +2005,8 @@ func (m *appModel) cleanupAll() {
19782005 m .cancelThinkingCheck ()
19792006 m .cancelThinkingCheck = nil
19802007 }
2008+ m .transcriber .Stop ()
2009+ m .closeTranscriptCh ()
19812010 for _ , cp := range m .chatPages {
19822011 cp .Cleanup ()
19832012 }
0 commit comments