Skip to content

Commit d016736

Browse files
authored
Merge pull request #1922 from dgageot/speak
Fix speech to text on macOS
2 parents 3754da5 + 1eab5c4 commit d016736

3 files changed

Lines changed: 97 additions & 3 deletions

File tree

pkg/tui/handlers.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,69 @@ func (m *appModel) handleAttachFile(filePath string) (tea.Model, tea.Cmd) {
531531
})
532532
}
533533

534+
// --- Speech-to-text ---
535+
536+
func (m *appModel) handleStartSpeak() (tea.Model, tea.Cmd) {
537+
if m.transcriber.IsRunning() {
538+
return m, nil
539+
}
540+
541+
// Close any previous channel to unblock stale waitForTranscript goroutines.
542+
m.closeTranscriptCh()
543+
544+
ch := make(chan string, 100)
545+
m.transcriptCh = ch
546+
err := m.transcriber.Start(context.Background(), func(delta string) {
547+
select {
548+
case ch <- delta:
549+
default:
550+
}
551+
})
552+
if err != nil {
553+
m.closeTranscriptCh()
554+
return m, notification.ErrorCmd(fmt.Sprintf("Failed to start listening: %v", err))
555+
}
556+
557+
return m, tea.Batch(
558+
notification.InfoCmd("🎤 Listening... (ENTER to send or ESC to cancel)"),
559+
m.editor.SetRecording(true),
560+
m.waitForTranscript(),
561+
)
562+
}
563+
564+
func (m *appModel) handleStopSpeak() (tea.Model, tea.Cmd) {
565+
if !m.transcriber.IsRunning() {
566+
return m, nil
567+
}
568+
569+
m.transcriber.Stop()
570+
m.closeTranscriptCh()
571+
572+
return m, tea.Batch(m.editor.SetRecording(false), notification.SuccessCmd("Stopped listening"))
573+
}
574+
575+
// waitForTranscript returns a command that blocks until the next transcript
576+
// delta arrives and delivers it as a SpeakTranscriptMsg.
577+
func (m *appModel) waitForTranscript() tea.Cmd {
578+
ch := m.transcriptCh
579+
return func() tea.Msg {
580+
delta, ok := <-ch
581+
if !ok {
582+
return nil
583+
}
584+
return messages.SpeakTranscriptMsg{Delta: delta}
585+
}
586+
}
587+
588+
// closeTranscriptCh closes the transcript channel and sets it to nil,
589+
// unblocking any goroutines waiting in waitForTranscript.
590+
func (m *appModel) closeTranscriptCh() {
591+
if m.transcriptCh != nil {
592+
close(m.transcriptCh)
593+
m.transcriptCh = nil
594+
}
595+
}
596+
534597
func (m *appModel) handleElicitationResponse(action tools.ElicitationAction, content map[string]any) (tea.Model, tea.Cmd) {
535598
if err := m.application.ResumeElicitation(context.Background(), action, content); err != nil {
536599
slog.Error("Failed to resume elicitation", "action", action, "error", err)

pkg/tui/tui.go

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"charm.land/lipgloss/v2"
2020

2121
"github.com/docker/cagent/pkg/app"
22+
"github.com/docker/cagent/pkg/audio/transcribe"
2223
"github.com/docker/cagent/pkg/history"
2324
"github.com/docker/cagent/pkg/runtime"
2425
"github.com/docker/cagent/pkg/session"
@@ -87,6 +88,10 @@ type appModel struct {
8788
statusBar statusbar.StatusBar
8889
completions completion.Manager
8990

91+
// Speech-to-text
92+
transcriber *transcribe.Transcriber
93+
transcriptCh chan string // bridges transcriber goroutine → Bubble Tea event loop
94+
9095
// Working state indicator (resize handle spinner)
9196
workingSpinner spinner.Spinner
9297

@@ -181,6 +186,7 @@ func New(ctx context.Context, spawner SessionSpawner, initialApp *app.App, initi
181186
notification: notification.New(),
182187
dialogMgr: dialog.New(),
183188
completions: completion.New(),
189+
transcriber: transcribe.New(os.Getenv("OPENAI_API_KEY")),
184190
workingSpinner: spinner.New(spinner.ModeSpinnerOnly, styles.SpinnerDotsHighlightStyle),
185191
focusedPanel: PanelEditor,
186192
editorLines: 3,
@@ -790,10 +796,18 @@ func (m *appModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
790796
// --- Speech-to-text ---
791797

792798
case messages.StartSpeakMsg:
793-
return m, notification.InfoCmd("Speech-to-text is not yet supported")
799+
if !m.transcriber.IsSupported() {
800+
return m, notification.InfoCmd("Speech-to-text is only supported on macOS")
801+
}
802+
return m.handleStartSpeak()
794803

795-
case messages.StopSpeakMsg, messages.SpeakTranscriptMsg:
796-
return m, nil
804+
case messages.StopSpeakMsg:
805+
return m.handleStopSpeak()
806+
807+
case messages.SpeakTranscriptMsg:
808+
m.editor.InsertText(msg.Delta)
809+
cmd := m.waitForTranscript()
810+
return m, cmd
797811

798812
// --- MCP prompts ---
799813

@@ -1461,6 +1475,19 @@ func (m *appModel) Bindings() []key.Binding {
14611475

14621476
// handleKeyPress handles all keyboard input with proper priority routing.
14631477
func (m *appModel) handleKeyPress(msg tea.KeyPressMsg) (tea.Model, tea.Cmd) {
1478+
// Check if we should stop transcription on Enter or Escape
1479+
if m.transcriber.IsRunning() {
1480+
switch msg.String() {
1481+
case "enter":
1482+
model, cmd := m.handleStopSpeak()
1483+
sendCmd := m.editor.SendContent()
1484+
return model, tea.Batch(cmd, sendCmd)
1485+
1486+
case "esc":
1487+
return m.handleStopSpeak()
1488+
}
1489+
}
1490+
14641491
// Dialog gets priority when open
14651492
if m.dialogMgr.Open() {
14661493
u, cmd := m.dialogMgr.Update(msg)
@@ -1978,6 +2005,8 @@ func (m *appModel) cleanupAll() {
19782005
m.cancelThinkingCheck()
19792006
m.cancelThinkingCheck = nil
19802007
}
2008+
m.transcriber.Stop()
2009+
m.closeTranscriptCh()
19812010
for _, cp := range m.chatPages {
19822011
cp.Cleanup()
19832012
}

pkg/tui/tui_exit_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/stretchr/testify/assert"
1111
"github.com/stretchr/testify/require"
1212

13+
"github.com/docker/cagent/pkg/audio/transcribe"
1314
"github.com/docker/cagent/pkg/tui/components/completion"
1415
"github.com/docker/cagent/pkg/tui/components/editor"
1516
"github.com/docker/cagent/pkg/tui/components/notification"
@@ -137,6 +138,7 @@ func newTestModel() (*appModel, *mockChatPage, *mockEditor) {
137138
pendingSidebarCollapsed: map[string]bool{},
138139
chatPage: page,
139140
editor: ed,
141+
transcriber: transcribe.New(""),
140142
notification: notification.New(),
141143
dialogMgr: dialog.New(),
142144
completions: completion.New(),

0 commit comments

Comments
 (0)