livekit · rosetta-livekit-bot · Jun 17, 2026 · devin-ai-integration · Jun 17, 2026
diff --git a/.changeset/gemini-tts-31-flash.md b/.changeset/gemini-tts-31-flash.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents-plugin-google': patch
+---
+
+Update Gemini TTS to default to the `gemini-3.1-flash-tts-preview` model and to stream generated audio chunks.
diff --git a/examples/src/google_gemini_tts.ts b/examples/src/google_gemini_tts.ts
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { type JobContext, ServerOptions, cli, defineAgent, voice } from '@livekit/agents';
+import * as deepgram from '@livekit/agents-plugin-deepgram';
+import * as google from '@livekit/agents-plugin-google';
+import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
+import { fileURLToPath } from 'node:url';
+
+class GeminiTTSAgent extends voice.Agent { // example agent: requests a greeting reply from onEnter
+  async onEnter() { // NOTE(review): generateReply is not awaited here — confirm that is intended
+    this.session.generateReply({ instructions: 'greet the user and introduce yourself' });
+  }
+}
+
+export default defineAgent({ // agent definition exported as this module's default
+  entry: async (ctx: JobContext) => { // builds the agent and session, then starts it in ctx.room
+    const agent = new GeminiTTSAgent({
+      instructions: 'Your name is Kelly. Respond briefly and concisely using voice conversation.',
+    });
+
+    const session = new voice.AgentSession({ // pipeline: Deepgram STT, Google LLM, Google beta TTS
+      stt: new deepgram.STT(),
+      llm: new google.LLM({ model: 'gemini-2.5-flash' }),
+      tts: new google.beta.TTS({
+        apiKey: process.env.GOOGLE_API_KEY, // key comes from the environment; may be undefined
+        voiceName: 'Kore',
+        model: 'gemini-3.1-flash-tts-preview',
+      }),
+    });
+
+    await session.start({ // awaited, so entry resolves only after start() settles
+      agent,
+      room: ctx.room,
+      inputOptions: {
+        noiseCancellation: BackgroundVoiceCancellation(),
+      },
+    });
+  },
+});
+
+cli.runApp(new ServerOptions({ agent: fileURLToPath(import.meta.url) })); // agent = this file's path
diff --git a/plugins/google/src/beta/gemini_tts.test.ts b/plugins/google/src/beta/gemini_tts.test.ts
@@ -3,9 +3,65 @@
 // SPDX-License-Identifier: Apache-2.0
 import { STT } from '@livekit/agents-plugin-openai';
 import { tts } from '@livekit/agents-plugins-test';
-import { describe } from 'vitest';
+import { describe, expect, it, vi } from 'vitest';
 import { TTS } from './gemini_tts.js';
 
-describe.skip('Google Gemini TTS', async () => {
-  await tts(new TTS(), new STT());
+const { generateContentStream } = vi.hoisted(() => ({ // mock shared with the vi.mock factory below
+  generateContentStream: vi.fn(),
+}));
+
+vi.mock('@google/genai', () => ({ // stub the '@google/genai' module
+  GoogleGenAI: vi.fn(function GoogleGenAI() { // constructor stand-in returning a fake client
+    return {
+      models: {
+        generateContentStream, // the only client method the fake provides
+      },
+    };
+  }),
+}));
+
+describe('Google Gemini TTS integration', () => { // NOTE(review): the '@google/genai' mock above presumably covers this suite too — confirm before un-skipping
+  it.skip('synthesizes with live providers', async () => { // skipped via it.skip
+    await tts(new TTS(), new STT());
+  });
 });
+
+describe('Google Gemini TTS', () => { // unit test against the mocked generateContentStream
+  it('synthesizes audio from a streamed Gemini response', async () => {
+    const audioChunk = Buffer.alloc(4800); // 4800-byte payload reused for both chunks
+
+    generateContentStream.mockImplementation(async function* () { // fake stream: two responses
+      yield buildResponseChunk(audioChunk);
+      yield buildResponseChunk(audioChunk);
+    });
+
+    const stream = new TTS({ apiKey: 'test-api-key' }).synthesize('Hello world');
+    let audioCount = 0;
+
+    for await (const _frame of stream) { // drain the stream, counting emitted items
+      audioCount += 1;
+    }
+
+    expect(generateContentStream).toHaveBeenCalledOnce();
+    expect(audioCount).toBeGreaterThan(0); // lower bound only; exact frame count is not asserted
+  });
+});
+
+function buildResponseChunk(data: Buffer) { // wraps data as one candidate with one inline audio part
+  return {
+    candidates: [
+      {
+        content: {
+          parts: [
+            {
+              inlineData: {
+                data: data.toString('base64'), // payload is base64-encoded
+                mimeType: 'audio/pcm',
+              },
+            },
+          ],
+        },
+      },
+    ],
+  };
+}
diff --git a/plugins/google/src/beta/gemini_tts.ts b/plugins/google/src/beta/gemini_tts.ts
@@ -49,7 +49,7 @@ export type GeminiVoices =
   | 'Sadaltager'
   | 'Sulafat';
 
-const DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-lite-preview-tts';
+const DEFAULT_MODEL: GeminiTTSModels = 'gemini-3.1-flash-tts-preview';
 const DEFAULT_VOICE: GeminiVoices = 'Kore';
 const DEFAULT_SAMPLE_RATE = 24000; // not configurable
 const NUM_CHANNELS = 1;
@@ -234,15 +234,38 @@ export class ChunkedStream extends tts.ChunkedStream {
     ];
 
     try {
+      let lastFrame: AudioFrame | undefined;
+      const sendLastFrame = (final: boolean) => {
+        if (lastFrame) {
+          this.queue.put({
+            requestId,
+            frame: lastFrame,
+            segmentId: requestId,
+            final,
+          });
+          lastFrame = undefined;
+        }
+      };
+
       const responseStream = await this.#tts.client.models.generateContentStream({
         model: this.#tts.opts.model,
         contents,
         config,
       });
 
       for await (const response of responseStream) {
-        await this.#processResponse(response, bstream, requestId);
+        await this.#processResponse(response, bstream, (frame) => {
+          sendLastFrame(false);
+          lastFrame = frame;
+        });
       }
+
+      for (const frame of bstream.flush()) {
+        sendLastFrame(false);
+        lastFrame = frame;
+      }
+
+      sendLastFrame(true);
     } catch (error: unknown) {
       if (error instanceof Error && error.name === 'AbortError') {
         return;
@@ -298,7 +321,7 @@ export class ChunkedStream extends tts.ChunkedStream {
   async #processResponse(
     response: types.GenerateContentResponse,
     bstream: AudioByteStream,
-    requestId: string,
+    onFrame: (frame: AudioFrame) => void,
   ) {
     if (!response.candidates || response.candidates.length === 0) {
       return;
@@ -309,36 +332,15 @@ export class ChunkedStream extends tts.ChunkedStream {
       return;
     }
 
-    let lastFrame: AudioFrame | undefined;
-    const sendLastFrame = (final: boolean) => {
-      if (lastFrame) {
-        this.queue.put({
-          requestId,
-          frame: lastFrame,
-          segmentId: requestId,
-          final,
-        });
-        lastFrame = undefined;
-      }
-    };
-
     for (const part of candidate.content.parts) {
       if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {
         const audioBuffer = Buffer.from(part.inlineData.data, 'base64');
 
         for (const frame of bstream.write(audioBuffer)) {
-          sendLastFrame(false);
-          lastFrame = frame;
+          onFrame(frame);
         }
       }
     }
-
-    for (const frame of bstream.flush()) {
-      sendLastFrame(false);
-      lastFrame = frame;
-    }
-
-    sendLastFrame(true);
   }
 }