From 1b5253394836619a763673fb53fba1db82beed81 Mon Sep 17 00:00:00 2001
From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com>
Date: Thu, 11 Jun 2026 11:03:54 +1000
Subject: [PATCH] feat(ai,ai-gemini): add Google Veo video adapter on the
 typed-duration contract (#634)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Restacked on 618-image-to-image-and-image-to-video-support to adopt the
multimodal MediaPrompt format, carrying a minimal additive port of the
#534 typed-duration contract:

- @tanstack/ai (non-breaking): VideoAdapter/BaseVideoAdapter gain a
  TModelDurationByName generic (default Record<string, number> preserves
  existing duration?: number typing), DurationOptions, snapToDurationOption,
  and default availableDurations()/snapDuration() implementations.
  generateVideo's duration is typed via VideoDurationForAdapter.
- @tanstack/ai-gemini: GeminiVideoAdapter over generateVideos /
  getVideosOperation with per-model typed durations (Veo 3.x 4|6|8,
  Veo 2 5|6|8 per current Veo docs), MediaPrompt image routing
  (start_frame → image, end_frame → lastFrame, reference/character →
  referenceImages), RAI filter surfacing, geminiVideo/createGeminiVideo
  factories, and finalized Veo model-meta entries.
- E2E: gemini added to video-gen with a custom aimock mount for
  :predictLongRunning + operations polling; all transports pass.
- Docs + media-generation skill updated for Veo (typed durations,
  image-to-video role table).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .changeset/gemini-veo-video-adapter.md        |  42 ++
 docs/media/video-generation.md                |  80 ++-
 packages/ai-gemini/src/adapters/video.ts      | 411 ++++++++++++++
 packages/ai-gemini/src/index.ts               |  25 +
 packages/ai-gemini/src/model-meta.ts          |  53 +-
 .../src/video/video-provider-options.ts       | 126 +++++
 .../ai-gemini/tests/video-adapter.test.ts     | 518 ++++++++++++++++++
 .../skills/ai-core/media-generation/SKILL.md  |  30 +-
 .../src/activities/generateVideo/adapter.ts   |  74 ++-
 .../ai/src/activities/generateVideo/index.ts  |  28 +-
 .../ai/src/activities/generateVideo/snap.ts   | 100 ++++
 packages/ai/src/activities/index.ts           |   4 +
 packages/ai/src/types.ts                      |   9 +-
 packages/ai/tests/stream-generation.test.ts   |   3 +
 testing/e2e/global-setup.ts                   |  68 +++
 testing/e2e/src/lib/feature-support.ts        |   9 +-
 testing/e2e/src/lib/media-providers.ts        |  10 +-
 17 files changed, 1544 insertions(+), 46 deletions(-)
 create mode 100644 .changeset/gemini-veo-video-adapter.md
 create mode 100644 packages/ai-gemini/src/adapters/video.ts
 create mode 100644 packages/ai-gemini/src/video/video-provider-options.ts
 create mode 100644 packages/ai-gemini/tests/video-adapter.test.ts
 create mode 100644 packages/ai/src/activities/generateVideo/snap.ts

diff --git a/.changeset/gemini-veo-video-adapter.md b/.changeset/gemini-veo-video-adapter.md
new file mode 100644
index 000000000..555b90c2c
--- /dev/null
+++ b/.changeset/gemini-veo-video-adapter.md
@@ -0,0 +1,42 @@
+---
+'@tanstack/ai': minor
+'@tanstack/ai-gemini': minor
+---
+
+Add a Google Veo video adapter (`geminiVideo` / `createGeminiVideo`) and the
+per-model typed-duration video contract it is built on (#534, #634).
+
+**`@tanstack/ai`** (additive, non-breaking): `VideoAdapter` /
+`BaseVideoAdapter` gain a `TModelDurationByName` generic (defaulting to
+`Record<string, number>`, preserving today's `duration?: number` typing for
+adapters without a map) plus two introspection methods with safe defaults:
+
+- `availableDurations()` — a `DurationOptions` tagged union
+  (`discrete | range | mixed | none`) describing the durations the current
+  model accepts. Default: `{ kind: 'none' }`.
+- `snapDuration(seconds)` — coerce raw seconds to the closest valid duration
+  (`snapToDurationOption` is exported for adapter authors). Default:
+  `undefined`.
+
+`generateVideo({ duration })` is now typed per model via
+`VideoDurationForAdapter<TAdapter>`.
+
+**`@tanstack/ai-gemini`**: new Veo adapter over the long-running
+`:predictLongRunning` operation, supporting `veo-3.1-generate-preview`,
+`veo-3.1-fast-generate-preview`, `veo-3.0-generate-001`,
+`veo-3.0-fast-generate-001`, and `veo-2.0-generate-001`:
+
+- `geminiVideo('veo-3.0-generate-001')` → `duration?: 4 | 6 | 8`
+  (Veo 2: `5 | 6 | 8`); `adapter.snapDuration(7)` → `6`.
+- Multimodal prompts: the first un-roled / `'start_frame'` image part
+  becomes the input image, `'end_frame'` → `lastFrame`, `'reference'` /
+  `'character'` → `referenceImages`.
+- `size` takes Veo aspect ratios (`'16:9' | '9:16'`); everything else from
+  the SDK's `GenerateVideosConfig` (e.g. `resolution`, `generateAudio`,
+  `negativePrompt`) is available through `modelOptions`.
+- Responsible-AI filtering is surfaced as a failed job with the filter
+  reasons.
+
+Note: Veo result URLs are served by the Gemini Files API and require the
+Google API key to download (`x-goog-api-key` header or `key` query
+parameter).
diff --git a/docs/media/video-generation.md b/docs/media/video-generation.md
index dd175b3b6..eebbdf530 100644
--- a/docs/media/video-generation.md
+++ b/docs/media/video-generation.md
@@ -2,11 +2,13 @@
 title: Video Generation
 id: video-generation
 order: 6
-description: "Generate video from text prompts with OpenAI Sora using TanStack AI's experimental generateVideo() jobs/polling API."
+description: "Generate video from text prompts with OpenAI Sora or Google Veo using TanStack AI's experimental generateVideo() jobs/polling API."
 keywords:
   - tanstack ai
   - video generation
   - sora
+  - veo
+  - gemini
   - generateVideo
   - jobs api
   - experimental
@@ -36,6 +38,7 @@ TanStack AI provides experimental support for video generation through dedicated
 
 Currently supported:
 - **OpenAI**: Sora-2 and Sora-2-Pro models (when available)
+- **Google Gemini**: Veo 3.1, Veo 3, and Veo 2 models (via the long-running operations API)
 
 ## Basic Usage
 
@@ -417,9 +420,9 @@ adapter uses to route the input to the provider-specific field:
 
 | Role            | Maps to                                                       |
 | --------------- | ------------------------------------------------------------- |
-| `'start_frame'` | fal `start_image_url` (positional default for the first input)         |
-| `'end_frame'`   | fal `end_image_url` (Veo `lastFrame` planned — no Veo adapter yet)      |
-| `'reference'`   | fal `reference_image_urls` (Veo `referenceImages` planned)              |
+| `'start_frame'` | fal `start_image_url`, Veo input `image` (positional default for the first input) |
+| `'end_frame'`   | fal `end_image_url`, Veo `lastFrame`                          |
+| `'reference'`   | fal `reference_image_urls`, Veo `referenceImages`             |
 | `'character'`   | Same as `'reference'` — character consistency images                    |
 
 ```typescript
@@ -445,7 +448,7 @@ await generateVideo({
 | ------------ | -------------------------------------------------------------------------------------------------------- |
 | **OpenAI**   | Sora-2 / Sora-2-Pro → the image part goes to `input_reference`; flattened text is the prompt. Single image only — throws if more than one. |
 | **fal.ai**   | Field names resolve per endpoint from a map generated from the fal SDK's endpoint types — e.g. `role: 'start_frame'` lands on `image_url` for Kling/Veo image-to-video, `first_frame_url` for first-last-frame endpoints, and `start_image_url` otherwise. Defaults: single input → `image_url` (start frame); `role: 'end_frame'` → `end_image_url`; `role: 'reference'` / `'character'` → `reference_image_urls`. Override per-endpoint via `modelOptions` — the media-conditioning fields are typed optional there (even when the endpoint requires them) since they usually arrive as prompt parts. |
-| **Gemini**   | Veo adapter not yet implemented — image prompt parts will be supported when Veo lands.                    |
+| **Gemini**   | Veo → the first un-roled / `'start_frame'` image becomes the input image; `'end_frame'` → `lastFrame`; `'reference'` / `'character'` → `referenceImages` (asset references, Veo 3.1). Throws on multiple starting images. |
 
 Adapters whose underlying API can't accept image inputs throw a clear
 runtime error so calls fail fast.
@@ -488,6 +491,67 @@ const { jobId } = await generateVideo({
 })
 ```
 
+### Google Veo (Gemini) Model Options
+
+Veo runs on Google's long-running operations API. The adapter starts the
+operation, and `getVideoJobStatus` polls it until the video is ready:
+
+```typescript
+import { generateVideo } from '@tanstack/ai'
+import { geminiVideo } from '@tanstack/ai-gemini'
+
+const adapter = geminiVideo('veo-3.1-generate-preview')
+
+const { jobId } = await generateVideo({
+  adapter,
+  prompt: 'A close-up of a luthier carving a guitar neck',
+  size: '16:9', // aspect ratio: '16:9' or '9:16'
+  duration: 8, // typed per model — see below
+  modelOptions: {
+    resolution: '1080p', // '720p' (default), '1080p', '4k' (Veo 3.1 only)
+    negativePrompt: 'cartoon, low quality',
+    generateAudio: true, // Veo 3+ generates synchronized audio
+  },
+})
+```
+
+#### Typed durations
+
+Each Veo model accepts a fixed set of durations, enforced at compile time on
+the `duration` option:
+
+| Model | `duration` values (seconds) |
+|-------|------------------------------|
+| `veo-3.1-generate-preview` | `4`, `6`, `8` |
+| `veo-3.1-fast-generate-preview` | `4`, `6`, `8` |
+| `veo-3.0-generate-001` | `4`, `6`, `8` |
+| `veo-3.0-fast-generate-001` | `4`, `6`, `8` |
+| `veo-2.0-generate-001` | `5`, `6`, `8` |
+
+If you have raw seconds (for example from a UI slider), coerce them with
+`snapDuration`, or inspect the full set with `availableDurations`:
+
+```typescript
+const adapter = geminiVideo('veo-3.0-generate-001')
+
+adapter.availableDurations() // { kind: 'discrete', values: [4, 6, 8] }
+adapter.snapDuration(7) // 6 — 7 is midway between 6 and 8; it snaps to 6
+
+await generateVideo({
+  adapter,
+  prompt: 'A timelapse of a city skyline at dusk',
+  duration: adapter.snapDuration(7),
+})
+```
+
+Adapters that haven't declared a per-model duration map keep the plain
+`duration?: number` typing, return `{ kind: 'none' }` from
+`availableDurations()`, and return `undefined` from `snapDuration()`.
+
+> **Note:** The video URL returned for Veo jobs is served by the Gemini
+> Files API and requires your API key to download (send it as an
+> `x-goog-api-key` header or `key` query parameter).
+
 ## Response Types
 
 > **Note:** The interfaces below are the underlying adapter-level types. The `getVideoJobStatus()` helper returns a single merged object, `{ status, progress?, url?, error?, usage? }` — it does not return `jobId` or `expiresAt`.
@@ -586,9 +650,11 @@ Check the [OpenAI documentation](https://platform.openai.com/docs) for current l
 
 ## Environment Variables
 
-The video adapter uses the same environment variable as other OpenAI adapters:
+The video adapters use the same environment variables as the other adapters
+for their provider:
 
-- `OPENAI_API_KEY`: Your OpenAI API key
+- `OPENAI_API_KEY`: Your OpenAI API key (Sora)
+- `GOOGLE_API_KEY` or `GEMINI_API_KEY`: Your Google API key (Veo)
 
 ## Explicit API Keys
 
diff --git a/packages/ai-gemini/src/adapters/video.ts b/packages/ai-gemini/src/adapters/video.ts
new file mode 100644
index 000000000..b6935e503
--- /dev/null
+++ b/packages/ai-gemini/src/adapters/video.ts
@@ -0,0 +1,411 @@
+import {
+  GenerateVideosOperation,
+  VideoGenerationReferenceType,
+} from '@google/genai'
+import { resolveMediaPrompt } from '@tanstack/ai'
+import { BaseVideoAdapter, snapToDurationOption } from '@tanstack/ai/adapters'
+import { arrayBufferToBase64 } from '@tanstack/ai-utils'
+import { createGeminiClient, getGeminiApiKeyFromEnv } from '../utils'
+import { getGeminiVideoDurationOptions } from '../video/video-provider-options'
+import type { DurationOptions } from '@tanstack/ai/adapters'
+import type {
+  ImagePart,
+  MediaInputMetadata,
+  VideoGenerationOptions,
+  VideoJobResult,
+  VideoStatusResult,
+  VideoUrlResult,
+} from '@tanstack/ai'
+import type {
+  GenerateVideosConfig,
+  GoogleGenAI,
+  Image,
+  VideoGenerationReferenceImage,
+} from '@google/genai'
+import type {
+  GeminiVideoModel,
+  GeminiVideoModelDurationByName,
+  GeminiVideoModelInputModalitiesByName,
+  GeminiVideoModelProviderOptionsByName,
+  GeminiVideoModelSizeByName,
+  GeminiVideoProviderOptions,
+  GeminiVideoSize,
+} from '../video/video-provider-options'
+import type { GeminiClientConfig } from '../utils'
+
+/**
+ * Configuration for Gemini video adapter.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export interface GeminiVideoConfig extends GeminiClientConfig {}
+
+/**
+ * Turn a long-running operation's error payload into display text. The SDK
+ * types that payload as `Record<string, unknown>` (a google.rpc.Status).
+ */
+function operationErrorMessage(error: Record<string, unknown>): string {
+  const { message } = error
+  // Anything but a non-empty string falls back to the serialized payload.
+  const hasText = typeof message === 'string' && message.length > 0
+  return hasText ? message : JSON.stringify(error)
+}
+
+/**
+ * Convert a TanStack image prompt part into the genai `Image` shape Veo
+ * accepts: base64 `imageBytes` (data sources, base64 data: URIs, fetched
+ * HTTP URLs) or a `gcsUri` passthrough for `gs://` Cloud Storage references.
+ * Throws on non-base64 data: URIs and on failed HTTP fetches.
+ */
+async function imagePartToVeoImage(
+  part: ImagePart<MediaInputMetadata>,
+): Promise<Image> {
+  if (part.source.type === 'data') {
+    return {
+      imageBytes: part.source.value,
+      mimeType: part.source.mimeType || 'image/png',
+    }
+  }
+  const url = part.source.value
+  if (url.startsWith('gs://')) {
+    return {
+      gcsUri: url,
+      ...(part.source.mimeType && { mimeType: part.source.mimeType }),
+    }
+  }
+  if (url.startsWith('data:')) {
+    // Parse the header by hand: media-type params may precede `;base64`.
+    const comma = url.indexOf(',')
+    const [mime, ...params] = url.slice('data:'.length, comma).split(';')
+    if (comma === -1 || !params.includes('base64')) {
+      throw new Error(
+        'gemini: only base64 data: URIs are supported for video image inputs.',
+      )
+    }
+    return {
+      imageBytes: url.slice(comma + 1),
+      mimeType: mime || part.source.mimeType || 'image/png',
+    }
+  }
+  const response = await fetch(url)
+  if (!response.ok) {
+    throw new Error(
+      `Failed to fetch image input (${response.status} ${response.statusText}): ${url}`,
+    )
+  }
+  const blob = await response.blob()
+  return {
+    imageBytes: arrayBufferToBase64(await blob.arrayBuffer()),
+    mimeType: part.source.mimeType || blob.type || 'image/png',
+  }
+}
+
+/**
+ * Gemini Veo Video Generation Adapter
+ *
+ * Tree-shakeable adapter for Google Veo video generation. Veo runs as a
+ * long-running operation: `createVideoJob` starts the operation via the
+ * `:predictLongRunning` endpoint, `getVideoStatus` polls it, and
+ * `getVideoUrl` extracts the generated video's URI once it completes.
+ *
+ * Image prompt parts are routed by `metadata.role`:
+ * - `'start_frame'` (or the first un-roled image) → the input image the
+ *   video starts from
+ * - `'end_frame'` → `lastFrame` (the frame the video ends on)
+ * - `'reference'` / `'character'` → `referenceImages` (asset references,
+ *   Veo 3.1)
+ *
+ * Note: the returned video URI is served by the Gemini Files API and
+ * requires the API key (`x-goog-api-key` header or `?key=` query
+ * parameter) to download.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export class GeminiVideoAdapter<
+  TModel extends GeminiVideoModel,
+> extends BaseVideoAdapter<
+  TModel,
+  GeminiVideoProviderOptions,
+  GeminiVideoModelProviderOptionsByName,
+  GeminiVideoModelSizeByName,
+  GeminiVideoModelInputModalitiesByName,
+  GeminiVideoModelDurationByName
+> {
+  readonly name = 'gemini' as const
+
+  protected client: GoogleGenAI
+  /** Creates the client from `config`; the base class receives `{}`. */
+  constructor(config: GeminiVideoConfig, model: TModel) {
+    super({}, model)
+    this.client = createGeminiClient(config)
+  }
+  /** Start a Veo operation; its name is returned as the job ID. */
+  async createVideoJob(
+    options: VideoGenerationOptions<
+      GeminiVideoProviderOptions,
+      GeminiVideoSize,
+      GeminiVideoModelDurationByName[TModel]
+    >,
+  ): Promise<VideoJobResult> {
+    const { prompt, size, duration, modelOptions, logger } = options
+
+    logger.request(
+      `activity=video.create provider=${this.name} model=${this.model} size=${size ?? 'default'} duration=${duration ?? 'default'}`,
+      { provider: this.name, model: this.model },
+    )
+    // Failures inside the try are logged once below and then rethrown.
+    try {
+      const resolved = resolveMediaPrompt(prompt)
+
+      if (resolved.videos.length > 0) {
+        throw new Error(
+          `${this.name}.createVideoJob does not support video prompt parts (model: ${this.model}).`,
+        )
+      }
+      if (resolved.audios.length > 0) {
+        throw new Error(
+          `${this.name}.createVideoJob does not support audio prompt parts (model: ${this.model}).`,
+        )
+      }
+      // Images are split into start image, last frame, and references.
+      const { image, lastFrame, referenceImages } = await this.routeImageParts(
+        resolved.images,
+      )
+      // Later spreads win, so size/duration/images override modelOptions.
+      const config: GenerateVideosConfig = {
+        ...modelOptions,
+        ...(size !== undefined && { aspectRatio: size }),
+        ...(duration !== undefined && { durationSeconds: duration }),
+        ...(lastFrame && { lastFrame }),
+        ...(referenceImages.length > 0 && { referenceImages }),
+      }
+
+      const operation = await this.client.models.generateVideos({
+        model: this.model,
+        prompt: resolved.text,
+        ...(image && { image }),
+        config,
+      })
+
+      if (!operation.name) {
+        throw new Error(
+          'Veo did not return an operation name for the video generation job.',
+        )
+      }
+      // The operation name is the handle later passed to getOperation().
+      return { jobId: operation.name, model: this.model }
+    } catch (error) {
+      logger.errors(`${this.name}.createVideoJob fatal`, {
+        error,
+        source: `${this.name}.createVideoJob`,
+      })
+      throw error
+    }
+  }
+
+  /**
+   * Route image parts to Veo request fields by `metadata.role`; throws on
+   * duplicate start/end frames and on any role Veo cannot use.
+   */
+  private async routeImageParts(
+    parts: Array<ImagePart<MediaInputMetadata>>,
+  ): Promise<{
+    image: Image | undefined
+    lastFrame: Image | undefined
+    referenceImages: Array<VideoGenerationReferenceImage>
+  }> {
+    let image: Image | undefined
+    let lastFrame: Image | undefined
+    const referenceImages: Array<VideoGenerationReferenceImage> = []
+    for (const part of parts) {
+      const role = part.metadata?.role
+      switch (role) {
+        case 'end_frame': {
+          if (lastFrame) {
+            throw new Error(
+              `${this.name}: Veo accepts at most one 'end_frame' image.`,
+            )
+          }
+          lastFrame = await imagePartToVeoImage(part)
+          break
+        }
+        case 'reference':
+        case 'character': {
+          referenceImages.push({
+            image: await imagePartToVeoImage(part),
+            referenceType: VideoGenerationReferenceType.ASSET,
+          })
+          break
+        }
+        case 'start_frame':
+        case undefined: {
+          if (image) {
+            throw new Error(
+              `${this.name}: Veo accepts at most one starting image; received multiple 'start_frame'/un-roled images. Use metadata.role ('end_frame', 'reference') to disambiguate the others.`,
+            )
+          }
+          image = await imagePartToVeoImage(part)
+          break
+        }
+        case 'mask':
+        case 'control':
+        default:
+          throw new Error(
+            `${this.name}: unsupported image role "${role}" for Veo video generation.`,
+          )
+      }
+    }
+    return { image, lastFrame, referenceImages }
+  }
+
+  /**
+   * Report where a Veo job stands by fetching its operation once.
+   *
+   * @param jobId - Operation name returned by `createVideoJob`.
+   * @returns `processing` until the operation is done, `failed` when it
+   *   carries an error or yields no videos, otherwise `completed`.
+   */
+  async getVideoStatus(jobId: string): Promise<VideoStatusResult> {
+    const { done, error, response } = await this.getOperation(jobId)
+    const failed = (message: string): VideoStatusResult => ({
+      jobId,
+      status: 'failed',
+      error: message,
+    })
+
+    if (!done) return { jobId, status: 'processing' }
+    if (error) return failed(operationErrorMessage(error))
+
+    // A "successful" operation may still carry zero samples when every one
+    // was dropped by Responsible-AI filters; report that as a failure here
+    // so getVideoUrl() is never asked to read an empty response.
+    if ((response?.generatedVideos ?? []).length === 0) {
+      const filterReasons = response?.raiMediaFilteredReasons
+      return failed(
+        filterReasons?.length
+          ? `Video was filtered by Responsible-AI: ${filterReasons.join('; ')}`
+          : 'Veo returned no generated videos.',
+      )
+    }
+
+    return { jobId, status: 'completed' }
+  }
+
+  /**
+   * Resolve the finished job's video URI; throws if it is not available.
+   */
+  async getVideoUrl(jobId: string): Promise<VideoUrlResult> {
+    const { done, error, response } = await this.getOperation(jobId)
+
+    if (!done) {
+      throw new Error(
+        `Video is not ready yet. Check status first. Job ID: ${jobId}`,
+      )
+    }
+    if (error) {
+      throw new Error(
+        `Video generation failed: ${operationErrorMessage(error)}`,
+      )
+    }
+
+    const url = response?.generatedVideos?.[0]?.video?.uri
+    if (url) return { jobId, url }
+
+    const filterReasons = response?.raiMediaFilteredReasons
+    throw new Error(
+      filterReasons?.length
+        ? `Video was filtered by Responsible-AI: ${filterReasons.join('; ')}`
+        : `Video URL not found in operation response. Job ID: ${jobId}`,
+    )
+  }
+  /** Durations (seconds) the current model accepts, from the Veo table. */
+  override availableDurations(): DurationOptions<
+    GeminiVideoModelDurationByName[TModel]
+  > {
+    return getGeminiVideoDurationOptions(this.model)
+  }
+  /** Coerce raw `seconds` to a value from `availableDurations()`, if any. */
+  override snapDuration(
+    seconds: number,
+  ): GeminiVideoModelDurationByName[TModel] | undefined {
+    return snapToDurationOption(seconds, this.availableDurations())
+  }
+
+  /**
+   * Fetch the long-running operation named by `jobId`. The SDK's
+   * `operations.getVideosOperation` calls `_fromAPIResponse` on the value
+   * it is handed, so it needs a real `GenerateVideosOperation` instance;
+   * one is rebuilt from the job ID instead of passing an object literal.
+   */
+  private async getOperation(jobId: string): Promise<GenerateVideosOperation> {
+    const handle = new GenerateVideosOperation()
+    handle.name = jobId
+    return this.client.operations.getVideosOperation({ operation: handle })
+  }
+}
+
+/**
+ * Creates a Gemini video adapter with an explicit API key.
+ * Type resolution happens here at the call site.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ *
+ * @param model - The model name (e.g., 'veo-3.1-generate-preview')
+ * @param apiKey - Your Google API key
+ * @param config - Optional additional configuration (cannot override `apiKey`)
+ * @returns Configured Gemini video adapter instance with resolved types
+ *
+ * @example
+ * ```typescript
+ * const adapter = createGeminiVideo('veo-3.1-generate-preview', 'your-api-key');
+ *
+ * const { jobId } = await generateVideo({
+ *   adapter,
+ *   prompt: 'A beautiful sunset over the ocean',
+ *   duration: adapter.snapDuration(7), // → 6
+ * });
+ * ```
+ */
+export function createGeminiVideo<TModel extends GeminiVideoModel>(
+  model: TModel,
+  apiKey: string,
+  config?: Omit<GeminiVideoConfig, 'apiKey'>,
+): GeminiVideoAdapter<TModel> {
+  return new GeminiVideoAdapter({ ...config, apiKey }, model)
+}
+
+/**
+ * Creates a Gemini video adapter with automatic API key detection from environment variables.
+ * Type resolution happens here at the call site.
+ *
+ * Looks for `GOOGLE_API_KEY` or `GEMINI_API_KEY` in:
+ * - `process.env` (Node.js)
+ * - `window.env` (Browser with injected env)
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ *
+ * @param model - The model name (e.g., 'veo-3.1-generate-preview')
+ * @param config - Optional configuration (excluding apiKey which is auto-detected)
+ * @returns Configured Gemini video adapter instance with resolved types
+ * @throws Error if GOOGLE_API_KEY or GEMINI_API_KEY is not found in environment
+ *
+ * @example
+ * ```typescript
+ * // Automatically uses GOOGLE_API_KEY from environment
+ * const adapter = geminiVideo('veo-3.1-generate-preview');
+ *
+ * // Create a video generation job
+ * const { jobId } = await generateVideo({
+ *   adapter,
+ *   prompt: 'A cat playing piano'
+ * });
+ *
+ * // Poll for status
+ * const status = await getVideoJobStatus({ adapter, jobId });
+ * ```
+ */
+export function geminiVideo<TModel extends GeminiVideoModel>(
+  model: TModel,
+  config?: Omit<GeminiVideoConfig, 'apiKey'>,
+): GeminiVideoAdapter<TModel> {
+  // The environment key lookup runs before the adapter is constructed.
+  return createGeminiVideo(model, getGeminiApiKeyFromEnv(), config)
+}
diff --git a/packages/ai-gemini/src/index.ts b/packages/ai-gemini/src/index.ts
index f204c184d..462de4067 100644
--- a/packages/ai-gemini/src/index.ts
+++ b/packages/ai-gemini/src/index.ts
@@ -61,6 +61,30 @@ export {
   type GeminiAudioProviderOptions,
 } from './adapters/audio'
 
+// Video / Veo generation adapter (experimental)
+/**
+ * @experimental Veo video generation is an experimental feature and may change.
+ */
+export {
+  GeminiVideoAdapter,
+  createGeminiVideo,
+  geminiVideo,
+  type GeminiVideoConfig,
+} from './adapters/video'
+export {
+  GEMINI_VIDEO_DURATIONS,
+  getGeminiVideoDurationOptions,
+} from './video/video-provider-options'
+export type {
+  GeminiVideoModel,
+  GeminiVideoModelDurationByName,
+  GeminiVideoModelInputModalitiesByName,
+  GeminiVideoModelProviderOptionsByName,
+  GeminiVideoModelSizeByName,
+  GeminiVideoProviderOptions,
+  GeminiVideoSize,
+} from './video/video-provider-options'
+
 // Re-export models from model-meta for convenience
 export {
   GEMINI_MODELS,
@@ -71,6 +95,7 @@ export { GEMINI_IMAGE_MODELS as GeminiImageModels } from './model-meta'
 export { GEMINI_TTS_MODELS as GeminiTTSModels } from './model-meta'
 export { GEMINI_TTS_VOICES as GeminiTTSVoices } from './model-meta'
 export { GEMINI_AUDIO_MODELS as GeminiAudioModels } from './model-meta'
+export { GEMINI_VIDEO_MODELS as GeminiVideoModels } from './model-meta'
 export type { GeminiModels as GeminiTextModel } from './model-meta'
 export type { GeminiImageModels as GeminiImageModel } from './model-meta'
 export type { GeminiTTSVoice } from './model-meta'
diff --git a/packages/ai-gemini/src/model-meta.ts b/packages/ai-gemini/src/model-meta.ts
index 610bd2ef6..150b23fb2 100644
--- a/packages/ai-gemini/src/model-meta.ts
+++ b/packages/ai-gemini/src/model-meta.ts
@@ -631,7 +631,11 @@ const IMAGEN_3 = {
     GeminiCommonConfigOptions &
     GeminiCachedContentOptions
 >
-/** 
+/**
+ * Veo video generation models. Pricing is per second of generated video
+ * (audio+video rate where the model supports audio).
+ * @experimental Veo video generation is an experimental feature and may change.
+ */
 const VEO_3_1_PREVIEW = {
   name: 'veo-3.1-generate-preview',
   max_input_tokens: 1024,
@@ -650,9 +654,9 @@ const VEO_3_1_PREVIEW = {
   },
 } as const satisfies ModelMeta<
   GeminiToolConfigOptions &
-  GeminiSafetyOptions &
-  GeminiGenerationConfigOptions &
-  GeminiCachedContentOptions
+    GeminiSafetyOptions &
+    GeminiCommonConfigOptions &
+    GeminiCachedContentOptions
 >
 
 const VEO_3_1_FAST_PREVIEW = {
@@ -673,9 +677,9 @@ const VEO_3_1_FAST_PREVIEW = {
   },
 } as const satisfies ModelMeta<
   GeminiToolConfigOptions &
-  GeminiSafetyOptions &
-  GeminiGenerationConfigOptions &
-  GeminiCachedContentOptions
+    GeminiSafetyOptions &
+    GeminiCommonConfigOptions &
+    GeminiCachedContentOptions
 >
 
 const VEO_3 = {
@@ -696,9 +700,9 @@ const VEO_3 = {
   },
 } as const satisfies ModelMeta<
   GeminiToolConfigOptions &
-  GeminiSafetyOptions &
-  GeminiGenerationConfigOptions &
-  GeminiCachedContentOptions
+    GeminiSafetyOptions &
+    GeminiCommonConfigOptions &
+    GeminiCachedContentOptions
 >
 
 const VEO_3_FAST = {
@@ -719,9 +723,9 @@ const VEO_3_FAST = {
   },
 } as const satisfies ModelMeta<
   GeminiToolConfigOptions &
-  GeminiSafetyOptions &
-  GeminiGenerationConfigOptions &
-  GeminiCachedContentOptions
+    GeminiSafetyOptions &
+    GeminiCommonConfigOptions &
+    GeminiCachedContentOptions
 >
 
 const VEO_2 = {
@@ -741,10 +745,10 @@ const VEO_2 = {
   },
 } as const satisfies ModelMeta<
   GeminiToolConfigOptions &
-  GeminiSafetyOptions &
-  GeminiGenerationConfigOptions &
-  GeminiCachedContentOptions
-> */
+    GeminiSafetyOptions &
+    GeminiCommonConfigOptions &
+    GeminiCachedContentOptions
+>
 
 const GEMINI_3_5_FLASH = {
   name: 'gemini-3.5-flash',
@@ -878,20 +882,17 @@ export const GEMINI_TTS_VOICES = [
 
 export type GeminiTTSVoice = (typeof GEMINI_TTS_VOICES)[number]
 
-/*   const GEMINI_AUDIO_MODELS = [
-  GEMINI_2_5_PRO_TTS.name,
-  GEMINI_2_5_FLASH_TTS.name,
-  GEMINI_2_5_FLASH_LIVE.name,
-  GEMINI_2_FLASH_LIVE.name,
-] as const
-
-  const GEMINI_VIDEO_MODELS = [
+/**
+ * Veo video generation models.
+ * @experimental Veo video generation is an experimental feature and may change.
+ */
+export const GEMINI_VIDEO_MODELS = [
   VEO_3_1_PREVIEW.name,
   VEO_3_1_FAST_PREVIEW.name,
   VEO_3.name,
   VEO_3_FAST.name,
   VEO_2.name,
-] as const */
+] as const
 
 // Manual type map for per-model provider options
 export type GeminiChatModelProviderOptionsByName = {
diff --git a/packages/ai-gemini/src/video/video-provider-options.ts b/packages/ai-gemini/src/video/video-provider-options.ts
new file mode 100644
index 000000000..b1fd5671a
--- /dev/null
+++ b/packages/ai-gemini/src/video/video-provider-options.ts
@@ -0,0 +1,126 @@
+/**
+ * Gemini Veo Video Generation Provider Options
+ *
+ * Based on https://ai.google.dev/gemini-api/docs/video
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+import type { DurationOptions } from '@tanstack/ai/adapters'
+import type { GenerateVideosConfig } from '@google/genai'
+import type { GEMINI_VIDEO_MODELS } from '../model-meta'
+
+/**
+ * Model type for Gemini Veo video generation.
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoModel = (typeof GEMINI_VIDEO_MODELS)[number]
+
+/**
+ * Supported aspect ratios for Veo video generation. This is the `size` value
+ * for the Gemini video adapter — Veo expresses output shape as an aspect
+ * ratio (plus an optional `resolution` in `modelOptions`), not pixel
+ * dimensions.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoSize = '16:9' | '9:16'
+
+/**
+ * Provider-specific options for Gemini Veo video generation.
+ *
+ * Derived from the SDK's `GenerateVideosConfig`, minus the fields the
+ * adapter manages itself:
+ * - `durationSeconds` — set via the typed top-level `duration` option
+ *   (use `adapter.snapDuration(seconds)` to coerce raw seconds)
+ * - `aspectRatio` — set via the top-level `size` option
+ * - `lastFrame` / `referenceImages` — set via image parts in the `prompt`
+ *   with `metadata.role: 'end_frame'` / `'reference'`
+ * - `httpOptions` / `abortSignal` — client-level transport concerns
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoProviderOptions = Omit<
+  GenerateVideosConfig,
+  | 'durationSeconds'
+  | 'aspectRatio'
+  | 'lastFrame'
+  | 'referenceImages'
+  | 'httpOptions'
+  | 'abortSignal'
+>
+
+/**
+ * Model-specific provider options mapping.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoModelProviderOptionsByName = {
+  [TModel in GeminiVideoModel]: GeminiVideoProviderOptions
+}
+
+/**
+ * Model-specific size (aspect ratio) mapping.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoModelSizeByName = {
+  [TModel in GeminiVideoModel]: GeminiVideoSize
+}
+
+/**
+ * Per-model prompt input modalities. Every Veo model accepts image
+ * conditioning inputs (first frame, last frame, reference images) alongside
+ * the text prompt.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoModelInputModalitiesByName = {
+  [TModel in GeminiVideoModel]: readonly ['image']
+}
+
+/**
+ * Per-model duration unions (seconds, as numbers — the API's
+ * `parameters.durationSeconds` field is numeric).
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type GeminiVideoModelDurationByName = {
+  'veo-3.1-generate-preview': 4 | 6 | 8
+  'veo-3.1-fast-generate-preview': 4 | 6 | 8
+  'veo-3.0-generate-001': 4 | 6 | 8
+  'veo-3.0-fast-generate-001': 4 | 6 | 8
+  'veo-2.0-generate-001': 5 | 6 | 8
+}
+
+/**
+ * Runtime duration table backing `availableDurations()` / `snapDuration()`.
+ *
+ * Curated from the official Veo docs
+ * (https://ai.google.dev/gemini-api/docs/video) — the Gemini OpenAPI spec
+ * types the `:predictLongRunning` request's `parameters` as unconstrained,
+ * so it carries no per-model duration information to derive these from.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export const GEMINI_VIDEO_DURATIONS: {
+  readonly [TModel in GeminiVideoModel]: DurationOptions<
+    GeminiVideoModelDurationByName[TModel]
+  >
+} = {
+  'veo-3.1-generate-preview': { kind: 'discrete', values: [4, 6, 8] },
+  'veo-3.1-fast-generate-preview': { kind: 'discrete', values: [4, 6, 8] },
+  'veo-3.0-generate-001': { kind: 'discrete', values: [4, 6, 8] },
+  'veo-3.0-fast-generate-001': { kind: 'discrete', values: [4, 6, 8] },
+  'veo-2.0-generate-001': { kind: 'discrete', values: [5, 6, 8] },
+}
+
+/**
+ * Look up the duration options for a Veo model.
+ * @returns The `GEMINI_VIDEO_DURATIONS` entry for `model` (not a copy).
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export function getGeminiVideoDurationOptions<TModel extends GeminiVideoModel>(
+  model: TModel,
+): DurationOptions<GeminiVideoModelDurationByName[TModel]> {
+  return GEMINI_VIDEO_DURATIONS[model]
+}
diff --git a/packages/ai-gemini/tests/video-adapter.test.ts b/packages/ai-gemini/tests/video-adapter.test.ts
new file mode 100644
index 000000000..1e5945e01
--- /dev/null
+++ b/packages/ai-gemini/tests/video-adapter.test.ts
@@ -0,0 +1,518 @@
+import { describe, expect, expectTypeOf, it, vi } from 'vitest'
+import { resolveDebugOption } from '@tanstack/ai/adapter-internals'
+import {
+  GeminiVideoAdapter,
+  createGeminiVideo,
+  geminiVideo,
+} from '../src/adapters/video'
+import {
+  GEMINI_VIDEO_DURATIONS,
+  getGeminiVideoDurationOptions,
+} from '../src/video/video-provider-options'
+import type { GenerateVideosOperation, GoogleGenAI } from '@google/genai'
+import type { GeminiVideoModel } from '../src/video/video-provider-options'
+
+const testLogger = resolveDebugOption(false)
+
+interface ClientStub {
+  models: { generateVideos: ReturnType<typeof vi.fn> }
+  operations: { getVideosOperation: ReturnType<typeof vi.fn> }
+}
+
+/**
+ * Build a `ClientStub` whose mocks resolve to the given results or, when
+ * omitted, to a started operation and a completed single-video poll.
+ */
+function createClientStub(
+  overrides: {
+    createResult?: Partial<GenerateVideosOperation>
+    pollResult?: Partial<GenerateVideosOperation>
+  } = {},
+): ClientStub {
+  const operationName = 'models/veo-3.1-generate-preview/operations/op-123'
+  const createResult = overrides.createResult ?? { name: operationName }
+  const pollResult = overrides.pollResult ?? {
+    name: operationName,
+    done: true,
+    response: {
+      generatedVideos: [{ video: { uri: 'https://example.com/video.mp4' } }],
+    },
+  }
+
+  return {
+    models: {
+      generateVideos: vi.fn().mockResolvedValue(createResult),
+    },
+    operations: {
+      getVideosOperation: vi.fn().mockResolvedValue(pollResult),
+    },
+  }
+}
+
+/**
+ * Test subclass that injects a stubbed GoogleGenAI client through the
+ * protected `client` seam instead of patching globals.
+ */
+class StubbedGeminiVideoAdapter<
+  TModel extends GeminiVideoModel,
+> extends GeminiVideoAdapter<TModel> {
+  constructor(model: TModel, stub: ClientStub) {
+    super({ apiKey: 'test-key' }, model)
+    this.client = stub as unknown as GoogleGenAI
+  }
+}
+
+describe('Gemini Video Adapter', () => {
+  describe('factories', () => {
+    it('creates an adapter with the provided API key', () => {
+      const adapter = createGeminiVideo('veo-3.1-generate-preview', 'test-key')
+      expect(adapter).toBeInstanceOf(GeminiVideoAdapter)
+      expect(adapter.kind).toBe('video')
+      expect(adapter.name).toBe('gemini')
+      expect(adapter.model).toBe('veo-3.1-generate-preview')
+    })
+
+    it('geminiVideo throws without an API key in the environment', () => {
+      const googleKey = process.env.GOOGLE_API_KEY
+      const geminiKey = process.env.GEMINI_API_KEY
+      delete process.env.GOOGLE_API_KEY
+      delete process.env.GEMINI_API_KEY
+      try {
+        expect(() => geminiVideo('veo-3.1-generate-preview')).toThrow(
+          /GOOGLE_API_KEY or GEMINI_API_KEY/,
+        )
+      } finally {
+        if (googleKey !== undefined) process.env.GOOGLE_API_KEY = googleKey
+        if (geminiKey !== undefined) process.env.GEMINI_API_KEY = geminiKey
+      }
+    })
+  })
+
+  describe('availableDurations', () => {
+    it('returns the discrete Veo 3.x duration set', () => {
+      const adapter = createGeminiVideo('veo-3.0-generate-001', 'test-key')
+      expect(adapter.availableDurations()).toEqual({
+        kind: 'discrete',
+        values: [4, 6, 8],
+      })
+    })
+
+    it('returns the discrete Veo 2 duration set', () => {
+      const adapter = createGeminiVideo('veo-2.0-generate-001', 'test-key')
+      expect(adapter.availableDurations()).toEqual({
+        kind: 'discrete',
+        values: [5, 6, 8],
+      })
+    })
+
+    it('covers every model in the duration table', () => {
+      for (const model of Object.keys(
+        GEMINI_VIDEO_DURATIONS,
+      ) as Array<GeminiVideoModel>) {
+        expect(getGeminiVideoDurationOptions(model).kind).toBe('discrete')
+      }
+    })
+  })
+
+  describe('snapDuration', () => {
+    it('snaps to the closest valid duration', () => {
+      const adapter = createGeminiVideo('veo-3.0-generate-001', 'test-key')
+      expect(adapter.snapDuration(3)).toBe(4)
+      expect(adapter.snapDuration(5)).toBe(4)
+      expect(adapter.snapDuration(7)).toBe(6)
+      expect(adapter.snapDuration(100)).toBe(8)
+    })
+
+    it('snaps Veo 2 values to its own set', () => {
+      const adapter = createGeminiVideo('veo-2.0-generate-001', 'test-key')
+      expect(adapter.snapDuration(1)).toBe(5)
+      expect(adapter.snapDuration(7)).toBe(6)
+      expect(adapter.snapDuration(9)).toBe(8)
+    })
+  })
+
+  describe('per-model duration typing', () => {
+    it('types duration as the model-specific union at compile time', () => {
+      const veo3 = createGeminiVideo('veo-3.0-generate-001', 'test-key')
+      expectTypeOf(veo3.snapDuration).returns.toEqualTypeOf<
+        4 | 6 | 8 | undefined
+      >()
+      type Veo3Options = Parameters<typeof veo3.createVideoJob>[0]
+      expectTypeOf<Veo3Options['duration']>().toEqualTypeOf<
+        4 | 6 | 8 | undefined
+      >()
+
+      const veo2 = createGeminiVideo('veo-2.0-generate-001', 'test-key')
+      expectTypeOf(veo2.snapDuration).returns.toEqualTypeOf<
+        5 | 6 | 8 | undefined
+      >()
+      type Veo2Options = Parameters<typeof veo2.createVideoJob>[0]
+      expectTypeOf<Veo2Options['duration']>().toEqualTypeOf<
+        5 | 6 | 8 | undefined
+      >()
+    })
+  })
+
+  describe('createVideoJob', () => {
+    it('starts a long-running operation and returns its name as jobId', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      const result = await adapter.createVideoJob({
+        model: 'veo-3.1-generate-preview',
+        prompt: 'a guitar being played in a store',
+        size: '16:9',
+        duration: 6,
+        modelOptions: { negativePrompt: 'blurry footage' },
+        logger: testLogger,
+      })
+
+      expect(result).toEqual({
+        jobId: 'models/veo-3.1-generate-preview/operations/op-123',
+        model: 'veo-3.1-generate-preview',
+      })
+      expect(stub.models.generateVideos).toHaveBeenCalledWith({
+        model: 'veo-3.1-generate-preview',
+        prompt: 'a guitar being played in a store',
+        config: {
+          negativePrompt: 'blurry footage',
+          aspectRatio: '16:9',
+          durationSeconds: 6,
+        },
+      })
+    })
+
+    it('omits aspectRatio and durationSeconds when size/duration are not given', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-2.0-generate-001',
+        stub,
+      )
+
+      await adapter.createVideoJob({
+        model: 'veo-2.0-generate-001',
+        prompt: 'a sunset',
+        logger: testLogger,
+      })
+
+      expect(stub.models.generateVideos).toHaveBeenCalledWith({
+        model: 'veo-2.0-generate-001',
+        prompt: 'a sunset',
+        config: {},
+      })
+    })
+
+    it('throws when the operation comes back without a name', async () => {
+      const stub = createClientStub({ createResult: {} })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.0-generate-001',
+        stub,
+      )
+
+      await expect(
+        adapter.createVideoJob({
+          model: 'veo-3.0-generate-001',
+          prompt: 'a sunset',
+          logger: testLogger,
+        }),
+      ).rejects.toThrow(/operation name/)
+    })
+  })
+
+  describe('multimodal prompt routing', () => {
+    const dataImage = (role?: 'start_frame' | 'end_frame' | 'reference') =>
+      ({
+        type: 'image',
+        source: { type: 'data', value: 'aGVsbG8=', mimeType: 'image/jpeg' },
+        ...(role && { metadata: { role } }),
+      }) as const
+
+    it('routes an un-roled image part to the input image', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await adapter.createVideoJob({
+        model: 'veo-3.1-generate-preview',
+        prompt: [
+          { type: 'text', content: 'animate this product photo' },
+          dataImage(),
+        ],
+        logger: testLogger,
+      })
+
+      expect(stub.models.generateVideos).toHaveBeenCalledWith({
+        model: 'veo-3.1-generate-preview',
+        prompt: 'animate this product photo',
+        image: { imageBytes: 'aGVsbG8=', mimeType: 'image/jpeg' },
+        config: {},
+      })
+    })
+
+    it('routes end_frame and reference roles to lastFrame/referenceImages', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await adapter.createVideoJob({
+        model: 'veo-3.1-generate-preview',
+        prompt: [
+          { type: 'text', content: 'pan from start to end' },
+          dataImage('start_frame'),
+          dataImage('end_frame'),
+          dataImage('reference'),
+        ],
+        logger: testLogger,
+      })
+
+      const call = stub.models.generateVideos.mock.calls[0]?.[0]
+      expect(call.image).toEqual({
+        imageBytes: 'aGVsbG8=',
+        mimeType: 'image/jpeg',
+      })
+      expect(call.config.lastFrame).toEqual({
+        imageBytes: 'aGVsbG8=',
+        mimeType: 'image/jpeg',
+      })
+      expect(call.config.referenceImages).toEqual([
+        {
+          image: { imageBytes: 'aGVsbG8=', mimeType: 'image/jpeg' },
+          referenceType: 'ASSET',
+        },
+      ])
+    })
+
+    it('decodes base64 data: URI image sources', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.0-generate-001',
+        stub,
+      )
+
+      await adapter.createVideoJob({
+        model: 'veo-3.0-generate-001',
+        prompt: [
+          { type: 'text', content: 'animate' },
+          {
+            type: 'image',
+            source: { type: 'url', value: 'data:image/png;base64,aGVsbG8=' },
+          },
+        ],
+        logger: testLogger,
+      })
+
+      const call = stub.models.generateVideos.mock.calls[0]?.[0]
+      expect(call.image).toEqual({
+        imageBytes: 'aGVsbG8=',
+        mimeType: 'image/png',
+      })
+    })
+
+    it('rejects multiple starting images', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await expect(
+        adapter.createVideoJob({
+          model: 'veo-3.1-generate-preview',
+          prompt: [
+            { type: 'text', content: 'animate' },
+            dataImage(),
+            dataImage(),
+          ],
+          logger: testLogger,
+        }),
+      ).rejects.toThrow(/at most one starting image/)
+    })
+
+    it('rejects video prompt parts', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await expect(
+        adapter.createVideoJob({
+          model: 'veo-3.1-generate-preview',
+          prompt: [
+            { type: 'text', content: 'extend this' },
+            {
+              type: 'video',
+              source: {
+                type: 'data',
+                value: 'aGVsbG8=',
+                mimeType: 'video/mp4',
+              },
+            },
+          ],
+          logger: testLogger,
+        }),
+      ).rejects.toThrow(/video prompt parts/)
+    })
+  })
+
+  describe('getVideoStatus', () => {
+    const jobId = 'models/veo-3.1-generate-preview/operations/op-123'
+
+    it('polls the operation by job ID', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await adapter.getVideoStatus(jobId)
+
+      const call = stub.operations.getVideosOperation.mock.calls[0]?.[0] as {
+        operation: GenerateVideosOperation
+      }
+      expect(call.operation.name).toBe(jobId)
+    })
+
+    it('maps an in-flight operation to processing', async () => {
+      const stub = createClientStub({
+        pollResult: { name: jobId, done: false },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      expect(await adapter.getVideoStatus(jobId)).toEqual({
+        jobId,
+        status: 'processing',
+      })
+    })
+
+    it('maps a completed operation with videos to completed', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      expect(await adapter.getVideoStatus(jobId)).toEqual({
+        jobId,
+        status: 'completed',
+      })
+    })
+
+    it('maps an operation error to failed with its message', async () => {
+      const stub = createClientStub({
+        pollResult: {
+          name: jobId,
+          done: true,
+          error: { code: 3, message: 'Invalid duration' },
+        },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      expect(await adapter.getVideoStatus(jobId)).toEqual({
+        jobId,
+        status: 'failed',
+        error: 'Invalid duration',
+      })
+    })
+
+    it('maps a fully RAI-filtered response to failed with the reasons', async () => {
+      const stub = createClientStub({
+        pollResult: {
+          name: jobId,
+          done: true,
+          response: {
+            generatedVideos: [],
+            raiMediaFilteredCount: 1,
+            raiMediaFilteredReasons: ['unsafe content'],
+          },
+        },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      const status = await adapter.getVideoStatus(jobId)
+      expect(status.status).toBe('failed')
+      expect(status.error).toContain('unsafe content')
+    })
+  })
+
+  describe('getVideoUrl', () => {
+    const jobId = 'models/veo-3.1-generate-preview/operations/op-123'
+
+    it('returns the generated video URI', async () => {
+      const stub = createClientStub()
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      expect(await adapter.getVideoUrl(jobId)).toEqual({
+        jobId,
+        url: 'https://example.com/video.mp4',
+      })
+    })
+
+    it('throws when the operation is still running', async () => {
+      const stub = createClientStub({
+        pollResult: { name: jobId, done: false },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await expect(adapter.getVideoUrl(jobId)).rejects.toThrow(/not ready/)
+    })
+
+    it('throws with the operation error message on failure', async () => {
+      const stub = createClientStub({
+        pollResult: {
+          name: jobId,
+          done: true,
+          error: { code: 13, message: 'internal error' },
+        },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await expect(adapter.getVideoUrl(jobId)).rejects.toThrow(/internal error/)
+    })
+
+    it('throws with RAI reasons when every sample was filtered', async () => {
+      const stub = createClientStub({
+        pollResult: {
+          name: jobId,
+          done: true,
+          response: {
+            generatedVideos: [],
+            raiMediaFilteredCount: 1,
+            raiMediaFilteredReasons: ['unsafe content'],
+          },
+        },
+      })
+      const adapter = new StubbedGeminiVideoAdapter(
+        'veo-3.1-generate-preview',
+        stub,
+      )
+
+      await expect(adapter.getVideoUrl(jobId)).rejects.toThrow(/unsafe content/)
+    })
+  })
+})
diff --git a/packages/ai/skills/ai-core/media-generation/SKILL.md b/packages/ai/skills/ai-core/media-generation/SKILL.md
index af9d80cf0..cae40b000 100644
--- a/packages/ai/skills/ai-core/media-generation/SKILL.md
+++ b/packages/ai/skills/ai-core/media-generation/SKILL.md
@@ -3,8 +3,9 @@ name: ai-core/media-generation
 description: >
   Image, audio, video, speech (TTS), and transcription generation using
   activity-specific adapters: generateImage() with openaiImage/geminiImage,
-  generateAudio() with geminiAudio/falAudio, generateVideo() with async
-  polling, generateSpeech() with openaiSpeech, generateTranscription() with
+  generateAudio() with geminiAudio/falAudio, generateVideo() with
+  openaiVideo/geminiVideo (async polling, per-model typed durations),
+  generateSpeech() with openaiSpeech, generateTranscription() with
   openaiTranscription. React hooks: useGenerateImage, useGenerateAudio,
   useGenerateSpeech, useTranscription, useGenerateVideo.
   TanStack Start server function integration with toServerSentEventsResponse.
@@ -428,6 +429,31 @@ const stream = generateVideo({
 return toServerSentEventsResponse(stream)
 ```
 
+Google Veo (`@tanstack/ai-gemini`) uses the same jobs/polling flow. Its
+`duration` option is typed per model (e.g. `4 | 6 | 8` for Veo 3.x,
+`5 | 6 | 8` for Veo 2); use `adapter.snapDuration(seconds)` to coerce raw
+seconds and `adapter.availableDurations()` to enumerate the valid set.
+Image prompt parts route by `metadata.role`: first un-roled /
+`'start_frame'` image → input image, `'end_frame'` → `lastFrame`,
+`'reference'` / `'character'` → `referenceImages`:
+
+```typescript
+import { geminiVideo } from '@tanstack/ai-gemini'
+
+const adapter = geminiVideo('veo-3.1-generate-preview')
+adapter.availableDurations() // { kind: 'discrete', values: [4, 6, 8] }
+
+const { jobId } = await generateVideo({
+  adapter,
+  prompt: 'A golden retriever playing in sunflowers',
+  size: '16:9', // Veo sizes are aspect ratios: '16:9' | '9:16'
+  duration: adapter.snapDuration(7), // 6 (ties pick the earlier entry)
+  modelOptions: { resolution: '1080p', generateAudio: true },
+})
+// Note: Veo result URLs require the Google API key to download
+// (x-goog-api-key header or ?key= query parameter).
+```
+
 Client hook with job tracking:
 
 ```tsx
diff --git a/packages/ai/src/activities/generateVideo/adapter.ts b/packages/ai/src/activities/generateVideo/adapter.ts
index 4f0eaed21..64dd0162e 100644
--- a/packages/ai/src/activities/generateVideo/adapter.ts
+++ b/packages/ai/src/activities/generateVideo/adapter.ts
@@ -6,6 +6,25 @@ import type {
   VideoUrlResult,
 } from '../../types'
 
+/**
+ * Structured description of the durations a video model accepts.
+ *
+ * Tagged union so the same shape can express discrete enums (OpenAI Sora,
+ * Veo), continuous ranges, mixed shapes, and models with no duration field.
+ * Consumed by `VideoAdapter.availableDurations()`.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export type DurationOptions<T extends string | number | undefined> =
+  | { kind: 'discrete'; values: ReadonlyArray<NonNullable<T>> }
+  | { kind: 'range'; min: number; max: number; step?: number; unit: 'seconds' }
+  | {
+      kind: 'mixed'
+      values: ReadonlyArray<NonNullable<T>>
+      range?: { min: number; max: number; step?: number }
+    }
+  | { kind: 'none' }
+
 /**
  * Configuration for video adapter instances
  *
@@ -34,6 +53,9 @@ export interface VideoAdapterConfig {
  * - TModelSizeByName: Map from model name to its supported sizes
  * - TModelInputModalitiesByName: Map from model name to the non-text prompt
  *   modalities it accepts (constrains the `prompt` part types at compile time)
+ * - TModelDurationByName: Map from model name to its supported duration
+ *   union. Defaults to `Record<string, number>` so adapters that haven't
+ *   declared a map keep today's `duration?: number` typing.
  */
 export interface VideoAdapter<
   TModel extends string = string,
@@ -45,6 +67,8 @@ export interface VideoAdapter<
   >,
   TModelInputModalitiesByName extends ModelInputModalitiesByName =
     ModelInputModalitiesByName,
+  TModelDurationByName extends Record<string, string | number | undefined> =
+    Record<string, number>,
 > {
   /** Discriminator for adapter kind - used to determine API shape */
   readonly kind: 'video'
@@ -61,6 +85,7 @@ export interface VideoAdapter<
     modelProviderOptionsByName: TModelProviderOptionsByName
     modelSizeByName: TModelSizeByName
     modelInputModalitiesByName: TModelInputModalitiesByName
+    modelDurationByName: TModelDurationByName
   }
 
   /**
@@ -68,7 +93,11 @@ export interface VideoAdapter<
    * Returns a job ID that can be used to poll for status and retrieve the video.
    */
   createVideoJob: (
-    options: VideoGenerationOptions<TProviderOptions, TModelSizeByName[TModel]>,
+    options: VideoGenerationOptions<
+      TProviderOptions,
+      TModelSizeByName[TModel],
+      TModelDurationByName[TModel]
+    >,
   ) => Promise<VideoJobResult>
 
   /**
@@ -81,13 +110,26 @@ export interface VideoAdapter<
    * Should only be called after status is 'completed'.
    */
   getVideoUrl: (jobId: string) => Promise<VideoUrlResult>
+
+  /**
+   * Describe the durations this adapter's model accepts. Returns a tagged
+   * union so consumers can render UI / coerce input without provider-specific
+   * knowledge.
+   */
+  availableDurations: () => DurationOptions<TModelDurationByName[TModel]>
+
+  /**
+   * Coerce a raw seconds value to the closest valid duration for this model.
+   * Returns `undefined` for models with no duration field.
+   */
+  snapDuration: (seconds: number) => TModelDurationByName[TModel] | undefined
 }
 
 /**
  * A VideoAdapter with any/unknown type parameters.
  * Useful as a constraint in generic functions and interfaces.
  */
-export type AnyVideoAdapter = VideoAdapter<any, any, any, any, any>
+export type AnyVideoAdapter = VideoAdapter<any, any, any, any, any, any>
 
 /**
  * Abstract base class for video generation adapters.
@@ -107,12 +149,15 @@ export abstract class BaseVideoAdapter<
   >,
   TModelInputModalitiesByName extends ModelInputModalitiesByName =
     ModelInputModalitiesByName,
+  TModelDurationByName extends Record<string, string | number | undefined> =
+    Record<string, number>,
 > implements VideoAdapter<
   TModel,
   TProviderOptions,
   TModelProviderOptionsByName,
   TModelSizeByName,
-  TModelInputModalitiesByName
+  TModelInputModalitiesByName,
+  TModelDurationByName
 > {
   readonly kind = 'video' as const
   abstract readonly name: string
@@ -124,6 +169,7 @@ export abstract class BaseVideoAdapter<
     modelProviderOptionsByName: TModelProviderOptionsByName
     modelSizeByName: TModelSizeByName
     modelInputModalitiesByName: TModelInputModalitiesByName
+    modelDurationByName: TModelDurationByName
   }
 
   protected config: VideoAdapterConfig
@@ -134,13 +180,33 @@ export abstract class BaseVideoAdapter<
   }
 
   abstract createVideoJob(
-    options: VideoGenerationOptions<TProviderOptions, TModelSizeByName[TModel]>,
+    options: VideoGenerationOptions<
+      TProviderOptions,
+      TModelSizeByName[TModel],
+      TModelDurationByName[TModel]
+    >,
   ): Promise<VideoJobResult>
 
   abstract getVideoStatus(jobId: string): Promise<VideoStatusResult>
 
   abstract getVideoUrl(jobId: string): Promise<VideoUrlResult>
 
+  /**
+   * Default implementation returns `{ kind: 'none' }`. Adapters that have
+   * declared their per-model duration map should override this.
+   */
+  availableDurations(): DurationOptions<TModelDurationByName[TModel]> {
+    return { kind: 'none' }
+  }
+
+  /**
+   * Default implementation returns `undefined`. Adapters that have declared
+   * their per-model duration map should override.
+   */
+  snapDuration(_seconds: number): TModelDurationByName[TModel] | undefined {
+    return undefined
+  }
+
   protected generateId(): string {
     return `${this.name}-${Date.now()}-${Math.random().toString(36).substring(7)}`
   }
diff --git a/packages/ai/src/activities/generateVideo/index.ts b/packages/ai/src/activities/generateVideo/index.ts
index cc942a5a2..643db32f0 100644
--- a/packages/ai/src/activities/generateVideo/index.ts
+++ b/packages/ai/src/activities/generateVideo/index.ts
@@ -67,6 +67,25 @@ export type VideoPromptForAdapter<TAdapter> =
         : MediaPrompt
     : MediaPrompt
 
+/**
+ * Extract the duration type for a VideoAdapter's model via ~types.
+ * Mirrors `VideoSizeForAdapter`. Falls back to `number` for adapters that
+ * haven't declared per-model duration constraints.
+ */
+export type VideoDurationForAdapter<TAdapter> =
+  TAdapter extends VideoAdapter<
+    infer TModel,
+    any,
+    any,
+    any,
+    any,
+    infer TDurationMap
+  >
+    ? TModel extends keyof TDurationMap
+      ? TDurationMap[TModel]
+      : number
+    : number
+
 // ===========================
 // Activity Options Types
 
@@ -113,8 +132,13 @@ export type VideoCreateOptions<
   prompt: VideoPromptForAdapter<TAdapter>
   /** Video size — format depends on the provider (e.g., "16:9", "1280x720") */
   size?: VideoSizeForAdapter<TAdapter>
-  /** Video duration in seconds */
-  duration?: number
+  /**
+   * Video duration in seconds. Adapters that declare a per-model duration
+   * map narrow this to the model's valid union (e.g. `4 | 6 | 8` for Veo 3).
+   * Pass `adapter.snapDuration(seconds)` to coerce raw seconds to a valid
+   * value.
+   */
+  duration?: VideoDurationForAdapter<TAdapter>
   /**
    * Whether to stream the video generation lifecycle.
    * When true, returns an AsyncIterable<StreamChunk> that handles the full
diff --git a/packages/ai/src/activities/generateVideo/snap.ts b/packages/ai/src/activities/generateVideo/snap.ts
new file mode 100644
index 000000000..f779d32f0
--- /dev/null
+++ b/packages/ai/src/activities/generateVideo/snap.ts
@@ -0,0 +1,100 @@
+import type { DurationOptions } from './adapter'
+
+/**
+ * Extract a numeric seconds value from a `DurationOptions` entry. Returns
+ * `null` for entries that don't parse as a number — e.g. `'auto'` or `''`.
+ *
+ * Handles the keyword-with-unit form FAL uses for Luma/Veo (`'8s'`, `'9s'`)
+ * by stripping a trailing `s`. Pure-numeric strings (`'5'`, `'10'`) parse via
+ * Number(). Finite numbers pass through; `NaN`/`±Infinity` yield `null`.
+ */
+function entryToSeconds(entry: string | number): number | null {
+  if (typeof entry === 'number') return Number.isFinite(entry) ? entry : null
+  const stripped = entry.endsWith('s') ? entry.slice(0, -1) : entry
+  // Number('') is 0 — reject blank input rather than report zero seconds.
+  if (stripped.trim() === '') return null
+  const parsed = Number(stripped)
+  return Number.isFinite(parsed) ? parsed : null
+}
+
+/**
+ * Snap a raw seconds value to the closest valid duration for a model's
+ * `DurationOptions`.
+ *
+ * - `none`            → `undefined`
+ * - `discrete`        → closest numeric-parseable entry; if none parse,
+ *                       returns `values[0]` (keyword-only models like 'auto')
+ * - `range`           → clamped to [min, max] and rounded to `step` (default 1)
+ * - `mixed`           → closest of (discrete numerics ∪ range values)
+ *
+ * A non-finite or non-positive `step` falls back to 1 instead of yielding NaN.
+ *
+ * @experimental Video generation is an experimental feature and may change.
+ */
+export function snapToDurationOption<T extends string | number | undefined>(
+  seconds: number,
+  options: DurationOptions<T>,
+): T | undefined {
+  // Clamp to [min, max], round onto the step grid anchored at `min`, then
+  // clamp again because rounding up can overshoot `max`.
+  const snapToRange = (min: number, max: number, step?: number): number => {
+    const grid =
+      step !== undefined && Number.isFinite(step) && step > 0 ? step : 1
+    const clamped = Math.min(max, Math.max(min, seconds))
+    const snapped = Math.round((clamped - min) / grid) * grid + min
+    return Math.min(max, Math.max(min, snapped))
+  }
+
+  switch (options.kind) {
+    case 'none':
+      return undefined
+
+    case 'discrete':
+      return pickClosestDiscrete(seconds, options.values)
+
+    case 'range':
+      return snapToRange(options.min, options.max, options.step) as T
+
+    case 'mixed': {
+      const discreteCandidate = pickClosestDiscrete(seconds, options.values)
+      if (!options.range) return discreteCandidate
+
+      const { min, max, step } = options.range
+      const rangeValue = snapToRange(min, max, step)
+
+      // The discrete candidate may be a non-numeric keyword fallback (or
+      // absent); treat those as infinitely far away so the range value wins.
+      const discreteSeconds =
+        discreteCandidate === undefined
+          ? Infinity
+          : (entryToSeconds(discreteCandidate) ?? Infinity)
+
+      return Math.abs(discreteSeconds - seconds) <=
+        Math.abs(rangeValue - seconds)
+        ? discreteCandidate
+        : (rangeValue as T)
+    }
+  }
+}
+
+/** Pick the entry of `values` numerically closest to `seconds`. */
+function pickClosestDiscrete<T extends string | number>(
+  seconds: number,
+  values: ReadonlyArray<T>,
+): T | undefined {
+  // Strict `<` keeps the earliest entry on ties, so list order decides
+  // between equidistant values; unparseable keywords are skipped.
+  let closest: { value: T; distance: number } | undefined
+  for (const candidate of values) {
+    const candidateSeconds = entryToSeconds(candidate)
+    if (candidateSeconds === null) continue
+    const distance = Math.abs(candidateSeconds - seconds)
+    if (distance < (closest?.distance ?? Infinity)) {
+      closest = { value: candidate, distance }
+    }
+  }
+
+  // No numeric-parseable entry (keyword-only set) — fall back to the first
+  // entry, which is `undefined` for an empty list.
+  return closest ? closest.value : values[0]
+}
diff --git a/packages/ai/src/activities/index.ts b/packages/ai/src/activities/index.ts
index 69d06be22..07fdbe73a 100644
--- a/packages/ai/src/activities/index.ts
+++ b/packages/ai/src/activities/index.ts
@@ -119,6 +119,7 @@ export {
   type VideoCreateOptions,
   type VideoStatusOptions,
   type VideoUrlOptions,
+  type VideoDurationForAdapter,
 } from './generateVideo/index'
 
 export {
@@ -126,8 +127,11 @@ export {
   type VideoAdapter,
   type VideoAdapterConfig,
   type AnyVideoAdapter,
+  type DurationOptions,
 } from './generateVideo/adapter'
 
+export { snapToDurationOption } from './generateVideo/snap'
+
 // ===========================
 // TTS Activity
 // ===========================
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
index 7ab506016..2bf82a803 100644
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@@ -1700,6 +1700,7 @@ export interface AudioGenerationResult {
 export interface VideoGenerationOptions<
   TProviderOptions extends object = object,
   TSize extends string | undefined = string,
+  TDuration extends string | number | undefined = number,
 > {
   /** The model to use for video generation */
   model: string
@@ -1714,8 +1715,12 @@ export interface VideoGenerationOptions<
   prompt: MediaPrompt
   /** Video size — format depends on the provider (e.g., "16:9", "1280x720") */
   size?: TSize
-  /** Video duration in seconds */
-  duration?: number
+  /**
+   * Video duration in seconds. Adapters that declare a per-model duration
+   * map narrow this to the model's valid union; use
+   * `adapter.snapDuration(seconds)` to coerce raw seconds to a valid value.
+   */
+  duration?: TDuration
   /** Model-specific options for video generation */
   modelOptions?: TProviderOptions
   /**
diff --git a/packages/ai/tests/stream-generation.test.ts b/packages/ai/tests/stream-generation.test.ts
index 155b10d65..2cb2741c3 100644
--- a/packages/ai/tests/stream-generation.test.ts
+++ b/packages/ai/tests/stream-generation.test.ts
@@ -170,6 +170,9 @@ describe('generateVideo({ stream: true })', () => {
       model: 'test-model',
       '~types': {} as any,
 
+      availableDurations: () => ({ kind: 'none' as const }),
+      snapDuration: () => undefined,
+
       createVideoJob: vi.fn(async () => ({
         jobId: 'job-123',
         model: 'test-model',
diff --git a/testing/e2e/global-setup.ts b/testing/e2e/global-setup.ts
index c1eed2859..7a858cf91 100644
--- a/testing/e2e/global-setup.ts
+++ b/testing/e2e/global-setup.ts
@@ -43,6 +43,13 @@ export default async function globalSetup() {
   mock.mount('/v1/text-to-speech', elevenLabsTTSMount())
   mock.mount('/v1/speech-to-text', elevenLabsSTTMount())
 
+  // Gemini Veo video generation. aimock 1.29 mocks Gemini's `:predict`
+  // (Imagen) endpoint but not the long-running `:predictLongRunning` +
+  // operations-polling pair Veo uses, so mount both here. Non-Veo paths
+  // under /v1beta/models (chat, images) return false and fall through to
+  // aimock's native Gemini handlers.
+  mock.mount('/v1beta/models', geminiVeoMount())
+
   // Anthropic server_tool_use bug reproduction (issue #604). aimock can't
   // natively synthesize `server_tool_use` / `web_fetch_tool_result` content
   // blocks, so this mount hand-crafts the raw SSE Claude would emit when a
@@ -267,6 +274,67 @@ function elevenLabsSTTMount(): Mountable {
   }
 }
 
+/**
+ * Mounts Gemini Veo's long-running video generation endpoints:
+ *
+ * - `POST /v1beta/models/{model}:predictLongRunning` — drains the body and
+ *   returns the fixed operation name `models/{model}/operations/veo-job-e2e`.
+ * - `GET /v1beta/models/{model}/operations/{id}` — polls the operation. The
+ *   mock completes immediately with the raw MLDev wire shape
+ *   (`response.generateVideoResponse.generatedSamples[0].video.uri`), which
+ *   the `@google/genai` SDK maps to `response.generatedVideos[0].video.uri`.
+ *
+ * Any other path or method returns `false` so the request falls through.
+ * Mirrors the openai `onVideo` fixture: prompt-agnostic job, same video URL.
+ */
+function geminiVeoMount(): Mountable {
+  const VIDEO_URL = 'https://example.com/guitar-store.mp4'
+  return {
+    async handleRequest(
+      req: http.IncomingMessage,
+      res: http.ServerResponse,
+      // aimock strips the mount prefix ('/v1beta/models') and any query
+      // string, so pathname looks like '/{model}:predictLongRunning' or
+      // '/{model}/operations/{id}'.
+      pathname: string,
+    ): Promise<boolean> {
+      const createMatch = pathname.match(/^\/([^/:]+):predictLongRunning$/)
+      if (createMatch && req.method === 'POST') {
+        await drainBody(req)
+        res.statusCode = 200
+        res.setHeader('Content-Type', 'application/json')
+        res.end(
+          JSON.stringify({
+            name: `models/${createMatch[1]}/operations/veo-job-e2e`,
+          }),
+        )
+        return true
+      }
+
+      const pollMatch = pathname.match(/^\/([^/:]+)\/operations\/([^/]+)$/)
+      if (pollMatch && req.method === 'GET') {
+        res.statusCode = 200
+        res.setHeader('Content-Type', 'application/json')
+        res.end(
+          JSON.stringify({
+            name: `models/${pollMatch[1]}/operations/${pollMatch[2]}`,
+            done: true,
+            response: {
+              generateVideoResponse: {
+                generatedSamples: [{ video: { uri: VIDEO_URL } }],
+              },
+            },
+          }),
+        )
+        return true
+      }
+
+      // Not a Veo route or method — defer to aimock's native Gemini handlers.
+      return false
+    },
+  }
+}
+
 /**
  * Mounts a Claude-shaped SSE response that includes a client `tool_use` block
  * followed by a `web_fetch` `server_tool_use` block, plus its
diff --git a/testing/e2e/src/lib/feature-support.ts b/testing/e2e/src/lib/feature-support.ts
index b4e85a715..5e722de63 100644
--- a/testing/e2e/src/lib/feature-support.ts
+++ b/testing/e2e/src/lib/feature-support.ts
@@ -191,12 +191,17 @@ export const matrix: Record<Feature, Set<Provider>> = {
   'sound-effects': new Set(['elevenlabs']),
   tts: new Set(['openai', 'grok', 'elevenlabs']),
   transcription: new Set(['openai', 'grok', 'elevenlabs']),
-  'video-gen': new Set(['openai']),
+  // Gemini Veo runs through a custom aimock mount (see geminiVeoMount in
+  // global-setup.ts) — aimock 1.29 doesn't model the long-running
+  // `:predictLongRunning` + operations-polling pair natively.
+  'video-gen': new Set(['openai', 'gemini']),
   // image-to-video (image parts in the generateVideo prompt). aimock 1.29's
   // `/v1/videos` handler parses Sora's multipart upload (the SDK switches to
   // multipart when `input_reference` carries a File) and matches on the
   // `prompt` form field, so the OpenAI/Sora route runs end-to-end. fal's
-  // endpoint-specific fields remain unit-test-only.
+  // endpoint-specific fields and Gemini Veo's image/lastFrame/referenceImages
+  // routing remain unit-test-only (the spec's journal assertion is tied to
+  // aimock's /v1/videos pipeline, which custom mounts bypass).
   'image-to-video': new Set(['openai']),
   // Only Gemini currently surfaces a first-class stateful conversation API via
   // the adapter (geminiTextInteractions, behind @tanstack/ai-gemini/experimental).
diff --git a/testing/e2e/src/lib/media-providers.ts b/testing/e2e/src/lib/media-providers.ts
index d399eb33f..759e56527 100644
--- a/testing/e2e/src/lib/media-providers.ts
+++ b/testing/e2e/src/lib/media-providers.ts
@@ -4,7 +4,11 @@ import {
   createOpenaiTranscription,
   createOpenaiVideo,
 } from '@tanstack/ai-openai'
-import { createGeminiAudio, createGeminiImage } from '@tanstack/ai-gemini'
+import {
+  createGeminiAudio,
+  createGeminiImage,
+  createGeminiVideo,
+} from '@tanstack/ai-gemini'
 import {
   createGrokImage,
   createGrokSpeech,
@@ -129,6 +133,10 @@ export function createVideoAdapter(
         baseURL: openaiUrl(aimockPort),
         defaultHeaders: headers,
       }),
+    gemini: () =>
+      createGeminiVideo('veo-3.1-generate-preview', DUMMY_KEY, {
+        httpOptions: { baseUrl: llmockBase(aimockPort), headers },
+      }),
   }
   const factory = factories[provider]
   if (!factory) throw new Error(`No video adapter for provider: ${provider}`)