From 7d63f7a2415a05537cbba04f12d95e4f00ebb349 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:53:27 +1000 Subject: [PATCH 1/3] feat(ai-grok): video generation adapter for the grok-imagine video models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a grokVideo adapter to @tanstack/ai-grok for xAI's Imagine video models (grok-imagine-video at $0.05/s, grok-imagine-video-1.5-preview at $0.08/s) using the experimental generateVideo() jobs/polling architecture: POST /v1/videos/generations to create, GET /v1/videos/{request_id} to poll, hosted mp4 URL plus usage (billed seconds + exact USD cost) on completion. Sizing follows the grok-imagine aspect-ratio template ('16:9_720p' → aspect_ratio/resolution); durations are 1-15 integer seconds; image-to-video starting frames go through modelOptions.image. The Imagine video endpoints are plain JSON (not in the OpenAI SDK), so the adapter issues direct requests with an injectable fetch seam. Closes #705. 
Co-Authored-By: Claude Fable 5 --- .changeset/grok-imagine-video-adapter.md | 5 + docs/adapters/grok.md | 62 ++- docs/config.json | 5 +- docs/media/video-generation.md | 32 +- packages/ai-grok/src/adapters/video.ts | 368 ++++++++++++++ packages/ai-grok/src/index.ts | 18 + packages/ai-grok/src/model-meta.ts | 47 ++ .../src/video/video-provider-options.ts | 178 +++++++ packages/ai-grok/tests/video-adapter.test.ts | 458 ++++++++++++++++++ .../skills/ai-core/media-generation/SKILL.md | 10 +- 10 files changed, 1176 insertions(+), 7 deletions(-) create mode 100644 .changeset/grok-imagine-video-adapter.md create mode 100644 packages/ai-grok/src/adapters/video.ts create mode 100644 packages/ai-grok/src/video/video-provider-options.ts create mode 100644 packages/ai-grok/tests/video-adapter.test.ts diff --git a/.changeset/grok-imagine-video-adapter.md b/.changeset/grok-imagine-video-adapter.md new file mode 100644 index 000000000..56e9bf4cd --- /dev/null +++ b/.changeset/grok-imagine-video-adapter.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-grok': minor +--- + +Add a `grokVideo` adapter for the grok-imagine video models (`grok-imagine-video`, `grok-imagine-video-1.5-preview`) via xAI's Imagine API. Follows the experimental `generateVideo()` jobs/polling architecture: `createVideoJob` posts to `/v1/videos/generations`, status polling reads `/v1/videos/{request_id}`, and the completed result carries the hosted video URL plus usage (`unitsBilled` seconds and exact `cost` in USD). Sizing uses the aspect-ratio template consistent with the grok-imagine image models (`size: '16:9_720p'` → `aspect_ratio` / `resolution`), durations are 1–15 integer seconds, and image-to-video starting frames can be passed via `modelOptions.image: { url }`. 
diff --git a/docs/adapters/grok.md b/docs/adapters/grok.md index 528226903..405b0c3c6 100644 --- a/docs/adapters/grok.md +++ b/docs/adapters/grok.md @@ -2,7 +2,7 @@ title: Grok (xAI) id: grok-adapter order: 5 -description: "Use xAI Grok models with TanStack AI — Grok 4.1, Grok 4, Grok 3, and Grok 2 Image generation via @tanstack/ai-grok." +description: "Use xAI Grok models with TanStack AI — Grok 4.1, Grok 4, Grok 3, Grok 2 Image generation, and Grok Imagine video generation via @tanstack/ai-grok." keywords: - tanstack ai - grok @@ -10,10 +10,12 @@ keywords: - grok 4 - grok 4.1 - image generation + - video generation + - grok imagine - adapter --- -The Grok adapter provides access to xAI's Grok models, including Grok 4.1, Grok 4, Grok 3, and image generation with Grok 2 Image. +The Grok adapter provides access to xAI's Grok models, including Grok 4.1, Grok 4, Grok 3, image generation with Grok 2 Image, and video generation with the Grok Imagine video models. ## Installation @@ -160,6 +162,58 @@ const result = await generateImage({ console.log(result.images); ``` +## Video Generation (Experimental) + +Generate short video clips (1–15 seconds, with audio) with the Grok Imagine video models via xAI's asynchronous jobs/polling API: + +```typescript +import { generateVideo, getVideoJobStatus } from "@tanstack/ai"; +import { grokVideo } from "@tanstack/ai-grok"; + +const adapter = grokVideo("grok-imagine-video"); + +// 1. Create the job +const { jobId } = await generateVideo({ + adapter, + prompt: "A red panda balancing on a bamboo stalk in the rain", + size: "16:9_720p", // "aspectRatio" or "aspectRatio_resolution" + duration: 5, // integer seconds, 1–15 +}); + +// 2. 
Poll until complete, then read the video URL +let status = await getVideoJobStatus({ adapter, jobId }); +while (status.status !== "completed" && status.status !== "failed") { + await new Promise((r) => setTimeout(r, 5000)); + status = await getVideoJobStatus({ adapter, jobId }); +} + +console.log(status.url); // hosted .mp4 URL +``` + +Available models: + +- `grok-imagine-video` — text-to-video and image-to-video, $0.05 per second of video +- `grok-imagine-video-1.5-preview` — preview of the next model generation, $0.08 per second + +Like the Grok Imagine image models, sizing is aspect-ratio based: the `size` option takes an `aspectRatio_resolution` template. Supported aspect ratios are `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, and `2:3`; supported resolutions are `480p`, `720p`, and `1080p` (e.g. `"9:16_1080p"`). The resolution suffix is optional. + +For image-to-video, pass a starting frame via `modelOptions.image` — a public URL or a base64 data URI: + +```typescript +const { jobId } = await generateVideo({ + adapter: grokVideo("grok-imagine-video"), + prompt: "Make the waterfall crash down and slowly pan out the camera", + duration: 10, + modelOptions: { + image: { url: "https://example.com/waterfall-still.png" }, + }, +}); +``` + +When the job completes, the adapter reports usage on the result: `usage.unitsBilled` carries the billed seconds of video and `usage.cost` the exact cost in USD, both as returned by the xAI API. + +See [Video Generation](../media/video-generation) for the full jobs/polling flow, streaming mode, and the `useGenerateVideo` hook. + ## Text-to-Speech Generate speech with Grok TTS: @@ -263,6 +317,10 @@ Creates a Grok summarization adapter with an explicit API key. Creates a Grok image generation adapter. +### `grokVideo(model, config?)` / `createGrokVideo(model, apiKey, config?)` + +Creates a Grok video generation adapter (experimental) for the Grok Imagine video models (`'grok-imagine-video'`, `'grok-imagine-video-1.5-preview'`). 
+ ### `grokSpeech(model, config?)` / `createGrokSpeech(model, apiKey, config?)` Creates a Grok text-to-speech adapter. diff --git a/docs/config.json b/docs/config.json index e3fc3b712..639fa0878 100644 --- a/docs/config.json +++ b/docs/config.json @@ -255,7 +255,7 @@ "label": "Video Generation", "to": "media/video-generation", "addedAt": "2026-04-15", - "updatedAt": "2026-06-08" + "updatedAt": "2026-06-10" }, { "label": "Generation Hooks", @@ -420,7 +420,8 @@ { "label": "Grok (xAI)", "to": "adapters/grok", - "addedAt": "2026-04-15" + "addedAt": "2026-04-15", + "updatedAt": "2026-06-10" }, { "label": "Groq", diff --git a/docs/media/video-generation.md b/docs/media/video-generation.md index ae325f95b..13259770b 100644 --- a/docs/media/video-generation.md +++ b/docs/media/video-generation.md @@ -2,11 +2,13 @@ title: Video Generation id: video-generation order: 6 -description: "Generate video from text prompts with OpenAI Sora using TanStack AI's experimental generateVideo() jobs/polling API." +description: "Generate video from text prompts with OpenAI Sora, xAI Grok Imagine, or fal.ai using TanStack AI's experimental generateVideo() jobs/polling API." keywords: - tanstack ai - video generation - sora + - grok imagine + - fal - generateVideo - jobs api - experimental @@ -36,6 +38,8 @@ TanStack AI provides experimental support for video generation through dedicated Currently supported: - **OpenAI**: Sora-2 and Sora-2-Pro models (when available) +- **Grok (xAI)**: grok-imagine-video and grok-imagine-video-1.5-preview models +- **fal.ai**: MiniMax, Luma, Kling, Hunyuan, and other hosted video models ## Basic Usage @@ -406,6 +410,32 @@ const { jobId } = await generateVideo({ }) ``` +### Grok (xAI Imagine) Model Options + +Based on the [xAI video generation API](https://docs.x.ai/docs/guides/video-generations). 
The Grok Imagine models are aspect-ratio sized — the generic `size` option takes an `aspectRatio_resolution` template (like the Grok Imagine image models), and clips can be 1–15 seconds long: + +```typescript +import { generateVideo } from '@tanstack/ai' +import { grokVideo } from '@tanstack/ai-grok' + +const { jobId } = await generateVideo({ + adapter: grokVideo('grok-imagine-video'), + prompt: 'A beautiful sunset over the ocean', + size: '16:9_720p', // aspect ratio: '1:1' | '16:9' | '9:16' | '4:3' | '3:4' | '3:2' | '2:3' + // resolution (optional suffix): '480p' | '720p' | '1080p' + duration: 5, // integer seconds, 1-15 + modelOptions: { + aspect_ratio: '16:9', // Alternative way to specify the aspect ratio + resolution: '720p', // Alternative way to specify the resolution + duration: 5, // Alternative way to specify the duration + // Image-to-video: animate a starting frame (public URL or base64 data URI) + image: { url: 'https://example.com/still.png' }, + }, +}) +``` + +Generated clips include an audio track. When the job completes, the adapter reports `usage.unitsBilled` (billed seconds of video) and `usage.cost` (exact USD cost as returned by the API) on the result. + ## Response Types > **Note:** The interfaces below are the underlying adapter-level types. The `getVideoJobStatus()` helper returns a single merged object, `{ status, progress?, url?, error?, usage? }` — it does not return `jobId` or `expiresAt`. 
diff --git a/packages/ai-grok/src/adapters/video.ts b/packages/ai-grok/src/adapters/video.ts new file mode 100644 index 000000000..4acd70e26 --- /dev/null +++ b/packages/ai-grok/src/adapters/video.ts @@ -0,0 +1,368 @@ +import { BaseVideoAdapter } from '@tanstack/ai/adapters' +import { toRunErrorPayload } from '@tanstack/ai/adapter-internals' +import { getGrokApiKeyFromEnv, withGrokDefaults } from '../utils/client' +import { + parseGrokVideoSize, + validateVideoDuration, + validateVideoSize, +} from '../video/video-provider-options' +import type { + TokenUsage, + VideoGenerationOptions, + VideoJobResult, + VideoStatusResult, + VideoUrlResult, +} from '@tanstack/ai' +import type { GrokVideoModel } from '../model-meta' +import type { + GrokVideoModelProviderOptionsByName, + GrokVideoModelSizeByName, + GrokVideoProviderOptions, +} from '../video/video-provider-options' +import type { GrokClientConfig } from '../utils' + +/** + * Configuration for Grok video adapter. + * + * @experimental Video generation is an experimental feature and may change. + */ +export interface GrokVideoConfig extends GrokClientConfig {} + +/** + * xAI bills video generation in "USD ticks": 10^10 ticks per US dollar + * (e.g. one grok-imagine-video second costs $0.05 = 500_000_000 ticks). + */ +const USD_TICKS_PER_DOLLAR = 10_000_000_000 + +/** Response of POST /v1/videos/generations. */ +interface GrokVideoCreateResponse { + request_id?: string +} + +/** Response of GET /v1/videos/{request_id}. 
/**
 * Response of GET /v1/videos/{request_id}.
 *
 * Every field is optional: the payload is read straight off the wire and
 * cast, without schema validation.
 */
interface GrokVideoStatusResponse {
  status?: string
  progress?: number
  model?: string
  video?: {
    url?: string
    duration?: number
  }
  usage?: {
    cost_in_usd_ticks?: number
  }
  error?: string
}

/** Returns `value` when it is a finite number, otherwise `undefined`. */
function asFiniteNumber(value: unknown): number | undefined {
  return typeof value === 'number' && Number.isFinite(value) ? value : undefined
}

/**
 * Builds the usage record reported for a video job.
 *
 * Token counts are always zero; the video duration is reported as
 * `unitsBilled` and the tick-denominated cost is converted to dollars via
 * `USD_TICKS_PER_DOLLAR` and reported as `cost`.
 *
 * @param response - Parsed status payload of the job.
 * @returns The usage record, or `undefined` when the payload carries neither
 *   a usable duration nor a usable cost.
 */
function buildGrokVideoUsage(
  response: GrokVideoStatusResponse,
): TokenUsage | undefined {
  // The payload is unvalidated JSON, so a field may arrive as `null` (or any
  // other non-number). Treat those as absent: a bare `!== undefined` check
  // would report `unitsBilled: null` and turn a null cost into `cost: 0`.
  const seconds = asFiniteNumber(response.video?.duration)
  const ticks = asFiniteNumber(response.usage?.cost_in_usd_ticks)
  if (seconds === undefined && ticks === undefined) return undefined

  return {
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: 0,
    ...(seconds !== undefined && { unitsBilled: seconds }),
    ...(ticks !== undefined && { cost: ticks / USD_TICKS_PER_DOLLAR }),
  }
}
/**
 * Grok Video Generation Adapter (xAI Imagine API)
 *
 * Tree-shakeable adapter for the grok-imagine video models using the
 * async jobs/polling architecture: create a generation request, poll it,
 * then read the completed video URL.
 *
 * The Imagine video endpoints are not part of the OpenAI SDK surface (and
 * xAI rejects the SDK's multipart paths), so requests are plain JSON calls
 * issued with the configured `fetch` (or the global one).
 *
 * @experimental Video generation is an experimental feature and may change.
 *
 * Features:
 * - Async job-based video generation (1–15 second clips with audio)
 * - Aspect-ratio sizing via the "aspectRatio_resolution" size template
 *   (e.g. '16:9_720p'), consistent with the grok-imagine image models
 * - Image-to-video via `modelOptions.image` (starting frame URL or data URI)
 * - Usage reporting: billed seconds (`unitsBilled`) and exact cost
 */
export class GrokVideoAdapter<
  TModel extends GrokVideoModel,
> extends BaseVideoAdapter<
  TModel,
  GrokVideoProviderOptions,
  GrokVideoModelProviderOptionsByName,
  GrokVideoModelSizeByName
> {
  readonly name = 'grok' as const

  private readonly clientConfig: GrokVideoConfig

  constructor(config: GrokVideoConfig, model: TModel) {
    super({}, model)
    this.clientConfig = withGrokDefaults(config)
  }

  /** The injected `fetch` from the client config, or the global one. */
  private get fetch(): (
    input: string,
    init?: RequestInit,
  ) => Promise<Response> {
    return this.clientConfig.fetch ?? fetch
  }

  /**
   * Issues a JSON request against the configured base URL with bearer auth.
   *
   * @param path - Path appended verbatim to `clientConfig.baseURL`.
   * @param init - Request options; the headers are always set here.
   */
  private async request(
    path: string,
    init?: Omit<RequestInit, 'headers'>,
  ): Promise<Response> {
    // Detach the function before calling it. `this.fetch(...)` would invoke
    // the platform `fetch` with the adapter as its receiver, which browsers
    // and worker runtimes reject with "Illegal invocation".
    const send = this.fetch
    return await send(`${this.clientConfig.baseURL}${path}`, {
      ...init,
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.clientConfig.apiKey}`,
      },
    })
  }

  /**
   * Reads the error message out of an Imagine API error body
   * (`{"code": "...", "error": "..."}`), falling back to the raw text.
   */
  private async errorMessage(response: Response): Promise<string> {
    const body = await response.text()
    try {
      const parsed: unknown = JSON.parse(body)
      if (
        typeof parsed === 'object' &&
        parsed !== null &&
        'error' in parsed &&
        typeof parsed.error === 'string'
      ) {
        return parsed.error
      }
    } catch {
      // not JSON — fall through to the raw body
    }
    return body
  }

  /** True when `error` carries the HTTP 404 status set by `retrieveJob`. */
  private isNotFound(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      'status' in error &&
      error.status === 404
    )
  }

  /**
   * Creates a video generation job via POST /videos/generations.
   *
   * @param options - Generic generation options. `size` and `duration` are
   *   validated before the request; defined `modelOptions` entries override
   *   the values derived from `size` / `duration`.
   * @returns The job id (the API's `request_id`) and the model.
   * @throws Error when validation fails, the API responds with a non-2xx
   *   status, or the response carries no `request_id`. Request failures are
   *   logged through `options.logger` and rethrown.
   */
  async createVideoJob(
    options: VideoGenerationOptions<GrokVideoProviderOptions>,
  ): Promise<VideoJobResult> {
    const { model, prompt, size, modelOptions, logger } = options

    validateVideoSize(model, size)
    validateVideoDuration(model, options.duration)
    validateVideoDuration(model, modelOptions?.duration)
    // modelOptions take precedence over the generic option, so the logged
    // duration is the one that is actually sent.
    const duration = modelOptions?.duration ?? options.duration

    // The generic `size` option carries an "aspectRatio_resolution" template
    // (e.g. '16:9_720p') and maps to the Imagine API's `aspect_ratio` /
    // `resolution` parameters; explicit modelOptions win over the template.
    const parsedSize = size !== undefined ? parseGrokVideoSize(size) : undefined

    // Only defined entries may override: spreading `{ duration: undefined }`
    // would otherwise erase the validated value, and JSON.stringify would
    // then silently drop the key from the request.
    const explicitOptions = Object.fromEntries(
      Object.entries(modelOptions ?? {}).filter(
        ([, value]) => value !== undefined,
      ),
    )

    const request = {
      model,
      prompt,
      ...(parsedSize && {
        aspect_ratio: parsedSize.aspectRatio,
        ...(parsedSize.resolution !== undefined && {
          resolution: parsedSize.resolution,
        }),
      }),
      ...(duration !== undefined && { duration }),
      ...explicitOptions,
    }

    try {
      logger.request(
        `activity=video.create provider=${this.name} model=${model} size=${size ?? 'default'} duration=${duration ?? 'default'}`,
        { provider: this.name, model },
      )

      const response = await this.request('/videos/generations', {
        method: 'POST',
        body: JSON.stringify(request),
      })
      if (!response.ok) {
        throw new Error(
          `grok: video generation request failed (${response.status} ${response.statusText}): ${await this.errorMessage(response)}`,
        )
      }

      const result = (await response.json()) as GrokVideoCreateResponse
      if (!result.request_id) {
        throw new Error(
          'grok: video generation response contained no request_id',
        )
      }
      return { jobId: result.request_id, model }
    } catch (error: unknown) {
      logger.errors(`${this.name}.createVideoJob fatal`, {
        error: toRunErrorPayload(error, `${this.name}.createVideoJob failed`),
        source: `${this.name}.createVideoJob`,
      })
      throw error
    }
  }

  /**
   * Fetches the raw job payload from GET /videos/{request_id}.
   *
   * @throws Error with a numeric `status` property (the HTTP status) when
   *   the API responds with a non-2xx status.
   */
  private async retrieveJob(jobId: string): Promise<GrokVideoStatusResponse> {
    // jobId is caller-supplied: encode it so it always stays a single path
    // segment and cannot redirect the request to another endpoint.
    const response = await this.request(`/videos/${encodeURIComponent(jobId)}`)
    if (!response.ok) {
      const error = new Error(
        `grok: video status request failed (${response.status} ${response.statusText}): ${await this.errorMessage(response)}`,
      )
      ;(error as { status?: number }).status = response.status
      throw error
    }
    return (await response.json()) as GrokVideoStatusResponse
  }

  /**
   * Reports the current status of a job.
   *
   * A 404 from the API is reported as a failed job ("Job not found") rather
   * than thrown; every other request failure is rethrown.
   */
  async getVideoStatus(jobId: string): Promise<VideoStatusResult> {
    let response: GrokVideoStatusResponse
    try {
      response = await this.retrieveJob(jobId)
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        return { jobId, status: 'failed', error: 'Job not found' }
      }
      throw error
    }

    return {
      jobId,
      status: this.mapStatus(response.status),
      ...(response.progress !== undefined && { progress: response.progress }),
      ...(response.error !== undefined && { error: response.error }),
    }
  }

  /**
   * Returns the hosted video URL of a job, plus usage when the payload
   * reports it.
   *
   * @throws Error when the job is unknown (404), has failed, or has no
   *   video URL yet.
   */
  async getVideoUrl(jobId: string): Promise<VideoUrlResult> {
    let response: GrokVideoStatusResponse
    try {
      response = await this.retrieveJob(jobId)
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        throw new Error(`Video job not found: ${jobId}`)
      }
      throw error
    }

    const status = this.mapStatus(response.status)
    if (status === 'failed') {
      throw new Error(
        `Video generation failed${response.error ? `: ${response.error}` : ''}. Job ID: ${jobId}`,
      )
    }
    const url = response.video?.url
    if (!url) {
      throw new Error(
        `Video is not ready for download. Check status first. Job ID: ${jobId}`,
      )
    }

    const usage = buildGrokVideoUsage(response)
    return {
      jobId,
      url,
      ...(usage && { usage }),
    }
  }

  /**
   * Maps Imagine API job statuses onto the generic video status set. The
   * API reports 'pending' while queued/generating (with a numeric
   * `progress`), then a terminal 'done' / 'failed' / 'expired'.
   *
   * A missing or unrecognised status maps to 'processing', so polling
   * callers keep waiting instead of treating it as terminal.
   */
  protected mapStatus(
    apiStatus: string | undefined,
  ): 'pending' | 'processing' | 'completed' | 'failed' {
    switch (apiStatus) {
      case 'pending':
      case 'queued':
        return 'pending'
      case 'done':
      case 'completed':
      case 'succeeded':
        return 'completed'
      case 'failed':
      case 'expired':
      case 'error':
      case 'cancelled':
        return 'failed'
      case undefined:
      default:
        return 'processing'
    }
  }
}
/**
 * Creates a Grok video adapter with an explicit API key.
 * Type resolution happens here at the call site.
 *
 * @experimental Video generation is an experimental feature and may change.
 *
 * @param model - The model name (e.g., 'grok-imagine-video')
 * @param apiKey - Your xAI API key
 * @param config - Optional additional configuration
 * @returns Configured Grok video adapter instance with resolved types
 *
 * @example
 * ```typescript
 * const adapter = createGrokVideo('grok-imagine-video', 'xai-...');
 *
 * const { jobId } = await generateVideo({
 *   adapter,
 *   prompt: 'A beautiful sunset over the ocean',
 *   size: '16:9_720p',
 *   duration: 5
 * });
 * ```
 */
export function createGrokVideo<TModel extends GrokVideoModel>(
  model: TModel,
  apiKey: string,
  config?: Omit<GrokVideoConfig, 'apiKey'>,
): GrokVideoAdapter<TModel> {
  // Spread `config` first so the explicit `apiKey` argument always wins. A
  // widened object that still carries an `apiKey` property (possibly
  // `undefined`) is assignable to the `Omit` type and would otherwise
  // replace the key that was passed in.
  return new GrokVideoAdapter({ ...config, apiKey }, model)
}
+ * + * @param model - The model name (e.g., 'grok-imagine-video') + * @param config - Optional configuration (excluding apiKey which is auto-detected) + * @returns Configured Grok video adapter instance with resolved types + * @throws Error if XAI_API_KEY is not found in environment + * + * @example + * ```typescript + * // Automatically uses XAI_API_KEY from environment + * const adapter = grokVideo('grok-imagine-video'); + * + * // Create a video generation job + * const { jobId } = await generateVideo({ + * adapter, + * prompt: 'A cat playing piano' + * }); + * + * // Poll for status + * const status = await getVideoJobStatus({ adapter, jobId }); + * ``` + */ +export function grokVideo( + model: TModel, + config?: Omit, +): GrokVideoAdapter { + const apiKey = getGrokApiKeyFromEnv() + return createGrokVideo(model, apiKey, config) +} diff --git a/packages/ai-grok/src/index.ts b/packages/ai-grok/src/index.ts index 142ab3346..0377316b4 100644 --- a/packages/ai-grok/src/index.ts +++ b/packages/ai-grok/src/index.ts @@ -31,6 +31,22 @@ export type { GrokImageModelProviderOptionsByName, } from './image/image-provider-options' +// Video adapter - for video generation (xAI Imagine API) +export { + GrokVideoAdapter, + createGrokVideo, + grokVideo, + type GrokVideoConfig, +} from './adapters/video' +export type { + GrokVideoProviderOptions, + GrokVideoModelProviderOptionsByName, + GrokVideoModelSizeByName, + GrokVideoAspectRatio, + GrokVideoResolution, + GrokVideoSize, +} from './video/video-provider-options' + // Speech (TTS) adapter - for text-to-speech export { GrokSpeechAdapter, @@ -68,6 +84,7 @@ export type { ResolveInputModalities, GrokChatModel, GrokImageModel, + GrokVideoModel, GrokTTSModel, GrokTranscriptionModel, GrokRealtimeModel, @@ -75,6 +92,7 @@ export type { export { GROK_CHAT_MODELS, GROK_IMAGE_MODELS, + GROK_VIDEO_MODELS, GROK_TTS_MODELS, GROK_TRANSCRIPTION_MODELS, GROK_REALTIME_MODELS, diff --git a/packages/ai-grok/src/model-meta.ts 
b/packages/ai-grok/src/model-meta.ts index 5b76aaa10..f613dedab 100644 --- a/packages/ai-grok/src/model-meta.ts +++ b/packages/ai-grok/src/model-meta.ts @@ -219,6 +219,42 @@ const GROK_2_IMAGE = { }, } as const satisfies ModelMeta +// Imagine API video models. Pricing is per second of generated video +// (output only); generated videos carry an audio track. +const GROK_IMAGINE_VIDEO = { + name: 'grok-imagine-video', + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + // per second of video + normal: 0.05, + }, + }, +} as const satisfies ModelMeta + +const GROK_IMAGINE_VIDEO_1_5_PREVIEW = { + name: 'grok-imagine-video-1.5-preview', + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + // per second of video + normal: 0.08, + }, + }, +} as const satisfies ModelMeta + /** * Grok Chat Models * Based on xAI's available models as of 2025 @@ -351,6 +387,16 @@ export const GROK_COMBINED_TOOLS_AND_SCHEMA_MODELS = new Set([ */ export const GROK_IMAGE_MODELS = [GROK_2_IMAGE.name] as const +/** + * Grok Video Generation Models (xAI Imagine API) + * + * @experimental Video generation is an experimental feature and may change. + */ +export const GROK_VIDEO_MODELS = [ + GROK_IMAGINE_VIDEO.name, + GROK_IMAGINE_VIDEO_1_5_PREVIEW.name, +] as const + // xAI's `/v1/tts` endpoint is endpoint-addressed and does not take a `model` // parameter. This synthetic identifier satisfies the SDK's `TTSOptions.model` // contract and provides a stable value for logging and fixture matching. 
@@ -404,6 +450,7 @@ export const GROK_REALTIME_MODELS = [ export type GrokChatModel = (typeof GROK_CHAT_MODELS)[number] export type GrokImageModel = (typeof GROK_IMAGE_MODELS)[number] +export type GrokVideoModel = (typeof GROK_VIDEO_MODELS)[number] export type GrokTTSModel = (typeof GROK_TTS_MODELS)[number] export type GrokTranscriptionModel = (typeof GROK_TRANSCRIPTION_MODELS)[number] export type GrokRealtimeModel = (typeof GROK_REALTIME_MODELS)[number] diff --git a/packages/ai-grok/src/video/video-provider-options.ts b/packages/ai-grok/src/video/video-provider-options.ts new file mode 100644 index 000000000..0939b27b5 --- /dev/null +++ b/packages/ai-grok/src/video/video-provider-options.ts @@ -0,0 +1,178 @@ +/** + * Grok Video Generation Provider Options (xAI Imagine API) + * + * Based on https://docs.x.ai/docs/guides/video-generations + * + * @experimental Video generation is an experimental feature and may change. + */ + +/** + * Aspect ratios accepted by the grok-imagine video models. + * + * Note: this is a narrower set than the grok-imagine image models — the + * video endpoint rejects the phone-screen ratios ('9:19.5', '9:20', …) and + * 'auto'. + * + * @experimental Video generation is an experimental feature and may change. + */ +export type GrokVideoAspectRatio = + | '1:1' + | '16:9' + | '9:16' + | '4:3' + | '3:4' + | '3:2' + | '2:3' + +/** + * Resolution tiers for the grok-imagine video models. + * + * @experimental Video generation is an experimental feature and may change. + */ +export type GrokVideoResolution = '480p' | '720p' | '1080p' + +/** + * Size strings for grok-imagine video models. The Imagine API is + * aspect-ratio based rather than pixel-size based; like the grok-imagine + * image models, the generic `size` option uses an + * `aspectRatio_resolution` template ("16:9_720p") — the resolution suffix + * is optional ("16:9" uses the API default). + * + * @experimental Video generation is an experimental feature and may change. 
/**
 * Size strings for grok-imagine video models. The Imagine API is
 * aspect-ratio based rather than pixel-size based; like the grok-imagine
 * image models, the generic `size` option uses an
 * `aspectRatio_resolution` template ("16:9_720p") — the resolution suffix
 * is optional ("16:9" uses the API default).
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export type GrokVideoSize =
  | GrokVideoAspectRatio
  | `${GrokVideoAspectRatio}_${GrokVideoResolution}`

// Typed as string arrays on purpose: both lists are searched with arbitrary
// caller-supplied strings via `.includes()`.
const GROK_VIDEO_ASPECT_RATIOS: ReadonlyArray<string> = [
  '1:1',
  '16:9',
  '9:16',
  '4:3',
  '3:4',
  '3:2',
  '2:3',
]

const GROK_VIDEO_RESOLUTIONS: ReadonlyArray<string> = ['480p', '720p', '1080p']

/**
 * Video duration limits enforced by the Imagine API (seconds).
 */
export const GROK_VIDEO_MIN_DURATION = 1
export const GROK_VIDEO_MAX_DURATION = 15

/**
 * Parses a grok video size string into its components.
 * Format: "aspectRatio" or "aspectRatio_resolution",
 * e.g. "16:9_720p" → { aspectRatio: "16:9", resolution: "720p" }.
 *
 * Only the shape is checked here; whether the aspect ratio and resolution
 * are supported is decided by `validateVideoSize`.
 *
 * @param size - The size template to parse.
 * @returns The parsed components (`resolution` is omitted when there is no
 *   suffix), or `undefined` when the string doesn't match the template.
 */
export function parseGrokVideoSize(
  size: string,
): { aspectRatio: string; resolution?: string } | undefined {
  // <digits/dots>:<digits/dots>, optionally followed by "_" and a non-empty
  // resolution suffix.
  const match = size.match(/^([\d.]+:[\d.]+)(?:_(.+))?$/)
  const [, aspectRatio, resolution] = match ?? []
  if (aspectRatio === undefined) return undefined
  return { aspectRatio, ...(resolution !== undefined && { resolution }) }
}

/**
 * Validate the `size` template for a given grok video model.
 *
 * @experimental Video generation is an experimental feature and may change.
 *
 * @param model - Model name, used only in the error message.
 * @param size - The size template; `undefined` is accepted as-is.
 * @throws Error when the template is malformed, the aspect ratio is not
 *   supported, or the resolution suffix is not supported.
 */
export function validateVideoSize(
  model: string,
  size?: string,
): asserts size is GrokVideoSize | undefined {
  if (size === undefined) return
  const parsed = parseGrokVideoSize(size)
  if (!parsed || !GROK_VIDEO_ASPECT_RATIOS.includes(parsed.aspectRatio)) {
    throw new Error(
      `Size "${size}" is not supported by model "${model}". Expected ` +
        `"aspectRatio" or "aspectRatio_resolution" (e.g. "16:9_720p") with ` +
        `aspect ratio one of: ${GROK_VIDEO_ASPECT_RATIOS.join(', ')}`,
    )
  }
  if (
    parsed.resolution !== undefined &&
    !GROK_VIDEO_RESOLUTIONS.includes(parsed.resolution)
  ) {
    throw new Error(
      `Resolution "${parsed.resolution}" is not supported by model "${model}". ` +
        `Supported resolutions: ${GROK_VIDEO_RESOLUTIONS.join(', ')}`,
    )
  }
}

/**
 * Validate video duration (seconds) for a given grok video model.
 * The Imagine API accepts integer durations between 1 and 15 seconds.
 *
 * @experimental Video generation is an experimental feature and may change.
 *
 * @param model - Model name, used only in the error message.
 * @param duration - Duration in seconds; `undefined` is accepted as-is.
 * @throws Error when the duration is not an integer within the limits
 *   (`NaN` and fractional values are rejected by the integer check).
 */
export function validateVideoDuration(model: string, duration?: number): void {
  if (duration === undefined) return
  if (
    !Number.isInteger(duration) ||
    duration < GROK_VIDEO_MIN_DURATION ||
    duration > GROK_VIDEO_MAX_DURATION
  ) {
    throw new Error(
      `Duration "${duration}" is not supported by model "${model}". ` +
        `Supported durations: integer seconds between ${GROK_VIDEO_MIN_DURATION} and ${GROK_VIDEO_MAX_DURATION}`,
    )
  }
}

/**
 * Provider-specific options for grok video generation. These map directly
 * onto the Imagine API request body and take precedence over the generic
 * `size` / `duration` options when both are provided.
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export interface GrokVideoProviderOptions {
  /**
   * Output aspect ratio.
   */
  aspect_ratio?: GrokVideoAspectRatio

  /**
   * Output resolution tier.
   */
  resolution?: GrokVideoResolution

  /**
   * Video duration in integer seconds (1–15).
   */
  duration?: number

  /**
   * Source image for image-to-video generation: the image becomes the
   * starting frame and the prompt describes the desired motion. `url`
   * accepts a public URL (fetched by xAI's servers) or a base64 data URI.
   */
  image?: { url: string }
}

/**
 * Type-only map from model name to its specific provider options.
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export type GrokVideoModelProviderOptionsByName = {
  'grok-imagine-video': GrokVideoProviderOptions
  'grok-imagine-video-1.5-preview': GrokVideoProviderOptions
}
+ * + * @experimental Video generation is an experimental feature and may change. + */ +export type GrokVideoModelSizeByName = { + 'grok-imagine-video': GrokVideoSize + 'grok-imagine-video-1.5-preview': GrokVideoSize +} diff --git a/packages/ai-grok/tests/video-adapter.test.ts b/packages/ai-grok/tests/video-adapter.test.ts new file mode 100644 index 000000000..1acd27762 --- /dev/null +++ b/packages/ai-grok/tests/video-adapter.test.ts @@ -0,0 +1,458 @@ +import { describe, expect, it, vi } from 'vitest' +import { resolveDebugOption } from '@tanstack/ai/adapter-internals' +import { + GrokVideoAdapter, + createGrokVideo, + grokVideo, +} from '../src/adapters/video' +import { + parseGrokVideoSize, + validateVideoDuration, + validateVideoSize, +} from '../src/video/video-provider-options' + +const testLogger = resolveDebugOption(false) + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'Content-Type': 'application/json' }, + }) +} + +/** + * A `vi.fn` fetch stub with the real fetch parameter list, so call + * assertions (`mock.calls[0]`) are typed as `[input, init?]`. + */ +function mockFetch(handler: () => Response) { + return vi.fn(async (_input: string | URL | Request, _init?: RequestInit) => + handler(), + ) +} + +/** + * Builds an adapter whose HTTP layer is the provided mock — injected via + * the adapter config's `fetch` seam, so no globals are touched. 
+ */ +function adapterWithFetch( + fetchMock: ( + input: string | URL | Request, + init?: RequestInit, + ) => Promise, +) { + return createGrokVideo('grok-imagine-video', 'test-api-key', { + fetch: fetchMock, + }) +} + +describe('Grok Video Adapter', () => { + describe('factories', () => { + it('creates an adapter with the provided API key', () => { + const adapter = createGrokVideo('grok-imagine-video', 'test-api-key') + expect(adapter).toBeInstanceOf(GrokVideoAdapter) + expect(adapter.kind).toBe('video') + expect(adapter.name).toBe('grok') + expect(adapter.model).toBe('grok-imagine-video') + }) + + it('supports the 1.5 preview model', () => { + const adapter = createGrokVideo( + 'grok-imagine-video-1.5-preview', + 'test-api-key', + ) + expect(adapter.model).toBe('grok-imagine-video-1.5-preview') + }) + + it('grokVideo reads XAI_API_KEY from the environment', () => { + vi.stubEnv('XAI_API_KEY', 'env-key') + try { + const adapter = grokVideo('grok-imagine-video') + expect(adapter).toBeInstanceOf(GrokVideoAdapter) + } finally { + vi.unstubAllEnvs() + } + }) + }) + + describe('createVideoJob', () => { + it('posts a JSON request to the Imagine generations endpoint', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'req-123' })) + const adapter = adapterWithFetch(fetchMock) + + const result = await adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'A red ball bouncing once', + size: '16:9_720p', + duration: 5, + logger: testLogger, + }) + + expect(result).toEqual({ jobId: 'req-123', model: 'grok-imagine-video' }) + expect(fetchMock).toHaveBeenCalledTimes(1) + const [url, init] = fetchMock.mock.calls[0]! 
+ expect(url).toBe('https://api.x.ai/v1/videos/generations') + expect(init?.method).toBe('POST') + expect(init?.headers).toMatchObject({ + 'Content-Type': 'application/json', + Authorization: 'Bearer test-api-key', + }) + expect(JSON.parse(String(init?.body))).toEqual({ + model: 'grok-imagine-video', + prompt: 'A red ball bouncing once', + aspect_ratio: '16:9', + resolution: '720p', + duration: 5, + }) + }) + + it('maps a bare aspect-ratio size without a resolution', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = adapterWithFetch(fetchMock) + + await adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + size: '9:16', + logger: testLogger, + }) + + const body = JSON.parse(String(fetchMock.mock.calls[0]![1]?.body)) + expect(body.aspect_ratio).toBe('9:16') + expect(body).not.toHaveProperty('resolution') + expect(body).not.toHaveProperty('duration') + }) + + it('passes modelOptions through, including the image-to-video starting frame', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = adapterWithFetch(fetchMock) + + await adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'make the waterfall crash down', + modelOptions: { + image: { url: 'https://example.com/still.png' }, + resolution: '1080p', + duration: 10, + }, + logger: testLogger, + }) + + const body = JSON.parse(String(fetchMock.mock.calls[0]![1]?.body)) + expect(body.image).toEqual({ url: 'https://example.com/still.png' }) + expect(body.resolution).toBe('1080p') + expect(body.duration).toBe(10) + }) + + it('lets modelOptions win over the generic size template', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = adapterWithFetch(fetchMock) + + await adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + size: '16:9_480p', + modelOptions: { resolution: '1080p' }, + logger: testLogger, + }) + + const body 
= JSON.parse(String(fetchMock.mock.calls[0]![1]?.body)) + expect(body.aspect_ratio).toBe('16:9') + expect(body.resolution).toBe('1080p') + }) + + it('rejects unsupported sizes before calling the API', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = adapterWithFetch(fetchMock) + + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + size: '7:5', + logger: testLogger, + }), + ).rejects.toThrow(/Size "7:5" is not supported/) + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + size: '16:9_9k', + logger: testLogger, + }), + ).rejects.toThrow(/Resolution "9k" is not supported/) + expect(fetchMock).not.toHaveBeenCalled() + }) + + it('rejects out-of-range and non-integer durations before calling the API', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = adapterWithFetch(fetchMock) + + for (const duration of [0, 16, 2.5]) { + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + duration, + logger: testLogger, + }), + ).rejects.toThrow(/Duration .* is not supported/) + } + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + modelOptions: { duration: 99 }, + logger: testLogger, + }), + ).rejects.toThrow(/Duration "99" is not supported/) + expect(fetchMock).not.toHaveBeenCalled() + }) + + it('surfaces API error messages from the xAI error body', async () => { + const fetchMock = mockFetch(() => + jsonResponse( + { + code: 'invalid-argument', + error: 'Duration must be between 1 and 15 seconds', + }, + 400, + ), + ) + const adapter = adapterWithFetch(fetchMock) + + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + logger: testLogger, + }), + ).rejects.toThrow( + /video generation request failed \(400.*Duration must be between 1 and 15 seconds/, + ) + }) + + it('throws when the response 
carries no request_id', async () => { + const fetchMock = mockFetch(() => jsonResponse({})) + const adapter = adapterWithFetch(fetchMock) + + await expect( + adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + logger: testLogger, + }), + ).rejects.toThrow(/no request_id/) + }) + + it('honours a custom baseURL', async () => { + const fetchMock = mockFetch(() => jsonResponse({ request_id: 'r' })) + const adapter = createGrokVideo('grok-imagine-video', 'k', { + baseURL: 'https://proxy.example.com/v1', + fetch: fetchMock, + }) + + await adapter.createVideoJob({ + model: 'grok-imagine-video', + prompt: 'p', + logger: testLogger, + }) + + expect(fetchMock.mock.calls[0]![0]).toBe( + 'https://proxy.example.com/v1/videos/generations', + ) + }) + }) + + describe('getVideoStatus', () => { + it('maps a pending job with progress', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ status: 'pending', progress: 18 }), + ) + const adapter = adapterWithFetch(fetchMock) + + const status = await adapter.getVideoStatus('req-123') + + expect(fetchMock.mock.calls[0]![0]).toBe( + 'https://api.x.ai/v1/videos/req-123', + ) + expect(status).toEqual({ + jobId: 'req-123', + status: 'pending', + progress: 18, + }) + }) + + it('maps a done job to completed', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ + status: 'done', + progress: 100, + video: { url: 'https://vidgen.x.ai/video.mp4', duration: 5 }, + }), + ) + const adapter = adapterWithFetch(fetchMock) + + expect(await adapter.getVideoStatus('req-123')).toEqual({ + jobId: 'req-123', + status: 'completed', + progress: 100, + }) + }) + + it.each(['failed', 'expired'])('maps %s to failed', async (apiStatus) => { + const fetchMock = mockFetch(() => + jsonResponse({ status: apiStatus, error: 'moderation' }), + ) + const adapter = adapterWithFetch(fetchMock) + + expect(await adapter.getVideoStatus('req-123')).toEqual({ + jobId: 'req-123', + status: 'failed', + error: 'moderation', + }) + }) 
+ + it('maps an unknown in-flight status to processing', async () => { + const fetchMock = mockFetch(() => jsonResponse({ status: 'generating' })) + const adapter = adapterWithFetch(fetchMock) + + expect((await adapter.getVideoStatus('req-123')).status).toBe( + 'processing', + ) + }) + + it('reports a 404 as a failed job rather than throwing', async () => { + const fetchMock = mockFetch(() => + jsonResponse( + { code: 'not-found', error: 'Failed to read static file.' }, + 404, + ), + ) + const adapter = adapterWithFetch(fetchMock) + + expect(await adapter.getVideoStatus('missing')).toEqual({ + jobId: 'missing', + status: 'failed', + error: 'Job not found', + }) + }) + + it('throws on non-404 API errors', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ error: 'server exploded' }, 500), + ) + const adapter = adapterWithFetch(fetchMock) + + await expect(adapter.getVideoStatus('req-123')).rejects.toThrow( + /video status request failed \(500/, + ) + }) + }) + + describe('getVideoUrl', () => { + it('returns the video URL with billed seconds and exact cost', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ + status: 'done', + progress: 100, + model: 'grok-imagine-video', + video: { + url: 'https://vidgen.x.ai/video.mp4', + duration: 5, + }, + usage: { cost_in_usd_ticks: 2_500_000_000 }, + }), + ) + const adapter = adapterWithFetch(fetchMock) + + expect(await adapter.getVideoUrl('req-123')).toEqual({ + jobId: 'req-123', + url: 'https://vidgen.x.ai/video.mp4', + usage: { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + unitsBilled: 5, + cost: 0.25, + }, + }) + }) + + it('omits usage when the response carries none', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ + status: 'done', + video: { url: 'https://vidgen.x.ai/video.mp4' }, + }), + ) + const adapter = adapterWithFetch(fetchMock) + + expect(await adapter.getVideoUrl('req-123')).toEqual({ + jobId: 'req-123', + url: 'https://vidgen.x.ai/video.mp4', + 
}) + }) + + it('throws when the job is not finished yet', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ status: 'pending', progress: 40 }), + ) + const adapter = adapterWithFetch(fetchMock) + + await expect(adapter.getVideoUrl('req-123')).rejects.toThrow( + /not ready for download/, + ) + }) + + it('throws with the provider error when the job failed', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ status: 'failed', error: 'moderation' }), + ) + const adapter = adapterWithFetch(fetchMock) + + await expect(adapter.getVideoUrl('req-123')).rejects.toThrow( + /Video generation failed: moderation/, + ) + }) + + it('throws a not-found error for unknown jobs', async () => { + const fetchMock = mockFetch(() => + jsonResponse({ code: 'not-found', error: 'nope' }, 404), + ) + const adapter = adapterWithFetch(fetchMock) + + await expect(adapter.getVideoUrl('missing')).rejects.toThrow( + /Video job not found: missing/, + ) + }) + }) + + describe('video provider option helpers', () => { + it('parses size templates', () => { + expect(parseGrokVideoSize('16:9_720p')).toEqual({ + aspectRatio: '16:9', + resolution: '720p', + }) + expect(parseGrokVideoSize('3:4')).toEqual({ aspectRatio: '3:4' }) + expect(parseGrokVideoSize('not-a-size')).toBeUndefined() + }) + + it('validates sizes', () => { + expect(() => validateVideoSize('m', '16:9')).not.toThrow() + expect(() => validateVideoSize('m', '2:3_1080p')).not.toThrow() + expect(() => validateVideoSize('m', undefined)).not.toThrow() + expect(() => validateVideoSize('m', '9:19.5')).toThrow(/not supported/) + expect(() => validateVideoSize('m', 'auto')).toThrow(/not supported/) + expect(() => validateVideoSize('m', '16:9_2k')).toThrow(/Resolution/) + }) + + it('validates durations', () => { + expect(() => validateVideoDuration('m', undefined)).not.toThrow() + expect(() => validateVideoDuration('m', 1)).not.toThrow() + expect(() => validateVideoDuration('m', 15)).not.toThrow() + expect(() => 
validateVideoDuration('m', 0)).toThrow(/Duration/) + expect(() => validateVideoDuration('m', 16)).toThrow(/Duration/) + expect(() => validateVideoDuration('m', 1.5)).toThrow(/Duration/) + }) + }) +}) diff --git a/packages/ai/skills/ai-core/media-generation/SKILL.md b/packages/ai/skills/ai-core/media-generation/SKILL.md index b9c4c1a2c..8387d6cb8 100644 --- a/packages/ai/skills/ai-core/media-generation/SKILL.md +++ b/packages/ai/skills/ai-core/media-generation/SKILL.md @@ -4,8 +4,8 @@ description: > Image, audio, video, speech (TTS), and transcription generation using activity-specific adapters: generateImage() with openaiImage/geminiImage, generateAudio() with geminiAudio/falAudio, generateVideo() with async - polling, generateSpeech() with openaiSpeech, generateTranscription() with - openaiTranscription. React hooks: useGenerateImage, useGenerateAudio, + polling (openaiVideo/grokVideo/falVideo), generateSpeech() with + openaiSpeech, generateTranscription() with openaiTranscription. React hooks: useGenerateImage, useGenerateAudio, useGenerateSpeech, useTranscription, useGenerateVideo. TanStack Start server function integration with toServerSentEventsResponse. type: sub-skill @@ -331,6 +331,12 @@ const stream = generateVideo({ return toServerSentEventsResponse(stream) ``` +Video adapters: `openaiVideo('sora-2')` (pixel sizes like `'1280x720'`, +durations 4/8/12s), `grokVideo('grok-imagine-video')` (aspect-ratio size +template like `'16:9_720p'`, integer durations 1-15s, image-to-video via +`modelOptions.image: { url }`, reports `usage.unitsBilled` seconds and exact +`usage.cost`), and `falVideo(...)` (hosted models, see cost tracking below). 
+ Client hook with job tracking: ```tsx From 49a7c09ac981889b979599c7b87a14ca5efdc93d Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Thu, 11 Jun 2026 10:14:36 +1000 Subject: [PATCH 2/3] feat(examples): add direct-xAI grok video entries to ts-react-media Adds 'xAI Direct' text-to-video and image-to-video entries to the video generator that use the native grokVideo adapter against xAI's Imagine API (XAI_API_KEY), alongside the existing fal-hosted grok-imagine entries. Polling is now keyed by the UI model id (the direct entries share one adapter model), and the completed card shows the exact USD cost when the adapter reports it. Co-Authored-By: Claude Fable 5 --- examples/ts-react-media/.env.example | 4 ++ examples/ts-react-media/package.json | 1 + .../src/components/VideoGenerator.tsx | 25 +++++++--- examples/ts-react-media/src/lib/models.ts | 12 +++++ .../src/lib/server-functions.ts | 49 +++++++++++++++++-- pnpm-lock.yaml | 3 ++ 6 files changed, 84 insertions(+), 10 deletions(-) diff --git a/examples/ts-react-media/.env.example b/examples/ts-react-media/.env.example index b7c897653..fdf123604 100644 --- a/examples/ts-react-media/.env.example +++ b/examples/ts-react-media/.env.example @@ -5,3 +5,7 @@ FAL_KEY= # Get a Google API key at https://aistudio.google.com/apikey GOOGLE_API_KEY= + +# Get an xAI API key at https://console.x.ai — used by the "xAI Direct" +# Grok Imagine video models (the other Grok Imagine entries go through fal). 
+XAI_API_KEY= diff --git a/examples/ts-react-media/package.json b/examples/ts-react-media/package.json index 80bc30ce8..9adf242d1 100644 --- a/examples/ts-react-media/package.json +++ b/examples/ts-react-media/package.json @@ -14,6 +14,7 @@ "@tanstack/ai": "workspace:*", "@tanstack/ai-fal": "workspace:*", "@tanstack/ai-gemini": "workspace:*", + "@tanstack/ai-grok": "workspace:*", "@tanstack/react-router": "^1.158.4", "@tanstack/react-start": "^1.159.0", "@tanstack/router-plugin": "^1.158.4", diff --git a/examples/ts-react-media/src/components/VideoGenerator.tsx b/examples/ts-react-media/src/components/VideoGenerator.tsx index 712981dfb..aedc5b8ff 100644 --- a/examples/ts-react-media/src/components/VideoGenerator.tsx +++ b/examples/ts-react-media/src/components/VideoGenerator.tsx @@ -20,7 +20,7 @@ type JobState = model: string progress?: number | undefined } - | { status: 'completed'; url: string; unitsBilled?: number } + | { status: 'completed'; url: string; unitsBilled?: number; cost?: number } | { status: 'error'; message: string } interface VideoGeneratorProps { @@ -99,6 +99,7 @@ export default function VideoGenerator({ status: 'completed', url: url, unitsBilled: urlResult.usage?.unitsBilled, + cost: urlResult.usage?.cost, }, })) } else if (status.status === 'processing') { @@ -159,8 +160,11 @@ export default function VideoGenerator({ }, })) + // Poll keyed by the UI model id, not result.model: the direct-xAI + // entries share one adapter model ('grok-imagine-video'), so + // result.model wouldn't identify the card (or the adapter) uniquely. const interval = setInterval(() => { - pollStatus(result.jobId, result.model) + pollStatus(result.jobId, modelId) }, 4000) pollingRefs.current.set(modelId, interval) } catch (err) { @@ -401,12 +405,21 @@ export default function VideoGenerator({ className="w-full h-auto" /> - {state.unitsBilled != null && ( + {state.cost != null ? (

- Billed {state.unitsBilled} fal unit - {state.unitsBilled === 1 ? '' : 's'} — multiply by the - endpoint unit price for USD cost + Billed ${state.cost.toFixed(3)} + {state.unitsBilled != null + ? ` for ${state.unitsBilled} second${state.unitsBilled === 1 ? '' : 's'} of video` + : ''}

+ ) : ( + state.unitsBilled != null && ( +

+ Billed {state.unitsBilled} fal unit + {state.unitsBilled === 1 ? '' : 's'} — multiply by the + endpoint unit price for USD cost +

+ ) )} )} diff --git a/examples/ts-react-media/src/lib/models.ts b/examples/ts-react-media/src/lib/models.ts index cfa36dfc5..d961ab7ad 100644 --- a/examples/ts-react-media/src/lib/models.ts +++ b/examples/ts-react-media/src/lib/models.ts @@ -110,6 +110,18 @@ export const VIDEO_MODELS = [ description: 'xAI animate images to video', mode: 'image-to-video' as const, }, + { + id: 'grok-imagine-video', + name: 'Grok Imagine Video (xAI Direct, Text-to-Video)', + description: 'xAI Imagine API via the native grokVideo adapter', + mode: 'text-to-video' as const, + }, + { + id: 'grok-imagine-video/image-to-video', + name: 'Grok Imagine Video (xAI Direct, Image-to-Video)', + description: 'Animate a starting frame via the native grokVideo adapter', + mode: 'image-to-video' as const, + }, { id: 'fal-ai/ltx-2.3/text-to-video/fast', name: 'LTX-2.3 Fast (Text-to-Video)', diff --git a/examples/ts-react-media/src/lib/server-functions.ts b/examples/ts-react-media/src/lib/server-functions.ts index 455226e5d..a2c66b416 100644 --- a/examples/ts-react-media/src/lib/server-functions.ts +++ b/examples/ts-react-media/src/lib/server-functions.ts @@ -1,9 +1,23 @@ import { createServerFn } from '@tanstack/react-start' import { falImage, falVideo } from '@tanstack/ai-fal' import { geminiImage } from '@tanstack/ai-gemini' +import { grokVideo } from '@tanstack/ai-grok' import { generateImage, generateVideo, getVideoJobStatus } from '@tanstack/ai' -import type { FalModel } from '@tanstack/ai-fal' +/** + * Resolves the video adapter for a UI model id. The grok-imagine entries hit + * xAI's Imagine API directly via the native grokVideo adapter; everything + * else is a fal-hosted model. 
+ */ +function videoAdapterForModel(model: string) { + if ( + model === 'grok-imagine-video' || + model === 'grok-imagine-video/image-to-video' + ) { + return grokVideo('grok-imagine-video') + } + return falVideo(model) +} export const generateImageFn = createServerFn({ method: 'POST' }) .inputValidator((data: { prompt: string; model: string }) => { @@ -151,6 +165,18 @@ export const createVideoJobFn = createServerFn({ method: 'POST' }) }, }) } + case 'grok-imagine-video': { + // Direct xAI Imagine API (XAI_API_KEY) — no fal in between. Sizing is + // an "aspectRatio_resolution" template; durations are 1-15 integer + // seconds. Completed jobs report usage.unitsBilled (billed seconds) + // and usage.cost (exact USD). + return generateVideo({ + adapter: grokVideo('grok-imagine-video'), + prompt: data.prompt, + size: '16:9_720p', + duration: 5, + }) + } case 'fal-ai/ltx-2.3/text-to-video/fast': { return generateVideo({ adapter: falVideo('fal-ai/ltx-2.3/text-to-video/fast'), @@ -199,6 +225,21 @@ export const createVideoJobFn = createServerFn({ method: 'POST' }) }, }) } + case 'grok-imagine-video/image-to-video': { + if (!data.imageUrl) + throw new Error('Image URL is required for image-to-video') + // The starting frame goes through modelOptions.image — a public URL + // or a base64 data URI (which is what the upload flow produces). 
+ return generateVideo({ + adapter: grokVideo('grok-imagine-video'), + prompt: data.prompt, + size: '16:9_720p', + duration: 5, + modelOptions: { + image: { url: data.imageUrl }, + }, + }) + } case 'fal-ai/ltx-2.3/image-to-video/fast': { if (!data.imageUrl) throw new Error('Image URL is required for image-to-video') @@ -217,9 +258,9 @@ export const createVideoJobFn = createServerFn({ method: 'POST' }) }) export const getVideoStatusFn = createServerFn({ method: 'GET' }) - .inputValidator((data: { jobId: string; model: FalModel }) => data) + .inputValidator((data: { jobId: string; model: string }) => data) .handler(async ({ data }) => { - const adapter = falVideo(data.model) + const adapter = videoAdapterForModel(data.model) return await getVideoJobStatus({ adapter, jobId: data.jobId, @@ -229,7 +270,7 @@ export const getVideoStatusFn = createServerFn({ method: 'GET' }) export const getVideoUrlFn = createServerFn({ method: 'GET' }) .inputValidator((data: { jobId: string; model: string }) => data) .handler(async ({ data }) => { - const adapter = falVideo(data.model) + const adapter = videoAdapterForModel(data.model) return await getVideoJobStatus({ adapter, jobId: data.jobId, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3fe7276d9..ca1622f08 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -494,6 +494,9 @@ importers: '@tanstack/ai-gemini': specifier: workspace:* version: link:../../packages/ai-gemini + '@tanstack/ai-grok': + specifier: workspace:* + version: link:../../packages/ai-grok '@tanstack/react-router': specifier: ^1.158.4 version: 1.159.5(react-dom@19.2.3(react@19.2.3))(react@19.2.3) From c6e1573e5d10beb40843355011542dac958b9519 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Thu, 11 Jun 2026 10:27:14 +1000 Subject: [PATCH 3/3] feat(examples): provider optgroups in video model dropdown, use grok-imagine-video-1.5-preview Groups the video model dropdown by provider (fal.ai / xAI direct) the same way 
the image generator does, adds the model version to the direct-xAI entry names, and switches them to the grok-imagine-video-1.5-preview model. Co-Authored-By: Claude Fable 5 --- .../src/components/VideoGenerator.tsx | 25 +++++++++++++------ examples/ts-react-media/src/lib/models.ts | 18 ++++++++++--- .../src/lib/server-functions.ts | 14 +++++------ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/examples/ts-react-media/src/components/VideoGenerator.tsx b/examples/ts-react-media/src/components/VideoGenerator.tsx index aedc5b8ff..d9e27131b 100644 --- a/examples/ts-react-media/src/components/VideoGenerator.tsx +++ b/examples/ts-react-media/src/components/VideoGenerator.tsx @@ -41,6 +41,8 @@ export default function VideoGenerator({ const pollingRefs = useRef>(new Map()) const filteredModels = VIDEO_MODELS.filter((m) => m.mode === mode) + const falModels = filteredModels.filter((m) => m.provider === 'fal') + const xaiModels = filteredModels.filter((m) => m.provider === 'xai') useEffect(() => { if (initialImageUrl) { @@ -161,8 +163,8 @@ export default function VideoGenerator({ })) // Poll keyed by the UI model id, not result.model: the direct-xAI - // entries share one adapter model ('grok-imagine-video'), so - // result.model wouldn't identify the card (or the adapter) uniquely. + // entries share one adapter model ('grok-imagine-video-1.5-preview'), + // so result.model wouldn't identify the card (or the adapter) uniquely. 
const interval = setInterval(() => { pollStatus(result.jobId, modelId) }, 4000) @@ -248,11 +250,20 @@ export default function VideoGenerator({ className="w-full px-4 py-3 bg-gray-800 border border-gray-700 rounded-lg text-white focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent disabled:opacity-50" > - {filteredModels.map((model) => ( - - ))} + + {falModels.map((model) => ( + + ))} + + + {xaiModels.map((model) => ( + + ))} + diff --git a/examples/ts-react-media/src/lib/models.ts b/examples/ts-react-media/src/lib/models.ts index d961ab7ad..2989dee96 100644 --- a/examples/ts-react-media/src/lib/models.ts +++ b/examples/ts-react-media/src/lib/models.ts @@ -79,60 +79,70 @@ export const VIDEO_MODELS = [ name: 'Kling 3 Pro (Text-to-Video)', description: 'High-quality text-to-video generation', mode: 'text-to-video' as const, + provider: 'fal' as const, }, { id: 'fal-ai/kling-video/v3/pro/image-to-video', name: 'Kling 3 Pro (Image-to-Video)', description: 'Animate images with Kling', mode: 'image-to-video' as const, + provider: 'fal' as const, }, { id: 'fal-ai/veo3.1', name: 'Veo 3.1 (Text-to-Video)', description: 'Google Veo text-to-video', mode: 'text-to-video' as const, + provider: 'fal' as const, }, { id: 'fal-ai/veo3.1/image-to-video', name: 'Veo 3.1 (Image-to-Video)', description: 'Google Veo image-to-video', mode: 'image-to-video' as const, + provider: 'fal' as const, }, { id: 'xai/grok-imagine-video/text-to-video', name: 'Grok Imagine Video (Text-to-Video)', description: 'xAI video generation from text', mode: 'text-to-video' as const, + provider: 'fal' as const, }, { id: 'xai/grok-imagine-video/image-to-video', name: 'Grok Imagine Video (Image-to-Video)', description: 'xAI animate images to video', mode: 'image-to-video' as const, + provider: 'fal' as const, }, { - id: 'grok-imagine-video', - name: 'Grok Imagine Video (xAI Direct, Text-to-Video)', + id: 'grok-imagine-video-1.5-preview', + name: 'Grok Imagine Video 1.5 (Text-to-Video)', 
description: 'xAI Imagine API via the native grokVideo adapter', mode: 'text-to-video' as const, + provider: 'xai' as const, }, { - id: 'grok-imagine-video/image-to-video', - name: 'Grok Imagine Video (xAI Direct, Image-to-Video)', + id: 'grok-imagine-video-1.5-preview/image-to-video', + name: 'Grok Imagine Video 1.5 (Image-to-Video)', description: 'Animate a starting frame via the native grokVideo adapter', mode: 'image-to-video' as const, + provider: 'xai' as const, }, { id: 'fal-ai/ltx-2.3/text-to-video/fast', name: 'LTX-2.3 Fast (Text-to-Video)', description: 'Fast text-to-video generation', mode: 'text-to-video' as const, + provider: 'fal' as const, }, { id: 'fal-ai/ltx-2.3/image-to-video/fast', name: 'LTX-2.3 Fast (Image-to-Video)', description: 'Fast image-to-video animation', mode: 'image-to-video' as const, + provider: 'fal' as const, }, ] as const diff --git a/examples/ts-react-media/src/lib/server-functions.ts b/examples/ts-react-media/src/lib/server-functions.ts index a2c66b416..0a95f281a 100644 --- a/examples/ts-react-media/src/lib/server-functions.ts +++ b/examples/ts-react-media/src/lib/server-functions.ts @@ -11,10 +11,10 @@ import { generateImage, generateVideo, getVideoJobStatus } from '@tanstack/ai' */ function videoAdapterForModel(model: string) { if ( - model === 'grok-imagine-video' || - model === 'grok-imagine-video/image-to-video' + model === 'grok-imagine-video-1.5-preview' || + model === 'grok-imagine-video-1.5-preview/image-to-video' ) { - return grokVideo('grok-imagine-video') + return grokVideo('grok-imagine-video-1.5-preview') } return falVideo(model) } @@ -165,13 +165,13 @@ export const createVideoJobFn = createServerFn({ method: 'POST' }) }, }) } - case 'grok-imagine-video': { + case 'grok-imagine-video-1.5-preview': { // Direct xAI Imagine API (XAI_API_KEY) — no fal in between. Sizing is // an "aspectRatio_resolution" template; durations are 1-15 integer // seconds. 
Completed jobs report usage.unitsBilled (billed seconds) // and usage.cost (exact USD). return generateVideo({ - adapter: grokVideo('grok-imagine-video'), + adapter: grokVideo('grok-imagine-video-1.5-preview'), prompt: data.prompt, size: '16:9_720p', duration: 5, @@ -225,13 +225,13 @@ export const createVideoJobFn = createServerFn({ method: 'POST' }) }, }) } - case 'grok-imagine-video/image-to-video': { + case 'grok-imagine-video-1.5-preview/image-to-video': { if (!data.imageUrl) throw new Error('Image URL is required for image-to-video') // The starting frame goes through modelOptions.image — a public URL // or a base64 data URI (which is what the upload flow produces). return generateVideo({ - adapter: grokVideo('grok-imagine-video'), + adapter: grokVideo('grok-imagine-video-1.5-preview'), prompt: data.prompt, size: '16:9_720p', duration: 5,