diff --git a/.changeset/structured-output-undo-null-widening.md b/.changeset/structured-output-undo-null-widening.md new file mode 100644 index 000000000..35f69fb12 --- /dev/null +++ b/.changeset/structured-output-undo-null-widening.md @@ -0,0 +1,19 @@ +--- +'@tanstack/ai-utils': minor +'@tanstack/ai': minor +'@tanstack/openai-base': minor +'@tanstack/ai-openrouter': patch +--- + +Fix structured output validation rejecting `null` for optional fields, across both stream modes and every adapter. + +Strict-mode structured output widens optional fields to `required` + nullable, so the provider returns `null` for an absent optional. Validating that `null` against the original schema then failed, because `.optional()` means `T | undefined`, not `T | null` — surfacing as a `StandardSchemaValidationError` (e.g. `Invalid type: Expected string but received null`). + +The engine now undoes the widening as a single, schema-aware step the moment the structured output is captured, so the fix applies uniformly: + +- The strict-conversion pass records a `NullWideningMap` marking exactly the positions where it added `null`, so the response can be un-widened precisely — no re-deriving or guessing which nulls were synthetic. +- `@tanstack/ai-utils` adds `undoNullWidening(value, map)` — a counterpart to `transformNullsToUndefined` that strips only the nulls the widening pass synthesized, preserving the ones a `.nullable()`/`.nullish()` field genuinely allows. +- The engine applies this via a new `finalStructuredOutput.normalize` hook the instant the result is captured, so **both** the `Promise` result **and** the streaming `structured-output.complete` event carry the un-widened object. Previously only the `Promise` path was corrected, and only for adapters that preserved provider nulls. +- `@tanstack/openai-base` adapters (and the OpenAI/Grok/Groq adapters built on them) no longer blind-strip every `null` from structured output via `transformStructuredOutput` — that default is now a passthrough. The blind strip masked the validation bug but also destroyed genuine `.nullable()` nulls; precise un-widening in the engine fixes both. The `transformStructuredOutput` hook remains for provider-specific reshaping. + +Adapters that already preserve provider nulls (`@tanstack/ai-openrouter`, Anthropic, Gemini, Ollama) now get correct un-widening on their streaming structured output too, not just `Promise`. diff --git a/docs/config.json b/docs/config.json index e3fc3b712..956cbe65f 100644 --- a/docs/config.json +++ b/docs/config.json @@ -167,7 +167,8 @@ { "label": "Overview", "to": "structured-outputs/overview", - "addedAt": "2026-05-19" + "addedAt": "2026-05-19", + "updatedAt": "2026-06-10" }, { "label": "One-Shot Extraction", diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md index 98ef6f7ea..5245d8e52 100644 --- a/docs/structured-outputs/overview.md +++ b/docs/structured-outputs/overview.md @@ -86,7 +86,9 @@ Pick the journey that matches what you're building. The four guides under "Struc The streaming and multi-turn paths both build on `useChat({ outputSchema })`. The "with tools" path layers on top of either. Pick the one that describes your shipping shape — start there, follow the cross-links when you need a piece of another story. -> **Note:** Server-side validation is **path-dependent**. For the non-streaming agentic path (`await chat({ outputSchema })`), the engine runs Standard Schema validation inside the finalization step and routes failures through `onError` (the awaited promise rejects). For the streaming path (`chat({ outputSchema, stream: true })`), validation is deliberately deferred to the consumer — the engine forwards the adapter-emitted `structured-output.complete` event verbatim, and consumers read the validated object from the `value.object` field (or call `parseWithStandardSchema` themselves on the raw text). The schema you pass to `useChat({ outputSchema })` on the client is used for TypeScript inference and (in `useChat`) for client-side `parsePartialJSON`-based progressive parsing — the typed-object guarantee comes from the server-side path you pick. +> **Note:** Server-side validation is **path-dependent**. For the non-streaming agentic path (`await chat({ outputSchema })`), the engine runs Standard Schema validation inside the finalization step and routes failures through `onError` (the awaited promise rejects). For the streaming path (`chat({ outputSchema, stream: true })`), Standard Schema _validation_ is deliberately deferred to the consumer — consumers read the object from the `structured-output.complete` event's `value.object` field (or call `parseWithStandardSchema` themselves on the raw text). The schema you pass to `useChat({ outputSchema })` on the client is used for TypeScript inference and (in `useChat`) for client-side `parsePartialJSON`-based progressive parsing — the typed-object guarantee comes from the server-side path you pick. +> +> On **both** paths the engine normalizes the captured object before it reaches you: to satisfy strict providers, optional fields are widened to `required` + nullable, so the provider returns `null` for an absent optional. The engine undoes exactly that widening — an `.optional()` field that came back `null` reads back as **absent** (matching `T | undefined`), while a genuine `.nullable()` field's `null` is **preserved**. So `value.object` (streaming) and the awaited result (non-streaming) both carry the un-widened shape your schema describes. ## Middleware integration diff --git a/packages/ai-openrouter/src/adapters/responses-text.ts b/packages/ai-openrouter/src/adapters/responses-text.ts index a658d985e..91f7d9335 100644 --- a/packages/ai-openrouter/src/adapters/responses-text.ts +++ b/packages/ai-openrouter/src/adapters/responses-text.ts @@ -5,7 +5,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../internal/request-options' import { makeStructuredOutputCompatible } from '../internal/schema-converter' import { convertFunctionToolToResponsesFormat } from '../internal/responses-tool-converter' @@ -697,14 +697,12 @@ export class OpenRouterResponsesTextAdapter< /** * OpenRouter routes through a wide variety of upstream providers; some - * return `null` as a distinct sentinel rather than collapsing it to absent. - * Stripping nulls would erase that distinction, so we passthrough. - * - * `transformNullsToUndefined` is imported for parity with the other - * provider adapters but intentionally not invoked here. + * return `null` as a distinct sentinel rather than collapsing it to absent, + * so we passthrough and let the engine un-widen strict-mode nulls precisely. + * Matches the base adapters' default — kept as an explicit override because + * OpenRouter extends `BaseTextAdapter` directly, not the OpenAI base. */ protected transformStructuredOutput(parsed: unknown): unknown { - void transformNullsToUndefined return parsed } diff --git a/packages/ai-openrouter/src/adapters/text.ts b/packages/ai-openrouter/src/adapters/text.ts index 2648da0a3..415be9ad6 100644 --- a/packages/ai-openrouter/src/adapters/text.ts +++ b/packages/ai-openrouter/src/adapters/text.ts @@ -5,7 +5,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../internal/request-options' import { makeStructuredOutputCompatible } from '../internal/schema-converter' import { convertToolsToProviderFormat } from '../tools' @@ -624,14 +624,12 @@ export class OpenRouterTextAdapter< * Final shaping pass applied to parsed structured-output JSON before it is * returned to the caller. OpenRouter routes through a wide variety of * upstream providers; some return `null` as a distinct sentinel ("the field - * exists, the value is null") rather than collapsing it to absent. Stripping - * nulls would erase that distinction, so we passthrough. - * - * `transformNullsToUndefined` is imported for parity with the other - * provider adapters but intentionally not invoked here. + * exists, the value is null") rather than collapsing it to absent, so we + * passthrough and let the engine un-widen strict-mode nulls precisely. This + * now matches the base adapters' default — kept as an explicit override + * because OpenRouter extends `BaseTextAdapter` directly, not the OpenAI base. */ protected transformStructuredOutput(parsed: unknown): unknown { - void transformNullsToUndefined return parsed } diff --git a/packages/ai-openrouter/tests/openrouter-adapter.test.ts b/packages/ai-openrouter/tests/openrouter-adapter.test.ts index 83d8c9251..9b9dfbef0 100644 --- a/packages/ai-openrouter/tests/openrouter-adapter.test.ts +++ b/packages/ai-openrouter/tests/openrouter-adapter.test.ts @@ -1249,7 +1249,13 @@ describe('OpenRouter structured output', () => { outputSchema, }) - expect(result).toEqual({ name: 'Alice', age: 30, nickname: null }) + // `nickname` was optional, so strict-mode widening made it `required` + + // nullable and the provider returned `null` for the absent value. The + // engine un-widens that synthesized null before returning, so the optional + // field reads back as absent — matching `.optional()` semantics — rather + // than leaking the synthetic `null` through. + expect(result).toEqual({ name: 'Alice', age: 30 }) + expect('nickname' in (result as object)).toBe(false) // The structured-output streaming call carries the strict-transformed schema. const structuredCall = mockSend.mock.calls.find( diff --git a/packages/ai-utils/src/index.ts b/packages/ai-utils/src/index.ts index d6b79c9b8..843d5eb37 100644 --- a/packages/ai-utils/src/index.ts +++ b/packages/ai-utils/src/index.ts @@ -1,4 +1,5 @@ export { generateId } from './id' export { getApiKeyFromEnv } from './env' -export { transformNullsToUndefined } from './transforms' +export { transformNullsToUndefined, undoNullWidening } from './transforms' +export type { NullWideningMap } from './transforms' export { arrayBufferToBase64, base64ToArrayBuffer } from './base64' diff --git a/packages/ai-utils/src/transforms.ts b/packages/ai-utils/src/transforms.ts index 53eba0ecb..c083952ad 100644 --- a/packages/ai-utils/src/transforms.ts +++ b/packages/ai-utils/src/transforms.ts @@ -21,6 +21,11 @@ * therefore become `{}`; arbitrary class instances become a plain-object * snapshot of just their own enumerable string properties. Don't pass * non-JSON values. + * + * Schema-blind: strips EVERY null, including ones a `.nullable()` field + * legitimately allows. When the original schema is available, prefer + * {@link undoNullWidening}, which only strips the nulls strict-mode widening + * synthesized. */ export function transformNullsToUndefined(obj: T): T { if (obj === null) { @@ -44,3 +49,76 @@ export function transformNullsToUndefined(obj: T): T { } return result as T } + +/** + * Records exactly where strict-mode null-widening synthesized a `null`, so + * {@link undoNullWidening} can strip those nulls and leave every other one + * untouched. Built by the widening pass itself as it walks the schema (see + * `convertSchemaForStructuredOutput` in `@tanstack/ai`), so it can never drift + * from what was actually widened — no value-shape guessing required. + * + * - `widened`: the widening pass added `null` to THIS position's type (an + * optional field promoted to `required` + nullable). A `null` here is + * synthetic → strip it. Positions a `.nullable()`/`.nullish()` field already + * allowed carry no `widened` mark, so their nulls survive. + * - `properties` / `items`: descend into a nested object / array to reach + * widened positions deeper in the tree. Only objects and arrays the widener + * actually recursed into appear here. + */ +export type NullWideningMap = { + widened?: boolean + properties?: Record + items?: NullWideningMap | Array +} + +function walk(value: unknown, map: NullWideningMap | undefined): unknown { + if (value === null) { + // Strip only nulls the widening pass synthesized (marked `widened`); keep + // every genuine `.nullable()`/`.nullish()` null and every null the map + // doesn't describe. + return map?.widened ? undefined : null + } + if (typeof value !== 'object' || !map) return value + + if (Array.isArray(value)) { + const { items } = map + if (!items) return value + // Tuple maps (`items: [a, b, …]`) describe each position separately; + // a single `items` map applies to every element. + return Array.isArray(items) + ? value.map((item, index) => walk(item, items[index])) + : value.map((item) => walk(item, items)) + } + + const { properties } = map + if (!properties) return value + const result: Record = {} + for (const [key, child] of Object.entries(value as Record)) { + const next = walk(child, properties[key]) + // A synthesized null collapsed to undefined → omit the key so the field + // reads as absent (`key in result === false`), matching how `.optional()` + // treats absence. + if (next === undefined) continue + result[key] = next + } + return result +} + +/** + * Inverse of strict-mode null-widening for structured output. + * + * To satisfy OpenAI-style strict schemas, optional fields are widened to + * `required` with `null` added to their type, so the provider returns `null` + * for an absent optional. Validating that `null` against the ORIGINAL schema + * fails, because `.optional()` means `T | undefined`, not `T | null`. + * + * Unlike {@link transformNullsToUndefined}, this consults a {@link + * NullWideningMap} recorded by the widening pass and drops ONLY the nulls that + * pass actually synthesized. Nulls a `.nullable()`/`.nullish()` field genuinely + * allows are preserved, so both `optional` and `nullable` fields round-trip + * correctly. With no map, the value is returned untouched. + */ +export function undoNullWidening(value: T, map?: NullWideningMap): T { + if (!map) return value + return walk(value, map) as T +} diff --git a/packages/ai-utils/tests/transforms.test.ts b/packages/ai-utils/tests/transforms.test.ts index 8ce65c1b6..e063866d2 100644 --- a/packages/ai-utils/tests/transforms.test.ts +++ b/packages/ai-utils/tests/transforms.test.ts @@ -1,5 +1,6 @@ -import { describe, it, expect } from 'vitest' -import { transformNullsToUndefined } from '../src/transforms' +import { describe, expect, it } from 'vitest' +import { transformNullsToUndefined, undoNullWidening } from '../src/transforms' +import type { NullWideningMap } from '../src/transforms' describe('transformNullsToUndefined', () => { it('should convert null values to undefined', () => { @@ -49,3 +50,107 @@ describe('transformNullsToUndefined', () => { expect(result).toEqual({ a: { b: { c: { e: 'keep' } } } }) }) }) + +describe('undoNullWidening', () => { + // The widening pass records a map of the nulls it synthesized. For an object + // with one optional field (`opt`) and one nullable field (`nul`), only `opt` + // is widened — so only `opt` is marked: + // req: string (required) -> not widened, absent from the map + // opt: optional(string) -> widened to `required` + null + // nul: nullable(string) -> already allowed null, not widened + const map: NullWideningMap = { + properties: { + opt: { widened: true }, + }, + } + + it('drops a synthesized null on a widened field (key becomes absent)', () => { + const result = undoNullWidening({ req: 'a', opt: null }, map) + expect(result).toEqual({ req: 'a' }) + expect('opt' in (result as object)).toBe(false) + }) + + it('keeps a genuine null on a field the widener did not touch', () => { + const result = undoNullWidening({ req: 'a', nul: null }, map) + expect(result).toEqual({ req: 'a', nul: null }) + }) + + it('handles widened and genuine nulls in the same object', () => { + const result = undoNullWidening({ req: 'a', opt: null, nul: null }, map) + expect(result).toEqual({ req: 'a', nul: null }) + }) + + it('leaves present values untouched', () => { + const result = undoNullWidening({ req: 'a', opt: 'b', nul: 'c' }, map) + expect(result).toEqual({ req: 'a', opt: 'b', nul: 'c' }) + }) + + it('descends into a widened object to drop its inner synthesized null', () => { + // `obj` is itself optional (so it may come back null) AND has an inner + // optional `note`. The map marks both the object and the nested field. + const nested: NullWideningMap = { + properties: { + obj: { + widened: true, + properties: { note: { widened: true } }, + }, + }, + } + // obj is present (kept), but its optional `note` came back null. + const result = undoNullWidening({ obj: { inner: 'x', note: null } }, nested) + expect(result).toEqual({ obj: { inner: 'x' } }) + + // …and when the whole object comes back null, the key drops out. + expect(undoNullWidening({ obj: null }, nested)).toEqual({}) + }) + + it('strips synthesized nulls inside array items', () => { + const arrMap: NullWideningMap = { + properties: { + items: { + items: { properties: { label: { widened: true } } }, + }, + }, + } + const result = undoNullWidening( + { + items: [ + { id: '1', label: null }, + { id: '2', label: 'two' }, + ], + }, + arrMap, + ) + expect(result).toEqual({ items: [{ id: '1' }, { id: '2', label: 'two' }] }) + }) + + it('applies tuple-style item maps per index', () => { + // [ { name }, { note? } ] — only the second position has a widened field. + const tupleMap: NullWideningMap = { + properties: { + pair: { + items: [{}, { properties: { note: { widened: true } } }], + }, + }, + } + const result = undoNullWidening( + { pair: [{ name: 'Ada' }, { note: null }] }, + tupleMap, + ) + // The synthesized null in the second tuple position is dropped using that + // position's map, not the first's. + expect(result).toEqual({ pair: [{ name: 'Ada' }, {}] }) + }) + + it('returns the value untouched when no map is supplied', () => { + const value = { a: null, b: 1 } + expect(undoNullWidening(value)).toBe(value) + }) + + it('leaves nulls under positions the map does not describe', () => { + // `extra` carries no map entry — the widener never synthesized a null + // there, so it is preserved. + const result = undoNullWidening({ req: 'a', extra: null }, map) + expect(result).toEqual({ req: 'a', extra: null }) + }) +}) diff --git a/packages/ai/package.json b/packages/ai/package.json index 1ffd3b4f8..128a598f7 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -80,6 +80,7 @@ "@ag-ui/core": "^0.0.52", "@standard-schema/spec": "^1.1.0", "@tanstack/ai-event-client": "workspace:*", + "@tanstack/ai-utils": "workspace:*", "partial-json": "^0.1.7" }, "peerDependencies": { diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index eba41682f..d71fe0b88 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -6,6 +6,7 @@ */ import { devtoolsMiddleware } from '@tanstack/ai-event-client' +import { undoNullWidening } from '@tanstack/ai-utils' import { stripToSpecMiddleware } from '../../strip-to-spec-middleware' import { streamToText } from '../../stream-to-response.js' import { resolveDebugOption } from '../../logger/resolve' @@ -18,6 +19,7 @@ import { executeToolCalls, } from './tools/tool-calls' import { + convertSchemaForStructuredOutput, convertSchemaToJsonSchema, isStandardSchema, parseWithStandardSchema, @@ -413,11 +415,21 @@ interface TextEngineConfig< * (used by runStreamingStructuredOutput). When false, chunks are * consumed internally for middleware visibility but not yielded * (used by runAgenticStructuredOutput). - * - validate: optional callback invoked AFTER the structured-output result - * is captured but BEFORE the terminal hook fires. If it throws, the - * engine records a `finalizationError` and fires `onError` instead of - * `onFinish` (per spec §7.3). On success, the returned value is stored - * as the validated result and retrievable via + * - normalize: optional schema-aware transform applied to the captured + * structured-output object the moment it enters the engine — BEFORE it is + * stored, validated, or yielded. Used to undo strict-mode null-widening + * (`undoNullWidening`): strict schemas widen optional fields to + * `required` + nullable so the provider returns `null` for an absent + * optional, and this strips exactly those synthesized nulls while keeping + * the ones a `.nullable()` field genuinely allows. Applied here (not in + * the adapter) because the engine is the only layer holding the original + * schema's null-widening map, and applying it at capture fixes BOTH the + * streaming chunk and the Promise result with one transform. + * - validate: optional callback invoked AFTER `normalize` and AFTER the + * structured-output result is captured, but BEFORE the terminal hook + * fires. If it throws, the engine records a `finalizationError` and fires + * `onError` instead of `onFinish` (per spec §7.3). On success, the + * returned value is stored as the validated result and retrievable via * `getValidatedStructuredOutput()`. Used by `runAgenticStructuredOutput` * to perform Standard Schema validation inside the engine. * - nativeCombined: when true, the adapter declared @@ -432,6 +444,7 @@ interface TextEngineConfig< finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean + normalize?: (data: unknown) => unknown validate?: (data: unknown) => unknown nativeCombined?: boolean } @@ -537,8 +550,8 @@ class TextEngine< // to carry, so the client matches it to the streaming text deltas. private combinedStructuredMessageId: string | null = null // Holds the validated value when `finalStructuredOutput.validate` is provided - // and succeeds. Distinct from `structuredOutputResult.data` (the raw, - // unvalidated payload from the structured-output.complete chunk). + // and succeeds. Distinct from `structuredOutputResult.data` (the normalized + // but unvalidated payload from the structured-output.complete chunk). private validatedStructuredOutput: unknown = undefined private hasValidatedStructuredOutput = false private finalizationError: { @@ -549,6 +562,7 @@ class TextEngine< private readonly finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean + normalize?: (data: unknown) => unknown validate?: (data: unknown) => unknown nativeCombined?: boolean } @@ -2047,15 +2061,29 @@ class TextEngine< // All narrowing below is via the discriminated-union `chunk.type` // — no `as` casts. + // The chunk forwarded to middleware/consumers. Replaced below only for + // the structured-output.complete event, whose `object` we normalize + // (un-widen) so streaming consumers see the same cleaned payload the + // Promise path validates and returns. + let outboundChunk: StreamChunk = chunk + if ( chunk.type === EventType.CUSTOM && chunk.name === 'structured-output.complete' ) { const parsed = readStructuredOutputCompleteValue(chunk.value) if (parsed) { - this.structuredOutputResult = { - data: parsed.object, - rawText: parsed.raw, + const object = this.finalStructuredOutput.normalize + ? this.finalStructuredOutput.normalize(parsed.object) + : parsed.object + this.structuredOutputResult = { data: object, rawText: parsed.raw } + // Rewrite the outbound event so the yielded chunk carries the + // normalized object (the original `chunk.value` still holds the + // widened one). Preserve every other field — `raw`, `reasoning` — + // by spreading the original value. + const value = chunk.value + if (object !== parsed.object && value && typeof value === 'object') { + outboundChunk = { ...chunk, value: { ...value, object } } } } } @@ -2079,7 +2107,7 @@ class TextEngine< // 7b. Pipe through middleware const outputChunks = await this.middlewareRunner.runOnChunk( this.middlewareCtx, - chunk, + outboundChunk, ) // 7c. Decide consumer visibility — only yieldChunks=true callers get them. @@ -2236,7 +2264,14 @@ class TextEngine< } else { try { const parsed: unknown = JSON.parse(rawText) - this.structuredOutputResult = { data: parsed, rawText } + // Normalize (un-widen) before storing so the synthesized + // structured-output.complete chunk and the Promise result both + // carry the cleaned payload. JSON.parse preserves provider nulls, so + // this is where native-combined output gets its widening undone. + const data = this.finalStructuredOutput.normalize + ? this.finalStructuredOutput.normalize(parsed) + : parsed + this.structuredOutputResult = { data, rawText } } catch (err: unknown) { const detail = rawText.slice(0, 200) + (rawText.length > 200 ? '...' : '') @@ -2691,18 +2726,31 @@ async function runAgenticStructuredOutput< // Same strict-conversion as the streaming path (`forStructuredOutput: true`) // so the same Zod schema produces the same JSON Schema regardless of - // stream mode — Promise and stream:true must not diverge here. - const jsonSchema = convertSchemaToJsonSchema(outputSchema, { - forStructuredOutput: true, - }) + // stream mode — Promise and stream:true must not diverge here. The same + // pass also records a `nullWideningMap`: optional fields are widened to + // `required` + nullable for the provider, which then returns `null` for an + // absent optional — a `null` the original `.optional()` (`T | undefined`) + // schema would otherwise reject. The map pinpoints exactly those synthesized + // nulls so `undoNullWidening` can drop them while preserving the ones a + // `.nullable()` field genuinely allows. + const { jsonSchema, nullWideningMap } = + convertSchemaForStructuredOutput(outputSchema) if (!jsonSchema) { throw new Error('Failed to convert output schema to JSON Schema') } + // Un-widening runs in the engine the moment the structured output is + // captured (`finalStructuredOutput.normalize`), so it applies uniformly to + // every adapter and to both stream modes — the engine is the only layer + // holding the schema's `nullWideningMap`. Validation then runs on the + // already-normalized data, so `validate` is a plain Standard Schema parse. + const normalize = (data: unknown): unknown => + undoNullWidening(data, nullWideningMap) + // Validation runs INSIDE the engine (per spec §7.3) so validation failures // route through the engine's terminal-hook chooser as `onError`. We pass a // `validate` callback when the schema is a Standard Schema; otherwise we - // pass through the raw data and the engine returns it unchanged. + // pass through the (normalized) data and the engine returns it unchanged. const validate = isStandardSchema(outputSchema) ? (data: unknown): unknown => parseWithStandardSchema>(outputSchema, data) @@ -2734,6 +2782,7 @@ async function runAgenticStructuredOutput< finalStructuredOutput: { jsonSchema, yieldChunks: false, + normalize, ...(validate ? { validate } : {}), ...(nativeCombined ? { nativeCombined: true } : {}), }, @@ -2906,17 +2955,23 @@ async function* fallbackStructuredOutputStream( * RUN_STARTED/RUN_FINISHED are suppressed; the structured-output finalization * step's pair brackets the run for the consumer. * - * Schema validation is intentionally NOT run on this path — it is the - * consumer's responsibility. The `structured-output.complete` CUSTOM event - * is forwarded with the adapter-produced `value.object` as-is. This is a - * deliberate asymmetry vs. `runAgenticStructuredOutput` (Promise path), - * which DOES run Standard Schema validation inside the engine and routes - * validation failures through `onError`. The reason for the asymmetry: + * Standard Schema *validation* is intentionally NOT run on this path — it is + * the consumer's responsibility. This is a deliberate asymmetry vs. + * `runAgenticStructuredOutput` (Promise path), which DOES validate inside + * the engine and routes validation failures through `onError`. The reason: * streaming consumers typically render partial JSON progressively (via * `parsePartialJSON` or `useChat`'s `partial` slot) and validate downstream * after assembly. Running validation server-side would force a hard error * on partial-by-design payloads. See `docs/structured-outputs/overview.md`. * + * Null-widening normalization, however, IS run on both paths: the + * `structured-output.complete` CUSTOM event is forwarded with its `value.object` + * already un-widened (synthesized strict-mode nulls dropped, genuine + * `.nullable()` nulls kept), so a consumer validating the assembled object + * against the original schema doesn't choke on a `null` for an `.optional()` + * field. Same `convertSchemaForStructuredOutput` pass and same + * `undoNullWidening` map as the Promise path — the two must not diverge. + * * Pre-flight validation (missing schema, unconvertible schema) throws * synchronously at call time rather than as a yielded RUN_ERROR mid-stream — * those are programmer errors, not runtime conditions. @@ -2934,14 +2989,17 @@ function runStreamingStructuredOutput< } // forStructuredOutput strict-converts the schema once at the activity - // boundary. Adapters can re-convert if their wire format diverges, but the - // default flow hands them a strict-ready schema. - const jsonSchema = convertSchemaToJsonSchema(outputSchema, { - forStructuredOutput: true, - }) + // boundary, capturing the null-widening map so the engine can un-widen the + // provider's response before it reaches the consumer. Adapters can re-convert + // if their wire format diverges, but the default flow hands them a + // strict-ready schema. + const { jsonSchema, nullWideningMap } = + convertSchemaForStructuredOutput(outputSchema) if (!jsonSchema) { throw new Error('Failed to convert output schema to JSON Schema') } + const normalize = (data: unknown): unknown => + undoNullWidening(data, nullWideningMap) // The implementation generator yields the broader internal type // (`StreamChunk | StructuredOutputCompleteEvent`) so agent-loop @@ -2952,6 +3010,7 @@ function runStreamingStructuredOutput< return runStreamingStructuredOutputImpl( options, jsonSchema, + normalize, ) as StructuredOutputStream> } @@ -2977,6 +3036,7 @@ async function* runStreamingStructuredOutputImpl< >( options: TextActivityOptions, jsonSchema: NonNullable>, + normalize: (data: unknown) => unknown, ): StructuredOutputStreamInternal> { const { adapter, @@ -3021,6 +3081,7 @@ async function* runStreamingStructuredOutputImpl< finalStructuredOutput: { jsonSchema, yieldChunks: true, + normalize, ...(nativeCombined ? { nativeCombined: true } : {}), }, }, @@ -3035,9 +3096,10 @@ async function* runStreamingStructuredOutputImpl< await mcpManager.dispose() } - // Schema validation for the streaming variant remains the consumer's - // responsibility — they read the CUSTOM 'structured-output.complete' from - // the yielded stream. Matches pre-fix behavior. + // Standard Schema validation for the streaming variant remains the + // consumer's responsibility — they read the CUSTOM 'structured-output.complete' + // from the yielded stream. (Null-widening normalization, by contrast, already + // ran inside the engine via `normalize`, so the object they read is un-widened.) void outputSchema } diff --git a/packages/ai/src/activities/chat/tools/schema-converter.ts b/packages/ai/src/activities/chat/tools/schema-converter.ts index 6bcd81aeb..cda434bd1 100644 --- a/packages/ai/src/activities/chat/tools/schema-converter.ts +++ b/packages/ai/src/activities/chat/tools/schema-converter.ts @@ -2,6 +2,7 @@ import type { StandardJSONSchemaV1, StandardSchemaV1, } from '@standard-schema/spec' +import type { NullWideningMap } from '@tanstack/ai-utils' import type { JSONSchema, SchemaInput } from '../../../types' /** @@ -82,6 +83,22 @@ export function isStandardSchema(schema: unknown): schema is StandardSchemaV1 { ) } +/** + * Result of {@link makeStructuredOutputCompatible}: the strict-ready schema plus + * a {@link NullWideningMap} recording every position where a `null` was + * synthesized, so the response can be un-widened before validation without + * re-deriving (or guessing) which nulls were synthetic. + */ +interface StructuredOutputConversion { + schema: JSONSchema + nullWidening: NullWideningMap | undefined +} + +/** Drop an empty map to `undefined` so leaf/no-op subtrees don't litter it. */ +function pruneMap(map: NullWideningMap): NullWideningMap | undefined { + return Object.keys(map).length > 0 ? map : undefined +} + /** * Transform a JSON schema to be compatible with OpenAI's structured output requirements. * OpenAI requires: @@ -89,59 +106,76 @@ export function isStandardSchema(schema: unknown): schema is StandardSchemaV1 { * - Optional fields should have null added to their type union * - additionalProperties must be false for objects * + * Alongside the transformed schema it returns a {@link NullWideningMap} marking + * exactly the positions where `null` was added, so `undoNullWidening` can strip + * those synthesized nulls (and only those) from the provider's response. + * * @param schema - JSON schema to transform * @param originalRequired - Original required array (to know which fields were optional) - * @returns Transformed schema compatible with OpenAI structured output + * @returns Transformed schema + the null-widening map for the round trip */ function makeStructuredOutputCompatible( schema: JSONSchema, originalRequired: Array = [], -): JSONSchema { +): StructuredOutputConversion { const result: JSONSchema = { ...schema } + const map: NullWideningMap = {} // Handle object types if (result.type === 'object' && result.properties) { const properties: Record = { ...result.properties } const allPropertyNames = Object.keys(properties) + const propertyMaps: Record = {} // Transform each property for (const propName of allPropertyNames) { const prop = properties[propName] if (!prop) continue const wasOptional = !originalRequired.includes(propName) + // `null` synthesized AT this property (the field itself can come back null). + let widenedHere = false + // Map describing widened positions INSIDE this property. + let childMap: NullWideningMap | undefined // Recursively transform nested objects/arrays if (prop.type === 'object' && prop.properties) { - const transformed = makeStructuredOutputCompatible( - prop, - prop.required || [], - ) + const nested = makeStructuredOutputCompatible(prop, prop.required || []) properties[propName] = wasOptional - ? { ...transformed, type: ['object', 'null'] } - : transformed + ? { ...nested.schema, type: ['object', 'null'] } + : nested.schema + widenedHere = wasOptional + childMap = nested.nullWidening } else if (prop.type === 'array' && prop.items) { const items = Array.isArray(prop.items) ? prop.items[0] : prop.items - const transformed: JSONSchema = { + const nestedItems = items + ? makeStructuredOutputCompatible(items, items.required || []) + : undefined + properties[propName] = { ...prop, - items: items - ? makeStructuredOutputCompatible(items, items.required || []) - : prop.items, + items: nestedItems ? nestedItems.schema : prop.items, + ...(wasOptional ? { type: ['array', 'null'] } : {}), } - properties[propName] = wasOptional - ? { ...transformed, type: ['array', 'null'] } - : transformed + widenedHere = wasOptional + childMap = nestedItems?.nullWidening + ? { items: nestedItems.nullWidening } + : undefined } else if (wasOptional) { - // Make optional fields nullable by adding null to the type + // Make optional fields nullable by adding null to the type. Mark + // `widenedHere` only where we actually add `null`; a field already + // typed nullable (`.nullish()`) is left as-is and keeps its null. if (prop.type && !Array.isArray(prop.type)) { - properties[propName] = { - ...prop, - type: [prop.type, 'null'], - } + properties[propName] = { ...prop, type: [prop.type, 'null'] } + widenedHere = true } else if (Array.isArray(prop.type) && !prop.type.includes('null')) { - properties[propName] = { - ...prop, - type: [...prop.type, 'null'], - } + properties[propName] = { ...prop, type: [...prop.type, 'null'] } + widenedHere = true + } + } + + if (widenedHere || childMap) { + propertyMaps[propName] = { + ...(childMap ?? {}), + ...(widenedHere ? { widened: true } : {}), } } } @@ -151,17 +185,23 @@ function makeStructuredOutputCompatible( result.required = allPropertyNames // additionalProperties must be false result.additionalProperties = false + if (Object.keys(propertyMaps).length > 0) map.properties = propertyMaps } // Handle array types with object items if (result.type === 'array' && result.items) { const items = Array.isArray(result.items) ? result.items[0] : result.items if (items) { - result.items = makeStructuredOutputCompatible(items, items.required || []) + const nestedItems = makeStructuredOutputCompatible( + items, + items.required || [], + ) + result.items = nestedItems.schema + if (nestedItems.nullWidening) map.items = nestedItems.nullWidening } } - return result + return { schema: result, nullWidening: pruneMap(map) } } /** @@ -179,6 +219,48 @@ export interface ConvertSchemaOptions { forStructuredOutput?: boolean } +/** + * Normalize any supported schema input to a typed, UN-widened `JSONSchema` — + * the shared first half of conversion, before any structured-output widening. + * + * - Standard JSON Schemas are rebuilt structurally (dropping `$schema`, which + * LLM providers ignore) and given the explicit `type`/`properties`/`required` + * defaults object shapes need downstream. + * - Plain `JSONSchema` inputs are rebuilt into the typed view; non-object inputs + * are surfaced untouched (they can't be widened). + * - Standard Schema validators lacking a `~standard.jsonSchema` converter throw + * with actionable guidance, rather than shipping `{ '~standard': … }` to the + * provider and producing an opaque downstream error. + */ +function toTypedJsonSchema(schema: SchemaInput): JSONSchema | undefined { + if (isStandardJSONSchema(schema)) { + const jsonSchema = schema['~standard'].jsonSchema.input({ + target: 'draft-07', + }) + const result: JSONSchema = toJsonSchema(jsonSchema) + if ('properties' in result && !result.type) result.type = 'object' + if (result.type === 'object' && !('properties' in result)) { + result.properties = {} + } + if (result.type === 'object' && !('required' in result)) { + result.required = [] + } + return result + } + + if (isStandardSchema(schema)) { + throw new Error( + 'Schema is a Standard Schema validator but does not expose a JSON Schema ' + + 'converter on `~standard.jsonSchema`. Use Zod v4.2+, ArkType v2.1.28+, ' + + 'or wrap a Valibot schema with `toStandardJsonSchema()` from ' + + '`@valibot/to-json-schema` before passing it as `outputSchema`.', + ) + } + + if (typeof schema !== 'object') return schema + return toJsonSchema(schema) +} + /** * Converts a Standard JSON Schema compliant schema or plain JSONSchema to JSON Schema format * compatible with LLM providers. @@ -247,77 +329,48 @@ export function convertSchemaToJsonSchema( const { forStructuredOutput = false } = options - // If it's a Standard JSON Schema compliant schema, use the standard interface - if (isStandardJSONSchema(schema)) { - const jsonSchema = schema['~standard'].jsonSchema.input({ - target: 'draft-07', - }) - - // Rebuild structurally so the typed JSONSchema view is acquired without - // a `Record as JSONSchema` cast; `toJsonSchema()` also - // drops the `$schema` key which LLM providers don't need. - let result: JSONSchema = toJsonSchema(jsonSchema) - - // Ensure object schemas always have type: "object" - // If it has properties (even empty), it should be an object type - if ('properties' in result && !result.type) { - result.type = 'object' - } - - // Ensure properties exists for object types (even if empty) - if (result.type === 'object' && !('properties' in result)) { - result.properties = {} - } - - // Ensure required exists for object types (even if empty array) - if (result.type === 'object' && !('required' in result)) { - result.required = [] - } - - // Apply structured output transformation if requested - if (forStructuredOutput) { - result = makeStructuredOutputCompatible(result, result.required || []) - } - - return result - } - - // Detect Standard Schema validators (Zod, ArkType, Valibot, …) that don't - // expose a `~standard.jsonSchema` converter. These would otherwise fall - // through to the JSONSchema pass-through below and ship `{ '~standard': … }` - // straight to the LLM provider, producing an opaque downstream error. Fail - // fast with actionable guidance instead. - if (isStandardSchema(schema)) { - throw new Error( - 'Schema is a Standard Schema validator but does not expose a JSON Schema ' + - 'converter on `~standard.jsonSchema`. Use Zod v4.2+, ArkType v2.1.28+, ' + - 'or wrap a Valibot schema with `toStandardJsonSchema()` from ' + - '`@valibot/to-json-schema` before passing it as `outputSchema`.', - ) - } - - // If it's not a Standard JSON Schema, assume it's already a JSONSchema and pass through - // Still apply structured output transformation if requested - - // At this branch, `schema` is the plain `JSONSchema` arm of `SchemaInput` - // (the two `~standard` arms were handled above). When no transformation - // is requested we pass the schema through by reference to preserve - // identity for callers that compare via `===`. - if (typeof schema !== 'object') { - // The SchemaInput union is object-shaped on every arm; if we ever hit a - // non-object here, propagate it untouched and let the downstream - // provider error loudly rather than silently widen. + // Plain-JSONSchema passthrough: with no widening requested, return the schema + // by reference so callers comparing via `===` keep identity. Only the widening + // path needs the rebuilt, normalized view from `toTypedJsonSchema`. + if ( + !forStructuredOutput && + !isStandardJSONSchema(schema) && + !isStandardSchema(schema) + ) { return schema } - if (forStructuredOutput) { - // Build a typed view structurally so we don't need a SchemaInput→JSONSchema - // cast on the transformation path. - const typedView = toJsonSchema(schema) - return makeStructuredOutputCompatible(typedView, typedView.required || []) - } + const base = toTypedJsonSchema(schema) + // Non-object inputs can't be widened; surface them untouched. + if (!base || typeof base !== 'object') return base + if (!forStructuredOutput) return base + return makeStructuredOutputCompatible(base, base.required || []).schema +} - return schema +/** + * Convert a schema for structured output AND capture the {@link NullWideningMap} + * recording every `null` the strict-mode widening synthesized. The map lets the + * caller undo that widening on the provider's response (via `undoNullWidening`) + * before validating against the original schema — optional fields read back as + * absent while genuine `.nullable()` nulls survive. The map is `undefined` when + * the schema isn't a widenable object or when no field needed widening. + */ +export function convertSchemaForStructuredOutput( + schema: SchemaInput | undefined, +): { + jsonSchema: JSONSchema | undefined + nullWideningMap: NullWideningMap | undefined +} { + if (!schema) return { jsonSchema: undefined, nullWideningMap: undefined } + const base = toTypedJsonSchema(schema) + if (!base || typeof base !== 'object') { + return { jsonSchema: base, nullWideningMap: undefined } + } + const { schema: jsonSchema, nullWidening } = makeStructuredOutputCompatible( + base, + base.required || [], + ) + return { jsonSchema, nullWideningMap: nullWidening } } /** diff --git a/packages/ai/tests/chat-structured-output-null-normalization.test.ts b/packages/ai/tests/chat-structured-output-null-normalization.test.ts new file mode 100644 index 000000000..7d5a372bf --- /dev/null +++ b/packages/ai/tests/chat-structured-output-null-normalization.test.ts @@ -0,0 +1,314 @@ +/** + * Structured output: schema-aware null normalization. + * + * To satisfy OpenAI-style strict schemas, optional fields are widened to + * `required` + nullable, so the provider returns `null` for an absent optional. + * Validating that `null` against the original schema (`.optional()` === + * `T | undefined`, NOT `T | null`) used to throw. The engine now undoes the + * widening before validation — dropping synthesized nulls while preserving the + * ones a `.nullable()` field genuinely allows. + */ +import { describe, expect, it } from 'vitest' +import { z } from 'zod' +import { undoNullWidening } from '@tanstack/ai-utils' +import { chat } from '../src/activities/chat/index' +import { convertSchemaForStructuredOutput } from '../src/activities/chat/tools/schema-converter' +import { EventType } from '../src/types' +import { collectChunks, createMockAdapter } from './test-utils' +import type { StreamChunk } from '../src/types' + +const messages = [{ role: 'user' as const, content: 'go' }] + +/** Find the terminal `structured-output.complete` event and return its value. */ +function completeValue(chunks: Array): { + object: unknown + raw: string + reasoning?: string +} { + const complete = chunks.find( + (c) => + c.type === EventType.CUSTOM && + (c as { name?: string }).name === 'structured-output.complete', + ) + expect(complete).toBeDefined() + return ( + complete as { value: { object: unknown; raw: string; reasoning?: string } } + ).value +} + +const completeObject = (chunks: Array): unknown => + completeValue(chunks).object + +/** A native-combined turn: the schema-constrained JSON arrives as assistant text. */ +function textTurn(json: string): Array { + const timestamp = Date.now() + return [ + { type: EventType.RUN_STARTED, runId: 'r1', threadId: 't1', timestamp }, + { + type: EventType.TEXT_MESSAGE_START, + messageId: 'm1', + role: 'assistant', + timestamp, + }, + { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId: 'm1', + delta: json, + timestamp, + }, + { type: EventType.TEXT_MESSAGE_END, messageId: 'm1', timestamp }, + { + type: EventType.RUN_FINISHED, + runId: 'r1', + threadId: 't1', + finishReason: 'stop', + timestamp, + }, + ] as Array +} + +describe('structured output null normalization', () => { + it('drops a provider null for an optional field so validation passes', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + }) + const { adapter } = createMockAdapter({ + // Strict-mode widening makes the provider return `null` for the absent + // optional. A schema-blind round-trip would fail validation here. + structuredOutput: async () => ({ + data: { title: 'Ship it', note: null }, + rawText: '{"title":"Ship it","note":null}', + }), + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it' }) + expect('note' in result).toBe(false) + }) + + it('keeps a genuine null for a nullable field', async () => { + const outputSchema = z.object({ + title: z.string(), + tag: z.string().nullable(), + }) + const { adapter } = createMockAdapter({ + structuredOutput: async () => ({ + data: { title: 'Ship it', tag: null }, + rawText: '{"title":"Ship it","tag":null}', + }), + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it', tag: null }) + }) + + // The streaming path doesn't schema-validate server-side, but it now un-widens + // the terminal `structured-output.complete` object inside the engine — so a + // consumer validating the assembled object downstream doesn't choke on a + // synthesized `null` for an `.optional()` field, while genuine `.nullable()` + // nulls still reach them. Mirrors the Promise behaviour above. + describe('streaming (stream: true)', () => { + it('un-widens the streamed structured-output.complete object', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + tag: z.string().nullable(), + }) + const { adapter } = createMockAdapter({ + // No native structuredOutputStream → engine wraps structuredOutput via + // the fallback stream, then normalizes the complete event. + structuredOutput: async () => ({ + data: { title: 'Ship it', note: null, tag: null }, + rawText: '{"title":"Ship it","note":null,"tag":null}', + }), + }) + + const stream = chat({ adapter, messages, outputSchema, stream: true }) + const chunks = await collectChunks( + stream as unknown as AsyncIterable, + ) + + const object = completeObject(chunks) + // `note` (optional → synthesized null) dropped; `tag` (nullable) kept. + expect(object).toEqual({ title: 'Ship it', tag: null }) + expect('note' in (object as object)).toBe(false) + }) + + it('rewrites only `object`, preserving the event’s `raw` and `reasoning`', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + }) + const raw = '{"title":"Ship it","note":null}' + // A NATIVE structuredOutputStream emits the terminal complete event with + // the widened object plus sibling `raw`/`reasoning` fields. The engine's + // outbound rewrite must replace `object` (un-widened) while spreading the + // rest of the value through untouched. + const { adapter } = createMockAdapter({ + structuredOutputStream: () => + (async function* () { + yield { type: EventType.RUN_STARTED, runId: 'r', threadId: 't' } + yield { + type: EventType.CUSTOM, + name: 'structured-output.complete', + value: { + object: { title: 'Ship it', note: null }, + raw, + reasoning: 'thought about it', + }, + } + yield { + type: EventType.RUN_FINISHED, + runId: 'r', + threadId: 't', + finishReason: 'stop', + } + })() as AsyncIterable, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages, + outputSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const value = completeValue(chunks) + expect(value.object).toEqual({ title: 'Ship it' }) + expect('note' in (value.object as object)).toBe(false) + // Sibling fields survive the rewrite. + expect(value.raw).toBe(raw) + expect(value.reasoning).toBe('thought about it') + }) + }) + + // Native-combined mode (adapter declares `supportsCombinedToolsAndSchema`): + // the engine harvests the JSON from the agent loop's accumulated final-turn + // text (`JSON.parse`, which preserves provider nulls) rather than from a + // separate structuredOutput call — a distinct capture site that must also + // un-widen. Covers both Promise and streaming. + describe('native-combined mode', () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + tag: z.string().nullable(), + }) + const json = '{"title":"Ship it","note":null,"tag":null}' + + it('un-widens the harvested Promise result', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn(json)], + supportsCombinedToolsAndSchema: true, + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it', tag: null }) + expect('note' in result).toBe(false) + }) + + it('un-widens the synthesized streaming complete event', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn(json)], + supportsCombinedToolsAndSchema: true, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages, + outputSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const object = completeObject(chunks) + expect(object).toEqual({ title: 'Ship it', tag: null }) + expect('note' in (object as object)).toBe(false) + }) + }) +}) + +// Closes the gap between the two halves of the fix: the widening pass that +// PRODUCES the map and the `undoNullWidening` pass that CONSUMES it. The unit +// tests in `@tanstack/ai-utils` drive `undoNullWidening` with hand-authored +// maps; here we run a real schema through `convertSchemaForStructuredOutput` +// and feed a provider-shaped payload back through the map it produced, proving +// the two can't drift. +describe('convertSchemaForStructuredOutput → undoNullWidening round trip', () => { + it('un-widens a nested schema using the map the conversion produced', () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), // widened scalar + tag: z.string().nullable(), // genuine nullable — not widened + meta: z + .object({ author: z.string(), rev: z.number().optional() }) + .optional(), // widened object with an inner widened field + items: z.array( + z.object({ id: z.string(), label: z.string().optional() }), + ), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + expect(nullWideningMap).toBeDefined() + + // What a strict provider returns: every absent optional comes back `null`. + const providerPayload = { + title: 'T', + note: null, + tag: null, + meta: { author: 'A', rev: null }, + items: [ + { id: '1', label: null }, + { id: '2', label: 'x' }, + ], + } + + expect(undoNullWidening(providerPayload, nullWideningMap)).toEqual({ + title: 'T', + tag: null, + meta: { author: 'A' }, + items: [{ id: '1' }, { id: '2', label: 'x' }], + }) + }) + + it('drops a widened nested object that comes back null', () => { + const outputSchema = z.object({ + title: z.string(), + meta: z.object({ author: z.string() }).optional(), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + const result = undoNullWidening( + { title: 'T', meta: null }, + nullWideningMap, + ) as Record + + expect(result).toEqual({ title: 'T' }) + expect('meta' in result).toBe(false) + }) + + it('keeps a genuine `.nullable()` null inside array items', () => { + // The widener does NOT touch `note` (it's `.nullable()`, not `.optional()`), + // so its null must survive even though it sits inside an array item — the + // exact spot the tuple/array handling could wrongly strip it. + const outputSchema = z.object({ + items: z.array(z.object({ id: z.string(), note: z.string().nullable() })), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + const payload = { + items: [ + { id: '1', note: null }, + { id: '2', note: 'kept' }, + ], + } + + expect(undoNullWidening(payload, nullWideningMap)).toEqual(payload) + }) +}) diff --git a/packages/openai-base/src/adapters/chat-completions-text.ts b/packages/openai-base/src/adapters/chat-completions-text.ts index 488c31836..febffcb0b 100644 --- a/packages/openai-base/src/adapters/chat-completions-text.ts +++ b/packages/openai-base/src/adapters/chat-completions-text.ts @@ -4,7 +4,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../utils/request-options' import { makeStructuredOutputCompatible } from '../utils/schema-converter' import { buildChatCompletionsUsage } from '../usage' @@ -213,10 +213,8 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< ) } - // Transform null values to undefined to match original Zod schema expectations - // Provider returns null for optional fields we made nullable in the schema. - // Subclasses can override `transformStructuredOutput` to skip this — e.g. - // OpenRouter historically passed nulls through unchanged. + // Final provider-specific shaping pass (default passthrough). Null-widening + // from strict mode is undone by the engine, not here. const transformed = this.transformStructuredOutput(parsed) return { @@ -595,13 +593,17 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< /** * Final shaping pass applied to parsed structured-output JSON before it is - * returned to the caller. Default converts `null` values to `undefined` so - * the result aligns with the original Zod schema's optional-field - * semantics. Subclasses with different conventions (OpenRouter historically - * preserves nulls) can override. + * returned to the caller. Default is a passthrough. + * + * Provider `null`s are no longer stripped here: strict-mode null-widening is + * now undone precisely by the engine (`undoNullWidening`, driven by the + * schema's null-widening map) the moment the result is captured, so a blind + * `transformNullsToUndefined` at the adapter would only destroy genuine + * `.nullable()` nulls. Subclasses may still override to remap or reshape the + * provider's structured output. */ protected transformStructuredOutput(parsed: unknown): unknown { - return transformNullsToUndefined(parsed) + return parsed } /** diff --git a/packages/openai-base/src/adapters/responses-text.ts b/packages/openai-base/src/adapters/responses-text.ts index 454c96e03..53c793aa0 100644 --- a/packages/openai-base/src/adapters/responses-text.ts +++ b/packages/openai-base/src/adapters/responses-text.ts @@ -4,7 +4,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../utils/request-options' import { makeStructuredOutputCompatible } from '../utils/schema-converter' import { buildResponsesUsage } from '../usage' @@ -247,12 +247,8 @@ export abstract class OpenAIBaseResponsesTextAdapter< ) } - // Apply the provider-specific post-parse shaping (default: null → - // undefined to align with the original Zod schema's optional-field - // semantics; subclasses with different conventions can override - // `transformStructuredOutput`, mirroring the chat-completions base's - // hook so OpenRouter and other providers that preserve nulls in - // structured output can opt out without forking `structuredOutput`). + // Provider-specific post-parse shaping (default passthrough). Null-widening + // from strict mode is undone by the engine, not here. const transformed = this.transformStructuredOutput(parsed) return { @@ -577,7 +573,10 @@ export abstract class OpenAIBaseResponsesTextAdapter< return } - const transformed = transformNullsToUndefined(parsed) + // Route through the same hook as the non-streaming path (default + // passthrough). Engine un-widens nulls; the streaming path must not strip + // them blindly either. + const transformed = this.transformStructuredOutput(parsed) yield { type: EventType.CUSTOM, @@ -673,15 +672,17 @@ export abstract class OpenAIBaseResponsesTextAdapter< /** * Final shaping pass applied to parsed structured-output JSON before it is - * returned to the caller. Default converts `null` values to `undefined` so - * the result aligns with the original Zod schema's optional-field - * semantics. Subclasses with different conventions (OpenRouter historically - * preserves nulls) can override — mirrors the chat-completions base's hook - * so a subclass that opts out of null-stripping doesn't have to fork the - * whole `structuredOutput` method. + * returned to the caller. Default is a passthrough. + * + * Provider `null`s are no longer stripped here: strict-mode null-widening is + * now undone precisely by the engine (`undoNullWidening`, driven by the + * schema's null-widening map) the moment the result is captured, so a blind + * `transformNullsToUndefined` at the adapter would only destroy genuine + * `.nullable()` nulls. Subclasses may still override to remap or reshape the + * provider's structured output. */ protected transformStructuredOutput(parsed: unknown): unknown { - return transformNullsToUndefined(parsed) + return parsed } /** diff --git a/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts b/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts index 638822f40..f0a68951e 100644 --- a/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts +++ b/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts @@ -147,6 +147,44 @@ describe('OpenAIBaseChatCompletionsTextAdapter.structuredOutputStream', () => { expect(complete!.value.raw).toBe(json) }) + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { + // Mirror of the non-streaming `transformStructuredOutput` passthrough test + // (`chat-completions-text.test.ts`) for the STREAMING path: the adapter no + // longer strips nulls — strict-mode null-widening is undone precisely by + // the engine, so a blind adapter-level strip would also destroy genuine + // `.nullable()` nulls. Guards against the stream path regressing to a strip + // while the non-stream path doesn't. + const json = '{"name":"Alice","nickname":null}' + setupStreamingMock([deltaChunk(json), finishChunk()]) + const adapter = new TestAdapter() + + const chunks = await collect( + adapter.structuredOutputStream!({ + chatOptions: { + model: 'test-model', + messages: [{ role: 'user', content: 'extract' }], + logger: testLogger, + }, + outputSchema: { + type: 'object', + properties: { + name: { type: 'string' }, + nickname: { type: 'string' }, + }, + required: ['name'], + additionalProperties: false, + }, + }), + ) + + const complete = chunks.find( + (c) => + c.type === 'CUSTOM' && + (c as { name?: string }).name === 'structured-output.complete', + ) as { value: { object: unknown } } | undefined + expect(complete!.value.object).toEqual({ name: 'Alice', nickname: null }) + }) + it('sends response_format: { type: "json_schema", strict: true } in the request', async () => { setupStreamingMock([deltaChunk('{"name":"X","age":1}'), finishChunk()]) const adapter = new TestAdapter() diff --git a/packages/openai-base/tests/chat-completions-text.test.ts b/packages/openai-base/tests/chat-completions-text.test.ts index 656dd876b..e1c82e6e5 100644 --- a/packages/openai-base/tests/chat-completions-text.test.ts +++ b/packages/openai-base/tests/chat-completions-text.test.ts @@ -723,7 +723,7 @@ describe('OpenAIBaseChatCompletionsTextAdapter', () => { ) }) - it('transforms null values to undefined', async () => { + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { const nonStreamResponse = { choices: [ { @@ -756,10 +756,13 @@ describe('OpenAIBaseChatCompletionsTextAdapter', () => { // `result.data` is typed as `unknown` from the schema-less call; // narrow it to the shape this test produces. - const data = result.data as { name?: string; nickname?: string } - // null should be transformed to undefined + const data = result.data as { name?: string; nickname?: string | null } + // The adapter no longer strips nulls — strict-mode null-widening is undone + // precisely by the engine, which holds the schema's widening map. A blind + // adapter-level strip would also destroy genuine `.nullable()` nulls, so + // the adapter passes the provider's payload through verbatim. expect(data.name).toBe('Alice') - expect(data.nickname).toBeUndefined() + expect(data.nickname).toBeNull() }) it('throws on invalid JSON response', async () => { diff --git a/packages/openai-base/tests/responses-text.test.ts b/packages/openai-base/tests/responses-text.test.ts index 72b198afa..7fd8f6f97 100644 --- a/packages/openai-base/tests/responses-text.test.ts +++ b/packages/openai-base/tests/responses-text.test.ts @@ -1720,7 +1720,7 @@ describe('OpenAIBaseResponsesTextAdapter', () => { ) }) - it('transforms null values to undefined', async () => { + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { const nonStreamResponse = { output: [ { @@ -1755,9 +1755,11 @@ describe('OpenAIBaseResponsesTextAdapter', () => { }, }) - // null should be transformed to undefined + // The adapter no longer strips nulls — strict-mode null-widening is undone + // precisely by the engine, which holds the schema's widening map. A blind + // adapter-level strip would also destroy genuine `.nullable()` nulls. expect((result.data as any).name).toBe('Alice') - expect((result.data as any).nickname).toBeUndefined() + expect((result.data as any).nickname).toBeNull() }) it('throws on invalid JSON response', async () => { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ab84c8703..36bd8bdbc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -999,6 +999,9 @@ importers: '@tanstack/ai-event-client': specifier: workspace:* version: link:../ai-event-client + '@tanstack/ai-utils': + specifier: workspace:* + version: link:../ai-utils partial-json: specifier: ^0.1.7 version: 0.1.7 diff --git a/testing/e2e/fixtures/structured-output-stream/basic.json b/testing/e2e/fixtures/structured-output-stream/basic.json index 62ef046fc..3a01afe8f 100644 --- a/testing/e2e/fixtures/structured-output-stream/basic.json +++ b/testing/e2e/fixtures/structured-output-stream/basic.json @@ -5,7 +5,7 @@ "userMessage": "[structured-stream] recommend a guitar as json" }, "response": { - "content": "{\"name\":\"Fender Stratocaster\",\"price\":1299,\"reason\":\"Versatile tone and comfortable playability\",\"rating\":5}" + "content": "{\"name\":\"Fender Stratocaster\",\"price\":1299,\"reason\":\"Versatile tone and comfortable playability\",\"rating\":5,\"condition\":null}" } } ] diff --git a/testing/e2e/src/lib/schemas.ts b/testing/e2e/src/lib/schemas.ts index 7eaa2d53f..8b9bf6753 100644 --- a/testing/e2e/src/lib/schemas.ts +++ b/testing/e2e/src/lib/schemas.ts @@ -5,6 +5,12 @@ export const guitarRecommendationSchema = z.object({ price: z.number(), reason: z.string(), rating: z.number().min(1).max(5), + // Optional field used to exercise strict-mode null-widening end to end: + // the schema converter widens this to `required` + nullable, so a provider + // returns `null` for an absent value. The engine must undo that widening so + // the field reads back as ABSENT (matching `.optional()`), not `null`. See + // `structured-output-stream.spec.ts`. + condition: z.string().optional(), }) export const imageAnalysisSchema = z.object({ diff --git a/testing/e2e/tests/structured-output-stream.spec.ts b/testing/e2e/tests/structured-output-stream.spec.ts index 113fc61f6..a28d65f18 100644 --- a/testing/e2e/tests/structured-output-stream.spec.ts +++ b/testing/e2e/tests/structured-output-stream.spec.ts @@ -51,6 +51,14 @@ for (const provider of providersFor('structured-output-stream')) { expect(parsed.name).toContain('Fender Stratocaster') expect(parsed.price).toBe(1299) + // `condition` is `.optional()`, so strict-mode widening made the provider + // return `null` for it (see the fixture). The engine must un-widen that + // synthesized null before the streamed `structured-output.complete` event + // reaches the consumer, so the field reads back as ABSENT — not `null`. + // Pre-fix, null-preserving adapters (e.g. openrouter) leaked the `null` + // straight through on the streaming path. + expect('condition' in parsed).toBe(false) + // Verify the response actually streamed (more than one content delta). // A regression that silently fell back to the synthetic single-delta // path would still pass the substring assertion above but fail here. diff --git a/testing/react-native-smoke/metro.config.cjs b/testing/react-native-smoke/metro.config.cjs index d14924c6c..5f221a34a 100644 --- a/testing/react-native-smoke/metro.config.cjs +++ b/testing/react-native-smoke/metro.config.cjs @@ -16,6 +16,7 @@ const packageEntryPoints = new Map([ '@tanstack/ai-event-client', resolve(repoRoot, 'packages/ai-event-client/src/index.ts'), ], + ['@tanstack/ai-utils', resolve(repoRoot, 'packages/ai-utils/src/index.ts')], ['@tanstack/ai-react', resolve(repoRoot, 'packages/ai-react/src/index.ts')], ]) const rewriteOriginRoots = [ diff --git a/testing/react-native-smoke/scripts/assert-import-surface.ts b/testing/react-native-smoke/scripts/assert-import-surface.ts index e0888b5af..d68742d5d 100644 --- a/testing/react-native-smoke/scripts/assert-import-surface.ts +++ b/testing/react-native-smoke/scripts/assert-import-surface.ts @@ -11,6 +11,7 @@ const packageEntries = new Map([ ['@tanstack/ai-react', 'packages/ai-react/src/index.ts'], ['@tanstack/ai-client', 'packages/ai-client/src/index.ts'], ['@tanstack/ai-event-client', 'packages/ai-event-client/src/index.ts'], + ['@tanstack/ai-utils', 'packages/ai-utils/src/index.ts'], ['@tanstack/ai/client', 'packages/ai/src/client.ts'], ]) diff --git a/testing/react-native-smoke/scripts/esbuild-smoke.ts b/testing/react-native-smoke/scripts/esbuild-smoke.ts index a20fdc4bc..9eb7c10a8 100644 --- a/testing/react-native-smoke/scripts/esbuild-smoke.ts +++ b/testing/react-native-smoke/scripts/esbuild-smoke.ts @@ -23,6 +23,7 @@ await build({ repoRoot, 'packages/ai-event-client/src/index.ts', ), + '@tanstack/ai-utils': resolve(repoRoot, 'packages/ai-utils/src/index.ts'), '@tanstack/ai-react': resolve(repoRoot, 'packages/ai-react/src/index.ts'), 'react-native': resolve(scriptDir, 'react-native-runtime-stub.tsx'), }, diff --git a/testing/react-native-smoke/tsconfig.json b/testing/react-native-smoke/tsconfig.json index 5723bfb01..b97ea5fef 100644 --- a/testing/react-native-smoke/tsconfig.json +++ b/testing/react-native-smoke/tsconfig.json @@ -17,6 +17,7 @@ "@tanstack/ai-event-client": [ "../../packages/ai-event-client/src/index.ts" ], + "@tanstack/ai-utils": ["../../packages/ai-utils/src/index.ts"], "@tanstack/ai-react": ["../../packages/ai-react/src/index.ts"] } },