From 16af8c9579a5febc1ee078ca93ab678acd809aa6 Mon Sep 17 00:00:00 2001 From: Drew Hoover Date: Tue, 9 Jun 2026 15:23:23 -0400 Subject: [PATCH 1/8] fix(ai): undo strict-mode null-widening before structured-output validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optional fields are widened to required+nullable for strict structured output, so providers return `null` for an absent optional. Validating that `null` against the original schema failed (`.optional()` is `T | undefined`, not `T | null`), surfacing as a StandardSchemaValidationError — most visibly through @tanstack/ai-openrouter, whose adapter preserves provider nulls. Add `undoNullWidening(value, schema)` to @tanstack/ai-utils: a schema-aware counterpart to `transformNullsToUndefined` that drops only synthesized nulls (those the original JSON Schema disallows) while preserving the ones a `.nullable()`/`.nullish()` field genuinely allows. The chat activity runs it on the structured-output result before Standard Schema validation. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../structured-output-undo-null-widening.md | 12 ++ packages/ai-utils/src/index.ts | 3 +- packages/ai-utils/src/transforms.ts | 103 ++++++++++++++++++ packages/ai-utils/tests/transforms.test.ts | 99 ++++++++++++++++- packages/ai/package.json | 1 + packages/ai/src/activities/chat/index.ts | 18 ++- ...ructured-output-null-normalization.test.ts | 55 ++++++++++ pnpm-lock.yaml | 3 + 8 files changed, 291 insertions(+), 3 deletions(-) create mode 100644 .changeset/structured-output-undo-null-widening.md create mode 100644 packages/ai/tests/chat-structured-output-null-normalization.test.ts diff --git a/.changeset/structured-output-undo-null-widening.md b/.changeset/structured-output-undo-null-widening.md new file mode 100644 index 000000000..f658ed3ef --- /dev/null +++ b/.changeset/structured-output-undo-null-widening.md @@ -0,0 +1,12 @@ +--- +'@tanstack/ai-utils': minor +'@tanstack/ai': patch +--- + +Fix structured output validation rejecting `null` for optional fields. + +Strict-mode structured output widens optional fields to `required` + nullable, so the provider returns `null` for an absent optional. Validating that `null` against the original schema then failed, because `.optional()` means `T | undefined`, not `T | null` — surfacing as a `StandardSchemaValidationError` (e.g. `Invalid type: Expected string but received null`). This was most visible through `@tanstack/ai-openrouter`, whose adapter preserves provider nulls. + +The engine now undoes the widening before validation: it drops the synthesized nulls while preserving the ones a `.nullable()`/`.nullish()` field genuinely allows, so both optional and nullable fields round-trip correctly. + +- `@tanstack/ai-utils` adds `undoNullWidening(value, schema)` — a schema-aware counterpart to `transformNullsToUndefined` that only strips nulls the original JSON Schema disallows. 
diff --git a/packages/ai-utils/src/index.ts b/packages/ai-utils/src/index.ts index d6b79c9b8..bf7f7a024 100644 --- a/packages/ai-utils/src/index.ts +++ b/packages/ai-utils/src/index.ts @@ -1,4 +1,5 @@ export { generateId } from './id' export { getApiKeyFromEnv } from './env' -export { transformNullsToUndefined } from './transforms' +export { transformNullsToUndefined, undoNullWidening } from './transforms' +export type { JsonSchemaNode } from './transforms' export { arrayBufferToBase64, base64ToArrayBuffer } from './base64' diff --git a/packages/ai-utils/src/transforms.ts b/packages/ai-utils/src/transforms.ts index 53eba0ecb..4d64c6ce3 100644 --- a/packages/ai-utils/src/transforms.ts +++ b/packages/ai-utils/src/transforms.ts @@ -21,6 +21,11 @@ * therefore become `{}`; arbitrary class instances become a plain-object * snapshot of just their own enumerable string properties. Don't pass * non-JSON values. + * + * Schema-blind: strips EVERY null, including ones a `.nullable()` field + * legitimately allows. When the original schema is available, prefer + * {@link undoNullWidening}, which only strips the nulls strict-mode widening + * synthesized. */ export function transformNullsToUndefined(obj: T): T { if (obj === null) { @@ -44,3 +49,101 @@ export function transformNullsToUndefined(obj: T): T { } return result as T } + +/** + * Minimal structural view of a JSON Schema node — just the keywords + * {@link undoNullWidening} consults. Kept local so `@tanstack/ai-utils` stays + * dependency-free; the richer `JSONSchema` from `@tanstack/ai` is structurally + * assignable to it. + */ +export type JsonSchemaNode = { + type?: string | Array + properties?: Record + items?: JsonSchemaNode | Array + anyOf?: Array + oneOf?: Array + [key: string]: unknown +} + +/** + * Whether the schema node permits `null` — directly via `type` or through an + * `anyOf`/`oneOf` branch (how Valibot's `nullable`/`nullish` and Zod serialize). 
+ */ +function allowsNull(schema: JsonSchemaNode): boolean { + if (schema.type === 'null') return true + if (Array.isArray(schema.type) && schema.type.includes('null')) return true + const variants = schema.anyOf ?? schema.oneOf + return variants ? variants.some(allowsNull) : false +} + +/** + * For a composite (object/array) value under an `anyOf`/`oneOf` schema, pick the + * non-null branch describing that value's real shape so recursion can follow it + * (e.g. a `nullable(object({...}))` serializes as `anyOf: [object, null]`). + */ +function resolveSchema(schema: JsonSchemaNode, value: unknown): JsonSchemaNode { + const variants = schema.anyOf ?? schema.oneOf + if (!variants) return schema + const isArray = Array.isArray(value) + const match = variants.find((variant) => { + if (variant.type === 'null') return false + return isArray + ? variant.type === 'array' || variant.items !== undefined + : variant.type === 'object' || variant.properties !== undefined + }) + return match ?? schema +} + +function walk(value: unknown, schema: JsonSchemaNode | undefined): unknown { + if (value === null) { + // Strip only when the schema is present AND definitively disallows null — + // i.e. the null was synthesized by strict-mode null-widening of an optional + // field. Keep nulls a `.nullable()` field genuinely allows, and — being + // conservative — nulls under shapes the schema doesn't describe. + return schema && !allowsNull(schema) ? undefined : null + } + if (typeof value !== 'object') return value + // Unknown shape (no schema, or `additionalProperties`): leave it untouched + // rather than guess which nulls are synthetic. + if (!schema) return value + + if (Array.isArray(value)) { + const resolved = resolveSchema(schema, value) + const itemSchema = Array.isArray(resolved.items) + ? 
resolved.items[0] + : resolved.items + return value.map((item) => walk(item, itemSchema)) + } + + const resolved = resolveSchema(schema, value) + const props = resolved.properties + const result: Record<string, unknown> = {} + for (const [key, child] of Object.entries(value as Record<string, unknown>)) { + const next = walk(child, props?.[key]) + // A synthesized null collapsed to undefined → omit the key so the field + // reads as absent (`key in result === false`), matching how `.optional()` + // treats absence. + if (next === undefined) continue + result[key] = next + } + return result +} + +/** + * Schema-aware inverse of strict-mode null-widening for structured output. + * + * To satisfy OpenAI-style strict schemas, optional fields are widened to + * `required` with `null` added to their type, so the provider returns `null` + * for an absent optional. Validating that `null` against the ORIGINAL schema + * fails, because `.optional()` means `T | undefined`, not `T | null`. + * + * Unlike {@link transformNullsToUndefined}, this consults the original + * (un-widened) JSON Schema and only drops nulls the schema does NOT permit — + * the synthesized ones. Nulls a `.nullable()`/`.nullish()` field genuinely + * allows are preserved, so both `optional` and `nullable` fields round-trip + * correctly. With no schema, the value is returned untouched. 
+ */ +export function undoNullWidening(value: T, schema?: JsonSchemaNode): T { + if (!schema) return value + return walk(value, schema) as T +} diff --git a/packages/ai-utils/tests/transforms.test.ts b/packages/ai-utils/tests/transforms.test.ts index 8ce65c1b6..f84ee7569 100644 --- a/packages/ai-utils/tests/transforms.test.ts +++ b/packages/ai-utils/tests/transforms.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect } from 'vitest' -import { transformNullsToUndefined } from '../src/transforms' +import type { JsonSchemaNode } from '../src/transforms' +import { + transformNullsToUndefined, + undoNullWidening, +} from '../src/transforms' describe('transformNullsToUndefined', () => { it('should convert null values to undefined', () => { @@ -49,3 +53,96 @@ describe('transformNullsToUndefined', () => { expect(result).toEqual({ a: { b: { c: { e: 'keep' } } } }) }) }) + +describe('undoNullWidening', () => { + // Mirrors the un-widened JSON Schema a Valibot/Zod object produces: + // req: string (required) -> v.string() + // opt: string, not required -> v.optional(v.string()) + // nul: anyOf[string, null] -> v.nullable(v.string()) + const schema: JsonSchemaNode = { + type: 'object', + properties: { + req: { type: 'string' }, + opt: { type: 'string' }, + nul: { anyOf: [{ type: 'string' }, { type: 'null' }] }, + }, + required: ['req', 'nul'], + } + + it('drops a synthesized null on an optional field (key becomes absent)', () => { + const result = undoNullWidening({ req: 'a', opt: null }, schema) + expect(result).toEqual({ req: 'a' }) + expect('opt' in (result as object)).toBe(false) + }) + + it('keeps a genuine null on a nullable field', () => { + const result = undoNullWidening({ req: 'a', nul: null }, schema) + expect(result).toEqual({ req: 'a', nul: null }) + }) + + it('handles optional and nullable nulls in the same object', () => { + const result = undoNullWidening({ req: 'a', opt: null, nul: null }, schema) + expect(result).toEqual({ req: 'a', nul: null }) + }) + + 
it('leaves present values untouched', () => { + const result = undoNullWidening({ req: 'a', opt: 'b', nul: 'c' }, schema) + expect(result).toEqual({ req: 'a', opt: 'b', nul: 'c' }) + }) + + it('recurses into a nullable object via its anyOf branch', () => { + const nested: JsonSchemaNode = { + type: 'object', + properties: { + obj: { + anyOf: [ + { + type: 'object', + properties: { inner: { type: 'string' }, note: { type: 'string' } }, + required: ['inner'], + }, + { type: 'null' }, + ], + }, + }, + required: ['obj'], + } + // obj itself is present (kept), but its optional `note` came back null. + const result = undoNullWidening({ obj: { inner: 'x', note: null } }, nested) + expect(result).toEqual({ obj: { inner: 'x' } }) + }) + + it('strips synthesized nulls inside array items', () => { + const arrSchema: JsonSchemaNode = { + type: 'object', + properties: { + items: { + type: 'array', + items: { + type: 'object', + properties: { id: { type: 'string' }, label: { type: 'string' } }, + required: ['id'], + }, + }, + }, + required: ['items'], + } + const result = undoNullWidening( + { items: [{ id: '1', label: null }, { id: '2', label: 'two' }] }, + arrSchema, + ) + expect(result).toEqual({ items: [{ id: '1' }, { id: '2', label: 'two' }] }) + }) + + it('returns the value untouched when no schema is supplied', () => { + const value = { a: null, b: 1 } + expect(undoNullWidening(value)).toBe(value) + }) + + it('leaves nulls under unknown (schemaless) properties untouched', () => { + // `extra` is not described by the schema — we cannot prove its null is + // synthetic, so it is preserved. 
+ const result = undoNullWidening({ req: 'a', extra: null } as object, schema) + expect(result).toEqual({ req: 'a', extra: null }) + }) +}) diff --git a/packages/ai/package.json b/packages/ai/package.json index 1ffd3b4f8..128a598f7 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -80,6 +80,7 @@ "@ag-ui/core": "^0.0.52", "@standard-schema/spec": "^1.1.0", "@tanstack/ai-event-client": "workspace:*", + "@tanstack/ai-utils": "workspace:*", "partial-json": "^0.1.7" }, "peerDependencies": { diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index eba41682f..25120d81b 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -6,6 +6,7 @@ */ import { devtoolsMiddleware } from '@tanstack/ai-event-client' +import { undoNullWidening } from '@tanstack/ai-utils' import { stripToSpecMiddleware } from '../../strip-to-spec-middleware' import { streamToText } from '../../stream-to-response.js' import { resolveDebugOption } from '../../logger/resolve' @@ -19,6 +20,7 @@ import { } from './tools/tool-calls' import { convertSchemaToJsonSchema, + isStandardJSONSchema, isStandardSchema, parseWithStandardSchema, } from './tools/schema-converter' @@ -2699,13 +2701,27 @@ async function runAgenticStructuredOutput< throw new Error('Failed to convert output schema to JSON Schema') } + // The un-widened schema (no `forStructuredOutput`) still distinguishes + // genuinely-nullable fields from optional ones, so we can undo strict-mode's + // null-widening before validating: optional fields are widened to + // `required` + nullable for the provider, which then returns `null` for an + // absent optional — a `null` the original `.optional()` (`T | undefined`) + // schema would otherwise reject. `undoNullWidening` drops only those + // synthesized nulls, preserving the ones a `.nullable()` field allows. + const validationSchema = isStandardJSONSchema(outputSchema) + ? 
convertSchemaToJsonSchema(outputSchema) + : undefined + // Validation runs INSIDE the engine (per spec §7.3) so validation failures // route through the engine's terminal-hook chooser as `onError`. We pass a // `validate` callback when the schema is a Standard Schema; otherwise we // pass through the raw data and the engine returns it unchanged. const validate = isStandardSchema(outputSchema) ? (data: unknown): unknown => - parseWithStandardSchema>(outputSchema, data) + parseWithStandardSchema>( + outputSchema, + undoNullWidening(data, validationSchema), + ) : undefined // Per issue #605: same capability check as the streaming path. When the diff --git a/packages/ai/tests/chat-structured-output-null-normalization.test.ts b/packages/ai/tests/chat-structured-output-null-normalization.test.ts new file mode 100644 index 000000000..5179f008c --- /dev/null +++ b/packages/ai/tests/chat-structured-output-null-normalization.test.ts @@ -0,0 +1,55 @@ +/** + * Structured output: schema-aware null normalization. + * + * To satisfy OpenAI-style strict schemas, optional fields are widened to + * `required` + nullable, so the provider returns `null` for an absent optional. + * Validating that `null` against the original schema (`.optional()` === + * `T | undefined`, NOT `T | null`) used to throw. The engine now undoes the + * widening before validation — dropping synthesized nulls while preserving the + * ones a `.nullable()` field genuinely allows. 
+ */ +import { describe, expect, it } from 'vitest' +import { z } from 'zod' +import { chat } from '../src/activities/chat/index' +import { createMockAdapter } from './test-utils' + +const messages = [{ role: 'user' as const, content: 'go' }] + +describe('structured output null normalization', () => { + it('drops a provider null for an optional field so validation passes', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + }) + const { adapter } = createMockAdapter({ + // Strict-mode widening makes the provider return `null` for the absent + // optional. A schema-blind round-trip would fail validation here. + structuredOutput: async () => ({ + data: { title: 'Ship it', note: null }, + rawText: '{"title":"Ship it","note":null}', + }), + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it' }) + expect('note' in result).toBe(false) + }) + + it('keeps a genuine null for a nullable field', async () => { + const outputSchema = z.object({ + title: z.string(), + tag: z.string().nullable(), + }) + const { adapter } = createMockAdapter({ + structuredOutput: async () => ({ + data: { title: 'Ship it', tag: null }, + rawText: '{"title":"Ship it","tag":null}', + }), + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it', tag: null }) + }) +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ab84c8703..36bd8bdbc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -999,6 +999,9 @@ importers: '@tanstack/ai-event-client': specifier: workspace:* version: link:../ai-event-client + '@tanstack/ai-utils': + specifier: workspace:* + version: link:../ai-utils partial-json: specifier: ^0.1.7 version: 0.1.7 From 3f446789063365c1d937e6cf24fc9d0e6a2cc086 Mon Sep 17 00:00:00 2001 From: Drew Hoover Date: Tue, 9 Jun 2026 16:04:18 -0400 Subject: [PATCH 2/8] fix(ai-utils): handle tuple items and ambiguous anyOf branches in 
undoNullWidening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit review on #732: - resolveSchema now descends only when exactly one non-null anyOf/oneOf branch matches the value's shape; ambiguous unions keep the original schema rather than risk stripping a null a sibling branch allows. - Array walking applies tuple-style `items: [a, b, …]` schemas per index instead of always using the first. Adds coverage for both and fixes the test's import order. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/ai-utils/src/transforms.ts | 21 ++++-- packages/ai-utils/tests/transforms.test.ts | 81 +++++++++++++++++++--- 2 files changed, 87 insertions(+), 15 deletions(-) diff --git a/packages/ai-utils/src/transforms.ts b/packages/ai-utils/src/transforms.ts index 4d64c6ce3..e7764ac4c 100644 --- a/packages/ai-utils/src/transforms.ts +++ b/packages/ai-utils/src/transforms.ts @@ -80,18 +80,24 @@ function allowsNull(schema: JsonSchemaNode): boolean { * For a composite (object/array) value under an `anyOf`/`oneOf` schema, pick the * non-null branch describing that value's real shape so recursion can follow it * (e.g. a `nullable(object({...}))` serializes as `anyOf: [object, null]`). + * + * Resolves only when EXACTLY ONE non-null branch matches the value's shape. If + * several could (e.g. a union of object types), the branch is ambiguous, so we + * keep the original schema and descend no further — better to leave a null in + * place than risk stripping one a sibling branch genuinely allows. */ function resolveSchema(schema: JsonSchemaNode, value: unknown): JsonSchemaNode { const variants = schema.anyOf ?? schema.oneOf if (!variants) return schema const isArray = Array.isArray(value) - const match = variants.find((variant) => { + const matches = variants.filter((variant) => { if (variant.type === 'null') return false return isArray ? 
variant.type === 'array' || variant.items !== undefined : variant.type === 'object' || variant.properties !== undefined }) - return match ?? schema + const [only] = matches + return matches.length === 1 && only ? only : schema } function walk(value: unknown, schema: JsonSchemaNode | undefined): unknown { @@ -108,11 +114,12 @@ function walk(value: unknown, schema: JsonSchemaNode | undefined): unknown { if (!schema) return value if (Array.isArray(value)) { - const resolved = resolveSchema(schema, value) - const itemSchema = Array.isArray(resolved.items) - ? resolved.items[0] - : resolved.items - return value.map((item) => walk(item, itemSchema)) + const { items } = resolveSchema(schema, value) + // Tuple schemas (`items: [a, b, …]`) describe each position separately; + // a single `items` schema applies to every element. + return Array.isArray(items) + ? value.map((item, index) => walk(item, items[index])) + : value.map((item) => walk(item, items)) } const resolved = resolveSchema(schema, value) diff --git a/packages/ai-utils/tests/transforms.test.ts b/packages/ai-utils/tests/transforms.test.ts index f84ee7569..eb263c9d4 100644 --- a/packages/ai-utils/tests/transforms.test.ts +++ b/packages/ai-utils/tests/transforms.test.ts @@ -1,9 +1,6 @@ -import { describe, it, expect } from 'vitest' +import { describe, expect, it } from 'vitest' +import { transformNullsToUndefined, undoNullWidening } from '../src/transforms' import type { JsonSchemaNode } from '../src/transforms' -import { - transformNullsToUndefined, - undoNullWidening, -} from '../src/transforms' describe('transformNullsToUndefined', () => { it('should convert null values to undefined', () => { @@ -98,7 +95,10 @@ describe('undoNullWidening', () => { anyOf: [ { type: 'object', - properties: { inner: { type: 'string' }, note: { type: 'string' } }, + properties: { + inner: { type: 'string' }, + note: { type: 'string' }, + }, required: ['inner'], }, { type: 'null' }, @@ -128,12 +128,77 @@ 
describe('undoNullWidening', () => { required: ['items'], } const result = undoNullWidening( - { items: [{ id: '1', label: null }, { id: '2', label: 'two' }] }, + { + items: [ + { id: '1', label: null }, + { id: '2', label: 'two' }, + ], + }, arrSchema, ) expect(result).toEqual({ items: [{ id: '1' }, { id: '2', label: 'two' }] }) }) + it('applies tuple-style item schemas per index', () => { + const tupleSchema: JsonSchemaNode = { + type: 'object', + properties: { + pair: { + type: 'array', + // [ { name }, { note? } ] — only the second position is optional. + items: [ + { + type: 'object', + properties: { name: { type: 'string' } }, + required: ['name'], + }, + { + type: 'object', + properties: { note: { type: 'string' } }, + required: [], + }, + ], + }, + }, + required: ['pair'], + } + const result = undoNullWidening( + { pair: [{ name: 'Ada' }, { note: null }] }, + tupleSchema, + ) + // The synthesized null in the second tuple position is dropped using that + // position's schema, not the first's. + expect(result).toEqual({ pair: [{ name: 'Ada' }, {}] }) + }) + + it('keeps nulls when the anyOf branch is ambiguous (multiple object variants)', () => { + const ambiguous: JsonSchemaNode = { + type: 'object', + properties: { + node: { + anyOf: [ + { + type: 'object', + properties: { a: { type: 'string' } }, + required: [], + }, + { + type: 'object', + properties: { b: { type: 'string' } }, + required: [], + }, + { type: 'null' }, + ], + }, + }, + required: ['node'], + } + // Two object branches match, so we can't tell which applies — leave the + // value (and any nulls inside it) untouched rather than risk mis-stripping. 
+ const value = { node: { a: null } } + expect(undoNullWidening(value, ambiguous)).toEqual({ node: { a: null } }) + }) + it('returns the value untouched when no schema is supplied', () => { const value = { a: null, b: 1 } expect(undoNullWidening(value)).toBe(value) @@ -142,7 +207,7 @@ describe('undoNullWidening', () => { it('leaves nulls under unknown (schemaless) properties untouched', () => { // `extra` is not described by the schema — we cannot prove its null is // synthetic, so it is preserved. - const result = undoNullWidening({ req: 'a', extra: null } as object, schema) + const result = undoNullWidening({ req: 'a', extra: null }, schema) expect(result).toEqual({ req: 'a', extra: null }) }) }) From 95e6295c064b56bbb1404675cac40ab62c64991d Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Wed, 10 Jun 2026 15:50:40 +1000 Subject: [PATCH 3/8] refactor(ai): record null-widening map at conversion time instead of re-deriving it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the schema-guessing `undoNullWidening` — which reverse-engineered which nulls strict-mode widening synthesized by pattern-matching response values against the un-widened schema's anyOf branches, and bailed on ambiguity — with a precise map recorded by the widening pass itself. `makeStructuredOutputCompatible` now returns the strict schema plus a `NullWideningMap` marking exactly the positions where it added `null`. The new `convertSchemaForStructuredOutput` exposes both, and the chat activity threads that map into `undoNullWidening`. This drops `resolveSchema`/`allowsNull` branch guessing, preserves `.nullish()` nulls by construction, and closes the ambiguous-union gap where synthesized nulls were previously left in place. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../structured-output-undo-null-widening.md | 3 +- packages/ai-utils/src/index.ts | 2 +- packages/ai-utils/src/transforms.ts | 106 +++----- packages/ai-utils/tests/transforms.test.ts | 137 +++------- packages/ai/src/activities/chat/index.ts | 28 +- .../activities/chat/tools/schema-converter.ts | 239 +++++++++++------- 6 files changed, 237 insertions(+), 278 deletions(-) diff --git a/.changeset/structured-output-undo-null-widening.md b/.changeset/structured-output-undo-null-widening.md index f658ed3ef..c8185a75f 100644 --- a/.changeset/structured-output-undo-null-widening.md +++ b/.changeset/structured-output-undo-null-widening.md @@ -9,4 +9,5 @@ Strict-mode structured output widens optional fields to `required` + nullable, s The engine now undoes the widening before validation: it drops the synthesized nulls while preserving the ones a `.nullable()`/`.nullish()` field genuinely allows, so both optional and nullable fields round-trip correctly. -- `@tanstack/ai-utils` adds `undoNullWidening(value, schema)` — a schema-aware counterpart to `transformNullsToUndefined` that only strips nulls the original JSON Schema disallows. +- The strict-conversion pass now records a `NullWideningMap` marking exactly the positions where it added `null`, so the response can be un-widened precisely — no re-deriving or guessing which nulls were synthetic. +- `@tanstack/ai-utils` adds `undoNullWidening(value, map)` — a counterpart to `transformNullsToUndefined` that strips only the nulls the widening pass synthesized, driven by that map. 
diff --git a/packages/ai-utils/src/index.ts b/packages/ai-utils/src/index.ts index bf7f7a024..843d5eb37 100644 --- a/packages/ai-utils/src/index.ts +++ b/packages/ai-utils/src/index.ts @@ -1,5 +1,5 @@ export { generateId } from './id' export { getApiKeyFromEnv } from './env' export { transformNullsToUndefined, undoNullWidening } from './transforms' -export type { JsonSchemaNode } from './transforms' +export type { NullWideningMap } from './transforms' export { arrayBufferToBase64, base64ToArrayBuffer } from './base64' diff --git a/packages/ai-utils/src/transforms.ts b/packages/ai-utils/src/transforms.ts index e7764ac4c..c083952ad 100644 --- a/packages/ai-utils/src/transforms.ts +++ b/packages/ai-utils/src/transforms.ts @@ -51,82 +51,50 @@ export function transformNullsToUndefined(obj: T): T { } /** - * Minimal structural view of a JSON Schema node — just the keywords - * {@link undoNullWidening} consults. Kept local so `@tanstack/ai-utils` stays - * dependency-free; the richer `JSONSchema` from `@tanstack/ai` is structurally - * assignable to it. - */ -export type JsonSchemaNode = { - type?: string | Array - properties?: Record - items?: JsonSchemaNode | Array - anyOf?: Array - oneOf?: Array - [key: string]: unknown -} - -/** - * Whether the schema node permits `null` — directly via `type` or through an - * `anyOf`/`oneOf` branch (how Valibot's `nullable`/`nullish` and Zod serialize). - */ -function allowsNull(schema: JsonSchemaNode): boolean { - if (schema.type === 'null') return true - if (Array.isArray(schema.type) && schema.type.includes('null')) return true - const variants = schema.anyOf ?? schema.oneOf - return variants ? variants.some(allowsNull) : false -} - -/** - * For a composite (object/array) value under an `anyOf`/`oneOf` schema, pick the - * non-null branch describing that value's real shape so recursion can follow it - * (e.g. a `nullable(object({...}))` serializes as `anyOf: [object, null]`). 
+ * Records exactly where strict-mode null-widening synthesized a `null`, so + * {@link undoNullWidening} can strip those nulls and leave every other one + * untouched. Built by the widening pass itself as it walks the schema (see + * `convertSchemaForStructuredOutput` in `@tanstack/ai`), so it can never drift + * from what was actually widened — no value-shape guessing required. * - * Resolves only when EXACTLY ONE non-null branch matches the value's shape. If - * several could (e.g. a union of object types), the branch is ambiguous, so we - * keep the original schema and descend no further — better to leave a null in - * place than risk stripping one a sibling branch genuinely allows. + * - `widened`: the widening pass added `null` to THIS position's type (an + * optional field promoted to `required` + nullable). A `null` here is + * synthetic → strip it. Positions a `.nullable()`/`.nullish()` field already + * allowed carry no `widened` mark, so their nulls survive. + * - `properties` / `items`: descend into a nested object / array to reach + * widened positions deeper in the tree. Only objects and arrays the widener + * actually recursed into appear here. */ -function resolveSchema(schema: JsonSchemaNode, value: unknown): JsonSchemaNode { - const variants = schema.anyOf ?? schema.oneOf - if (!variants) return schema - const isArray = Array.isArray(value) - const matches = variants.filter((variant) => { - if (variant.type === 'null') return false - return isArray - ? variant.type === 'array' || variant.items !== undefined - : variant.type === 'object' || variant.properties !== undefined - }) - const [only] = matches - return matches.length === 1 && only ? 
only : schema +export type NullWideningMap = { + widened?: boolean + properties?: Record<string, NullWideningMap> + items?: NullWideningMap | Array<NullWideningMap> } -function walk(value: unknown, schema: JsonSchemaNode | undefined): unknown { +function walk(value: unknown, map: NullWideningMap | undefined): unknown { if (value === null) { - // Strip only when the schema is present AND definitively disallows null — - // i.e. the null was synthesized by strict-mode null-widening of an optional - // field. Keep nulls a `.nullable()` field genuinely allows, and — being - // conservative — nulls under shapes the schema doesn't describe. - return schema && !allowsNull(schema) ? undefined : null + // Strip only nulls the widening pass synthesized (marked `widened`); keep + // every genuine `.nullable()`/`.nullish()` null and every null the map + // doesn't describe. + return map?.widened ? undefined : null } - if (typeof value !== 'object') return value - // Unknown shape (no schema, or `additionalProperties`): leave it untouched - // rather than guess which nulls are synthetic. - if (!schema) return value + if (typeof value !== 'object' || !map) return value if (Array.isArray(value)) { - const { items } = resolveSchema(schema, value) - // Tuple schemas (`items: [a, b, …]`) describe each position separately; - // a single `items` schema applies to every element. + const { items } = map + if (!items) return value + // Tuple maps (`items: [a, b, …]`) describe each position separately; + // a single `items` map applies to every element. return Array.isArray(items) ? 
value.map((item, index) => walk(item, items[index])) : value.map((item) => walk(item, items)) } - const resolved = resolveSchema(schema, value) - const props = resolved.properties + const { properties } = map + if (!properties) return value const result: Record<string, unknown> = {} for (const [key, child] of Object.entries(value as Record<string, unknown>)) { - const next = walk(child, props?.[key]) + const next = walk(child, properties[key]) // A synthesized null collapsed to undefined → omit the key so the field // reads as absent (`key in result === false`), matching how `.optional()` // treats absence. @@ -137,20 +105,20 @@ function walk(value: unknown, schema: JsonSchemaNode | undefined): unknown { } /** - * Schema-aware inverse of strict-mode null-widening for structured output. + * Inverse of strict-mode null-widening for structured output. * * To satisfy OpenAI-style strict schemas, optional fields are widened to * `required` with `null` added to their type, so the provider returns `null` * for an absent optional. Validating that `null` against the ORIGINAL schema * fails, because `.optional()` means `T | undefined`, not `T | null`. * - * Unlike {@link transformNullsToUndefined}, this consults the original - * (un-widened) JSON Schema and only drops nulls the schema does NOT permit — - * the synthesized ones. Nulls a `.nullable()`/`.nullish()` field genuinely + * Unlike {@link transformNullsToUndefined}, this consults a {@link + * NullWideningMap} recorded by the widening pass and drops ONLY the nulls that + * pass actually synthesized. Nulls a `.nullable()`/`.nullish()` field genuinely * allows are preserved, so both `optional` and `nullable` fields round-trip * correctly. With no schema, the value is returned untouched. + * correctly. With no map, the value is returned untouched. 
*/ -export function undoNullWidening(value: T, schema?: JsonSchemaNode): T { - if (!schema) return value - return walk(value, schema) as T +export function undoNullWidening(value: T, map?: NullWideningMap): T { + if (!map) return value + return walk(value, map) as T } diff --git a/packages/ai-utils/tests/transforms.test.ts b/packages/ai-utils/tests/transforms.test.ts index eb263c9d4..e063866d2 100644 --- a/packages/ai-utils/tests/transforms.test.ts +++ b/packages/ai-utils/tests/transforms.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest' import { transformNullsToUndefined, undoNullWidening } from '../src/transforms' -import type { JsonSchemaNode } from '../src/transforms' +import type { NullWideningMap } from '../src/transforms' describe('transformNullsToUndefined', () => { it('should convert null values to undefined', () => { @@ -52,80 +52,65 @@ describe('transformNullsToUndefined', () => { }) describe('undoNullWidening', () => { - // Mirrors the un-widened JSON Schema a Valibot/Zod object produces: - // req: string (required) -> v.string() - // opt: string, not required -> v.optional(v.string()) - // nul: anyOf[string, null] -> v.nullable(v.string()) - const schema: JsonSchemaNode = { - type: 'object', + // The widening pass records a map of the nulls it synthesized. 
For an object + // with one optional field (`opt`) and one nullable field (`nul`), only `opt` + // is widened — so only `opt` is marked: + // req: string (required) -> not widened, absent from the map + // opt: optional(string) -> widened to `required` + null + // nul: nullable(string) -> already allowed null, not widened + const map: NullWideningMap = { properties: { - req: { type: 'string' }, - opt: { type: 'string' }, - nul: { anyOf: [{ type: 'string' }, { type: 'null' }] }, + opt: { widened: true }, }, - required: ['req', 'nul'], } - it('drops a synthesized null on an optional field (key becomes absent)', () => { - const result = undoNullWidening({ req: 'a', opt: null }, schema) + it('drops a synthesized null on a widened field (key becomes absent)', () => { + const result = undoNullWidening({ req: 'a', opt: null }, map) expect(result).toEqual({ req: 'a' }) expect('opt' in (result as object)).toBe(false) }) - it('keeps a genuine null on a nullable field', () => { - const result = undoNullWidening({ req: 'a', nul: null }, schema) + it('keeps a genuine null on a field the widener did not touch', () => { + const result = undoNullWidening({ req: 'a', nul: null }, map) expect(result).toEqual({ req: 'a', nul: null }) }) - it('handles optional and nullable nulls in the same object', () => { - const result = undoNullWidening({ req: 'a', opt: null, nul: null }, schema) + it('handles widened and genuine nulls in the same object', () => { + const result = undoNullWidening({ req: 'a', opt: null, nul: null }, map) expect(result).toEqual({ req: 'a', nul: null }) }) it('leaves present values untouched', () => { - const result = undoNullWidening({ req: 'a', opt: 'b', nul: 'c' }, schema) + const result = undoNullWidening({ req: 'a', opt: 'b', nul: 'c' }, map) expect(result).toEqual({ req: 'a', opt: 'b', nul: 'c' }) }) - it('recurses into a nullable object via its anyOf branch', () => { - const nested: JsonSchemaNode = { - type: 'object', + it('descends into a widened object to 
drop its inner synthesized null', () => { + // `obj` is itself optional (so it may come back null) AND has an inner + // optional `note`. The map marks both the object and the nested field. + const nested: NullWideningMap = { properties: { obj: { - anyOf: [ - { - type: 'object', - properties: { - inner: { type: 'string' }, - note: { type: 'string' }, - }, - required: ['inner'], - }, - { type: 'null' }, - ], + widened: true, + properties: { note: { widened: true } }, }, }, - required: ['obj'], } - // obj itself is present (kept), but its optional `note` came back null. + // obj is present (kept), but its optional `note` came back null. const result = undoNullWidening({ obj: { inner: 'x', note: null } }, nested) expect(result).toEqual({ obj: { inner: 'x' } }) + + // …and when the whole object comes back null, the key drops out. + expect(undoNullWidening({ obj: null }, nested)).toEqual({}) }) it('strips synthesized nulls inside array items', () => { - const arrSchema: JsonSchemaNode = { - type: 'object', + const arrMap: NullWideningMap = { properties: { items: { - type: 'array', - items: { - type: 'object', - properties: { id: { type: 'string' }, label: { type: 'string' } }, - required: ['id'], - }, + items: { properties: { label: { widened: true } } }, }, }, - required: ['items'], } const result = undoNullWidening( { @@ -134,80 +119,38 @@ describe('undoNullWidening', () => { { id: '2', label: 'two' }, ], }, - arrSchema, + arrMap, ) expect(result).toEqual({ items: [{ id: '1' }, { id: '2', label: 'two' }] }) }) - it('applies tuple-style item schemas per index', () => { - const tupleSchema: JsonSchemaNode = { - type: 'object', + it('applies tuple-style item maps per index', () => { + // [ { name }, { note? } ] — only the second position has a widened field. + const tupleMap: NullWideningMap = { properties: { pair: { - type: 'array', - // [ { name }, { note? } ] — only the second position is optional. 
- items: [ - { - type: 'object', - properties: { name: { type: 'string' } }, - required: ['name'], - }, - { - type: 'object', - properties: { note: { type: 'string' } }, - required: [], - }, - ], + items: [{}, { properties: { note: { widened: true } } }], }, }, - required: ['pair'], } const result = undoNullWidening( { pair: [{ name: 'Ada' }, { note: null }] }, - tupleSchema, + tupleMap, ) // The synthesized null in the second tuple position is dropped using that - // position's schema, not the first's. + // position's map, not the first's. expect(result).toEqual({ pair: [{ name: 'Ada' }, {}] }) }) - it('keeps nulls when the anyOf branch is ambiguous (multiple object variants)', () => { - const ambiguous: JsonSchemaNode = { - type: 'object', - properties: { - node: { - anyOf: [ - { - type: 'object', - properties: { a: { type: 'string' } }, - required: [], - }, - { - type: 'object', - properties: { b: { type: 'string' } }, - required: [], - }, - { type: 'null' }, - ], - }, - }, - required: ['node'], - } - // Two object branches match, so we can't tell which applies — leave the - // value (and any nulls inside it) untouched rather than risk mis-stripping. - const value = { node: { a: null } } - expect(undoNullWidening(value, ambiguous)).toEqual({ node: { a: null } }) - }) - - it('returns the value untouched when no schema is supplied', () => { + it('returns the value untouched when no map is supplied', () => { const value = { a: null, b: 1 } expect(undoNullWidening(value)).toBe(value) }) - it('leaves nulls under unknown (schemaless) properties untouched', () => { - // `extra` is not described by the schema — we cannot prove its null is - // synthetic, so it is preserved. - const result = undoNullWidening({ req: 'a', extra: null }, schema) + it('leaves nulls under positions the map does not describe', () => { + // `extra` carries no map entry — the widener never synthesized a null + // there, so it is preserved. 
+ const result = undoNullWidening({ req: 'a', extra: null }, map) expect(result).toEqual({ req: 'a', extra: null }) }) }) diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index 25120d81b..84d7d60eb 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -19,8 +19,8 @@ import { executeToolCalls, } from './tools/tool-calls' import { + convertSchemaForStructuredOutput, convertSchemaToJsonSchema, - isStandardJSONSchema, isStandardSchema, parseWithStandardSchema, } from './tools/schema-converter' @@ -2693,25 +2693,19 @@ async function runAgenticStructuredOutput< // Same strict-conversion as the streaming path (`forStructuredOutput: true`) // so the same Zod schema produces the same JSON Schema regardless of - // stream mode — Promise and stream:true must not diverge here. - const jsonSchema = convertSchemaToJsonSchema(outputSchema, { - forStructuredOutput: true, - }) + // stream mode — Promise and stream:true must not diverge here. The same + // pass also records a `nullWideningMap`: optional fields are widened to + // `required` + nullable for the provider, which then returns `null` for an + // absent optional — a `null` the original `.optional()` (`T | undefined`) + // schema would otherwise reject. The map pinpoints exactly those synthesized + // nulls so `undoNullWidening` can drop them while preserving the ones a + // `.nullable()` field genuinely allows. 
+ const { jsonSchema, nullWideningMap } = + convertSchemaForStructuredOutput(outputSchema) if (!jsonSchema) { throw new Error('Failed to convert output schema to JSON Schema') } - // The un-widened schema (no `forStructuredOutput`) still distinguishes - // genuinely-nullable fields from optional ones, so we can undo strict-mode's - // null-widening before validating: optional fields are widened to - // `required` + nullable for the provider, which then returns `null` for an - // absent optional — a `null` the original `.optional()` (`T | undefined`) - // schema would otherwise reject. `undoNullWidening` drops only those - // synthesized nulls, preserving the ones a `.nullable()` field allows. - const validationSchema = isStandardJSONSchema(outputSchema) - ? convertSchemaToJsonSchema(outputSchema) - : undefined - // Validation runs INSIDE the engine (per spec §7.3) so validation failures // route through the engine's terminal-hook chooser as `onError`. We pass a // `validate` callback when the schema is a Standard Schema; otherwise we @@ -2720,7 +2714,7 @@ async function runAgenticStructuredOutput< ? 
(data: unknown): unknown => parseWithStandardSchema>( outputSchema, - undoNullWidening(data, validationSchema), + undoNullWidening(data, nullWideningMap), ) : undefined diff --git a/packages/ai/src/activities/chat/tools/schema-converter.ts b/packages/ai/src/activities/chat/tools/schema-converter.ts index 6bcd81aeb..cda434bd1 100644 --- a/packages/ai/src/activities/chat/tools/schema-converter.ts +++ b/packages/ai/src/activities/chat/tools/schema-converter.ts @@ -2,6 +2,7 @@ import type { StandardJSONSchemaV1, StandardSchemaV1, } from '@standard-schema/spec' +import type { NullWideningMap } from '@tanstack/ai-utils' import type { JSONSchema, SchemaInput } from '../../../types' /** @@ -82,6 +83,22 @@ export function isStandardSchema(schema: unknown): schema is StandardSchemaV1 { ) } +/** + * Result of {@link makeStructuredOutputCompatible}: the strict-ready schema plus + * a {@link NullWideningMap} recording every position where a `null` was + * synthesized, so the response can be un-widened before validation without + * re-deriving (or guessing) which nulls were synthetic. + */ +interface StructuredOutputConversion { + schema: JSONSchema + nullWidening: NullWideningMap | undefined +} + +/** Drop an empty map to `undefined` so leaf/no-op subtrees don't litter it. */ +function pruneMap(map: NullWideningMap): NullWideningMap | undefined { + return Object.keys(map).length > 0 ? map : undefined +} + /** * Transform a JSON schema to be compatible with OpenAI's structured output requirements. * OpenAI requires: @@ -89,59 +106,76 @@ export function isStandardSchema(schema: unknown): schema is StandardSchemaV1 { * - Optional fields should have null added to their type union * - additionalProperties must be false for objects * + * Alongside the transformed schema it returns a {@link NullWideningMap} marking + * exactly the positions where `null` was added, so `undoNullWidening` can strip + * those synthesized nulls (and only those) from the provider's response. 
+ * * @param schema - JSON schema to transform * @param originalRequired - Original required array (to know which fields were optional) - * @returns Transformed schema compatible with OpenAI structured output + * @returns Transformed schema + the null-widening map for the round trip */ function makeStructuredOutputCompatible( schema: JSONSchema, originalRequired: Array = [], -): JSONSchema { +): StructuredOutputConversion { const result: JSONSchema = { ...schema } + const map: NullWideningMap = {} // Handle object types if (result.type === 'object' && result.properties) { const properties: Record = { ...result.properties } const allPropertyNames = Object.keys(properties) + const propertyMaps: Record = {} // Transform each property for (const propName of allPropertyNames) { const prop = properties[propName] if (!prop) continue const wasOptional = !originalRequired.includes(propName) + // `null` synthesized AT this property (the field itself can come back null). + let widenedHere = false + // Map describing widened positions INSIDE this property. + let childMap: NullWideningMap | undefined // Recursively transform nested objects/arrays if (prop.type === 'object' && prop.properties) { - const transformed = makeStructuredOutputCompatible( - prop, - prop.required || [], - ) + const nested = makeStructuredOutputCompatible(prop, prop.required || []) properties[propName] = wasOptional - ? { ...transformed, type: ['object', 'null'] } - : transformed + ? { ...nested.schema, type: ['object', 'null'] } + : nested.schema + widenedHere = wasOptional + childMap = nested.nullWidening } else if (prop.type === 'array' && prop.items) { const items = Array.isArray(prop.items) ? prop.items[0] : prop.items - const transformed: JSONSchema = { + const nestedItems = items + ? makeStructuredOutputCompatible(items, items.required || []) + : undefined + properties[propName] = { ...prop, - items: items - ? 
makeStructuredOutputCompatible(items, items.required || []) - : prop.items, + items: nestedItems ? nestedItems.schema : prop.items, + ...(wasOptional ? { type: ['array', 'null'] } : {}), } - properties[propName] = wasOptional - ? { ...transformed, type: ['array', 'null'] } - : transformed + widenedHere = wasOptional + childMap = nestedItems?.nullWidening + ? { items: nestedItems.nullWidening } + : undefined } else if (wasOptional) { - // Make optional fields nullable by adding null to the type + // Make optional fields nullable by adding null to the type. Mark + // `widenedHere` only where we actually add `null`; a field already + // typed nullable (`.nullish()`) is left as-is and keeps its null. if (prop.type && !Array.isArray(prop.type)) { - properties[propName] = { - ...prop, - type: [prop.type, 'null'], - } + properties[propName] = { ...prop, type: [prop.type, 'null'] } + widenedHere = true } else if (Array.isArray(prop.type) && !prop.type.includes('null')) { - properties[propName] = { - ...prop, - type: [...prop.type, 'null'], - } + properties[propName] = { ...prop, type: [...prop.type, 'null'] } + widenedHere = true + } + } + + if (widenedHere || childMap) { + propertyMaps[propName] = { + ...(childMap ?? {}), + ...(widenedHere ? { widened: true } : {}), } } } @@ -151,17 +185,23 @@ function makeStructuredOutputCompatible( result.required = allPropertyNames // additionalProperties must be false result.additionalProperties = false + if (Object.keys(propertyMaps).length > 0) map.properties = propertyMaps } // Handle array types with object items if (result.type === 'array' && result.items) { const items = Array.isArray(result.items) ? 
result.items[0] : result.items if (items) { - result.items = makeStructuredOutputCompatible(items, items.required || []) + const nestedItems = makeStructuredOutputCompatible( + items, + items.required || [], + ) + result.items = nestedItems.schema + if (nestedItems.nullWidening) map.items = nestedItems.nullWidening } } - return result + return { schema: result, nullWidening: pruneMap(map) } } /** @@ -179,6 +219,48 @@ export interface ConvertSchemaOptions { forStructuredOutput?: boolean } +/** + * Normalize any supported schema input to a typed, UN-widened `JSONSchema` — + * the shared first half of conversion, before any structured-output widening. + * + * - Standard JSON Schemas are rebuilt structurally (dropping `$schema`, which + * LLM providers ignore) and given the explicit `type`/`properties`/`required` + * defaults object shapes need downstream. + * - Plain `JSONSchema` inputs are rebuilt into the typed view; non-object inputs + * are surfaced untouched (they can't be widened). + * - Standard Schema validators lacking a `~standard.jsonSchema` converter throw + * with actionable guidance, rather than shipping `{ '~standard': … }` to the + * provider and producing an opaque downstream error. + */ +function toTypedJsonSchema(schema: SchemaInput): JSONSchema | undefined { + if (isStandardJSONSchema(schema)) { + const jsonSchema = schema['~standard'].jsonSchema.input({ + target: 'draft-07', + }) + const result: JSONSchema = toJsonSchema(jsonSchema) + if ('properties' in result && !result.type) result.type = 'object' + if (result.type === 'object' && !('properties' in result)) { + result.properties = {} + } + if (result.type === 'object' && !('required' in result)) { + result.required = [] + } + return result + } + + if (isStandardSchema(schema)) { + throw new Error( + 'Schema is a Standard Schema validator but does not expose a JSON Schema ' + + 'converter on `~standard.jsonSchema`. 
Use Zod v4.2+, ArkType v2.1.28+, ' + + 'or wrap a Valibot schema with `toStandardJsonSchema()` from ' + + '`@valibot/to-json-schema` before passing it as `outputSchema`.', + ) + } + + if (typeof schema !== 'object') return schema + return toJsonSchema(schema) +} + /** * Converts a Standard JSON Schema compliant schema or plain JSONSchema to JSON Schema format * compatible with LLM providers. @@ -247,77 +329,48 @@ export function convertSchemaToJsonSchema( const { forStructuredOutput = false } = options - // If it's a Standard JSON Schema compliant schema, use the standard interface - if (isStandardJSONSchema(schema)) { - const jsonSchema = schema['~standard'].jsonSchema.input({ - target: 'draft-07', - }) - - // Rebuild structurally so the typed JSONSchema view is acquired without - // a `Record as JSONSchema` cast; `toJsonSchema()` also - // drops the `$schema` key which LLM providers don't need. - let result: JSONSchema = toJsonSchema(jsonSchema) - - // Ensure object schemas always have type: "object" - // If it has properties (even empty), it should be an object type - if ('properties' in result && !result.type) { - result.type = 'object' - } - - // Ensure properties exists for object types (even if empty) - if (result.type === 'object' && !('properties' in result)) { - result.properties = {} - } - - // Ensure required exists for object types (even if empty array) - if (result.type === 'object' && !('required' in result)) { - result.required = [] - } - - // Apply structured output transformation if requested - if (forStructuredOutput) { - result = makeStructuredOutputCompatible(result, result.required || []) - } - - return result - } - - // Detect Standard Schema validators (Zod, ArkType, Valibot, …) that don't - // expose a `~standard.jsonSchema` converter. These would otherwise fall - // through to the JSONSchema pass-through below and ship `{ '~standard': … }` - // straight to the LLM provider, producing an opaque downstream error. 
Fail - // fast with actionable guidance instead. - if (isStandardSchema(schema)) { - throw new Error( - 'Schema is a Standard Schema validator but does not expose a JSON Schema ' + - 'converter on `~standard.jsonSchema`. Use Zod v4.2+, ArkType v2.1.28+, ' + - 'or wrap a Valibot schema with `toStandardJsonSchema()` from ' + - '`@valibot/to-json-schema` before passing it as `outputSchema`.', - ) - } - - // If it's not a Standard JSON Schema, assume it's already a JSONSchema and pass through - // Still apply structured output transformation if requested - - // At this branch, `schema` is the plain `JSONSchema` arm of `SchemaInput` - // (the two `~standard` arms were handled above). When no transformation - // is requested we pass the schema through by reference to preserve - // identity for callers that compare via `===`. - if (typeof schema !== 'object') { - // The SchemaInput union is object-shaped on every arm; if we ever hit a - // non-object here, propagate it untouched and let the downstream - // provider error loudly rather than silently widen. + // Plain-JSONSchema passthrough: with no widening requested, return the schema + // by reference so callers comparing via `===` keep identity. Only the widening + // path needs the rebuilt, normalized view from `toTypedJsonSchema`. + if ( + !forStructuredOutput && + !isStandardJSONSchema(schema) && + !isStandardSchema(schema) + ) { return schema } - if (forStructuredOutput) { - // Build a typed view structurally so we don't need a SchemaInput→JSONSchema - // cast on the transformation path. - const typedView = toJsonSchema(schema) - return makeStructuredOutputCompatible(typedView, typedView.required || []) - } + const base = toTypedJsonSchema(schema) + // Non-object inputs can't be widened; surface them untouched. 
+ if (!base || typeof base !== 'object') return base + if (!forStructuredOutput) return base + return makeStructuredOutputCompatible(base, base.required || []).schema +} - return schema +/** + * Convert a schema for structured output AND capture the {@link NullWideningMap} + * recording every `null` the strict-mode widening synthesized. The map lets the + * caller undo that widening on the provider's response (via `undoNullWidening`) + * before validating against the original schema — optional fields read back as + * absent while genuine `.nullable()` nulls survive. The map is `undefined` when + * the schema isn't a widenable object or when no field needed widening. + */ +export function convertSchemaForStructuredOutput( + schema: SchemaInput | undefined, +): { + jsonSchema: JSONSchema | undefined + nullWideningMap: NullWideningMap | undefined +} { + if (!schema) return { jsonSchema: undefined, nullWideningMap: undefined } + const base = toTypedJsonSchema(schema) + if (!base || typeof base !== 'object') { + return { jsonSchema: base, nullWideningMap: undefined } + } + const { schema: jsonSchema, nullWidening } = makeStructuredOutputCompatible( + base, + base.required || [], + ) + return { jsonSchema, nullWideningMap: nullWidening } } /** From c6dedf99120e7d5be7c0528dc6aca460501997d9 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Wed, 10 Jun 2026 16:51:06 +1000 Subject: [PATCH 4/8] fix(ai): un-widen structured-output nulls in the engine for both stream modes and every adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strict-mode structured output widens optional fields to `required` + nullable, so providers return `null` for an absent optional. That `null` fails validation against the original `.optional()` schema (`T | undefined`, not `T | null`). Previously only the Promise path un-widened, and only for adapters that preserved provider nulls (OpenRouter). 
The OpenAI-family adapters instead blind-stripped every null via `transformStructuredOutput`, which masked the bug but also destroyed genuine `.nullable()` nulls — and the streaming path didn't un-widen at all. Move un-widening into the engine, the one layer that holds the schema's null-widening map: - Add `finalStructuredOutput.normalize`, applied the instant the structured output is captured, so it flows to BOTH the streaming `structured-output.complete` event and the Promise result (plus the native-combined harvest path). Both activity callers now pass it via `convertSchemaForStructuredOutput`; streaming switches off the map-less `convertSchemaToJsonSchema`. Validation runs on already-normalized data. - openai-base `transformStructuredOutput` default is now a passthrough — the blind null-strip is gone (the engine un-widens precisely instead). Fixes the responses-text streaming path that bypassed the hook. OpenAI/Grok/Groq inherit this; OpenRouter's now-redundant override is simplified and its dead `transformNullsToUndefined` imports dropped. Genuine `.nullable()` nulls now survive on every adapter and both directions; synthesized optional nulls are dropped everywhere. Tests: streaming normalization + a converter→undo round-trip (closing the untested map-production gap); adapter passthrough tests updated; e2e gains an optional field returning `null` asserted un-widened across all 5 streaming providers (real regression guard for OpenRouter). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../structured-output-undo-null-widening.md | 18 ++- docs/config.json | 3 +- docs/structured-outputs/overview.md | 4 +- .../src/adapters/responses-text.ts | 12 +- packages/ai-openrouter/src/adapters/text.ts | 12 +- .../tests/openrouter-adapter.test.ts | 8 +- packages/ai/src/activities/chat/index.ts | 114 +++++++++++++----- ...ructured-output-null-normalization.test.ts | 108 ++++++++++++++++- .../src/adapters/chat-completions-text.ts | 22 ++-- .../src/adapters/responses-text.ts | 31 ++--- .../tests/chat-completions-text.test.ts | 11 +- .../openai-base/tests/responses-text.test.ts | 8 +- .../structured-output-stream/basic.json | 2 +- testing/e2e/src/lib/schemas.ts | 6 + .../tests/structured-output-stream.spec.ts | 8 ++ 15 files changed, 279 insertions(+), 88 deletions(-) diff --git a/.changeset/structured-output-undo-null-widening.md b/.changeset/structured-output-undo-null-widening.md index c8185a75f..35f69fb12 100644 --- a/.changeset/structured-output-undo-null-widening.md +++ b/.changeset/structured-output-undo-null-widening.md @@ -1,13 +1,19 @@ --- '@tanstack/ai-utils': minor -'@tanstack/ai': patch +'@tanstack/ai': minor +'@tanstack/openai-base': minor +'@tanstack/ai-openrouter': patch --- -Fix structured output validation rejecting `null` for optional fields. +Fix structured output validation rejecting `null` for optional fields, across both stream modes and every adapter. -Strict-mode structured output widens optional fields to `required` + nullable, so the provider returns `null` for an absent optional. Validating that `null` against the original schema then failed, because `.optional()` means `T | undefined`, not `T | null` — surfacing as a `StandardSchemaValidationError` (e.g. `Invalid type: Expected string but received null`). This was most visible through `@tanstack/ai-openrouter`, whose adapter preserves provider nulls. 
+Strict-mode structured output widens optional fields to `required` + nullable, so the provider returns `null` for an absent optional. Validating that `null` against the original schema then failed, because `.optional()` means `T | undefined`, not `T | null` — surfacing as a `StandardSchemaValidationError` (e.g. `Invalid type: Expected string but received null`). -The engine now undoes the widening before validation: it drops the synthesized nulls while preserving the ones a `.nullable()`/`.nullish()` field genuinely allows, so both optional and nullable fields round-trip correctly. +The engine now undoes the widening as a single, schema-aware step the moment the structured output is captured, so the fix applies uniformly: -- The strict-conversion pass now records a `NullWideningMap` marking exactly the positions where it added `null`, so the response can be un-widened precisely — no re-deriving or guessing which nulls were synthetic. -- `@tanstack/ai-utils` adds `undoNullWidening(value, map)` — a counterpart to `transformNullsToUndefined` that strips only the nulls the widening pass synthesized, driven by that map. +- The strict-conversion pass records a `NullWideningMap` marking exactly the positions where it added `null`, so the response can be un-widened precisely — no re-deriving or guessing which nulls were synthetic. +- `@tanstack/ai-utils` adds `undoNullWidening(value, map)` — a counterpart to `transformNullsToUndefined` that strips only the nulls the widening pass synthesized, preserving the ones a `.nullable()`/`.nullish()` field genuinely allows. +- The engine applies this via a new `finalStructuredOutput.normalize` hook the instant the result is captured, so **both** the `Promise` result **and** the streaming `structured-output.complete` event carry the un-widened object. Previously only the `Promise` path was corrected, and only for adapters that preserved provider nulls. 
+- `@tanstack/openai-base` adapters (and the OpenAI/Grok/Groq adapters built on them) no longer blind-strip every `null` from structured output via `transformStructuredOutput` — that default is now a passthrough. The blind strip masked the validation bug but also destroyed genuine `.nullable()` nulls; precise un-widening in the engine fixes both. The `transformStructuredOutput` hook remains for provider-specific reshaping. + +Adapters that already preserve provider nulls (`@tanstack/ai-openrouter`, Anthropic, Gemini, Ollama) now get correct un-widening on their streaming structured output too, not just `Promise`. diff --git a/docs/config.json b/docs/config.json index e3fc3b712..956cbe65f 100644 --- a/docs/config.json +++ b/docs/config.json @@ -167,7 +167,8 @@ { "label": "Overview", "to": "structured-outputs/overview", - "addedAt": "2026-05-19" + "addedAt": "2026-05-19", + "updatedAt": "2026-06-10" }, { "label": "One-Shot Extraction", diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md index 98ef6f7ea..5245d8e52 100644 --- a/docs/structured-outputs/overview.md +++ b/docs/structured-outputs/overview.md @@ -86,7 +86,9 @@ Pick the journey that matches what you're building. The four guides under "Struc The streaming and multi-turn paths both build on `useChat({ outputSchema })`. The "with tools" path layers on top of either. Pick the one that describes your shipping shape — start there, follow the cross-links when you need a piece of another story. -> **Note:** Server-side validation is **path-dependent**. For the non-streaming agentic path (`await chat({ outputSchema })`), the engine runs Standard Schema validation inside the finalization step and routes failures through `onError` (the awaited promise rejects). 
For the streaming path (`chat({ outputSchema, stream: true })`), validation is deliberately deferred to the consumer — the engine forwards the adapter-emitted `structured-output.complete` event verbatim, and consumers read the validated object from the `value.object` field (or call `parseWithStandardSchema` themselves on the raw text). The schema you pass to `useChat({ outputSchema })` on the client is used for TypeScript inference and (in `useChat`) for client-side `parsePartialJSON`-based progressive parsing — the typed-object guarantee comes from the server-side path you pick. +> **Note:** Server-side validation is **path-dependent**. For the non-streaming agentic path (`await chat({ outputSchema })`), the engine runs Standard Schema validation inside the finalization step and routes failures through `onError` (the awaited promise rejects). For the streaming path (`chat({ outputSchema, stream: true })`), Standard Schema _validation_ is deliberately deferred to the consumer — consumers read the object from the `structured-output.complete` event's `value.object` field (or call `parseWithStandardSchema` themselves on the raw text). The schema you pass to `useChat({ outputSchema })` on the client is used for TypeScript inference and (in `useChat`) for client-side `parsePartialJSON`-based progressive parsing — the typed-object guarantee comes from the server-side path you pick. +> +> On **both** paths the engine normalizes the captured object before it reaches you: to satisfy strict providers, optional fields are widened to `required` + nullable, so the provider returns `null` for an absent optional. The engine undoes exactly that widening — an `.optional()` field that came back `null` reads back as **absent** (matching `T | undefined`), while a genuine `.nullable()` field's `null` is **preserved**. So `value.object` (streaming) and the awaited result (non-streaming) both carry the un-widened shape your schema describes. 
## Middleware integration diff --git a/packages/ai-openrouter/src/adapters/responses-text.ts b/packages/ai-openrouter/src/adapters/responses-text.ts index a658d985e..91f7d9335 100644 --- a/packages/ai-openrouter/src/adapters/responses-text.ts +++ b/packages/ai-openrouter/src/adapters/responses-text.ts @@ -5,7 +5,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../internal/request-options' import { makeStructuredOutputCompatible } from '../internal/schema-converter' import { convertFunctionToolToResponsesFormat } from '../internal/responses-tool-converter' @@ -697,14 +697,12 @@ export class OpenRouterResponsesTextAdapter< /** * OpenRouter routes through a wide variety of upstream providers; some - * return `null` as a distinct sentinel rather than collapsing it to absent. - * Stripping nulls would erase that distinction, so we passthrough. - * - * `transformNullsToUndefined` is imported for parity with the other - * provider adapters but intentionally not invoked here. + * return `null` as a distinct sentinel rather than collapsing it to absent, + * so we passthrough and let the engine un-widen strict-mode nulls precisely. + * Matches the base adapters' default — kept as an explicit override because + * OpenRouter extends `BaseTextAdapter` directly, not the OpenAI base. 
*/ protected transformStructuredOutput(parsed: unknown): unknown { - void transformNullsToUndefined return parsed } diff --git a/packages/ai-openrouter/src/adapters/text.ts b/packages/ai-openrouter/src/adapters/text.ts index 2648da0a3..415be9ad6 100644 --- a/packages/ai-openrouter/src/adapters/text.ts +++ b/packages/ai-openrouter/src/adapters/text.ts @@ -5,7 +5,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../internal/request-options' import { makeStructuredOutputCompatible } from '../internal/schema-converter' import { convertToolsToProviderFormat } from '../tools' @@ -624,14 +624,12 @@ export class OpenRouterTextAdapter< * Final shaping pass applied to parsed structured-output JSON before it is * returned to the caller. OpenRouter routes through a wide variety of * upstream providers; some return `null` as a distinct sentinel ("the field - * exists, the value is null") rather than collapsing it to absent. Stripping - * nulls would erase that distinction, so we passthrough. - * - * `transformNullsToUndefined` is imported for parity with the other - * provider adapters but intentionally not invoked here. + * exists, the value is null") rather than collapsing it to absent, so we + * passthrough and let the engine un-widen strict-mode nulls precisely. This + * now matches the base adapters' default — kept as an explicit override + * because OpenRouter extends `BaseTextAdapter` directly, not the OpenAI base. 
*/ protected transformStructuredOutput(parsed: unknown): unknown { - void transformNullsToUndefined return parsed } diff --git a/packages/ai-openrouter/tests/openrouter-adapter.test.ts b/packages/ai-openrouter/tests/openrouter-adapter.test.ts index 83d8c9251..9b9dfbef0 100644 --- a/packages/ai-openrouter/tests/openrouter-adapter.test.ts +++ b/packages/ai-openrouter/tests/openrouter-adapter.test.ts @@ -1249,7 +1249,13 @@ describe('OpenRouter structured output', () => { outputSchema, }) - expect(result).toEqual({ name: 'Alice', age: 30, nickname: null }) + // `nickname` was optional, so strict-mode widening made it `required` + + // nullable and the provider returned `null` for the absent value. The + // engine un-widens that synthesized null before returning, so the optional + // field reads back as absent — matching `.optional()` semantics — rather + // than leaking the synthetic `null` through. + expect(result).toEqual({ name: 'Alice', age: 30 }) + expect('nickname' in (result as object)).toBe(false) // The structured-output streaming call carries the strict-transformed schema. const structuredCall = mockSend.mock.calls.find( diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index 84d7d60eb..25a82103b 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -415,11 +415,21 @@ interface TextEngineConfig< * (used by runStreamingStructuredOutput). When false, chunks are * consumed internally for middleware visibility but not yielded * (used by runAgenticStructuredOutput). - * - validate: optional callback invoked AFTER the structured-output result - * is captured but BEFORE the terminal hook fires. If it throws, the - * engine records a `finalizationError` and fires `onError` instead of - * `onFinish` (per spec §7.3). 
On success, the returned value is stored - * as the validated result and retrievable via + * - normalize: optional schema-aware transform applied to the captured + * structured-output object the moment it enters the engine — BEFORE it is + * stored, validated, or yielded. Used to undo strict-mode null-widening + * (`undoNullWidening`): strict schemas widen optional fields to + * `required` + nullable so the provider returns `null` for an absent + * optional, and this strips exactly those synthesized nulls while keeping + * the ones a `.nullable()` field genuinely allows. Applied here (not in + * the adapter) because the engine is the only layer holding the original + * schema's null-widening map, and applying it at capture fixes BOTH the + * streaming chunk and the Promise result with one transform. + * - validate: optional callback invoked AFTER `normalize` and AFTER the + * structured-output result is captured, but BEFORE the terminal hook + * fires. If it throws, the engine records a `finalizationError` and fires + * `onError` instead of `onFinish` (per spec §7.3). On success, the + * returned value is stored as the validated result and retrievable via * `getValidatedStructuredOutput()`. Used by `runAgenticStructuredOutput` * to perform Standard Schema validation inside the engine. * - nativeCombined: when true, the adapter declared @@ -434,6 +444,7 @@ interface TextEngineConfig< finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean + normalize?: (data: unknown) => unknown validate?: (data: unknown) => unknown nativeCombined?: boolean } @@ -539,8 +550,8 @@ class TextEngine< // to carry, so the client matches it to the streaming text deltas. private combinedStructuredMessageId: string | null = null // Holds the validated value when `finalStructuredOutput.validate` is provided - // and succeeds. Distinct from `structuredOutputResult.data` (the raw, - // unvalidated payload from the structured-output.complete chunk). + // and succeeds. 
Distinct from `structuredOutputResult.data` (the normalized + // but unvalidated payload from the structured-output.complete chunk). private validatedStructuredOutput: unknown = undefined private hasValidatedStructuredOutput = false private finalizationError: { @@ -551,6 +562,7 @@ class TextEngine< private readonly finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean + normalize?: (data: unknown) => unknown validate?: (data: unknown) => unknown nativeCombined?: boolean } @@ -2049,15 +2061,29 @@ class TextEngine< // All narrowing below is via the discriminated-union `chunk.type` // — no `as` casts. + // The chunk forwarded to middleware/consumers. Replaced below only for + // the structured-output.complete event, whose `object` we normalize + // (un-widen) so streaming consumers see the same cleaned payload the + // Promise path validates and returns. + let outboundChunk: StreamChunk = chunk + if ( chunk.type === EventType.CUSTOM && chunk.name === 'structured-output.complete' ) { const parsed = readStructuredOutputCompleteValue(chunk.value) if (parsed) { - this.structuredOutputResult = { - data: parsed.object, - rawText: parsed.raw, + const object = this.finalStructuredOutput.normalize + ? this.finalStructuredOutput.normalize(parsed.object) + : parsed.object + this.structuredOutputResult = { data: object, rawText: parsed.raw } + // Rewrite the outbound event so the yielded chunk carries the + // normalized object (the original `chunk.value` still holds the + // widened one). Preserve every other field — `messageId`, + // `reasoning` — by spreading the original value. + const value = chunk.value + if (object !== parsed.object && value && typeof value === 'object') { + outboundChunk = { ...chunk, value: { ...value, object } } } } } @@ -2081,7 +2107,7 @@ class TextEngine< // 7b. Pipe through middleware const outputChunks = await this.middlewareRunner.runOnChunk( this.middlewareCtx, - chunk, + outboundChunk, ) // 7c. 
Decide consumer visibility — only yieldChunks=true callers get them. @@ -2238,7 +2264,14 @@ class TextEngine< } else { try { const parsed: unknown = JSON.parse(rawText) - this.structuredOutputResult = { data: parsed, rawText } + // Normalize (un-widen) before storing so the synthesized + // structured-output.complete chunk and the Promise result both + // carry the cleaned payload. JSON.parse preserves provider nulls, so + // this is where native-combined output gets its widening undone. + const data = this.finalStructuredOutput.normalize + ? this.finalStructuredOutput.normalize(parsed) + : parsed + this.structuredOutputResult = { data, rawText } } catch (err: unknown) { const detail = rawText.slice(0, 200) + (rawText.length > 200 ? '...' : '') @@ -2706,16 +2739,21 @@ async function runAgenticStructuredOutput< throw new Error('Failed to convert output schema to JSON Schema') } + // Un-widening runs in the engine the moment the structured output is + // captured (`finalStructuredOutput.normalize`), so it applies uniformly to + // every adapter and to both stream modes — the engine is the only layer + // holding the schema's `nullWideningMap`. Validation then runs on the + // already-normalized data, so `validate` is a plain Standard Schema parse. + const normalize = (data: unknown): unknown => + undoNullWidening(data, nullWideningMap) + // Validation runs INSIDE the engine (per spec §7.3) so validation failures // route through the engine's terminal-hook chooser as `onError`. We pass a // `validate` callback when the schema is a Standard Schema; otherwise we - // pass through the raw data and the engine returns it unchanged. + // pass through the (normalized) data and the engine returns it unchanged. const validate = isStandardSchema(outputSchema) ? 
(data: unknown): unknown => - parseWithStandardSchema>( - outputSchema, - undoNullWidening(data, nullWideningMap), - ) + parseWithStandardSchema>(outputSchema, data) : undefined // Per issue #605: same capability check as the streaming path. When the @@ -2744,6 +2782,7 @@ async function runAgenticStructuredOutput< finalStructuredOutput: { jsonSchema, yieldChunks: false, + normalize, ...(validate ? { validate } : {}), ...(nativeCombined ? { nativeCombined: true } : {}), }, @@ -2916,17 +2955,23 @@ async function* fallbackStructuredOutputStream( * RUN_STARTED/RUN_FINISHED are suppressed; the structured-output finalization * step's pair brackets the run for the consumer. * - * Schema validation is intentionally NOT run on this path — it is the - * consumer's responsibility. The `structured-output.complete` CUSTOM event - * is forwarded with the adapter-produced `value.object` as-is. This is a - * deliberate asymmetry vs. `runAgenticStructuredOutput` (Promise path), - * which DOES run Standard Schema validation inside the engine and routes - * validation failures through `onError`. The reason for the asymmetry: + * Standard Schema *validation* is intentionally NOT run on this path — it is + * the consumer's responsibility. This is a deliberate asymmetry vs. + * `runAgenticStructuredOutput` (Promise path), which DOES validate inside + * the engine and routes validation failures through `onError`. The reason: * streaming consumers typically render partial JSON progressively (via * `parsePartialJSON` or `useChat`'s `partial` slot) and validate downstream * after assembly. Running validation server-side would force a hard error * on partial-by-design payloads. See `docs/structured-outputs/overview.md`. 
* + * Null-widening normalization, however, IS run on both paths: the + * `structured-output.complete` CUSTOM event is forwarded with its `value.object` + * already un-widened (synthesized strict-mode nulls dropped, genuine + * `.nullable()` nulls kept), so a consumer validating the assembled object + * against the original schema doesn't choke on a `null` for an `.optional()` + * field. Same `convertSchemaForStructuredOutput` pass and same + * `undoNullWidening` map as the Promise path — the two must not diverge. + * * Pre-flight validation (missing schema, unconvertible schema) throws * synchronously at call time rather than as a yielded RUN_ERROR mid-stream — * those are programmer errors, not runtime conditions. @@ -2944,14 +2989,17 @@ function runStreamingStructuredOutput< } // forStructuredOutput strict-converts the schema once at the activity - // boundary. Adapters can re-convert if their wire format diverges, but the - // default flow hands them a strict-ready schema. - const jsonSchema = convertSchemaToJsonSchema(outputSchema, { - forStructuredOutput: true, - }) + // boundary, capturing the null-widening map so the engine can un-widen the + // provider's response before it reaches the consumer. Adapters can re-convert + // if their wire format diverges, but the default flow hands them a + // strict-ready schema. 
+ const { jsonSchema, nullWideningMap } = + convertSchemaForStructuredOutput(outputSchema) if (!jsonSchema) { throw new Error('Failed to convert output schema to JSON Schema') } + const normalize = (data: unknown): unknown => + undoNullWidening(data, nullWideningMap) // The implementation generator yields the broader internal type // (`StreamChunk | StructuredOutputCompleteEvent`) so agent-loop @@ -2962,6 +3010,7 @@ function runStreamingStructuredOutput< return runStreamingStructuredOutputImpl( options, jsonSchema, + normalize, ) as StructuredOutputStream> } @@ -2987,6 +3036,7 @@ async function* runStreamingStructuredOutputImpl< >( options: TextActivityOptions, jsonSchema: NonNullable>, + normalize: (data: unknown) => unknown, ): StructuredOutputStreamInternal> { const { adapter, @@ -3031,6 +3081,7 @@ async function* runStreamingStructuredOutputImpl< finalStructuredOutput: { jsonSchema, yieldChunks: true, + normalize, ...(nativeCombined ? { nativeCombined: true } : {}), }, }, @@ -3045,9 +3096,10 @@ async function* runStreamingStructuredOutputImpl< await mcpManager.dispose() } - // Schema validation for the streaming variant remains the consumer's - // responsibility — they read the CUSTOM 'structured-output.complete' from - // the yielded stream. Matches pre-fix behavior. + // Standard Schema validation for the streaming variant remains the + // consumer's responsibility — they read the CUSTOM 'structured-output.complete' + // from the yielded stream. (Null-widening normalization, by contrast, already + // ran inside the engine via `normalize`, so the object they read is un-widened.) 
void outputSchema } diff --git a/packages/ai/tests/chat-structured-output-null-normalization.test.ts b/packages/ai/tests/chat-structured-output-null-normalization.test.ts index 5179f008c..8879af545 100644 --- a/packages/ai/tests/chat-structured-output-null-normalization.test.ts +++ b/packages/ai/tests/chat-structured-output-null-normalization.test.ts @@ -10,11 +10,26 @@ */ import { describe, expect, it } from 'vitest' import { z } from 'zod' +import { undoNullWidening } from '@tanstack/ai-utils' import { chat } from '../src/activities/chat/index' -import { createMockAdapter } from './test-utils' +import { convertSchemaForStructuredOutput } from '../src/activities/chat/tools/schema-converter' +import { EventType } from '../src/types' +import { collectChunks, createMockAdapter } from './test-utils' +import type { StreamChunk } from '../src/types' const messages = [{ role: 'user' as const, content: 'go' }] +/** Find the terminal `structured-output.complete` event and return its object. */ +function completeObject(chunks: Array): unknown { + const complete = chunks.find( + (c) => + c.type === EventType.CUSTOM && + (c as { name?: string }).name === 'structured-output.complete', + ) + expect(complete).toBeDefined() + return (complete as { value: { object: unknown } }).value.object +} + describe('structured output null normalization', () => { it('drops a provider null for an optional field so validation passes', async () => { const outputSchema = z.object({ @@ -52,4 +67,95 @@ describe('structured output null normalization', () => { expect(result).toEqual({ title: 'Ship it', tag: null }) }) + + // The streaming path doesn't schema-validate server-side, but it now un-widens + // the terminal `structured-output.complete` object inside the engine — so a + // consumer validating the assembled object downstream doesn't choke on a + // synthesized `null` for an `.optional()` field, while genuine `.nullable()` + // nulls still reach them. Mirrors the Promise behaviour above. 
+ describe('streaming (stream: true)', () => { + it('un-widens the streamed structured-output.complete object', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + tag: z.string().nullable(), + }) + const { adapter } = createMockAdapter({ + // No native structuredOutputStream → engine wraps structuredOutput via + // the fallback stream, then normalizes the complete event. + structuredOutput: async () => ({ + data: { title: 'Ship it', note: null, tag: null }, + rawText: '{"title":"Ship it","note":null,"tag":null}', + }), + }) + + const stream = chat({ adapter, messages, outputSchema, stream: true }) + const chunks = await collectChunks( + stream as unknown as AsyncIterable, + ) + + const object = completeObject(chunks) + // `note` (optional → synthesized null) dropped; `tag` (nullable) kept. + expect(object).toEqual({ title: 'Ship it', tag: null }) + expect('note' in (object as object)).toBe(false) + }) + }) +}) + +// Closes the gap between the two halves of the fix: the widening pass that +// PRODUCES the map and the `undoNullWidening` pass that CONSUMES it. The unit +// tests in `@tanstack/ai-utils` drive `undoNullWidening` with hand-authored +// maps; here we run a real schema through `convertSchemaForStructuredOutput` +// and feed a provider-shaped payload back through the map it produced, proving +// the two can't drift. 
+describe('convertSchemaForStructuredOutput → undoNullWidening round trip', () => { + it('un-widens a nested schema using the map the conversion produced', () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), // widened scalar + tag: z.string().nullable(), // genuine nullable — not widened + meta: z + .object({ author: z.string(), rev: z.number().optional() }) + .optional(), // widened object with an inner widened field + items: z.array(z.object({ id: z.string(), label: z.string().optional() })), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + expect(nullWideningMap).toBeDefined() + + // What a strict provider returns: every absent optional comes back `null`. + const providerPayload = { + title: 'T', + note: null, + tag: null, + meta: { author: 'A', rev: null }, + items: [ + { id: '1', label: null }, + { id: '2', label: 'x' }, + ], + } + + expect(undoNullWidening(providerPayload, nullWideningMap)).toEqual({ + title: 'T', + tag: null, + meta: { author: 'A' }, + items: [{ id: '1' }, { id: '2', label: 'x' }], + }) + }) + + it('drops a widened nested object that comes back null', () => { + const outputSchema = z.object({ + title: z.string(), + meta: z.object({ author: z.string() }).optional(), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + const result = undoNullWidening( + { title: 'T', meta: null }, + nullWideningMap, + ) as Record + + expect(result).toEqual({ title: 'T' }) + expect('meta' in result).toBe(false) + }) }) diff --git a/packages/openai-base/src/adapters/chat-completions-text.ts b/packages/openai-base/src/adapters/chat-completions-text.ts index 488c31836..febffcb0b 100644 --- a/packages/openai-base/src/adapters/chat-completions-text.ts +++ b/packages/openai-base/src/adapters/chat-completions-text.ts @@ -4,7 +4,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, 
transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../utils/request-options' import { makeStructuredOutputCompatible } from '../utils/schema-converter' import { buildChatCompletionsUsage } from '../usage' @@ -213,10 +213,8 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< ) } - // Transform null values to undefined to match original Zod schema expectations - // Provider returns null for optional fields we made nullable in the schema. - // Subclasses can override `transformStructuredOutput` to skip this — e.g. - // OpenRouter historically passed nulls through unchanged. + // Final provider-specific shaping pass (default passthrough). Null-widening + // from strict mode is undone by the engine, not here. const transformed = this.transformStructuredOutput(parsed) return { @@ -595,13 +593,17 @@ export abstract class OpenAIBaseChatCompletionsTextAdapter< /** * Final shaping pass applied to parsed structured-output JSON before it is - * returned to the caller. Default converts `null` values to `undefined` so - * the result aligns with the original Zod schema's optional-field - * semantics. Subclasses with different conventions (OpenRouter historically - * preserves nulls) can override. + * returned to the caller. Default is a passthrough. + * + * Provider `null`s are no longer stripped here: strict-mode null-widening is + * now undone precisely by the engine (`undoNullWidening`, driven by the + * schema's null-widening map) the moment the result is captured, so a blind + * `transformNullsToUndefined` at the adapter would only destroy genuine + * `.nullable()` nulls. Subclasses may still override to remap or reshape the + * provider's structured output. 
*/ protected transformStructuredOutput(parsed: unknown): unknown { - return transformNullsToUndefined(parsed) + return parsed } /** diff --git a/packages/openai-base/src/adapters/responses-text.ts b/packages/openai-base/src/adapters/responses-text.ts index 454c96e03..53c793aa0 100644 --- a/packages/openai-base/src/adapters/responses-text.ts +++ b/packages/openai-base/src/adapters/responses-text.ts @@ -4,7 +4,7 @@ import { toRunErrorPayload, toRunErrorRawEvent, } from '@tanstack/ai/adapter-internals' -import { generateId, transformNullsToUndefined } from '@tanstack/ai-utils' +import { generateId } from '@tanstack/ai-utils' import { extractRequestOptions } from '../utils/request-options' import { makeStructuredOutputCompatible } from '../utils/schema-converter' import { buildResponsesUsage } from '../usage' @@ -247,12 +247,8 @@ export abstract class OpenAIBaseResponsesTextAdapter< ) } - // Apply the provider-specific post-parse shaping (default: null → - // undefined to align with the original Zod schema's optional-field - // semantics; subclasses with different conventions can override - // `transformStructuredOutput`, mirroring the chat-completions base's - // hook so OpenRouter and other providers that preserve nulls in - // structured output can opt out without forking `structuredOutput`). + // Provider-specific post-parse shaping (default passthrough). Null-widening + // from strict mode is undone by the engine, not here. const transformed = this.transformStructuredOutput(parsed) return { @@ -577,7 +573,10 @@ export abstract class OpenAIBaseResponsesTextAdapter< return } - const transformed = transformNullsToUndefined(parsed) + // Route through the same hook as the non-streaming path (default + // passthrough). Engine un-widens nulls; the streaming path must not strip + // them blindly either. 
+ const transformed = this.transformStructuredOutput(parsed) yield { type: EventType.CUSTOM, @@ -673,15 +672,17 @@ export abstract class OpenAIBaseResponsesTextAdapter< /** * Final shaping pass applied to parsed structured-output JSON before it is - * returned to the caller. Default converts `null` values to `undefined` so - * the result aligns with the original Zod schema's optional-field - * semantics. Subclasses with different conventions (OpenRouter historically - * preserves nulls) can override — mirrors the chat-completions base's hook - * so a subclass that opts out of null-stripping doesn't have to fork the - * whole `structuredOutput` method. + * returned to the caller. Default is a passthrough. + * + * Provider `null`s are no longer stripped here: strict-mode null-widening is + * now undone precisely by the engine (`undoNullWidening`, driven by the + * schema's null-widening map) the moment the result is captured, so a blind + * `transformNullsToUndefined` at the adapter would only destroy genuine + * `.nullable()` nulls. Subclasses may still override to remap or reshape the + * provider's structured output. 
*/ protected transformStructuredOutput(parsed: unknown): unknown { - return transformNullsToUndefined(parsed) + return parsed } /** diff --git a/packages/openai-base/tests/chat-completions-text.test.ts b/packages/openai-base/tests/chat-completions-text.test.ts index 656dd876b..e1c82e6e5 100644 --- a/packages/openai-base/tests/chat-completions-text.test.ts +++ b/packages/openai-base/tests/chat-completions-text.test.ts @@ -723,7 +723,7 @@ describe('OpenAIBaseChatCompletionsTextAdapter', () => { ) }) - it('transforms null values to undefined', async () => { + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { const nonStreamResponse = { choices: [ { @@ -756,10 +756,13 @@ describe('OpenAIBaseChatCompletionsTextAdapter', () => { // `result.data` is typed as `unknown` from the schema-less call; // narrow it to the shape this test produces. - const data = result.data as { name?: string; nickname?: string } - // null should be transformed to undefined + const data = result.data as { name?: string; nickname?: string | null } + // The adapter no longer strips nulls — strict-mode null-widening is undone + // precisely by the engine, which holds the schema's widening map. A blind + // adapter-level strip would also destroy genuine `.nullable()` nulls, so + // the adapter passes the provider's payload through verbatim. 
expect(data.name).toBe('Alice') - expect(data.nickname).toBeUndefined() + expect(data.nickname).toBeNull() }) it('throws on invalid JSON response', async () => { diff --git a/packages/openai-base/tests/responses-text.test.ts b/packages/openai-base/tests/responses-text.test.ts index 72b198afa..7fd8f6f97 100644 --- a/packages/openai-base/tests/responses-text.test.ts +++ b/packages/openai-base/tests/responses-text.test.ts @@ -1720,7 +1720,7 @@ describe('OpenAIBaseResponsesTextAdapter', () => { ) }) - it('transforms null values to undefined', async () => { + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { const nonStreamResponse = { output: [ { @@ -1755,9 +1755,11 @@ describe('OpenAIBaseResponsesTextAdapter', () => { }, }) - // null should be transformed to undefined + // The adapter no longer strips nulls — strict-mode null-widening is undone + // precisely by the engine, which holds the schema's widening map. A blind + // adapter-level strip would also destroy genuine `.nullable()` nulls. 
expect((result.data as any).name).toBe('Alice') - expect((result.data as any).nickname).toBeUndefined() + expect((result.data as any).nickname).toBeNull() }) it('throws on invalid JSON response', async () => { diff --git a/testing/e2e/fixtures/structured-output-stream/basic.json b/testing/e2e/fixtures/structured-output-stream/basic.json index 62ef046fc..3a01afe8f 100644 --- a/testing/e2e/fixtures/structured-output-stream/basic.json +++ b/testing/e2e/fixtures/structured-output-stream/basic.json @@ -5,7 +5,7 @@ "userMessage": "[structured-stream] recommend a guitar as json" }, "response": { - "content": "{\"name\":\"Fender Stratocaster\",\"price\":1299,\"reason\":\"Versatile tone and comfortable playability\",\"rating\":5}" + "content": "{\"name\":\"Fender Stratocaster\",\"price\":1299,\"reason\":\"Versatile tone and comfortable playability\",\"rating\":5,\"condition\":null}" } } ] diff --git a/testing/e2e/src/lib/schemas.ts b/testing/e2e/src/lib/schemas.ts index 7eaa2d53f..8b9bf6753 100644 --- a/testing/e2e/src/lib/schemas.ts +++ b/testing/e2e/src/lib/schemas.ts @@ -5,6 +5,12 @@ export const guitarRecommendationSchema = z.object({ price: z.number(), reason: z.string(), rating: z.number().min(1).max(5), + // Optional field used to exercise strict-mode null-widening end to end: + // the schema converter widens this to `required` + nullable, so a provider + // returns `null` for an absent value. The engine must undo that widening so + // the field reads back as ABSENT (matching `.optional()`), not `null`. See + // `structured-output-stream.spec.ts`. 
+ condition: z.string().optional(), }) export const imageAnalysisSchema = z.object({ diff --git a/testing/e2e/tests/structured-output-stream.spec.ts b/testing/e2e/tests/structured-output-stream.spec.ts index 113fc61f6..a28d65f18 100644 --- a/testing/e2e/tests/structured-output-stream.spec.ts +++ b/testing/e2e/tests/structured-output-stream.spec.ts @@ -51,6 +51,14 @@ for (const provider of providersFor('structured-output-stream')) { expect(parsed.name).toContain('Fender Stratocaster') expect(parsed.price).toBe(1299) + // `condition` is `.optional()`, so strict-mode widening made the provider + // return `null` for it (see the fixture). The engine must un-widen that + // synthesized null before the streamed `structured-output.complete` event + // reaches the consumer, so the field reads back as ABSENT — not `null`. + // Pre-fix, null-preserving adapters (e.g. openrouter) leaked the `null` + // straight through on the streaming path. + expect('condition' in parsed).toBe(false) + // Verify the response actually streamed (more than one content delta). // A regression that silently fell back to the synthetic single-delta // path would still pass the substring assertion above but fail here. From f63c35b50a6bbb7cb564b21f6bfced648c9b51a3 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Wed, 10 Jun 2026 17:14:34 +1000 Subject: [PATCH 5/8] test(ai): cover native-combined + streaming-rewrite null normalization; fix comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the engine-level un-widening commit, addressing review gaps: - Fix an inaccurate inline comment: the `structured-output.complete` event's value is `{ object, raw, reasoning? }` — it carries no `messageId` (that's on `structured-output.start`). The outbound-chunk rewrite preserves `raw`, not `messageId`. 
- Add native-combined mode coverage (the `harvestCombinedStructuredOutput` capture site was untested): both the harvested Promise result and the synthesized streaming complete event must un-widen. - Add a streaming-rewrite test asserting the engine replaces only `object` (un-widened) while spreading the event's sibling `raw`/`reasoning` through untouched — guards the `{ ...value, object }` contract. - Add a round-trip case proving a genuine `.nullable()` null inside an array item survives (the spot the array/tuple handling could wrongly strip). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/ai/src/activities/chat/index.ts | 4 +- ...ructured-output-null-normalization.test.ts | 156 +++++++++++++++++- 2 files changed, 155 insertions(+), 5 deletions(-) diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index 25a82103b..d71fe0b88 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -2079,8 +2079,8 @@ class TextEngine< this.structuredOutputResult = { data: object, rawText: parsed.raw } // Rewrite the outbound event so the yielded chunk carries the // normalized object (the original `chunk.value` still holds the - // widened one). Preserve every other field — `messageId`, - // `reasoning` — by spreading the original value. + // widened one). Preserve every other field — `raw`, `reasoning` — + // by spreading the original value. 
const value = chunk.value if (object !== parsed.object && value && typeof value === 'object') { outboundChunk = { ...chunk, value: { ...value, object } } diff --git a/packages/ai/tests/chat-structured-output-null-normalization.test.ts b/packages/ai/tests/chat-structured-output-null-normalization.test.ts index 8879af545..24488cbdc 100644 --- a/packages/ai/tests/chat-structured-output-null-normalization.test.ts +++ b/packages/ai/tests/chat-structured-output-null-normalization.test.ts @@ -19,15 +19,51 @@ import type { StreamChunk } from '../src/types' const messages = [{ role: 'user' as const, content: 'go' }] -/** Find the terminal `structured-output.complete` event and return its object. */ -function completeObject(chunks: Array): unknown { +/** Find the terminal `structured-output.complete` event and return its value. */ +function completeValue(chunks: Array): { + object: unknown + raw: string + reasoning?: string +} { const complete = chunks.find( (c) => c.type === EventType.CUSTOM && (c as { name?: string }).name === 'structured-output.complete', ) expect(complete).toBeDefined() - return (complete as { value: { object: unknown } }).value.object + return (complete as { value: { object: unknown; raw: string; reasoning?: string } }) + .value +} + +const completeObject = (chunks: Array): unknown => + completeValue(chunks).object + +/** A native-combined turn: the schema-constrained JSON arrives as assistant text. 
*/ +function textTurn(json: string): Array { + const timestamp = Date.now() + return [ + { type: EventType.RUN_STARTED, runId: 'r1', threadId: 't1', timestamp }, + { + type: EventType.TEXT_MESSAGE_START, + messageId: 'm1', + role: 'assistant', + timestamp, + }, + { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId: 'm1', + delta: json, + timestamp, + }, + { type: EventType.TEXT_MESSAGE_END, messageId: 'm1', timestamp }, + { + type: EventType.RUN_FINISHED, + runId: 'r1', + threadId: 't1', + finishReason: 'stop', + timestamp, + }, + ] as Array } describe('structured output null normalization', () => { @@ -99,6 +135,101 @@ describe('structured output null normalization', () => { expect(object).toEqual({ title: 'Ship it', tag: null }) expect('note' in (object as object)).toBe(false) }) + + it('rewrites only `object`, preserving the event’s `raw` and `reasoning`', async () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + }) + const raw = '{"title":"Ship it","note":null}' + // A NATIVE structuredOutputStream emits the terminal complete event with + // the widened object plus sibling `raw`/`reasoning` fields. The engine's + // outbound rewrite must replace `object` (un-widened) while spreading the + // rest of the value through untouched. 
+ const { adapter } = createMockAdapter({ + structuredOutputStream: () => + (async function* () { + yield { type: EventType.RUN_STARTED, runId: 'r', threadId: 't' } + yield { + type: EventType.CUSTOM, + name: 'structured-output.complete', + value: { + object: { title: 'Ship it', note: null }, + raw, + reasoning: 'thought about it', + }, + } + yield { + type: EventType.RUN_FINISHED, + runId: 'r', + threadId: 't', + finishReason: 'stop', + } + })() as AsyncIterable, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages, + outputSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const value = completeValue(chunks) + expect(value.object).toEqual({ title: 'Ship it' }) + expect('note' in (value.object as object)).toBe(false) + // Sibling fields survive the rewrite. + expect(value.raw).toBe(raw) + expect(value.reasoning).toBe('thought about it') + }) + }) + + // Native-combined mode (adapter declares `supportsCombinedToolsAndSchema`): + // the engine harvests the JSON from the agent loop's accumulated final-turn + // text (`JSON.parse`, which preserves provider nulls) rather than from a + // separate structuredOutput call — a distinct capture site that must also + // un-widen. Covers both Promise and streaming. 
+ describe('native-combined mode', () => { + const outputSchema = z.object({ + title: z.string(), + note: z.string().optional(), + tag: z.string().nullable(), + }) + const json = '{"title":"Ship it","note":null,"tag":null}' + + it('un-widens the harvested Promise result', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn(json)], + supportsCombinedToolsAndSchema: true, + }) + + const result = await chat({ adapter, messages, outputSchema }) + + expect(result).toEqual({ title: 'Ship it', tag: null }) + expect('note' in result).toBe(false) + }) + + it('un-widens the synthesized streaming complete event', async () => { + const { adapter } = createMockAdapter({ + iterations: [textTurn(json)], + supportsCombinedToolsAndSchema: true, + }) + + const chunks = await collectChunks( + chat({ + adapter, + messages, + outputSchema, + stream: true, + }) as unknown as AsyncIterable, + ) + + const object = completeObject(chunks) + expect(object).toEqual({ title: 'Ship it', tag: null }) + expect('note' in (object as object)).toBe(false) + }) }) }) @@ -158,4 +289,23 @@ describe('convertSchemaForStructuredOutput → undoNullWidening round trip', () expect(result).toEqual({ title: 'T' }) expect('meta' in result).toBe(false) }) + + it('keeps a genuine `.nullable()` null inside array items', () => { + // The widener does NOT touch `note` (it's `.nullable()`, not `.optional()`), + // so its null must survive even though it sits inside an array item — the + // exact spot the tuple/array handling could wrongly strip it. 
+ const outputSchema = z.object({ + items: z.array(z.object({ id: z.string(), note: z.string().nullable() })), + }) + + const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) + const payload = { + items: [ + { id: '1', note: null }, + { id: '2', note: 'kept' }, + ], + } + + expect(undoNullWidening(payload, nullWideningMap)).toEqual(payload) + }) }) From 5b263ad17dabe09b7870607da68b417114549e7f Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Wed, 10 Jun 2026 17:25:45 +1000 Subject: [PATCH 6/8] test(openai-base): assert structuredOutputStream passes provider nulls through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit review (PR #732): the non-streaming passthrough assertion had no streaming sibling. Adds a `structuredOutputStream()` case emitting a provider `null` and asserting the terminal `structured-output.complete` object preserves it — guarding against the stream path regressing to a blind null-strip while the non-stream path relies on engine-level un-widening. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ...mpletions-structured-output-stream.test.ts | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts b/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts index 638822f40..f0a68951e 100644 --- a/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts +++ b/packages/openai-base/tests/chat-completions-structured-output-stream.test.ts @@ -147,6 +147,44 @@ describe('OpenAIBaseChatCompletionsTextAdapter.structuredOutputStream', () => { expect(complete!.value.raw).toBe(json) }) + it('passes provider nulls through unchanged (engine un-widens, not the adapter)', async () => { + // Mirror of the non-streaming `transformStructuredOutput` passthrough test + // (`chat-completions-text.test.ts`) for the STREAMING path: the adapter no + // longer strips nulls — strict-mode null-widening is undone precisely by + // the engine, so a blind adapter-level strip would also destroy genuine + // `.nullable()` nulls. Guards against the stream path regressing to a strip + // while the non-stream path doesn't. 
+ const json = '{"name":"Alice","nickname":null}' + setupStreamingMock([deltaChunk(json), finishChunk()]) + const adapter = new TestAdapter() + + const chunks = await collect( + adapter.structuredOutputStream!({ + chatOptions: { + model: 'test-model', + messages: [{ role: 'user', content: 'extract' }], + logger: testLogger, + }, + outputSchema: { + type: 'object', + properties: { + name: { type: 'string' }, + nickname: { type: 'string' }, + }, + required: ['name'], + additionalProperties: false, + }, + }), + ) + + const complete = chunks.find( + (c) => + c.type === 'CUSTOM' && + (c as { name?: string }).name === 'structured-output.complete', + ) as { value: { object: unknown } } | undefined + expect(complete!.value.object).toEqual({ name: 'Alice', nickname: null }) + }) + it('sends response_format: { type: "json_schema", strict: true } in the request', async () => { setupStreamingMock([deltaChunk('{"name":"X","age":1}'), finishChunk()]) const adapter = new TestAdapter() From 49f7a48ab53f5de760767d117542d3c66bbcd86c Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jun 2026 07:34:26 +0000 Subject: [PATCH 7/8] ci: apply automated fixes --- .../chat-structured-output-null-normalization.test.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/ai/tests/chat-structured-output-null-normalization.test.ts b/packages/ai/tests/chat-structured-output-null-normalization.test.ts index 24488cbdc..7d5a372bf 100644 --- a/packages/ai/tests/chat-structured-output-null-normalization.test.ts +++ b/packages/ai/tests/chat-structured-output-null-normalization.test.ts @@ -31,8 +31,9 @@ function completeValue(chunks: Array): { (c as { name?: string }).name === 'structured-output.complete', ) expect(complete).toBeDefined() - return (complete as { value: { object: unknown; raw: string; reasoning?: string } }) - .value + return ( + complete as { value: { object: unknown; raw: string; 
reasoning?: string } } + ).value } const completeObject = (chunks: Array): unknown => @@ -248,7 +249,9 @@ describe('convertSchemaForStructuredOutput → undoNullWidening round trip', () meta: z .object({ author: z.string(), rev: z.number().optional() }) .optional(), // widened object with an inner widened field - items: z.array(z.object({ id: z.string(), label: z.string().optional() })), + items: z.array( + z.object({ id: z.string(), label: z.string().optional() }), + ), }) const { nullWideningMap } = convertSchemaForStructuredOutput(outputSchema) From 0c1be6c16950a9139768d08b6ef8faa16cb5271b Mon Sep 17 00:00:00 2001 From: Drew Hoover Date: Wed, 10 Jun 2026 10:29:41 -0400 Subject: [PATCH 8/8] test(react-native-smoke): register @tanstack/ai-utils in resolution configs The chat activity and schema-converter now import @tanstack/ai-utils, so the React Native smoke graph reaches it. The smoke fixture resolves workspace packages to source via explicit per-tool mappings, so add @tanstack/ai-utils (mirroring @tanstack/ai-event-client) to the tsconfig paths, metro packageEntryPoints, the esbuild alias map, and the import-surface walker. Fixes the TS2307 'Cannot find module @tanstack/ai-utils' in the smoke typecheck. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- testing/react-native-smoke/metro.config.cjs | 1 + testing/react-native-smoke/scripts/assert-import-surface.ts | 1 + testing/react-native-smoke/scripts/esbuild-smoke.ts | 1 + testing/react-native-smoke/tsconfig.json | 1 + 4 files changed, 4 insertions(+) diff --git a/testing/react-native-smoke/metro.config.cjs b/testing/react-native-smoke/metro.config.cjs index d14924c6c..5f221a34a 100644 --- a/testing/react-native-smoke/metro.config.cjs +++ b/testing/react-native-smoke/metro.config.cjs @@ -16,6 +16,7 @@ const packageEntryPoints = new Map([ '@tanstack/ai-event-client', resolve(repoRoot, 'packages/ai-event-client/src/index.ts'), ], + ['@tanstack/ai-utils', resolve(repoRoot, 'packages/ai-utils/src/index.ts')], ['@tanstack/ai-react', resolve(repoRoot, 'packages/ai-react/src/index.ts')], ]) const rewriteOriginRoots = [ diff --git a/testing/react-native-smoke/scripts/assert-import-surface.ts b/testing/react-native-smoke/scripts/assert-import-surface.ts index e0888b5af..d68742d5d 100644 --- a/testing/react-native-smoke/scripts/assert-import-surface.ts +++ b/testing/react-native-smoke/scripts/assert-import-surface.ts @@ -11,6 +11,7 @@ const packageEntries = new Map([ ['@tanstack/ai-react', 'packages/ai-react/src/index.ts'], ['@tanstack/ai-client', 'packages/ai-client/src/index.ts'], ['@tanstack/ai-event-client', 'packages/ai-event-client/src/index.ts'], + ['@tanstack/ai-utils', 'packages/ai-utils/src/index.ts'], ['@tanstack/ai/client', 'packages/ai/src/client.ts'], ]) diff --git a/testing/react-native-smoke/scripts/esbuild-smoke.ts b/testing/react-native-smoke/scripts/esbuild-smoke.ts index a20fdc4bc..9eb7c10a8 100644 --- a/testing/react-native-smoke/scripts/esbuild-smoke.ts +++ b/testing/react-native-smoke/scripts/esbuild-smoke.ts @@ -23,6 +23,7 @@ await build({ repoRoot, 'packages/ai-event-client/src/index.ts', ), + '@tanstack/ai-utils': resolve(repoRoot, 'packages/ai-utils/src/index.ts'), '@tanstack/ai-react': 
resolve(repoRoot, 'packages/ai-react/src/index.ts'), 'react-native': resolve(scriptDir, 'react-native-runtime-stub.tsx'), }, diff --git a/testing/react-native-smoke/tsconfig.json b/testing/react-native-smoke/tsconfig.json index 5723bfb01..b97ea5fef 100644 --- a/testing/react-native-smoke/tsconfig.json +++ b/testing/react-native-smoke/tsconfig.json @@ -17,6 +17,7 @@ "@tanstack/ai-event-client": [ "../../packages/ai-event-client/src/index.ts" ], + "@tanstack/ai-utils": ["../../packages/ai-utils/src/index.ts"], "@tanstack/ai-react": ["../../packages/ai-react/src/index.ts"] } },