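Add generator-tool-test reference agent (TRI-10306 repro) and register it and tool-model-output-test in the ai-chat reference app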

ericallam · ericallam · commit fc915f670605 · 2026-06-01T17:39:57.000+01:00
diff --git a/references/ai-chat/src/app/actions.ts b/references/ai-chat/src/app/actions.ts
@@ -9,6 +9,8 @@ import type {
   aiChatSession,
   upgradeTestAgent,
   cfTrustTestAgent,
+  generatorToolTest,
+  toolModelOutputTest,
 } from "@/trigger/chat";
 import type { ChatUiMessage } from "@/lib/chat-tools-schemas";
 import { prisma } from "@/lib/prisma";
@@ -22,7 +24,9 @@ export type ChatReferenceTaskId =
   | "ai-chat-raw"
   | "ai-chat-session"
   | "upgrade-test"
-  | "cf-trust-test";
+  | "cf-trust-test"
+  | "generator-tool-test"
+  | "tool-model-output-test";
 
 function isChatReferenceTaskId(id: string): id is ChatReferenceTaskId {
   return (
@@ -31,7 +35,9 @@ function isChatReferenceTaskId(id: string): id is ChatReferenceTaskId {
     id === "ai-chat-raw" ||
     id === "ai-chat-session" ||
     id === "upgrade-test" ||
-    id === "cf-trust-test"
+    id === "cf-trust-test" ||
+    id === "generator-tool-test" ||
+    id === "tool-model-output-test"
   );
 }
 
@@ -42,7 +48,9 @@ type TaskIdentifierForChat =
   | (typeof aiChatRaw)["id"]
   | (typeof aiChatSession)["id"]
   | (typeof upgradeTestAgent)["id"]
-  | (typeof cfTrustTestAgent)["id"];
+  | (typeof cfTrustTestAgent)["id"]
+  | (typeof generatorToolTest)["id"]
+  | (typeof toolModelOutputTest)["id"];
 
 /**
  * Server-mediated start: creates the Session row + triggers the first
@@ -75,6 +83,8 @@ const startActionByTaskId: Record<
   "ai-chat-session": startChatSessionFor("ai-chat-session"),
   "upgrade-test": startChatSessionFor("upgrade-test"),
   "cf-trust-test": startChatSessionFor("cf-trust-test"),
+  "generator-tool-test": startChatSessionFor("generator-tool-test"),
+  "tool-model-output-test": startChatSessionFor("tool-model-output-test"),
 };
 
 export async function startChatSession(input: {
diff --git a/references/ai-chat/src/components/chat-sidebar.tsx b/references/ai-chat/src/components/chat-sidebar.tsx
@@ -120,6 +120,7 @@ export function ChatSidebar({
             <option value="stress-emit">stress-emit (UI stress test)</option>
             <option value="cf-trust-test">cf-trust-test (Cloudflare proxy trust)</option>
             <option value="tool-model-output-test">tool-model-output-test (toModelOutput cross-turn)</option>
+            <option value="generator-tool-test">generator-tool-test (async-generator tool output)</option>
           </select>
         </div>
         <label
diff --git a/references/ai-chat/src/trigger/chat.ts b/references/ai-chat/src/trigger/chat.ts
@@ -1193,3 +1193,108 @@ export const toolModelOutputFnTest = chat.agent({
     });
   },
 });
+
+// ============================================================================
+// generator-tool-test: TRI-10306 repro
+//
+// GovSignals reported that an async-generator tool's final yielded object shows
+// up as `{}` for BOTH the telemetry trace AND the tool result the model
+// receives. On ai v6 + @ai-sdk/provider-utils 4.0.x (what this reference
+// resolves) the traced `executeTool` consumes the generator correctly — the
+// LAST yield becomes the output — so the expectation here is a NON-repro. This
+// agent makes that falsifiable.
+//
+// The tool mirrors the customer's shape exactly: yield a progress chunk, then
+// yield the final structured result (no explicit `return`). The final object
+// carries GEN_MARKER so every observation point has a deterministic,
+// model-independent signal:
+//
+//   - onStepFinish({ toolResults }): exactly what `executeTool` produced. `{}`
+//     or a missing marker => Symptom A (model side) reproduced.
+//   - logGeneratorProbe(messages): on turn 2+, whether the prior-turn tool
+//     result still carries GEN_MARKER after the SDK re-converts history.
+//   - The `ai.toolCall.result` span attribute (inspect via dashboard / MCP):
+//     Symptom B (telemetry side). `{}` there while the probes are correct =>
+//     it's our OTel attribute flattening, not the model input.
+// ============================================================================
+
+const GEN_MARKER = "LIBRARY-MARKER-4731"; // sentinel placed in the tool's final yield; the probes look for it in serialized output
+
+const searchLibrary = tool({
+  description:
+    "Search the library. You MUST call this tool to answer any library question. " +
+    "It streams a progress update, then returns the structured results.",
+  inputSchema: z.object({ query: z.string() }),
+  // Same shape as the customer's tool: one progress chunk first, then the
+  // structured result as the FINAL yield — deliberately without a `return`.
+  async *execute({ query }) {
+    const results = [{ id: 1, title: "Durable agents 101" }];
+    yield { text: `Searching library for "${query}"…` };
+    yield {
+      success: true, marker: GEN_MARKER,
+      data: { results },
+      metadata: { totalFound: 1 },
+    };
+  },
+});
+
+/**
+ * Cross-turn probe that does not depend on what the model says: for every
+ * tool-role message in `messages`, logs whether its JSON-serialized content
+ * contains GEN_MARKER, plus the first 500 characters of that JSON. Meant for
+ * the prior-turn history the SDK re-converts and hands to `run()` on turn 2+.
+ */
+function logGeneratorProbe(messages: ModelMessage[]) {
+  const messageCount = messages.length;
+  messages
+    .filter((message) => message.role === "tool")
+    .forEach((message) => {
+      const json = JSON.stringify(message.content);
+      const containsMarker = json.includes(GEN_MARKER);
+      const serialized = json.slice(0, 500);
+      logger.info("generator-tool-test: incoming tool result", { messageCount, containsMarker, serialized });
+    });
+}
+
+// TRI-10306 repro agent: logs GEN_MARKER presence in step tool results and prior-turn history.
+export const generatorToolTest = chat.agent({
+  id: "generator-tool-test",
+  idleTimeoutInSeconds: 60,
+  // Declared on the config so the SDK threads them through its internal
+  // convertToModelMessages on turn 2+; handed back typed on the run payload.
+  tools: { searchLibrary },
+  run: async ({ messages, tools, signal }) => {
+    logGeneratorProbe(messages);
+    return streamText({
+      model: openai("gpt-4o-mini"),
+      system:
+        "You are a library assistant. For ANY user question you MUST first call " +
+        "the searchLibrary tool, then answer based on its result. If the user asks " +
+        "for the marker, report the `marker` field from the tool result verbatim.",
+      messages,
+      tools,
+      stopWhen: stepCountIs(5),
+      abortSignal: signal,
+      // Mirror the customer's telemetry config so the AI SDK emits an `ai.toolCall`
+      // span carrying `ai.toolCall.result` (Symptom B: is it flattened to `{}`?).
+      experimental_telemetry: {
+        isEnabled: true,
+        recordInputs: true,
+        recordOutputs: true,
+        functionId: "generator-tool-test.tool-loop",
+      },
+      // Symptom A: model-independent capture of what executeTool produced this
+      // step. JSON.stringify(undefined) is undefined, hence the "undefined" fallback.
+      onStepFinish: ({ toolResults }) => {
+        for (const tr of toolResults ?? []) {
+          const serialized = JSON.stringify((tr as { output?: unknown }).output) ?? "undefined";
+          logger.info("generator-tool-test: onStepFinish toolResult", {
+            toolName: (tr as { toolName?: string }).toolName,
+            containsMarker: serialized.includes(GEN_MARKER),
+            output: serialized.slice(0, 500),
+          });
+        }
+      },
+    });
+  },
+});