From 3d5ba096628e575fc473058d9210ce094828cc1d Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Wed, 13 May 2026 13:39:05 -0700
Subject: [PATCH] chore(core): hygiene cleanup from issue #431

Bundle the items from #431 that still apply to current main:

- A. Warn + emit SYSTEM_DEBUG_TOOL_DROP when providers return more than
  one tool call in a single turn, so dropped extras are observable
  rather than silently lost.
- B. Replace the string-match `isSetupError` check with `PlanningError`
  and `NoStartingUrlError` subclasses (re-exported from the public API),
  so setup-error detection survives refactors of the underlying message
  text.
- D. Bump the `wait` tool's upper bound from 30s to 120s and rewrite
  its `execute` to sleep directly with abort-signal polling instead of
  going through `page.waitForTimeout` (which is abort-blind), so a 120s
  wait can be interrupted by a user abort within ~500ms.
- E. Drop the unused `actionLoopSystemPrompt` export; refactor the
  prompts tests to call `buildActionLoopSystemPrompt(false, false)`
  directly.

Item C (harden image-strip detection) was a no-op: the fallback the
issue refers to is not present in the current main lineage, so there is
nothing to harden.

Item F (persona prompt scroll guidance) is deferred to a follow-up PR
that adds a real `scroll` tool, so the prompt text becomes accurate
rather than being stripped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/core/schemas/webagent-event.json     |  52 ++++++++
 packages/core/src/core.ts                     |   8 +-
 packages/core/src/errors.ts                   |  30 +++++
 packages/core/src/events.ts                   |  18 +++
 packages/core/src/prompts.ts                  |   5 +-
 packages/core/src/tools/webActionTools.ts     |  62 ++++++++--
 packages/core/src/webAgent.ts                 |  47 +++++--
 packages/core/test/events.test.ts             |   1 +
 packages/core/test/prompts.test.ts            |   5 +-
 .../core/test/tools/webActionTools.test.ts    |  38 +++++-
 packages/core/test/webAgent.test.ts           | 117 ++++++++++++++++++
 11 files changed, 347 insertions(+), 36 deletions(-)

diff --git a/packages/core/schemas/webagent-event.json b/packages/core/schemas/webagent-event.json
index 55b28f20..296a72d6 100644
--- a/packages/core/schemas/webagent-event.json
+++ b/packages/core/schemas/webagent-event.json
@@ -4059,6 +4059,41 @@
       ],
       "type": "object"
     },
+    "ToolDropDebugEventData": {
+      "additionalProperties": false,
+      "description": "Event data for tool-drop diagnostics: emitted when a provider returns more than one tool call in a single turn and the extras are dropped. The system prompt instructs the model to call exactly one tool per turn, but some providers occasionally return multiple — this event surfaces those cases so they can be observed instead of silently lost.",
+      "properties": {
+        "droppedCount": {
+          "description": "Number of tool calls that were dropped (returnedCount - 1).",
+          "type": "number"
+        },
+        "droppedTools": {
+          "description": "Names of the dropped tools (in original order, excluding the first).",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "iterationId": {
+          "type": "string"
+        },
+        "keptTool": {
+          "description": "Name of the tool that was kept (first in the provider's response).",
+          "type": "string"
+        },
+        "timestamp": {
+          "type": "number"
+        }
+      },
+      "required": [
+        "droppedCount",
+        "droppedTools",
+        "iterationId",
+        "keptTool",
+        "timestamp"
+      ],
+      "type": "object"
+    },
     "ValidationErrorEventData": {
       "additionalProperties": false,
       "description": "Event data for validation errors during action response processing",
@@ -4520,6 +4555,23 @@
           ],
           "type": "object"
         },
+        {
+          "additionalProperties": false,
+          "properties": {
+            "data": {
+              "$ref": "#/definitions/ToolDropDebugEventData"
+            },
+            "type": {
+              "const": "system:debug_tool_drop",
+              "type": "string"
+            }
+          },
+          "required": [
+            "type",
+            "data"
+          ],
+          "type": "object"
+        },
         {
           "additionalProperties": false,
           "properties": {
diff --git a/packages/core/src/core.ts b/packages/core/src/core.ts
index e70ddf3c..75e4f2c7 100644
--- a/packages/core/src/core.ts
+++ b/packages/core/src/core.ts
@@ -49,7 +49,13 @@ export type { Logger } from "./loggers/types.js";
 export type { Action, TaskValidationResult } from "./schemas.js";
 
 // Error types
-export { RecoverableError, BrowserException, NavigationTimeoutException } from "./errors.js";
+export {
+  RecoverableError,
+  BrowserException,
+  NavigationTimeoutException,
+  PlanningError,
+  NoStartingUrlError,
+} from "./errors.js";
 
 // Navigation retry configuration
 export type { NavigationRetryConfig } from "./browser/navigationRetry.js";
diff --git a/packages/core/src/errors.ts b/packages/core/src/errors.ts
index c25fc736..1a8a0fd9 100644
--- a/packages/core/src/errors.ts
+++ b/packages/core/src/errors.ts
@@ -96,6 +96,36 @@ export class ToolExecutionError extends RecoverableError {
   }
 }
 
+/**
+ * Thrown when task planning fails (model never returns a usable plan after retries).
+ *
+ * Setup error: extends Error rather than RecoverableError because the agent's
+ * execute loop should propagate it to the caller rather than treat it as a
+ * retryable mid-task error.
+ */
+export class PlanningError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "PlanningError";
+  }
+}
+
+/**
+ * Defensive guard: thrown by `navigateToStart` when `this.url` is unexpectedly
+ * unset. Under normal flow `planTask` defaults `this.url` to `about:blank`, so
+ * this should not fire — it exists to fail loudly rather than silently navigate
+ * nowhere if that invariant is ever broken.
+ *
+ * Setup error: extends Error rather than RecoverableError for the same reason
+ * as PlanningError.
+ */
+export class NoStartingUrlError extends Error {
+  constructor(message = "No starting URL determined") {
+    super(message);
+    this.name = "NoStartingUrlError";
+  }
+}
+
 /**
  * Thrown when navigation times out after all retry attempts.
  *
diff --git a/packages/core/src/events.ts b/packages/core/src/events.ts
index 442a0d59..31b2f205 100644
--- a/packages/core/src/events.ts
+++ b/packages/core/src/events.ts
@@ -40,6 +40,7 @@ export enum WebAgentEventType {
   // System/Debug
   SYSTEM_DEBUG_COMPRESSION = "system:debug_compression",
   SYSTEM_DEBUG_MESSAGE = "system:debug_message",
+  SYSTEM_DEBUG_TOOL_DROP = "system:debug_tool_drop",
 
   // CDP endpoint failover
   CDP_ENDPOINT_CONNECTED = "cdp:endpoint_connected",
@@ -270,6 +271,22 @@ export interface MessagesDebugEventData extends WebAgentEventData {
   messages: any[];
 }
 
+/**
+ * Event data for tool-drop diagnostics: emitted when a provider returns more
+ * than one tool call in a single turn and the extras are dropped. The system
+ * prompt instructs the model to call exactly one tool per turn, but some
+ * providers occasionally return multiple — this event surfaces those cases
+ * so they can be observed instead of silently lost.
+ */
+export interface ToolDropDebugEventData extends WebAgentEventData {
+  /** Number of tool calls that were dropped (returnedCount - 1). */
+  droppedCount: number;
+  /** Names of the dropped tools (in original order, excluding the first). */
+  droppedTools: string[];
+  /** Name of the tool that was kept (first in the provider's response). */
+  keptTool: string;
+}
+
 /**
  * Event data for waiting notifications
  */
@@ -377,6 +394,7 @@ export type WebAgentEvent =
     }
   | { type: WebAgentEventType.SYSTEM_DEBUG_COMPRESSION; data: CompressionDebugEventData }
   | { type: WebAgentEventType.SYSTEM_DEBUG_MESSAGE; data: MessagesDebugEventData }
+  | { type: WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP; data: ToolDropDebugEventData }
   | { type: WebAgentEventType.CDP_ENDPOINT_CONNECTED; data: CdpEndpointConnectedEventData }
   | { type: WebAgentEventType.CDP_ENDPOINT_CYCLE; data: CdpEndpointCycleEventData }
   | { type: WebAgentEventType.BROWSER_RECONNECTED; data: BrowserReconnectedEventData }
diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index 2c9b374a..9401ae27 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -43,8 +43,8 @@ export const TOOL_STRINGS = {
       description: "Press Enter key on an element (useful for form submission)",
     },
     wait: {
-      description: "Wait for a specified number of seconds",
-      seconds: "Number of seconds to wait (0-30)",
+      description: "Wait for a specified number of seconds (up to 120 for slow-loading pages)",
+      seconds: "Number of seconds to wait (0-120)",
     },
     goto: {
       description: "Navigate to a URL that was previously seen in the conversation",
@@ -422,7 +422,6 @@ const buildActionLoopSystemPrompt = (
     currentDate: getCurrentFormattedDate(),
   });
 
-export const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);
 export { buildActionLoopSystemPrompt };
 
 /**
diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts
index 8d91f928..7d379677 100644
--- a/packages/core/src/tools/webActionTools.ts
+++ b/packages/core/src/tools/webActionTools.ts
@@ -225,22 +225,58 @@ export function createWebActionTools(context: WebActionContext) {
     wait: tool({
       description: TOOL_STRINGS.webActions.wait.description,
       inputSchema: z.object({
-        seconds: z.number().min(0).max(30).describe(TOOL_STRINGS.webActions.wait.seconds),
+        seconds: z.number().min(0).max(120).describe(TOOL_STRINGS.webActions.wait.seconds),
       }),
       execute: async ({ seconds }) => {
-        // Wait uses browser.performAction which expects value as string
-        const result = await performActionWithValidation(
-          PageAction.Wait,
-          context,
-          undefined,
-          String(seconds),
+        // Sleep here rather than going through browser.performAction (which
+        // calls page.waitForTimeout — a fixed, abort-blind timeout). Polling
+        // the abort signal keeps user aborts responsive to within ~500ms even
+        // at the full 120s cap.
+        return withSpan(
+          SpanName.BROWSER_ACTION,
+          { attributes: { "pilo.browser.action_type": "wait" } },
+          async (span) => {
+            context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
+              action: "wait",
+              value: String(seconds),
+            });
+            context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_STARTED, {
+              action: "wait",
+              value: String(seconds),
+            });
+
+            const ABORT_POLL_MS = 500;
+            const deadline = Date.now() + seconds * 1000;
+            while (Date.now() < deadline) {
+              if (context.abortSignal?.aborted) {
+                const error = new Error("Wait cancelled by abort signal");
+                context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+                  success: false,
+                  action: "wait",
+                  error: error.message,
+                });
+                span.setAttribute("pilo.browser.success", false);
+                throw error;
+              }
+              await new Promise((resolve) =>
+                setTimeout(resolve, Math.min(ABORT_POLL_MS, deadline - Date.now())),
+              );
+            }
+
+            context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+              success: true,
+              action: "wait",
+            });
+            context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
+            span.setAttribute("pilo.browser.success", true);
+
+            return {
+              success: true,
+              action: "wait",
+              value: String(seconds),
+            };
+          },
         );
-
-        if (result.success) {
-          context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
-        }
-
-        return result;
       },
     }),
 
diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts
index 3f12a73d..c0aee042 100644
--- a/packages/core/src/webAgent.ts
+++ b/packages/core/src/webAgent.ts
@@ -20,7 +20,13 @@ import {
 import { SnapshotCompressor } from "./snapshotCompressor.js";
 import { Logger } from "./loggers/types.js";
 import { ConsoleLogger } from "./loggers/console.js";
-import { BrowserDisconnectedError, RecoverableError, ToolExecutionError } from "./errors.js";
+import {
+  BrowserDisconnectedError,
+  NoStartingUrlError,
+  PlanningError,
+  RecoverableError,
+  ToolExecutionError,
+} from "./errors.js";
 import { generateTextWithRetry } from "./utils/retry.js";
 import type { AwaitedProperties } from "./utils/types.js";
 import {
@@ -1016,6 +1022,25 @@ export class WebAgent {
       );
     }
 
+    // The system prompt instructs the model to call exactly one tool per turn,
+    // but providers occasionally return more (especially on retries or with
+    // certain models). Warn + emit a debug event so the drop is observable
+    // instead of silently lost.
+    if (aiResponse.toolResults.length > 1) {
+      const keptTool = aiResponse.toolResults[0].toolName;
+      const droppedTools = aiResponse.toolResults.slice(1).map((r: any) => r.toolName);
+      console.warn(
+        `[WebAgent] Provider returned ${aiResponse.toolResults.length} tool calls in one turn; ` +
+          `keeping '${keptTool}', dropping: ${droppedTools.join(", ")}`,
+      );
+      this.emit(WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP, {
+        iterationId: this.currentIterationId,
+        droppedCount: droppedTools.length,
+        droppedTools,
+        keptTool,
+      });
+    }
+
     const toolResult = aiResponse.toolResults[0];
     const actionOutput = toolResult.output as any;
 
@@ -1465,12 +1490,11 @@ export class WebAgent {
           });
           recordSanitizedException(span, error);
 
-          // Check if the error message already contains "Failed to generate plan" to avoid double-wrapping
-          if (errorMsg.includes("Failed to generate plan")) {
-            throw new Error(errorMsg);
-          } else {
-            throw new Error(`Failed to generate plan: ${errorMsg}`);
+          // Avoid double-wrapping if we already produced a PlanningError up-stack.
+          if (error instanceof PlanningError) {
+            throw error;
           }
+          throw new PlanningError(`Failed to generate plan: ${errorMsg}`);
         }
       },
     );
@@ -1516,14 +1540,11 @@ export class WebAgent {
   }
 
   /**
-   * Check if error is a setup/planning error that should be re-thrown
+   * Check if error is a setup/planning error that should be re-thrown to the
+   * caller rather than converted into a TASK_FAILED result.
    */
   private isSetupError(error: unknown): boolean {
-    return (
-      error instanceof Error &&
-      (error.message.includes("Failed to generate plan") ||
-        error.message.includes("No starting URL"))
-    );
+    return error instanceof PlanningError || error instanceof NoStartingUrlError;
   }
 
   /**
@@ -1620,7 +1641,7 @@ export class WebAgent {
 
   private async navigateToStart(task: string): Promise<void> {
     if (!this.url) {
-      throw new Error("No starting URL determined");
+      throw new NoStartingUrlError();
     }
 
     if (this.url !== "about:blank") {
diff --git a/packages/core/test/events.test.ts b/packages/core/test/events.test.ts
index 574c2c6a..f8f1b271 100644
--- a/packages/core/test/events.test.ts
+++ b/packages/core/test/events.test.ts
@@ -135,6 +135,7 @@ describe("WebAgentEventEmitter", () => {
         "browser:screenshot_captured_image",
         "system:debug_compression",
         "system:debug_message",
+        "system:debug_tool_drop",
         "cdp:endpoint_connected",
         "cdp:endpoint_cycle",
         "browser:reconnected",
diff --git a/packages/core/test/prompts.test.ts b/packages/core/test/prompts.test.ts
index 71c19603..6ded1a1d 100644
--- a/packages/core/test/prompts.test.ts
+++ b/packages/core/test/prompts.test.ts
@@ -1,7 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import {
   buildPlanPrompt,
-  actionLoopSystemPrompt,
   buildActionLoopSystemPrompt,
   buildTaskAndPlanPrompt,
   buildPageSnapshotPrompt,
@@ -10,6 +9,10 @@ import {
   buildExtractionPrompt,
 } from "../src/prompts.js";
 
+// Default action-loop prompt used by the tests below. Mirrors the historical
+// `actionLoopSystemPrompt` export (built with both gating flags off).
+const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);
+
 // Mock Date for consistent test results
 const mockDate = new Date("2024-01-15T10:00:00Z");
 
diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts
index c2bc2e96..9b2693a5 100644
--- a/packages/core/test/tools/webActionTools.test.ts
+++ b/packages/core/test/tools/webActionTools.test.ts
@@ -145,7 +145,9 @@ describe("Web Action Tools", () => {
       expect(tools.enter.description).toBe(
         "Press Enter key on an element (useful for form submission)",
       );
-      expect(tools.wait.description).toBe("Wait for a specified number of seconds");
+      expect(tools.wait.description).toBe(
+        "Wait for a specified number of seconds (up to 120 for slow-loading pages)",
+      );
       expect(tools.goto.description).toBe(
         "Navigate to a URL that was previously seen in the conversation",
       );
@@ -392,20 +394,43 @@ describe("Web Action Tools", () => {
 
   describe("Wait Action", () => {
     it("should execute wait action successfully", async () => {
+      // wait no longer goes through browser.performAction — it sleeps directly
+      // in the tool so the abort signal can interrupt it.
       const performActionSpy = vi.spyOn(mockBrowser, "performAction");
       const emitSpy = vi.spyOn(eventEmitter, "emit");
 
-      const result = await tools.wait.execute({ seconds: 2 });
+      const promise = tools.wait.execute({ seconds: 2 });
+      await vi.advanceTimersByTimeAsync(2000);
+      const result = await promise;
 
-      expect(performActionSpy).toHaveBeenCalledWith("", PageAction.Wait, "2");
+      expect(performActionSpy).not.toHaveBeenCalled();
       expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.AGENT_WAITING, { seconds: 2 });
       expect(result).toEqual({
         success: true,
         action: "wait",
-        value: "2", // performActionWithValidation adds value field
+        value: "2",
       });
     });
 
+    it("should abort wait when abort signal fires", async () => {
+      const controller = new AbortController();
+      context.abortSignal = controller.signal;
+      tools = createWebActionTools(context);
+
+      const promise = tools.wait.execute({ seconds: 60 });
+      // Attach a rejection handler before triggering abort so vitest's
+      // fake-timer scheduler doesn't see an unhandled rejection.
+      const settled = promise.catch((err: unknown) => err);
+
+      controller.abort();
+      // Advance past one poll interval (500ms) so the loop notices the abort.
+      await vi.advanceTimersByTimeAsync(500);
+
+      const err = await settled;
+      expect(err).toBeInstanceOf(Error);
+      expect((err as Error).message).toContain("Wait cancelled by abort signal");
+    });
+
     it("should validate wait time constraints", () => {
       const schema = tools.wait.inputSchema;
 
@@ -415,7 +440,10 @@ describe("Web Action Tools", () => {
       const tooShort = schema.safeParse({ seconds: -1 });
       expect(tooShort.success).toBe(false);
 
-      const tooLong = schema.safeParse({ seconds: 31 });
+      const validLong = schema.safeParse({ seconds: 120 });
+      expect(validLong.success).toBe(true);
+
+      const tooLong = schema.safeParse({ seconds: 121 });
       expect(tooLong.success).toBe(false);
     });
   });
diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts
index 100bfabd..65e1918c 100644
--- a/packages/core/test/webAgent.test.ts
+++ b/packages/core/test/webAgent.test.ts
@@ -5,6 +5,7 @@ import { WebAgentEventEmitter, WebAgentEventType } from "../src/events.js";
 import { LanguageModel, streamText } from "ai";
 import { Logger } from "../src/loggers/types.js";
 import { generateTextWithRetry } from "../src/utils/retry.js";
+import { PlanningError } from "../src/errors.js";
 
 // Mock the AI module
 vi.mock("ai", () => ({
@@ -649,6 +650,17 @@ describe("WebAgent", () => {
       ).rejects.toThrow(/Failed to generate plan/);
     });
 
+    it("should propagate planning failures as PlanningError instances", async () => {
+      mockGenerateTextWithRetry.mockResolvedValueOnce({
+        text: "No tool result",
+        toolResults: [],
+      } as any);
+
+      await expect(
+        webAgent.execute("Test task", { startingUrl: "https://example.com" }),
+      ).rejects.toBeInstanceOf(PlanningError);
+    });
+
     it("should start on about:blank when planner returns no URL", async () => {
       // Planner returns plan without url (e.g., webSearch-enabled scenario)
       mockGenerateTextWithRetry.mockResolvedValueOnce({
@@ -1114,6 +1126,111 @@ describe("WebAgent", () => {
 
       expect(result.success).toBe(true);
     });
+
+    it("should warn and emit SYSTEM_DEBUG_TOOL_DROP when provider returns multiple tool calls", async () => {
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+      // Provider returns 3 tool calls in one turn; only the first ('click')
+      // should be processed, and the extras should be reported.
+      mockStreamText.mockReturnValueOnce(
+        createMockStreamResponse({
+          text: "Multi",
+          toolResults: [
+            {
+              type: "tool-result",
+              toolCallId: "click_1",
+              toolName: "click",
+              input: { ref: "btn1" },
+              output: { action: "click", ref: "btn1", isTerminal: false },
+            },
+            {
+              type: "tool-result",
+              toolCallId: "fill_1",
+              toolName: "fill",
+              input: { ref: "input1", value: "x" },
+              output: { action: "fill", ref: "input1", isTerminal: false },
+            },
+            {
+              type: "tool-result",
+              toolCallId: "done_1",
+              toolName: "done",
+              input: { result: "x" },
+              output: { action: "done", result: "x", isTerminal: true },
+            },
+          ],
+          response: {
+            messages: [
+              { role: "assistant", content: "Multi" },
+              {
+                role: "tool",
+                content: [
+                  {
+                    type: "tool-result",
+                    toolCallId: "click_1",
+                    toolName: "click",
+                    output: { action: "click", ref: "btn1" },
+                  },
+                ],
+              },
+            ],
+          },
+        }) as any,
+      );
+
+      // Second turn: wrap up with a clean done.
+      mockStreamText.mockReturnValueOnce(
+        createMockStreamResponse({
+          text: "Done",
+          toolResults: [
+            {
+              type: "tool-result",
+              toolCallId: "done_2",
+              toolName: "done",
+              input: { result: "Clicked" },
+              output: { action: "done", result: "Clicked", isTerminal: true },
+            },
+          ],
+          response: {
+            messages: [
+              { role: "assistant", content: "Done" },
+              {
+                role: "tool",
+                content: [
+                  {
+                    type: "tool-result",
+                    toolCallId: "done_2",
+                    toolName: "done",
+                    output: { action: "done", result: "Clicked", isTerminal: true },
+                  },
+                ],
+              },
+            ],
+          },
+        }) as any,
+      );
+
+      mockGenerateTextWithRetry.mockResolvedValueOnce(mockValidationResponse("complete"));
+
+      const result = await webAgent.execute("Multi-tool", {
+        startingUrl: "https://example.com",
+      });
+
+      expect(result.success).toBe(true);
+
+      const dropEvent = mockLogger.events.find(
+        (e) => e.type === WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP,
+      );
+      expect(dropEvent).toBeDefined();
+      expect(dropEvent?.data.droppedCount).toBe(2);
+      expect(dropEvent?.data.droppedTools).toEqual(["fill", "done"]);
+      expect(dropEvent?.data.keptTool).toBe("click");
+
+      expect(warnSpy).toHaveBeenCalledWith(
+        expect.stringContaining("Provider returned 3 tool calls in one turn"),
+      );
+
+      warnSpy.mockRestore();
+    });
   });
 
   describe("error handling", () => {