mozilla · lmorchard · May 13, 2026
diff --git a/packages/core/schemas/webagent-event.json b/packages/core/schemas/webagent-event.json
@@ -4059,6 +4059,41 @@
       ],
       "type": "object"
     },
+    "ToolDropDebugEventData": {
+      "additionalProperties": false,
+      "description": "Event data for tool-drop diagnostics: emitted when a provider returns more than one tool call in a single turn and the extras are dropped. The system prompt instructs the model to call exactly one tool per turn, but some providers occasionally return multiple — this event surfaces those cases so they can be observed instead of silently lost.",
+      "properties": {
+        "droppedCount": {
+          "description": "Number of tool calls that were dropped (returnedCount - 1).",
+          "type": "number"
+        },
+        "droppedTools": {
+          "description": "Names of the dropped tools (in original order, excluding the first).",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "iterationId": {
+          "type": "string"
+        },
+        "keptTool": {
+          "description": "Name of the tool that was kept (first in the provider's response).",
+          "type": "string"
+        },
+        "timestamp": {
+          "type": "number"
+        }
+      },
+      "required": [
+        "droppedCount",
+        "droppedTools",
+        "iterationId",
+        "keptTool",
+        "timestamp"
+      ],
+      "type": "object"
+    },
     "ValidationErrorEventData": {
       "additionalProperties": false,
       "description": "Event data for validation errors during action response processing",
@@ -4520,6 +4555,23 @@
           ],
           "type": "object"
         },
+        {
+          "additionalProperties": false,
+          "properties": {
+            "data": {
+              "$ref": "#/definitions/ToolDropDebugEventData"
+            },
+            "type": {
+              "const": "system:debug_tool_drop",
+              "type": "string"
+            }
+          },
+          "required": [
+            "type",
+            "data"
+          ],
+          "type": "object"
+        },
         {
           "additionalProperties": false,
           "properties": {

diff --git a/packages/core/src/core.ts b/packages/core/src/core.ts
@@ -49,7 +49,13 @@ export type { Logger } from "./loggers/types.js";
 export type { Action, TaskValidationResult } from "./schemas.js";
 
 // Error types
-export { RecoverableError, BrowserException, NavigationTimeoutException } from "./errors.js";
+export {
+  RecoverableError,
+  BrowserException,
+  NavigationTimeoutException,
+  PlanningError,
+  NoStartingUrlError,
+} from "./errors.js";
 
 // Navigation retry configuration
 export type { NavigationRetryConfig } from "./browser/navigationRetry.js";

diff --git a/packages/core/src/errors.ts b/packages/core/src/errors.ts
@@ -96,6 +96,36 @@ export class ToolExecutionError extends RecoverableError {
   }
 }
 
+/**
+ * Thrown when task planning fails (model never returns a usable plan after retries).
+ *
+ * Setup error: extends Error rather than RecoverableError because the agent's
+ * execute loop should propagate it to the caller rather than treat it as a
+ * retryable mid-task error.
+ */
+export class PlanningError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "PlanningError";
+  }
+}
+
+/**
+ * Defensive guard: thrown by `navigateToStart` when `this.url` is unexpectedly
+ * unset. Under normal flow `planTask` defaults `this.url` to `about:blank`, so
+ * this should not fire — it exists to fail loudly rather than silently navigate
+ * nowhere if that invariant is ever broken.
+ *
+ * Setup error: extends Error rather than RecoverableError for the same reason
+ * as PlanningError.
+ */
+export class NoStartingUrlError extends Error {
+  constructor(message = "No starting URL determined") {
+    super(message);
+    this.name = "NoStartingUrlError";
+  }
+}
+
 /**
  * Thrown when navigation times out after all retry attempts.
  *

diff --git a/packages/core/src/events.ts b/packages/core/src/events.ts
@@ -40,6 +40,7 @@ export enum WebAgentEventType {
   // System/Debug
   SYSTEM_DEBUG_COMPRESSION = "system:debug_compression",
   SYSTEM_DEBUG_MESSAGE = "system:debug_message",
+  SYSTEM_DEBUG_TOOL_DROP = "system:debug_tool_drop",
 
   // CDP endpoint failover
   CDP_ENDPOINT_CONNECTED = "cdp:endpoint_connected",
@@ -270,6 +271,22 @@ export interface MessagesDebugEventData extends WebAgentEventData {
   messages: any[];
 }
 
+/**
+ * Event data for tool-drop diagnostics: emitted when a provider returns more
+ * than one tool call in a single turn and the extras are dropped. The system
+ * prompt instructs the model to call exactly one tool per turn, but some
+ * providers occasionally return multiple — this event surfaces those cases
+ * so they can be observed instead of silently lost.
+ */
+export interface ToolDropDebugEventData extends WebAgentEventData {
+  /** Number of tool calls that were dropped (returnedCount - 1). */
+  droppedCount: number;
+  /** Names of the dropped tools (in original order, excluding the first). */
+  droppedTools: string[];
+  /** Name of the tool that was kept (first in the provider's response). */
+  keptTool: string;
+}
+
 /**
  * Event data for waiting notifications
  */
@@ -377,6 +394,7 @@ export type WebAgentEvent =
     }
   | { type: WebAgentEventType.SYSTEM_DEBUG_COMPRESSION; data: CompressionDebugEventData }
   | { type: WebAgentEventType.SYSTEM_DEBUG_MESSAGE; data: MessagesDebugEventData }
+  | { type: WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP; data: ToolDropDebugEventData }
   | { type: WebAgentEventType.CDP_ENDPOINT_CONNECTED; data: CdpEndpointConnectedEventData }
   | { type: WebAgentEventType.CDP_ENDPOINT_CYCLE; data: CdpEndpointCycleEventData }
   | { type: WebAgentEventType.BROWSER_RECONNECTED; data: BrowserReconnectedEventData }

diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
@@ -43,8 +43,8 @@ export const TOOL_STRINGS = {
       description: "Press Enter key on an element (useful for form submission)",
     },
     wait: {
-      description: "Wait for a specified number of seconds",
-      seconds: "Number of seconds to wait (0-30)",
+      description: "Wait for a specified number of seconds (up to 120 for slow-loading pages)",
+      seconds: "Number of seconds to wait (0-120)",
     },
     goto: {
       description: "Navigate to a URL that was previously seen in the conversation",
@@ -422,7 +422,6 @@ const buildActionLoopSystemPrompt = (
     currentDate: getCurrentFormattedDate(),
   });
 
-export const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);
 export { buildActionLoopSystemPrompt };
 
 /**

diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts
@@ -225,22 +225,58 @@ export function createWebActionTools(context: WebActionContext) {
     wait: tool({
       description: TOOL_STRINGS.webActions.wait.description,
       inputSchema: z.object({
-        seconds: z.number().min(0).max(30).describe(TOOL_STRINGS.webActions.wait.seconds),
+        seconds: z.number().min(0).max(120).describe(TOOL_STRINGS.webActions.wait.seconds),
       }),
       execute: async ({ seconds }) => {
-        // Wait uses browser.performAction which expects value as string
-        const result = await performActionWithValidation(
-          PageAction.Wait,
-          context,
-          undefined,
-          String(seconds),
+        // Sleep here rather than going through browser.performAction (which
+        // calls page.waitForTimeout — a fixed, abort-blind timeout). Polling
+        // the abort signal keeps user aborts responsive to within ~500ms even
+        // at the full 120s cap.
+        return withSpan(
+          SpanName.BROWSER_ACTION,
+          { attributes: { "pilo.browser.action_type": "wait" } },
+          async (span) => {
+            context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
+              action: "wait",
+              value: String(seconds),
+            });
+            context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_STARTED, {
+              action: "wait",
+              value: String(seconds),
+            });
+
+            const ABORT_POLL_MS = 500;
+            const deadline = Date.now() + seconds * 1000;
+            while (Date.now() < deadline) {
+              if (context.abortSignal?.aborted) {
+                const error = new Error("Wait cancelled by abort signal");
+                context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+                  success: false,
+                  action: "wait",
+                  error: error.message,
+                });
+                span.setAttribute("pilo.browser.success", false);
+                throw error;
+              }
+              await new Promise((resolve) =>
+                setTimeout(resolve, Math.min(ABORT_POLL_MS, deadline - Date.now())),
+              );
+            }
+
+            context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+              success: true,
+              action: "wait",
+            });
+            context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
+            span.setAttribute("pilo.browser.success", true);
+
+            return {
+              success: true,
+              action: "wait",
+              value: String(seconds),
+            };
+          },
         );
-
-        if (result.success) {
-          context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
-        }
-
-        return result;
       },
     }),
 

diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts
@@ -20,7 +20,13 @@ import {
 import { SnapshotCompressor } from "./snapshotCompressor.js";
 import { Logger } from "./loggers/types.js";
 import { ConsoleLogger } from "./loggers/console.js";
-import { BrowserDisconnectedError, RecoverableError, ToolExecutionError } from "./errors.js";
+import {
+  BrowserDisconnectedError,
+  NoStartingUrlError,
+  PlanningError,
+  RecoverableError,
+  ToolExecutionError,
+} from "./errors.js";
 import { generateTextWithRetry } from "./utils/retry.js";
 import type { AwaitedProperties } from "./utils/types.js";
 import {
@@ -1016,6 +1022,25 @@ export class WebAgent {
       );
     }
 
+    // The system prompt instructs the model to call exactly one tool per turn,
+    // but providers occasionally return more (especially on retries or with
+    // certain models). Warn + emit a debug event so the drop is observable
+    // instead of silently lost.
+    if (aiResponse.toolResults.length > 1) {
+      const keptTool = aiResponse.toolResults[0].toolName;
+      const droppedTools = aiResponse.toolResults.slice(1).map((r: any) => r.toolName);
+      console.warn(
+        `[WebAgent] Provider returned ${aiResponse.toolResults.length} tool calls in one turn; ` +
+          `keeping '${keptTool}', dropping: ${droppedTools.join(", ")}`,
+      );
+      this.emit(WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP, {
+        iterationId: this.currentIterationId,
+        droppedCount: droppedTools.length,
+        droppedTools,
+        keptTool,
+      });
+    }
+
     const toolResult = aiResponse.toolResults[0];
     const actionOutput = toolResult.output as any;
 
@@ -1465,12 +1490,11 @@ export class WebAgent {
           });
           recordSanitizedException(span, error);
 
-          // Check if the error message already contains "Failed to generate plan" to avoid double-wrapping
-          if (errorMsg.includes("Failed to generate plan")) {
-            throw new Error(errorMsg);
-          } else {
-            throw new Error(`Failed to generate plan: ${errorMsg}`);
+          // Avoid double-wrapping if we already produced a PlanningError up-stack.
+          if (error instanceof PlanningError) {
+            throw error;
           }
+          throw new PlanningError(`Failed to generate plan: ${errorMsg}`);
         }
       },
     );
@@ -1516,14 +1540,11 @@ export class WebAgent {
   }
 
   /**
-   * Check if error is a setup/planning error that should be re-thrown
+   * Check if error is a setup/planning error that should be re-thrown to the
+   * caller rather than converted into a TASK_FAILED result.
    */
   private isSetupError(error: unknown): boolean {
-    return (
-      error instanceof Error &&
-      (error.message.includes("Failed to generate plan") ||
-        error.message.includes("No starting URL"))
-    );
+    return error instanceof PlanningError || error instanceof NoStartingUrlError;
   }
 
   /**
@@ -1620,7 +1641,7 @@ export class WebAgent {
 
   private async navigateToStart(task: string): Promise<void> {
     if (!this.url) {
-      throw new Error("No starting URL determined");
+      throw new NoStartingUrlError();
     }
 
     if (this.url !== "about:blank") {

diff --git a/packages/core/test/events.test.ts b/packages/core/test/events.test.ts
@@ -135,6 +135,7 @@ describe("WebAgentEventEmitter", () => {
         "browser:screenshot_captured_image",
         "system:debug_compression",
         "system:debug_message",
+        "system:debug_tool_drop",
         "cdp:endpoint_connected",
         "cdp:endpoint_cycle",
         "browser:reconnected",

diff --git a/packages/core/test/prompts.test.ts b/packages/core/test/prompts.test.ts
@@ -1,7 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import {
   buildPlanPrompt,
-  actionLoopSystemPrompt,
   buildActionLoopSystemPrompt,
   buildTaskAndPlanPrompt,
   buildPageSnapshotPrompt,
@@ -10,6 +9,10 @@ import {
   buildExtractionPrompt,
 } from "../src/prompts.js";
 
+// Default action-loop prompt used by the tests below. Mirrors the historical
+// `actionLoopSystemPrompt` export (built with both gating flags off).
+const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);
+
 // Mock Date for consistent test results
 const mockDate = new Date("2024-01-15T10:00:00Z");