Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions packages/core/schemas/webagent-event.json
Original file line number Diff line number Diff line change
Expand Up @@ -4059,6 +4059,41 @@
],
"type": "object"
},
"ToolDropDebugEventData": {
"additionalProperties": false,
"description": "Event data for tool-drop diagnostics: emitted when a provider returns more than one tool call in a single turn and the extras are dropped. The system prompt instructs the model to call exactly one tool per turn, but some providers occasionally return multiple — this event surfaces those cases so they can be observed instead of silently lost.",
"properties": {
"droppedCount": {
"description": "Number of tool calls that were dropped (returnedCount - 1).",
"type": "number"
},
"droppedTools": {
"description": "Names of the dropped tools (in original order, excluding the first).",
"items": {
"type": "string"
},
"type": "array"
},
"iterationId": {
"type": "string"
},
"keptTool": {
"description": "Name of the tool that was kept (first in the provider's response).",
"type": "string"
},
"timestamp": {
"type": "number"
}
},
"required": [
"droppedCount",
"droppedTools",
"iterationId",
"keptTool",
"timestamp"
],
"type": "object"
},
"ValidationErrorEventData": {
"additionalProperties": false,
"description": "Event data for validation errors during action response processing",
Expand Down Expand Up @@ -4520,6 +4555,23 @@
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
"data": {
"$ref": "#/definitions/ToolDropDebugEventData"
},
"type": {
"const": "system:debug_tool_drop",
"type": "string"
}
},
"required": [
"type",
"data"
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
Expand Down
8 changes: 7 additions & 1 deletion packages/core/src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ export type { Logger } from "./loggers/types.js";
export type { Action, TaskValidationResult } from "./schemas.js";

// Error types
export { RecoverableError, BrowserException, NavigationTimeoutException } from "./errors.js";
export {
RecoverableError,
BrowserException,
NavigationTimeoutException,
PlanningError,
NoStartingUrlError,
} from "./errors.js";

// Navigation retry configuration
export type { NavigationRetryConfig } from "./browser/navigationRetry.js";
Expand Down
30 changes: 30 additions & 0 deletions packages/core/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,36 @@ export class ToolExecutionError extends RecoverableError {
}
}

/**
 * Signals that task planning failed: the model never produced a usable plan,
 * even after retries.
 *
 * This is a setup-phase failure. It deliberately derives from plain `Error`
 * instead of `RecoverableError`, so the agent's execute loop lets it bubble up
 * to the caller rather than handling it as a retryable mid-task error.
 */
export class PlanningError extends Error {
  /**
   * @param message - Human-readable description of why planning failed.
   */
  constructor(message: string) {
    super(message);
    // Tag the instance so logs and stack traces show the specific error type.
    this.name = "PlanningError";
  }
}

/**
 * Defensive guard raised by `navigateToStart` when `this.url` turns out to be
 * unset.
 *
 * In the normal flow `planTask` falls back to `about:blank` for `this.url`, so
 * this error is not expected to fire. It exists so that a broken invariant
 * fails loudly instead of the agent silently navigating nowhere.
 *
 * Like PlanningError, this is a setup-phase failure: it derives from plain
 * `Error` instead of `RecoverableError` so it propagates to the caller rather
 * than being treated as a retryable mid-task error.
 */
export class NoStartingUrlError extends Error {
  /**
   * @param message - Optional override for the default error text.
   */
  constructor(message = "No starting URL determined") {
    super(message);
    // Tag the instance so logs and stack traces show the specific error type.
    this.name = "NoStartingUrlError";
  }
}

/**
* Thrown when navigation times out after all retry attempts.
*
Expand Down
18 changes: 18 additions & 0 deletions packages/core/src/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export enum WebAgentEventType {
// System/Debug
SYSTEM_DEBUG_COMPRESSION = "system:debug_compression",
SYSTEM_DEBUG_MESSAGE = "system:debug_message",
SYSTEM_DEBUG_TOOL_DROP = "system:debug_tool_drop",

// CDP endpoint failover
CDP_ENDPOINT_CONNECTED = "cdp:endpoint_connected",
Expand Down Expand Up @@ -270,6 +271,22 @@ export interface MessagesDebugEventData extends WebAgentEventData {
messages: any[];
}

// Payload type paired with WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP in the
// WebAgentEvent union.
// NOTE(review): the JSDoc text on this interface and its members appears
// verbatim as the `description` strings of the `ToolDropDebugEventData` entry
// in schemas/webagent-event.json — presumably that schema is generated from
// these comments, so regenerate it if the JSDoc wording changes (confirm).
// NOTE(review): the schema entry also requires `iterationId` and `timestamp`,
// which are not declared here — presumably inherited from WebAgentEventData.
/**
* Event data for tool-drop diagnostics: emitted when a provider returns more
* than one tool call in a single turn and the extras are dropped. The system
* prompt instructs the model to call exactly one tool per turn, but some
* providers occasionally return multiple — this event surfaces those cases
* so they can be observed instead of silently lost.
*/
export interface ToolDropDebugEventData extends WebAgentEventData {
// `returnedCount` in the description below is not a field of this event; it
// stands for the total number of tool calls returned in the turn. The emitter
// sets this value to `droppedTools.length`.
/** Number of tool calls that were dropped (returnedCount - 1). */
droppedCount: number;
// Built by the emitter from the tool results after the first one, mapped to
// their tool names.
/** Names of the dropped tools (in original order, excluding the first). */
droppedTools: string[];
// Tool name of the first tool result, which is the one the agent goes on to
// process.
/** Name of the tool that was kept (first in the provider's response). */
keptTool: string;
}

/**
* Event data for waiting notifications
*/
Expand Down Expand Up @@ -377,6 +394,7 @@ export type WebAgentEvent =
}
| { type: WebAgentEventType.SYSTEM_DEBUG_COMPRESSION; data: CompressionDebugEventData }
| { type: WebAgentEventType.SYSTEM_DEBUG_MESSAGE; data: MessagesDebugEventData }
| { type: WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP; data: ToolDropDebugEventData }
| { type: WebAgentEventType.CDP_ENDPOINT_CONNECTED; data: CdpEndpointConnectedEventData }
| { type: WebAgentEventType.CDP_ENDPOINT_CYCLE; data: CdpEndpointCycleEventData }
| { type: WebAgentEventType.BROWSER_RECONNECTED; data: BrowserReconnectedEventData }
Expand Down
5 changes: 2 additions & 3 deletions packages/core/src/prompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ export const TOOL_STRINGS = {
description: "Press Enter key on an element (useful for form submission)",
},
wait: {
description: "Wait for a specified number of seconds",
seconds: "Number of seconds to wait (0-30)",
description: "Wait for a specified number of seconds (up to 120 for slow-loading pages)",
seconds: "Number of seconds to wait (0-120)",
},
goto: {
description: "Navigate to a URL that was previously seen in the conversation",
Expand Down Expand Up @@ -422,7 +422,6 @@ const buildActionLoopSystemPrompt = (
currentDate: getCurrentFormattedDate(),
});

export const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);
export { buildActionLoopSystemPrompt };

/**
Expand Down
62 changes: 49 additions & 13 deletions packages/core/src/tools/webActionTools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,22 +225,58 @@ export function createWebActionTools(context: WebActionContext) {
wait: tool({
description: TOOL_STRINGS.webActions.wait.description,
inputSchema: z.object({
seconds: z.number().min(0).max(30).describe(TOOL_STRINGS.webActions.wait.seconds),
seconds: z.number().min(0).max(120).describe(TOOL_STRINGS.webActions.wait.seconds),
}),
execute: async ({ seconds }) => {
// Wait uses browser.performAction which expects value as string
const result = await performActionWithValidation(
PageAction.Wait,
context,
undefined,
String(seconds),
// Sleep here rather than going through browser.performAction (which
// calls page.waitForTimeout — a fixed, abort-blind timeout). Polling
// the abort signal keeps user aborts responsive to within ~500ms even
// at the full 120s cap.
return withSpan(
SpanName.BROWSER_ACTION,
{ attributes: { "pilo.browser.action_type": "wait" } },
async (span) => {
context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
action: "wait",
value: String(seconds),
});
context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_STARTED, {
action: "wait",
value: String(seconds),
});

const ABORT_POLL_MS = 500;
const deadline = Date.now() + seconds * 1000;
while (Date.now() < deadline) {
if (context.abortSignal?.aborted) {
const error = new Error("Wait cancelled by abort signal");
context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
success: false,
action: "wait",
error: error.message,
});
span.setAttribute("pilo.browser.success", false);
throw error;
}
await new Promise((resolve) =>
setTimeout(resolve, Math.min(ABORT_POLL_MS, deadline - Date.now())),
);
}

context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
success: true,
action: "wait",
});
context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
span.setAttribute("pilo.browser.success", true);

return {
success: true,
action: "wait",
value: String(seconds),
};
},
);

if (result.success) {
context.eventEmitter.emit(WebAgentEventType.AGENT_WAITING, { seconds });
}

return result;
},
}),

Expand Down
47 changes: 34 additions & 13 deletions packages/core/src/webAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ import {
import { SnapshotCompressor } from "./snapshotCompressor.js";
import { Logger } from "./loggers/types.js";
import { ConsoleLogger } from "./loggers/console.js";
import { BrowserDisconnectedError, RecoverableError, ToolExecutionError } from "./errors.js";
import {
BrowserDisconnectedError,
NoStartingUrlError,
PlanningError,
RecoverableError,
ToolExecutionError,
} from "./errors.js";
import { generateTextWithRetry } from "./utils/retry.js";
import type { AwaitedProperties } from "./utils/types.js";
import {
Expand Down Expand Up @@ -1016,6 +1022,25 @@ export class WebAgent {
);
}

// The system prompt instructs the model to call exactly one tool per turn,
// but providers occasionally return more (especially on retries or with
// certain models). Warn + emit a debug event so the drop is observable
// instead of silently lost.
if (aiResponse.toolResults.length > 1) {
const keptTool = aiResponse.toolResults[0].toolName;
const droppedTools = aiResponse.toolResults.slice(1).map((r: any) => r.toolName);
console.warn(
`[WebAgent] Provider returned ${aiResponse.toolResults.length} tool calls in one turn; ` +
`keeping '${keptTool}', dropping: ${droppedTools.join(", ")}`,
);
this.emit(WebAgentEventType.SYSTEM_DEBUG_TOOL_DROP, {
iterationId: this.currentIterationId,
droppedCount: droppedTools.length,
droppedTools,
keptTool,
});
}

const toolResult = aiResponse.toolResults[0];
const actionOutput = toolResult.output as any;

Expand Down Expand Up @@ -1465,12 +1490,11 @@ export class WebAgent {
});
recordSanitizedException(span, error);

// Check if the error message already contains "Failed to generate plan" to avoid double-wrapping
if (errorMsg.includes("Failed to generate plan")) {
throw new Error(errorMsg);
} else {
throw new Error(`Failed to generate plan: ${errorMsg}`);
// Avoid double-wrapping if we already produced a PlanningError up-stack.
if (error instanceof PlanningError) {
throw error;
}
throw new PlanningError(`Failed to generate plan: ${errorMsg}`);
}
},
);
Expand Down Expand Up @@ -1516,14 +1540,11 @@ export class WebAgent {
}

/**
* Check if error is a setup/planning error that should be re-thrown
* Check if error is a setup/planning error that should be re-thrown to the
* caller rather than converted into a TASK_FAILED result.
*/
private isSetupError(error: unknown): boolean {
return (
error instanceof Error &&
(error.message.includes("Failed to generate plan") ||
error.message.includes("No starting URL"))
);
return error instanceof PlanningError || error instanceof NoStartingUrlError;
}

/**
Expand Down Expand Up @@ -1620,7 +1641,7 @@ export class WebAgent {

private async navigateToStart(task: string): Promise<void> {
if (!this.url) {
throw new Error("No starting URL determined");
throw new NoStartingUrlError();
}

if (this.url !== "about:blank") {
Expand Down
1 change: 1 addition & 0 deletions packages/core/test/events.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ describe("WebAgentEventEmitter", () => {
"browser:screenshot_captured_image",
"system:debug_compression",
"system:debug_message",
"system:debug_tool_drop",
"cdp:endpoint_connected",
"cdp:endpoint_cycle",
"browser:reconnected",
Expand Down
5 changes: 4 additions & 1 deletion packages/core/test/prompts.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import {
buildPlanPrompt,
actionLoopSystemPrompt,
buildActionLoopSystemPrompt,
buildTaskAndPlanPrompt,
buildPageSnapshotPrompt,
Expand All @@ -10,6 +9,10 @@ import {
buildExtractionPrompt,
} from "../src/prompts.js";

// Default action-loop prompt used by the tests below. Mirrors the historical
// `actionLoopSystemPrompt` export (built with both gating flags off).
const actionLoopSystemPrompt = buildActionLoopSystemPrompt(false, false);

// Mock Date for consistent test results
const mockDate = new Date("2024-01-15T10:00:00Z");

Expand Down
Loading
Loading