From a1ab4150876fc6cc0d6514ab5062b4a86066cdb6 Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Wed, 13 May 2026 15:48:21 -0700
Subject: [PATCH 1/7] feat(core): add page exploration tools, structured
 extract
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds three additions to the agent's tool surface, plus a refactor of the
retry layer:

- `search_page`: zero-LLM text search of the current page via a TreeWalker.
  Returns matches with surrounding context and the nearest `data-pilo-ref`
  ancestor (`nearestRef`) so the agent can chain directly into `click`/`fill`
  without paying for an `extract` round-trip.

- `find_elements`: zero-LLM CSS-selector query. Optional `withinRef` scopes
  the query to an aria-tree subtree. Returns each match's tag, text,
  requested attributes (`href`/`src` auto-resolved to absolute URLs), and
  `nearestRef`.

- `extract({outputSchema})`: optional JSON Schema argument routes the existing
  extract through the AI SDK's `generateObject` (via the new
  `generateObjectWithRetry`) and returns `data: object` instead of
  `extractedData: string`. The markdown branch behavior is byte-identical
  to the prior implementation when `outputSchema` is absent.

Implemented across both browser backends:
- Playwright iterates same-origin + accessible cross-origin frames and tags
  per-frame matches with `frameUrl`, matching the existing aria-tree behavior.
- Extension is top-frame only (matches `ExtensionBrowser.getTreeWithRefs`),
  so `frameUrl` is always undefined in extension results.

Wiring is unconditional — these are pure DOM primitives with no API key /
callback / provider dependency. They live in a new `inspectionTools.ts`
factory, alongside `webActionTools` / `searchTools` / `tabstackTools` /
`interactiveToolSet` in `webAgent.ts`. `search_page` and `find_elements`
are added to the `pageChanged` exempt list.

Refs are resolved via the existing `data-pilo-ref` DOM attribute that
`ariaSnapshot.ts` already sets during tree generation, so no changes are
needed to the aria-tree bundle.

Refactor: extracted a shared `retryDriver<T>` from `generateTextWithRetry`
and `generateObjectWithRetry`. The two public wrappers become thin call
sites via `validateResult` and `getFinishReason` hooks. Net reduction in
`retry.ts` line count.

Also: `NoObjectGeneratedError` is now non-retryable in `isRetryableError`,
preventing 3× cost amplification on schema-validation failures.

Tests: +1305 across core/cli/server/extension (+24 search_page block, +30
find_elements block, +8 structured extract + new retry block, plus
MockBrowser stubs and a new `NoObjectGeneratedError` non-retry case).

Closes #432

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/core/src/browser/ariaBrowser.ts      |  68 +++
 .../core/src/browser/playwrightBrowser.ts     | 329 ++++++++++++-
 packages/core/src/core.ts                     |  10 +-
 packages/core/src/prompts.ts                  |  29 +-
 packages/core/src/tools/inspectionTools.ts    | 160 ++++++
 packages/core/src/tools/webActionTools.ts     |  47 +-
 packages/core/src/utils/retry.ts              | 116 ++++-
 packages/core/src/webAgent.ts                 |  21 +-
 packages/core/test/playwrightBrowser.test.ts  | 462 ++++++++++++++++++
 .../core/test/tools/inspectionTools.test.ts   | 446 +++++++++++++++++
 .../core/test/tools/webActionTools.test.ts    | 129 ++++-
 packages/core/test/utils/retry.test.ts        | 176 ++++++-
 packages/core/test/webAgent.test.ts           |  16 +
 .../src/background/ExtensionBrowser.ts        | 222 ++++++++-
 .../extension/test/ExtensionBrowser.test.ts   | 260 ++++++++++
 15 files changed, 2456 insertions(+), 35 deletions(-)
 create mode 100644 packages/core/src/tools/inspectionTools.ts
 create mode 100644 packages/core/test/tools/inspectionTools.test.ts
diff --git a/packages/core/src/browser/ariaBrowser.ts b/packages/core/src/browser/ariaBrowser.ts
index f3e39c91..044b74e9 100644
--- a/packages/core/src/browser/ariaBrowser.ts
+++ b/packages/core/src/browser/ariaBrowser.ts
@@ -35,6 +35,68 @@ export enum LoadState {
   Load = "load",
 }
 
+/**
+ * Options for searchPage — a zero-LLM, in-page text search.
+ */
+export interface SearchPageOptions {
+  pattern: string;
+  regex?: boolean;
+  caseSensitive?: boolean;
+  contextChars?: number;
+  maxResults?: number;
+}
+
+/**
+ * A single match returned by searchPage.
+ */
+export interface SearchPageMatch {
+  match: string;
+  contextBefore: string;
+  contextAfter: string;
+  nearestRef?: string;
+  frameUrl?: string;
+}
+
+/**
+ * Aggregate result returned by searchPage.
+ */
+export interface SearchPageResult {
+  totalMatches: number;
+  truncated: boolean;
+  matches: SearchPageMatch[];
+}
+
+/**
+ * Options for findElements — a zero-LLM CSS-selector query.
+ */
+export interface FindElementsOptions {
+  selector: string;
+  withinRef?: string;
+  attributes?: string[];
+  maxResults?: number;
+  includeText?: boolean;
+}
+
+/**
+ * A single element returned by findElements.
+ */
+export interface FindElementsMatch {
+  tag: string;
+  text?: string;
+  attributes?: Record<string, string>;
+  nearestRef?: string;
+  frameUrl?: string;
+}
+
+/**
+ * Aggregate result returned by findElements.
+ */
+export interface FindElementsResult {
+  totalMatches: number;
+  truncated: boolean;
+  elements: FindElementsMatch[];
+}
+
 /**
  * Limited interface for temporary tab operations.
  * Used for "side quest" operations like search that shouldn't affect main page state.
@@ -104,4 +166,10 @@ export interface AriaBrowser {
    * @returns The result of the function
    */
   runInTemporaryTab<T>(fn: (tab: TemporaryTab) => Promise<T>): Promise<T>;
+
+  /** Searches visible text in the page (and same-origin/cross-origin frames where supported) */
+  searchPage(opts: SearchPageOptions): Promise<SearchPageResult>;
+
+  /** Queries elements by CSS selector (optionally scoped to a `data-pilo-ref` subtree) */
+  findElements(opts: FindElementsOptions): Promise<FindElementsResult>;
 }
diff --git a/packages/core/src/browser/playwrightBrowser.ts b/packages/core/src/browser/playwrightBrowser.ts
index 7d95f5fe..a06ec915 100644
--- a/packages/core/src/browser/playwrightBrowser.ts
+++ b/packages/core/src/browser/playwrightBrowser.ts
@@ -11,7 +11,18 @@ import {
   Locator,
   errors as playwrightErrors,
 } from "playwright";
-import { AriaBrowser, PageAction, LoadState, TemporaryTab } from "./ariaBrowser.js";
+import {
+  AriaBrowser,
+  PageAction,
+  LoadState,
+  TemporaryTab,
+  SearchPageOptions,
+  SearchPageMatch,
+  SearchPageResult,
+  FindElementsOptions,
+  FindElementsMatch,
+  FindElementsResult,
+} from "./ariaBrowser.js";
 import { PlaywrightBlocker } from "@ghostery/adblocker-playwright";
 import fetch from "cross-fetch";
 import TurndownService from "turndown";
@@ -973,6 +984,322 @@ export class PlaywrightBrowser implements AriaBrowser {
     }
   }
 
+  async searchPage(opts: SearchPageOptions): Promise<SearchPageResult> {
+    if (!this.page) throw new Error("Browser not started");
+
+    const evalOpts = {
+      pattern: opts.pattern,
+      regex: opts.regex ?? false,
+      caseSensitive: opts.caseSensitive ?? false,
+      contextChars: opts.contextChars ?? 80,
+      maxResults: opts.maxResults ?? 10,
+    };
+
+    const aggregated: SearchPageMatch[] = [];
+    let totalMatches = 0;
+
+    // Main frame
+    try {
+      const mainResult = await this.page.evaluate(
+        PlaywrightBrowser.searchInDocumentSource,
+        evalOpts,
+      );
+      totalMatches += mainResult.totalMatches;
+      for (const m of mainResult.matches) {
+        if (aggregated.length >= evalOpts.maxResults) break;
+        aggregated.push({ ...m, frameUrl: undefined });
+      }
+    } catch (error) {
+      if (error instanceof Error && this.isBrowserDisconnectedError(error)) {
+        throw new BrowserDisconnectedError(error.message);
+      }
+      const message = error instanceof Error ? error.message : String(error);
+      throw new BrowserActionException("search_page", `search_page failed: ${message}`);
+    }
+
+    // Iterate non-main frames (same-origin / accessible cross-origin)
+    const frames = this.page.frames();
+    for (const frame of frames) {
+      if (frame === this.page.mainFrame()) continue;
+      try {
+        const frameResult = await frame.evaluate(
+          PlaywrightBrowser.searchInDocumentSource,
+          evalOpts,
+        );
+        totalMatches += frameResult.totalMatches;
+        const frameUrl = frame.url();
+        for (const m of frameResult.matches) {
+          if (aggregated.length >= evalOpts.maxResults) break;
+          aggregated.push({ ...m, frameUrl });
+        }
+      } catch {
+        // Cross-origin or detached frame, skip silently (mirrors getTreeWithRefsImpl)
+      }
+    }
+
+    return {
+      totalMatches,
+      truncated: totalMatches > aggregated.length,
+      matches: aggregated,
+    };
+  }
+
+  /**
+   * In-page text-search helper. Runs inside `page.evaluate` / `frame.evaluate`,
+   * so it must be self-contained and use only DOM APIs available in the page context.
+   * Returns the per-frame partial result; the wrapper tags each match with `frameUrl`.
+   */
+  private static readonly searchInDocumentSource = (opts: {
+    pattern: string;
+    regex: boolean;
+    caseSensitive: boolean;
+    contextChars: number;
+    maxResults: number;
+  }): {
+    totalMatches: number;
+    matches: Array<{
+      match: string;
+      contextBefore: string;
+      contextAfter: string;
+      nearestRef?: string;
+    }>;
+  } => {
+    const flags = opts.caseSensitive ? "g" : "gi";
+    const re = opts.regex
+      ? new RegExp(opts.pattern, flags)
+      : new RegExp(opts.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), flags);
+
+    const matches: Array<{
+      match: string;
+      contextBefore: string;
+      contextAfter: string;
+      nearestRef?: string;
+    }> = [];
+    let totalMatches = 0;
+
+    if (!document.body) {
+      return { totalMatches, matches };
+    }
+
+    const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, {
+      acceptNode(node) {
+        const p = node.parentElement;
+        if (!p) return NodeFilter.FILTER_REJECT;
+        const tag = p.tagName;
+        if (tag === "SCRIPT" || tag === "STYLE" || tag === "NOSCRIPT") {
+          return NodeFilter.FILTER_REJECT;
+        }
+        return NodeFilter.FILTER_ACCEPT;
+      },
+    });
+
+    let node: Node | null;
+    while ((node = walker.nextNode())) {
+      const text = (node as Text).data;
+      re.lastIndex = 0;
+      let m: RegExpExecArray | null;
+      while ((m = re.exec(text)) !== null) {
+        totalMatches++;
+        if (matches.length < opts.maxResults) {
+          const start = Math.max(0, m.index - opts.contextChars);
+          const end = Math.min(text.length, m.index + m[0].length + opts.contextChars);
+          const parentEl = (node as Text).parentElement;
+          const refEl = parentEl?.closest("[data-pilo-ref]") ?? null;
+          matches.push({
+            match: m[0],
+            contextBefore: text.slice(start, m.index),
+            contextAfter: text.slice(m.index + m[0].length, end),
+            nearestRef: refEl?.getAttribute("data-pilo-ref") ?? undefined,
+          });
+        }
+        // Zero-width match guard
+        if (m.index === re.lastIndex) re.lastIndex++;
+      }
+    }
+
+    return { totalMatches, matches };
+  };
+
+  async findElements(opts: FindElementsOptions): Promise<FindElementsResult> {
+    if (!this.page) throw new Error("Browser not started");
+
+    const evalOpts = {
+      selector: opts.selector,
+      withinRef: opts.withinRef ?? null,
+      attributes: opts.attributes ?? null,
+      maxResults: opts.maxResults ?? 20,
+      includeText: opts.includeText ?? true,
+    };
+
+    const aggregated: FindElementsMatch[] = [];
+    let totalMatches = 0;
+    let anyFrameFoundRef = evalOpts.withinRef === null;
+
+    // Main frame
+    let mainResult:
+      | { totalMatches: number; matches: Array<Omit<FindElementsMatch, "frameUrl">> }
+      | { error: string; kind: "bad-selector" | "within-ref-miss" };
+    try {
+      mainResult = await this.page.evaluate(
+        PlaywrightBrowser.findElementsInDocumentSource,
+        evalOpts,
+      );
+    } catch (error) {
+      if (error instanceof Error && this.isBrowserDisconnectedError(error)) {
+        throw new BrowserDisconnectedError(error.message);
+      }
+      const message = error instanceof Error ? error.message : String(error);
+      throw new BrowserActionException("find_elements", `find_elements failed: ${message}`);
+    }
+
+    if ("error" in mainResult) {
+      if (mainResult.kind !== "within-ref-miss") {
+        // Bad selector or other in-page error: short-circuit, surface as recoverable
+        throw new BrowserActionException(
+          "find_elements",
+          `find_elements failed: ${mainResult.error}`,
+        );
+      }
+      // withinRef miss in main frame — continue iterating frames
+    } else {
+      anyFrameFoundRef = true;
+      totalMatches += mainResult.totalMatches;
+      for (const m of mainResult.matches) {
+        if (aggregated.length >= evalOpts.maxResults) break;
+        aggregated.push({ ...m, frameUrl: undefined });
+      }
+    }
+
+    // Iterate non-main frames (same-origin / accessible cross-origin)
+    const frames = this.page.frames();
+    for (const frame of frames) {
+      if (frame === this.page.mainFrame()) continue;
+      let frameResult:
+        | { totalMatches: number; matches: Array<Omit<FindElementsMatch, "frameUrl">> }
+        | { error: string; kind: "bad-selector" | "within-ref-miss" };
+      try {
+        frameResult = await frame.evaluate(
+          PlaywrightBrowser.findElementsInDocumentSource,
+          evalOpts,
+        );
+      } catch {
+        // Cross-origin or detached frame, skip silently (mirrors getTreeWithRefsImpl)
+        continue;
+      }
+
+      if ("error" in frameResult) {
+        if (frameResult.kind !== "within-ref-miss") {
+          // Bad selector in this frame — selector is identical across frames, so
+          // short-circuit rather than continue (mirrors plan contract).
+          throw new BrowserActionException(
+            "find_elements",
+            `find_elements failed: ${frameResult.error}`,
+          );
+        }
+        // withinRef miss in this frame — try next frame
+        continue;
+      }
+
+      anyFrameFoundRef = true;
+      totalMatches += frameResult.totalMatches;
+      const frameUrl = frame.url();
+      for (const m of frameResult.matches) {
+        if (aggregated.length >= evalOpts.maxResults) break;
+        aggregated.push({ ...m, frameUrl });
+      }
+    }
+
+    if (!anyFrameFoundRef) {
+      throw new BrowserActionException(
+        "find_elements",
+        `find_elements failed: withinRef "${evalOpts.withinRef}" not found`,
+      );
+    }
+
+    return {
+      totalMatches,
+      truncated: totalMatches > aggregated.length,
+      elements: aggregated,
+    };
+  }
+
+  /**
+   * In-page CSS-selector query helper. Runs inside `page.evaluate` / `frame.evaluate`,
+   * so it must be self-contained and use only DOM APIs available in the page context.
+   * Returns either a per-frame partial result OR an `{ error }` object for bad
+   * selectors / withinRef-not-found in this frame; the wrapper interprets these.
+   */
+  private static readonly findElementsInDocumentSource = (opts: {
+    selector: string;
+    withinRef: string | null;
+    attributes: string[] | null;
+    maxResults: number;
+    includeText: boolean;
+  }):
+    | {
+        totalMatches: number;
+        matches: Array<{
+          tag: string;
+          text?: string;
+          attributes?: Record<string, string>;
+          nearestRef?: string;
+        }>;
+      }
+    | { error: string; kind: "bad-selector" | "within-ref-miss" } => {
+    // Resolve scope root
+    let root: Document | Element = document;
+    if (opts.withinRef !== null) {
+      const r = document.querySelector(`[data-pilo-ref="${CSS.escape(opts.withinRef)}"]`);
+      if (!r)
+        return {
+          error: `withinRef "${opts.withinRef}" not found in this frame`,
+          kind: "within-ref-miss",
+        };
+      root = r;
+    }
+
+    let nodeList: NodeListOf<Element>;
+    try {
+      nodeList = root.querySelectorAll(opts.selector);
+    } catch (e) {
+      return { error: e instanceof Error ? e.message : String(e), kind: "bad-selector" };
+    }
+
+    const totalMatches = nodeList.length;
+    const matches: Array<{
+      tag: string;
+      text?: string;
+      attributes?: Record<string, string>;
+      nearestRef?: string;
+    }> = [];
+    for (let i = 0; i < nodeList.length && matches.length < opts.maxResults; i++) {
+      const el = nodeList[i];
+      let attrs: Record<string, string> | undefined;
+      if (opts.attributes && opts.attributes.length > 0) {
+        attrs = {};
+        for (const name of opts.attributes) {
+          const v = el.getAttribute(name);
+          if (v !== null) attrs[name] = v;
+        }
+      }
+      // Auto-resolve href/src to absolute URLs when present, even if not requested explicitly
+      const href = (el as HTMLAnchorElement | HTMLAreaElement).href;
+      const src = (el as HTMLImageElement | HTMLScriptElement | HTMLIFrameElement).src;
+      if (typeof href === "string" && href) (attrs ??= {})["href"] = href;
+      if (typeof src === "string" && src) (attrs ??= {})["src"] = src;
+
+      matches.push({
+        tag: el.tagName.toLowerCase(),
+        text: opts.includeText ? (el.textContent ?? "").trim().slice(0, 500) : undefined,
+        attributes: attrs && Object.keys(attrs).length > 0 ? attrs : undefined,
+        nearestRef:
+          (el.closest("[data-pilo-ref]") as Element | null)?.getAttribute("data-pilo-ref") ??
+          undefined,
+      });
+    }
+    return { totalMatches, matches };
+  };
+
   /**
    * Check if an action requires an element reference
    */
diff --git a/packages/core/src/core.ts b/packages/core/src/core.ts
index e70ddf3c..cf4110d5 100644
--- a/packages/core/src/core.ts
+++ b/packages/core/src/core.ts
@@ -5,7 +5,15 @@
  */
 
 export { WebAgent } from "./webAgent.js";
-export type { AriaBrowser } from "./browser/ariaBrowser.js";
+export type {
+  AriaBrowser,
+  SearchPageOptions,
+  SearchPageMatch,
+  SearchPageResult,
+  FindElementsOptions,
+  FindElementsMatch,
+  FindElementsResult,
+} from "./browser/ariaBrowser.js";
 export { PageAction, LoadState } from "./browser/ariaBrowser.js";
 export type { TaskExecutionResult, TaskError, WebAgentOptions } from "./webAgent.js";
 export { TaskErrorCode } from "./webAgent.js";
diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index 2c9b374a..cf032561 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -57,9 +57,12 @@ export const TOOL_STRINGS = {
       description: "Go forward to the next page",
     },
     extract: {
-      description: "Extract specific data from the current page for later reference",
+      description:
+        "Extract data from the current page. Pass `outputSchema` (a JSON Schema object) to get structured data; omit it for markdown text.",
       dataDescription:
         "Describe what information to extract. Focus on content, not element references.",
+      outputSchema:
+        "Optional JSON Schema describing the desired structured output. When provided, returns `data` (an object matching the schema) instead of `extractedData` (markdown).",
     },
     done: {
       description: "Complete the task with your final answer",
@@ -85,6 +88,25 @@ export const TOOL_STRINGS = {
         "Search the web for information. Returns the search results page as markdown. Use when you need to find websites or information but don't know the URL.",
       query: "The search query to execute",
     },
+    searchPage: {
+      description:
+        "Search visible text on the current page. Free and fast — prefer this over extract when you know what text to look for.",
+      pattern: "Text or regex pattern to search for",
+      regex: "Treat `pattern` as a regular expression",
+      caseSensitive: "Match case sensitively",
+      contextChars: "Characters of context before/after each match (0-500)",
+      maxResults: "Maximum number of matches to return (1-50)",
+    },
+    findElements: {
+      description:
+        'Query elements by CSS selector. Free and fast — useful for inventory questions ("how many cards?") before deciding to extract.',
+      selector: "CSS selector",
+      withinRef: "Optional aria-tree ref to scope the query to that element's subtree",
+      attributes:
+        "Element attributes to include (e.g., ['href', 'data-id']). href/src are auto-included as absolute URLs.",
+      maxResults: "Maximum number of elements to return (1-100)",
+      includeText: "Include each element's text content (truncated to 500 chars)",
+    },
   },
 
   /**
@@ -178,7 +200,9 @@ function buildToolExamples(
     `- goto({"url": "https://example.com"}) - ${TOOL_STRINGS.webActions.goto.description}`,
     `- back() - ${TOOL_STRINGS.webActions.back.description}`,
     `- forward() - ${TOOL_STRINGS.webActions.forward.description}`,
-    `- extract({"description": "data to extract"}) - ${TOOL_STRINGS.webActions.extract.description}`,
+    `- extract({"description": "data to extract", "outputSchema": {"type": "object", "properties": {"title": {"type": "string"}}}}) - ${TOOL_STRINGS.webActions.extract.description}`,
+    `- search_page({"pattern": "logout"}) - ${TOOL_STRINGS.webActions.searchPage.description}`,
+    `- find_elements({"selector": "a.nav-link"}) - ${TOOL_STRINGS.webActions.findElements.description}`,
   ];
 
   if (hasWebSearch) {
@@ -349,6 +373,7 @@ Analyze the current page state and determine your next action based on previous
 - If you don't find relevant links or buttons, and the site has a search form, prioritize using it for navigation
 - If you have found the core information requested but cannot access supplementary details due to site limitations, use done() with what you have — only use abort() when the core task cannot be completed at all
 - For research: Use extract() immediately when finding relevant data
+- For inventory questions ("how many X?", "is Y on the page?", "what's the href of link Z?"), prefer search_page or find_elements — they are zero-LLM and instant. Reserve extract() for synthesized or structured data from the CURRENT page; pass outputSchema to extract() when you need JSON-shaped output instead of markdown{% if hasTabstack %}. Use tabstack_extract_json only for off-page URL fetches, not the current page{% endif %}
 - For academic papers or documents that require reading, counting, or extracting content (e.g., counting figures/tables, reading body text): PDFs are often unscrollable and unreadable{% if hasTabstack %} — use tabstack_extract_markdown to read PDF content directly{% endif %}{% if not hasTabstack %} — use webSearch to find an HTML version (e.g., ACL Anthology, Semantic Scholar) or the abstract page before attempting the PDF{% endif %}
 {% if hasWebSearch %}- If you need to search the web, use webSearch({query}) directly rather than filling in a browser search engine (DuckDuckGo, Google, Bing, etc.) — webSearch avoids CAPTCHA and bot detection that will block browser-based searches{% endif %}
 {% if hasTabstack %}- **Tabstack cloud tools are available — prefer them over manual browsing when they fit:**
diff --git a/packages/core/src/tools/inspectionTools.ts b/packages/core/src/tools/inspectionTools.ts
new file mode 100644
index 00000000..877e94d6
--- /dev/null
+++ b/packages/core/src/tools/inspectionTools.ts
@@ -0,0 +1,160 @@
+/**
+ * Inspection Tools
+ *
+ * Zero-LLM page-inspection tools — fast, deterministic primitives the agent
+ * can call before falling back to LLM-driven extraction. `search_page` walks
+ * visible page text and returns matches with surrounding context and the
+ * nearest `data-pilo-ref` ancestor. `find_elements` queries by CSS selector
+ * and returns each match's tag, text, requested attributes (with `href`/`src`
+ * auto-resolved to absolute URLs), and the nearest `data-pilo-ref` ancestor.
+ */
+
+import { tool } from "ai";
+import { z } from "zod";
+import type { AriaBrowser } from "../browser/ariaBrowser.js";
+import { WebAgentEventEmitter, WebAgentEventType } from "../events.js";
+import { TOOL_STRINGS } from "../prompts.js";
+
+interface InspectionToolContext {
+  browser: AriaBrowser;
+  eventEmitter: WebAgentEventEmitter;
+}
+
+export function createInspectionTools(context: InspectionToolContext) {
+  return {
+    search_page: tool({
+      description: TOOL_STRINGS.webActions.searchPage.description,
+      inputSchema: z.object({
+        pattern: z.string().describe(TOOL_STRINGS.webActions.searchPage.pattern),
+        regex: z.boolean().default(false).describe(TOOL_STRINGS.webActions.searchPage.regex),
+        caseSensitive: z
+          .boolean()
+          .default(false)
+          .describe(TOOL_STRINGS.webActions.searchPage.caseSensitive),
+        contextChars: z
+          .number()
+          .min(0)
+          .max(500)
+          .default(80)
+          .describe(TOOL_STRINGS.webActions.searchPage.contextChars),
+        maxResults: z
+          .number()
+          .min(1)
+          .max(50)
+          .default(10)
+          .describe(TOOL_STRINGS.webActions.searchPage.maxResults),
+      }),
+      execute: async ({ pattern, regex, caseSensitive, contextChars, maxResults }) => {
+        context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
+          action: "search_page",
+          value: pattern,
+        });
+
+        try {
+          const result = await context.browser.searchPage({
+            pattern,
+            regex,
+            caseSensitive,
+            contextChars,
+            maxResults,
+          });
+
+          context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+            success: true,
+            action: "search_page",
+          });
+
+          return {
+            success: true,
+            action: "search_page",
+            pattern,
+            ...result,
+          };
+        } catch (error) {
+          const errorMessage = error instanceof Error ? error.message : String(error);
+
+          context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+            success: false,
+            action: "search_page",
+            error: errorMessage,
+            isRecoverable: true,
+          });
+
+          return {
+            success: false,
+            action: "search_page",
+            pattern,
+            error: errorMessage,
+            isRecoverable: true,
+          };
+        }
+      },
+    }),
+    find_elements: tool({
+      description: TOOL_STRINGS.webActions.findElements.description,
+      inputSchema: z.object({
+        selector: z.string().describe(TOOL_STRINGS.webActions.findElements.selector),
+        withinRef: z.string().optional().describe(TOOL_STRINGS.webActions.findElements.withinRef),
+        attributes: z
+          .array(z.string())
+          .optional()
+          .describe(TOOL_STRINGS.webActions.findElements.attributes),
+        maxResults: z
+          .number()
+          .min(1)
+          .max(100)
+          .default(20)
+          .describe(TOOL_STRINGS.webActions.findElements.maxResults),
+        includeText: z
+          .boolean()
+          .default(true)
+          .describe(TOOL_STRINGS.webActions.findElements.includeText),
+      }),
+      execute: async ({ selector, withinRef, attributes, maxResults, includeText }) => {
+        context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
+          action: "find_elements",
+          value: selector,
+        });
+
+        try {
+          const result = await context.browser.findElements({
+            selector,
+            withinRef,
+            attributes,
+            maxResults,
+            includeText,
+          });
+
+          context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+            success: true,
+            action: "find_elements",
+          });
+
+          return {
+            success: true,
+            action: "find_elements",
+            selector,
+            ...result,
+          };
+        } catch (error) {
+          const errorMessage = error instanceof Error ? error.message : String(error);
+
+          context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+            success: false,
+            action: "find_elements",
+            error: errorMessage,
+            isRecoverable: true,
+          });
+
+          return {
+            success: false,
+            action: "find_elements",
+            selector,
+            error: errorMessage,
+            isRecoverable: true,
+          };
+        }
+      },
+    }),
+  };
+}
diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts
index 8d91f928..eaad1130 100644
--- a/packages/core/src/tools/webActionTools.ts
+++ b/packages/core/src/tools/webActionTools.ts
@@ -5,14 +5,14 @@
  * Each tool includes description, inputSchema, and execute function.
  */
 
-import { tool } from "ai";
+import { tool, jsonSchema } from "ai";
 import { z } from "zod";
 import { AriaBrowser, PageAction } from "../browser/ariaBrowser.js";
 import { WebAgentEventEmitter, WebAgentEventType } from "../events.js";
 import { buildExtractionPrompt, TOOL_STRINGS } from "../prompts.js";
 import type { ProviderConfig } from "../provider.js";
 import { BrowserException } from "../errors.js";
-import { generateTextWithRetry } from "../utils/retry.js";
+import { generateTextWithRetry, generateObjectWithRetry } from "../utils/retry.js";
 import {
   withSpan,
   SpanStatusCode,
@@ -311,8 +311,12 @@ export function createWebActionTools(context: WebActionContext) {
       description: TOOL_STRINGS.webActions.extract.description,
       inputSchema: z.object({
         description: z.string().describe(TOOL_STRINGS.webActions.extract.dataDescription),
+        outputSchema: z
+          .record(z.string(), z.any())
+          .optional()
+          .describe(TOOL_STRINGS.webActions.extract.outputSchema),
       }),
-      execute: async ({ description }) => {
+      execute: async ({ description, outputSchema }) => {
         // Extract doesn't use browser.performAction - it's a special AI operation
         context.eventEmitter.emit(WebAgentEventType.AGENT_ACTION, {
           action: "extract",
@@ -325,7 +329,42 @@ export function createWebActionTools(context: WebActionContext) {
         // Build extraction prompt
         const prompt = buildExtractionPrompt(description, markdown);
 
-        // Use the provider to extract the data with retry
+        // Structured branch: when outputSchema is provided, use generateObject with
+        // jsonSchema() to validate the LLM output against the schema.
+        if (outputSchema) {
+          const { object } = await generateObjectWithRetry(
+            {
+              ...context.providerConfig,
+              prompt,
+              schema: jsonSchema(outputSchema as any),
+              maxOutputTokens: 5000,
+              abortSignal: context.abortSignal,
+            },
+            {
+              maxAttempts: 3,
+              onRetry: (attempt, error) => {
+                context.eventEmitter.emit(WebAgentEventType.AGENT_STATUS, {
+                  message: `Extract (structured) retry attempt ${attempt} after error: ${error instanceof Error ? error.message : String(error)}`,
+                });
+              },
+            },
+          );
+
+          // Emit the extracted data event (stringified for event consumers
+          // that expect a string payload)
+          context.eventEmitter.emit(WebAgentEventType.AGENT_EXTRACTED, {
+            extractedData: JSON.stringify(object),
+          });
+
+          return {
+            success: true,
+            action: "extract",
+            description,
+            data: object,
+          };
+        }
+
+        // Markdown branch (default): use the provider to extract the data with retry
         const extractResponse = await generateTextWithRetry(
           {
             ...context.providerConfig,
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index 256e2cb3..860b2c70 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -5,7 +5,7 @@
  * Handles transient errors while avoiding retry on non-recoverable errors.
  */
 
-import { generateText } from "ai";
+import { generateText, generateObject, NoObjectGeneratedError } from "ai";
 import {
   DEFAULT_RETRY_MAX_ATTEMPTS,
   DEFAULT_RETRY_INITIAL_DELAY_MS,
@@ -21,11 +21,23 @@ import {
 
 /**
  * Check if an error is retryable
- * Non-retryable: 4xx errors except 429 (rate limit)
+ * Non-retryable:
+ *  - 4xx errors except 429 (rate limit)
+ *  - Auth/permission errors detected by message
+ *  - Structured-output failures from `generateObject` (`NoObjectGeneratedError`):
+ *    the model produced JSON that failed schema validation or parsing. Retrying
+ *    the same prompt against the same schema will not fix this and just burns
+ *    tokens, so we surface immediately.
  */
 function isRetryableError(error: unknown): boolean {
   if (!(error instanceof Error)) return true;
 
+  // Structured-output failures are non-retryable: the same prompt + schema will
+  // produce the same failure mode.
+  if (error instanceof NoObjectGeneratedError) {
+    return false;
+  }
+
   const errorAny = error as any;
   const statusCode = errorAny.statusCode || errorAny.status || errorAny.response?.status;
 
@@ -74,17 +86,33 @@ export interface RetryOptions {
 }
 
 /**
- * Wrapper for generateText with retry logic
+ * Internal options for the shared retry driver. Wrapper-specific hooks let the
+ * public wrappers (text vs object) plug in their own success validation and
+ * telemetry extraction without leaking concerns into the driver.
+ */
+interface RetryDriverOptions<T> extends RetryOptions {
+  /**
+   * Optional post-success validation hook. If it throws, the thrown error is
+   * treated like any other error from `call`: it goes through retry classification.
+   * Used by `generateTextWithRetry` to enforce the `toolChoice: "required"` contract.
+   */
+  validateResult?: (result: T) => void;
+  /**
+   * Optional telemetry extractor. Called on success to record finish_reason on
+   * the span. Different result shapes have different finish-reason locations.
+   */
+  getFinishReason?: (result: T) => unknown;
+}
+
+/**
+ * Shared retry driver. Owns the loop, exponential backoff + jitter,
+ * max-attempts handling, non-retryable short-circuit via `isRetryableError`,
+ * `onRetry` callback dispatch, and span/telemetry recording.
  *
- * @param params - Parameters for generateText call
- * @param retryOptions - Optional retry configuration
- * @returns The generateText result
- * @throws The last error if all retries fail
+ * Wrapper functions (`generateTextWithRetry`, `generateObjectWithRetry`) build a
+ * call closure and supply wrapper-specific hooks via `options`.
  */
-export async function generateTextWithRetry<TOOLS extends Record<string, any> = any>(
-  params: Parameters<typeof generateText<TOOLS>>[0],
-  retryOptions?: RetryOptions,
-): Promise<Awaited<ReturnType<typeof generateText<TOOLS>>>> {
+async function retryDriver<T>(call: () => Promise<T>, options: RetryDriverOptions<T>): Promise<T> {
   return withSpan(SpanName.AI_GENERATE, {}, async (span) => {
     const {
       maxAttempts = DEFAULT_RETRY_MAX_ATTEMPTS,
@@ -92,22 +120,26 @@ export async function generateTextWithRetry<TOOLS extends Record<string, any> =
       maxDelay = DEFAULT_RETRY_MAX_DELAY_MS,
       backoffFactor = DEFAULT_RETRY_BACKOFF_FACTOR,
       onRetry,
-    } = retryOptions || {};
+      validateResult,
+      getFinishReason,
+    } = options;
 
     let lastError: unknown;
     let delay = initialDelay;
 
     for (let attempt = 1; attempt <= maxAttempts; attempt++) {
       try {
-        const result = await generateText(params);
+        const result = await call();
 
-        if (params.toolChoice === "required" && !result.toolResults?.length) {
-          throw new Error("Tool call was required but model did not call any tools");
+        if (validateResult) {
+          validateResult(result);
         }
 
         // Record success attributes
         span.setAttribute("pilo.ai.attempts", attempt);
-        span.setAttribute("pilo.ai.finish_reason", String(result.finishReason));
+        if (getFinishReason) {
+          span.setAttribute("pilo.ai.finish_reason", String(getFinishReason(result)));
+        }
         return result;
       } catch (error) {
         lastError = error;
@@ -175,3 +207,55 @@ export async function generateTextWithRetry<TOOLS extends Record<string, any> =
     throw lastError;
   });
 }
+
+/**
+ * Wrapper for generateText with retry logic
+ *
+ * @param params - Parameters for generateText call
+ * @param retryOptions - Optional retry configuration
+ * @returns The generateText result
+ * @throws The last error if all retries fail
+ */
+export async function generateTextWithRetry<TOOLS extends Record<string, any> = any>(
+  params: Parameters<typeof generateText<TOOLS>>[0],
+  retryOptions?: RetryOptions,
+): Promise<Awaited<ReturnType<typeof generateText<TOOLS>>>> {
+  type Result = Awaited<ReturnType<typeof generateText<TOOLS>>>;
+
+  return retryDriver<Result>(() => generateText(params), {
+    ...retryOptions,
+    // When the caller required a tool call, treat a tool-less response as an
+    // error so the retry loop can re-prompt the model.
+    validateResult: (result) => {
+      if (params.toolChoice === "required" && !result.toolResults?.length) {
+        throw new Error("Tool call was required but model did not call any tools");
+      }
+    },
+    getFinishReason: (result) => result.finishReason,
+  });
+}
+
+/**
+ * Wrapper for generateObject with retry logic
+ *
+ * Mirrors generateTextWithRetry's retry/backoff/non-retryable behavior, but for
+ * structured object generation. No tool-call validation since generateObject
+ * does not accept tools. `NoObjectGeneratedError` (schema/parse failures from
+ * the model output) is treated as non-retryable by `isRetryableError`.
+ *
+ * @param params - Parameters for generateObject call
+ * @param retryOptions - Optional retry configuration
+ * @returns The generateObject result
+ * @throws The last error if all retries fail
+ */
+export async function generateObjectWithRetry(
+  params: Parameters<typeof generateObject>[0],
+  retryOptions?: RetryOptions,
+): Promise<Awaited<ReturnType<typeof generateObject>>> {
+  type Result = Awaited<ReturnType<typeof generateObject>>;
+
+  return retryDriver<Result>(() => generateObject(params), {
+    ...retryOptions,
+    getFinishReason: (result) => result.finishReason,
+  });
+}
diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts
index 3f12a73d..d6ba0095 100644
--- a/packages/core/src/webAgent.ts
+++ b/packages/core/src/webAgent.ts
@@ -34,6 +34,7 @@ import {
 } from "./prompts.js";
 import { createWebActionTools } from "./tools/webActionTools.js";
 import { createSearchTools } from "./tools/searchTools.js";
+import { createInspectionTools } from "./tools/inspectionTools.js";
 import { SearchService } from "./search/searchService.js";
 import { createPlanningTools } from "./tools/planningTools.js";
 import { createValidationTools } from "./tools/validationTools.js";
@@ -389,6 +390,12 @@ export class WebAgent {
       abortSignal: this.abortSignal,
     });
 
+    // Inspection tools (zero-LLM page-inspection primitives) are always available.
+    const inspectionTools = createInspectionTools({
+      browser: this.browser,
+      eventEmitter: this.eventEmitter,
+    });
+
     // Only include search tools if a search service was created
     const searchTools = this.searchService
       ? createSearchTools({ searchService: this.searchService, eventEmitter: this.eventEmitter })
@@ -448,7 +455,13 @@ export class WebAgent {
     }
 
     // Merge all tools
-    const allTools = { ...webActionTools, ...searchTools, ...tabstackTools, ...interactiveToolSet };
+    const allTools = {
+      ...webActionTools,
+      ...inspectionTools,
+      ...searchTools,
+      ...tabstackTools,
+      ...interactiveToolSet,
+    };
 
     // Skip the first page snapshot when starting on about:blank (e.g., search-first flow).
     // The empty page has no useful elements and the snapshot prompt causes the model
@@ -1044,7 +1057,11 @@ export class WebAgent {
     }
 
     // Determine if page changed (most actions change the page, except extract and webSearch)
-    const pageChanged = actionOutput.action !== "extract" && actionOutput.action !== "webSearch";
+    const pageChanged =
+      actionOutput.action !== "extract" &&
+      actionOutput.action !== "webSearch" &&
+      actionOutput.action !== "search_page" &&
+      actionOutput.action !== "find_elements";
 
     // Check for terminal actions
     if (actionOutput.isTerminal) {
diff --git a/packages/core/test/playwrightBrowser.test.ts b/packages/core/test/playwrightBrowser.test.ts
index f61b3dc2..6b8a566f 100644
--- a/packages/core/test/playwrightBrowser.test.ts
+++ b/packages/core/test/playwrightBrowser.test.ts
@@ -1081,4 +1081,466 @@ describe("PlaywrightBrowser", () => {
       await expect(browser.getScreenshot()).rejects.not.toThrow(BrowserDisconnectedError);
     });
   });
+
+  describe("searchPage", () => {
+    let browser: PlaywrightBrowser;
+    let mainFrame: any;
+
+    beforeEach(() => {
+      browser = new PlaywrightBrowser({ browser: "chromium" });
+      mainFrame = { evaluate: vi.fn(), url: vi.fn().mockReturnValue("https://example.com/") };
+      (browser as any).page = {
+        evaluate: vi.fn(),
+        frames: vi.fn().mockReturnValue([mainFrame]),
+        mainFrame: vi.fn().mockReturnValue(mainFrame),
+      };
+    });
+
+    it("throws when browser not started", async () => {
+      const fresh = new PlaywrightBrowser();
+      await expect(fresh.searchPage({ pattern: "x" })).rejects.toThrow("Browser not started");
+    });
+
+    it("returns a literal match with context and nearestRef from the main frame", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 1,
+        matches: [
+          {
+            match: "logout",
+            contextBefore: "click ",
+            contextAfter: " here",
+            nearestRef: "E5",
+          },
+        ],
+      });
+
+      const result = await browser.searchPage({ pattern: "logout" });
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.truncated).toBe(false);
+      expect(result.matches).toHaveLength(1);
+      expect(result.matches[0]).toEqual({
+        match: "logout",
+        contextBefore: "click ",
+        contextAfter: " here",
+        nearestRef: "E5",
+        frameUrl: undefined,
+      });
+
+      // Wrapper should forward the resolved opts (with defaults applied)
+      const callArg = (browser as any).page.evaluate.mock.calls[0][1];
+      expect(callArg).toEqual({
+        pattern: "logout",
+        regex: false,
+        caseSensitive: false,
+        contextChars: 80,
+        maxResults: 10,
+      });
+    });
+
+    it("forwards regex and caseSensitive flags to the in-page helper", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({ totalMatches: 0, matches: [] });
+
+      await browser.searchPage({
+        pattern: "Lo[gG]out",
+        regex: true,
+        caseSensitive: true,
+        contextChars: 20,
+        maxResults: 3,
+      });
+
+      const callArg = (browser as any).page.evaluate.mock.calls[0][1];
+      expect(callArg).toEqual({
+        pattern: "Lo[gG]out",
+        regex: true,
+        caseSensitive: true,
+        contextChars: 20,
+        maxResults: 3,
+      });
+    });
+
+    it("marks the result as truncated when totalMatches exceeds maxResults", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 25,
+        matches: Array.from({ length: 10 }, (_, i) => ({
+          match: `m${i}`,
+          contextBefore: "",
+          contextAfter: "",
+          nearestRef: undefined,
+        })),
+      });
+
+      const result = await browser.searchPage({ pattern: "x", maxResults: 10 });
+
+      expect(result.totalMatches).toBe(25);
+      expect(result.matches).toHaveLength(10);
+      expect(result.truncated).toBe(true);
+    });
+
+    it("wraps a bad-regex evaluate rejection in BrowserActionException", async () => {
+      (browser as any).page.evaluate.mockRejectedValue(
+        new Error("SyntaxError: Invalid regular expression"),
+      );
+
+      await expect(browser.searchPage({ pattern: "(", regex: true })).rejects.toThrow(
+        BrowserActionException,
+      );
+      await expect(browser.searchPage({ pattern: "(", regex: true })).rejects.toThrow(
+        /search_page failed/,
+      );
+    });
+
+    it("still throws BrowserDisconnectedError when main-frame evaluate is a disconnect", async () => {
+      (browser as any).page.evaluate.mockRejectedValue(
+        new Error("Target page, context or browser has been closed"),
+      );
+
+      await expect(browser.searchPage({ pattern: "x" })).rejects.toThrow(BrowserDisconnectedError);
+    });
+
+    it("aggregates matches from non-main frames and tags them with frameUrl", async () => {
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 1,
+          matches: [
+            {
+              match: "logout",
+              contextBefore: "the ",
+              contextAfter: " link",
+              nearestRef: "E12",
+            },
+          ],
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 1,
+        matches: [
+          {
+            match: "logout",
+            contextBefore: "",
+            contextAfter: "",
+            nearestRef: "E1",
+          },
+        ],
+      });
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      const result = await browser.searchPage({ pattern: "logout" });
+
+      expect(result.totalMatches).toBe(2);
+      expect(result.matches).toHaveLength(2);
+      expect(result.matches[0].frameUrl).toBeUndefined();
+      expect(result.matches[1].frameUrl).toBe("https://iframe.example/");
+    });
+
+    it("silently skips frames that throw (cross-origin / detached)", async () => {
+      const goodFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 1,
+          matches: [{ match: "foo", contextBefore: "", contextAfter: "", nearestRef: undefined }],
+        }),
+        url: vi.fn().mockReturnValue("https://good.example/"),
+      };
+      const badFrame = {
+        evaluate: vi.fn().mockRejectedValue(new Error("cross-origin")),
+        url: vi.fn().mockReturnValue("https://bad.example/"),
+      };
+      (browser as any).page.evaluate.mockResolvedValue({ totalMatches: 0, matches: [] });
+      (browser as any).page.frames.mockReturnValue([mainFrame, goodFrame, badFrame]);
+
+      const result = await browser.searchPage({ pattern: "foo" });
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.matches).toHaveLength(1);
+      expect(result.matches[0].frameUrl).toBe("https://good.example/");
+    });
+
+    it("stops collecting matches across frames once maxResults is reached but keeps counting totalMatches", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 2,
+        matches: [
+          { match: "a", contextBefore: "", contextAfter: "", nearestRef: undefined },
+          { match: "b", contextBefore: "", contextAfter: "", nearestRef: undefined },
+        ],
+      });
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 3,
+          matches: [
+            { match: "c", contextBefore: "", contextAfter: "", nearestRef: undefined },
+            { match: "d", contextBefore: "", contextAfter: "", nearestRef: undefined },
+            { match: "e", contextBefore: "", contextAfter: "", nearestRef: undefined },
+          ],
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      const result = await browser.searchPage({ pattern: "x", maxResults: 3 });
+
+      expect(result.totalMatches).toBe(5);
+      expect(result.matches).toHaveLength(3);
+      expect(result.truncated).toBe(true);
+      // Order: main frame matches first, then we take 1 from child to fill to 3
+      expect(result.matches.map((m) => m.match)).toEqual(["a", "b", "c"]);
+    });
+  });
+
+  describe("findElements", () => {
+    let browser: PlaywrightBrowser;
+    let mainFrame: any;
+
+    beforeEach(() => {
+      browser = new PlaywrightBrowser({ browser: "chromium" });
+      mainFrame = { evaluate: vi.fn(), url: vi.fn().mockReturnValue("https://example.com/") };
+      (browser as any).page = {
+        evaluate: vi.fn(),
+        frames: vi.fn().mockReturnValue([mainFrame]),
+        mainFrame: vi.fn().mockReturnValue(mainFrame),
+      };
+    });
+
+    it("throws when browser not started", async () => {
+      const fresh = new PlaywrightBrowser();
+      await expect(fresh.findElements({ selector: "a" })).rejects.toThrow("Browser not started");
+    });
+
+    it("returns elements from the main frame with auto-resolved href and nearestRef", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 1,
+        matches: [
+          {
+            tag: "a",
+            text: "Home",
+            attributes: { href: "https://example.com/home" },
+            nearestRef: "E5",
+          },
+        ],
+      });
+
+      const result = await browser.findElements({ selector: "a.nav-link" });
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.truncated).toBe(false);
+      expect(result.elements).toHaveLength(1);
+      expect(result.elements[0]).toEqual({
+        tag: "a",
+        text: "Home",
+        attributes: { href: "https://example.com/home" },
+        nearestRef: "E5",
+        frameUrl: undefined,
+      });
+
+      // Wrapper should forward the resolved opts (with defaults applied)
+      const callArg = (browser as any).page.evaluate.mock.calls[0][1];
+      expect(callArg).toEqual({
+        selector: "a.nav-link",
+        withinRef: null,
+        attributes: null,
+        maxResults: 20,
+        includeText: true,
+      });
+    });
+
+    it("forwards withinRef, attributes, maxResults, and includeText", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 0,
+        matches: [],
+      });
+
+      await browser.findElements({
+        selector: "[data-id]",
+        withinRef: "E42",
+        attributes: ["data-id", "class"],
+        maxResults: 5,
+        includeText: false,
+      });
+
+      const callArg = (browser as any).page.evaluate.mock.calls[0][1];
+      expect(callArg).toEqual({
+        selector: "[data-id]",
+        withinRef: "E42",
+        attributes: ["data-id", "class"],
+        maxResults: 5,
+        includeText: false,
+      });
+    });
+
+    it("aggregates elements from non-main frames and tags them with frameUrl", async () => {
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 1,
+          matches: [
+            {
+              tag: "img",
+              text: "",
+              attributes: { src: "https://iframe.example/cat.png" },
+              nearestRef: "E12",
+            },
+          ],
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 1,
+        matches: [
+          {
+            tag: "a",
+            text: "Home",
+            attributes: { href: "https://example.com/home" },
+            nearestRef: "E1",
+          },
+        ],
+      });
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      const result = await browser.findElements({ selector: "a, img" });
+
+      expect(result.totalMatches).toBe(2);
+      expect(result.elements).toHaveLength(2);
+      expect(result.elements[0].frameUrl).toBeUndefined();
+      expect(result.elements[1].frameUrl).toBe("https://iframe.example/");
+    });
+
+    it("throws BrowserActionException with the in-page error when selector is invalid (main frame)", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        error: "Failed to execute 'querySelectorAll': '???' is not a valid selector.",
+        kind: "bad-selector",
+      });
+
+      await expect(browser.findElements({ selector: "???" })).rejects.toThrow(
+        BrowserActionException,
+      );
+      await expect(browser.findElements({ selector: "???" })).rejects.toThrow(
+        /find_elements failed:.*not a valid selector/,
+      );
+    });
+
+    it("short-circuits on bad-selector error in a non-main frame", async () => {
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          error: "Failed to execute 'querySelectorAll': '???' is not a valid selector.",
+          kind: "bad-selector",
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.evaluate.mockResolvedValue({ totalMatches: 0, matches: [] });
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      await expect(browser.findElements({ selector: "???" })).rejects.toThrow(
+        BrowserActionException,
+      );
+    });
+
+    it("wraps a thrown evaluate rejection in BrowserActionException", async () => {
+      (browser as any).page.evaluate.mockRejectedValue(new Error("kaboom"));
+
+      await expect(browser.findElements({ selector: "a" })).rejects.toThrow(BrowserActionException);
+      await expect(browser.findElements({ selector: "a" })).rejects.toThrow(/find_elements failed/);
+    });
+
+    it("still throws BrowserDisconnectedError when main-frame evaluate is a disconnect", async () => {
+      (browser as any).page.evaluate.mockRejectedValue(
+        new Error("Target page, context or browser has been closed"),
+      );
+
+      await expect(browser.findElements({ selector: "a" })).rejects.toThrow(
+        BrowserDisconnectedError,
+      );
+    });
+
+    it("skips frames whose withinRef lookup misses and uses one that hits", async () => {
+      // Main frame: withinRef not found here
+      (browser as any).page.evaluate.mockResolvedValue({
+        error: 'withinRef "E42" not found in this frame',
+        kind: "within-ref-miss",
+      });
+      // Child frame: withinRef hits, returns one element
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 1,
+          matches: [
+            {
+              tag: "li",
+              text: "Item",
+              nearestRef: "E43",
+            },
+          ],
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      const result = await browser.findElements({ selector: "li", withinRef: "E42" });
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.elements).toHaveLength(1);
+      expect(result.elements[0].frameUrl).toBe("https://iframe.example/");
+    });
+
+    it("throws BrowserActionException when withinRef is not found in any frame", async () => {
+      // Main frame: withinRef not found
+      (browser as any).page.evaluate.mockResolvedValue({
+        error: 'withinRef "Z9" not found in this frame',
+        kind: "within-ref-miss",
+      });
+      // Child frame: withinRef not found
+      const childFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          error: 'withinRef "Z9" not found in this frame',
+          kind: "within-ref-miss",
+        }),
+        url: vi.fn().mockReturnValue("https://iframe.example/"),
+      };
+      (browser as any).page.frames.mockReturnValue([mainFrame, childFrame]);
+
+      await expect(browser.findElements({ selector: "a", withinRef: "Z9" })).rejects.toThrow(
+        BrowserActionException,
+      );
+      await expect(browser.findElements({ selector: "a", withinRef: "Z9" })).rejects.toThrow(
+        /find_elements failed: withinRef "Z9" not found/,
+      );
+    });
+
+    it("silently skips frames that throw (cross-origin / detached)", async () => {
+      const goodFrame = {
+        evaluate: vi.fn().mockResolvedValue({
+          totalMatches: 1,
+          matches: [{ tag: "a", text: "Foo", attributes: undefined, nearestRef: undefined }],
+        }),
+        url: vi.fn().mockReturnValue("https://good.example/"),
+      };
+      const badFrame = {
+        evaluate: vi.fn().mockRejectedValue(new Error("cross-origin")),
+        url: vi.fn().mockReturnValue("https://bad.example/"),
+      };
+      (browser as any).page.evaluate.mockResolvedValue({ totalMatches: 0, matches: [] });
+      (browser as any).page.frames.mockReturnValue([mainFrame, goodFrame, badFrame]);
+
+      const result = await browser.findElements({ selector: "a" });
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.elements).toHaveLength(1);
+      expect(result.elements[0].frameUrl).toBe("https://good.example/");
+    });
+
+    it("marks the result as truncated when totalMatches exceeds returned elements", async () => {
+      (browser as any).page.evaluate.mockResolvedValue({
+        totalMatches: 50,
+        matches: Array.from({ length: 20 }, (_, i) => ({
+          tag: "li",
+          text: `Item ${i}`,
+          attributes: undefined,
+          nearestRef: undefined,
+        })),
+      });
+
+      const result = await browser.findElements({ selector: "li", maxResults: 20 });
+
+      expect(result.totalMatches).toBe(50);
+      expect(result.elements).toHaveLength(20);
+      expect(result.truncated).toBe(true);
+    });
+  });
 });
diff --git a/packages/core/test/tools/inspectionTools.test.ts b/packages/core/test/tools/inspectionTools.test.ts
new file mode 100644
index 00000000..b9cf52cd
--- /dev/null
+++ b/packages/core/test/tools/inspectionTools.test.ts
@@ -0,0 +1,446 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { createInspectionTools } from "../../src/tools/inspectionTools.js";
+import { WebAgentEventEmitter, WebAgentEventType } from "../../src/events.js";
+import type {
+  AriaBrowser,
+  SearchPageResult,
+  FindElementsResult,
+} from "../../src/browser/ariaBrowser.js";
+
+// Mock the ai module — mirror searchTools.test.ts so the tool's
+// description/inputSchema/execute are passed through verbatim.
+vi.mock("ai", () => ({
+  tool: vi.fn((config: unknown) => {
+    const typedConfig = config as {
+      description: string;
+      inputSchema: unknown;
+      execute: (args: unknown, options?: unknown) => Promise<unknown>;
+    };
+    return {
+      ...typedConfig,
+      description: typedConfig.description,
+      inputSchema: typedConfig.inputSchema,
+      execute: typedConfig.execute,
+    };
+  }),
+}));
+
+const createMockBrowser = (
+  searchResult: SearchPageResult = { totalMatches: 0, truncated: false, matches: [] },
+  findResult: FindElementsResult = { totalMatches: 0, truncated: false, elements: [] },
+): AriaBrowser =>
+  ({
+    searchPage: vi.fn().mockResolvedValue(searchResult),
+    findElements: vi.fn().mockResolvedValue(findResult),
+  }) as unknown as AriaBrowser;
+
+describe("Inspection Tools", () => {
+  let mockBrowser: AriaBrowser;
+  let eventEmitter: WebAgentEventEmitter;
+  let tools: ReturnType<typeof createInspectionTools>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockBrowser = createMockBrowser();
+    eventEmitter = new WebAgentEventEmitter();
+
+    tools = createInspectionTools({
+      browser: mockBrowser,
+      eventEmitter,
+    });
+  });
+
+  describe("Tool Structure", () => {
+    it("should create search_page tool", () => {
+      expect(tools).toBeDefined();
+      expect(tools.search_page).toBeDefined();
+    });
+
+    it("should have a description that mentions searching visible text", () => {
+      expect(tools.search_page.description).toContain("Search visible text");
+    });
+
+    it("should validate input schema correctly", () => {
+      const schema = tools.search_page.inputSchema as {
+        safeParse: (input: unknown) => { success: boolean; data?: any };
+      };
+
+      // Pattern is required
+      const valid = schema.safeParse({ pattern: "logout" });
+      expect(valid.success).toBe(true);
+
+      // Missing pattern should fail
+      const invalid = schema.safeParse({});
+      expect(invalid.success).toBe(false);
+
+      // Defaults applied when omitted
+      if (valid.success && valid.data) {
+        expect(valid.data.regex).toBe(false);
+        expect(valid.data.caseSensitive).toBe(false);
+        expect(valid.data.contextChars).toBe(80);
+        expect(valid.data.maxResults).toBe(10);
+      }
+    });
+
+    it("should reject out-of-range contextChars and maxResults", () => {
+      const schema = tools.search_page.inputSchema as {
+        safeParse: (input: unknown) => { success: boolean };
+      };
+
+      expect(schema.safeParse({ pattern: "x", contextChars: -1 }).success).toBe(false);
+      expect(schema.safeParse({ pattern: "x", contextChars: 501 }).success).toBe(false);
+      expect(schema.safeParse({ pattern: "x", maxResults: 0 }).success).toBe(false);
+      expect(schema.safeParse({ pattern: "x", maxResults: 51 }).success).toBe(false);
+    });
+  });
+
+  describe("search_page execution", () => {
+    it("should call browser.searchPage with the provided options", async () => {
+      const mockResult: SearchPageResult = {
+        totalMatches: 2,
+        truncated: false,
+        matches: [
+          {
+            match: "logout",
+            contextBefore: "click ",
+            contextAfter: " here",
+            nearestRef: "E12",
+          },
+          {
+            match: "Logout",
+            contextBefore: "the ",
+            contextAfter: " button",
+            nearestRef: undefined,
+            frameUrl: "https://iframe.example/",
+          },
+        ],
+      };
+      vi.mocked(mockBrowser.searchPage).mockResolvedValue(mockResult);
+
+      const result = await tools.search_page.execute!(
+        { pattern: "logout", regex: false, caseSensitive: false, contextChars: 80, maxResults: 10 },
+        { toolCallId: "test", messages: [] } as any,
+      );
+
+      expect(mockBrowser.searchPage).toHaveBeenCalledWith({
+        pattern: "logout",
+        regex: false,
+        caseSensitive: false,
+        contextChars: 80,
+        maxResults: 10,
+      });
+
+      expect(result).toEqual({
+        success: true,
+        action: "search_page",
+        pattern: "logout",
+        totalMatches: 2,
+        truncated: false,
+        matches: mockResult.matches,
+      });
+    });
+
+    it("should emit AGENT_ACTION and BROWSER_ACTION_COMPLETED on success", async () => {
+      const emitSpy = vi.spyOn(eventEmitter, "emit");
+
+      await tools.search_page.execute!(
+        { pattern: "foo", regex: false, caseSensitive: false, contextChars: 80, maxResults: 10 },
+        { toolCallId: "test", messages: [] } as any,
+      );
+
+      expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.AGENT_ACTION, {
+        action: "search_page",
+        value: "foo",
+      });
+      expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+        success: true,
+        action: "search_page",
+      });
+    });
+
+    it("should return a recoverable error result when the browser throws", async () => {
+      vi.mocked(mockBrowser.searchPage).mockRejectedValue(new Error("bad regex"));
+
+      const result = await tools.search_page.execute!(
+        { pattern: "(", regex: true, caseSensitive: false, contextChars: 80, maxResults: 10 },
+        { toolCallId: "test", messages: [] } as any,
+      );
+
+      expect(result).toEqual({
+        success: false,
+        action: "search_page",
+        pattern: "(",
+        error: "bad regex",
+        isRecoverable: true,
+      });
+    });
+
+    it("should emit failure event when browser throws", async () => {
+      vi.mocked(mockBrowser.searchPage).mockRejectedValue(new Error("kaboom"));
+
+      const emitSpy = vi.spyOn(eventEmitter, "emit");
+
+      await tools.search_page.execute!(
+        { pattern: "x", regex: false, caseSensitive: false, contextChars: 80, maxResults: 10 },
+        { toolCallId: "test", messages: [] } as any,
+      );
+
+      expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+        success: false,
+        action: "search_page",
+        error: "kaboom",
+        isRecoverable: true,
+      });
+    });
+
+    it("should coerce non-Error rejections to string", async () => {
+      vi.mocked(mockBrowser.searchPage).mockRejectedValue("string error");
+
+      const result = await tools.search_page.execute!(
+        { pattern: "x", regex: false, caseSensitive: false, contextChars: 80, maxResults: 10 },
+        { toolCallId: "test", messages: [] } as any,
+      );
+
+      expect(result).toMatchObject({
+        success: false,
+        action: "search_page",
+        pattern: "x",
+        error: "string error",
+        isRecoverable: true,
+      });
+    });
+  });
+
+  describe("find_elements", () => {
+    describe("Tool Structure", () => {
+      it("should create find_elements tool", () => {
+        expect(tools.find_elements).toBeDefined();
+      });
+
+      it("should have a description that mentions CSS selector", () => {
+        expect(tools.find_elements.description).toContain("CSS selector");
+      });
+
+      it("should validate input schema correctly", () => {
+        const schema = tools.find_elements.inputSchema as {
+          safeParse: (input: unknown) => { success: boolean; data?: any };
+        };
+
+        // selector is required
+        const valid = schema.safeParse({ selector: "a" });
+        expect(valid.success).toBe(true);
+
+        // Missing selector should fail
+        const invalid = schema.safeParse({});
+        expect(invalid.success).toBe(false);
+
+        // Defaults applied when omitted
+        if (valid.success && valid.data) {
+          expect(valid.data.maxResults).toBe(20);
+          expect(valid.data.includeText).toBe(true);
+          // withinRef / attributes are optional and not defaulted
+          expect(valid.data.withinRef).toBeUndefined();
+          expect(valid.data.attributes).toBeUndefined();
+        }
+      });
+
+      it("should reject out-of-range maxResults", () => {
+        const schema = tools.find_elements.inputSchema as {
+          safeParse: (input: unknown) => { success: boolean };
+        };
+
+        expect(schema.safeParse({ selector: "a", maxResults: 0 }).success).toBe(false);
+        expect(schema.safeParse({ selector: "a", maxResults: 101 }).success).toBe(false);
+        expect(schema.safeParse({ selector: "a", maxResults: 1 }).success).toBe(true);
+        expect(schema.safeParse({ selector: "a", maxResults: 100 }).success).toBe(true);
+      });
+    });
+
+    describe("find_elements execution", () => {
+      it("should call browser.findElements with the provided options", async () => {
+        const mockResult: FindElementsResult = {
+          totalMatches: 2,
+          truncated: false,
+          elements: [
+            {
+              tag: "a",
+              text: "Home",
+              attributes: { href: "https://example.com/home" },
+              nearestRef: "E5",
+            },
+            {
+              tag: "a",
+              text: "About",
+              attributes: { href: "https://example.com/about" },
+              nearestRef: "E6",
+              frameUrl: "https://iframe.example/",
+            },
+          ],
+        };
+        vi.mocked(mockBrowser.findElements).mockResolvedValue(mockResult);
+
+        const result = await tools.find_elements.execute!(
+          {
+            selector: "a.nav-link",
+            withinRef: "E1",
+            attributes: ["href"],
+            maxResults: 20,
+            includeText: true,
+          },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(mockBrowser.findElements).toHaveBeenCalledWith({
+          selector: "a.nav-link",
+          withinRef: "E1",
+          attributes: ["href"],
+          maxResults: 20,
+          includeText: true,
+        });
+
+        expect(result).toEqual({
+          success: true,
+          action: "find_elements",
+          selector: "a.nav-link",
+          totalMatches: 2,
+          truncated: false,
+          elements: mockResult.elements,
+        });
+      });
+
+      it("should propagate withinRef when provided and omit when not", async () => {
+        vi.mocked(mockBrowser.findElements).mockResolvedValue({
+          totalMatches: 0,
+          truncated: false,
+          elements: [],
+        });
+
+        // With withinRef
+        await tools.find_elements.execute!(
+          { selector: "a", withinRef: "E42", maxResults: 20, includeText: true },
+          { toolCallId: "test", messages: [] } as any,
+        );
+        expect(mockBrowser.findElements).toHaveBeenLastCalledWith({
+          selector: "a",
+          withinRef: "E42",
+          attributes: undefined,
+          maxResults: 20,
+          includeText: true,
+        });
+
+        // Without withinRef (omitted by schema)
+        await tools.find_elements.execute!({ selector: "a", maxResults: 20, includeText: true }, {
+          toolCallId: "test",
+          messages: [],
+        } as any);
+        expect(mockBrowser.findElements).toHaveBeenLastCalledWith({
+          selector: "a",
+          withinRef: undefined,
+          attributes: undefined,
+          maxResults: 20,
+          includeText: true,
+        });
+      });
+
+      it("should forward an attributes filter to the browser", async () => {
+        vi.mocked(mockBrowser.findElements).mockResolvedValue({
+          totalMatches: 0,
+          truncated: false,
+          elements: [],
+        });
+
+        await tools.find_elements.execute!(
+          {
+            selector: "[data-id]",
+            attributes: ["data-id", "class"],
+            maxResults: 20,
+            includeText: true,
+          },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(mockBrowser.findElements).toHaveBeenLastCalledWith({
+          selector: "[data-id]",
+          withinRef: undefined,
+          attributes: ["data-id", "class"],
+          maxResults: 20,
+          includeText: true,
+        });
+      });
+
+      it("should emit AGENT_ACTION and BROWSER_ACTION_COMPLETED on success", async () => {
+        const emitSpy = vi.spyOn(eventEmitter, "emit");
+
+        await tools.find_elements.execute!(
+          { selector: "a.nav", maxResults: 20, includeText: true },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.AGENT_ACTION, {
+          action: "find_elements",
+          value: "a.nav",
+        });
+        expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+          success: true,
+          action: "find_elements",
+        });
+      });
+
+      it("should return a recoverable error result when the browser throws (bad selector)", async () => {
+        vi.mocked(mockBrowser.findElements).mockRejectedValue(
+          new Error("Failed to execute 'querySelectorAll': '???' is not a valid selector."),
+        );
+
+        const result = await tools.find_elements.execute!(
+          { selector: "???", maxResults: 20, includeText: true },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(result).toMatchObject({
+          success: false,
+          action: "find_elements",
+          selector: "???",
+          isRecoverable: true,
+        });
+        expect((result as { error: string }).error).toMatch(/not a valid selector/);
+      });
+
+      it("should emit failure event when browser throws (withinRef not found)", async () => {
+        vi.mocked(mockBrowser.findElements).mockRejectedValue(
+          new Error('withinRef "Z9" not found'),
+        );
+
+        const emitSpy = vi.spyOn(eventEmitter, "emit");
+
+        await tools.find_elements.execute!(
+          { selector: "a", withinRef: "Z9", maxResults: 20, includeText: true },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
+          success: false,
+          action: "find_elements",
+          error: 'withinRef "Z9" not found',
+          isRecoverable: true,
+        });
+      });
+
+      it("should coerce non-Error rejections to string", async () => {
+        vi.mocked(mockBrowser.findElements).mockRejectedValue("string error");
+
+        const result = await tools.find_elements.execute!(
+          { selector: "a", maxResults: 20, includeText: true },
+          { toolCallId: "test", messages: [] } as any,
+        );
+
+        expect(result).toMatchObject({
+          success: false,
+          action: "find_elements",
+          selector: "a",
+          error: "string error",
+          isRecoverable: true,
+        });
+      });
+    });
+  });
+});
diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts
index c2bc2e96..46d6cb90 100644
--- a/packages/core/test/tools/webActionTools.test.ts
+++ b/packages/core/test/tools/webActionTools.test.ts
@@ -5,7 +5,7 @@ import { WebAgentEventEmitter, WebAgentEventType } from "../../src/events.js";
 import { LanguageModel } from "ai";
 import { z } from "zod";
 import { InvalidRefException, BrowserActionException } from "../../src/errors.js";
-import { generateTextWithRetry } from "../../src/utils/retry.js";
+import { generateTextWithRetry, generateObjectWithRetry } from "../../src/utils/retry.js";
 
 // Mock the ai module
 vi.mock("ai", () => ({
@@ -16,14 +16,20 @@ vi.mock("ai", () => ({
     execute: config.execute,
   })),
   generateText: vi.fn(),
+  generateObject: vi.fn(),
+  // jsonSchema() is called inline in webActionTools to wrap the user's schema;
+  // return a marker we can identify in test assertions.
+  jsonSchema: vi.fn((schema: any) => ({ __jsonSchema: true, schema })),
 }));
 
 // Mock the retry module to bypass retry logic in tests
 vi.mock("../../src/utils/retry.js", () => ({
   generateTextWithRetry: vi.fn(),
+  generateObjectWithRetry: vi.fn(),
 }));
 
 const mockGenerateTextWithRetry = vi.mocked(generateTextWithRetry);
+const mockGenerateObjectWithRetry = vi.mocked(generateObjectWithRetry);
 
 // Mock browser implementation
 class MockBrowser implements AriaBrowser {
@@ -83,6 +89,22 @@ class MockBrowser implements AriaBrowser {
     };
     return fn(mockTab);
   }
+
+  async searchPage(): Promise<{
+    totalMatches: number;
+    truncated: boolean;
+    matches: any[];
+  }> {
+    return { totalMatches: 0, truncated: false, matches: [] };
+  }
+
+  async findElements(): Promise<{
+    totalMatches: number;
+    truncated: boolean;
+    elements: any[];
+  }> {
+    return { totalMatches: 0, truncated: false, elements: [] };
+  }
 }
 
 describe("Web Action Tools", () => {
@@ -152,7 +174,7 @@ describe("Web Action Tools", () => {
       expect(tools.back.description).toBe("Go back to the previous page");
       expect(tools.forward.description).toBe("Go forward to the next page");
       expect(tools.extract.description).toBe(
-        "Extract specific data from the current page for later reference",
+        "Extract data from the current page. Pass `outputSchema` (a JSON Schema object) to get structured data; omit it for markdown text.",
       );
       expect(tools.done.description).toBe("Complete the task with your final answer");
       expect(tools.abort.description).toContain("Abort the task when it cannot be completed");
@@ -568,6 +590,109 @@ describe("Web Action Tools", () => {
         expect.any(Object),
       );
     });
+
+    it("should route through generateObject when outputSchema is provided", async () => {
+      const getMarkdownSpy = vi.spyOn(mockBrowser, "getMarkdown");
+      const emitSpy = vi.spyOn(eventEmitter, "emit");
+
+      const extracted = { title: "Hello", price: 9.99 };
+      mockGenerateObjectWithRetry.mockResolvedValueOnce({
+        object: extracted,
+      } as any);
+
+      const userSchema = {
+        type: "object",
+        properties: {
+          title: { type: "string" },
+          price: { type: "number" },
+        },
+        required: ["title", "price"],
+      };
+
+      const result = await tools.extract.execute({
+        description: "product details",
+        outputSchema: userSchema,
+      });
+
+      expect(getMarkdownSpy).toHaveBeenCalled();
+      // generateTextWithRetry should NOT have been called in the structured branch
+      expect(mockGenerateTextWithRetry).not.toHaveBeenCalled();
+      // generateObjectWithRetry should be called with the wrapped schema (marker from
+      // the jsonSchema() mock) and the provider/prompt/abort settings.
+      expect(mockGenerateObjectWithRetry).toHaveBeenCalledWith(
+        {
+          model: { specificationVersion: "v1" },
+          prompt: expect.stringContaining("product details"),
+          schema: { __jsonSchema: true, schema: userSchema },
+          maxOutputTokens: 5000,
+          abortSignal: undefined,
+        },
+        expect.objectContaining({
+          maxAttempts: 3,
+          onRetry: expect.any(Function),
+        }),
+      );
+
+      expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.AGENT_ACTION, {
+        action: "extract",
+        ref: undefined,
+        value: "product details",
+      });
+      expect(emitSpy).toHaveBeenCalledWith(WebAgentEventType.AGENT_EXTRACTED, {
+        extractedData: JSON.stringify(extracted),
+      });
+
+      expect(result).toEqual({
+        success: true,
+        action: "extract",
+        description: "product details",
+        data: extracted,
+      });
+      // The structured branch returns `data`, not `extractedData`.
+      expect((result as any).extractedData).toBeUndefined();
+    });
+
+    it("should still use generateText (markdown branch) when outputSchema is omitted", async () => {
+      mockGenerateTextWithRetry.mockResolvedValueOnce({
+        text: "markdown extracted",
+      } as any);
+
+      const result = await tools.extract.execute({ description: "Get info" });
+
+      // generateObjectWithRetry should NOT be called in the markdown branch
+      expect(mockGenerateObjectWithRetry).not.toHaveBeenCalled();
+      expect(mockGenerateTextWithRetry).toHaveBeenCalledTimes(1);
+
+      expect(result).toEqual({
+        success: true,
+        action: "extract",
+        description: "Get info",
+        extractedData: "markdown extracted",
+      });
+      // The markdown branch returns `extractedData`, not `data`.
+      expect((result as any).data).toBeUndefined();
+    });
+
+    it("should validate extract inputSchema with optional outputSchema", () => {
+      const schema = tools.extract.inputSchema;
+
+      // Just a description is valid
+      const validMinimal = schema.safeParse({ description: "data" });
+      expect(validMinimal.success).toBe(true);
+
+      // Description + outputSchema is valid
+      const validWithSchema = schema.safeParse({
+        description: "data",
+        outputSchema: { type: "object", properties: { title: { type: "string" } } },
+      });
+      expect(validWithSchema.success).toBe(true);
+
+      // Missing description is invalid
+      const invalid = schema.safeParse({
+        outputSchema: { type: "object" },
+      });
+      expect(invalid.success).toBe(false);
+    });
   });
 
   describe("Terminal Actions", () => {
diff --git a/packages/core/test/utils/retry.test.ts b/packages/core/test/utils/retry.test.ts
index 8fc7dc5a..4846640f 100644
--- a/packages/core/test/utils/retry.test.ts
+++ b/packages/core/test/utils/retry.test.ts
@@ -3,13 +3,19 @@
  */
 
 import { describe, it, expect, vi, beforeEach } from "vitest";
-import { generateTextWithRetry } from "../../src/utils/retry.js";
-import { generateText } from "ai";
+import { generateTextWithRetry, generateObjectWithRetry } from "../../src/utils/retry.js";
+import { generateText, generateObject, NoObjectGeneratedError } from "ai";
 
-// Mock the ai module
-vi.mock("ai", () => ({
-  generateText: vi.fn(),
-}));
+// Mock the ai module, but keep the real error classes so `instanceof` checks in
+// `isRetryableError` behave correctly against errors constructed in tests.
+vi.mock("ai", async () => {
+  const actual = await vi.importActual<typeof import("ai")>("ai");
+  return {
+    ...actual,
+    generateText: vi.fn(),
+    generateObject: vi.fn(),
+  };
+});
 
 describe("generateTextWithRetry", () => {
   const mockGenerateText = generateText as any;
@@ -157,3 +163,161 @@ describe("generateTextWithRetry", () => {
     expect(mockGenerateText).toHaveBeenCalledTimes(3);
   });
 });
+
+describe("generateObjectWithRetry", () => {
+  const mockGenerateObject = generateObject as any;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("should succeed on first attempt and return the result", async () => {
+    const expectedResult = {
+      object: { title: "Hello", count: 42 },
+      finishReason: "stop",
+    };
+    mockGenerateObject.mockResolvedValueOnce(expectedResult);
+
+    const result = await generateObjectWithRetry({
+      prompt: "test",
+      model: "test-model",
+      schema: { jsonSchema: { type: "object" } } as any,
+    } as any);
+
+    expect(result).toEqual(expectedResult);
+    expect(mockGenerateObject).toHaveBeenCalledTimes(1);
+    expect(mockGenerateObject).toHaveBeenCalledWith(
+      expect.objectContaining({
+        prompt: "test",
+        model: "test-model",
+      }),
+    );
+  });
+
+  it("should retry on transient error and succeed", async () => {
+    const expectedResult = {
+      object: { ok: true },
+      finishReason: "stop",
+    };
+    const transientError = new Error("Network error");
+
+    mockGenerateObject.mockRejectedValueOnce(transientError).mockResolvedValueOnce(expectedResult);
+
+    const onRetry = vi.fn();
+    const result = await generateObjectWithRetry(
+      {
+        prompt: "test",
+        model: "test-model",
+        schema: { jsonSchema: { type: "object" } } as any,
+      } as any,
+      {
+        maxAttempts: 3,
+        initialDelay: 10,
+        onRetry,
+      },
+    );
+
+    expect(result).toEqual(expectedResult);
+    expect(mockGenerateObject).toHaveBeenCalledTimes(2);
+    expect(onRetry).toHaveBeenCalledWith(1, transientError);
+  });
+
+  it("should not retry on non-retryable error (401)", async () => {
+    const authError = new Error("Unauthorized") as any;
+    authError.status = 401;
+
+    mockGenerateObject.mockRejectedValueOnce(authError);
+
+    await expect(
+      generateObjectWithRetry({
+        prompt: "test",
+        model: "test-model",
+        schema: { jsonSchema: { type: "object" } } as any,
+      } as any),
+    ).rejects.toThrow("Unauthorized");
+
+    expect(mockGenerateObject).toHaveBeenCalledTimes(1);
+  });
+
+  it("should retry on rate limit error (429)", async () => {
+    const expectedResult = {
+      object: { ok: true },
+      finishReason: "stop",
+    };
+    const rateLimitError = new Error("Rate limit exceeded") as any;
+    rateLimitError.status = 429;
+
+    mockGenerateObject.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce(expectedResult);
+
+    const result = await generateObjectWithRetry(
+      {
+        prompt: "test",
+        model: "test-model",
+        schema: { jsonSchema: { type: "object" } } as any,
+      } as any,
+      {
+        maxAttempts: 3,
+        initialDelay: 10,
+      },
+    );
+
+    expect(result).toEqual(expectedResult);
+    expect(mockGenerateObject).toHaveBeenCalledTimes(2);
+  });
+
+  it("should throw last error after max attempts", async () => {
+    const persistentError = new Error("Persistent error");
+
+    mockGenerateObject
+      .mockRejectedValueOnce(persistentError)
+      .mockRejectedValueOnce(persistentError)
+      .mockRejectedValueOnce(persistentError);
+
+    await expect(
+      generateObjectWithRetry(
+        {
+          prompt: "test",
+          model: "test-model",
+          schema: { jsonSchema: { type: "object" } } as any,
+        } as any,
+        {
+          maxAttempts: 3,
+          initialDelay: 10,
+        },
+      ),
+    ).rejects.toThrow("Persistent error");
+
+    expect(mockGenerateObject).toHaveBeenCalledTimes(3);
+  });
+
+  it("should not retry on NoObjectGeneratedError (schema validation failure)", async () => {
+    // The AI SDK throws NoObjectGeneratedError when the model returns JSON that
+    // fails schema validation or fails to parse. Retrying the same prompt+schema
+    // wastes tokens, so we surface it immediately as non-retryable.
+    const schemaError = new NoObjectGeneratedError({
+      message: "Model output failed schema validation",
+      text: '{"bad": "shape"}',
+      response: {} as any,
+      usage: {} as any,
+      finishReason: "stop",
+    });
+
+    mockGenerateObject.mockRejectedValueOnce(schemaError);
+
+    await expect(
+      generateObjectWithRetry(
+        {
+          prompt: "test",
+          model: "test-model",
+          schema: { jsonSchema: { type: "object" } } as any,
+        } as any,
+        {
+          maxAttempts: 3,
+          initialDelay: 10,
+        },
+      ),
+    ).rejects.toThrow("Model output failed schema validation");
+
+    expect(mockGenerateObject).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts
index 100bfabd..d3a82efe 100644
--- a/packages/core/test/webAgent.test.ts
+++ b/packages/core/test/webAgent.test.ts
@@ -202,6 +202,22 @@ class MockBrowser implements AriaBrowser {
     return fn(mockTab);
   }
 
+  async searchPage(): Promise<{
+    totalMatches: number;
+    truncated: boolean;
+    matches: any[];
+  }> {
+    return { totalMatches: 0, truncated: false, matches: [] };
+  }
+
+  async findElements(): Promise<{
+    totalMatches: number;
+    truncated: boolean;
+    elements: any[];
+  }> {
+    return { totalMatches: 0, truncated: false, elements: [] };
+  }
+
   // Test helpers
   setPageSnapshot(snapshot: string): void {
     this.pageSnapshot = snapshot;
diff --git a/packages/extension/src/background/ExtensionBrowser.ts b/packages/extension/src/background/ExtensionBrowser.ts
index 6619d4c6..fc29d6d8 100644
--- a/packages/extension/src/background/ExtensionBrowser.ts
+++ b/packages/extension/src/background/ExtensionBrowser.ts
@@ -1,5 +1,13 @@
 import browser from "webextension-polyfill";
-import type { AriaBrowser } from "pilo-core/core";
+import type {
+  AriaBrowser,
+  SearchPageOptions,
+  SearchPageMatch,
+  SearchPageResult,
+  FindElementsOptions,
+  FindElementsMatch,
+  FindElementsResult,
+} from "pilo-core/core";
 import { PageAction, LoadState } from "pilo-core/core";
 import type { Tabs } from "webextension-polyfill";
 import { createLogger } from "../shared/utils/logger";
@@ -639,6 +647,218 @@ export class ExtensionBrowser implements AriaBrowser {
     return tabs[0];
   }
 
+  async searchPage(opts: SearchPageOptions): Promise<SearchPageResult> {
+    const tab = await this.getActiveTab();
+    this.logger.info("searchPage() called", { tabId: tab.id, pattern: opts.pattern });
+
+    const evalOpts = {
+      pattern: opts.pattern,
+      regex: opts.regex ?? false,
+      caseSensitive: opts.caseSensitive ?? false,
+      contextChars: opts.contextChars ?? 80,
+      maxResults: opts.maxResults ?? 10,
+    };
+
+    let result: { totalMatches: number; matches: Array<Omit<SearchPageMatch, "frameUrl">> };
+    try {
+      const [scriptResult] = await browser.scripting.executeScript({
+        target: { tabId: tab.id! },
+        func: (params: {
+          pattern: string;
+          regex: boolean;
+          caseSensitive: boolean;
+          contextChars: number;
+          maxResults: number;
+        }) => {
+          const flags = params.caseSensitive ? "g" : "gi";
+          const re = params.regex
+            ? new RegExp(params.pattern, flags)
+            : new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), flags);
+
+          const matches: Array<{
+            match: string;
+            contextBefore: string;
+            contextAfter: string;
+            nearestRef?: string;
+          }> = [];
+          let totalMatches = 0;
+
+          if (!document.body) {
+            return { totalMatches, matches };
+          }
+
+          const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, {
+            acceptNode(node) {
+              const p = node.parentElement;
+              if (!p) return NodeFilter.FILTER_REJECT;
+              const tag = p.tagName;
+              if (tag === "SCRIPT" || tag === "STYLE" || tag === "NOSCRIPT") {
+                return NodeFilter.FILTER_REJECT;
+              }
+              return NodeFilter.FILTER_ACCEPT;
+            },
+          });
+
+          let node: Node | null;
+          while ((node = walker.nextNode())) {
+            const text = (node as Text).data;
+            re.lastIndex = 0;
+            let m: RegExpExecArray | null;
+            while ((m = re.exec(text)) !== null) {
+              totalMatches++;
+              if (matches.length < params.maxResults) {
+                const start = Math.max(0, m.index - params.contextChars);
+                const end = Math.min(text.length, m.index + m[0].length + params.contextChars);
+                const parentEl = (node as Text).parentElement;
+                const refEl = parentEl?.closest("[data-pilo-ref]") ?? null;
+                matches.push({
+                  match: m[0],
+                  contextBefore: text.slice(start, m.index),
+                  contextAfter: text.slice(m.index + m[0].length, end),
+                  nearestRef: refEl?.getAttribute("data-pilo-ref") ?? undefined,
+                });
+              }
+              if (m.index === re.lastIndex) re.lastIndex++;
+            }
+          }
+
+          return { totalMatches, matches };
+        },
+        args: [evalOpts],
+      });
+
+      result = scriptResult.result as typeof result;
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      this.logger.error("searchPage execution error", { tabId: tab.id }, error);
+      throw new Error(`search_page failed: ${message}`);
+    }
+
+    const aggregated: SearchPageMatch[] = result.matches.map((m) => ({
+      ...m,
+      frameUrl: undefined,
+    }));
+
+    return {
+      totalMatches: result.totalMatches,
+      truncated: result.totalMatches > aggregated.length,
+      matches: aggregated,
+    };
+  }
+
+  async findElements(opts: FindElementsOptions): Promise<FindElementsResult> {
+    const tab = await this.getActiveTab();
+    this.logger.info("findElements() called", { tabId: tab.id, selector: opts.selector });
+
+    const evalOpts = {
+      selector: opts.selector,
+      withinRef: opts.withinRef ?? null,
+      attributes: opts.attributes ?? null,
+      maxResults: opts.maxResults ?? 20,
+      includeText: opts.includeText ?? true,
+    };
+
+    let scriptOutcome:
+      | { totalMatches: number; matches: Array<Omit<FindElementsMatch, "frameUrl">> }
+      | { error: string; kind: "bad-selector" | "within-ref-miss" };
+    try {
+      const [scriptResult] = await browser.scripting.executeScript({
+        target: { tabId: tab.id! },
+        func: (params: {
+          selector: string;
+          withinRef: string | null;
+          attributes: string[] | null;
+          maxResults: number;
+          includeText: boolean;
+        }):
+          | {
+              totalMatches: number;
+              matches: Array<{
+                tag: string;
+                text?: string;
+                attributes?: Record<string, string>;
+                nearestRef?: string;
+              }>;
+            }
+          | { error: string; kind: "bad-selector" | "within-ref-miss" } => {
+          // Resolve scope root
+          let root: Document | Element = document;
+          if (params.withinRef !== null) {
+            const r = document.querySelector(`[data-pilo-ref="${CSS.escape(params.withinRef)}"]`);
+            if (!r)
+              return {
+                error: `withinRef "${params.withinRef}" not found in this frame`,
+                kind: "within-ref-miss",
+              };
+            root = r;
+          }
+
+          let nodeList: NodeListOf<Element>;
+          try {
+            nodeList = root.querySelectorAll(params.selector);
+          } catch (e) {
+            return { error: e instanceof Error ? e.message : String(e), kind: "bad-selector" };
+          }
+
+          const totalMatches = nodeList.length;
+          const matches: Array<{
+            tag: string;
+            text?: string;
+            attributes?: Record<string, string>;
+            nearestRef?: string;
+          }> = [];
+          for (let i = 0; i < nodeList.length && matches.length < params.maxResults; i++) {
+            const el = nodeList[i];
+            let attrs: Record<string, string> | undefined;
+            if (params.attributes && params.attributes.length > 0) {
+              attrs = {};
+              for (const name of params.attributes) {
+                const v = el.getAttribute(name);
+                if (v !== null) attrs[name] = v;
+              }
+            }
+            const href = (el as HTMLAnchorElement | HTMLAreaElement).href;
+            const src = (el as HTMLImageElement | HTMLScriptElement | HTMLIFrameElement).src;
+            if (typeof href === "string" && href) (attrs ??= {})["href"] = href;
+            if (typeof src === "string" && src) (attrs ??= {})["src"] = src;
+
+            matches.push({
+              tag: el.tagName.toLowerCase(),
+              text: params.includeText ? (el.textContent ?? "").trim().slice(0, 500) : undefined,
+              attributes: attrs && Object.keys(attrs).length > 0 ? attrs : undefined,
+              nearestRef:
+                (el.closest("[data-pilo-ref]") as Element | null)?.getAttribute("data-pilo-ref") ??
+                undefined,
+            });
+          }
+          return { totalMatches, matches };
+        },
+        args: [evalOpts],
+      });
+
+      scriptOutcome = scriptResult.result as typeof scriptOutcome;
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      this.logger.error("findElements execution error", { tabId: tab.id }, error);
+      throw new Error(`find_elements failed: ${message}`);
+    }
+
+    if ("error" in scriptOutcome) {
+      throw new Error(`find_elements failed: ${scriptOutcome.error}`);
+    }
+
+    const aggregated: FindElementsMatch[] = scriptOutcome.matches.map((m) => ({
+      ...m,
+      frameUrl: undefined,
+    }));
+
+    return {
+      totalMatches: scriptOutcome.totalMatches,
+      truncated: scriptOutcome.totalMatches > aggregated.length,
+      elements: aggregated,
+    };
+  }
+
   async runInTemporaryTab<T>(
     _fn: (tab: {
       goto: (url: string) => Promise<void>;
diff --git a/packages/extension/test/ExtensionBrowser.test.ts b/packages/extension/test/ExtensionBrowser.test.ts
index 9b48d7d1..37753bf0 100644
--- a/packages/extension/test/ExtensionBrowser.test.ts
+++ b/packages/extension/test/ExtensionBrowser.test.ts
@@ -48,6 +48,266 @@ describe("ExtensionBrowser", () => {
     ]);
   });
 
+  describe("searchPage", () => {
+    it("returns matches from a single executeScript call (top frame only, frameUrl undefined)", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            totalMatches: 1,
+            matches: [
+              {
+                match: "logout",
+                contextBefore: "click ",
+                contextAfter: " here",
+                nearestRef: "E5",
+              },
+            ],
+          },
+        } as any,
+      ]);
+
+      const result = await extensionBrowser.searchPage({ pattern: "logout" });
+
+      expect(browser.scripting.executeScript).toHaveBeenCalledTimes(1);
+      const call = vi.mocked(browser.scripting.executeScript).mock.calls[0][0] as any;
+      expect(call.target).toEqual({ tabId: mockTabId });
+      expect(call.args).toEqual([
+        {
+          pattern: "logout",
+          regex: false,
+          caseSensitive: false,
+          contextChars: 80,
+          maxResults: 10,
+        },
+      ]);
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.truncated).toBe(false);
+      expect(result.matches).toHaveLength(1);
+      expect(result.matches[0]).toEqual({
+        match: "logout",
+        contextBefore: "click ",
+        contextAfter: " here",
+        nearestRef: "E5",
+        frameUrl: undefined,
+      });
+    });
+
+    it("forwards regex and caseSensitive flags", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        { result: { totalMatches: 0, matches: [] } } as any,
+      ]);
+
+      await extensionBrowser.searchPage({
+        pattern: "Lo[gG]out",
+        regex: true,
+        caseSensitive: true,
+        contextChars: 20,
+        maxResults: 3,
+      });
+
+      const call = vi.mocked(browser.scripting.executeScript).mock.calls[0][0] as any;
+      expect(call.args).toEqual([
+        {
+          pattern: "Lo[gG]out",
+          regex: true,
+          caseSensitive: true,
+          contextChars: 20,
+          maxResults: 3,
+        },
+      ]);
+    });
+
+    it("marks the result as truncated when totalMatches exceeds returned matches", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            totalMatches: 25,
+            matches: Array.from({ length: 10 }, (_, i) => ({
+              match: `m${i}`,
+              contextBefore: "",
+              contextAfter: "",
+              nearestRef: undefined,
+            })),
+          },
+        } as any,
+      ]);
+
+      const result = await extensionBrowser.searchPage({ pattern: "x", maxResults: 10 });
+
+      expect(result.totalMatches).toBe(25);
+      expect(result.matches).toHaveLength(10);
+      expect(result.truncated).toBe(true);
+    });
+
+    it("wraps executeScript rejection as a search_page error", async () => {
+      vi.mocked(browser.scripting.executeScript).mockRejectedValue(
+        new Error("SyntaxError: Invalid regular expression"),
+      );
+
+      await expect(extensionBrowser.searchPage({ pattern: "(", regex: true })).rejects.toThrow(
+        /search_page failed/,
+      );
+    });
+  });
+
+  describe("findElements", () => {
+    it("returns elements from a single executeScript call (top frame only, frameUrl undefined)", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            totalMatches: 1,
+            matches: [
+              {
+                tag: "a",
+                text: "Home",
+                attributes: { href: "https://example.com/home" },
+                nearestRef: "E5",
+              },
+            ],
+          },
+        } as any,
+      ]);
+
+      const result = await extensionBrowser.findElements({ selector: "a.nav-link" });
+
+      expect(browser.scripting.executeScript).toHaveBeenCalledTimes(1);
+      const call = vi.mocked(browser.scripting.executeScript).mock.calls[0][0] as any;
+      expect(call.target).toEqual({ tabId: mockTabId });
+      expect(call.args).toEqual([
+        {
+          selector: "a.nav-link",
+          withinRef: null,
+          attributes: null,
+          maxResults: 20,
+          includeText: true,
+        },
+      ]);
+
+      expect(result.totalMatches).toBe(1);
+      expect(result.truncated).toBe(false);
+      expect(result.elements).toHaveLength(1);
+      expect(result.elements[0]).toEqual({
+        tag: "a",
+        text: "Home",
+        attributes: { href: "https://example.com/home" },
+        nearestRef: "E5",
+        frameUrl: undefined,
+      });
+    });
+
+    it("forwards withinRef, attributes, maxResults, and includeText", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        { result: { totalMatches: 0, matches: [] } } as any,
+      ]);
+
+      await extensionBrowser.findElements({
+        selector: "[data-id]",
+        withinRef: "E42",
+        attributes: ["data-id", "class"],
+        maxResults: 5,
+        includeText: false,
+      });
+
+      const call = vi.mocked(browser.scripting.executeScript).mock.calls[0][0] as any;
+      expect(call.args).toEqual([
+        {
+          selector: "[data-id]",
+          withinRef: "E42",
+          attributes: ["data-id", "class"],
+          maxResults: 5,
+          includeText: false,
+        },
+      ]);
+    });
+
+    it("returns auto-resolved href and src attributes from the in-page result", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            totalMatches: 2,
+            matches: [
+              {
+                tag: "a",
+                text: "Home",
+                attributes: { href: "https://example.com/home" },
+                nearestRef: undefined,
+              },
+              {
+                tag: "img",
+                text: "",
+                attributes: { src: "https://example.com/cat.png" },
+                nearestRef: undefined,
+              },
+            ],
+          },
+        } as any,
+      ]);
+
+      const result = await extensionBrowser.findElements({ selector: "a, img" });
+
+      expect(result.elements).toHaveLength(2);
+      expect(result.elements[0].attributes).toEqual({ href: "https://example.com/home" });
+      expect(result.elements[1].attributes).toEqual({ src: "https://example.com/cat.png" });
+    });
+
+    it("marks the result as truncated when totalMatches exceeds returned elements", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            totalMatches: 50,
+            matches: Array.from({ length: 20 }, (_, i) => ({
+              tag: "li",
+              text: `Item ${i}`,
+              attributes: undefined,
+              nearestRef: undefined,
+            })),
+          },
+        } as any,
+      ]);
+
+      const result = await extensionBrowser.findElements({ selector: "li", maxResults: 20 });
+
+      expect(result.totalMatches).toBe(50);
+      expect(result.elements).toHaveLength(20);
+      expect(result.truncated).toBe(true);
+    });
+
+    it("throws when the in-page function returns an error (bad selector)", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: {
+            error: "Failed to execute 'querySelectorAll': '???' is not a valid selector.",
+          },
+        } as any,
+      ]);
+
+      await expect(extensionBrowser.findElements({ selector: "???" })).rejects.toThrow(
+        /find_elements failed.*not a valid selector/,
+      );
+    });
+
+    it("throws when the in-page function returns a withinRef-not-found error (top frame only)", async () => {
+      vi.mocked(browser.scripting.executeScript).mockResolvedValue([
+        {
+          result: { error: 'withinRef "Z9" not found in this frame' },
+        } as any,
+      ]);
+
+      await expect(
+        extensionBrowser.findElements({ selector: "a", withinRef: "Z9" }),
+      ).rejects.toThrow(/find_elements failed.*withinRef "Z9" not found/);
+    });
+
+    it("wraps executeScript rejection as a find_elements error", async () => {
+      vi.mocked(browser.scripting.executeScript).mockRejectedValue(new Error("kaboom"));
+
+      await expect(extensionBrowser.findElements({ selector: "a" })).rejects.toThrow(
+        /find_elements failed/,
+      );
+    });
+  });
+
   describe("Click Action - New Tab Prevention", () => {
     it("should successfully perform click action", async () => {
       vi.mocked(browser.scripting.executeScript).mockImplementation(async () => {

From 295c2399509aed89827a7c31c3401b43647cdd47 Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Thu, 14 May 2026 09:21:12 -0700
Subject: [PATCH 2/7] tune(prompts): snapshot-first guidance for page
 exploration tools
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reframes the inspection-tool guidance around "trust the snapshot first;
escalate only when needed" rather than aggressively pushing the new tools
in all cases. Iter1 of the local prompt-tuning loop over-steered the
agent into calling search_page/find_elements to "confirm" what was
already visible in the aria-tree snapshot, costing +24% input tokens vs
baseline.

Changes:

- Best-practices block: lead with "default: trust the snapshot" and
  introduce inspection tools as escalations for cases the snapshot
  doesn't cover (truncated content, values buried in long page text,
  attributes at scale).
- extract.description: clarify that extract is for cases the snapshot
  doesn't already answer; explicit "do not pass empty {}" warning.
- search_page.description: scoped to "when the snapshot doesn't show the
  answer"; added concrete alternate-spelling guidance.
- find_elements.description: scoped to truncated snapshots, large
  attribute extraction, and subtree enumeration via withinRef.
- outputSchema description: explicit "REQUIRED with a real schema; {}
  is NOT valid".

Relaxed three description-string test assertions from .toBe / .toContain
to .toMatch so iteration on description copy doesn't break tests.

Local 5-task micro-eval (gemini-2.5-flash, vertex, chrome, headless):
total input tokens 390,971 (baseline) → 226,301 (iter2), -42%. Biggest
single win: search_page_lookup task (218K → 73K, -66%) — agent now
answers "CSS1 published 1996" from the snapshot directly. Sticky
remainder: model still passes outputSchema:{} instead of a real schema.

Tool wiring, types, and behavior are unchanged.
---
 packages/core/src/prompts.ts                  | 31 ++++++++++++-------
 .../core/test/tools/inspectionTools.test.ts   |  8 ++---
 .../core/test/tools/webActionTools.test.ts    |  5 ++-
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index cf032561..7cb1023f 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -58,11 +58,11 @@ export const TOOL_STRINGS = {
     },
     extract: {
       description:
-        "Extract data from the current page. Pass `outputSchema` (a JSON Schema object) to get structured data; omit it for markdown text.",
+        "Extract data from the current page via an LLM round-trip. Use ONLY when the aria-tree snapshot doesn't already contain the answer — most simple reads (titles, counts, prices visible on the page) can be answered directly via done() without calling extract. When the user asks for STRUCTURED data (a list of items, a JSON object, tabular output, fields like {name, price, url}), you MUST pass a real `outputSchema` so the SDK validates and returns the object directly — do NOT serialize JSON into markdown by hand, and do NOT pass an empty {}. Omit `outputSchema` only for free-form narrative summaries.",
       dataDescription:
         "Describe what information to extract. Focus on content, not element references.",
       outputSchema:
-        "Optional JSON Schema describing the desired structured output. When provided, returns `data` (an object matching the schema) instead of `extractedData` (markdown).",
+        'JSON Schema (object) describing the desired output shape. REQUIRED whenever the task asks for structured data — lists, JSON, tables, or any answer with explicit fields. Must be a REAL schema with `type` and `properties`/`items` defined for every field you want. Example for a list of items: {"type":"array","items":{"type":"object","properties":{"name":{"type":"string"},"price":{"type":"number"}},"required":["name","price"]}}. An empty {} is NOT valid — it provides no validation and defeats the purpose. Omit this argument entirely for free-form prose summaries.',
     },
     done: {
       description: "Complete the task with your final answer",
@@ -90,20 +90,22 @@ export const TOOL_STRINGS = {
     },
     searchPage: {
       description:
-        "Search visible text on the current page. Free and fast — prefer this over extract when you know what text to look for.",
-      pattern: "Text or regex pattern to search for",
-      regex: "Treat `pattern` as a regular expression",
+        "Zero-LLM, zero-token text search of the current page. Use ONLY when the answer ISN'T already visible in the aria-tree snapshot but should be in the page text — e.g., a specific value buried in a paragraph (a year, a price, a quote, a code snippet), or checking whether some phrase appears on a long page. Returns matches with surrounding context, so you can read the answer directly from the result. If the snapshot already shows the answer, just call done() — don't search redundantly. If a query returns zero matches, try alternate spellings (e.g., 'Beautiful Soup' vs 'BeautifulSoup') or regex with word boundaries before giving up.",
+      pattern: "Text or regex pattern to search for. Try simple substrings first.",
+      regex:
+        "Treat `pattern` as a regular expression. Useful for word boundaries (\\bword\\b) or alternation.",
       caseSensitive: "Match case sensitively",
       contextChars: "Characters of context before/after each match (0-500)",
       maxResults: "Maximum number of matches to return (1-50)",
     },
     findElements: {
       description:
-        'Query elements by CSS selector. Free and fast — useful for inventory questions ("how many cards?") before deciding to extract.',
+        "Zero-LLM, zero-token CSS-selector query of the current page. Use ONLY when the aria-tree snapshot doesn't have what you need. Best fits: (1) collecting hrefs/srcs at scale (href/src auto-resolved to absolute URLs), (2) listing items inside a specific section via `withinRef` to scope to an aria-tree subtree, (3) when the snapshot is truncated and you need to enumerate beyond what's shown. For simple 'how many X are there?' questions where the snapshot shows the items, just count them in the snapshot and call done() — don't call find_elements just to confirm what you can already see.",
       selector: "CSS selector",
-      withinRef: "Optional aria-tree ref to scope the query to that element's subtree",
+      withinRef:
+        "Optional aria-tree ref (e.g. E42) to scope the query to that element's subtree. Use to list items inside a specific section.",
       attributes:
-        "Element attributes to include (e.g., ['href', 'data-id']). href/src are auto-included as absolute URLs.",
+        "Element attributes to include (e.g., ['href', 'data-id']). `href` and `src` are auto-included as absolute URLs even if not requested.",
       maxResults: "Maximum number of elements to return (1-100)",
       includeText: "Include each element's text content (truncated to 500 chars)",
     },
@@ -201,8 +203,8 @@ function buildToolExamples(
     `- back() - ${TOOL_STRINGS.webActions.back.description}`,
     `- forward() - ${TOOL_STRINGS.webActions.forward.description}`,
     `- extract({"description": "data to extract", "outputSchema": {"type": "object", "properties": {"title": {"type": "string"}}}}) - ${TOOL_STRINGS.webActions.extract.description}`,
-    `- search_page({"pattern": "logout"}) - ${TOOL_STRINGS.webActions.searchPage.description}`,
-    `- find_elements({"selector": "a.nav-link"}) - ${TOOL_STRINGS.webActions.findElements.description}`,
+    `- search_page({"pattern": "Founded in"}) - ${TOOL_STRINGS.webActions.searchPage.description}`,
+    `- find_elements({"selector": "a", "attributes": ["href"], "withinRef": "E42"}) - ${TOOL_STRINGS.webActions.findElements.description}`,
   ];
 
   if (hasWebSearch) {
@@ -372,8 +374,13 @@ Analyze the current page state and determine your next action based on previous
 - Adapt your approach based on what's actually available
 - If you don't find relevant links or buttons, and the site has a search form, prioritize using it for navigation
 - If you have found the core information requested but cannot access supplementary details due to site limitations, use done() with what you have — only use abort() when the core task cannot be completed at all
-- For research: Use extract() immediately when finding relevant data
-- For inventory questions ("how many X?", "is Y on the page?", "what's the href of link Z?"), prefer search_page or find_elements — they are zero-LLM and instant. Reserve extract() for synthesized or structured data from the CURRENT page; pass outputSchema to extract() when you need JSON-shaped output instead of markdown{% if hasTabstack %}. Use tabstack_extract_json only for off-page URL fetches, not the current page{% endif %}
+- **Reading from the page — check the snapshot first, escalate only when needed:**
+  - **Default: trust the snapshot.** The aria-tree snapshot you receive each turn shows the page's text, links, headings, prices, counts, and visible content. If your answer is already visible there (count of items, a title, a short value), call done() directly — DO NOT call any inspection tool to "confirm" what you can already read
+  - If the snapshot is truncated, OR shows the section but not the exact value buried inside it, OR you need to find a specific phrase in long page text: use search_page({pattern}) (zero-LLM, zero-token). If zero matches, try alternate spellings or regex word boundaries
+  - If you need href/src/data-* attributes at scale, or to enumerate items inside a specific section via withinRef: use find_elements({selector, withinRef?, attributes?}) (zero-LLM, zero-token)
+  - If the task asks for STRUCTURED data (a list of items, JSON object, tabular output, fields like {name, price, url}) and the snapshot doesn't already give it to you: use extract({description, outputSchema:{...real JSON Schema...}}). The outputSchema MUST be a real schema with type and properties — never {} . Without a real schema, prefer reading from the snapshot
+  - If the task asks for a free-form narrative summary that requires synthesis beyond what the snapshot shows: use extract({description}) without outputSchema{% if hasTabstack %}
+  - For OFF-page URL fetches (not the current page), tabstack_extract_json and tabstack_extract_markdown are appropriate. Never use them for the current page{% endif %}
 - For academic papers or documents that require reading, counting, or extracting content (e.g., counting figures/tables, reading body text): PDFs are often unscrollable and unreadable{% if hasTabstack %} — use tabstack_extract_markdown to read PDF content directly{% endif %}{% if not hasTabstack %} — use webSearch to find an HTML version (e.g., ACL Anthology, Semantic Scholar) or the abstract page before attempting the PDF{% endif %}
 {% if hasWebSearch %}- If you need to search the web, use webSearch({query}) directly rather than filling in a browser search engine (DuckDuckGo, Google, Bing, etc.) — webSearch avoids CAPTCHA and bot detection that will block browser-based searches{% endif %}
 {% if hasTabstack %}- **Tabstack cloud tools are available — prefer them over manual browsing when they fit:**
diff --git a/packages/core/test/tools/inspectionTools.test.ts b/packages/core/test/tools/inspectionTools.test.ts
index b9cf52cd..72a64d53 100644
--- a/packages/core/test/tools/inspectionTools.test.ts
+++ b/packages/core/test/tools/inspectionTools.test.ts
@@ -56,8 +56,8 @@ describe("Inspection Tools", () => {
       expect(tools.search_page).toBeDefined();
     });
 
-    it("should have a description that mentions searching visible text", () => {
-      expect(tools.search_page.description).toContain("Search visible text");
+    it("should have a description that mentions text search of the page", () => {
+      expect(tools.search_page.description).toMatch(/text search|search.*text|find.*phrase/i);
     });
 
     it("should validate input schema correctly", () => {
@@ -217,8 +217,8 @@ describe("Inspection Tools", () => {
         expect(tools.find_elements).toBeDefined();
       });
 
-      it("should have a description that mentions CSS selector", () => {
-        expect(tools.find_elements.description).toContain("CSS selector");
+      it("should have a description that mentions CSS selectors", () => {
+        expect(tools.find_elements.description).toMatch(/CSS.?selector/i);
       });
 
       it("should validate input schema correctly", () => {
diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts
index 46d6cb90..f48023a3 100644
--- a/packages/core/test/tools/webActionTools.test.ts
+++ b/packages/core/test/tools/webActionTools.test.ts
@@ -173,9 +173,8 @@ describe("Web Action Tools", () => {
       );
       expect(tools.back.description).toBe("Go back to the previous page");
       expect(tools.forward.description).toBe("Go forward to the next page");
-      expect(tools.extract.description).toBe(
-        "Extract data from the current page. Pass `outputSchema` (a JSON Schema object) to get structured data; omit it for markdown text.",
-      );
+      expect(tools.extract.description).toMatch(/Extract data from the current page/);
+      expect(tools.extract.description).toMatch(/outputSchema/);
       expect(tools.done.description).toBe("Complete the task with your final answer");
       expect(tools.abort.description).toContain("Abort the task when it cannot be completed");
     });

From 259f3579f842dad437e0322ab8e59131e1f6e70c Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Thu, 14 May 2026 09:26:05 -0700
Subject: [PATCH 3/7] tune(prompts): zero-match recovery + copy-and-adapt
 outputSchema examples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Iter3 of the local prompt-tuning loop. Two targeted nudges on top of the
iter2 snapshot-first framing:

- searchPage.description: require at least one zero-match recovery
  attempt (variant spelling, regex word-boundary, etc.) before answering
  "no". A single zero-match search is explicitly NOT a final answer.

- extract.outputSchema description: three copy-and-adapt one-line schema
  examples (single object, list of items, boolean+reason) plus an
  explicit "STOP and write out the shape before calling extract."

Local 5-task micro-eval (gemini-2.5-flash, vertex, chrome, headless):
total input tokens 226,301 (iter2) → 251,515 (iter3), +11%. The
regression is entirely from search_page_presence — agent now correctly
tries both spellings before concluding (the answer is correctly "No";
the page truly doesn't mention Beautiful Soup). vs baseline: -36%.

outputSchema effectiveness remains unexercised: the agent skipped
extract on the structured-data task because the HN snapshot already
contained the answer. A task where the snapshot is genuinely
insufficient is needed to evaluate the new outputSchema guidance.
---
 packages/core/src/prompts.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index 7cb1023f..9400ad83 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -62,7 +62,7 @@ export const TOOL_STRINGS = {
       dataDescription:
         "Describe what information to extract. Focus on content, not element references.",
       outputSchema:
-        'JSON Schema (object) describing the desired output shape. REQUIRED whenever the task asks for structured data — lists, JSON, tables, or any answer with explicit fields. Must be a REAL schema with `type` and `properties`/`items` defined for every field you want. Example for a list of items: {"type":"array","items":{"type":"object","properties":{"name":{"type":"string"},"price":{"type":"number"}},"required":["name","price"]}}. An empty {} is NOT valid — it provides no validation and defeats the purpose. Omit this argument entirely for free-form prose summaries.',
+        'JSON Schema for the response shape. If you pass `{}` you get NOTHING — the schema must enumerate every field you want, with types. STOP and write out the shape before calling extract.\n\nSimple examples (copy and adapt):\n- Single object: {"type":"object","properties":{"price":{"type":"number"}},"required":["price"]}\n- List of items: {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"points":{"type":"number"}},"required":["title","points"]}}\n- Boolean + reason: {"type":"object","properties":{"answer":{"type":"boolean"},"quote":{"type":"string"}},"required":["answer"]}\n\nIf you cannot describe the shape, omit this argument entirely and the response will be markdown.',
     },
     done: {
       description: "Complete the task with your final answer",
@@ -90,7 +90,7 @@ export const TOOL_STRINGS = {
     },
     searchPage: {
       description:
-        "Zero-LLM, zero-token text search of the current page. Use ONLY when the answer ISN'T already visible in the aria-tree snapshot but should be in the page text — e.g., a specific value buried in a paragraph (a year, a price, a quote, a code snippet), or checking whether some phrase appears on a long page. Returns matches with surrounding context, so you can read the answer directly from the result. If the snapshot already shows the answer, just call done() — don't search redundantly. If a query returns zero matches, try alternate spellings (e.g., 'Beautiful Soup' vs 'BeautifulSoup') or regex with word boundaries before giving up.",
+        "Zero-LLM, zero-token text search of the current page. Use ONLY when the answer ISN'T already visible in the aria-tree snapshot but should be in the page text — e.g., a specific value buried in a paragraph (a year, a price, a quote, a code snippet), or checking whether some phrase appears on a long page. Returns matches with surrounding context, so you can read the answer directly from the result. If the snapshot already shows the answer, just call done() — don't search redundantly.\n\nZero-match recovery is REQUIRED: if a search returns 0 matches but the user's question implies the term should be on the page, you MUST try at least one variant before concluding 'no'. Common variants: insert/remove spaces ('BeautifulSoup' ↔ 'Beautiful Soup'), regex alternation ({pattern: 'Beautiful ?Soup', regex: true}), case toggles, hyphenation. A single zero-match search is NOT a final answer.",
       pattern: "Text or regex pattern to search for. Try simple substrings first.",
       regex:
         "Treat `pattern` as a regular expression. Useful for word boundaries (\\bword\\b) or alternation.",

From 7c6748ef45678830e4d475e6e61f8051c86fc496 Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Fri, 15 May 2026 12:13:49 -0700
Subject: [PATCH 4/7] chore: trigger eval re-run on Browserless CDP

Empty commit to fire `evals/**` workflow after switching the eval
pipeline's PILO_PW_CDP_ENDPOINT from bundled-browser (default fallback)
to Browserless. Several iter3 failures were navCount=1 / "Execution
context destroyed" patterns consistent with bundled-browser flakiness
in the Argo pod environment, not prompt regressions. This run isolates
the prompt changes from the browser stack.

From be609b644524e71840c89ad8c32472b2aab24099 Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Fri, 15 May 2026 13:39:44 -0700
Subject: [PATCH 5/7] feat(core): reject empty extract outputSchema at runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a runtime guard to the extract tool: when outputSchema is provided
but evaluates to {} (no keys), short-circuit before any LLM call and
return a recoverable error instructing the agent to either fill in a
real schema or omit the argument.

Why: three rounds of prompt iteration could not stop gemini-2.5-flash
from passing outputSchema:{} when asked for structured output. Across
two CI eval runs (iter3 bundled + iter3 Browserless, 60 tasks total),
zero extract calls included a real JSON Schema — 3-4 calls per run
passed {} which gives no validation and is functionally identical to
omitting the argument. Prompt-only enforcement has reached a model-
capability ceiling. The guard surfaces the issue as a tool error so
the agent can self-correct mid-task.

Behavior:
- outputSchema undefined → markdown branch (unchanged)
- outputSchema with real keys → generateObject branch (unchanged)
- outputSchema = {} → recoverable error with instructions; no
  getMarkdown(), no LLM call, no token spend

Also updates the outputSchema description so the agent knows the
rejection is enforced at runtime rather than a soft prompt-level
preference.

Tests: +1 covering the empty-schema rejection (no LLM/browser calls,
returns success:false / isRecoverable:true with a guiding error).
Existing extract tests unchanged (720 / 720 passing).
---
 packages/core/src/prompts.ts                  |  2 +-
 packages/core/src/tools/webActionTools.ts     | 17 +++++++++++++
 .../core/test/tools/webActionTools.test.ts    | 24 +++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index 9400ad83..f1aea12a 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -62,7 +62,7 @@ export const TOOL_STRINGS = {
       dataDescription:
         "Describe what information to extract. Focus on content, not element references.",
       outputSchema:
-        'JSON Schema for the response shape. If you pass `{}` you get NOTHING — the schema must enumerate every field you want, with types. STOP and write out the shape before calling extract.\n\nSimple examples (copy and adapt):\n- Single object: {"type":"object","properties":{"price":{"type":"number"}},"required":["price"]}\n- List of items: {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"points":{"type":"number"}},"required":["title","points"]}}\n- Boolean + reason: {"type":"object","properties":{"answer":{"type":"boolean"},"quote":{"type":"string"}},"required":["answer"]}\n\nIf you cannot describe the shape, omit this argument entirely and the response will be markdown.',
+        'JSON Schema for the response shape. The schema MUST enumerate every field you want, with types — an empty {} will be REJECTED with a recoverable error (the tool checks at runtime). STOP and write out the shape before calling extract.\n\nSimple examples (copy and adapt):\n- Single object: {"type":"object","properties":{"price":{"type":"number"}},"required":["price"]}\n- List of items: {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"points":{"type":"number"}},"required":["title","points"]}}\n- Boolean + reason: {"type":"object","properties":{"answer":{"type":"boolean"},"quote":{"type":"string"}},"required":["answer"]}\n\nIf you cannot describe the shape, OMIT this argument entirely and you will get markdown text instead.',
     },
     done: {
       description: "Complete the task with your final answer",
diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts
index eaad1130..00af99ce 100644
--- a/packages/core/src/tools/webActionTools.ts
+++ b/packages/core/src/tools/webActionTools.ts
@@ -323,6 +323,23 @@ export function createWebActionTools(context: WebActionContext) {
           value: description,
         });
 
+        // Runtime guard (before any work): an empty outputSchema {} doesn't
+        // constrain the LLM output and makes the structured branch
+        // indistinguishable from the markdown branch. Models tend to pass {}
+        // when prompted for outputSchema without supplying real properties;
+        // reject with a recoverable error so the agent fixes it or omits it.
+        if (outputSchema && Object.keys(outputSchema).length === 0) {
+          const errorMessage =
+            "outputSchema cannot be {} — that's an empty schema and does nothing. Either fill it in with real type/properties (e.g. {type:'object',properties:{title:{type:'string'}},required:['title']}) or OMIT the outputSchema argument entirely to get markdown text instead.";
+          return {
+            success: false,
+            action: "extract",
+            description,
+            error: errorMessage,
+            isRecoverable: true,
+          };
+        }
+
         // Get the page markdown content
         const markdown = await context.browser.getMarkdown();
 
diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts
index f48023a3..0e93e6ae 100644
--- a/packages/core/test/tools/webActionTools.test.ts
+++ b/packages/core/test/tools/webActionTools.test.ts
@@ -651,6 +651,30 @@ describe("Web Action Tools", () => {
       expect((result as any).extractedData).toBeUndefined();
     });
 
+    it("should reject empty outputSchema {} as a recoverable error", async () => {
+      const getMarkdownSpy = vi.spyOn(mockBrowser, "getMarkdown");
+
+      const result = await tools.extract.execute({
+        description: "product details",
+        outputSchema: {},
+      });
+
+      // Should NOT have called generateObjectWithRetry or generateTextWithRetry
+      expect(mockGenerateObjectWithRetry).not.toHaveBeenCalled();
+      expect(mockGenerateTextWithRetry).not.toHaveBeenCalled();
+      // It also short-circuits before fetching the page markdown — the schema is
+      // already invalid before any work happens.
+      expect(getMarkdownSpy).not.toHaveBeenCalled();
+
+      expect(result).toMatchObject({
+        success: false,
+        action: "extract",
+        description: "product details",
+        isRecoverable: true,
+      });
+      expect((result as any).error).toMatch(/outputSchema cannot be \{\}/);
+    });
+
     it("should still use generateText (markdown branch) when outputSchema is omitted", async () => {
       mockGenerateTextWithRetry.mockResolvedValueOnce({
         text: "markdown extracted",

From b6a40abdbc65a41d903199fd5eb70e70c29f4f52 Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Fri, 15 May 2026 14:09:55 -0700
Subject: [PATCH 6/7] =?UTF-8?q?chore:=20retrigger=20eval=20=E2=80=94=20pil?=
 =?UTF-8?q?o-secrets=20clobbered=20between=20runs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous run (pilo-batch-github-eval-khcjt) had 0/30 passes
because the GKE pilo-secrets bundle was reset to stubs/empty values
in the ~23-minute gap between two consecutive evals — likely another
local make cloud-secrets invocation from a different .env state.

This commit retriggers the eval against ad84c4b + 1bc9d4a (runtime
guard for empty extract outputSchema) with the correct secret.

From 5ad2de4f0bc4b631a79e1aa96fc4c8f1c616e10c Mon Sep 17 00:00:00 2001
From: Les Orchard <me@lmorchard.com>
Date: Fri, 15 May 2026 18:37:23 -0700
Subject: [PATCH 7/7] fix(core): soften extract outputSchema={} guard to silent
 downgrade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hard rejection from the previous commit (be609b6) caused two task
failures on the 100-task CI eval (Google Map #4 and ESPN #0):
gemini-2.5-flash passes outputSchema:{}, sees the recoverable error,
retries with outputSchema:{} again, and after 5 consecutive errors
the agent layer aborts the whole task.

Soften the guard: when outputSchema is non-null but has no keys,
silently treat it as if it were omitted (fall through to the markdown
branch). An empty {} schema gave no validation anyway — the structured
branch with an empty schema is indistinguishable from the markdown
branch. The fall-through is logged via an AGENT_STATUS event so the
downgrade is visible in traces.

Updated the outputSchema prompt copy: "an empty {} provides no
validation and is silently downgraded to a markdown extract" instead
of "will be REJECTED with a recoverable error".

Test: updated to assert the markdown branch IS called and the status
event IS emitted when outputSchema:{} is passed. Previously asserted
the recoverable-error shape; that behavior is gone.
---
 packages/core/src/prompts.ts                  |  2 +-
 packages/core/src/tools/webActionTools.ts     | 39 ++++++++++---------
 .../core/test/tools/webActionTools.test.ts    | 33 +++++++++++-----
 3 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/packages/core/src/prompts.ts b/packages/core/src/prompts.ts
index f1aea12a..d6cee665 100644
--- a/packages/core/src/prompts.ts
+++ b/packages/core/src/prompts.ts
@@ -62,7 +62,7 @@ export const TOOL_STRINGS = {
       dataDescription:
         "Describe what information to extract. Focus on content, not element references.",
       outputSchema:
-        'JSON Schema for the response shape. The schema MUST enumerate every field you want, with types — an empty {} will be REJECTED with a recoverable error (the tool checks at runtime). STOP and write out the shape before calling extract.\n\nSimple examples (copy and adapt):\n- Single object: {"type":"object","properties":{"price":{"type":"number"}},"required":["price"]}\n- List of items: {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"points":{"type":"number"}},"required":["title","points"]}}\n- Boolean + reason: {"type":"object","properties":{"answer":{"type":"boolean"},"quote":{"type":"string"}},"required":["answer"]}\n\nIf you cannot describe the shape, OMIT this argument entirely and you will get markdown text instead.',
+        'JSON Schema for the response shape. The schema MUST enumerate every field you want, with types — an empty {} provides no validation and is silently downgraded to a markdown extract, so you gain nothing by passing it. STOP and write out the shape before calling extract.\n\nSimple examples (copy and adapt):\n- Single object: {"type":"object","properties":{"price":{"type":"number"}},"required":["price"]}\n- List of items: {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"points":{"type":"number"}},"required":["title","points"]}}\n- Boolean + reason: {"type":"object","properties":{"answer":{"type":"boolean"},"quote":{"type":"string"}},"required":["answer"]}\n\nIf you cannot describe the shape, OMIT this argument entirely and you will get markdown text instead.',
     },
     done: {
       description: "Complete the task with your final answer",
diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts
index 00af99ce..9e3cfc46 100644
--- a/packages/core/src/tools/webActionTools.ts
+++ b/packages/core/src/tools/webActionTools.ts
@@ -323,21 +323,20 @@ export function createWebActionTools(context: WebActionContext) {
           value: description,
         });
 
-        // Runtime guard (before any work): an empty outputSchema {} doesn't
-        // constrain the LLM output and makes the structured branch
-        // indistinguishable from the markdown branch. Models tend to pass {}
-        // when prompted for outputSchema without supplying real properties;
-        // reject with a recoverable error so the agent fixes it or omits it.
-        if (outputSchema && Object.keys(outputSchema).length === 0) {
-          const errorMessage =
-            "outputSchema cannot be {} — that's an empty schema and does nothing. Either fill it in with real type/properties (e.g. {type:'object',properties:{title:{type:'string'}},required:['title']}) or OMIT the outputSchema argument entirely to get markdown text instead.";
-          return {
-            success: false,
-            action: "extract",
-            description,
-            error: errorMessage,
-            isRecoverable: true,
-          };
+        // Soft guard: an empty outputSchema {} doesn't constrain the LLM
+        // output and makes the structured branch indistinguishable from the
+        // markdown branch. Models (notably gemini-2.5-flash) tend to pass {}
+        // when asked for outputSchema without supplying real properties.
+        // Silently downgrade to the markdown branch rather than reject — a
+        // hard rejection traps the agent in a retry loop because the model
+        // keeps producing the same empty schema.
+        const effectiveSchema =
+          outputSchema && Object.keys(outputSchema).length > 0 ? outputSchema : undefined;
+        if (outputSchema && !effectiveSchema) {
+          context.eventEmitter.emit(WebAgentEventType.AGENT_STATUS, {
+            message:
+              "extract: outputSchema was empty ({}); falling back to markdown extraction. Provide a real JSON Schema (with type/properties) for structured output.",
+          });
         }
 
         // Get the page markdown content
@@ -346,14 +345,16 @@ export function createWebActionTools(context: WebActionContext) {
         // Build extraction prompt
         const prompt = buildExtractionPrompt(description, markdown);
 
-        // Structured branch: when outputSchema is provided, use generateObject with
-        // jsonSchema() to validate the LLM output against the schema.
-        if (outputSchema) {
+        // Structured branch: when a non-empty outputSchema is provided, use
+        // generateObject with jsonSchema() to validate the LLM output against
+        // the schema. Empty {} is downgraded above to undefined and falls
+        // through to the markdown branch.
+        if (effectiveSchema) {
           const { object } = await generateObjectWithRetry(
             {
               ...context.providerConfig,
               prompt,
-              schema: jsonSchema(outputSchema as any),
+              schema: jsonSchema(effectiveSchema as any),
               maxOutputTokens: 5000,
               abortSignal: context.abortSignal,
             },
diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts
index 0e93e6ae..0f347ece 100644
--- a/packages/core/test/tools/webActionTools.test.ts
+++ b/packages/core/test/tools/webActionTools.test.ts
@@ -651,28 +651,41 @@ describe("Web Action Tools", () => {
       expect((result as any).extractedData).toBeUndefined();
     });
 
-    it("should reject empty outputSchema {} as a recoverable error", async () => {
+    it("should silently downgrade empty outputSchema {} to the markdown branch", async () => {
       const getMarkdownSpy = vi.spyOn(mockBrowser, "getMarkdown");
+      const emitSpy = vi.spyOn(eventEmitter, "emit");
+      mockGenerateTextWithRetry.mockResolvedValueOnce({
+        text: "markdown extracted",
+      } as any);
 
       const result = await tools.extract.execute({
         description: "product details",
         outputSchema: {},
       });
 
-      // Should NOT have called generateObjectWithRetry or generateTextWithRetry
+      // generateObjectWithRetry should NOT be called — empty schema falls through.
       expect(mockGenerateObjectWithRetry).not.toHaveBeenCalled();
-      expect(mockGenerateTextWithRetry).not.toHaveBeenCalled();
-      // It also short-circuits before fetching the page markdown — the schema is
-      // already invalid before any work happens.
-      expect(getMarkdownSpy).not.toHaveBeenCalled();
+      // generateTextWithRetry IS called (markdown branch took over).
+      expect(mockGenerateTextWithRetry).toHaveBeenCalledTimes(1);
+      // getMarkdown is also called as part of the normal markdown path.
+      expect(getMarkdownSpy).toHaveBeenCalled();
 
-      expect(result).toMatchObject({
-        success: false,
+      // A status event should explain the silent downgrade.
+      expect(emitSpy).toHaveBeenCalledWith(
+        WebAgentEventType.AGENT_STATUS,
+        expect.objectContaining({
+          message: expect.stringMatching(/outputSchema was empty.*markdown/i),
+        }),
+      );
+
+      // Result shape matches the markdown branch (extractedData, not data).
+      expect(result).toEqual({
+        success: true,
         action: "extract",
         description: "product details",
-        isRecoverable: true,
+        extractedData: "markdown extracted",
       });
-      expect((result as any).error).toMatch(/outputSchema cannot be \{\}/);
+      expect((result as any).data).toBeUndefined();
     });
 
     it("should still use generateText (markdown branch) when outputSchema is omitted", async () => {