Skip to content

Commit 3e9f00b

Browse files
author
SentienceDEV
committed
Predicate agent
1 parent 18d5c96 commit 3e9f00b

5 files changed

Lines changed: 452 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,90 @@ All notable changes to `@predicatelabs/sdk` will be documented in this file.
44

55
## Unreleased
66

7+
### 2026-02-15
8+
9+
#### PredicateBrowserAgent (snapshot-first, verification-first)
10+
11+
`PredicateBrowserAgent` is a new high-level agent wrapper that gives you a **browser-use-like** `step()` / `run()` surface, but keeps Predicate’s core philosophy:
12+
13+
- **Snapshot-first perception** (structured DOM snapshot is the default)
14+
- **Verification-first control plane** (you can gate progress with deterministic checks)
15+
- Optional **vision fallback** (bounded) when snapshots aren’t sufficient
16+
17+
It’s built on top of `AgentRuntime` + `RuntimeAgent`.
18+
19+
##### Quickstart (single step)
20+
21+
```ts
22+
import {
23+
AgentRuntime,
24+
PredicateBrowserAgent,
25+
type RuntimeStep,
26+
LocalLLMProvider, // or OpenAIProvider / AnthropicProvider / DeepInfraProvider
27+
} from '@predicatelabs/sdk';
28+
29+
const runtime = new AgentRuntime(browserLike, page, tracer);
30+
const llm = new LocalLLMProvider({ model: 'qwen2.5:7b', baseUrl: 'http://localhost:11434/v1' });
31+
32+
const agent = new PredicateBrowserAgent({
33+
runtime,
34+
executor: llm,
35+
config: {
36+
// Token control: include last N step summaries in the prompt (0 disables history).
37+
historyLastN: 2,
38+
},
39+
});
40+
41+
const { ok } = await agent.step({
42+
taskGoal: 'Find pricing and verify checkout button exists',
43+
step: { goal: 'Open pricing page' } satisfies RuntimeStep,
44+
});
45+
```
46+
47+
##### Customize the compact prompt (advanced)
48+
49+
```ts
50+
const agent = new PredicateBrowserAgent({
51+
runtime,
52+
executor: llm,
53+
config: {
54+
compactPromptBuilder: (_taskGoal, _stepGoal, domContext, _snap, historySummary) => ({
55+
systemPrompt:
56+
'You are a web automation agent. Return ONLY one action: CLICK(id) | TYPE(id,"text") | PRESS("key") | FINISH()',
57+
userPrompt: `RECENT:\n${historySummary}\n\nELEMENTS:\n${domContext}\n\nReturn the single best action:`,
58+
}),
59+
},
60+
});
61+
```
62+
63+
##### CAPTCHA handling (interface-only; no solver shipped)
64+
65+
If you set `captcha.policy="callback"`, you must provide a handler. The SDK does **not** include a public CAPTCHA solver.
66+
67+
```ts
68+
import { HumanHandoffSolver } from '@predicatelabs/sdk';
69+
70+
const agent = new PredicateBrowserAgent({
71+
runtime,
72+
executor: llm,
73+
config: {
74+
captcha: {
75+
policy: 'callback',
76+
// Manual solve in the live session; SDK waits until it clears:
77+
handler: HumanHandoffSolver({ timeoutMs: 10 * 60_000, pollMs: 1_000 }),
78+
},
79+
},
80+
});
81+
```
82+
83+
#### RuntimeAgent: structured prompt override hooks
84+
85+
`RuntimeAgent` now supports optional hooks used by `PredicateBrowserAgent`:
86+
87+
- `structuredPromptBuilder(...)`
88+
- `domContextPostprocessor(...)`
89+
- `historySummaryProvider(...)`
90+
791
### 2026-02-13
892

993
#### Expanded deterministic verifications (adaptive resnapshotting)

src/agents/browser-agent.ts

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
import type { Snapshot, StepHookContext } from '../types';
2+
import type { PermissionPolicy } from '../browser';
3+
import type { AgentRuntime } from '../agent-runtime';
4+
import type { LLMProvider } from '../llm-provider';
5+
import { RuntimeAgent } from '../runtime-agent';
6+
import type { RuntimeStep } from '../runtime-agent';
7+
import type { CaptchaOptions } from '../captcha/types';
8+
import type { CaptchaHandler } from '../captcha/types';
9+
10+
/**
 * Permission-recovery settings accepted via
 * `PredicateBrowserAgentConfig.permissionRecovery`.
 *
 * NOTE(review): no code visible in this file reads these fields (the agent
 * constructor only defaults `permissionRecovery` to `null`), so the per-field
 * notes below are inferred from the names — confirm against the consumer.
 */
export interface PermissionRecoveryConfig {
11+
// Presumably turns permission recovery on or off — TODO confirm.
enabled?: boolean;
12+
// Presumably caps how many restarts recovery may trigger — TODO confirm.
maxRestarts?: number;
13+
// Presumably the permission names to grant automatically — TODO confirm.
autoGrant?: string[];
14+
// Loosely typed geolocation payload (`any` values); the expected keys are not visible here.
geolocation?: Record<string, any> | null;
15+
// Presumably the origin the permissions apply to — TODO confirm.
origin?: string | null;
16+
}
17+
18+
/**
 * Vision-fallback settings for `PredicateBrowserAgent`.
 *
 * In this file only `enabled` and `maxVisionCalls` are read (by
 * `PredicateBrowserAgent.step()`); the `trigger*` flags are not referenced in
 * the visible code.
 */
export interface VisionFallbackConfig {
19+
// Must be truthy for the `maxVisionCalls` budget check in `step()` to apply.
enabled?: boolean;
20+
// Vision-call budget. `step()` clamps it to >= 0 and skips the budget check when it is 0 or missing.
maxVisionCalls?: number;
21+
// NOTE(review): the three trigger flags below are not read in this file;
// their meaning is only suggested by their names — confirm against the consumer.
triggerRequiresVision?: boolean;
22+
triggerRepeatedNoop?: boolean;
23+
triggerCanvasOrLowActionables?: boolean;
24+
}
25+
26+
/**
 * CAPTCHA handling settings. `applyCaptchaConfigToRuntime` translates this
 * object into the options passed to `runtime.setCaptchaOptions(...)`.
 */
export interface CaptchaConfig {
27+
// Treated as 'abort' when omitted. 'callback' additionally requires `handler`.
policy?: 'abort' | 'callback';
28+
// Interface-only: SDK does not ship captcha solvers. Users provide a handler/callback.
29+
// With policy 'callback', a missing or null handler makes `applyCaptchaConfigToRuntime` throw.
handler?: CaptchaHandler | null;
30+
// Only forwarded for policy 'callback'; null/undefined falls back to 120_000.
timeoutMs?: number | null;
31+
// Only forwarded for policy 'callback'; null/undefined falls back to 1_000.
pollMs?: number | null;
32+
// Forwarded for both policies; defaults to 0.7 when omitted.
minConfidence?: number;
33+
}
34+
35+
/**
 * Configuration accepted by the `PredicateBrowserAgent` constructor.
 *
 * Every field is optional; the constructor fills in defaults and then spreads
 * the caller's config over them.
 */
export interface PredicateBrowserAgentConfig {
36+
// Permissions
// Both default to null in the agent constructor. NOTE(review): neither field
// is read anywhere else in the visible code — confirm where they are consumed.
37+
permissionStartup?: PermissionPolicy | null;
38+
permissionRecovery?: PermissionRecoveryConfig | null;
39+
40+
// Vision fallback
// Constructor default: { enabled: false, maxVisionCalls: 0 }.
41+
vision?: VisionFallbackConfig;
42+
43+
// CAPTCHA handling
// Constructor default: { policy: 'abort', handler: null }; applied to the
// runtime once, during construction.
44+
captcha?: CaptchaConfig;
45+
46+
// Prompt / token controls
// Number of most recent step outcomes exposed through the history summary
// provider; the constructor default is 0.
47+
historyLastN?: number; // 0 disables LLM-facing step history
48+
49+
// Compact prompt customization
50+
// builder(taskGoal, stepGoal, domContext, snapshot, historySummary) -> {systemPrompt, userPrompt}
// Forwarded to RuntimeAgent as `structuredPromptBuilder`.
51+
compactPromptBuilder?: (
52+
taskGoal: string,
53+
stepGoal: string,
54+
domContext: string,
55+
snap: Snapshot,
56+
historySummary: string
57+
) => { systemPrompt: string; userPrompt: string };
58+
59+
// Forwarded to RuntimeAgent as `domContextPostprocessor`: receives the DOM
// context string and returns the string to use in its place.
compactPromptPostprocessor?: (domContext: string) => string;
60+
}
61+
62+
/**
 * Render history entries as a newline-separated bullet list.
 *
 * @param items - History entries, one per step.
 * @returns One `- entry` line per item, joined with `\n`; an empty string
 *   when there are no items.
 */
function historySummary(items: string[]): string {
  const bullets: string[] = [];
  for (const entry of items) {
    bullets.push(`- ${entry}`);
  }
  // Joining an empty list yields '', which matches the "no history" case.
  return bullets.join('\n');
}
66+
67+
/**
 * Translate the agent-level CAPTCHA config into runtime CAPTCHA options.
 *
 * - No config: the runtime is left untouched.
 * - Policy `abort` (also used when `policy` is omitted): sets the policy and
 *   `minConfidence` (default 0.7).
 * - Policy `callback`: requires `handler`; sets `handler`, `timeoutMs`
 *   (default 120_000), `pollMs` (default 1_000) and `minConfidence`
 *   (default 0.7).
 *
 * The policy is matched case-insensitively.
 *
 * @param runtime - Runtime that receives the options via `setCaptchaOptions`.
 * @param cfg - Agent-level CAPTCHA config, or `undefined` to do nothing.
 * @throws Error if the policy is neither `abort` nor `callback`, or if the
 *   policy is `callback` and no handler was provided.
 */
function applyCaptchaConfigToRuntime(runtime: AgentRuntime, cfg: CaptchaConfig | undefined): void {
  if (!cfg) return;

  // Normalise without a type assertion: untyped callers can pass any string,
  // and an unrecognised value must not silently be treated as "callback".
  const policy = String(cfg.policy ?? 'abort').toLowerCase();
  const minConfidence = cfg.minConfidence ?? 0.7;

  if (policy === 'abort') {
    runtime.setCaptchaOptions({
      policy: 'abort',
      minConfidence,
    } satisfies CaptchaOptions);
    return;
  }

  if (policy !== 'callback') {
    throw new Error(
      `Unsupported captcha.policy "${String(cfg.policy)}". Expected "abort" or "callback".`
    );
  }

  const handler = cfg.handler ?? null;
  if (!handler) {
    throw new Error(
      'captcha.handler is required when captcha.policy="callback". ' +
        'Provide a handler callback (e.g. human handoff or your external system).'
    );
  }

  runtime.setCaptchaOptions({
    policy: 'callback',
    handler,
    // `??` so that an explicit null falls back to the default as well.
    timeoutMs: cfg.timeoutMs ?? 120_000,
    pollMs: cfg.pollMs ?? 1_000,
    minConfidence,
  } satisfies CaptchaOptions);
}
99+
100+
/**
 * Result of one `PredicateBrowserAgent.step()` call: the goal of the step
 * that was executed and the success flag returned by the underlying runner.
 */
export type StepOutcome = { stepGoal: string; ok: boolean };
101+
102+
/**
 * High-level agent wrapper around `RuntimeAgent` exposing `step()` / `run()`.
 *
 * The agent keeps a short in-memory log of step outcomes (bounded by
 * `config.historyLastN`) and exposes it to the runner through the
 * `historySummaryProvider` hook. The compact prompt builder and DOM-context
 * postprocessor from the config are forwarded to the runner as well.
 */
export class PredicateBrowserAgent {
  readonly runtime: AgentRuntime;
  readonly executor: LLMProvider;
  readonly visionExecutor?: LLMProvider;
  readonly visionVerifier?: LLMProvider;
  readonly config: PredicateBrowserAgentConfig;

  /** Most recent step outcomes, oldest first, formatted as `<goal> -> ok|fail`. */
  private history: string[] = [];
  // NOTE(review): nothing in this class increments this counter, so the
  // vision budget check in step() can never trigger as written. The runner
  // only returns a boolean, so vision usage is not observable from here.
  private visionCallsUsed = 0;
  private readonly runner: RuntimeAgent;

  /**
   * @param opts.runtime - Runtime the runner drives; CAPTCHA options are applied to it here.
   * @param opts.executor - LLM provider used by the runner.
   * @param opts.visionExecutor - Optional LLM provider forwarded to the runner.
   * @param opts.visionVerifier - Optional LLM provider forwarded to the runner.
   * @param opts.config - Optional overrides for the defaults listed below.
   * @throws Error if the CAPTCHA config is invalid (see `applyCaptchaConfigToRuntime`).
   */
  constructor(opts: {
    runtime: AgentRuntime;
    executor: LLMProvider;
    visionExecutor?: LLMProvider;
    visionVerifier?: LLMProvider;
    config?: PredicateBrowserAgentConfig;
  }) {
    this.runtime = opts.runtime;
    this.executor = opts.executor;
    this.visionExecutor = opts.visionExecutor;
    this.visionVerifier = opts.visionVerifier;

    const userConfig: PredicateBrowserAgentConfig = opts.config ?? {};
    this.config = {
      permissionStartup: null,
      permissionRecovery: null,
      historyLastN: 0,
      ...userConfig,
      // Nested sections are merged on top of their defaults so that a partial
      // object, or an explicit `undefined`, cannot erase the defaults.
      vision: { enabled: false, maxVisionCalls: 0, ...(userConfig.vision ?? {}) },
      captcha: { policy: 'abort', handler: null, ...(userConfig.captcha ?? {}) },
    };

    applyCaptchaConfigToRuntime(this.runtime, this.config.captcha);

    this.runner = new RuntimeAgent({
      runtime: this.runtime,
      executor: this.executor,
      visionExecutor: this.visionExecutor,
      visionVerifier: this.visionVerifier,
      structuredPromptBuilder: this.config.compactPromptBuilder,
      domContextPostprocessor: this.config.compactPromptPostprocessor,
      historySummaryProvider: () => {
        const limit = this.historyLimit();
        // slice(-limit) returns the whole array when it holds fewer entries.
        return limit > 0 ? historySummary(this.history.slice(-limit)) : '';
      },
    });
  }

  /**
   * Normalise `config.historyLastN` into a usable entry count.
   *
   * @returns 0 (history disabled) for missing, NaN or non-positive values;
   *   otherwise the value rounded down to an integer. Infinity is preserved
   *   and means "keep everything".
   */
  private historyLimit(): number {
    const raw = this.config.historyLastN ?? 0;
    if (Number.isNaN(raw) || raw <= 0) return 0;
    return Math.floor(raw);
  }

  /** Append a step outcome and trim the log to the configured limit. */
  private recordHistory(stepGoal: string, ok: boolean): void {
    const limit = this.historyLimit();
    if (limit <= 0) return;
    this.history.push(`${stepGoal} -> ${ok ? 'ok' : 'fail'}`);
    if (this.history.length > limit) {
      this.history = this.history.slice(-limit);
    }
  }

  /**
   * Run a single step through the runner and record its outcome.
   *
   * @param opts.taskGoal - Overall task goal passed to the runner.
   * @param opts.step - Step to execute.
   * @param opts.onStepStart - Optional hook forwarded to the runner.
   * @param opts.onStepEnd - Optional hook forwarded to the runner.
   * @returns The step goal and the success flag reported by the runner.
   */
  async step(opts: {
    taskGoal: string;
    step: RuntimeStep;
    onStepStart?: (ctx: StepHookContext) => void | Promise<void>;
    onStepEnd?: (ctx: StepHookContext) => void | Promise<void>;
  }): Promise<StepOutcome> {
    let step = opts.step;

    // Once the vision budget is spent, run the step with the vision executor
    // switched off. A budget of 0 (or vision disabled) skips the check.
    const maxVisionCalls = Math.max(0, this.config.vision?.maxVisionCalls ?? 0);
    const budgetSpent =
      Boolean(this.config.vision?.enabled) &&
      maxVisionCalls > 0 &&
      this.visionCallsUsed >= maxVisionCalls;
    if (budgetSpent) {
      step = { ...step, visionExecutorEnabled: false, maxVisionExecutorAttempts: 0 };
    }

    const ok = await this.runner.runStep({
      taskGoal: opts.taskGoal,
      step,
      onStepStart: opts.onStepStart,
      onStepEnd: opts.onStepEnd,
    });

    this.recordHistory(step.goal, ok);
    return { stepGoal: step.goal, ok };
  }

  /**
   * Run several steps in order.
   *
   * @param opts.taskGoal - Overall task goal passed to every step.
   * @param opts.steps - Steps to execute, in order.
   * @param opts.onStepStart - Optional hook forwarded to every step.
   * @param opts.onStepEnd - Optional hook forwarded to every step.
   * @param opts.stopOnFailure - When true (the default), stop at the first
   *   failed step. When false, keep going after failures.
   * @returns `true` only if every executed step succeeded. With
   *   `stopOnFailure: false` all steps run, and the result is still `false`
   *   if any of them failed.
   */
  async run(opts: {
    taskGoal: string;
    steps: RuntimeStep[];
    onStepStart?: (ctx: StepHookContext) => void | Promise<void>;
    onStepEnd?: (ctx: StepHookContext) => void | Promise<void>;
    stopOnFailure?: boolean;
  }): Promise<boolean> {
    const stopOnFailure = opts.stopOnFailure ?? true;
    let allOk = true;
    for (const step of opts.steps) {
      const out = await this.step({
        taskGoal: opts.taskGoal,
        step,
        onStepStart: opts.onStepStart,
        onStepEnd: opts.onStepEnd,
      });
      if (!out.ok) {
        if (stopOnFailure) return false;
        // Keep going, but remember the failure so the result stays truthful.
        allOk = false;
      }
    }
    return allOk;
  }
}

src/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ export { SentienceDebugger, PredicateDebugger } from './debugger';
9191
export { RuntimeAgent } from './runtime-agent';
9292
export type { RuntimeStep, StepVerification } from './runtime-agent';
9393
export { parseVisionExecutorAction, executeVisionExecutorAction } from './vision-executor';
94+
export {
95+
PredicateBrowserAgent,
96+
type PredicateBrowserAgentConfig,
97+
type PermissionRecoveryConfig,
98+
type VisionFallbackConfig,
99+
type CaptchaConfig,
100+
} from './agents/browser-agent';
94101
export * from './captcha/types';
95102
export * from './captcha/strategies';
96103
export * from './tools';

src/runtime-agent.ts

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,43 @@ export class RuntimeAgent {
5858
readonly shortCircuitCanvas: boolean;
5959

6060
private structuredLLM: LLMInteractionHandler;
61+
private structuredPromptBuilder?:
62+
| ((
63+
taskGoal: string,
64+
stepGoal: string,
65+
domContext: string,
66+
snap: Snapshot,
67+
historySummary: string
68+
) => { systemPrompt: string; userPrompt: string })
69+
| undefined;
70+
private domContextPostprocessor?: ((domContext: string) => string) | undefined;
71+
private historySummaryProvider?: (() => string) | undefined;
6172

6273
constructor(opts: {
6374
runtime: AgentRuntime;
6475
executor: LLMProvider;
6576
visionExecutor?: LLMProvider;
6677
visionVerifier?: LLMProvider;
6778
shortCircuitCanvas?: boolean;
79+
structuredPromptBuilder?: (
80+
taskGoal: string,
81+
stepGoal: string,
82+
domContext: string,
83+
snap: Snapshot,
84+
historySummary: string
85+
) => { systemPrompt: string; userPrompt: string };
86+
domContextPostprocessor?: (domContext: string) => string;
87+
historySummaryProvider?: () => string;
6888
}) {
6989
this.runtime = opts.runtime;
7090
this.executor = opts.executor;
7191
this.visionExecutor = opts.visionExecutor;
7292
this.visionVerifier = opts.visionVerifier;
7393
this.shortCircuitCanvas = opts.shortCircuitCanvas ?? true;
7494
this.structuredLLM = new LLMInteractionHandler(this.executor, false);
95+
this.structuredPromptBuilder = opts.structuredPromptBuilder;
96+
this.domContextPostprocessor = opts.domContextPostprocessor;
97+
this.historySummaryProvider = opts.historySummaryProvider;
7598
}
7699

77100
async runStep(opts: {
@@ -210,8 +233,31 @@ export class RuntimeAgent {
210233
snap: Snapshot;
211234
}): Promise<string> {
212235
const { taskGoal, step, snap } = opts;
213-
const domContext = this.structuredLLM.buildContext(snap, step.goal);
214-
const combinedGoal = `${taskGoal}\n\nSTEP: ${step.goal}`;
236+
let domContext = this.structuredLLM.buildContext(snap, step.goal);
237+
if (this.domContextPostprocessor) {
238+
domContext = this.domContextPostprocessor(domContext);
239+
}
240+
241+
const historySummary = (this.historySummaryProvider?.() ?? '').trim();
242+
243+
if (this.structuredPromptBuilder) {
244+
const { systemPrompt, userPrompt } = this.structuredPromptBuilder(
245+
taskGoal,
246+
step.goal,
247+
domContext,
248+
snap,
249+
historySummary
250+
);
251+
const resp = await this.executor.generate(systemPrompt, userPrompt, { temperature: 0.0 });
252+
return this.extractActionFromText(resp.content);
253+
}
254+
255+
let combinedGoal = taskGoal;
256+
if (historySummary) {
257+
combinedGoal = `${taskGoal}\n\nRECENT STEPS:\n${historySummary}`;
258+
}
259+
combinedGoal = `${combinedGoal}\n\nSTEP: ${step.goal}`;
260+
215261
const resp = await this.structuredLLM.queryLLM(domContext, combinedGoal);
216262
return this.extractActionFromText(resp.content);
217263
}

0 commit comments

Comments
 (0)