Skip to content

Commit 20af154

Browse files
author
SentienceDEV
committed
agent examples
1 parent 3e9f00b commit 20af154

5 files changed

Lines changed: 367 additions & 0 deletions

File tree

examples/agent/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Predicate agent examples.
2+
3+
- `predicate-browser-agent-minimal.ts`: minimal `PredicateBrowserAgent` usage.
4+
- `predicate-browser-agent-custom-prompt.ts`: customize the compact prompt builder.
5+
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/**
2+
* Example: PredicateBrowserAgent with compact prompt customization.
3+
*
4+
* Usage:
5+
* ts-node examples/agent/predicate-browser-agent-custom-prompt.ts
6+
*/
7+
8+
import { Page } from 'playwright';
9+
import {
10+
AgentRuntime,
11+
PredicateBrowserAgent,
12+
type PredicateBrowserAgentConfig,
13+
RuntimeStep,
14+
SentienceBrowser,
15+
} from '../../src';
16+
import { createTracer } from '../../src/tracing/tracer-factory';
17+
import { LLMProvider, type LLMResponse } from '../../src/llm-provider';
18+
import type { Snapshot } from '../../src/types';
19+
20+
/**
 * Wrap a `SentienceBrowser` in the minimal adapter object handed to
 * `AgentRuntime` in this example.
 *
 * The adapter exposes a single `snapshot` function. The page argument is
 * accepted but ignored; the call is forwarded to `browser.snapshot(options)`.
 */
function createBrowserAdapter(browser: SentienceBrowser) {
  const snapshot = async (_page: Page, options?: Record<string, any>): Promise<Snapshot> => {
    const result = await browser.snapshot(options);
    return result;
  };

  return { snapshot };
}
27+
28+
/**
 * Stub LLM provider that never contacts a model.
 *
 * Every `generate` call stores the prompts it received (so the example can
 * print them afterwards) and answers with the fixed action string supplied
 * at construction time.
 */
class RecordingProvider extends LLMProvider {
  /** System prompt from the most recent `generate` call, or `null` before any call. */
  public lastSystem: string | null = null;
  /** User prompt from the most recent `generate` call, or `null` before any call. */
  public lastUser: string | null = null;

  private action: string;

  /**
   * @param action Action text returned from every `generate` call.
   */
  constructor(action: string = 'FINISH()') {
    super();
    this.action = action;
  }

  get modelName(): string {
    return 'recording-provider';
  }

  supportsJsonMode(): boolean {
    return false;
  }

  /**
   * Record both prompts, then reply with the canned action.
   */
  async generate(
    systemPrompt: string,
    userPrompt: string,
    _options: Record<string, any> = {}
  ): Promise<LLMResponse> {
    this.lastSystem = systemPrompt;
    this.lastUser = userPrompt;

    const response: LLMResponse = { content: this.action, modelName: this.modelName };
    return response;
  }
}
52+
53+
/**
 * Agent configuration demonstrating a custom compact prompt builder.
 *
 * The builder emits a fixed system prompt and a user prompt made of the task
 * goal, an optional "RECENT STEPS" section (omitted when the history summary
 * is empty), the step goal, and the DOM context cut to its first 4000
 * characters.
 */
const config: PredicateBrowserAgentConfig = {
  historyLastN: 2,
  compactPromptBuilder: (
    taskGoal: string,
    stepGoal: string,
    domContext: string,
    _snap: Snapshot,
    historySummary: string
  ) => {
    const systemPrompt =
      'You are a web automation executor. Return ONLY ONE action: CLICK(id) | TYPE(id,"text") | PRESS("key") | FINISH(). No prose.';

    // Sections are concatenated in order; an empty string drops the section.
    const sections = [
      `TASK GOAL:\n${taskGoal}\n\n`,
      historySummary ? `RECENT STEPS:\n${historySummary}\n\n` : '',
      `STEP GOAL:\n${stepGoal}\n\n`,
      `DOM CONTEXT:\n${domContext.slice(0, 4000)}\n`,
    ];
    const userPrompt = sections.join('');

    return { systemPrompt, userPrompt };
  },
};
72+
73+
/**
 * Run the custom-prompt example end to end.
 *
 * Reads the API key from `PREDICATE_API_KEY` (falling back to
 * `SENTIENCE_API_KEY`), opens https://example.com, runs one agent step with a
 * `RecordingProvider`, and prints the first 300 characters of the system and
 * user prompts the provider received.
 *
 * Cleanup is nested so each resource is released once it has been acquired:
 * the tracer is closed even if the browser fails to start, and a failure
 * while closing the browser cannot prevent the tracer from being closed.
 */
async function main(): Promise<void> {
  const apiKey = process.env.PREDICATE_API_KEY || process.env.SENTIENCE_API_KEY;
  if (!apiKey) {
    console.error('Error: PREDICATE_API_KEY or SENTIENCE_API_KEY not set');
    process.exit(1);
  }

  const runId = 'predicate-browser-agent-custom-prompt';
  const tracer = await createTracer({ apiKey, runId, uploadTrace: false });

  try {
    const browser = new SentienceBrowser(apiKey, undefined, false);
    await browser.start();

    try {
      const page = browser.getPage();
      await page.goto('https://example.com');
      await page.waitForLoadState('networkidle');

      const runtime = new AgentRuntime(createBrowserAdapter(browser), page, tracer);
      const executor = new RecordingProvider('FINISH()');

      const agent = new PredicateBrowserAgent({ runtime, executor, config });

      const out = await agent.step({
        taskGoal: 'Open example.com',
        step: { goal: 'Take no action; just finish' } satisfies RuntimeStep,
      });

      console.log(`step ok: ${out.ok}`);
      console.log('--- prompt preview (system) ---');
      console.log((executor.lastSystem || '').slice(0, 300));
      console.log('--- prompt preview (user) ---');
      console.log((executor.lastUser || '').slice(0, 300));
    } finally {
      // Only reached after a successful start(), so there is a browser to close.
      await browser.close();
    }
  } finally {
    // Outermost finally: runs whether or not the browser started or closed cleanly.
    await tracer.close(true);
  }
}
112+
113+
// Log any unhandled failure and mark the process as failed so shells and CI
// see a non-zero exit status instead of a silent success.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
114+
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
2+
* Example: PredicateBrowserAgent minimal demo.
3+
*
4+
* Usage:
5+
* ts-node examples/agent/predicate-browser-agent-minimal.ts
6+
*
7+
* Requires:
8+
* - PREDICATE_API_KEY or SENTIENCE_API_KEY (SentienceBrowser API key)
9+
*/
10+
11+
import { Page } from 'playwright';
12+
import {
13+
AgentRuntime,
14+
PredicateBrowserAgent,
15+
type PredicateBrowserAgentConfig,
16+
RuntimeStep,
17+
StepVerification,
18+
SentienceBrowser,
19+
exists,
20+
urlContains,
21+
} from '../../src';
22+
import { createTracer } from '../../src/tracing/tracer-factory';
23+
import { LLMProvider, type LLMResponse } from '../../src/llm-provider';
24+
import type { Snapshot } from '../../src/types';
25+
26+
/**
 * Build the adapter object this example passes to `AgentRuntime`.
 *
 * Its only member, `snapshot`, ignores the page it is given and delegates to
 * `browser.snapshot(options)`.
 */
function createBrowserAdapter(browser: SentienceBrowser) {
  const snapshot = async (_page: Page, options?: Record<string, any>): Promise<Snapshot> => {
    const result = await browser.snapshot(options);
    return result;
  };

  return { snapshot };
}
33+
34+
/**
 * Stub LLM provider that ignores its prompts and always answers with the
 * action string it was constructed with. No model is contacted.
 */
class FixedActionProvider extends LLMProvider {
  private action: string;

  /**
   * @param action Action text returned from every `generate` call.
   */
  constructor(action: string) {
    super();
    this.action = action;
  }

  get modelName(): string {
    return 'fixed-action';
  }

  supportsJsonMode(): boolean {
    return false;
  }

  /**
   * Return the canned action; all arguments are unused.
   */
  async generate(
    _systemPrompt: string,
    _userPrompt: string,
    _options: Record<string, any> = {}
  ): Promise<LLMResponse> {
    const response: LLMResponse = { content: this.action, modelName: this.modelName };
    return response;
  }
}
52+
53+
/**
 * Run the minimal example end to end.
 *
 * Reads the API key from `PREDICATE_API_KEY` (falling back to
 * `SENTIENCE_API_KEY`), opens https://example.com, and runs a single-step
 * plan whose two required verifications check the URL and the presence of a
 * heading. The executor is a `FixedActionProvider` that always answers
 * `FINISH()`.
 *
 * Cleanup is nested so each resource is released once it has been acquired:
 * the tracer is closed even if the browser fails to start, and a failure
 * while closing the browser cannot prevent the tracer from being closed.
 */
async function main(): Promise<void> {
  const apiKey = process.env.PREDICATE_API_KEY || process.env.SENTIENCE_API_KEY;
  if (!apiKey) {
    console.error('Error: PREDICATE_API_KEY or SENTIENCE_API_KEY not set');
    process.exit(1);
  }

  const runId = 'predicate-browser-agent-minimal';
  const tracer = await createTracer({ apiKey, runId, uploadTrace: false });

  try {
    const browser = new SentienceBrowser(apiKey, undefined, false);
    await browser.start();

    try {
      const page = browser.getPage();
      await page.goto('https://example.com');
      await page.waitForLoadState('networkidle');

      const runtime = new AgentRuntime(createBrowserAdapter(browser), page, tracer);

      const executor = new FixedActionProvider('FINISH()');
      const config: PredicateBrowserAgentConfig = { historyLastN: 2 };

      const agent = new PredicateBrowserAgent({ runtime, executor, config });

      const steps: RuntimeStep[] = [
        {
          goal: 'Verify Example Domain is loaded',
          verifications: [
            {
              predicate: urlContains('example.com'),
              label: 'url_contains_example',
              required: true,
            } satisfies StepVerification,
            {
              predicate: exists('role=heading'),
              label: 'has_heading',
              required: true,
            } satisfies StepVerification,
          ],
          maxSnapshotAttempts: 2,
          snapshotLimitBase: 60,
        },
      ];

      const ok = await agent.run({ taskGoal: 'Open example.com and verify', steps });
      console.log(`run ok: ${ok}`);
    } finally {
      // Only reached after a successful start(), so there is a browser to close.
      await browser.close();
    }
  } finally {
    // Outermost finally: runs whether or not the browser started or closed cleanly.
    await tracer.close(true);
  }
}
106+
107+
// Log any unhandled failure and mark the process as failed so shells and CI
// see a non-zero exit status instead of a silent success.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
108+

src/runtime-agent.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,106 @@ export class RuntimeAgent {
179179
}
180180
}
181181

182+
/**
 * Propose and execute a single action for `step`, returning only the action
 * string.
 *
 * Intended for orchestrators that drive `runtime.beginStep(...)` /
 * `runtime.emitStepEnd(...)` themselves and want to reuse the SDK's
 * snapshot-first proposal and execution logic without double-counting
 * budgets or emitting duplicate events.
 *
 * Thin wrapper over `actOnceResult`; the snapshot and vision flag it returns
 * are discarded.
 *
 * @param opts Task goal, step, and optional vision / history settings,
 *   forwarded unchanged to `actOnceResult`.
 * @returns The action string that was executed.
 */
async actOnce(opts: {
  taskGoal: string;
  step: RuntimeStep;
  allowVisionFallback?: boolean;
  historySummary?: string;
}): Promise<string> {
  const { action } = await this.actOnceResult(opts);
  return action;
}
198+
199+
/**
 * Same as `actOnce`, but additionally returns the pre-action snapshot that
 * the proposal was based on.
 *
 * Thin wrapper over `actOnceResult`; the vision flag it returns is dropped.
 *
 * @param opts Task goal, step, and optional vision / history settings,
 *   forwarded unchanged to `actOnceResult`.
 * @returns The executed action string and the snapshot taken before it.
 */
async actOnceWithSnapshot(opts: {
  taskGoal: string;
  step: RuntimeStep;
  allowVisionFallback?: boolean;
  historySummary?: string;
}): Promise<{ action: string; snap: Snapshot }> {
  const { action, snap } = await this.actOnceResult(opts);
  return { action, snap };
}
211+
212+
/**
 * Same as `actOnce`, but returns the full result: the executed action, the
 * pre-action snapshot, and whether the vision executor produced the action.
 *
 * Flow:
 *  1. Take a snapshot via `snapshotWithRamp(step)`.
 *  2. If vision fallback is allowed (default `true`), the short-circuit check
 *     passes, and a vision-capable `visionExecutor` is configured: capture a
 *     PNG screenshot, ask the vision executor for an action, execute it, and
 *     return with `usedVision: true`.
 *  3. Otherwise build the DOM context (optionally post-processed), obtain an
 *     action either through the custom `structuredPromptBuilder` + `executor`
 *     or through `structuredLLM.queryLLM`, execute it, and return with
 *     `usedVision: false`.
 *
 * The history summary comes from `opts.historySummary` (trimmed); when that
 * is empty, `historySummaryProvider` is consulted instead. The vision path
 * does not use the history summary.
 *
 * @param opts.taskGoal Overall task description included in the prompt.
 * @param opts.step Step whose goal drives the proposal.
 * @param opts.allowVisionFallback Set to `false` to skip the vision path.
 * @param opts.historySummary Optional summary of recent steps.
 * @returns The executed action, the snapshot used, and the vision flag.
 */
async actOnceResult(opts: {
  taskGoal: string;
  step: RuntimeStep;
  allowVisionFallback?: boolean;
  historySummary?: string;
}): Promise<{ action: string; snap: Snapshot; usedVision: boolean }> {
  const { taskGoal, step } = opts;
  const visionAllowed = opts.allowVisionFallback ?? true;
  const explicitHistory = (opts.historySummary ?? '').trim();

  const snap = await this.snapshotWithRamp(step);

  // Vision path: only when permitted, warranted, and a capable provider exists.
  if (visionAllowed && (await this.shouldShortCircuitToVision(step, snap))) {
    const visionProvider = this.visionExecutor;
    if (visionProvider && visionProvider.supportsVision?.()) {
      const url = this.runtime.page?.url?.() ?? snap?.url ?? '(unknown)';
      const png = (await (this.runtime.page as any).screenshot({ type: 'png' })) as Buffer;
      const imageBase64 = Buffer.from(png).toString('base64');

      const { systemPrompt, userPrompt } = this.visionExecutorPrompts({
        taskGoal,
        step,
        url,
        snap,
      });

      const visionResp = await visionProvider.generateWithImage(
        systemPrompt,
        userPrompt,
        imageBase64,
        { temperature: 0.0 }
      );
      const visionAction = this.extractActionFromText(visionResp.content);
      await this.executeAction(visionAction, snap ?? undefined);
      return { action: visionAction, snap, usedVision: true };
    }
  }

  // Structured snapshot-first proposal.
  const rawContext = this.structuredLLM.buildContext(snap, step.goal);
  const domContext = this.domContextPostprocessor
    ? this.domContextPostprocessor(rawContext)
    : rawContext;

  // Resolved lazily so the provider is consulted only when no explicit
  // summary was passed, and only on the structured path.
  const resolveHistory = (): string =>
    explicitHistory || (this.historySummaryProvider?.() ?? '').trim();

  let action: string;
  if (this.structuredPromptBuilder) {
    const { systemPrompt, userPrompt } = this.structuredPromptBuilder(
      taskGoal,
      step.goal,
      domContext,
      snap,
      resolveHistory()
    );
    const resp = await this.executor.generate(systemPrompt, userPrompt, { temperature: 0.0 });
    action = this.extractActionFromText(resp.content);
  } else {
    const recent = resolveHistory();
    const goalWithHistory = recent ? `${taskGoal}\n\nRECENT STEPS:\n${recent}` : taskGoal;
    const combinedGoal = `${goalWithHistory}\n\nSTEP: ${step.goal}`;
    const resp = await this.structuredLLM.queryLLM(domContext, combinedGoal);
    action = this.extractActionFromText(resp.content);
  }

  await this.executeAction(action, snap);
  return { action, snap, usedVision: false };
}
281+
182282
private async runHook(
183283
hook: ((ctx: StepHookContext) => void | Promise<void>) | undefined,
184284
ctx: StepHookContext

tests/runtime-agent.test.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,46 @@ function makeClickableElement(id: number): Element {
8383
}
8484

8585
describe('RuntimeAgent (runtime-backed agent)', () => {
86+
// Verifies that actOnce proposes and executes one action from a single
// snapshot without touching the runtime's step lifecycle APIs.
it('actOnce executes without step lifecycle', async () => {
  const sink = new MockSink();
  const tracer = new Tracer('run', sink);
  const page = new MockPage('https://example.com/start') as any;

  const snapshots: Snapshot[] = [
    {
      status: 'success',
      url: 'https://example.com/start',
      elements: [makeClickableElement(1)],
      timestamp: 't1',
    },
  ];

  const browserLike = {
    snapshot: async () => snapshots.shift() as Snapshot,
  };

  const runtime = new AgentRuntime(browserLike as any, page as any, tracer);

  // Guard: actOnce must not call step lifecycle APIs. The mocks throw so a
  // call fails fast; references are kept so the test can also assert on call
  // counts in case a thrown error were ever caught along the way.
  const beginStep = jest.fn(() => {
    throw new Error('beginStep should not be called by actOnce');
  });
  const emitStepEnd = jest.fn(() => {
    throw new Error('emitStepEnd should not be called by actOnce');
  });
  (runtime as any).beginStep = beginStep;
  (runtime as any).emitStepEnd = emitStepEnd;

  const executor = new ProviderStub(['CLICK(1)']);
  const agent = new RuntimeAgent({ runtime, executor });

  const action = await agent.actOnce({
    taskGoal: 'Do a thing',
    step: { goal: 'Click OK', maxSnapshotAttempts: 1 },
    allowVisionFallback: false,
  });

  expect(action.toUpperCase().startsWith('CLICK(')).toBe(true);
  expect(page.mouseClickCalls.length).toBeGreaterThan(0);
  expect(beginStep).not.toHaveBeenCalled();
  expect(emitStepEnd).not.toHaveBeenCalled();
});
125+
86126
it('structured executor succeeds without vision', async () => {
87127
const sink = new MockSink();
88128
const tracer = new Tracer('run', sink);

0 commit comments

Comments
 (0)