Skip to content

Commit c5541b0

Browse files
author
SentienceDEV
committed
token usage & video recording
1 parent 20af154 commit c5541b0

5 files changed

Lines changed: 315 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,31 @@ const agent = new PredicateBrowserAgent({
8888
- `domContextPostprocessor(...)`
8989
- `historySummaryProvider(...)`
9090

91+
#### PredicateBrowserAgent: opt-in token usage accounting (best-effort)
92+
93+
To measure token spend, enable best-effort accounting. Totals are only as accurate as the token counts the provider reports:
94+
95+
```ts
96+
const agent = new PredicateBrowserAgent({
97+
runtime,
98+
executor: llm,
99+
config: {
100+
tokenUsageEnabled: true,
101+
},
102+
});
103+
104+
const usage = agent.getTokenUsage();
105+
agent.resetTokenUsage();
106+
```
107+
108+
#### RuntimeAgent: actOnce without step lifecycle (orchestrators)
109+
110+
`RuntimeAgent` now exposes `actOnce(...)` helpers that execute exactly one action **without** calling `runtime.beginStep()` / `runtime.emitStepEnd()`. They are intended for external orchestrators (e.g. WebBench) that already own the step lifecycle and only need the SDK’s snapshot-first propose-and-execute block.
111+
112+
- `await agent.actOnce(...) -> string`
113+
- `await agent.actOnceWithSnapshot(...) -> { action, snap }`
114+
- `await agent.actOnceResult(...) -> { action, snap, usedVision }`
115+
91116
### 2026-02-13
92117

93118
#### Expanded deterministic verifications (adaptive resnapshotting)

examples/agent/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ Predicate agent examples.
22

33
- `predicate-browser-agent-minimal.ts`: minimal `PredicateBrowserAgent` usage.
44
- `predicate-browser-agent-custom-prompt.ts`: customize the compact prompt builder.
5+
- `predicate-browser-agent-video-recording-playwright.ts`: enable Playwright video recording via context options (recommended).
56

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/**
2+
* Example: PredicateBrowserAgent + Playwright video recording (recommended approach).
3+
*
4+
* Video recording is a Playwright context feature (recordVideo), not an agent constructor flag.
5+
* This example shows how to:
6+
* 1) create a Playwright context with recordVideo enabled
7+
* 2) wrap the existing page with SentienceBrowser.fromPage(...)
8+
* 3) use AgentRuntime + PredicateBrowserAgent normally
9+
*
10+
* Usage:
11+
* ts-node examples/agent/predicate-browser-agent-video-recording-playwright.ts
12+
*/
13+
14+
import { chromium } from 'playwright';
15+
import * as fs from 'fs';
16+
import * as path from 'path';
17+
18+
import {
19+
AgentRuntime,
20+
PredicateBrowserAgent,
21+
type PredicateBrowserAgentConfig,
22+
SentienceBrowser,
23+
type RuntimeStep,
24+
} from '../../src';
25+
import { createTracer } from '../../src/tracing/tracer-factory';
26+
import { LLMProvider, type LLMResponse } from '../../src/llm-provider';
27+
import type { Snapshot } from '../../src/types';
28+
29+
/**
 * Builds the minimal browser-like object handed to `AgentRuntime` in this example.
 *
 * The returned object exposes a single `snapshot` method that ignores the page
 * argument and forwards only the options to `browser.snapshot(...)`.
 *
 * @param browser - The `SentienceBrowser` whose `snapshot` method is delegated to.
 * @returns An object with an async `snapshot(page, options)` method.
 */
function createBrowserAdapter(browser: SentienceBrowser) {
  return {
    async snapshot(_page: any, options?: Record<string, any>): Promise<Snapshot> {
      // The page argument is unused: the wrapped browser is the one passed in above.
      const snap = await browser.snapshot(options);
      return snap;
    },
  };
}
36+
37+
/**
 * LLM provider stub that always answers with the action string supplied at
 * construction time. Both prompts and the options argument are ignored.
 */
class FixedActionProvider extends LLMProvider {
  private action: string;

  /**
   * @param action - The exact content returned by every `generate` call.
   */
  constructor(action: string) {
    super();
    this.action = action;
  }

  /** Constant model identifier reported in every response. */
  get modelName(): string {
    return 'fixed-action';
  }

  /** This stub never advertises JSON mode. */
  supportsJsonMode(): boolean {
    return false;
  }

  /**
   * Returns the fixed action as the response content.
   *
   * @param _system - Unused system prompt.
   * @param _user - Unused user prompt.
   * @param _opts - Unused generation options.
   * @returns A response carrying the fixed action and this provider's model name.
   */
  async generate(
    _system: string,
    _user: string,
    _opts: Record<string, any> = {}
  ): Promise<LLMResponse> {
    const response: LLMResponse = { content: this.action, modelName: this.modelName };
    return response;
  }
}
51+
52+
/**
 * Runs the example end to end: launches Chromium with Playwright video recording
 * enabled, wraps the page with `SentienceBrowser.fromPage`, executes a single
 * `PredicateBrowserAgent` step, and then releases every resource.
 *
 * Cleanup is layered (one `try/finally` per resource) so that a failure while
 * closing the tracer cannot prevent the context from closing, and a failure at
 * any point after launch cannot leave the browser process running. Close order
 * is tracer, then context, then browser.
 *
 * @returns A promise that resolves once the step has run and all resources are closed.
 * @throws Rethrows any error raised while launching, navigating, stepping or closing.
 */
async function main() {
  const apiKey = process.env.PREDICATE_API_KEY || process.env.SENTIENCE_API_KEY;

  const recordingsDir = path.join(process.cwd(), 'recordings');
  // With `recursive: true`, mkdirSync does not throw when the directory already
  // exists, so no separate existence check is needed.
  fs.mkdirSync(recordingsDir, { recursive: true });

  const browser = await chromium.launch({ headless: false });
  try {
    const context = await browser.newContext({
      recordVideo: { dir: recordingsDir, size: { width: 1280, height: 720 } },
    });
    try {
      const page = await context.newPage();

      const runId = 'predicate-browser-agent-video-recording';
      const tracer = await createTracer({ apiKey, runId, uploadTrace: false });
      try {
        // Wrap existing Playwright page.
        const sentienceBrowser = SentienceBrowser.fromPage(page, apiKey);

        await page.goto('https://example.com');
        await page.waitForLoadState('networkidle');

        const runtime = new AgentRuntime(
          createBrowserAdapter(sentienceBrowser),
          page as any,
          tracer
        );
        const config: PredicateBrowserAgentConfig = { historyLastN: 0 };

        const agent = new PredicateBrowserAgent({
          runtime,
          executor: new FixedActionProvider('FINISH()'),
          config,
        });

        const out = await agent.step({
          taskGoal: 'Open example.com',
          step: { goal: 'Finish immediately' } satisfies RuntimeStep,
        });
        console.log(`step ok: ${out.ok}`);
        console.log(`videos will be saved under: ${recordingsDir}`);
      } finally {
        await tracer.close(true);
      }
    } finally {
      await context.close(); // flush video
    }
  } finally {
    await browser.close();
  }
}
96+
97+
// Script entry point: run the example, and on any failure print the error and
// exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
101+

src/agents/browser-agent.ts

Lines changed: 138 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { Snapshot, StepHookContext } from '../types';
22
import type { PermissionPolicy } from '../browser';
33
import type { AgentRuntime } from '../agent-runtime';
4-
import type { LLMProvider } from '../llm-provider';
4+
import { LLMProvider } from '../llm-provider';
55
import { RuntimeAgent } from '../runtime-agent';
66
import type { RuntimeStep } from '../runtime-agent';
77
import type { CaptchaOptions } from '../captcha/types';
@@ -46,6 +46,9 @@ export interface PredicateBrowserAgentConfig {
4646
// Prompt / token controls
4747
historyLastN?: number; // 0 disables LLM-facing step history
4848

49+
// Opt-in: track token usage from LLM provider responses (best-effort).
50+
tokenUsageEnabled?: boolean;
51+
4952
// Compact prompt customization
5053
// builder(taskGoal, stepGoal, domContext, snapshot, historySummary) -> {systemPrompt, userPrompt}
5154
compactPromptBuilder?: (
@@ -97,6 +100,113 @@ function applyCaptchaConfigToRuntime(runtime: AgentRuntime, cfg: CaptchaConfig |
97100
} satisfies CaptchaOptions);
98101
}
99102

103+
/** Running counters for one accounting bucket (a single role or a single model). */
type TokenUsageTotals = {
  calls: number;
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
};

/**
 * Accumulates best-effort token usage from LLM responses, bucketed both by the
 * caller-supplied role and by the model name reported in the response.
 *
 * Buckets are kept in `Map`s rather than plain objects so that a role or model
 * name such as `constructor` or `toString` can never resolve to an inherited
 * `Object.prototype` member.
 */
class TokenUsageCollector {
  private byRole = new Map<string, TokenUsageTotals>();
  private byModel = new Map<string, TokenUsageTotals>();

  /**
   * Records one LLM call.
   *
   * Missing, non-numeric, NaN or infinite counts are treated as "not reported";
   * negative counts are clamped to 0. When `totalTokens` is not reported it is
   * derived as prompt + completion. A response without a usable `modelName` is
   * filed under `'unknown'`.
   *
   * @param role - Bucket label for the caller (e.g. `'executor'`).
   * @param resp - Provider response; only `promptTokens`, `completionTokens`,
   *   `totalTokens` and `modelName` are read. Any non-object value counts as a
   *   call with zero tokens.
   */
  record(role: string, resp: unknown): void {
    const fields: Record<string, unknown> =
      typeof resp === 'object' && resp !== null ? (resp as Record<string, unknown>) : {};

    const promptTokens = TokenUsageCollector.tokenCount(fields['promptTokens']) ?? 0;
    const completionTokens = TokenUsageCollector.tokenCount(fields['completionTokens']) ?? 0;
    const totalTokens =
      TokenUsageCollector.tokenCount(fields['totalTokens']) ?? promptTokens + completionTokens;
    const model = String(fields['modelName'] ?? 'unknown') || 'unknown';

    const delta: TokenUsageTotals = { calls: 1, promptTokens, completionTokens, totalTokens };
    TokenUsageCollector.bump(this.byRole, role, delta);
    TokenUsageCollector.bump(this.byModel, model, delta);
  }

  /** Discards every recorded counter. */
  reset(): void {
    this.byRole = new Map();
    this.byModel = new Map();
  }

  /**
   * Returns a point-in-time snapshot of the recorded usage.
   *
   * The returned objects are copies: later `record` calls do not change a
   * snapshot already handed out, and mutating a snapshot does not affect the
   * collector.
   *
   * @returns Grand total (summed over roles) plus per-role and per-model totals.
   */
  summary(): {
    total: TokenUsageTotals;
    byRole: Record<string, TokenUsageTotals>;
    byModel: Record<string, TokenUsageTotals>;
  } {
    const total = TokenUsageCollector.emptyTotals();
    // Every call is counted once per role, so summing roles yields the grand total.
    for (const bucket of this.byRole.values()) {
      total.calls += bucket.calls;
      total.promptTokens += bucket.promptTokens;
      total.completionTokens += bucket.completionTokens;
      total.totalTokens += bucket.totalTokens;
    }
    return {
      total,
      byRole: TokenUsageCollector.snapshot(this.byRole),
      byModel: TokenUsageCollector.snapshot(this.byModel),
    };
  }

  /** Fresh zeroed counters. */
  private static emptyTotals(): TokenUsageTotals {
    return { calls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  }

  /** Normalises a reported count: finite numbers are clamped to >= 0, anything else is `undefined`. */
  private static tokenCount(value: unknown): number | undefined {
    return typeof value === 'number' && Number.isFinite(value) ? Math.max(0, value) : undefined;
  }

  /** Adds `delta` to the bucket stored under `key`, creating the bucket on first use. */
  private static bump(
    buckets: Map<string, TokenUsageTotals>,
    key: string,
    delta: TokenUsageTotals
  ): void {
    const current = buckets.get(key) ?? TokenUsageCollector.emptyTotals();
    current.calls += delta.calls;
    current.promptTokens += delta.promptTokens;
    current.completionTokens += delta.completionTokens;
    current.totalTokens += delta.totalTokens;
    buckets.set(key, current);
  }

  /** Copies a bucket map into a plain object whose entries are independent clones. */
  private static snapshot(
    buckets: Map<string, TokenUsageTotals>
  ): Record<string, TokenUsageTotals> {
    // Object.fromEntries defines own data properties, so even a key such as
    // '__proto__' is stored as ordinary data instead of altering the prototype.
    return Object.fromEntries(
      [...buckets].map(([key, totals]): [string, TokenUsageTotals] => [key, { ...totals }])
    );
  }
}
159+
160+
/**
 * Wrapper around another `LLMProvider` that forwards every call to the wrapped
 * provider and reports each response to a shared `TokenUsageCollector` under a
 * fixed role label.
 *
 * Accounting is best-effort: any error thrown while recording is swallowed, so
 * it cannot turn a successful generation into a failure.
 */
class TokenAccountingProvider extends LLMProvider {
  /**
   * @param inner - Provider that actually performs the generation.
   * @param collector - Collector that receives every response.
   * @param role - Label under which this provider's calls are recorded.
   */
  constructor(
    private inner: LLMProvider,
    private collector: TokenUsageCollector,
    private role: string
  ) {
    super();
  }

  /** Model name of the wrapped provider. */
  get modelName(): string {
    return this.inner.modelName;
  }

  /** Whether the wrapped provider supports JSON mode. */
  supportsJsonMode(): boolean {
    return this.inner.supportsJsonMode();
  }

  /** Whether the wrapped provider supports vision; `false` when it does not say. */
  supportsVision(): boolean {
    const supported = this.inner.supportsVision?.();
    return supported ?? false;
  }

  /**
   * Delegates a text generation call and records the response.
   *
   * @returns The wrapped provider's response, unchanged.
   */
  async generate(
    systemPrompt: string,
    userPrompt: string,
    options: Record<string, any> = {}
  ): Promise<any> {
    const response = await this.inner.generate(systemPrompt, userPrompt, options);
    this.recordBestEffort(response);
    return response;
  }

  /**
   * Delegates an image generation call and records the response.
   *
   * @returns The wrapped provider's response, unchanged.
   * @throws Error when the wrapped provider has no `generateWithImage` function.
   */
  async generateWithImage(
    systemPrompt: string,
    userPrompt: string,
    imageBase64: string,
    options: Record<string, any> = {}
  ): Promise<any> {
    // Looked up dynamically and checked at runtime before it is invoked.
    const delegate = (this.inner as any).generateWithImage;
    if (typeof delegate !== 'function') {
      throw new Error('Inner provider does not implement generateWithImage');
    }
    const response = await delegate.call(
      this.inner,
      systemPrompt,
      userPrompt,
      imageBase64,
      options
    );
    this.recordBestEffort(response);
    return response;
  }

  /** Reports a response to the collector, ignoring any error the collector throws. */
  private recordBestEffort(response: any): void {
    try {
      this.collector.record(this.role, response);
    } catch {
      // best-effort
    }
  }
}
209+
100210
export type StepOutcome = { stepGoal: string; ok: boolean };
101211

102212
export class PredicateBrowserAgent {
@@ -109,6 +219,7 @@ export class PredicateBrowserAgent {
109219
private history: string[] = [];
110220
private visionCallsUsed = 0;
111221
private runner: RuntimeAgent;
222+
private tokenUsage: TokenUsageCollector | null = null;
112223

113224
constructor(opts: {
114225
runtime: AgentRuntime;
@@ -117,10 +228,22 @@ export class PredicateBrowserAgent {
117228
visionVerifier?: LLMProvider;
118229
config?: PredicateBrowserAgentConfig;
119230
}) {
231+
const tokenUsageEnabled = Boolean(opts.config?.tokenUsageEnabled);
232+
const collector = tokenUsageEnabled ? new TokenUsageCollector() : null;
233+
120234
this.runtime = opts.runtime;
121-
this.executor = opts.executor;
122-
this.visionExecutor = opts.visionExecutor;
123-
this.visionVerifier = opts.visionVerifier;
235+
this.tokenUsage = collector;
236+
this.executor = collector
237+
? new TokenAccountingProvider(opts.executor, collector, 'executor')
238+
: opts.executor;
239+
this.visionExecutor =
240+
collector && opts.visionExecutor
241+
? new TokenAccountingProvider(opts.visionExecutor, collector, 'vision_executor')
242+
: opts.visionExecutor;
243+
this.visionVerifier =
244+
collector && opts.visionVerifier
245+
? new TokenAccountingProvider(opts.visionVerifier, collector, 'vision_verifier')
246+
: opts.visionVerifier;
124247
this.config = {
125248
permissionStartup: null,
126249
permissionRecovery: null,
@@ -148,6 +271,17 @@ export class PredicateBrowserAgent {
148271
} as any);
149272
}
150273

274+
/**
 * Reports the token usage gathered so far.
 *
 * @returns `{ enabled: false, reason }` when this agent has no collector;
 *   otherwise `{ enabled: true }` merged with the collector's summary.
 */
getTokenUsage(): any {
  const collector = this.tokenUsage;
  return collector
    ? { enabled: true, ...collector.summary() }
    : { enabled: false, reason: 'tokenUsageEnabled is false' };
}
280+
281+
/** Clears all recorded token usage; does nothing when this agent has no collector. */
resetTokenUsage(): void {
  if (this.tokenUsage) {
    this.tokenUsage.reset();
  }
}
284+
151285
private recordHistory(stepGoal: string, ok: boolean) {
152286
const n = Math.max(0, this.config.historyLastN ?? 0);
153287
if (n <= 0) return;

tests/predicate-browser-agent.test.ts

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@ class ProviderStub extends LLMProvider {
4444
): Promise<LLMResponse> {
4545
this.calls.push({ system: systemPrompt, user: userPrompt, options });
4646
const content = this.responses.length ? (this.responses.shift() as string) : 'FINISH()';
47-
return { content, modelName: this.modelName };
47+
return {
48+
content,
49+
modelName: this.modelName,
50+
promptTokens: 11,
51+
completionTokens: 7,
52+
totalTokens: 18,
53+
};
4854
}
4955
}
5056

@@ -103,4 +109,47 @@ describe('PredicateBrowserAgent', () => {
103109
expect(executor.calls[0].system).toContain('SYSTEM_CUSTOM');
104110
expect(executor.calls[0].user).toBe('USER_CUSTOM');
105111
});
112+
113+
it('tracks token usage when opt-in enabled', async () => {
114+
const sink = new MockSink();
115+
const tracer = new Tracer('run', sink);
116+
const page = new MockPage('https://example.com/start') as any;
117+
118+
const snapshots: Snapshot[] = [
119+
{
120+
status: 'success',
121+
url: 'https://example.com/start',
122+
elements: [makeClickableElement(1)],
123+
timestamp: 't1',
124+
},
125+
];
126+
127+
const browserLike = {
128+
snapshot: async () => snapshots.shift() as Snapshot,
129+
};
130+
131+
const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
132+
const executor = new ProviderStub(['FINISH()']);
133+
134+
const agent = new PredicateBrowserAgent({
135+
runtime,
136+
executor,
137+
config: { tokenUsageEnabled: true, captcha: { policy: 'abort' } },
138+
});
139+
140+
const out = await agent.step({
141+
taskGoal: 'test',
142+
step: { goal: 'No-op', maxSnapshotAttempts: 1 },
143+
});
144+
expect(out.ok).toBe(true);
145+
146+
const usage = agent.getTokenUsage();
147+
expect(usage.enabled).toBe(true);
148+
expect(usage.total.totalTokens).toBeGreaterThanOrEqual(18);
149+
expect(usage.byRole.executor.calls).toBeGreaterThanOrEqual(1);
150+
151+
agent.resetTokenUsage();
152+
const usage2 = agent.getTokenUsage();
153+
expect(usage2.total.totalTokens).toBe(0);
154+
});
106155
});

0 commit comments

Comments
 (0)