11import type { Snapshot , StepHookContext } from '../types' ;
22import type { PermissionPolicy } from '../browser' ;
33import type { AgentRuntime } from '../agent-runtime' ;
4- import type { LLMProvider } from '../llm-provider' ;
4+ import { LLMProvider } from '../llm-provider' ;
55import { RuntimeAgent } from '../runtime-agent' ;
66import type { RuntimeStep } from '../runtime-agent' ;
77import type { CaptchaOptions } from '../captcha/types' ;
@@ -46,6 +46,9 @@ export interface PredicateBrowserAgentConfig {
4646 // Prompt / token controls
4747 historyLastN ?: number ; // 0 disables LLM-facing step history
4848
49+ // Opt-in: track token usage from LLM provider responses (best-effort).
50+ tokenUsageEnabled ?: boolean ;
51+
4952 // Compact prompt customization
5053 // builder(taskGoal, stepGoal, domContext, snapshot, historySummary) -> {systemPrompt, userPrompt}
5154 compactPromptBuilder ?: (
@@ -97,6 +100,113 @@ function applyCaptchaConfigToRuntime(runtime: AgentRuntime, cfg: CaptchaConfig |
97100 } satisfies CaptchaOptions ) ;
98101}
99102
/**
 * Running token counters for one bucket (for example a single role or a
 * single model). Every field is a plain mutable number:
 *
 * - `calls`            — how many responses were counted into the bucket
 * - `promptTokens`     — accumulated prompt-side tokens
 * - `completionTokens` — accumulated completion-side tokens
 * - `totalTokens`      — accumulated overall tokens
 */
type TokenUsageTotals = Record<
  'calls' | 'promptTokens' | 'completionTokens' | 'totalTokens',
  number
>;
109+
/**
 * Accumulates token-usage counters taken from LLM provider responses,
 * bucketed both by a caller-supplied role and by the response's model name.
 *
 * Recording is tolerant of malformed responses: missing, non-numeric,
 * non-finite or negative counters are treated as 0 so that a single bad
 * response can never corrupt the running totals.
 */
class TokenUsageCollector {
  // Maps rather than plain objects: bucket keys come from provider responses,
  // and names such as "__proto__" or "constructor" must behave like ordinary
  // keys instead of resolving to inherited Object.prototype members.
  private byRole = new Map<string, TokenUsageTotals>();
  private byModel = new Map<string, TokenUsageTotals>();

  /** Returns a fresh all-zero counter set. */
  private static zero(): TokenUsageTotals {
    return { calls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  }

  /**
   * Normalizes one raw counter value.
   *
   * @returns the value clamped to >= 0 when it is a finite number, otherwise
   *          `undefined` so the caller can pick its own fallback.
   */
  private static toCount(value: unknown): number | undefined {
    return typeof value === 'number' && Number.isFinite(value) ? Math.max(0, value) : undefined;
  }

  /** Copies a bucket table into a plain object whose entries are detached copies. */
  private static snapshot(src: Map<string, TokenUsageTotals>): Record<string, TokenUsageTotals> {
    // Object.fromEntries defines own properties, so even a "__proto__" key is
    // stored as data instead of replacing the result's prototype.
    return Object.fromEntries(
      Array.from(src, ([key, totals]): [string, TokenUsageTotals] => [key, { ...totals }])
    );
  }

  /**
   * Counts one provider response.
   *
   * @param role - bucket name describing which provider produced the response
   * @param resp - provider response; `promptTokens`, `completionTokens`,
   *               `totalTokens` and `modelName` are read when present. Any
   *               other shape (including `null`/`undefined`) counts as a call
   *               with zero tokens under the model name `'unknown'`.
   */
  record(role: string, resp: unknown): void {
    const fields = (resp ?? {}) as Record<string, unknown>;
    const promptTokens = TokenUsageCollector.toCount(fields.promptTokens) ?? 0;
    const completionTokens = TokenUsageCollector.toCount(fields.completionTokens) ?? 0;
    // When the provider gives no usable total, derive it from the already
    // clamped parts so the three counters stay mutually consistent.
    const totalTokens =
      TokenUsageCollector.toCount(fields.totalTokens) ?? promptTokens + completionTokens;
    const model = String(fields.modelName ?? 'unknown') || 'unknown';

    const bump = (table: Map<string, TokenUsageTotals>, key: string): void => {
      const entry = table.get(key) ?? TokenUsageCollector.zero();
      entry.calls += 1;
      entry.promptTokens += promptTokens;
      entry.completionTokens += completionTokens;
      entry.totalTokens += totalTokens;
      table.set(key, entry);
    };

    bump(this.byRole, role);
    bump(this.byModel, model);
  }

  /** Discards everything recorded so far. */
  reset(): void {
    this.byRole = new Map();
    this.byModel = new Map();
  }

  /**
   * Builds a point-in-time report of the recorded usage.
   *
   * @returns `total` (the sum over all role buckets) plus per-role and
   *          per-model tables. The returned objects are copies: mutating them
   *          does not affect the collector, and later `record` calls do not
   *          change a summary that was already handed out.
   */
  summary(): {
    total: TokenUsageTotals;
    byRole: Record<string, TokenUsageTotals>;
    byModel: Record<string, TokenUsageTotals>;
  } {
    const total = TokenUsageCollector.zero();
    // Every call lands in exactly one role bucket, so summing roles gives the
    // grand total without double counting.
    for (const bucket of this.byRole.values()) {
      total.calls += bucket.calls;
      total.promptTokens += bucket.promptTokens;
      total.completionTokens += bucket.completionTokens;
      total.totalTokens += bucket.totalTokens;
    }
    return {
      total,
      byRole: TokenUsageCollector.snapshot(this.byRole),
      byModel: TokenUsageCollector.snapshot(this.byModel),
    };
  }
}
159+
/**
 * Decorator around another `LLMProvider` that forwards every call to the
 * wrapped provider and reports each response to a `TokenUsageCollector`
 * under a fixed role label. Responses are returned to the caller untouched.
 */
class TokenAccountingProvider extends LLMProvider {
  constructor(
    private inner: LLMProvider,
    private collector: TokenUsageCollector,
    private role: string
  ) {
    super();
  }

  /** Model name reported by the wrapped provider. */
  get modelName(): string {
    return this.inner.modelName;
  }

  /** Delegates the JSON-mode capability check to the wrapped provider. */
  supportsJsonMode(): boolean {
    return this.inner.supportsJsonMode();
  }

  /** Delegates the vision capability check; `false` when the wrapped provider has no such method. */
  supportsVision(): boolean {
    const answer = this.inner.supportsVision?.();
    return answer ?? false;
  }

  /** Forwards a text generation request and accounts for the response. */
  async generate(
    systemPrompt: string,
    userPrompt: string,
    options: Record<string, any> = {}
  ): Promise<any> {
    return this.recordUsageSafely(await this.inner.generate(systemPrompt, userPrompt, options));
  }

  /**
   * Forwards an image-assisted generation request and accounts for the response.
   *
   * @throws Error when the wrapped provider has no `generateWithImage` function.
   */
  async generateWithImage(
    systemPrompt: string,
    userPrompt: string,
    imageBase64: string,
    options: Record<string, any> = {}
  ): Promise<any> {
    const delegate = (this.inner as any).generateWithImage;
    if (typeof delegate === 'function') {
      const reply = await delegate.call(this.inner, systemPrompt, userPrompt, imageBase64, options);
      return this.recordUsageSafely(reply);
    }
    throw new Error('Inner provider does not implement generateWithImage');
  }

  /**
   * Hands `reply` to the collector and returns it unchanged. Accounting is
   * best-effort: a failure while recording must never fail the LLM call.
   */
  private recordUsageSafely(reply: any): any {
    try {
      this.collector.record(this.role, reply);
    } catch {
      // best-effort
    }
    return reply;
  }
}
209+
100210export type StepOutcome = { stepGoal : string ; ok : boolean } ;
101211
102212export class PredicateBrowserAgent {
@@ -109,6 +219,7 @@ export class PredicateBrowserAgent {
109219 private history : string [ ] = [ ] ;
110220 private visionCallsUsed = 0 ;
111221 private runner : RuntimeAgent ;
222+ private tokenUsage : TokenUsageCollector | null = null ;
112223
113224 constructor ( opts : {
114225 runtime : AgentRuntime ;
@@ -117,10 +228,22 @@ export class PredicateBrowserAgent {
117228 visionVerifier ?: LLMProvider ;
118229 config ?: PredicateBrowserAgentConfig ;
119230 } ) {
231+ const tokenUsageEnabled = Boolean ( opts . config ?. tokenUsageEnabled ) ;
232+ const collector = tokenUsageEnabled ? new TokenUsageCollector ( ) : null ;
233+
120234 this . runtime = opts . runtime ;
121- this . executor = opts . executor ;
122- this . visionExecutor = opts . visionExecutor ;
123- this . visionVerifier = opts . visionVerifier ;
235+ this . tokenUsage = collector ;
236+ this . executor = collector
237+ ? new TokenAccountingProvider ( opts . executor , collector , 'executor' )
238+ : opts . executor ;
239+ this . visionExecutor =
240+ collector && opts . visionExecutor
241+ ? new TokenAccountingProvider ( opts . visionExecutor , collector , 'vision_executor' )
242+ : opts . visionExecutor ;
243+ this . visionVerifier =
244+ collector && opts . visionVerifier
245+ ? new TokenAccountingProvider ( opts . visionVerifier , collector , 'vision_verifier' )
246+ : opts . visionVerifier ;
124247 this . config = {
125248 permissionStartup : null ,
126249 permissionRecovery : null ,
@@ -148,6 +271,17 @@ export class PredicateBrowserAgent {
148271 } as any ) ;
149272 }
150273
/**
 * Reports token usage gathered so far.
 *
 * @returns `{ enabled: true, ...summary }` when a collector exists, otherwise
 *          `{ enabled: false, reason }` explaining that tracking is off.
 */
getTokenUsage(): any {
  const collector = this.tokenUsage;
  return collector
    ? { enabled: true, ...collector.summary() }
    : { enabled: false, reason: 'tokenUsageEnabled is false' };
}
280+
/** Clears all recorded token usage; does nothing when no collector exists. */
resetTokenUsage(): void {
  if (this.tokenUsage) {
    this.tokenUsage.reset();
  }
}
284+
151285 private recordHistory ( stepGoal : string , ok : boolean ) {
152286 const n = Math . max ( 0 , this . config . historyLastN ?? 0 ) ;
153287 if ( n <= 0 ) return ;
0 commit comments