tangle-network · tangletools · May 30, 2026 · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.31.0",
+  "version": "0.32.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -75,11 +75,10 @@
     "lint:fix": "biome check --write src tests examples",
     "typecheck": "tsc --noEmit"
   },
-  "dependencies": {
-    "@tangle-network/agent-eval": "^0.54.0"
-  },
+  "dependencies": {},
   "devDependencies": {
     "@biomejs/biome": "^2.4.0",
+    "@tangle-network/agent-eval": "^0.61.0",
     "@tangle-network/sandbox": "^0.4.0",
     "@types/node": "^25.6.0",
     "tsup": "^8.0.0",
@@ -101,6 +100,7 @@
   "license": "MIT",
   "packageManager": "pnpm@10.28.0",
   "peerDependencies": {
+    "@tangle-network/agent-eval": ">=0.61.0 <1.0.0",
     "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
     "@tangle-network/sandbox": ">=0.1.2 <0.5.0"
   },

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/src/loops/index.ts b/src/loops/index.ts
@@ -27,6 +27,13 @@ export type { CreateRefineDriverOptions, RefineDecision } from './drivers/refine
 export { createRefineDriver, refineWinnerIndex } from './drivers/refine'
 export type { RunLoopOptions } from './run-loop'
 export { runLoop } from './run-loop'
+export { reportLoopUsage, type UsageSink } from './report-usage'
+export {
+  loopCampaignDispatch,
+  loopDispatch,
+  type LoopDispatchOptions,
+  type LoopOptionsForDispatch,
+} from './loop-dispatch'
 export type {
   AgentRunSpec,
   DefaultVerdict,
@@ -42,6 +49,7 @@ export type {
   LoopSandboxClient,
   LoopSandboxPlacement,
   LoopStartedPayload,
+  LoopTokenUsage,
   LoopTraceEmitter,
   LoopTraceEvent,
   LoopWinner,

diff --git a/src/loops/loop-dispatch.ts b/src/loops/loop-dispatch.ts
@@ -0,0 +1,132 @@
+/**
+ * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
+ *
+ * Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
+ * `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
+ * sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
+ * `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
+ * forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
+ * `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
+ * the third silent. The fleet's products skipped (c) and fell back to a
+ * `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
+ * to kill.
+ *
+ * `loopDispatch` collapses all three into one typed call:
+ *
+ *   const dispatch = loopDispatch({
+ *     sandboxClient,
+ *     toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
+ *   })
+ *   await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
+ *
+ * Usage is reported automatically; trace events are forwarded automatically;
+ * the ctx is built automatically. The seam becomes impossible to mis-wire.
+ *
+ * Typed against the campaign `DispatchContext` (imported type-only from
+ * `@tangle-network/agent-eval/campaign`, so nothing from agent-eval is loaded
+ * at runtime by this module) — a downward dependency, never an inversion.
+ */
+
+// agent-eval's AgentProfile (the eval-harness unit of variation, `model: string`)
+// — NOT sandbox's AgentProfile. ProfileDispatchFn is keyed on the former.
+import type { AgentProfile } from '@tangle-network/agent-eval'
+import type {
+  CampaignTraceWriter,
+  DispatchContext,
+  DispatchFn,
+  ProfileDispatchFn,
+  Scenario,
+} from '@tangle-network/agent-eval/campaign'
+import { reportLoopUsage } from './report-usage'
+import { type RunLoopOptions, runLoop } from './run-loop'
+import type { LoopResult, LoopSandboxClient, LoopTraceEmitter } from './types'
+
+/** `RunLoopOptions` with `ctx` omitted — `loopDispatch` builds the ctx for each cell. */
+export type LoopOptionsForDispatch<Task, Output, Decision> = Omit<
+  RunLoopOptions<Task, Output, Decision>,
+  'ctx'
+>
+
+/** Options for `loopDispatch`; `loopCampaignDispatch` takes the same shape with a scenario-only `toLoopOptions`. */
+export interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
+  /** Sandbox client placed on the ctx of every cell's `runLoop`. Supplied once. */
+  sandboxClient: LoopSandboxClient
+  /** Build the per-cell runLoop options from the scenario and profile; called for every cell. */
+  toLoopOptions: (
+    scenario: TScenario,
+    profile: AgentProfile,
+  ) => LoopOptionsForDispatch<Task, Output, Decision>
+  /** Map the finished loop to the artifact the judges score. Default:
+   *  `result.winner?.output`. A loop with no winner yields `undefined` (judges
+   *  skip the cell) — but the loop's usage is reported before this mapping
+   *  runs, so the integrity guard sees real activity. */
+  toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact
+  /** Forward `loop.*` trace events into the campaign's scoped trace so loop
+   *  spans correlate with the cell. Forwarding is off only when exactly `false`. */
+  forwardTrace?: boolean
+  /** `source` label handed to `reportLoopUsage` for the loop's spend. Default `'loop'`. */
+  costSource?: string
+}
+
+/** Wrap a campaign `DispatchContext.trace` as a `LoopTraceEmitter`: every
+ *  emitted event opens a span named after its `kind` and ends it at once. */
+function campaignTraceToLoopEmitter(trace: CampaignTraceWriter): LoopTraceEmitter {
+  return {
+    emit({ kind, runId, timestamp, payload }) {
+      // `payload` is spread last, so its keys win over `runId` / `timestamp`.
+      const span = trace.span(kind, { runId, timestamp, ...payload })
+      span.end()
+    },
+  }
+}
+
+/** Shared per-cell core behind both adapters: builds the loop ctx from the
+ *  campaign ctx, runs `runLoop`, reports its cost + tokens to `ctx.cost`, then
+ *  maps the result to the artifact. */
+async function runLoopForCell<Task, Output, Decision, TScenario extends Scenario, TArtifact>(
+  opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>,
+  scenario: TScenario,
+  profile: AgentProfile,
+  ctx: DispatchContext,
+): Promise<TArtifact> {
+  const perCellOptions = opts.toLoopOptions(scenario, profile)
+  // Loop trace events are forwarded unless the caller passed exactly `false`.
+  const traceEmitter =
+    opts.forwardTrace === false ? undefined : campaignTraceToLoopEmitter(ctx.trace)
+  const loopCtx = { sandboxClient: opts.sandboxClient, signal: ctx.signal, traceEmitter }
+  const result = await runLoop<Task, Output, Decision>({ ...perCellOptions, ctx: loopCtx })
+  // Usage is reported before the artifact mapping, so it lands even with no winner.
+  reportLoopUsage(ctx.cost, result, opts.costSource ?? 'loop')
+  const mapToArtifact =
+    opts.toArtifact ?? ((r: LoopResult<Task, Output, Decision>) => r.winner?.output as TArtifact)
+  return mapToArtifact(result)
+}
+
+/** Adapter for `runProfileMatrix`, where the profile is an axis. The returned
+ *  `ProfileDispatchFn` hands each (profile, scenario) cell to `runLoopForCell`,
+ *  which runs `runLoop` and reports the loop's usage. */
+export function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(
+  opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>,
+): ProfileDispatchFn<TScenario, TArtifact> {
+  const dispatchCell: ProfileDispatchFn<TScenario, TArtifact> = (profile, scenario, ctx) =>
+    runLoopForCell(opts, scenario, profile, ctx)
+  return dispatchCell
+}
+
+/**
+ * Adapter for `runCampaign` (no profile axis). `toLoopOptions` receives only
+ * the scenario and is still called for every cell; the shared core is handed
+ * a fixed placeholder profile that the wrapped callback never sees.
+ */
+export function loopCampaignDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(
+  opts: Omit<LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>, 'toLoopOptions'> & {
+    toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>
+  },
+): DispatchFn<TScenario, TArtifact> {
+  const cellOptions: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact> = {
+    ...opts,
+    toLoopOptions: (scenario) => opts.toLoopOptions(scenario),
+  }
+  const placeholderProfile = { id: 'loop-campaign', model: 'n/a@loop-campaign' } as AgentProfile
+  return (scenario, ctx) => runLoopForCell(cellOptions, scenario, placeholderProfile, ctx)
+}
diff --git a/src/loops/report-usage.ts b/src/loops/report-usage.ts
@@ -0,0 +1,41 @@
+/**
+ * Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
+ * dispatch.
+ *
+ * `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
+ * the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
+ * `runLoop` must forward the loop's cost AND token usage, or the guard reads
+ * the run as a stub and throws. `reportLoopUsage` is that one line:
+ *
+ *   const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
+ *     const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
+ *     reportLoopUsage(ctx.cost, result)
+ *     return result.winner?.output as A
+ *   }
+ *
+ * Typed structurally against the campaign `DispatchContext.cost` so this module
+ * stays free of an agent-eval import — it works with any cost meter exposing
+ * `observe` + `observeTokens`.
+ */
+
+import type { LoopResult } from './types'
+
+/** The slice of a campaign `DispatchContext.cost` that `reportLoopUsage` calls into. */
+export interface UsageSink {
+  observe(amountUsd: number, source: string): void
+  observeTokens(usage: { input: number; output: number }): void
+}
+
+/**
+ * Forward a loop's aggregated `costUsd` (via `observe`, tagged `source`, default `'loop'`) and
+ * token totals (via `observeTokens`) so the backend-integrity guard sees real LLM activity.
+ */
+export function reportLoopUsage<Task, Output, Decision>(
+  cost: UsageSink,
+  result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>,
+  source = 'loop',
+): void {
+  const { costUsd, tokenUsage } = result
+  cost.observe(costUsd, source)
+  cost.observeTokens({ input: tokenUsage.input, output: tokenUsage.output })
+}
diff --git a/src/loops/run-loop.ts b/src/loops/run-loop.ts
@@ -149,6 +149,7 @@ export async function runLoop<Task, Output, Decision>(
           startedAt: now(),
           endedAt: 0,
           costUsd: 0,
+          tokenUsage: { input: 0, output: 0 },
         })
       }
 
@@ -288,6 +289,8 @@ async function executeIteration<Task, Output>(args: ExecuteIterationArgs<Task, O
       const llmCall = extractLlmCallEvent(event, slot.agentRunName)
       if (llmCall) {
         slot.costUsd += llmCall.costUsd ?? 0
+        slot.tokenUsage.input += llmCall.tokensIn ?? 0
+        slot.tokenUsage.output += llmCall.tokensOut ?? 0
         args.ctx.runHandle?.observe(llmCall)
       }
     }
@@ -405,12 +408,21 @@ function finalize<Task, Output, Decision>(
 ): LoopResult<Task, Output, Decision> {
   const winner = (args.options.selectWinner ?? defaultSelectWinner)(args.iterations)
   const costUsd = args.iterations.reduce((sum, iter) => sum + (iter.costUsd || 0), 0)
+  const tokenUsage = args.iterations.reduce(
+    (acc, iter) => {
+      acc.input += iter.tokenUsage?.input ?? 0
+      acc.output += iter.tokenUsage?.output ?? 0
+      return acc
+    },
+    { input: 0, output: 0 },
+  )
   const result: LoopResult<Task, Output, Decision> = {
     decision: args.decision,
     iterations: args.iterations,
     winner,
     durationMs: args.now() - args.startMs,
     costUsd,
+    tokenUsage,
   }
   void emitTrace(args.options.ctx.traceEmitter, {
     kind: 'loop.ended',

diff --git a/src/loops/types.ts b/src/loops/types.ts
@@ -90,6 +90,15 @@ export interface OutputAdapter<Output> {
   parse(events: SandboxEvent[]): Output
 }
 
+/** LLM token usage. Structurally matches agent-eval's `RunTokenUsage` /
+ *  `CampaignTokenUsage` ({ input, output }) so a loop result maps straight
+ *  onto `ctx.cost.observeTokens` in a `runProfileMatrix` dispatch — without
+ *  which the backend-integrity guard reads the run as a stub. */
+export interface LoopTokenUsage {
+  input: number // `executeIteration` adds each `llm_call`'s `tokensIn` here
+  output: number // `executeIteration` adds each `llm_call`'s `tokensOut` here
+}
+
 /** @experimental */
 export interface Iteration<Task, Output> {
   /** 0-based iteration index assigned by the kernel. */
@@ -105,6 +114,8 @@ export interface Iteration<Task, Output> {
   startedAt: number
   endedAt: number
   costUsd: number
+  /** Summed LLM token usage across every `llm_call` event in this iteration. */
+  tokenUsage: LoopTokenUsage
 }
 
 /** @experimental */
@@ -144,6 +155,10 @@ export interface LoopResult<Task, Output, Decision> {
   durationMs: number
   /** Sum of every iteration's `costUsd`. */
   costUsd: number
+  /** Sum of every iteration's token usage. Forward to
+   *  `ctx.cost.observeTokens` in a `runProfileMatrix` dispatch so the
+   *  integrity guard sees real LLM activity. */
+  tokenUsage: LoopTokenUsage
 }
 
 /**