From 145dd7cb2f9f437d0b4f21a91a36eedc974d13ad Mon Sep 17 00:00:00 2001
From: Drew Stone
Date: Sat, 30 May 2026 19:41:01 -0600
Subject: [PATCH] feat(loops+otel): emit a nested GenAI-semconv span tree for loop topology
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Loop traces were flat, zero-duration point spans with bespoke loop.* attrs —
ingestible but rendering as a flat list, with the agent-authored topology
(the round-by-round move + rationale) invisible. This makes the dynamic loops
render as a real topology tree in any OTel/GenAI viewer (Tangle Intelligence,
Phoenix, Tempo, …) over the existing OTLP export path.

Emission (kernel):
- New loop.plan trace event per plan() round: { roundIndex, plannedCount,
  moveKind, rationale }. moveKind comes from a new OPTIONAL
  Driver.describePlan() (createDynamicDriver returns its chosen move's kind +
  rationale); refine/fanout-vote omit it and the kernel infers kind from the
  planned-task count.
- loop.iteration.ended now carries tokenUsage → maps to gen_ai.usage.* on the
  branch span.

OTel mapping (otel-export):
- buildLoopOtelSpans(events, traceId, parentSpanId?) reconstructs a nested,
  REAL-DURATION tree: loop (invoke_workflow) → loop.round (move
  kind/width/rationale/decision) → loop.iteration (invoke_agent:
  gen_ai.agent.name, gen_ai.usage.input_tokens/output_tokens, verdict,
  placement, cost).
- Attributes follow the CURRENT GenAI semconv (gen_ai.operation.name,
  gen_ai.agent.name, gen_ai.conversation.id, gen_ai.usage.input/output_tokens)
  — explicitly NOT the deprecated gen_ai.system / prompt_tokens /
  completion_tokens — plus a namespaced tangle.loop.* / tangle.cost.usd
  extension for what OTel hasn't standardized (topology move, verdict,
  placement, cost).
- trace-propagation buffers events per runId and flushes the tree on
  loop.ended, so the live MCP→OTLP path ships the hierarchy, not flat point
  spans.
- Bumped the stale OTLP scope version 0.23.0 → 0.33.0.
loopEventToOtelSpan (flat) is retained for back-compat. Increment 2 (the planner's own gen_ai span) needs PlannerContext.traceEmitter — deferred. Tests: buildLoopOtelSpans tree shape + real durations + gen_ai/tangle attrs + no-deprecated-keys assertion (otel-export.test.ts, +4); kernel emits loop.plan with move kind + rationale and iteration tokenUsage (dynamic.test.ts, +1). Full suite 403 green, tsc + biome clean. --- src/index.ts | 1 + src/loops/drivers/dynamic.ts | 9 ++ src/loops/index.ts | 12 ++- src/loops/run-loop.ts | 18 ++++ src/loops/types.ts | 41 ++++++++ src/mcp/trace-propagation.ts | 21 +++- src/otel-export.ts | 199 ++++++++++++++++++++++++++++++++++- tests/loops/dynamic.test.ts | 68 ++++++++++++ tests/otel-export.test.ts | 196 ++++++++++++++++++++++++++++++++++ 9 files changed, 556 insertions(+), 9 deletions(-) diff --git a/src/index.ts b/src/index.ts index 9fc8836..dc2e491 100644 --- a/src/index.ts +++ b/src/index.ts @@ -135,6 +135,7 @@ export type { } from './otel-export' // ── OTEL export + trace propagation + eval-run provenance ──────────── export { + buildLoopOtelSpans, createOtelExporter, exportEvalRuns, INTELLIGENCE_WIRE_VERSION, diff --git a/src/loops/drivers/dynamic.ts b/src/loops/drivers/dynamic.ts index 3a9ec39..87c29cd 100644 --- a/src/loops/drivers/dynamic.ts +++ b/src/loops/drivers/dynamic.ts @@ -134,6 +134,15 @@ export function createDynamicDriver( // plan() — and thus the planner — runs again next round. return pending?.kind === 'stop' ? 'done' : 'continue' }, + describePlan() { + // Surface the move the planner just chose (kind + rationale) so the + // kernel's loop.plan trace event carries the agent's intent, not just the + // inferred fan-width. `pending` is the move set by the preceding plan(). + if (!pending) return undefined + return pending.rationale !== undefined + ? 
{ kind: pending.kind, rationale: pending.rationale } + : { kind: pending.kind } + }, } } diff --git a/src/loops/index.ts b/src/loops/index.ts index 8bb4c39..9792208 100644 --- a/src/loops/index.ts +++ b/src/loops/index.ts @@ -38,15 +38,15 @@ export type { TopologyMoveEnvelope, } from './drivers/sandbox-planner' export { createSandboxPlanner } from './drivers/sandbox-planner' -export type { RunLoopOptions } from './run-loop' -export { runLoop } from './run-loop' -export { reportLoopUsage, type UsageSink } from './report-usage' export { - loopCampaignDispatch, - loopDispatch, type LoopDispatchOptions, type LoopOptionsForDispatch, + loopCampaignDispatch, + loopDispatch, } from './loop-dispatch' +export { reportLoopUsage, type UsageSink } from './report-usage' +export type { RunLoopOptions } from './run-loop' +export { runLoop } from './run-loop' export type { AgentRunSpec, DefaultVerdict, @@ -58,6 +58,8 @@ export type { LoopIterationDispatchPayload, LoopIterationEndedPayload, LoopIterationStartedPayload, + LoopPlanDescription, + LoopPlanPayload, LoopResult, LoopSandboxClient, LoopSandboxPlacement, diff --git a/src/loops/run-loop.ts b/src/loops/run-loop.ts index c7c8a77..ab80c00 100644 --- a/src/loops/run-loop.ts +++ b/src/loops/run-loop.ts @@ -109,6 +109,7 @@ export async function runLoop( const loopStart = now() const driverName = options.driver.name ?? 'driver' const iterations: Iteration[] = [] + let round = 0 await emitTrace(options.ctx.traceEmitter, { kind: 'loop.started', @@ -133,6 +134,21 @@ export async function runLoop( while (iterations.length < maxIterations) { if (controller.signal.aborted) throwAbort() const planned = await options.driver.plan(options.task, iterations) + const planDesc = options.driver.describePlan?.() + await emitTrace(options.ctx.traceEmitter, { + kind: 'loop.plan', + runId, + timestamp: now(), + payload: { + roundIndex: round, + plannedCount: planned.length, + moveKind: + planDesc?.kind ?? + (planned.length === 0 ? 
'stop' : planned.length === 1 ? 'refine' : 'fanout'), + rationale: planDesc?.rationale, + }, + }) + round += 1 if (planned.length === 0) break const remaining = maxIterations - iterations.length @@ -319,6 +335,8 @@ async function executeIteration(args: ExecuteIterationArgs { * is hit, or when the abort signal fires. */ decide(history: ReadonlyArray>): Decision | Promise + /** + * Optional: describe the move `plan()` just produced, for trace emission. + * The kernel calls this immediately after `plan()` and emits the result in + * the `loop.plan` event so a topology viewer can render the agent's chosen + * move + rationale (not just the inferred fan-width). Drivers whose topology + * is a pure function of count (refine/fanout-vote) omit it — the kernel + * infers `moveKind` from the planned-task count. Agent-authored drivers + * (`createDynamicDriver`) return their chosen move's kind + rationale. + */ + describePlan?(): LoopPlanDescription | undefined +} + +/** @experimental Driver-supplied description of the just-planned move. */ +export interface LoopPlanDescription { + /** Topology move this round — e.g. `'refine' | 'fanout' | 'verify' | 'stop'`. */ + kind: string + /** Why the driver chose this move (the agent's rationale), when available. */ + rationale?: string } /** @experimental */ @@ -195,6 +213,7 @@ export interface LoopTraceEmitter { /** @experimental */ export type LoopTraceEvent = | { kind: 'loop.started'; runId: string; timestamp: number; payload: LoopStartedPayload } + | { kind: 'loop.plan'; runId: string; timestamp: number; payload: LoopPlanPayload } | { kind: 'loop.iteration.started' runId: string @@ -224,6 +243,25 @@ export interface LoopStartedPayload { maxConcurrency: number } +/** + * Emitted once per `plan()` round, immediately after the driver plans. Carries + * the topology move so a viewer renders WHAT the agent decided + WHY, not just + * the inferred fan-width. 
`moveKind` is the driver's `describePlan().kind` when + * provided, else inferred from `plannedCount` (0→stop, 1→refine, N→fanout). + * + * @experimental + */ +export interface LoopPlanPayload { + /** 0-based plan round (one per `plan()` call). */ + roundIndex: number + /** Tasks the driver issued this round. */ + plannedCount: number + /** Topology move — `'refine' | 'fanout' | 'verify' | 'stop'` etc. */ + moveKind: string + /** Driver rationale for the move, when available. */ + rationale?: string +} + /** @experimental */ export interface LoopIterationStartedPayload { iterationIndex: number @@ -260,6 +298,9 @@ export interface LoopIterationEndedPayload { error?: string costUsd: number durationMs: number + /** Summed LLM token usage for this iteration — maps to gen_ai.usage.* on the + * branch span. Omitted when no `llm_call` events carried token counts. */ + tokenUsage?: LoopTokenUsage } /** @experimental */ diff --git a/src/mcp/trace-propagation.ts b/src/mcp/trace-propagation.ts index 7016b1f..6146ed3 100644 --- a/src/mcp/trace-propagation.ts +++ b/src/mcp/trace-propagation.ts @@ -19,7 +19,7 @@ import type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types' import type { OtelExporter } from '../otel-export' -import { createOtelExporter, loopEventToOtelSpan } from '../otel-export' +import { buildLoopOtelSpans, createOtelExporter } from '../otel-export' export interface TraceContext { /** Trace id inherited from the parent process, or a fresh one. */ @@ -52,11 +52,26 @@ export function createPropagatingTraceEmitter(ctx: TraceContext): { } { const exporter = createOtelExporter() + // Buffer events per loop run, then emit the full nested span tree on + // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the + // OTLP collector — not a flat list of zero-duration point spans. A run that + // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for + // the short-lived MCP subprocess. 
+ const buffers = new Map() + const emitter: LoopTraceEmitter = { emit(event: LoopTraceEvent) { if (!exporter) return - const span = loopEventToOtelSpan(event, ctx.traceId, ctx.parentSpanId) - exporter.exportSpan(span) + const buf = buffers.get(event.runId) + if (buf) buf.push(event) + else buffers.set(event.runId, [event]) + if (event.kind === 'loop.ended') { + const events = buffers.get(event.runId) ?? [event] + buffers.delete(event.runId) + for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) { + exporter.exportSpan(span) + } + } }, } diff --git a/src/otel-export.ts b/src/otel-export.ts index 52e1af6..0652b00 100644 --- a/src/otel-export.ts +++ b/src/otel-export.ts @@ -59,7 +59,21 @@ interface OtlpExport { resourceSpans: OtlpResourceSpans[] } -const SCOPE = { name: '@tangle-network/agent-runtime', version: '0.23.0' } +const SCOPE = { name: '@tangle-network/agent-runtime', version: '0.33.0' } + +/** + * Current (non-deprecated) OpenTelemetry GenAI semantic-convention keys. + * Registry: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/ + * NB: `gen_ai.system` / `gen_ai.usage.prompt_tokens` / `completion_tokens` are + * DEPRECATED — do not emit them. We use `provider.name` + `input/output_tokens`. + */ +const GEN_AI = { + operation: 'gen_ai.operation.name', + agentName: 'gen_ai.agent.name', + conversationId: 'gen_ai.conversation.id', + inputTokens: 'gen_ai.usage.input_tokens', + outputTokens: 'gen_ai.usage.output_tokens', +} as const /** * Create an OTEL exporter. Returns undefined when no endpoint is configured. @@ -179,6 +193,189 @@ export function loopEventToOtelSpan( } } +/** + * Build a nested, real-duration OTLP span tree for ONE loop run from its full + * ordered `LoopTraceEvent` stream. 
Unlike `loopEventToOtelSpan` (one flat, + * zero-duration span per event), this reconstructs the topology hierarchy a + * GenAI trace viewer renders natively: + * + * loop (invoke_workflow) + * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale} + * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement + * └─ … + * + * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and + * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology / + * verdict / placement / cost (not yet standardized). Pure: feed it a buffered + * per-runId event array (e.g. flushed on `loop.ended`) and export the result. + */ +export function buildLoopOtelSpans( + events: ReadonlyArray<{ kind: string; runId: string; timestamp: number; payload: object }>, + traceId: string, + rootParentSpanId?: string, +): OtelSpan[] { + if (events.length === 0) return [] + const tid = padTraceId(traceId) + const out: OtelSpan[] = [] + const num = (v: unknown): number | undefined => + typeof v === 'number' && Number.isFinite(v) ? v : undefined + const str = (v: unknown): string | undefined => + typeof v === 'string' && v.length > 0 ? v : undefined + const rec = (v: unknown): Record => + v && typeof v === 'object' ? (v as Record) : {} + + const started = events.find((e) => e.kind === 'loop.started') + const ended = events.find((e) => e.kind === 'loop.ended') + const runId = events[0]?.runId ?? '' + const rootStart = started?.timestamp ?? events[0]!.timestamp + const rootEnd = ended?.timestamp ?? events[events.length - 1]!.timestamp + const rootId = generateSpanId() + + const make = ( + spanId: string, + parentSpanId: string | undefined, + name: string, + startMs: number, + endMs: number, + attrs: Record, + statusCode = 1, + ): OtelSpan => ({ + traceId: tid, + spanId, + parentSpanId: parentSpanId ? 
padSpanId(parentSpanId) : undefined, + name, + kind: 1, + startTimeUnixNano: msToNs(startMs), + endTimeUnixNano: msToNs(endMs), + attributes: toAttributes(attrs), + status: { code: statusCode }, + }) + + // root + const sp = rec(started?.payload) + const rootAttrs: Record = { + [GEN_AI.operation]: 'invoke_workflow', + [GEN_AI.conversationId]: runId, + 'tangle.loop.driver': str(sp.driver) ?? 'driver', + } + if (Array.isArray(sp.agentRunNames) && sp.agentRunNames.length > 0) { + rootAttrs['tangle.loop.agents'] = sp.agentRunNames.map(String).join(',') + } + if (ended) { + const ep = rec(ended.payload) + const win = num(ep.winnerIterationIndex) + if (win !== undefined) rootAttrs['tangle.loop.winner.iteration_index'] = win + const cost = num(ep.totalCostUsd) + if (cost !== undefined) rootAttrs['tangle.cost.usd'] = cost + const iters = num(ep.iterations) + if (iters !== undefined) rootAttrs['tangle.loop.iterations'] = iters + } + out.push(make(rootId, rootParentSpanId, 'loop', rootStart, rootEnd, rootAttrs)) + + // rounds + iterations + const iterStartTs = new Map() + const placementByIdx = new Map>() + let currentRoundId: string | undefined + let pendingRound: + | { id: string; start: number; attrs: Record } + | undefined + const flushRound = (endMs: number) => { + if (!pendingRound) return + out.push( + make(pendingRound.id, rootId, 'loop.round', pendingRound.start, endMs, pendingRound.attrs), + ) + pendingRound = undefined + } + + for (const e of events) { + const p = rec(e.payload) + switch (e.kind) { + case 'loop.plan': { + flushRound(e.timestamp) + const id = generateSpanId() + const attrs: Record = { + [GEN_AI.operation]: 'invoke_workflow', + 'tangle.loop.round.index': num(p.roundIndex) ?? 0, + 'tangle.loop.move.kind': str(p.moveKind) ?? 'unknown', + 'tangle.loop.move.width': num(p.plannedCount) ?? 
0, + } + const r = str(p.rationale) + if (r) attrs['tangle.loop.move.rationale'] = r + pendingRound = { id, start: e.timestamp, attrs } + currentRoundId = id + break + } + case 'loop.iteration.started': { + const idx = num(p.iterationIndex) + if (idx !== undefined) iterStartTs.set(idx, e.timestamp) + break + } + case 'loop.iteration.dispatch': { + const idx = num(p.iterationIndex) + if (idx === undefined) break + const place: Record = {} + const kind = str(p.placement) + if (kind) place['tangle.loop.placement.kind'] = kind + const sid = str(p.sandboxId) + if (sid) place['tangle.sandbox.id'] = sid + const fid = str(p.fleetId) + if (fid) place['tangle.fleet.id'] = fid + const mid = str(p.machineId) + if (mid) place['tangle.machine.id'] = mid + placementByIdx.set(idx, place) + break + } + case 'loop.iteration.ended': { + const idx = num(p.iterationIndex) ?? 0 + const start = iterStartTs.get(idx) ?? e.timestamp + const err = str(p.error) + const attrs: Record = { + [GEN_AI.operation]: 'invoke_agent', + 'tangle.loop.iteration.index': idx, + } + const agent = str(p.agentRunName) + if (agent) attrs[GEN_AI.agentName] = agent + const tu = rec(p.tokenUsage) + const inTok = num(tu.input) + if (inTok !== undefined) attrs[GEN_AI.inputTokens] = inTok + const outTok = num(tu.output) + if (outTok !== undefined) attrs[GEN_AI.outputTokens] = outTok + const cost = num(p.costUsd) + if (cost !== undefined) attrs['tangle.cost.usd'] = cost + const verdict = rec(p.verdict) + if (typeof verdict.valid === 'boolean') attrs['tangle.loop.verdict.valid'] = verdict.valid + const score = num(verdict.score) + if (score !== undefined) attrs['tangle.loop.verdict.score'] = score + if (err) attrs['tangle.loop.error'] = err + Object.assign(attrs, placementByIdx.get(idx) ?? {}) + out.push( + make( + generateSpanId(), + currentRoundId ?? rootId, + 'loop.iteration', + start, + e.timestamp, + attrs, + err ? 
2 : 1, + ), + ) + break + } + case 'loop.decision': { + if (pendingRound) { + const dec = str(p.decision) + if (dec) pendingRound.attrs['tangle.loop.decision'] = dec + flushRound(e.timestamp) + } + currentRoundId = undefined + break + } + } + } + flushRound(rootEnd) + return out +} + function parseHeadersFromEnv(): Record { if (typeof process === 'undefined') return {} const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS diff --git a/tests/loops/dynamic.test.ts b/tests/loops/dynamic.test.ts index 48ea848..3472f86 100644 --- a/tests/loops/dynamic.test.ts +++ b/tests/loops/dynamic.test.ts @@ -10,6 +10,9 @@ import { type AgentRunSpec, createDynamicDriver, createSandboxPlanner, + type LoopPlanPayload, + type LoopTraceEmitter, + type LoopTraceEvent, type OutputAdapter, runLoop, type TopologyMove, @@ -472,3 +475,68 @@ describe('createSandboxPlanner', () => { ).rejects.toThrow(PlannerError) }) }) + +describe('runLoop dynamic driver — trace emission for topology viewers', () => { + it('emits loop.plan with move kind + rationale, and iteration tokenUsage', async () => { + const goal = 'trace' + const moves: TopologyMove[] = [ + { kind: 'refine', task: { goal, strategy: 'parallel-x' }, rationale: 'first pass, refine' }, + { kind: 'stop', rationale: 'valid result exists' }, + ] + let round = 0 + const planner: TopologyPlanner = () => moves[round++]! + + const client = { + async create(opts?: CreateSandboxOptions): Promise { + const name = + (opts?.backend?.profile && typeof opts.backend.profile === 'object' + ? opts.backend.profile.name + : undefined) ?? 
'w' + return { + async *streamPrompt(message: string) { + const task = JSON.parse(message) as Task + // result event carries usage → kernel sums it into iteration tokenUsage + yield { + type: 'result', + data: { + strategy: task.strategy, + harness: name, + score: scoreFor(task.strategy), + usage: { inputTokens: 800, outputTokens: 200 }, + }, + } satisfies SandboxEvent + }, + } as unknown as SandboxInstance + }, + } + + const all: LoopTraceEvent[] = [] + const planPayloads: LoopPlanPayload[] = [] + const traceEmitter: LoopTraceEmitter = { + emit(e) { + all.push(e) + if (e.kind === 'loop.plan') planPayloads.push(e.payload) + }, + } + + const result = await runLoop({ + driver: createDynamicDriver({ planner }), + agentRun: workerSpecs(['w'])[0], + output, + validator, + task: { goal, strategy: 'naive' }, + ctx: { sandboxClient: client, traceEmitter }, + }) + + expect(result.decision).toBe('done') + expect(planPayloads.map((p) => p.moveKind)).toEqual(['refine', 'stop']) + expect(planPayloads[0]?.rationale).toBe('first pass, refine') + expect(planPayloads[1]?.rationale).toBe('valid result exists') + + const ended = all.find((e) => e.kind === 'loop.iteration.ended') + expect(ended?.kind).toBe('loop.iteration.ended') + if (ended?.kind === 'loop.iteration.ended') { + expect(ended.payload.tokenUsage).toEqual({ input: 800, output: 200 }) + } + }) +}) diff --git a/tests/otel-export.test.ts b/tests/otel-export.test.ts index 6979b80..84fff22 100644 --- a/tests/otel-export.test.ts +++ b/tests/otel-export.test.ts @@ -1,11 +1,207 @@ import { afterEach, describe, expect, it, vi } from 'vitest' import { + buildLoopOtelSpans, createOtelExporter, exportEvalRuns, INTELLIGENCE_WIRE_VERSION, loopEventToOtelSpan, + type OtelSpan, } from '../src/otel-export' +function attrMap(span: OtelSpan): Record { + const out: Record = {} + for (const a of span.attributes ?? []) { + const v = a.value + out[a.key] = + v.stringValue ?? + (v.intValue !== undefined ? Number(v.intValue) : undefined) ?? 
+ v.doubleValue ?? + v.boolValue + } + return out +} + +describe('buildLoopOtelSpans — nested GenAI topology tree', () => { + // One dynamic-loop run: round 0 fans out 2 branches (with rationale), then stops. + const events = [ + { + kind: 'loop.started', + runId: 'run-1', + timestamp: 1000, + payload: { + driver: 'dynamic', + agentRunNames: ['claude', 'codex'], + maxIterations: 8, + maxConcurrency: 4, + }, + }, + { + kind: 'loop.plan', + runId: 'run-1', + timestamp: 1010, + payload: { + roundIndex: 0, + plannedCount: 2, + moveKind: 'fanout', + rationale: 'attempts disagree; fan to 2 harnesses', + }, + }, + { + kind: 'loop.iteration.started', + runId: 'run-1', + timestamp: 1020, + payload: { iterationIndex: 0, agentRunName: 'claude', taskHash: 'h0' }, + }, + { + kind: 'loop.iteration.dispatch', + runId: 'run-1', + timestamp: 1021, + payload: { + iterationIndex: 0, + agentRunName: 'claude', + placement: 'fleet', + sandboxId: 'sb0', + fleetId: 'flt', + machineId: 'm1', + }, + }, + { + kind: 'loop.iteration.started', + runId: 'run-1', + timestamp: 1022, + payload: { iterationIndex: 1, agentRunName: 'codex', taskHash: 'h1' }, + }, + { + kind: 'loop.iteration.ended', + runId: 'run-1', + timestamp: 1500, + payload: { + iterationIndex: 0, + agentRunName: 'claude', + costUsd: 0.02, + durationMs: 480, + verdict: { valid: true, score: 0.9 }, + tokenUsage: { input: 1200, output: 300 }, + }, + }, + { + kind: 'loop.iteration.ended', + runId: 'run-1', + timestamp: 1600, + payload: { + iterationIndex: 1, + agentRunName: 'codex', + costUsd: 0.03, + durationMs: 578, + verdict: { valid: false, score: 0.4 }, + tokenUsage: { input: 1100, output: 250 }, + }, + }, + { + kind: 'loop.decision', + runId: 'run-1', + timestamp: 1610, + payload: { decision: 'continue', historyLength: 2 }, + }, + { + kind: 'loop.plan', + runId: 'run-1', + timestamp: 1620, + payload: { + roundIndex: 1, + plannedCount: 0, + moveKind: 'stop', + rationale: 'valid winner exists', + }, + }, + { + kind: 
'loop.decision', + runId: 'run-1', + timestamp: 1625, + payload: { decision: 'done', historyLength: 2 }, + }, + { + kind: 'loop.ended', + runId: 'run-1', + timestamp: 1700, + payload: { winnerIterationIndex: 0, totalCostUsd: 0.05, durationMs: 700, iterations: 2 }, + }, + ] + + it('builds a real-duration root → round → branch tree with a single trace id', () => { + const spans = buildLoopOtelSpans(events, 'trace-abc') + const byName = (n: string) => spans.filter((s) => s.name === n) + + const root = byName('loop') + expect(root).toHaveLength(1) + expect(spans.every((s) => s.traceId === root[0]!.traceId)).toBe(true) + // real durations, not zero-width point spans + expect(BigInt(root[0]!.endTimeUnixNano) - BigInt(root[0]!.startTimeUnixNano)).toBe( + 700n * 1_000_000n, + ) + + const rounds = byName('loop.round') + expect(rounds).toHaveLength(2) + expect(rounds.every((r) => r.parentSpanId === root[0]!.spanId)).toBe(true) + + const iters = byName('loop.iteration') + expect(iters).toHaveLength(2) + // iterations nest under round 0 (the fanout), not the root + const round0 = rounds[0]! + expect(iters.every((i) => i.parentSpanId === round0.spanId)).toBe(true) + // branch span duration reflects started→ended (480ms for iter 0) + const iter0 = iters.find((i) => attrMap(i)['tangle.loop.iteration.index'] === 0)! + expect(BigInt(iter0.endTimeUnixNano) - BigInt(iter0.startTimeUnixNano)).toBe(480n * 1_000_000n) + }) + + it('emits current (non-deprecated) gen_ai.* + tangle.* attributes', () => { + const spans = buildLoopOtelSpans(events, 'trace-abc') + const root = attrMap(spans.find((s) => s.name === 'loop')!) + expect(root['gen_ai.operation.name']).toBe('invoke_workflow') + expect(root['gen_ai.conversation.id']).toBe('run-1') + expect(root['tangle.loop.driver']).toBe('dynamic') + expect(root['tangle.loop.winner.iteration_index']).toBe(0) + expect(root['tangle.cost.usd']).toBeCloseTo(0.05, 6) + + const round0 = attrMap(spans.filter((s) => s.name === 'loop.round')[0]!) 
+ expect(round0['tangle.loop.move.kind']).toBe('fanout') + expect(round0['tangle.loop.move.width']).toBe(2) + expect(round0['tangle.loop.move.rationale']).toBe('attempts disagree; fan to 2 harnesses') + expect(round0['tangle.loop.decision']).toBe('continue') + + const iter0 = attrMap( + spans + .filter((s) => s.name === 'loop.iteration') + .find((s) => attrMap(s)['tangle.loop.iteration.index'] === 0)!, + ) + expect(iter0['gen_ai.operation.name']).toBe('invoke_agent') + expect(iter0['gen_ai.agent.name']).toBe('claude') + expect(iter0['gen_ai.usage.input_tokens']).toBe(1200) + expect(iter0['gen_ai.usage.output_tokens']).toBe(300) + expect(iter0['tangle.loop.verdict.valid']).toBe(true) + expect(iter0['tangle.loop.verdict.score']).toBeCloseTo(0.9, 6) + expect(iter0['tangle.loop.placement.kind']).toBe('fleet') + expect(iter0['tangle.machine.id']).toBe('m1') + + // NO deprecated keys anywhere + const allKeys = spans.flatMap((s) => (s.attributes ?? []).map((a) => a.key)) + expect(allKeys).not.toContain('gen_ai.system') + expect(allKeys).not.toContain('gen_ai.usage.prompt_tokens') + expect(allKeys).not.toContain('gen_ai.usage.completion_tokens') + }) + + it('parents the loop-root under an inherited span when provided', () => { + const spans = buildLoopOtelSpans(events, 'trace-abc', 'parent-span-id') + const root = spans.find((s) => s.name === 'loop')! + expect(root.parentSpanId).toBeDefined() + expect(root.parentSpanId).toHaveLength(16) + }) + + it('returns [] for an empty event stream', () => { + expect(buildLoopOtelSpans([], 'trace-abc')).toEqual([]) + }) +}) + describe('otel-export', () => { afterEach(() => { delete process.env.OTEL_EXPORTER_OTLP_ENDPOINT