diff --git a/bin/opik-logger-darwin-arm64 b/bin/opik-logger-darwin-arm64 index c9b70a2..0d73e53 100755 Binary files a/bin/opik-logger-darwin-arm64 and b/bin/opik-logger-darwin-arm64 differ diff --git a/docs/builtin-calibration.md b/docs/builtin-calibration.md new file mode 100644 index 0000000..800b950 --- /dev/null +++ b/docs/builtin-calibration.md @@ -0,0 +1,65 @@ +# Calibrating the bundled Claude Code overhead (`cc_builtin`) + +The bundled system prompt + built-in tool schemas never appear in the +transcript, so `cc.billing`'s `static_overhead` lane relies on the +per-version table in `src/cc_builtin.go` (`ccBuiltinByVersion`). This doc is +the procedure for refreshing it on each Claude Code release — and, since the +`Components` field landed, for producing the itemized breakdown the +dashboard shows. + +## Per-release procedure + +1. **Capture one real request** on the new CC version, in a project with NO + MCP servers connected and auto-memory disabled (so the request is almost + pure bundled content). Two known-good capture paths: + - [cost-xray](https://github.com/tigerless-labs/cost-xray): transparent + local mitmproxy hop; the captured request body contains the full + `system` block and `tools` array. + - Any HTTPS-intercepting proxy with the CC CLI's proxy env vars. +2. **Split the `system` block into its named sections** (identity/harness + rules, security policy, memory instructions, environment template, + session guidance, context management). The section headings are stable + markdown headers. +3. **Measure each section** with the free `count_tokens` endpoint (the + plugin's own `countTokensFor` works, or `curl` — auth with + `ANTHROPIC_API_KEY` or the CC OAuth token). +4. **Measure the always-on `tools` array** the same way (count with and + without `tools`, diff). Per-tool figures: add tools one at a time. +5. Add the row: + +```go +"2.1.180": { + SystemPromptTokens: 4900, // Σ prompt sections + SystemToolsTokens: 1900, // tools array + SystemToolsDeferredTokens: 11300, // /context's deferred row + Components: map[string]int{ + "identity_and_rules": 2100, + "security_policy": 300, + "memory_instructions": 800, + "environment_template": 200, // static template only — the dynamic + // part (cwd, git status) is carved out + // at runtime as the `environment` item + "session_guidance": 600, + "context_management": 400, + "builtin_tool_schemas": 1900, + }, +}, +``` + +Invariant: `Σ Components == SystemPromptTokens + SystemToolsTokens`. + +## Validation + +Run a fresh session on the new version and check the trace's +`cc.billing.lanes.static_overhead.total` against `/context`'s +"System prompt" + "System tools" rows (expect agreement within ~15%; the +always-on tool set varies slightly per session config). The `unattributed` +lane absorbs the difference either way — if it jumps after a CC release, +this table is stale. + +## What NOT to do + +Do not derive these numbers as a usage residual ("call-1 usage minus known +pieces") — that residual absorbs all unobserved request content (system +reminders, deferred-name listings) and estimation drift; it inflated +static_overhead ~3.6x when tried (see git history). diff --git a/src/billing.go b/src/billing.go new file mode 100644 index 0000000..d4e2f61 --- /dev/null +++ b/src/billing.go @@ -0,0 +1,626 @@ +package main + +import ( + "encoding/json" + "path/filepath" + "sort" + "strings" +) + +// cc.billing — exact, billing-native attribution (OPIK-6873 follow-up). +// +// Anthropic's prompt caching is prefix-based, so a request's usage splits it +// into three POSITIONAL segments: [0,R) billed as cache_read, [R,R+W) as +// cache_creation, the tail as fresh input. For every LLM call in the turn we +// lay the request out as an ordered list of pieces (static config prefix, +// then the conversation in transcript order), reconcile the piece sizes to +// the call's measured usage, and cut by position. Tier tokens are therefore +// per-call billing events: additive across calls, traces and periods, and +// they sum EXACTLY to the API-reported usage at every level. +// +// Exactness contract, per call: +// input: Σ piece tiers (incl. the explicit `unattributed` tail) == +// input_tokens + cache_read + cache_creation +// output: Σ per-block attributed tokens == output_tokens +// (DeduplicateUsage normalizes block shares to the measured total) +// +// Estimated pieces are scaled down proportionally only when they OVERSHOOT +// the measured total (assistant blocks are usage-derived and never scaled); +// undershoot is parked in `unattributed` — a visible bucket holding what we +// don't parse yet (system reminders, request envelope) plus residual +// estimation error, placed at the tail of the layout, which is where +// unobserved content actually sits in the request. + +type billingTier struct { + cacheRead, cacheCreation, fresh, output float64 +} + +type billingKey struct { + lane, entity string + // "definition" = always-on config that ships regardless of activity; + // "usage" = conversation-driven content. The UI stacks the two. + kind string +} + +type billingPiece struct { + key billingKey + tokens float64 + exact bool // usage-derived (assistant blocks): never rescaled +} + +const ( + unattributedLane = "unattributed" + kindDefinition = "definition" + kindUsage = "usage" +) + +// computeBillingSnapshot returns the `cc.billing` block for the turn, or nil +// when the turn contains no usage-bearing LLM calls. +func computeBillingSnapshot(fullEntries, turnEntries []TranscriptEntry) map[string]interface{} { + calls := llmCallsInTurn(fullEntries, turnEntries) + if len(calls) == 0 { + return nil + } + + setBillingModel(fullEntries) // anchors are keyed (model, sha) + staticPieces := staticPrefixPieces(fullEntries) + skillBodyNames := skillBodyNameBySHA(fullEntries) + toolNames := toolUseNames(fullEntries) + counts := countNewEvents(turnEntries, skillBodyNames, toolNames) + + acc := map[billingKey]*billingTier{} + totals := billingTier{} + for _, call := range calls { + pieces := append(append([]billingPiece{}, staticPieces...), + conversationPieces(fullEntries[:call.entryIdx], skillBodyNames, toolNames)...) + pieces = reconcileToUsage(pieces, float64(call.read+call.write+call.fresh)) + cutByPosition(pieces, float64(call.read), float64(call.write), acc) + + attributeOutput(fullEntries[call.entryIdx:call.entryEnd], acc) + + totals.cacheRead += float64(call.read) + totals.cacheCreation += float64(call.write) + totals.fresh += float64(call.fresh) + totals.output += float64(call.output) + } + + return renderBillingSnapshot(len(calls), totals, acc, counts) +} + +type billingCall struct { + entryIdx int // index in fullEntries of the call's FIRST entry: its request is the prefix before it + entryEnd int // one past the call's LAST entry + read, write, fresh, output int +} + +// llmCallsInTurn returns the turn's LLM calls in order, one per message.id, +// with usage and the message's contiguous entry span within fullEntries. +// The transcript repeats the same usage on every entry of a multi-block +// message, so usage is taken once from the first entry seen. +func llmCallsInTurn(fullEntries, turnEntries []TranscriptEntry) []billingCall { + offset := len(fullEntries) - len(turnEntries) + var calls []billingCall + index := map[string]int{} + for i, e := range turnEntries { + if e.Type != "assistant" || e.Message == nil { + continue + } + id := e.Message.ID + if id == "" { + id = e.UUID + } + if pos, ok := index[id]; ok { + calls[pos].entryEnd = offset + i + 1 + continue + } + if e.Message.Usage == nil { + continue + } + u := e.Message.Usage + index[id] = len(calls) + calls = append(calls, billingCall{ + entryIdx: offset + i, + entryEnd: offset + i + 1, + read: u.CacheReadInputTokens, + write: u.CacheCreationInputTokens, + fresh: u.InputTokens, + output: u.OutputTokens, + }) + } + return calls +} + +// staticPrefixPieces is the request prefix that precedes the conversation: +// the bundled system prompt + built-in tool schemas (version-keyed +// estimates), memory files, agent dispatch blurbs, and MCP schemas. Ordering +// inside the prefix only matters on cache-cold calls (when the R or R+W +// boundary falls inside it); on warm calls the whole prefix is cache_read. +// An unknown cc_builtin version contributes no pieces — that mass then shows +// up in `unattributed` instead of silently vanishing. +func staticPrefixPieces(fullEntries []TranscriptEntry) []billingPiece { + var out []billingPiece + add := func(lane, entity string, tokens int) { + if tokens > 0 { + out = append(out, billingPiece{billingKey{lane, entity, kindDefinition}, float64(tokens), false}) + } + } + + if consts, matched := ccBuiltinFor(findCCVersion(fullEntries)); matched != "" { + if len(consts.Components) > 0 { + // Per-release itemization from the calibration capture + // (docs/builtin-calibration.md). + names := make([]string, 0, len(consts.Components)) + for name := range consts.Components { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + add("static_overhead", name, consts.Components[name]) + } + } else { + // Tier-1 itemization: the environment block (cwd, platform, git + // status snapshot) is dynamic but locally reconstructible — + // carve it out of the bundled prompt so it shows as its own + // item; the rest stays the per-version core estimate. + envTokens := 0 + if envText := environmentBlockText(); envText != "" { + envTokens = measuredOrEstimate(envText, "prose") + if max := consts.SystemPromptTokens / 2; envTokens > max { + envTokens = max // clamp: env can't dominate the prompt + } + add("static_overhead", "environment", envTokens) + } + add("static_overhead", "core_prompt", consts.SystemPromptTokens-envTokens) + add("static_overhead", "builtin_tool_schemas", consts.SystemToolsTokens) + } + } + if m := extractMemorySnapshot(); m != nil { + for _, f := range m["files"].([]map[string]interface{}) { + add("memory", filepath.Base(f["path"].(string)), f["body_tokens"].(int)) + } + } + if a := extractAgentsSnapshot(); a != nil { + for _, ag := range a["agents"].([]map[string]interface{}) { + add("custom_agents", ag["name"].(string), ag["body_tokens"].(int)) + } + } + if t := extractToolsSnapshot(fullEntries); t != nil { + sum := t["summary"].(map[string]interface{}) + if bySource, ok := sum["by_source"].(map[string]interface{}); ok { + if b, ok := bySource["builtin"].(map[string]interface{}); ok { + add("static_overhead", "observed_builtin_schemas", b["schema_tokens"].(int)) + } + } + if byServer, ok := sum["by_server"].([]map[string]interface{}); ok { + for _, s := range byServer { + add("mcp_servers", s["server"].(string), + s["schema_tokens"].(int)+s["instructions_tokens"].(int)) + } + } + } + return out +} + +// conversationPieces replays entries in transcript order, emitting one piece +// per content unit with the same lane attribution rules as the composition +// extractors. Order is what makes the positional tier cut valid. Assistant +// blocks use per-block attributed tokens (usage-derived → exact); parsedIdx +// advances in lockstep with ParseAssistantMessages' emission rules. +func conversationPieces(entries []TranscriptEntry, skillBodyNames map[string]string, + toolNames map[string]string) []billingPiece { + + parsed := ParseAssistantMessages(entries) + DeduplicateUsage(parsed) + parsedIdx := 0 + + var out []billingPiece + add := func(lane, entity, kind string, tokens float64, exact bool) { + if tokens > 0 { + out = append(out, billingPiece{billingKey{lane, entity, kind}, tokens, exact}) + } + } + + for _, e := range entries { + switch e.Type { + case "user": + if e.Message == nil { + continue + } + for _, c := range e.Message.Content { + switch c.Type { + case "text": + if name, ok := skillBodyNames[sha256hex(c.Text)]; ok { + add("skills", name, kindUsage, float64(measuredOrEstimate(c.Text, "skill_body")), false) + } else { + tokens := measuredOrEstimate(c.Text, "user_prompt") + add("user_prompts", promptBucket(tokens), kindUsage, float64(tokens), false) + } + case "tool_result": + lane, entity := toolLane(toolNames[c.ToolUseID]) + add(lane, entity, kindUsage, float64(measuredResultTokens(c.Content)), false) + } + } + case "attachment": + if e.Attachment == nil { + continue + } + switch e.Attachment.Type { + case "skill_listing": + // Per-skill DEFINITION pieces: the listing is one attachment, + // but each skill owns its menu block — that's what makes the + // stacked definition/usage bars and the unused badge work + // straight from billing. + blocks := parseSkillListingMenu(e.Attachment.ContentString(), e.Attachment.Names) + if len(blocks) == 0 { + add("skills", "menu", kindDefinition, + float64(tokEstimateAs(e.Attachment.ContentString(), "skill_listing_menu")), false) + break + } + names := make([]string, 0, len(blocks)) + for name := range blocks { + names = append(names, name) + } + sort.Strings(names) // deterministic layout + for _, name := range names { + add("skills", name, kindDefinition, + float64(measuredOrEstimate(blocks[name], "skill_listing_menu")), false) + } + case "file": + var w struct { + File struct { + Path string `json:"path,omitempty"` + Content string `json:"content,omitempty"` + } `json:"file"` + } + if json.Unmarshal(e.Attachment.Content, &w) == nil { + ext := strings.ToLower(filepath.Ext(w.File.Path)) + if ext == "" { + ext = "other" + } + add("file_attachments", ext, kindUsage, float64(tokEstimate(w.File.Content)), false) + } + case "deferred_tools_delta": + // The deferred catalog mixes built-in tool names with MCP + // ones — split so each lands in its lane (built-in names are + // part of Claude Code's own overhead, not MCP rent). + lines := e.Attachment.AddedLines + names := e.Attachment.AddedNames + if len(lines) != len(names) { + lines = names // fall back to names-only sizing + } + var builtinPayload, mcpPayload []string + for i, name := range names { + line := name + if i < len(lines) { + line = lines[i] + } + if strings.HasPrefix(name, "mcp__") { + mcpPayload = append(mcpPayload, line) + } else { + builtinPayload = append(builtinPayload, line) + } + } + add("static_overhead", "deferred_tool_names", kindDefinition, + float64(measuredOrEstimate(strings.Join(builtinPayload, "\n"), "deferred_tools_payload")), false) + add("mcp_servers", "catalog_deltas", kindDefinition, + float64(measuredOrEstimate(strings.Join(mcpPayload, "\n"), "deferred_tools_payload")), false) + case "mcp_instructions_delta": + // Per-server when the parallel arrays line up. + if len(e.Attachment.AddedNames) == len(e.Attachment.AddedBlocks) && len(e.Attachment.AddedNames) > 0 { + for i, name := range e.Attachment.AddedNames { + add("mcp_servers", name, kindDefinition, + float64(measuredOrEstimate(e.Attachment.AddedBlocks[i], "prose")), false) + } + } else { + add("mcp_servers", "instructions", kindDefinition, + float64(tokEstimateAs(strings.Join(e.Attachment.AddedBlocks, "\n"), "prose")), false) + } + } + case "assistant": + if e.Message == nil || len(e.Message.Content) == 0 { + continue + } + for _, c := range e.Message.Content { + if c.Type == "" { + continue + } + if parsedIdx >= len(parsed) { + break + } + p := parsed[parsedIdx] + parsedIdx++ + switch p.ContentType { + case "text", "thinking": + add("prior_assistant", p.ContentType, kindUsage, float64(p.AttributedOutputTokens), true) + case "tool_use": + lane, entity := toolLane(p.Content.Name) + add(lane, entity, kindUsage, float64(p.AttributedOutputTokens), true) + } + } + } + } + return out +} + +func toolLane(name string) (string, string) { + switch { + case name == "": + return "built_in_tools", "unknown" + case strings.HasPrefix(name, "mcp__"): + parts := strings.SplitN(name, "__", 3) + if len(parts) >= 2 { + return "mcp_servers", parts[1] + } + return "mcp_servers", "unknown" + case name == "Skill": + return "skills", "Skill" + default: + return "built_in_tools", name + } +} + +// reconcileToUsage makes Σ pieces == total exactly. Overshoot shrinks only +// the estimated pieces (usage-derived ones are already exact); undershoot +// appends the explicit `unattributed` tail piece. +func reconcileToUsage(pieces []billingPiece, total float64) []billingPiece { + sum, estSum := 0.0, 0.0 + for _, p := range pieces { + sum += p.tokens + if !p.exact { + estSum += p.tokens + } + } + switch { + case sum > total && estSum > 0: + target := total - (sum - estSum) + if target < 0 { + target = 0 + } + scale := target / estSum + for i := range pieces { + if !pieces[i].exact { + pieces[i].tokens *= scale + } + } + case sum < total: + pieces = append(pieces, billingPiece{ + billingKey{unattributedLane, "", kindUsage}, total - sum, false}) + } + return pieces +} + +// cutByPosition assigns each piece's overlap with the cache_read segment +// [0,R), the cache_creation segment [R,R+W), and the fresh tail. +func cutByPosition(pieces []billingPiece, read, write float64, acc map[billingKey]*billingTier) { + pos := func(x float64) float64 { + if x < 0 { + return 0 + } + return x + } + off := 0.0 + for _, p := range pieces { + s, e := off, off+p.tokens + t := tierFor(acc, p.key) + t.cacheRead += pos(minF(e, read) - s) + t.cacheCreation += pos(minF(e, read+write) - maxF(s, read)) + t.fresh += pos(e - maxF(s, read+write)) + off = e + } +} + +// attributeOutput books the call's own blocks against output. callEntries is +// the contiguous span of the call's entries, so per-block attributed shares +// sum to the call's usage.output_tokens by construction. +func attributeOutput(callEntries []TranscriptEntry, acc map[billingKey]*billingTier) { + parsed := ParseAssistantMessages(callEntries) + DeduplicateUsage(parsed) + for _, p := range parsed { + var key billingKey + switch p.ContentType { + case "thinking": + key = billingKey{"output", "thinking", kindUsage} + case "text": + key = billingKey{"output", "assistant_text", kindUsage} + case "tool_use": + lane, entity := toolLane(p.Content.Name) + key = billingKey{"output", lane + "/" + entity, kindUsage} + default: + continue + } + tierFor(acc, key).output += float64(p.AttributedOutputTokens) + } +} + +// countNewEvents returns the number of NEW events this turn per usage key: +// prompts per bucket, tool calls per tool/server, files per ext, skill loads +// per skill. Additive across traces (each event counted once, in its turn), +// so plain SUM yields true counts — the same split rule as everywhere else. +func countNewEvents(turnEntries []TranscriptEntry, skillBodyNames map[string]string, + toolNames map[string]string) map[billingKey]int { + + counts := map[billingKey]int{} + bump := func(lane, entity string) { + counts[billingKey{lane, entity, kindUsage}]++ + } + + for _, e := range turnEntries { + switch e.Type { + case "user": + if e.Message == nil { + continue + } + for _, c := range e.Message.Content { + switch c.Type { + case "text": + if _, ok := skillBodyNames[sha256hex(c.Text)]; ok { + continue // loads counted via buildLoadedSkillBodies below + } + bump("user_prompts", promptBucket(tokEstimateAs(c.Text, "user_prompt"))) + case "tool_result": + lane, entity := toolLane(toolNames[c.ToolUseID]) + bump(lane, entity) + } + } + case "attachment": + if e.Attachment == nil || e.Attachment.Type != "file" { + continue + } + var w struct { + File struct { + Path string `json:"path,omitempty"` + } `json:"file"` + } + if json.Unmarshal(e.Attachment.Content, &w) == nil { + ext := strings.ToLower(filepath.Ext(w.File.Path)) + if ext == "" { + ext = "other" + } + bump("file_attachments", ext) + } + } + } + for _, l := range buildLoadedSkillBodies(turnEntries) { + bump("skills", l.Name) + } + return counts +} + +func tierFor(acc map[billingKey]*billingTier, key billingKey) *billingTier { + t, ok := acc[key] + if !ok { + t = &billingTier{} + acc[key] = t + } + return t +} + +// measuredResultTokens is resultTokens with anchor lookup for the common +// string-payload shape. +func measuredResultTokens(content interface{}) int { + if s, ok := content.(string); ok { + return measuredOrEstimate(s, "tool_result") + } + return resultTokens(content) +} + +func skillBodyNameBySHA(entries []TranscriptEntry) map[string]string { + out := map[string]string{} + for _, l := range buildLoadedSkillBodies(entries) { + if l.Body != "" { + out[sha256hex(l.Body)] = l.Name + } + } + return out +} + +func toolUseNames(entries []TranscriptEntry) map[string]string { + out := map[string]string{} + for _, e := range entries { + if e.Type != "assistant" || e.Message == nil { + continue + } + for _, c := range e.Message.Content { + if c.Type == "tool_use" && c.ID != "" { + out[c.ID] = c.Name + } + } + } + return out +} + +// renderBillingSnapshot emits a SQL-first shape (OPIK-6870): +// +// cc.billing.lanes..{total, cache_read, cache_creation, fresh, output} +// cc.billing.lanes..items[] {name, total, cache_read, ...} +// +// Lane values are FIXED JSON paths so the BE's composition query stays one +// `SUM(JSONExtractInt(metadata,'cc','billing','lanes','',''))` +// per cell — no ARRAY JOIN needed for totals. Breakdowns use the existing +// generic pattern: ARRAY JOIN over `...,'lanes','','items'` with +// label field `name`. `total` is precomputed (sum of the four columns) so +// the Sankey/lane-card query is also a single path. +func renderBillingSnapshot(callCount int, totals billingTier, + acc map[billingKey]*billingTier, counts map[billingKey]int) map[string]interface{} { + + tierFields := func(t *billingTier) map[string]interface{} { + return map[string]interface{}{ + "total": round(t.cacheRead + t.cacheCreation + t.fresh + t.output), + "cache_read": round(t.cacheRead), + "cache_creation": round(t.cacheCreation), + "input": round(t.fresh), + "output": round(t.output), + } + } + + // A key with new events this turn but no tier mass yet (e.g. the very + // first event landed after the last call's request) must still surface. + for key := range counts { + tierFor(acc, key) + } + + laneTiers := map[string]*billingTier{} + laneItems := map[string][]map[string]interface{}{} + for key, t := range acc { + lt, ok := laneTiers[key.lane] + if !ok { + lt = &billingTier{} + laneTiers[key.lane] = lt + } + lt.cacheRead += t.cacheRead + lt.cacheCreation += t.cacheCreation + lt.fresh += t.fresh + lt.output += t.output + if key.entity != "" { + item := tierFields(t) + item["name"] = key.entity + item["kind"] = key.kind + item["count"] = counts[key] + laneItems[key.lane] = append(laneItems[key.lane], item) + } + } + + lanes := map[string]interface{}{} + for lane, t := range laneTiers { + obj := tierFields(t) + if items := laneItems[lane]; len(items) > 0 { + sort.Slice(items, func(i, j int) bool { + return items[i]["total"].(int) > items[j]["total"].(int) + }) + obj["items"] = items + } + lanes[lane] = obj + } + + return map[string]interface{}{ + "llm_calls": callCount, + // The session's model — lets consumers price the tier columns + // without joining back to spans. + "model": billingModel, + "totals": map[string]interface{}{ + "total": round(totals.cacheRead + totals.cacheCreation + + totals.fresh + totals.output), + "cache_read": round(totals.cacheRead), + "cache_creation": round(totals.cacheCreation), + "input": round(totals.fresh), + "output": round(totals.output), + }, + "lanes": lanes, + } +} + +func round(f float64) int { return int(f + 0.5) } + +func minF(a, b float64) float64 { + if a < b { + return a + } + return b +} + +func maxF(a, b float64) float64 { + if a > b { + return a + } + return b +} diff --git a/src/billing_test.go b/src/billing_test.go new file mode 100644 index 0000000..f775683 --- /dev/null +++ b/src/billing_test.go @@ -0,0 +1,262 @@ +package main + +import ( + "strings" + "testing" +) + +// Exactness contract: per call (and therefore per trace), the lane +// columns — including the explicit `unattributed` row — sum EXACTLY to the +// API-reported usage: input_tokens, cache_read, cache_creation, output. + +func billingColumnSums(snap map[string]interface{}) (read, write, fresh, output, rows int) { + for _, v := range snap["lanes"].(map[string]interface{}) { + row := v.(map[string]interface{}) + read += row["cache_read"].(int) + write += row["cache_creation"].(int) + fresh += row["input"].(int) + output += row["output"].(int) + rows++ + } + return +} + +func assistantCall(t *testing.T, id string, usage *Usage, blocks ...Content) []TranscriptEntry { + t.Helper() + // One entry per block, all sharing message.id and usage — the real + // transcript shape. + entries := make([]TranscriptEntry, 0, len(blocks)) + for i, b := range blocks { + uuid := "u-" + id + if i > 0 { + uuid += "#" + } + entries = append(entries, TranscriptEntry{ + Type: "assistant", UUID: uuid, + Message: &Message{ID: id, Usage: usage, Content: ContentSlice{b}}, + }) + } + return entries +} + +func TestBillingSumsExactlyToUsage(t *testing.T) { + // Realistic mess: estimates won't match usage (unattributed absorbs), + // multi-block calls, tool results, two calls with growing cache. + u1 := &Usage{InputTokens: 900, CacheCreationInputTokens: 30_000, OutputTokens: 250} + u2 := &Usage{InputTokens: 40, CacheReadInputTokens: 31_150, + CacheCreationInputTokens: 5_000, OutputTokens: 700} + + entries := []TranscriptEntry{userPromptEntry("please do the thing")} + entries = append(entries, assistantCall(t, "m1", u1, + Content{Type: "thinking", Thinking: "redacted"}, + Content{Type: "text", Text: strings.Repeat("plan ", 40)}, + Content{Type: "tool_use", ID: "tu1", Name: "Read", + Input: map[string]interface{}{"file_path": "/x"}}, + )...) + entries = append(entries, toolResultEntry("tu1", strings.Repeat("file content\n", 200))) + entries = append(entries, assistantCall(t, "m2", u2, + Content{Type: "text", Text: strings.Repeat("answer ", 60)}, + )...) + + snap := computeBillingSnapshot(entries, entries) + if snap == nil { + t.Fatal("expected billing snapshot") + } + if got := snap["llm_calls"].(int); got != 2 { + t.Fatalf("llm_calls = %d, want 2", got) + } + + wantRead := u1.CacheReadInputTokens + u2.CacheReadInputTokens + wantWrite := u1.CacheCreationInputTokens + u2.CacheCreationInputTokens + wantFresh := u1.InputTokens + u2.InputTokens + wantOut := u1.OutputTokens + u2.OutputTokens + + totals := snap["totals"].(map[string]interface{}) + if totals["cache_read"].(int) != wantRead || totals["cache_creation"].(int) != wantWrite || + totals["input"].(int) != wantFresh || totals["output"].(int) != wantOut { + t.Errorf("totals = %v, want read=%d write=%d fresh=%d output=%d", + totals, wantRead, wantWrite, wantFresh, wantOut) + } + + // THE invariant: lane columns (incl. unattributed) sum to usage, + // allowing ±1 per lane row for integer rounding of float cuts. + read, write, fresh, output, rows := billingColumnSums(snap) + closeEnough := func(got, want int) bool { + d := got - want + if d < 0 { + d = -d + } + return d <= rows + } + if !closeEnough(read, wantRead) || !closeEnough(write, wantWrite) || + !closeEnough(fresh, wantFresh) || !closeEnough(output, wantOut) { + t.Errorf("Σ lanes = read %d / write %d / fresh %d / output %d, want %d/%d/%d/%d (±%d rounding)", + read, write, fresh, output, wantRead, wantWrite, wantFresh, wantOut, rows) + } +} + +func TestBillingPositionalCutMovesContentToCacheRead(t *testing.T) { + // Deterministic layout: no HOME/cwd config, no CC version → the only + // pieces are the conversation. Call 1 bills the prompt as fresh input; + // call 2 re-reads it (plus call 1's output) from cache. + t.Setenv("HOME", t.TempDir()) + t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir()) + + prompt := "please do the thing" + pTok := tokEstimateAs(prompt, "user_prompt") + out1 := 30 + + u1 := &Usage{InputTokens: pTok, OutputTokens: out1} // cold: all fresh + u2 := &Usage{CacheReadInputTokens: pTok + out1, InputTokens: 5, OutputTokens: 10} + + entries := []TranscriptEntry{userPromptEntry(prompt)} + entries = append(entries, assistantCall(t, "m1", u1, + Content{Type: "text", Text: strings.Repeat("ok ", 15)})...) + entries = append(entries, userPromptEntry("and?")) + entries = append(entries, assistantCall(t, "m2", u2, + Content{Type: "text", Text: "done"})...) + + snap := computeBillingSnapshot(entries, entries) + lanes := map[string]map[string]interface{}{} + for lane, v := range snap["lanes"].(map[string]interface{}) { + lanes[lane] = v.(map[string]interface{}) + } + + up := lanes["user_prompts"] + if up == nil { + t.Fatalf("no user_prompts lane: %v", lanes) + } + // Fresh: prompt 1 billed cold on call 1, prompt 2 billed in call 2's tail. + wantFresh := pTok + tokEstimateAs("and?", "user_prompt") + if up["input"].(int) != wantFresh { + t.Errorf("user_prompts fresh = %d, want %d", up["input"], wantFresh) + } + if up["cache_read"].(int) != pTok { + t.Errorf("user_prompts cache_read = %d, want %d (call 2 replay)", up["cache_read"], pTok) + } + // Call 1's output replays inside call 2's cached prefix. + pa := lanes["prior_assistant"] + if pa == nil || pa["cache_read"].(int) != out1 { + t.Errorf("prior_assistant cache_read = %v, want %d", pa, out1) + } + // Output side: booked once per call, exact. + if o := lanes["output"]; o == nil || o["output"].(int) != out1+10 { + t.Errorf("output lane = %v, want output %d", o, out1+10) + } +} + +func TestBillingUnattributedAbsorbsUnknownMass(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir()) + + prompt := "hi" + pTok := tokEstimateAs(prompt, "user_prompt") + // Usage says far more input than we can see — e.g. system reminders. + u := &Usage{InputTokens: pTok + 5_000, OutputTokens: 5} + + entries := []TranscriptEntry{userPromptEntry(prompt)} + entries = append(entries, assistantCall(t, "m1", u, Content{Type: "text", Text: "yo"})...) + + snap := computeBillingSnapshot(entries, entries) + row, ok := snap["lanes"].(map[string]interface{})[unattributedLane].(map[string]interface{}) + if !ok { + t.Fatal("expected an unattributed lane entry") + } + if row["input"].(int) != 5_000 { + t.Errorf("unattributed fresh = %d, want 5000", row["input"]) + } + if row["total"].(int) != 5_000 { + t.Errorf("unattributed total = %d, want 5000", row["total"]) + } +} + +func TestStaticOverheadItemization(t *testing.T) { + resetTokenCache(t) + + // Unknown components: env carve-out + core remainder, conserving the + // table's system_prompt total. + ccBuiltinByVersion["8.8.8"] = ccBuiltinConstants{SystemPromptTokens: 4000, SystemToolsTokens: 900} + defer delete(ccBuiltinByVersion, "8.8.8") + + entries := []TranscriptEntry{userPromptEntry("hi")} + call := assistantCall(t, "m1", &Usage{InputTokens: 10_000, OutputTokens: 5}, + Content{Type: "text", Text: "yo"}) + entries = append(entries, call...) + entries[1].Version = "8.8.8" + + snap := computeBillingSnapshot(entries, entries) + so := snap["lanes"].(map[string]interface{})["static_overhead"].(map[string]interface{}) + byName := map[string]int{} + for _, it := range so["items"].([]map[string]interface{}) { + byName[it["name"].(string)] = it["total"].(int) + } + if byName["builtin_tool_schemas"] == 0 { + t.Errorf("missing builtin_tool_schemas item: %v", byName) + } + // env + core must conserve the prompt constant (env may be 0 in a + // temp-dir cwd, in which case core carries it all). + if got := byName["environment"] + byName["core_prompt"]; got != 4000 { + t.Errorf("environment+core_prompt = %d, want 4000", got) + } + + // With a calibrated Components table, items follow it instead. + ccBuiltinByVersion["8.8.8"] = ccBuiltinConstants{ + SystemPromptTokens: 4000, SystemToolsTokens: 900, + Components: map[string]int{ + "identity_and_rules": 2100, "memory_instructions": 800, + "environment_template": 200, "session_guidance": 900, + "builtin_tool_schemas": 900, + }, + } + snap = computeBillingSnapshot(entries, entries) + so = snap["lanes"].(map[string]interface{})["static_overhead"].(map[string]interface{}) + if so["total"].(int) != 4900 { + t.Errorf("components total = %d, want 4900", so["total"]) + } + if len(so["items"].([]map[string]interface{})) != 5 { + t.Errorf("want 5 component items, got %v", so["items"]) + } +} + +func TestDeferredCatalogSplitsBuiltinFromMcp(t *testing.T) { + resetTokenCache(t) + + delta := TranscriptEntry{Type: "attachment", Attachment: &Attachment{ + Type: "deferred_tools_delta", + AddedNames: []string{"WebSearch", "mcp__slack__send", "Monitor"}, + AddedLines: []string{ + "WebSearch: search the web for things and stuff", + "mcp__slack__send: send a slack message to a channel", + "Monitor: watch a long-running script for events", + }, + }} + entries := []TranscriptEntry{userPromptEntry("hi"), delta} + entries = append(entries, assistantCall(t, "m1", &Usage{InputTokens: 5_000, OutputTokens: 5}, + Content{Type: "text", Text: "ok"})...) + + snap := computeBillingSnapshot(entries, entries) + lanes := snap["lanes"].(map[string]interface{}) + soItems := lanes["static_overhead"].(map[string]interface{})["items"].([]map[string]interface{}) + foundBuiltin := false + for _, it := range soItems { + if it["name"] == "deferred_tool_names" && it["total"].(int) > 0 { + foundBuiltin = true + } + } + if !foundBuiltin { + t.Errorf("expected deferred_tool_names under static_overhead: %v", soItems) + } + mcp, ok := lanes["mcp_servers"].(map[string]interface{}) + if !ok { + t.Fatal("expected mcp_servers lane") + } + foundMcp := false + for _, it := range mcp["items"].([]map[string]interface{}) { + if it["name"] == "catalog_deltas" && it["total"].(int) > 0 { + foundMcp = true + } + } + if !foundMcp { + t.Errorf("expected catalog_deltas under mcp_servers: %v", mcp["items"]) + } +} diff --git a/src/cc_builtin.go b/src/cc_builtin.go index f83b261..3a255f7 100644 --- a/src/cc_builtin.go +++ b/src/cc_builtin.go @@ -10,10 +10,10 @@ import ( // // - SystemPromptTokens the bundled default system prompt // - SystemToolsTokens full JSON schemas for the default tools -// (Read, Edit, Bash, …) — not the names alone +// (Read, Edit, Bash, …) — not the names alone // - SystemToolsDeferredTokens the catalog of deferred tool definitions -// (Cron*, Task*, Web*, Monitor, …) plus any -// schemas Claude Code injects on demand +// (Cron*, Task*, Web*, Monitor, …) plus any +// schemas Claude Code injects on demand // // These are taken from `/context` for a known CC version. They drift with // each binary release, so the table below should grow over time. @@ -21,6 +21,13 @@ type ccBuiltinConstants struct { SystemPromptTokens int SystemToolsTokens int SystemToolsDeferredTokens int + // Components is the optional per-release itemization of the bundled + // block (identity/harness rules, memory instructions, tool policy, + // per-tool schemas, ...), produced by the calibration capture described + // in docs/builtin-calibration.md. When present it replaces the + // two-entity system_prompt/builtin_tool_schemas split in cc.billing. + // Component values must sum to SystemPromptTokens + SystemToolsTokens. + Components map[string]int } // ccBuiltinByVersion is a small versioned table — keys are exact CC @@ -37,6 +44,14 @@ var ccBuiltinByVersion = map[string]ccBuiltinConstants{ SystemToolsTokens: 17600, SystemToolsDeferredTokens: 19200, }, + // 2.1.173 moved most of the built-in tool catalog behind deferral: + // always-on schemas dropped 17.6k → 1.1k. Captured from /context on + // 2.1.173 + Fable (cc.context_runtime cross-check, OPIK-6873 audit). + "2.1.173": { + SystemPromptTokens: 4800, + SystemToolsTokens: 1100, + SystemToolsDeferredTokens: 11300, + }, } // ccBuiltinFor returns the constants for the given CC version. Falls diff --git a/src/context_fetch_test.go b/src/context_fetch_test.go index 0cb6ead..df2c17b 100644 --- a/src/context_fetch_test.go +++ b/src/context_fetch_test.go @@ -80,16 +80,16 @@ func TestParseContextMarkdownCategories(t *testing.T) { t.Fatalf("categories missing or wrong type: %T", out["categories"]) } wants := map[string]int{ - "system_prompt": 8000, - "system_tools": 17600, - "mcp_tools_deferred": 2500, - "system_tools_deferred": 19200, - "custom_agents": 3300, - "memory_files": 2900, - "skills": 2100, - "messages": 6100, - "free_space": 928600, - "autocompact_buffer": 33000, + "system_prompt": 8000, + "system_tools": 17600, + "mcp_tools_deferred": 2500, + "system_tools_deferred": 19200, + "custom_agents": 3300, + "memory_files": 2900, + "skills": 2100, + "messages": 6100, + "free_space": 928600, + "autocompact_buffer": 33000, } for k, want := range wants { if got := cats[k]; got != want { @@ -162,12 +162,12 @@ func TestParseTokens(t *testing.T) { func TestNormalizeCategoryKey(t *testing.T) { cases := map[string]string{ - "System prompt": "system_prompt", - "System tools (deferred)": "system_tools_deferred", - "MCP tools (deferred)": "mcp_tools_deferred", - "Custom agents": "custom_agents", - "Free space": "free_space", - "Autocompact buffer": "autocompact_buffer", + "System prompt": "system_prompt", + "System tools (deferred)": "system_tools_deferred", + "MCP tools (deferred)": "mcp_tools_deferred", + "Custom agents": "custom_agents", + "Free space": "free_space", + "Autocompact buffer": "autocompact_buffer", } for in, want := range cases { if got := normalizeCategoryKey(in); got != want { diff --git a/src/context_snapshot.go b/src/context_snapshot.go index cff0ca6..92c2ec0 100644 --- a/src/context_snapshot.go +++ b/src/context_snapshot.go @@ -130,10 +130,10 @@ func buildContextSnapshot(state *State) map[string]interface{} { } } return map[string]interface{}{ - "categories": cats, - "total_tokens": alwaysOn, // ← matches /context and the API - "deferred_tokens": deferred, // ← informational; loaded on demand - "source": "estimated_sync", + "categories": cats, + "total_tokens": alwaysOn, // ← matches /context and the API + "deferred_tokens": deferred, // ← informational; loaded on demand + "source": "estimated_sync", } } @@ -158,6 +158,7 @@ func cumulativeMessagesTokens(entries []TranscriptEntry) int { skillBodies := skillBodyHashSet(entries) total := 0 + seenMsgs := map[string]bool{} for _, e := range entries { switch e.Type { case "user": @@ -179,8 +180,18 @@ func cumulativeMessagesTokens(entries []TranscriptEntry) int { if e.Message == nil || e.Message.Usage == nil { continue } - // Anthropic's own count for this LLM call's output — - // exact, no estimation drift. + // Anthropic's own count for this LLM call's output — exact, + // no estimation drift. Counted once per message.id: multi-block + // messages repeat the same usage on every entry (one entry per + // content block), so a per-entry sum double-counts. + id := e.Message.ID + if id == "" { + id = e.UUID + } + if seenMsgs[id] { + continue + } + seenMsgs[id] = true total += e.Message.Usage.OutputTokens } } diff --git a/src/count_tokens.go b/src/count_tokens.go new file mode 100644 index 0000000..badb7c8 --- /dev/null +++ b/src/count_tokens.go @@ -0,0 +1,461 @@ +package main + +import ( + "bytes" + "encoding/json" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + "sort" + "sync" + "time" +) + +// count_tokens anchoring (OPIK-6873 follow-up). +// +// Anthropic's /v1/messages/count_tokens endpoint is free (no token billing) +// and accepts Claude Code's own OAuth token, so we can measure the EXACT +// token cost of any text we possess instead of relying on calibrated +// chars/token ratios. Measurements are cached persistently keyed by +// (model, sha256) — static config content (skill menu blocks, memory files, +// agent blurbs, MCP instructions) is hash-stable, so it's measured once +// ever and steady-state API traffic is ~zero. +// +// The measurement pass runs in a detached child at turn end (same pattern +// as the /context fetcher): it re-derives the anchor candidates from the +// transcript, measures the biggest cache misses under a per-turn budget, +// and persists them. The NEXT flush picks them up via measuredOrEstimate. +// Totals are unaffected (per-call usage is already exact); anchors improve +// the split and shrink `unattributed` to genuinely unobserved content. +// +// As a bonus, on a session's cache-cold first call the bundled system +// prompt + built-in tool schemas — the one block we can never read — is +// derived as a residual: call₁ usage minus everything we CAN account for. +// Stored per CC version, replacing the hand-maintained constant table +// whenever a measurement exists. + +const ( + countTokensEndpoint = "https://api.anthropic.com/v1/messages/count_tokens" + anthropicVersion = "2023-06-01" + oauthBeta = "oauth-2025-04-20" + defaultCountModel = "claude-fable-5" + + tokenCountBudget = 10 // API calls per measurement pass, biggest-first + oauthSkewMs = 60_000 +) + +// --------------------------------------------------------------------------- +// Persistent cache: { "|": tokens } + +var ( + tokCacheOnce sync.Once + tokCacheMu sync.Mutex + tokCacheMap map[string]int +) + +func tokenCachePath() string { + home, err := os.UserHomeDir() + if err != nil || home == "" { + return filepath.Join(os.TempDir(), "opik-token-counts.json") + } + return filepath.Join(home, ".opik-token-counts.json") +} + +func loadTokenCache() map[string]int { + tokCacheOnce.Do(func() { + tokCacheMap = map[string]int{} + data, err := os.ReadFile(tokenCachePath()) + if err == nil { + _ = json.Unmarshal(data, &tokCacheMap) + } + }) + return tokCacheMap +} + +func tokenCacheGet(key string) (int, bool) { + tokCacheMu.Lock() + defer tokCacheMu.Unlock() + v, ok := loadTokenCache()[key] + return v, ok +} + +// saveTokenCounts merges updates into the on-disk cache (reload + merge + +// atomic rename, so concurrent sessions don't clobber each other's rows). +func saveTokenCounts(updates map[string]int) { + if len(updates) == 0 { + return + } + tokCacheMu.Lock() + defer tokCacheMu.Unlock() + merged := map[string]int{} + if data, err := os.ReadFile(tokenCachePath()); err == nil { + _ = json.Unmarshal(data, &merged) + } + for k, v := range updates { + merged[k] = v + } + loadTokenCache() // ensure the Once ran before replacing the map + tokCacheMap = merged + data, err := json.Marshal(merged) + if err != nil { + return + } + tmp := tokenCachePath() + ".tmp" + if os.WriteFile(tmp, data, 0o600) == nil { + _ = os.Rename(tmp, tokenCachePath()) + } +} + +// --------------------------------------------------------------------------- +// Lookup used by the attribution code + +// billingModel is the model the current snapshot's measurements are keyed +// by; set once per computeBillingSnapshot from the transcript. +var billingModel = defaultCountModel + +func setBillingModel(entries []TranscriptEntry) { + if m := mostRecentModelFromEntries(entries); m != "" { + billingModel = m + } +} + +// measuredOrEstimate returns the exact measured token count for text when +// one is cached for the current model, falling back to the calibrated +// ratio estimate. +func measuredOrEstimate(text, contentType string) int { + if text == "" { + return 0 + } + if v, ok := tokenCacheGet(billingModel + "|" + sha256hex(text)); ok { + return v + } + return tokEstimateAs(text, contentType) +} + +// --------------------------------------------------------------------------- +// Auth: Claude Code's own OAuth credential (or ANTHROPIC_API_KEY) + +func countTokensHeaders() map[string]string { + if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { + return map[string]string{ + "x-api-key": key, + "anthropic-version": anthropicVersion, + "content-type": "application/json", + } + } + if tok := claudeOAuthToken(); tok != "" { + return map[string]string{ + "authorization": "Bearer " + tok, + "anthropic-version": anthropicVersion, + "anthropic-beta": oauthBeta, + "content-type": "application/json", + } + } + return nil +} + +func claudeOAuthToken() string { + if blob := oauthBlobFromFile(); blob != nil { + if tok := validOAuthToken(blob); tok != "" { + return tok + } + } + if runtime.GOOS == "darwin" { + if blob := oauthBlobFromKeychain(); blob != nil { + return validOAuthToken(blob) + } + } + return "" +} + +type oauthBlob struct { + AccessToken string `json:"accessToken"` + ExpiresAt int64 `json:"expiresAt"` +} + +func validOAuthToken(b *oauthBlob) string { + if b.AccessToken == "" { + return "" + } + if b.ExpiresAt > 0 && time.Now().UnixMilli() >= b.ExpiresAt-oauthSkewMs { + return "" + } + return b.AccessToken +} + +func oauthBlobFromFile() *oauthBlob { + dir := os.Getenv("CLAUDE_CONFIG_DIR") + if dir == "" { + home, err := os.UserHomeDir() + if err != nil { + return nil + } + dir = filepath.Join(home, ".claude") + } + data, err := os.ReadFile(filepath.Join(dir, ".credentials.json")) + if err != nil { + return nil + } + return parseOAuthBlob(data) +} + +func oauthBlobFromKeychain() *oauthBlob { + out, err := exec.Command("security", "find-generic-password", + "-s", "Claude Code-credentials", "-w").Output() + if err != nil { + return nil + } + return parseOAuthBlob(out) +} + +func parseOAuthBlob(data []byte) *oauthBlob { + var wrapper struct { + ClaudeAiOauth *oauthBlob `json:"claudeAiOauth"` + } + if json.Unmarshal(data, &wrapper) != nil { + return nil + } + return wrapper.ClaudeAiOauth +} + +// --------------------------------------------------------------------------- +// The endpoint + +// countTokensHTTP is swappable for tests. +var countTokensHTTP = func(payload []byte, headers map[string]string) (int, error) { + req, err := http.NewRequest("POST", countTokensEndpoint, bytes.NewReader(payload)) + if err != nil { + return 0, err + } + for k, v := range headers { + req.Header.Set(k, v) + } + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + return 0, err + } + defer resp.Body.Close() + var out struct { + InputTokens int `json:"input_tokens"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return 0, err + } + return out.InputTokens, nil +} + +func countTokensFor(model, text string, headers map[string]string) (int, error) { + payload, err := json.Marshal(map[string]interface{}{ + "model": model, + "messages": []map[string]string{ + {"role": "user", "content": text}, + }, + }) + if err != nil { + return 0, err + } + return countTokensHTTP(payload, headers) +} + +// --------------------------------------------------------------------------- +// Measurement pass (runs in the detached child) + +type anchorCandidate struct { + sha, text, contentType string +} + +// anchorCandidates gathers every text whose exact size improves attribution: +// transcript content (prompts, skill bodies, menu blocks, instruction +// blocks, string tool results) plus on-disk config (memory files, agent +// frontmatter is covered transitively by the agents extractor's content). +func anchorCandidates(entries []TranscriptEntry) []anchorCandidate { + seen := map[string]bool{} + var out []anchorCandidate + add := func(text, contentType string) { + if len(text) < 200 { // tiny texts: ratio error is ≤ a few tokens + return + } + sha := sha256hex(text) + if seen[sha] { + return + } + seen[sha] = true + out = append(out, anchorCandidate{sha, text, contentType}) + } + + skillBodyNames := skillBodyNameBySHA(entries) + for _, e := range entries { + switch e.Type { + case "user": + if e.Message == nil { + continue + } + for _, c := range e.Message.Content { + switch c.Type { + case "text": + if _, ok := skillBodyNames[sha256hex(c.Text)]; ok { + add(c.Text, "skill_body") + } else { + add(c.Text, "user_prompt") + } + case "tool_result": + if s, ok := c.Content.(string); ok { + add(s, "tool_result") + } + } + } + case "attachment": + if e.Attachment == nil { + continue + } + switch e.Attachment.Type { + case "skill_listing": + for _, block := range parseSkillListingMenu(e.Attachment.ContentString(), e.Attachment.Names) { + add(block, "skill_listing_menu") + } + case "mcp_instructions_delta": + for _, b := range e.Attachment.AddedBlocks { + add(b, "prose") + } + } + } + } + + // The reconstructed environment block — changes per session (git + // status), so it mostly rides the ratio estimate, but stable repos + // get anchored. + add(environmentBlockText(), "prose") + + // On-disk config content: memory files + agent files. + if m := extractMemorySnapshot(); m != nil { + for _, f := range m["files"].([]map[string]interface{}) { + if body, err := os.ReadFile(f["path"].(string)); err == nil { + add(string(body), "memory_file") + } + } + } + if a := extractAgentsSnapshot(); a != nil { + for _, ag := range a["agents"].([]map[string]interface{}) { + if body, err := os.ReadFile(ag["path"].(string)); err == nil { + if meta := frontmatter(string(body)); meta != "" { + add(meta, "agent_frontmatter") + } + } + } + } + return out +} + +// runTokenCountPass measures the biggest uncached candidates under budget +// and persists them, then derives the cc_builtin cold-start residual when +// the session's first call is available. Returns the number of API calls +// spent (for tests/logging). +func runTokenCountPass(entries []TranscriptEntry, budget int) int { + headers := countTokensHeaders() + if headers == nil { + debugLog("count_tokens: no credential — skipping") + return 0 + } + model := mostRecentModelFromEntries(entries) + if model == "" { + model = defaultCountModel + } + + candidates := anchorCandidates(entries) + var misses []anchorCandidate + for _, c := range candidates { + if _, ok := tokenCacheGet(model + "|" + c.sha); !ok { + misses = append(misses, c) + } + } + sort.Slice(misses, func(i, j int) bool { + return len(misses[i].text) > len(misses[j].text) + }) + + updates := map[string]int{} + spent := 0 + + // Baseline: the envelope overhead of a single one-token user message. + baselineKey := model + "|__baseline__" + baseline, ok := tokenCacheGet(baselineKey) + if !ok && len(misses) > 0 { + b, err := countTokensFor(model, ".", headers) + if err != nil { + debugLog("count_tokens: baseline failed: %v", err) + return 0 + } + baseline = b + updates[baselineKey] = b + spent++ + } + + for _, c := range misses { + if spent >= budget { + break + } + n, err := countTokensFor(model, c.text, headers) + if err != nil { + debugLog("count_tokens: measure failed: %v", err) + break + } + spent++ + tokens := n - baseline + 1 // "." ≈ 1 token of content + if tokens < 0 { + tokens = 0 + } + updates[model+"|"+c.sha] = tokens + } + + saveTokenCounts(updates) + debugLog("count_tokens: measured %d of %d misses (budget %d)", len(updates), len(misses), budget) + return spent +} + +// --------------------------------------------------------------------------- +// Detached child plumbing + +func spawnDetachedTokenCount(sessionID string) error { + if sessionID == "" || os.Getenv("OPIK_CC_DISABLE_TOKEN_COUNT") == "true" { + return nil + } + self, err := os.Executable() + if err != nil { + return err + } + cmd := exec.Command(self) + cmd.Env = append(os.Environ(), + "OPIK_CC_TOKEN_COUNT=1", + "OPIK_CC_FETCH_SESSION_ID="+sessionID, + ) + detachProcess(cmd) + devnull, _ := os.Open(os.DevNull) + if devnull != nil { + cmd.Stdin = devnull + cmd.Stdout = devnull + cmd.Stderr = devnull + } + if err := cmd.Start(); err != nil { + return err + } + return cmd.Process.Release() +} + +// runTokenCountMode is the detached child's entry point. +func runTokenCountMode() { + sid := os.Getenv("OPIK_CC_FETCH_SESSION_ID") + if sid == "" { + return + } + state, err := LoadState(sid) + if err != nil || state == nil || state.Transcript == "" { + return + } + entries, err := ReadTranscript(state.Transcript, 0) + if err != nil || len(entries) == 0 { + return + } + runTokenCountPass(entries, tokenCountBudget) +} diff --git a/src/count_tokens_test.go b/src/count_tokens_test.go new file mode 100644 index 0000000..0ebd0f5 --- /dev/null +++ b/src/count_tokens_test.go @@ -0,0 +1,115 @@ +package main + +import ( + "encoding/json" + "os" + "strings" + "testing" +) + +// resetTokenCache points the cache at a fresh temp HOME and clears the +// in-memory state between tests (the production code loads once per +// process). +func resetTokenCache(t *testing.T) { + t.Helper() + t.Setenv("HOME", t.TempDir()) + t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir()) + tokCacheMu.Lock() + tokCacheMap = map[string]int{} + tokCacheMu.Unlock() + billingModel = defaultCountModel +} + +func TestMeasuredOrEstimatePrefersAnchor(t *testing.T) { + resetTokenCache(t) + text := strings.Repeat("some skill menu line\n", 30) + + est := measuredOrEstimate(text, "skill_listing_menu") + if est != tokEstimateAs(text, "skill_listing_menu") { + t.Fatalf("without anchor, want ratio estimate %d, got %d", + tokEstimateAs(text, "skill_listing_menu"), est) + } + + saveTokenCounts(map[string]int{billingModel + "|" + sha256hex(text): 4242}) + if got := measuredOrEstimate(text, "skill_listing_menu"); got != 4242 { + t.Errorf("with anchor, want 4242, got %d", got) + } +} + +func TestSaveTokenCountsMergesOnDisk(t *testing.T) { + resetTokenCache(t) + saveTokenCounts(map[string]int{"m|a": 1}) + saveTokenCounts(map[string]int{"m|b": 2}) + + // Reload from disk into a fresh map to prove both rows persisted. + var onDisk map[string]int + data, err := jsonReadFile(tokenCachePath()) + if err != nil { + t.Fatal(err) + } + if err := json.Unmarshal(data, &onDisk); err != nil { + t.Fatal(err) + } + if onDisk["m|a"] != 1 || onDisk["m|b"] != 2 { + t.Errorf("on-disk cache = %v, want both rows", onDisk) + } +} + +func TestRunTokenCountPassBudgetAndBaseline(t *testing.T) { + resetTokenCache(t) + t.Setenv("ANTHROPIC_API_KEY", "test-key") // auth via env, no keychain + + // Mock the endpoint: baseline (".") costs 8; any text costs 8 - 1 + its + // length/4 so measured = len/4 exactly after baseline subtraction. + calls := 0 + old := countTokensHTTP + countTokensHTTP = func(payload []byte, headers map[string]string) (int, error) { + calls++ + var req struct { + Messages []struct { + Content string `json:"content"` + } `json:"messages"` + } + _ = json.Unmarshal(payload, &req) + text := req.Messages[0].Content + if text == "." { + return 8, nil + } + return 8 - 1 + len(text)/4, nil + } + defer func() { countTokensHTTP = old }() + + big := strings.Repeat("a long tool result line\n", 100) // 2400 chars + small := strings.Repeat("a smaller prompt body here\n", 10) // 270 chars + entries := []TranscriptEntry{ + userPromptEntry(small), + toolResultEntry("tu1", big), + assistantCall(t, "m1", &Usage{InputTokens: 50, OutputTokens: 5}, + Content{Type: "tool_use", ID: "tu1", Name: "Read", + Input: map[string]interface{}{"f": "x"}})[0], + } + + // Budget of 2 = baseline + ONE measurement; biggest-first → the big + // tool result gets measured, the small prompt does not. + spent := runTokenCountPass(entries, 2) + if spent != 2 { + t.Fatalf("spent = %d, want 2 (baseline + 1 text)", spent) + } + if got, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(big)); !ok || got != len(big)/4 { + t.Errorf("big text anchor = %d/%v, want %d", got, ok, len(big)/4) + } + if _, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(small)); ok { + t.Error("small text should not have been measured under budget") + } + + // Second pass: baseline + big are cached; remaining budget measures small. + spent = runTokenCountPass(entries, 2) + if _, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(small)); !ok { + t.Error("second pass should measure the remaining miss") + } + _ = spent +} + +func jsonReadFile(path string) ([]byte, error) { + return os.ReadFile(path) +} diff --git a/src/cumulative_test.go b/src/cumulative_test.go new file mode 100644 index 0000000..b2abb80 --- /dev/null +++ b/src/cumulative_test.go @@ -0,0 +1,129 @@ +package main + +import ( + "encoding/json" + "strings" + "testing" +) + +// Shared fixtures + regressions that guard the billing block's attribution +// rules (the composition emitters they originally covered were consolidated +// into cc.billing — see billing.go). + +func userPromptEntry(text string) TranscriptEntry { + return TranscriptEntry{Type: "user", Message: &Message{Content: ContentSlice{ + {Type: "text", Text: text}, + }}} +} + +func toolResultEntry(toolUseID, payload string) TranscriptEntry { + return TranscriptEntry{Type: "user", Message: &Message{Content: ContentSlice{ + {Type: "tool_result", ToolUseID: toolUseID, Content: payload}, + }}} +} + +func fileAttachmentEntry(path, content string) TranscriptEntry { + raw, _ := json.Marshal(map[string]interface{}{ + "file": map[string]string{"path": path, "content": content}, + }) + return TranscriptEntry{Type: "attachment", Attachment: &Attachment{Type: "file", Content: raw}} +} + +func TestBillingNoUsageDoubleCountAcrossMultiBlockEntries(t *testing.T) { + // The transcript repeats the same message.usage on EVERY entry of a + // multi-block message. Regression for the 2x inflation found in the + // OPIK-6873 audit: usage must be booked once per message.id. + t.Setenv("HOME", t.TempDir()) + t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir()) + + usage := &Usage{InputTokens: 100, OutputTokens: 1000} + entries := []TranscriptEntry{ + userPromptEntry("go"), + {Type: "assistant", UUID: "u1", Message: &Message{ID: "m1", Usage: usage, Content: ContentSlice{ + {Type: "thinking", Thinking: "redacted"}, + }}}, + {Type: "assistant", UUID: "u1#1", Message: &Message{ID: "m1", Usage: usage, Content: ContentSlice{ + {Type: "text", Text: strings.Repeat("answer ", 50)}, + }}}, + } + + snap := computeBillingSnapshot(entries, entries) + if got := snap["llm_calls"].(int); got != 1 { + t.Errorf("llm_calls = %d, want 1 (two entries, one message)", got) + } + totals := snap["totals"].(map[string]interface{}) + if totals["output"].(int) != 1000 { + t.Errorf("output total = %d, want 1000 booked once", totals["output"]) + } + if totals["input"].(int) != 100 { + t.Errorf("fresh total = %d, want 100 booked once", totals["input"]) + } +} + +func TestBillingExcludesSkillBodiesFromUserPrompts(t *testing.T) { + // A slash-loaded skill body is a user text block in the transcript; it + // must be attributed to the skills lane, never to user_prompts. + t.Setenv("HOME", t.TempDir()) + t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir()) + + body := "Base directory for this skill: /tmp/skills/opik\n\n" + strings.Repeat("skill body ", 300) + bodyTok := tokEstimateAs(body, "skill_body") + promptTok := tokEstimateAs("real question", "user_prompt") + + turn1 := []TranscriptEntry{ + userPromptEntry("opik:opik\n/opik:opik"), + userPromptEntry(body), + } + entries := append(turn1, userPromptEntry("real question")) + call := assistantCall(t, "m1", &Usage{InputTokens: 5_000, OutputTokens: 10}, + Content{Type: "text", Text: "ok"}) + entries = append(entries, call...) + + snap := computeBillingSnapshot(entries, entries) + lanes := snap["lanes"].(map[string]interface{}) + + skills := lanes["skills"].(map[string]interface{}) + if skills["total"].(int) < bodyTok { + t.Errorf("skills total = %d, want >= body %d", skills["total"], bodyTok) + } + up := lanes["user_prompts"].(map[string]interface{}) + // user_prompts must hold only the preamble + real question, not the body. + if up["total"].(int) >= bodyTok { + t.Errorf("user_prompts total = %d — skill body leaked in (body=%d, prompt=%d)", + up["total"], bodyTok, promptTok) + } + + // And the load is counted once, on the skills item. + for _, v := range skills["items"].([]map[string]interface{}) { + if v["name"] == "opik:opik" && v["kind"] == kindUsage { + if v["count"].(int) != 1 { + t.Errorf("skill load count = %d, want 1", v["count"]) + } + return + } + } + t.Error("expected a skills usage item for opik:opik") +} + +func TestRepeatSkillLoadsKeepDistinctEvents(t *testing.T) { + slashLoad := func(name, body string) []TranscriptEntry { + return []TranscriptEntry{ + userPromptEntry("" + name + "\n/" + name + ""), + userPromptEntry("Base directory for this skill: /tmp/skills/x\n\n" + body), + } + } + entries := append(slashLoad("opik:opik", "body one"), slashLoad("opik:opik", "body two")...) + + loads := buildLoadedSkillBodies(entries) + if len(loads) != 2 { + t.Fatalf("want 2 load events for 2 slash loads, got %d", len(loads)) + } + if loads[0].ToolUseID == loads[1].ToolUseID { + t.Errorf("load events must have distinct ids, both %q", loads[0].ToolUseID) + } + for _, l := range loads { + if !strings.HasPrefix(l.ToolUseID, "slash:") { + t.Errorf("slash load id = %q, want slash:", l.ToolUseID) + } + } +} diff --git a/src/dryrun_test.go b/src/dryrun_test.go index 3e5a1be..cd265de 100644 --- a/src/dryrun_test.go +++ b/src/dryrun_test.go @@ -18,17 +18,13 @@ func TestDryRunOnTestThread(t *testing.T) { t.Fatal(err) } snaps := domainSnapshotsFromEntries(entries, entries) - for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments", "output_tokens"} { + for _, domain := range []string{"billing", "cc_builtin"} { fmt.Printf("--- %s ---\n", domain) if snaps[domain] == nil { fmt.Println("(nil)") continue } - out := snaps[domain] - if domain == "tools" { - out = map[string]interface{}{"summary": snaps[domain]["summary"]} - } - b, _ := json.MarshalIndent(out, "", " ") + b, _ := json.MarshalIndent(snaps[domain], "", " ") fmt.Println(string(b)) } } diff --git a/src/environment.go b/src/environment.go new file mode 100644 index 0000000..5f112f5 --- /dev/null +++ b/src/environment.go @@ -0,0 +1,42 @@ +package main + +import ( + "fmt" + "os/exec" + "runtime" + "strings" +) + +// environmentBlockText approximates the dynamic environment block Claude +// Code injects into its system prompt every session: working directory, +// platform/OS, and a git status snapshot with recent commits. We never see +// the real text, but its inputs are all local, so a faithful reconstruction +// sized via measuredOrEstimate (anchored when stable) lets cc.billing show +// the env block as its own static_overhead item — and surfaces that a repo +// with a long status/commit log pays for it on every request. +func environmentBlockText() string { + cwd := inferCwd() + if cwd == "" { + return "" + } + var b strings.Builder + fmt.Fprintf(&b, "Primary working directory: %s\n", cwd) + fmt.Fprintf(&b, "Platform: %s\n", runtime.GOOS) + if out, err := exec.Command("uname", "-sr").Output(); err == nil { + fmt.Fprintf(&b, "OS Version: %s\n", strings.TrimSpace(string(out))) + } + + if branch := git(cwd, "branch", "--show-current"); branch != "" { + fmt.Fprintf(&b, "Is a git repository: true\n") + fmt.Fprintf(&b, "Current branch: %s\n", branch) + if status := git(cwd, "status", "--porcelain"); status != "" { + fmt.Fprintf(&b, "Status:\n%s\n", status) + } else { + b.WriteString("Status:\n(clean)\n") + } + if log := git(cwd, "log", "--oneline", "-5"); log != "" { + fmt.Fprintf(&b, "Recent commits:\n%s\n", log) + } + } + return b.String() +} diff --git a/src/extractors.go b/src/extractors.go index 1a86a0f..85e92eb 100644 --- a/src/extractors.go +++ b/src/extractors.go @@ -76,7 +76,7 @@ func extractMemorySnapshot() map[string]interface{} { // tokenize as separate short tokens — denser than a skill body // (3.5) or prose (3.9). Calibrated against /context's "Memory // files" rows across 7 rule files (mean 2.37 chars/token). - tokens := tokEstimateAs(s, "memory_file") + tokens := measuredOrEstimate(s, "memory_file") files = append(files, map[string]interface{}{ "path": p, "sha256": sha256hex(s), @@ -182,7 +182,7 @@ func extractAgentsSnapshot() map[string]interface{} { if displayName == "" { displayName = f.basename } - tokens := tokEstimateAs(meta, "agent_frontmatter") + tokens := measuredOrEstimate(meta, "agent_frontmatter") agents = append(agents, map[string]interface{}{ "name": f.nsPrefix + displayName, "path": f.path, @@ -290,242 +290,8 @@ func readInstalledPluginPaths(home string) map[string]string { return out } -// extractThinkingSnapshot aggregates thinking-block tokens bucketed by effort -// level. Level is derived from actual thinking tokens per LLM call (the -// transcript does not expose the requested budget_tokens). -// -// Buckets: minimal ≤500, light 501–3 000, medium 3 001–10 000, heavy >10 000. -// -// `cc.thinking.{summary, by_level}`. -func extractThinkingSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} { - if parsed == nil { - parsed = ParseAssistantMessages(entries) - DeduplicateUsage(parsed) - } - - // Sum thinking tokens per LLM call (MessageID). - callThinking := map[string]int{} - anonTokens := 0 - for _, p := range parsed { - if p.ContentType != "thinking" { - continue - } - if p.MessageID == "" { - anonTokens += p.AttributedOutputTokens - continue - } - callThinking[p.MessageID] += p.AttributedOutputTokens - } - if anonTokens > 0 { - callThinking["__anon"] = anonTokens - } - if len(callThinking) == 0 { - return nil - } - - type levelGroup struct{ calls, tokens int } - byLevel := map[string]*levelGroup{} - totalTokens, totalCalls := 0, 0 - - for _, tok := range callThinking { - l := thinkingLevel(tok) - g, ok := byLevel[l] - if !ok { - g = &levelGroup{} - byLevel[l] = g - } - g.calls++ - g.tokens += tok - totalTokens += tok - totalCalls++ - } - - order := []string{"minimal", "light", "medium", "heavy"} - byLevelOut := make([]map[string]interface{}, 0, len(byLevel)) - for _, l := range order { - g, ok := byLevel[l] - if !ok { - continue - } - byLevelOut = append(byLevelOut, map[string]interface{}{ - "level": l, - "tokens": g.tokens, - "call_count": g.calls, - }) - } - - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": totalTokens, - "call_count": totalCalls, - }, - "by_level": byLevelOut, - } -} - -func thinkingLevel(tokens int) string { - switch { - case tokens > 10000: - return "heavy" - case tokens > 3000: - return "medium" - case tokens > 500: - return "light" - default: - return "minimal" - } -} - -// extractToolResultsSnapshot aggregates tool_result bytes grouped by the -// tool that produced them. `cc.tool_results.{summary, by_tool}`. -// -// Two result-delivery shapes need special handling: -// - Normal tool_result: user message content block with type="tool_result". -// - ToolSearch: result is delivered as a `deferred_tools_delta` attachment -// (addedLines + addedNames). We attribute that delta's addedLines size -// to the ToolSearch tool_use that immediately preceded it. -func extractToolResultsSnapshot(entries []TranscriptEntry) map[string]interface{} { - toolNames := map[string]string{} - for _, e := range entries { - if e.Type != "assistant" || e.Message == nil { - continue - } - for _, c := range e.Message.Content { - if c.Type == "tool_use" && c.ID != "" { - toolNames[c.ID] = c.Name - } - } - } - - type group struct { - tokens, count int - } - byTool := map[string]*group{} - totalTokens, totalCount := 0, 0 - - // Walk entries in order. Pair ToolSearch tool_uses with the - // `deferred_tools_delta` that immediately follows them (no intervening - // non-delta event). Two protections against mis-attribution: - // - // 1. Pending IDs are a FIFO queue so back-to-back ToolSearches each - // get their own delta — neither is silently overwritten. - // 2. Any event between the ToolSearch and the delta — a regular - // tool_result, a non-delta attachment, an assistant message — - // drains the queue, so an unrelated delta (e.g. one triggered by - // an MCP toggle) can't be mis-attributed to a stale ToolSearch. - var pendingToolSearches []string // tool_use IDs awaiting a delta - resultedIDs := map[string]bool{} - for _, e := range entries { - switch e.Type { - case "assistant": - if e.Message == nil { - // Empty assistant entry — drain pending queue (no - // immediate delta means the ToolSearch's result must have - // already arrived as a normal tool_result, handled below). - pendingToolSearches = pendingToolSearches[:0] - continue - } - drainedThisEntry := false - for _, c := range e.Message.Content { - if c.Type == "tool_use" && c.Name == "ToolSearch" { - if !drainedThisEntry { - pendingToolSearches = pendingToolSearches[:0] - drainedThisEntry = true - } - pendingToolSearches = append(pendingToolSearches, c.ID) - } else if c.Type == "tool_use" { - // Some other tool_use sits between ToolSearch and a - // future delta — drain. - pendingToolSearches = pendingToolSearches[:0] - } - } - case "user": - if e.Message == nil { - pendingToolSearches = pendingToolSearches[:0] - continue - } - for _, c := range e.Message.Content { - if c.Type != "tool_result" { - continue - } - name := toolNames[c.ToolUseID] - if name == "" { - name = "unknown" - } - tokens := resultTokens(c.Content) - g, exists := byTool[name] - if !exists { - g = &group{} - byTool[name] = g - } - g.tokens += tokens - g.count++ - totalTokens += tokens - totalCount++ - resultedIDs[c.ToolUseID] = true - } - // Any user message (with tool_result OR otherwise) breaks the - // ToolSearch→delta adjacency, so drain. - pendingToolSearches = pendingToolSearches[:0] - case "attachment": - if e.Attachment == nil { - continue - } - if e.Attachment.Type != "deferred_tools_delta" { - // Non-delta attachment between ToolSearch and a delta — - // breaks adjacency. - pendingToolSearches = pendingToolSearches[:0] - continue - } - if len(pendingToolSearches) == 0 { - continue - } - // Synthesize a ToolSearch tool_result from the delta payload — - // the addedLines text is exactly what the model sees. - payload := strings.Join(e.Attachment.AddedLines, "\n") - if payload == "" { - payload = strings.Join(e.Attachment.AddedNames, "\n") - } - pendingID := pendingToolSearches[0] - pendingToolSearches = pendingToolSearches[1:] - if payload == "" { - continue - } - tokens := tokEstimateAs(payload, "deferred_tools_payload") - g, exists := byTool["ToolSearch"] - if !exists { - g = &group{} - byTool["ToolSearch"] = g - } - g.tokens += tokens - g.count++ - totalTokens += tokens - totalCount++ - resultedIDs[pendingID] = true - } - } - if totalCount == 0 { - return nil - } - - byToolOut := make([]map[string]interface{}, 0, len(byTool)) - for name, g := range byTool { - byToolOut = append(byToolOut, map[string]interface{}{ - "name": name, - "tokens": g.tokens, - "count": g.count, - }) - } - sort.Slice(byToolOut, func(i, j int) bool { - return byToolOut[i]["tokens"].(int) > byToolOut[j]["tokens"].(int) - }) - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": totalTokens, - "count": totalCount, - }, - "by_tool": byToolOut, - } +type toolResultGroup struct { + tokens, count int } // resultTokens estimates the token cost of a tool_result.content payload, @@ -555,43 +321,6 @@ func resultTokens(content interface{}) int { } } -// extractUserPromptsSnapshot returns the per-turn user-text contribution. -// Tool results don't count here (they're under cc.tool_results); skill -// bodies don't count either (they're under cc.skills.loaded). Without -// excluding skill bodies, a `Skill` tool_use would inflate user_prompts -// by the entire skill body — 100K+ tokens for claude-api. -// `cc.user_prompts.summary`. -func extractUserPromptsSnapshot(entries []TranscriptEntry) map[string]interface{} { - skillBodyHashes := skillBodyHashSet(entries) - - totalTokens, count := 0, 0 - for _, e := range entries { - if e.Type != "user" || e.Message == nil { - continue - } - for _, c := range e.Message.Content { - if c.Type != "text" { - continue - } - if skillBodyHashes[sha256hex(c.Text)] { - continue - } - totalTokens += tokEstimateAs(c.Text, "user_prompt") - count++ - } - } - if count == 0 { - return nil - } - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": totalTokens, - "count": count, - "bucket": promptBucket(totalTokens), - }, - } -} - // skillBodyHashSet returns the set of sha256 hashes of every skill body // loaded this session — across both Skill-tool-use loads and slash-command // loads. Comparing by hash (rather than the raw string) is safer against @@ -623,196 +352,4 @@ func promptBucket(tokens int) string { } } -// extractFileAttachmentsSnapshot returns @-mentioned + system-injected file -// attachments this turn grouped by file extension. Skill bodies are NOT here — -// they go under cc.skills.loaded. `cc.file_attachments.{summary, by_type}`. -func extractFileAttachmentsSnapshot(entries []TranscriptEntry) map[string]interface{} { - type group struct{ tokens, count int } - byExt := map[string]*group{} - total, fileCount := 0, 0 - - for _, e := range entries { - if e.Type != "attachment" || e.Attachment == nil { - continue - } - if e.Attachment.Type != "file" { - continue - } - var wrapper struct { - File struct { - Path string `json:"path,omitempty"` - Content string `json:"content,omitempty"` - } `json:"file"` - } - if err := json.Unmarshal(e.Attachment.Content, &wrapper); err != nil { - continue - } - tokens := tokEstimate(wrapper.File.Content) - - ext := strings.ToLower(filepath.Ext(wrapper.File.Path)) - if ext == "" { - ext = "other" - } - - g, ok := byExt[ext] - if !ok { - g = &group{} - byExt[ext] = g - } - g.tokens += tokens - g.count++ - total += tokens - fileCount++ - } - - if fileCount == 0 { - return nil - } - - byTypeOut := make([]map[string]interface{}, 0, len(byExt)) - for ext, g := range byExt { - byTypeOut = append(byTypeOut, map[string]interface{}{ - "ext": ext, - "tokens": g.tokens, - "file_count": g.count, - }) - } - sort.Slice(byTypeOut, func(i, j int) bool { - return byTypeOut[i]["tokens"].(int) > byTypeOut[j]["tokens"].(int) - }) - - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": total, - "file_count": fileCount, - }, - "by_type": byTypeOut, - } -} - -// extractPriorAssistantSnapshot is the cumulative cost of prior assistant -// output in the session — what gets replayed every turn. -// `cc.prior_assistant.summary`. -func extractPriorAssistantSnapshot(fullEntries, turnEntries []TranscriptEntry) map[string]interface{} { - sessionTokens, sessionMsgs := assistantOutputTotals(fullEntries) - turnTokens, turnMsgs := assistantOutputTotals(turnEntries) - priorTokens := sessionTokens - turnTokens - priorMsgs := sessionMsgs - turnMsgs - if priorTokens < 0 { - priorTokens = 0 - } - if priorMsgs < 0 { - priorMsgs = 0 - } - if priorTokens == 0 && priorMsgs == 0 { - return nil - } - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": priorTokens, - "message_count": priorMsgs, - }, - } -} - -func assistantOutputTotals(entries []TranscriptEntry) (tokens, msgs int) { - for _, e := range entries { - if e.Type != "assistant" || e.Message == nil || e.Message.Usage == nil { - continue - } - tokens += e.Message.Usage.OutputTokens - msgs++ - } - return -} - -// extractAssistantTextSnapshot returns the per-turn text-block contribution. -// `cc.assistant_text.summary`. -func extractAssistantTextSnapshot(entries []TranscriptEntry) map[string]interface{} { - total, count := 0, 0 - for _, e := range entries { - if e.Type != "assistant" || e.Message == nil { - continue - } - for _, c := range e.Message.Content { - if c.Type == "text" { - total += tokEstimateAs(c.Text, "assistant_text") - count++ - } - } - } - if count == 0 { - return nil - } - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": total, - "block_count": count, - }, - } -} - -// extractOutputTokensSnapshot aggregates attributed output tokens by category -// at the trace level. This lets the Sankey visualization use -// sum(metadata.cc.output_tokens.by_category.*) directly without span -// aggregation. `cc.output_tokens.{summary, by_category}`. -// -// Categories: -// - thinking — extended thinking blocks -// - assistant_text — visible text responses -// - builtin_tool_use — CC built-in tools (Bash, Read, Edit, …) -// - mcp_tool_use — MCP tool calls (name prefix "mcp__") -// - skill_invocations — Skill tool invocations -// -// `parsed` should be the dedup-applied output of ParseAssistantMessages + -// DeduplicateUsage. Pass nil to reparse from entries. -func extractOutputTokensSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} { - if parsed == nil { - parsed = ParseAssistantMessages(entries) - DeduplicateUsage(parsed) - } - - var ( - thinking int - assistantText int - builtinToolUse int - mcpToolUse int - skillInvocations int - ) - - for _, p := range parsed { - tok := p.AttributedOutputTokens - switch p.ContentType { - case "thinking": - thinking += tok - case "text": - assistantText += tok - case "tool_use": - switch { - case strings.HasPrefix(p.Content.Name, "mcp__"): - mcpToolUse += tok - case p.Content.Name == "Skill": - skillInvocations += tok - default: - builtinToolUse += tok - } - } - } - - total := thinking + assistantText + builtinToolUse + mcpToolUse + skillInvocations - if total == 0 { - return nil - } - return map[string]interface{}{ - "summary": map[string]interface{}{ - "total_tokens": total, - }, - "by_category": map[string]interface{}{ - "thinking": thinking, - "assistant_text": assistantText, - "builtin_tool_use": builtinToolUse, - "mcp_tool_use": mcpToolUse, - "skill_invocations": skillInvocations, - }, - } -} +type fileAttachmentGroup struct{ tokens, count int } diff --git a/src/extractors_test.go b/src/extractors_test.go index b7a7967..36a6c4c 100644 --- a/src/extractors_test.go +++ b/src/extractors_test.go @@ -6,197 +6,6 @@ import ( "testing" ) -func TestExtractOutputTokensSnapshot(t *testing.T) { - // One LLM call with thinking + text + builtin tool + MCP tool + Skill. - // All blocks share the same message.id so DeduplicateUsage can attribute. - const msgID = "msg_abc123" - entries := []TranscriptEntry{ - { - Type: "assistant", - UUID: "u1", - Message: &Message{ - ID: msgID, - Model: "claude-opus-4-8", - Usage: &Usage{OutputTokens: 1000}, - Content: ContentSlice{ - {Type: "thinking", Thinking: "..."}, - {Type: "text", Text: "hello world"}, - {Type: "tool_use", ID: "t1", Name: "Bash", Input: map[string]interface{}{"command": "ls"}}, - {Type: "tool_use", ID: "t2", Name: "mcp__slack__send", Input: map[string]interface{}{}}, - {Type: "tool_use", ID: "t3", Name: "Skill", Input: map[string]interface{}{}}, - }, - }, - }, - } - - parsed := ParseAssistantMessages(entries) - DeduplicateUsage(parsed) - - snap := extractOutputTokensSnapshot(entries, parsed) - if snap == nil { - t.Fatal("expected non-nil snapshot") - } - - summary, _ := snap["summary"].(map[string]interface{}) - if summary == nil { - t.Fatal("missing summary") - } - total, _ := summary["total_tokens"].(int) - if total != 1000 { - t.Errorf("total_tokens = %d, want 1000", total) - } - - cat, _ := snap["by_category"].(map[string]interface{}) - if cat == nil { - t.Fatal("missing by_category") - } - - // Sum of all categories must equal total. - catSum := 0 - for _, key := range []string{"thinking", "assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} { - v, _ := cat[key].(int) - catSum += v - } - if catSum != total { - t.Errorf("sum(by_category) = %d, want %d (total_tokens)", catSum, total) - } - - // thinking must be > 0 (leftover after non-thinking blocks). - if thinking, _ := cat["thinking"].(int); thinking == 0 { - t.Error("thinking should be > 0") - } - - // Each non-thinking category must have been assigned something. - for _, key := range []string{"assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} { - if v, _ := cat[key].(int); v == 0 { - t.Errorf("by_category[%s] = 0, expected > 0", key) - } - } -} - -func TestExtractOutputTokensSnapshotNilOnEmpty(t *testing.T) { - snap := extractOutputTokensSnapshot(nil, nil) - if snap != nil { - t.Errorf("expected nil on empty entries, got %v", snap) - } -} - -func TestExtractThinkingSnapshotByLevel(t *testing.T) { - // Three LLM calls with different thinking budgets: - // msg1 → 200 tokens (minimal) - // msg2 → 2000 tokens (light) - // msg3 → 15000 tokens (heavy) - makeEntry := func(msgID string, thinkingTokens int) TranscriptEntry { - return TranscriptEntry{ - Type: "assistant", - Message: &Message{ - ID: msgID, - Model: "claude-sonnet-4-6", - Usage: &Usage{OutputTokens: thinkingTokens}, - Content: ContentSlice{ - {Type: "thinking", Thinking: "..."}, - }, - }, - } - } - entries := []TranscriptEntry{ - makeEntry("msg1", 200), - makeEntry("msg2", 2000), - makeEntry("msg3", 15000), - } - parsed := ParseAssistantMessages(entries) - DeduplicateUsage(parsed) - - snap := extractThinkingSnapshot(entries, parsed) - if snap == nil { - t.Fatal("expected non-nil snapshot") - } - - summary := snap["summary"].(map[string]interface{}) - if total := summary["total_tokens"].(int); total != 17200 { - t.Errorf("total_tokens = %d, want 17200", total) - } - if calls := summary["call_count"].(int); calls != 3 { - t.Errorf("call_count = %d, want 3", calls) - } - - byLevel := snap["by_level"].([]map[string]interface{}) - levels := map[string]map[string]interface{}{} - for _, l := range byLevel { - levels[l["level"].(string)] = l - } - - if _, ok := levels["minimal"]; !ok { - t.Error("expected minimal level") - } - if _, ok := levels["light"]; !ok { - t.Error("expected light level") - } - if _, ok := levels["heavy"]; !ok { - t.Error("expected heavy level") - } - if _, ok := levels["medium"]; ok { - t.Error("unexpected medium level") - } - - if levels["minimal"]["call_count"].(int) != 1 { - t.Errorf("minimal call_count = %d, want 1", levels["minimal"]["call_count"]) - } - if levels["heavy"]["tokens"].(int) != 15000 { - t.Errorf("heavy tokens = %d, want 15000", levels["heavy"]["tokens"]) - } -} - -func TestExtractFileAttachmentsSnapshotByType(t *testing.T) { - makeAttachment := func(path, content string) TranscriptEntry { - raw, _ := json.Marshal(map[string]interface{}{ - "file": map[string]string{"path": path, "content": content}, - }) - return TranscriptEntry{ - Type: "attachment", - Attachment: &Attachment{ - Type: "file", - Content: raw, - }, - } - } - entries := []TranscriptEntry{ - makeAttachment("/repo/main.go", "package main\nfunc main() {}"), - makeAttachment("/repo/util.go", "package main"), - makeAttachment("/repo/README.md", "# Hello"), - makeAttachment("/repo/Makefile", "build:"), // no extension → "other" - } - - snap := extractFileAttachmentsSnapshot(entries) - if snap == nil { - t.Fatal("expected non-nil snapshot") - } - - summary := snap["summary"].(map[string]interface{}) - if fc := summary["file_count"].(int); fc != 4 { - t.Errorf("file_count = %d, want 4", fc) - } - - byType := snap["by_type"].([]map[string]interface{}) - exts := map[string]map[string]interface{}{} - for _, row := range byType { - exts[row["ext"].(string)] = row - } - - if _, ok := exts[".go"]; !ok { - t.Error("expected .go entry") - } - if _, ok := exts[".md"]; !ok { - t.Error("expected .md entry") - } - if _, ok := exts["other"]; !ok { - t.Error("expected other entry for Makefile") - } - if exts[".go"]["file_count"].(int) != 2 { - t.Errorf(".go file_count = %d, want 2", exts[".go"]["file_count"]) - } -} - func TestExtractAgentsSnapshotPrefersFrontmatterName(t *testing.T) { home := t.TempDir() cwd := t.TempDir() @@ -280,4 +89,3 @@ description: Should not show up. t.Errorf("bad:ghost must be filtered out (disabled plugin), got %v", names) } } - diff --git a/src/main.go b/src/main.go index 8afae26..da8e7bf 100644 --- a/src/main.go +++ b/src/main.go @@ -25,8 +25,8 @@ type HookInput struct { Prompt string `json:"prompt"` AgentID string `json:"agent_id"` AgentType string `json:"agent_type"` - AgentTranscriptPath string `json:"agent_transcript_path"` - CustomInstructions string `json:"custom_instructions"` + AgentTranscriptPath string `json:"agent_transcript_path"` + CustomInstructions string `json:"custom_instructions"` } var ( @@ -49,6 +49,14 @@ func main() { // background subprocess to ask claude `/context` and PATCH the result // onto the trace. This path never reads stdin; it shells out to claude // and exits. + // Detached token-count mode: measure exact token counts for anchor + // candidates via the free count_tokens endpoint and persist them for + // the next flush (see count_tokens.go). + if os.Getenv("OPIK_CC_TOKEN_COUNT") == "1" { + runTokenCountMode() + os.Exit(0) + } + if os.Getenv("OPIK_CC_CONTEXT_FETCH") == "1" { var err error config, err = LoadConfig() @@ -233,6 +241,9 @@ func onStop() { if err := spawnDetachedContextFetch(state.SessionID, state.TraceID, state.Cwd); err != nil { debugLog("spawn context fetch: %v", err) } + if err := spawnDetachedTokenCount(state.SessionID); err != nil { + debugLog("spawn token count: %v", err) + } debugLog("done") } @@ -727,12 +738,12 @@ func processTranscriptEntries(traceID string, entries []TranscriptEntry, parentS if p.Usage != nil && span.Usage == nil { span.Usage = map[string]int{ - "prompt_tokens": p.Usage.InputTokens, - "completion_tokens": p.Usage.OutputTokens, - "total_tokens": p.Usage.InputTokens + p.Usage.OutputTokens, - "original_usage.input_tokens": p.Usage.InputTokens, - "original_usage.output_tokens": p.Usage.OutputTokens, - "original_usage.cache_read_input_tokens": p.Usage.CacheReadInputTokens, + "prompt_tokens": p.Usage.InputTokens, + "completion_tokens": p.Usage.OutputTokens, + "total_tokens": p.Usage.InputTokens + p.Usage.OutputTokens, + "original_usage.input_tokens": p.Usage.InputTokens, + "original_usage.output_tokens": p.Usage.OutputTokens, + "original_usage.cache_read_input_tokens": p.Usage.CacheReadInputTokens, "original_usage.cache_creation_input_tokens": p.Usage.CacheCreationInputTokens, } span.Provider = "anthropic" @@ -747,34 +758,25 @@ func processTranscriptEntries(traceID string, entries []TranscriptEntry, parentS return spans } -// domainSnapshotsFromEntries returns every per-domain snapshot keyed by +// domainSnapshotsFromEntries returns the per-domain snapshots keyed by // domain name. Called by postTraceMetrics for the trace-level write and -// by dryrun_test for offline validation. The parsed+deduped slice is -// computed once and passed to the few extractors that need it -// (extractThinkingSnapshot), avoiding redundant work. +// by dryrun_test for offline validation. func domainSnapshotsFromEntries(fullEntries, turnEntries []TranscriptEntry) map[string]map[string]interface{} { - parsedTurn := ParseAssistantMessages(turnEntries) - DeduplicateUsage(parsedTurn) - return map[string]map[string]interface{}{ - "skills": BuildSkillsSnapshot(fullEntries), - "tools": extractToolsSnapshot(fullEntries), - "memory": extractMemorySnapshot(), - "agents": extractAgentsSnapshot(), - "thinking": extractThinkingSnapshot(turnEntries, parsedTurn), - "tool_results": extractToolResultsSnapshot(turnEntries), - "user_prompts": extractUserPromptsSnapshot(turnEntries), - "file_attachments": extractFileAttachmentsSnapshot(turnEntries), - "prior_assistant": extractPriorAssistantSnapshot(fullEntries, turnEntries), - "assistant_text": extractAssistantTextSnapshot(turnEntries), - "output_tokens": extractOutputTokensSnapshot(turnEntries, parsedTurn), + // The product schema: per-call positional cache-tier attribution. + // Tier tokens are per-call billing events — additive across traces + // and exactly reconciled to API usage (see billing.go). Every UI + // lane value, breakdown item, stacked definition/usage segment and + // count comes from this block. + "billing": computeBillingSnapshot(fullEntries, turnEntries), // cc_builtin covers the bundled system-prompt + tool-catalog cost // /context reports under "System prompt" / "System tools" / // "System tools (deferred)". These never appear in the transcript // (the binary holds the schemas internally), so values are // version-keyed approximations — marked `estimated: true` in the - // payload so the FE can render them as such. - "cc_builtin": extractCCBuiltinSnapshot(fullEntries), + // payload so the FE can render them as such. Kept for the Static + // overhead disclaimer and calibration diagnostics. + "cc_builtin": extractCCBuiltinSnapshot(fullEntries), } } @@ -1068,4 +1070,3 @@ func debugLog(format string, args ...interface{}) { fmt.Fprintf(f, "[%s] ", ts) fmt.Fprintf(f, format+"\n", args...) } - diff --git a/src/metrics.go b/src/metrics.go index 1d9adff..4f0bc7a 100644 --- a/src/metrics.go +++ b/src/metrics.go @@ -245,7 +245,6 @@ func postTraceMetrics(state *State) { state.TraceID[:8], repo, branch, commits, insC+delC, files, authored, overwritten, len(metrics)) } - // mergeMetadataCC reads the trace's current metadata, merges new keys into // the `cc` block (preserving identity already written at trace creation and // any non-cc metadata Opik may have added), and PATCHes the trace. @@ -293,4 +292,4 @@ func inferCwd() string { return d } return "" -} \ No newline at end of file +} diff --git a/src/plugin_catalog.go b/src/plugin_catalog.go index f45d363..c240309 100644 --- a/src/plugin_catalog.go +++ b/src/plugin_catalog.go @@ -34,7 +34,7 @@ type pluginCatalogTokens struct { } type pluginCatalogComponent struct { - Name string `json:"name"` + Name string `json:"name"` Chars pluginCatalogTokens `json:"chars"` } diff --git a/src/settings.go b/src/settings.go index ddeda84..fb69d24 100644 --- a/src/settings.go +++ b/src/settings.go @@ -20,11 +20,11 @@ import ( // // Resolution order (Claude Code's documented layering — later wins): // -// 1. Managed (org-pushed) OS-specific (see managedSettingsPaths) -// 2. User ~/.claude/settings.json -// 3. User local ~/.claude/settings.local.json -// 4. Project /.claude/settings.json -// 5. Project local /.claude/settings.local.json +// 1. Managed (org-pushed) OS-specific (see managedSettingsPaths) +// 2. User ~/.claude/settings.json +// 3. User local ~/.claude/settings.local.json +// 4. Project /.claude/settings.json +// 5. Project local /.claude/settings.local.json // // A plugin counts as enabled iff the merged map's value is true (boolean // or "true"/"1" string). Anything else — including unset, false, or an diff --git a/src/settings_test.go b/src/settings_test.go index 2e954a5..794fb4a 100644 --- a/src/settings_test.go +++ b/src/settings_test.go @@ -20,10 +20,10 @@ func TestReadEnabledPluginsField(t *testing.T) { }`) got := readEnabledPluginsField(path) wantEnabled := map[string]bool{ - "on@mp": true, - "off@mp": false, - "strtrue@mp": true, - "str1@mp": true, + "on@mp": true, + "off@mp": false, + "strtrue@mp": true, + "str1@mp": true, "strfalse@mp": false, } for k, want := range wantEnabled { diff --git a/src/skill_hash.go b/src/skill_hash.go index 3031f6e..bb48a29 100644 --- a/src/skill_hash.go +++ b/src/skill_hash.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" ) @@ -169,32 +170,24 @@ func extractLoadedSkills(entries []TranscriptEntry) []SkillEvent { if len(loads) == 0 { return nil } - // Latest-wins per skill name (mirrors how the model sees the most - // recent body). FirstSeenIdx tracks the first appearance for ordering - // stability across the FE. - firstIdx := map[string]int{} - latest := map[string]loadedSkillBody{} - order := []string{} + // One event per load (OPIK-6873): repeat loads of the same skill each + // inject their body into context and each replays from then on, so + // collapsing to latest-wins under-counted both tokens and load counts. + // Every event carries a stable unique ToolUseID (toolu_… or + // "slash:") so consumers can dedupe events across the cumulative + // per-trace loaded[] arrays. + out := make([]SkillEvent, 0, len(loads)) for _, l := range loads { - if _, ok := firstIdx[l.Name]; !ok { - firstIdx[l.Name] = l.Index - order = append(order, l.Name) - } - latest[l.Name] = l - } - out := make([]SkillEvent, 0, len(order)) - for _, name := range order { - l := latest[name] - path, _ := resolveSkillBody(name) + path, _ := resolveSkillBody(l.Name) source := "bundled" if path != "" { source = "listing" } ev := SkillEvent{ - Name: name, + Name: l.Name, SHA256: sha256hex(l.Body), BodyTokens: tokEstimateAs(l.Body, "skill_body"), - FirstSeenIdx: firstIdx[name], + FirstSeenIdx: l.Index, Source: source, Path: path, ToolUseID: l.ToolUseID, @@ -205,7 +198,7 @@ func extractLoadedSkills(entries []TranscriptEntry) []SkillEvent { // cross-check — CC's own tokenizer produced that number when it // built the cache, so it's a tighter signal than our 3.5 ratio // for skills with rich code blocks or markdown tables. - if pluginShort, leaf, ok := splitNamespacedSkillName(name); ok { + if pluginShort, leaf, ok := splitNamespacedSkillName(l.Name); ok { home, _ := os.UserHomeDir() model := mostRecentModelFromEntries(entries) if comp, fullKey, hit := pluginCatalogLookup(home, pluginShort, "skill", leaf); hit { @@ -249,8 +242,9 @@ func mostRecentModelFromEntries(entries []TranscriptEntry) string { // loadedSkillBody is one resolved skill load. Index is the transcript entry // index of the user message that carried the body — used by FirstSeenIdx -// in extractLoadedSkills for stable ordering. ToolUseID is populated for -// Skill-tool-use loads, empty for slash-command loads. +// in extractLoadedSkills for stable ordering. ToolUseID is the toolu_… id +// for Skill-tool-use loads and a synthetic "slash:" id for +// slash-command loads, so every load event has a stable unique identity. type loadedSkillBody struct { Name string Body string @@ -411,9 +405,16 @@ func slashCommandSkillLoads(entries []TranscriptEntry) []loadedSkillBody { } if strings.HasPrefix(c.Text, slashCommandPrefix) { out = append(out, loadedSkillBody{ - Name: pendingName, - Body: c.Text, - Index: i, + Name: pendingName, + Body: c.Text, + // Synthetic load id (OPIK-6873): slash loads have no + // tool_use, but consumers dedupe load events across the + // cumulative per-trace loaded[] arrays via this id. + // The transcript entry index is stable for the session + // (append-only file), so the same load keeps the same + // id on every later trace. + ToolUseID: "slash:" + strconv.Itoa(i), + Index: i, }) } // Whether we matched a body or saw an unrelated text block, diff --git a/src/skill_hash_test.go b/src/skill_hash_test.go index 3a450b6..e8f1615 100644 --- a/src/skill_hash_test.go +++ b/src/skill_hash_test.go @@ -90,8 +90,8 @@ func TestBuildLoadedSkillBodiesSlashCommand(t *testing.T) { if !strings.HasPrefix(loads[0].Body, "Base directory") { t.Errorf("body should start with the prefix the model actually sees; got %q", loads[0].Body[:30]) } - if loads[0].ToolUseID != "" { - t.Errorf("slash-command load should have empty ToolUseID, got %q", loads[0].ToolUseID) + if !strings.HasPrefix(loads[0].ToolUseID, "slash:") { + t.Errorf("slash-command load should have a synthetic slash: ToolUseID, got %q", loads[0].ToolUseID) } } diff --git a/src/state.go b/src/state.go index db2e450..abd6a7e 100644 --- a/src/state.go +++ b/src/state.go @@ -10,8 +10,8 @@ import ( type State struct { TraceID string `json:"trace_id"` StartTime string `json:"start_time"` - StartUnix int64 `json:"start_unix,omitempty"` // wall-clock seconds the current trace started - PromptHash string `json:"prompt_hash,omitempty"` // sha256 of the user prompt for the current trace + StartUnix int64 `json:"start_unix,omitempty"` // wall-clock seconds the current trace started + PromptHash string `json:"prompt_hash,omitempty"` // sha256 of the user prompt for the current trace SessionID string `json:"session_id"` Transcript string `json:"transcript"` StartLine int `json:"start_line"` diff --git a/src/tools_extract.go b/src/tools_extract.go index f95bc64..315cc8d 100644 --- a/src/tools_extract.go +++ b/src/tools_extract.go @@ -95,9 +95,9 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} { } var avail []toolEntry builtinCount, mcpCount := 0, 0 - serverTools := map[string][]string{} // server → tool names - serverLines := map[string]string{} // server → joined description lines - builtinLines, mcpLines := "", "" // sources → joined lines + serverTools := map[string][]string{} // server → tool names + serverLines := map[string]string{} // server → joined description lines + builtinLines, mcpLines := "", "" // sources → joined lines for name, line := range available { // SplitN("__", 3) — assumes server names contain no `__`. Tool @@ -156,12 +156,12 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} { mcpInstructionsTokensTotal += instrTokens mcpEstimatedSchemaTokensTotal += estSchema byServer = append(byServer, map[string]interface{}{ - "server": server, - "tool_count": len(tools), - "schema_tokens": tokEstimateAs(serverLines[server], "deferred_tools_payload"), - "instructions_tokens": instrTokens, - "estimated_schema_tokens": estSchema, - "estimated_total_tokens": instrTokens + estSchema, + "server": server, + "tool_count": len(tools), + "schema_tokens": tokEstimateAs(serverLines[server], "deferred_tools_payload"), + "instructions_tokens": instrTokens, + "estimated_schema_tokens": estSchema, + "estimated_total_tokens": instrTokens + estSchema, }) } sort.Slice(byServer, func(i, j int) bool { @@ -191,18 +191,18 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} { "schema_tokens": builtinSchemaTokens, }, "mcp": map[string]interface{}{ - "available_count": mcpCount, - "schema_tokens": mcpSchemaTokens, - "instructions_tokens": mcpInstructionsTokensTotal, - "estimated_schema_tokens": mcpEstimatedSchemaTokensTotal, + "available_count": mcpCount, + "schema_tokens": mcpSchemaTokens, + "instructions_tokens": mcpInstructionsTokensTotal, + "estimated_schema_tokens": mcpEstimatedSchemaTokensTotal, // estimated_deferred_tokens is the closest equivalent to // /context's "MCP tools (deferred)" row — addedLines we saw // + estimated full-schema overhead per tool. The // instructions block lives at by_source.mcp.instructions_tokens // because /context buckets it under "System tools (deferred)" // on some CC versions; surfacing both lets the FE pick. - "estimated_deferred_tokens": mcpSchemaTokens + mcpEstimatedSchemaTokensTotal, - "estimated": mcpCount > 0, + "estimated_deferred_tokens": mcpSchemaTokens + mcpEstimatedSchemaTokensTotal, + "estimated": mcpCount > 0, }, }, "by_server": byServer, diff --git a/src/transcript.go b/src/transcript.go index 7a864fc..c9d925c 100644 --- a/src/transcript.go +++ b/src/transcript.go @@ -30,21 +30,21 @@ type TranscriptEntry struct { // readdedNames (names re-enabled after a prior removal), and // pendingMcpServers (servers connecting; their tools not yet visible). type Attachment struct { - Type string `json:"type"` - Content json.RawMessage `json:"content,omitempty"` - Names []string `json:"names,omitempty"` - SkillCount int `json:"skillCount,omitempty"` - IsInitial bool `json:"isInitial,omitempty"` - AddedNames []string `json:"addedNames,omitempty"` - AddedLines []string `json:"addedLines,omitempty"` + Type string `json:"type"` + Content json.RawMessage `json:"content,omitempty"` + Names []string `json:"names,omitempty"` + SkillCount int `json:"skillCount,omitempty"` + IsInitial bool `json:"isInitial,omitempty"` + AddedNames []string `json:"addedNames,omitempty"` + AddedLines []string `json:"addedLines,omitempty"` // AddedBlocks carries multi-line text payloads. `mcp_instructions_delta` // uses it for per-server instructions (the always-on context CC injects // for each connected MCP server: capabilities, constraints, etc.). One // block per name, parallel arrays. - AddedBlocks []string `json:"addedBlocks,omitempty"` - RemovedNames []string `json:"removedNames,omitempty"` - ReaddedNames []string `json:"readdedNames,omitempty"` - PendingMcpServers []string `json:"pendingMcpServers,omitempty"` + AddedBlocks []string `json:"addedBlocks,omitempty"` + RemovedNames []string `json:"removedNames,omitempty"` + ReaddedNames []string `json:"readdedNames,omitempty"` + PendingMcpServers []string `json:"pendingMcpServers,omitempty"` } // ContentString decodes Content as a string. Returns "" if Content is missing