diff --git a/bin/opik-logger-darwin-arm64 b/bin/opik-logger-darwin-arm64
index c9b70a2..0d73e53 100755
Binary files a/bin/opik-logger-darwin-arm64 and b/bin/opik-logger-darwin-arm64 differ
diff --git a/docs/builtin-calibration.md b/docs/builtin-calibration.md
new file mode 100644
index 0000000..800b950
--- /dev/null
+++ b/docs/builtin-calibration.md
@@ -0,0 +1,65 @@
+# Calibrating the bundled Claude Code overhead (`cc_builtin`)
+
+The bundled system prompt + built-in tool schemas never appear in the
+transcript, so `cc.billing`'s `static_overhead` lane relies on the
+per-version table in `src/cc_builtin.go` (`ccBuiltinByVersion`). This doc is
+the procedure for refreshing it on each Claude Code release — and, since the
+`Components` field landed, for producing the itemized breakdown the
+dashboard shows.
+
+## Per-release procedure
+
+1. **Capture one real request** on the new CC version, in a project with NO
+   MCP servers connected and auto-memory disabled (so the request is almost
+   pure bundled content). Two known-good capture paths:
+   - [cost-xray](https://github.com/tigerless-labs/cost-xray): transparent
+     local mitmproxy hop; the captured request body contains the full
+     `system` block and `tools` array.
+   - Any HTTPS-intercepting proxy with the CC CLI's proxy env vars.
+2. **Split the `system` block into its named sections** (identity/harness
+   rules, security policy, memory instructions, environment template,
+   session guidance, context management). The section headings are stable
+   markdown headers.
+3. **Measure each section** with the free `count_tokens` endpoint (the
+   plugin's own `countTokensFor` works, or `curl` — auth with
+   `ANTHROPIC_API_KEY` or the CC OAuth token).
+4. **Measure the always-on `tools` array** the same way (count with and
+   without `tools`, diff). Per-tool figures: add tools one at a time.
+5. Add the row:
+
+```go
+"2.1.180": {
+    SystemPromptTokens:        4900,  // Σ prompt sections
+    SystemToolsTokens:         1900,  // tools array
+    SystemToolsDeferredTokens: 11300, // /context's deferred row
+    Components: map[string]int{
+        "identity_and_rules":   2100,
+        "security_policy":      300,
+        "memory_instructions":  800,
+        "environment_template": 200,  // static template only — the dynamic
+                                      // part (cwd, git status) is carved out
+                                      // at runtime as the `environment` item
+        "session_guidance":     600,
+        "context_management":   400,
+        "builtin_tool_schemas": 1900,
+    },
+},
+```
+
+Invariant: `Σ Components == SystemPromptTokens + SystemToolsTokens`.
+
+## Validation
+
+Run a fresh session on the new version and check the trace's
+`cc.billing.lanes.static_overhead.total` against `/context`'s
+"System prompt" + "System tools" rows (expect agreement within ~15%; the
+always-on tool set varies slightly per session config). The `unattributed`
+lane absorbs the difference either way — if it jumps after a CC release,
+this table is stale.
+
+## What NOT to do
+
+Do not derive these numbers as a usage residual ("call-1 usage minus known
+pieces") — that residual absorbs all unobserved request content (system
+reminders, deferred-name listings) and estimation drift; it inflated
+static_overhead ~3.6x when tried (see git history).
diff --git a/src/billing.go b/src/billing.go
new file mode 100644
index 0000000..d4e2f61
--- /dev/null
+++ b/src/billing.go
@@ -0,0 +1,626 @@
+package main
+
+import (
+	"encoding/json"
+	"path/filepath"
+	"sort"
+	"strings"
+)
+
+// cc.billing — exact, billing-native attribution (OPIK-6873 follow-up).
+//
+// Anthropic's prompt caching is prefix-based, so a request's usage splits it
+// into three POSITIONAL segments: [0,R) billed as cache_read, [R,R+W) as
+// cache_creation, the tail as fresh input. For every LLM call in the turn we
+// lay the request out as an ordered list of pieces (static config prefix,
+// then the conversation in transcript order), reconcile the piece sizes to
+// the call's measured usage, and cut by position. Tier tokens are therefore
+// per-call billing events: additive across calls, traces and periods, and
+// they sum EXACTLY to the API-reported usage at every level.
+//
+// Exactness contract, per call:
+//   input:  Σ piece tiers (incl. the explicit `unattributed` tail) ==
+//           input_tokens + cache_read + cache_creation
+//   output: Σ per-block attributed tokens == output_tokens
+//           (DeduplicateUsage normalizes block shares to the measured total)
+//
+// Estimated pieces are scaled down proportionally only when they OVERSHOOT
+// the measured total (assistant blocks are usage-derived and never scaled);
+// undershoot is parked in `unattributed` — a visible bucket holding what we
+// don't parse yet (system reminders, request envelope) plus residual
+// estimation error, placed at the tail of the layout, which is where
+// unobserved content actually sits in the request.
+
+type billingTier struct {
+	cacheRead, cacheCreation, fresh, output float64
+}
+
+type billingKey struct {
+	lane, entity string
+	// "definition" = always-on config that ships regardless of activity;
+	// "usage" = conversation-driven content. The UI stacks the two.
+	kind string
+}
+
+type billingPiece struct {
+	key    billingKey
+	tokens float64
+	exact  bool // usage-derived (assistant blocks): never rescaled
+}
+
+const (
+	unattributedLane = "unattributed"
+	kindDefinition   = "definition"
+	kindUsage        = "usage"
+)
+
+// computeBillingSnapshot returns the `cc.billing` block for the turn, or nil
+// when the turn contains no usage-bearing LLM calls.
+func computeBillingSnapshot(fullEntries, turnEntries []TranscriptEntry) map[string]interface{} {
+	calls := llmCallsInTurn(fullEntries, turnEntries)
+	if len(calls) == 0 {
+		return nil
+	}
+
+	setBillingModel(fullEntries) // anchors are keyed (model, sha)
+	staticPieces := staticPrefixPieces(fullEntries)
+	skillBodyNames := skillBodyNameBySHA(fullEntries)
+	toolNames := toolUseNames(fullEntries)
+	counts := countNewEvents(turnEntries, skillBodyNames, toolNames)
+
+	acc := map[billingKey]*billingTier{}
+	totals := billingTier{}
+	for _, call := range calls {
+		pieces := append(append([]billingPiece{}, staticPieces...),
+			conversationPieces(fullEntries[:call.entryIdx], skillBodyNames, toolNames)...)
+		pieces = reconcileToUsage(pieces, float64(call.read+call.write+call.fresh))
+		cutByPosition(pieces, float64(call.read), float64(call.write), acc)
+
+		attributeOutput(fullEntries[call.entryIdx:call.entryEnd], acc)
+
+		totals.cacheRead += float64(call.read)
+		totals.cacheCreation += float64(call.write)
+		totals.fresh += float64(call.fresh)
+		totals.output += float64(call.output)
+	}
+
+	return renderBillingSnapshot(len(calls), totals, acc, counts)
+}
+
+type billingCall struct {
+	entryIdx                   int // index in fullEntries of the call's FIRST entry: its request is the prefix before it
+	entryEnd                   int // one past the call's LAST entry
+	read, write, fresh, output int
+}
+
+// llmCallsInTurn returns the turn's LLM calls in order, one per message.id,
+// with usage and the message's contiguous entry span within fullEntries.
+// The transcript repeats the same usage on every entry of a multi-block
+// message, so usage is taken once from the first entry seen.
+func llmCallsInTurn(fullEntries, turnEntries []TranscriptEntry) []billingCall {
+	offset := len(fullEntries) - len(turnEntries)
+	var calls []billingCall
+	index := map[string]int{}
+	for i, e := range turnEntries {
+		if e.Type != "assistant" || e.Message == nil {
+			continue
+		}
+		id := e.Message.ID
+		if id == "" {
+			id = e.UUID
+		}
+		if pos, ok := index[id]; ok {
+			calls[pos].entryEnd = offset + i + 1
+			continue
+		}
+		if e.Message.Usage == nil {
+			continue
+		}
+		u := e.Message.Usage
+		index[id] = len(calls)
+		calls = append(calls, billingCall{
+			entryIdx: offset + i,
+			entryEnd: offset + i + 1,
+			read:     u.CacheReadInputTokens,
+			write:    u.CacheCreationInputTokens,
+			fresh:    u.InputTokens,
+			output:   u.OutputTokens,
+		})
+	}
+	return calls
+}
+
+// staticPrefixPieces is the request prefix that precedes the conversation:
+// the bundled system prompt + built-in tool schemas (version-keyed
+// estimates), memory files, agent dispatch blurbs, and MCP schemas. Ordering
+// inside the prefix only matters on cache-cold calls (when the R or R+W
+// boundary falls inside it); on warm calls the whole prefix is cache_read.
+// An unknown cc_builtin version contributes no pieces — that mass then shows
+// up in `unattributed` instead of silently vanishing.
+func staticPrefixPieces(fullEntries []TranscriptEntry) []billingPiece {
+	var out []billingPiece
+	add := func(lane, entity string, tokens int) {
+		if tokens > 0 {
+			out = append(out, billingPiece{billingKey{lane, entity, kindDefinition}, float64(tokens), false})
+		}
+	}
+
+	if consts, matched := ccBuiltinFor(findCCVersion(fullEntries)); matched != "" {
+		if len(consts.Components) > 0 {
+			// Per-release itemization from the calibration capture
+			// (docs/builtin-calibration.md).
+			names := make([]string, 0, len(consts.Components))
+			for name := range consts.Components {
+				names = append(names, name)
+			}
+			sort.Strings(names)
+			for _, name := range names {
+				add("static_overhead", name, consts.Components[name])
+			}
+		} else {
+			// Tier-1 itemization: the environment block (cwd, platform, git
+			// status snapshot) is dynamic but locally reconstructible —
+			// carve it out of the bundled prompt so it shows as its own
+			// item; the rest stays the per-version core estimate.
+			envTokens := 0
+			if envText := environmentBlockText(); envText != "" {
+				envTokens = measuredOrEstimate(envText, "prose")
+				if max := consts.SystemPromptTokens / 2; envTokens > max {
+					envTokens = max // clamp: env can't dominate the prompt
+				}
+				add("static_overhead", "environment", envTokens)
+			}
+			add("static_overhead", "core_prompt", consts.SystemPromptTokens-envTokens)
+			add("static_overhead", "builtin_tool_schemas", consts.SystemToolsTokens)
+		}
+	}
+	if m := extractMemorySnapshot(); m != nil {
+		for _, f := range m["files"].([]map[string]interface{}) {
+			add("memory", filepath.Base(f["path"].(string)), f["body_tokens"].(int))
+		}
+	}
+	if a := extractAgentsSnapshot(); a != nil {
+		for _, ag := range a["agents"].([]map[string]interface{}) {
+			add("custom_agents", ag["name"].(string), ag["body_tokens"].(int))
+		}
+	}
+	if t := extractToolsSnapshot(fullEntries); t != nil {
+		sum := t["summary"].(map[string]interface{})
+		if bySource, ok := sum["by_source"].(map[string]interface{}); ok {
+			if b, ok := bySource["builtin"].(map[string]interface{}); ok {
+				add("static_overhead", "observed_builtin_schemas", b["schema_tokens"].(int))
+			}
+		}
+		if byServer, ok := sum["by_server"].([]map[string]interface{}); ok {
+			for _, s := range byServer {
+				add("mcp_servers", s["server"].(string),
+					s["schema_tokens"].(int)+s["instructions_tokens"].(int))
+			}
+		}
+	}
+	return out
+}
+
+// conversationPieces replays entries in transcript order, emitting one piece
+// per content unit with the same lane attribution rules as the composition
+// extractors. Order is what makes the positional tier cut valid. Assistant
+// blocks use per-block attributed tokens (usage-derived → exact); parsedIdx
+// advances in lockstep with ParseAssistantMessages' emission rules.
+func conversationPieces(entries []TranscriptEntry, skillBodyNames map[string]string,
+	toolNames map[string]string) []billingPiece {
+
+	parsed := ParseAssistantMessages(entries)
+	DeduplicateUsage(parsed)
+	parsedIdx := 0
+
+	var out []billingPiece
+	add := func(lane, entity, kind string, tokens float64, exact bool) {
+		if tokens > 0 {
+			out = append(out, billingPiece{billingKey{lane, entity, kind}, tokens, exact})
+		}
+	}
+
+	for _, e := range entries {
+		switch e.Type {
+		case "user":
+			if e.Message == nil {
+				continue
+			}
+			for _, c := range e.Message.Content {
+				switch c.Type {
+				case "text":
+					if name, ok := skillBodyNames[sha256hex(c.Text)]; ok {
+						add("skills", name, kindUsage, float64(measuredOrEstimate(c.Text, "skill_body")), false)
+					} else {
+						tokens := measuredOrEstimate(c.Text, "user_prompt")
+						add("user_prompts", promptBucket(tokens), kindUsage, float64(tokens), false)
+					}
+				case "tool_result":
+					lane, entity := toolLane(toolNames[c.ToolUseID])
+					add(lane, entity, kindUsage, float64(measuredResultTokens(c.Content)), false)
+				}
+			}
+		case "attachment":
+			if e.Attachment == nil {
+				continue
+			}
+			switch e.Attachment.Type {
+			case "skill_listing":
+				// Per-skill DEFINITION pieces: the listing is one attachment,
+				// but each skill owns its menu block — that's what makes the
+				// stacked definition/usage bars and the unused badge work
+				// straight from billing.
+				blocks := parseSkillListingMenu(e.Attachment.ContentString(), e.Attachment.Names)
+				if len(blocks) == 0 {
+					add("skills", "menu", kindDefinition,
+						float64(tokEstimateAs(e.Attachment.ContentString(), "skill_listing_menu")), false)
+					break
+				}
+				names := make([]string, 0, len(blocks))
+				for name := range blocks {
+					names = append(names, name)
+				}
+				sort.Strings(names) // deterministic layout
+				for _, name := range names {
+					add("skills", name, kindDefinition,
+						float64(measuredOrEstimate(blocks[name], "skill_listing_menu")), false)
+				}
+			case "file":
+				var w struct {
+					File struct {
+						Path    string `json:"path,omitempty"`
+						Content string `json:"content,omitempty"`
+					} `json:"file"`
+				}
+				if json.Unmarshal(e.Attachment.Content, &w) == nil {
+					ext := strings.ToLower(filepath.Ext(w.File.Path))
+					if ext == "" {
+						ext = "other"
+					}
+					add("file_attachments", ext, kindUsage, float64(tokEstimate(w.File.Content)), false)
+				}
+			case "deferred_tools_delta":
+				// The deferred catalog mixes built-in tool names with MCP
+				// ones — split so each lands in its lane (built-in names are
+				// part of Claude Code's own overhead, not MCP rent).
+				lines := e.Attachment.AddedLines
+				names := e.Attachment.AddedNames
+				if len(lines) != len(names) {
+					lines = names // fall back to names-only sizing
+				}
+				var builtinPayload, mcpPayload []string
+				for i, name := range names {
+					line := name
+					if i < len(lines) {
+						line = lines[i]
+					}
+					if strings.HasPrefix(name, "mcp__") {
+						mcpPayload = append(mcpPayload, line)
+					} else {
+						builtinPayload = append(builtinPayload, line)
+					}
+				}
+				add("static_overhead", "deferred_tool_names", kindDefinition,
+					float64(measuredOrEstimate(strings.Join(builtinPayload, "\n"), "deferred_tools_payload")), false)
+				add("mcp_servers", "catalog_deltas", kindDefinition,
+					float64(measuredOrEstimate(strings.Join(mcpPayload, "\n"), "deferred_tools_payload")), false)
+			case "mcp_instructions_delta":
+				// Per-server when the parallel arrays line up.
+				if len(e.Attachment.AddedNames) == len(e.Attachment.AddedBlocks) && len(e.Attachment.AddedNames) > 0 {
+					for i, name := range e.Attachment.AddedNames {
+						add("mcp_servers", name, kindDefinition,
+							float64(measuredOrEstimate(e.Attachment.AddedBlocks[i], "prose")), false)
+					}
+				} else {
+					add("mcp_servers", "instructions", kindDefinition,
+						float64(tokEstimateAs(strings.Join(e.Attachment.AddedBlocks, "\n"), "prose")), false)
+				}
+			}
+		case "assistant":
+			if e.Message == nil || len(e.Message.Content) == 0 {
+				continue
+			}
+			for _, c := range e.Message.Content {
+				if c.Type == "" {
+					continue
+				}
+				if parsedIdx >= len(parsed) {
+					break
+				}
+				p := parsed[parsedIdx]
+				parsedIdx++
+				switch p.ContentType {
+				case "text", "thinking":
+					add("prior_assistant", p.ContentType, kindUsage, float64(p.AttributedOutputTokens), true)
+				case "tool_use":
+					lane, entity := toolLane(p.Content.Name)
+					add(lane, entity, kindUsage, float64(p.AttributedOutputTokens), true)
+				}
+			}
+		}
+	}
+	return out
+}
+
+func toolLane(name string) (string, string) {
+	switch {
+	case name == "":
+		return "built_in_tools", "unknown"
+	case strings.HasPrefix(name, "mcp__"):
+		parts := strings.SplitN(name, "__", 3)
+		if len(parts) >= 2 {
+			return "mcp_servers", parts[1]
+		}
+		return "mcp_servers", "unknown"
+	case name == "Skill":
+		return "skills", "Skill"
+	default:
+		return "built_in_tools", name
+	}
+}
+
+// reconcileToUsage makes Σ pieces == total exactly. Overshoot shrinks only
+// the estimated pieces (usage-derived ones are already exact); undershoot
+// appends the explicit `unattributed` tail piece.
+func reconcileToUsage(pieces []billingPiece, total float64) []billingPiece {
+	sum, estSum := 0.0, 0.0
+	for _, p := range pieces {
+		sum += p.tokens
+		if !p.exact {
+			estSum += p.tokens
+		}
+	}
+	switch {
+	case sum > total && estSum > 0:
+		target := total - (sum - estSum)
+		if target < 0 {
+			target = 0
+		}
+		scale := target / estSum
+		for i := range pieces {
+			if !pieces[i].exact {
+				pieces[i].tokens *= scale
+			}
+		}
+	case sum < total:
+		pieces = append(pieces, billingPiece{
+			billingKey{unattributedLane, "", kindUsage}, total - sum, false})
+	}
+	return pieces
+}
+
+// cutByPosition assigns each piece's overlap with the cache_read segment
+// [0,R), the cache_creation segment [R,R+W), and the fresh tail.
+func cutByPosition(pieces []billingPiece, read, write float64, acc map[billingKey]*billingTier) {
+	pos := func(x float64) float64 {
+		if x < 0 {
+			return 0
+		}
+		return x
+	}
+	off := 0.0
+	for _, p := range pieces {
+		s, e := off, off+p.tokens
+		t := tierFor(acc, p.key)
+		t.cacheRead += pos(minF(e, read) - s)
+		t.cacheCreation += pos(minF(e, read+write) - maxF(s, read))
+		t.fresh += pos(e - maxF(s, read+write))
+		off = e
+	}
+}
+
+// attributeOutput books the call's own blocks against output. callEntries is
+// the contiguous span of the call's entries, so per-block attributed shares
+// sum to the call's usage.output_tokens by construction.
+func attributeOutput(callEntries []TranscriptEntry, acc map[billingKey]*billingTier) {
+	parsed := ParseAssistantMessages(callEntries)
+	DeduplicateUsage(parsed)
+	for _, p := range parsed {
+		var key billingKey
+		switch p.ContentType {
+		case "thinking":
+			key = billingKey{"output", "thinking", kindUsage}
+		case "text":
+			key = billingKey{"output", "assistant_text", kindUsage}
+		case "tool_use":
+			lane, entity := toolLane(p.Content.Name)
+			key = billingKey{"output", lane + "/" + entity, kindUsage}
+		default:
+			continue
+		}
+		tierFor(acc, key).output += float64(p.AttributedOutputTokens)
+	}
+}
+
+// countNewEvents returns the number of NEW events this turn per usage key:
+// prompts per bucket, tool calls per tool/server, files per ext, skill loads
+// per skill. Additive across traces (each event counted once, in its turn),
+// so plain SUM yields true counts — the same split rule as everywhere else.
+func countNewEvents(turnEntries []TranscriptEntry, skillBodyNames map[string]string,
+	toolNames map[string]string) map[billingKey]int {
+
+	counts := map[billingKey]int{}
+	bump := func(lane, entity string) {
+		counts[billingKey{lane, entity, kindUsage}]++
+	}
+
+	for _, e := range turnEntries {
+		switch e.Type {
+		case "user":
+			if e.Message == nil {
+				continue
+			}
+			for _, c := range e.Message.Content {
+				switch c.Type {
+				case "text":
+					if _, ok := skillBodyNames[sha256hex(c.Text)]; ok {
+						continue // loads counted via buildLoadedSkillBodies below
+					}
+					bump("user_prompts", promptBucket(tokEstimateAs(c.Text, "user_prompt")))
+				case "tool_result":
+					lane, entity := toolLane(toolNames[c.ToolUseID])
+					bump(lane, entity)
+				}
+			}
+		case "attachment":
+			if e.Attachment == nil || e.Attachment.Type != "file" {
+				continue
+			}
+			var w struct {
+				File struct {
+					Path string `json:"path,omitempty"`
+				} `json:"file"`
+			}
+			if json.Unmarshal(e.Attachment.Content, &w) == nil {
+				ext := strings.ToLower(filepath.Ext(w.File.Path))
+				if ext == "" {
+					ext = "other"
+				}
+				bump("file_attachments", ext)
+			}
+		}
+	}
+	for _, l := range buildLoadedSkillBodies(turnEntries) {
+		bump("skills", l.Name)
+	}
+	return counts
+}
+
+func tierFor(acc map[billingKey]*billingTier, key billingKey) *billingTier {
+	t, ok := acc[key]
+	if !ok {
+		t = &billingTier{}
+		acc[key] = t
+	}
+	return t
+}
+
+// measuredResultTokens is resultTokens with anchor lookup for the common
+// string-payload shape.
+func measuredResultTokens(content interface{}) int {
+	if s, ok := content.(string); ok {
+		return measuredOrEstimate(s, "tool_result")
+	}
+	return resultTokens(content)
+}
+
+func skillBodyNameBySHA(entries []TranscriptEntry) map[string]string {
+	out := map[string]string{}
+	for _, l := range buildLoadedSkillBodies(entries) {
+		if l.Body != "" {
+			out[sha256hex(l.Body)] = l.Name
+		}
+	}
+	return out
+}
+
+func toolUseNames(entries []TranscriptEntry) map[string]string {
+	out := map[string]string{}
+	for _, e := range entries {
+		if e.Type != "assistant" || e.Message == nil {
+			continue
+		}
+		for _, c := range e.Message.Content {
+			if c.Type == "tool_use" && c.ID != "" {
+				out[c.ID] = c.Name
+			}
+		}
+	}
+	return out
+}
+
+// renderBillingSnapshot emits a SQL-first shape (OPIK-6870):
+//
+//	cc.billing.lanes.<laneKey>.{total, cache_read, cache_creation, fresh, output}
+//	cc.billing.lanes.<laneKey>.items[] {name, total, cache_read, ...}
+//
+// Lane values are FIXED JSON paths so the BE's composition query stays one
+// `SUM(JSONExtractInt(metadata,'cc','billing','lanes','<lane>','<col>'))`
+// per cell — no ARRAY JOIN needed for totals. Breakdowns use the existing
+// generic pattern: ARRAY JOIN over `...,'lanes','<lane>','items'` with
+// label field `name`. `total` is precomputed (sum of the four columns) so
+// the Sankey/lane-card query is also a single path.
+func renderBillingSnapshot(callCount int, totals billingTier,
+	acc map[billingKey]*billingTier, counts map[billingKey]int) map[string]interface{} {
+
+	tierFields := func(t *billingTier) map[string]interface{} {
+		return map[string]interface{}{
+			"total":          round(t.cacheRead + t.cacheCreation + t.fresh + t.output),
+			"cache_read":     round(t.cacheRead),
+			"cache_creation": round(t.cacheCreation),
+			"input":          round(t.fresh),
+			"output":         round(t.output),
+		}
+	}
+
+	// A key with new events this turn but no tier mass yet (e.g. the very
+	// first event landed after the last call's request) must still surface.
+	for key := range counts {
+		tierFor(acc, key)
+	}
+
+	laneTiers := map[string]*billingTier{}
+	laneItems := map[string][]map[string]interface{}{}
+	for key, t := range acc {
+		lt, ok := laneTiers[key.lane]
+		if !ok {
+			lt = &billingTier{}
+			laneTiers[key.lane] = lt
+		}
+		lt.cacheRead += t.cacheRead
+		lt.cacheCreation += t.cacheCreation
+		lt.fresh += t.fresh
+		lt.output += t.output
+		if key.entity != "" {
+			item := tierFields(t)
+			item["name"] = key.entity
+			item["kind"] = key.kind
+			item["count"] = counts[key]
+			laneItems[key.lane] = append(laneItems[key.lane], item)
+		}
+	}
+
+	lanes := map[string]interface{}{}
+	for lane, t := range laneTiers {
+		obj := tierFields(t)
+		if items := laneItems[lane]; len(items) > 0 {
+			sort.Slice(items, func(i, j int) bool {
+				return items[i]["total"].(int) > items[j]["total"].(int)
+			})
+			obj["items"] = items
+		}
+		lanes[lane] = obj
+	}
+
+	return map[string]interface{}{
+		"llm_calls": callCount,
+		// The session's model — lets consumers price the tier columns
+		// without joining back to spans.
+		"model": billingModel,
+		"totals": map[string]interface{}{
+			"total": round(totals.cacheRead + totals.cacheCreation +
+				totals.fresh + totals.output),
+			"cache_read":     round(totals.cacheRead),
+			"cache_creation": round(totals.cacheCreation),
+			"input":          round(totals.fresh),
+			"output":         round(totals.output),
+		},
+		"lanes": lanes,
+	}
+}
+
+func round(f float64) int { return int(f + 0.5) }
+
+func minF(a, b float64) float64 {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+func maxF(a, b float64) float64 {
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/src/billing_test.go b/src/billing_test.go
new file mode 100644
index 0000000..f775683
--- /dev/null
+++ b/src/billing_test.go
@@ -0,0 +1,262 @@
+package main
+
+import (
+	"strings"
+	"testing"
+)
+
+// Exactness contract: per call (and therefore per trace), the lane
+// columns — including the explicit `unattributed` row — sum EXACTLY to the
+// API-reported usage: input_tokens, cache_read, cache_creation, output.
+
+func billingColumnSums(snap map[string]interface{}) (read, write, fresh, output, rows int) {
+	for _, v := range snap["lanes"].(map[string]interface{}) {
+		row := v.(map[string]interface{})
+		read += row["cache_read"].(int)
+		write += row["cache_creation"].(int)
+		fresh += row["input"].(int)
+		output += row["output"].(int)
+		rows++
+	}
+	return
+}
+
+func assistantCall(t *testing.T, id string, usage *Usage, blocks ...Content) []TranscriptEntry {
+	t.Helper()
+	// One entry per block, all sharing message.id and usage — the real
+	// transcript shape.
+	entries := make([]TranscriptEntry, 0, len(blocks))
+	for i, b := range blocks {
+		uuid := "u-" + id
+		if i > 0 {
+			uuid += "#"
+		}
+		entries = append(entries, TranscriptEntry{
+			Type: "assistant", UUID: uuid,
+			Message: &Message{ID: id, Usage: usage, Content: ContentSlice{b}},
+		})
+	}
+	return entries
+}
+
+func TestBillingSumsExactlyToUsage(t *testing.T) {
+	// Realistic mess: estimates won't match usage (unattributed absorbs),
+	// multi-block calls, tool results, two calls with growing cache.
+	u1 := &Usage{InputTokens: 900, CacheCreationInputTokens: 30_000, OutputTokens: 250}
+	u2 := &Usage{InputTokens: 40, CacheReadInputTokens: 31_150,
+		CacheCreationInputTokens: 5_000, OutputTokens: 700}
+
+	entries := []TranscriptEntry{userPromptEntry("please do the thing")}
+	entries = append(entries, assistantCall(t, "m1", u1,
+		Content{Type: "thinking", Thinking: "redacted"},
+		Content{Type: "text", Text: strings.Repeat("plan ", 40)},
+		Content{Type: "tool_use", ID: "tu1", Name: "Read",
+			Input: map[string]interface{}{"file_path": "/x"}},
+	)...)
+	entries = append(entries, toolResultEntry("tu1", strings.Repeat("file content\n", 200)))
+	entries = append(entries, assistantCall(t, "m2", u2,
+		Content{Type: "text", Text: strings.Repeat("answer ", 60)},
+	)...)
+
+	snap := computeBillingSnapshot(entries, entries)
+	if snap == nil {
+		t.Fatal("expected billing snapshot")
+	}
+	if got := snap["llm_calls"].(int); got != 2 {
+		t.Fatalf("llm_calls = %d, want 2", got)
+	}
+
+	wantRead := u1.CacheReadInputTokens + u2.CacheReadInputTokens
+	wantWrite := u1.CacheCreationInputTokens + u2.CacheCreationInputTokens
+	wantFresh := u1.InputTokens + u2.InputTokens
+	wantOut := u1.OutputTokens + u2.OutputTokens
+
+	totals := snap["totals"].(map[string]interface{})
+	if totals["cache_read"].(int) != wantRead || totals["cache_creation"].(int) != wantWrite ||
+		totals["input"].(int) != wantFresh || totals["output"].(int) != wantOut {
+		t.Errorf("totals = %v, want read=%d write=%d fresh=%d output=%d",
+			totals, wantRead, wantWrite, wantFresh, wantOut)
+	}
+
+	// THE invariant: lane columns (incl. unattributed) sum to usage,
+	// allowing ±1 per lane row for integer rounding of float cuts.
+	read, write, fresh, output, rows := billingColumnSums(snap)
+	closeEnough := func(got, want int) bool {
+		d := got - want
+		if d < 0 {
+			d = -d
+		}
+		return d <= rows
+	}
+	if !closeEnough(read, wantRead) || !closeEnough(write, wantWrite) ||
+		!closeEnough(fresh, wantFresh) || !closeEnough(output, wantOut) {
+		t.Errorf("Σ lanes = read %d / write %d / fresh %d / output %d, want %d/%d/%d/%d (±%d rounding)",
+			read, write, fresh, output, wantRead, wantWrite, wantFresh, wantOut, rows)
+	}
+}
+
+func TestBillingPositionalCutMovesContentToCacheRead(t *testing.T) {
+	// Deterministic layout: no HOME/cwd config, no CC version → the only
+	// pieces are the conversation. Call 1 bills the prompt as fresh input;
+	// call 2 re-reads it (plus call 1's output) from cache.
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir())
+
+	prompt := "please do the thing"
+	pTok := tokEstimateAs(prompt, "user_prompt")
+	out1 := 30
+
+	u1 := &Usage{InputTokens: pTok, OutputTokens: out1} // cold: all fresh
+	u2 := &Usage{CacheReadInputTokens: pTok + out1, InputTokens: 5, OutputTokens: 10}
+
+	entries := []TranscriptEntry{userPromptEntry(prompt)}
+	entries = append(entries, assistantCall(t, "m1", u1,
+		Content{Type: "text", Text: strings.Repeat("ok ", 15)})...)
+	entries = append(entries, userPromptEntry("and?"))
+	entries = append(entries, assistantCall(t, "m2", u2,
+		Content{Type: "text", Text: "done"})...)
+
+	snap := computeBillingSnapshot(entries, entries)
+	lanes := map[string]map[string]interface{}{}
+	for lane, v := range snap["lanes"].(map[string]interface{}) {
+		lanes[lane] = v.(map[string]interface{})
+	}
+
+	up := lanes["user_prompts"]
+	if up == nil {
+		t.Fatalf("no user_prompts lane: %v", lanes)
+	}
+	// Fresh: prompt 1 billed cold on call 1, prompt 2 billed in call 2's tail.
+	wantFresh := pTok + tokEstimateAs("and?", "user_prompt")
+	if up["input"].(int) != wantFresh {
+		t.Errorf("user_prompts fresh = %d, want %d", up["input"], wantFresh)
+	}
+	if up["cache_read"].(int) != pTok {
+		t.Errorf("user_prompts cache_read = %d, want %d (call 2 replay)", up["cache_read"], pTok)
+	}
+	// Call 1's output replays inside call 2's cached prefix.
+	pa := lanes["prior_assistant"]
+	if pa == nil || pa["cache_read"].(int) != out1 {
+		t.Errorf("prior_assistant cache_read = %v, want %d", pa, out1)
+	}
+	// Output side: booked once per call, exact.
+	if o := lanes["output"]; o == nil || o["output"].(int) != out1+10 {
+		t.Errorf("output lane = %v, want output %d", o, out1+10)
+	}
+}
+
+func TestBillingUnattributedAbsorbsUnknownMass(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir())
+
+	prompt := "hi"
+	pTok := tokEstimateAs(prompt, "user_prompt")
+	// Usage says far more input than we can see — e.g. system reminders.
+	u := &Usage{InputTokens: pTok + 5_000, OutputTokens: 5}
+
+	entries := []TranscriptEntry{userPromptEntry(prompt)}
+	entries = append(entries, assistantCall(t, "m1", u, Content{Type: "text", Text: "yo"})...)
+
+	snap := computeBillingSnapshot(entries, entries)
+	row, ok := snap["lanes"].(map[string]interface{})[unattributedLane].(map[string]interface{})
+	if !ok {
+		t.Fatal("expected an unattributed lane entry")
+	}
+	if row["input"].(int) != 5_000 {
+		t.Errorf("unattributed fresh = %d, want 5000", row["input"])
+	}
+	if row["total"].(int) != 5_000 {
+		t.Errorf("unattributed total = %d, want 5000", row["total"])
+	}
+}
+
+func TestStaticOverheadItemization(t *testing.T) {
+	resetTokenCache(t)
+
+	// Unknown components: env carve-out + core remainder, conserving the
+	// table's system_prompt total.
+	ccBuiltinByVersion["8.8.8"] = ccBuiltinConstants{SystemPromptTokens: 4000, SystemToolsTokens: 900}
+	defer delete(ccBuiltinByVersion, "8.8.8")
+
+	entries := []TranscriptEntry{userPromptEntry("hi")}
+	call := assistantCall(t, "m1", &Usage{InputTokens: 10_000, OutputTokens: 5},
+		Content{Type: "text", Text: "yo"})
+	entries = append(entries, call...)
+	entries[1].Version = "8.8.8"
+
+	snap := computeBillingSnapshot(entries, entries)
+	so := snap["lanes"].(map[string]interface{})["static_overhead"].(map[string]interface{})
+	byName := map[string]int{}
+	for _, it := range so["items"].([]map[string]interface{}) {
+		byName[it["name"].(string)] = it["total"].(int)
+	}
+	if byName["builtin_tool_schemas"] == 0 {
+		t.Errorf("missing builtin_tool_schemas item: %v", byName)
+	}
+	// env + core must conserve the prompt constant (env may be 0 in a
+	// temp-dir cwd, in which case core carries it all).
+	if got := byName["environment"] + byName["core_prompt"]; got != 4000 {
+		t.Errorf("environment+core_prompt = %d, want 4000", got)
+	}
+
+	// With a calibrated Components table, items follow it instead.
+	ccBuiltinByVersion["8.8.8"] = ccBuiltinConstants{
+		SystemPromptTokens: 4000, SystemToolsTokens: 900,
+		Components: map[string]int{
+			"identity_and_rules": 2100, "memory_instructions": 800,
+			"environment_template": 200, "session_guidance": 900,
+			"builtin_tool_schemas": 900,
+		},
+	}
+	snap = computeBillingSnapshot(entries, entries)
+	so = snap["lanes"].(map[string]interface{})["static_overhead"].(map[string]interface{})
+	if so["total"].(int) != 4900 {
+		t.Errorf("components total = %d, want 4900", so["total"])
+	}
+	if len(so["items"].([]map[string]interface{})) != 5 {
+		t.Errorf("want 5 component items, got %v", so["items"])
+	}
+}
+
+func TestDeferredCatalogSplitsBuiltinFromMcp(t *testing.T) {
+	resetTokenCache(t)
+
+	delta := TranscriptEntry{Type: "attachment", Attachment: &Attachment{
+		Type:       "deferred_tools_delta",
+		AddedNames: []string{"WebSearch", "mcp__slack__send", "Monitor"},
+		AddedLines: []string{
+			"WebSearch: search the web for things and stuff",
+			"mcp__slack__send: send a slack message to a channel",
+			"Monitor: watch a long-running script for events",
+		},
+	}}
+	entries := []TranscriptEntry{userPromptEntry("hi"), delta}
+	entries = append(entries, assistantCall(t, "m1", &Usage{InputTokens: 5_000, OutputTokens: 5},
+		Content{Type: "text", Text: "ok"})...)
+
+	snap := computeBillingSnapshot(entries, entries)
+	lanes := snap["lanes"].(map[string]interface{})
+	soItems := lanes["static_overhead"].(map[string]interface{})["items"].([]map[string]interface{})
+	foundBuiltin := false
+	for _, it := range soItems {
+		if it["name"] == "deferred_tool_names" && it["total"].(int) > 0 {
+			foundBuiltin = true
+		}
+	}
+	if !foundBuiltin {
+		t.Errorf("expected deferred_tool_names under static_overhead: %v", soItems)
+	}
+	mcp, ok := lanes["mcp_servers"].(map[string]interface{})
+	if !ok {
+		t.Fatal("expected mcp_servers lane")
+	}
+	foundMcp := false
+	for _, it := range mcp["items"].([]map[string]interface{}) {
+		if it["name"] == "catalog_deltas" && it["total"].(int) > 0 {
+			foundMcp = true
+		}
+	}
+	if !foundMcp {
+		t.Errorf("expected catalog_deltas under mcp_servers: %v", mcp["items"])
+	}
+}
diff --git a/src/cc_builtin.go b/src/cc_builtin.go
index f83b261..3a255f7 100644
--- a/src/cc_builtin.go
+++ b/src/cc_builtin.go
@@ -10,10 +10,10 @@ import (
 //
 //   - SystemPromptTokens         the bundled default system prompt
 //   - SystemToolsTokens          full JSON schemas for the default tools
-//                                (Read, Edit, Bash, …) — not the names alone
+//     (Read, Edit, Bash, …) — not the names alone
 //   - SystemToolsDeferredTokens  the catalog of deferred tool definitions
-//                                (Cron*, Task*, Web*, Monitor, …) plus any
-//                                schemas Claude Code injects on demand
+//     (Cron*, Task*, Web*, Monitor, …) plus any
+//     schemas Claude Code injects on demand
 //
 // These are taken from `/context` for a known CC version. They drift with
 // each binary release, so the table below should grow over time.
@@ -21,6 +21,13 @@ type ccBuiltinConstants struct {
 	SystemPromptTokens        int
 	SystemToolsTokens         int
 	SystemToolsDeferredTokens int
+	// Components is the optional per-release itemization of the bundled
+	// block (identity/harness rules, memory instructions, tool policy,
+	// per-tool schemas, ...), produced by the calibration capture described
+	// in docs/builtin-calibration.md. When present it replaces the
+	// two-entity system_prompt/builtin_tool_schemas split in cc.billing.
+	// Component values must sum to SystemPromptTokens + SystemToolsTokens.
+	Components map[string]int
 }
 
 // ccBuiltinByVersion is a small versioned table — keys are exact CC
@@ -37,6 +44,14 @@ var ccBuiltinByVersion = map[string]ccBuiltinConstants{
 		SystemToolsTokens:         17600,
 		SystemToolsDeferredTokens: 19200,
 	},
+	// 2.1.173 moved most of the built-in tool catalog behind deferral:
+	// always-on schemas dropped 17.6k → 1.1k. Captured from /context on
+	// 2.1.173 + Fable (cc.context_runtime cross-check, OPIK-6873 audit).
+	"2.1.173": {
+		SystemPromptTokens:        4800,
+		SystemToolsTokens:         1100,
+		SystemToolsDeferredTokens: 11300,
+	},
 }
 
 // ccBuiltinFor returns the constants for the given CC version. Falls
diff --git a/src/context_fetch_test.go b/src/context_fetch_test.go
index 0cb6ead..df2c17b 100644
--- a/src/context_fetch_test.go
+++ b/src/context_fetch_test.go
@@ -80,16 +80,16 @@ func TestParseContextMarkdownCategories(t *testing.T) {
 		t.Fatalf("categories missing or wrong type: %T", out["categories"])
 	}
 	wants := map[string]int{
-		"system_prompt":           8000,
-		"system_tools":            17600,
-		"mcp_tools_deferred":      2500,
-		"system_tools_deferred":   19200,
-		"custom_agents":           3300,
-		"memory_files":            2900,
-		"skills":                  2100,
-		"messages":                6100,
-		"free_space":              928600,
-		"autocompact_buffer":      33000,
+		"system_prompt":         8000,
+		"system_tools":          17600,
+		"mcp_tools_deferred":    2500,
+		"system_tools_deferred": 19200,
+		"custom_agents":         3300,
+		"memory_files":          2900,
+		"skills":                2100,
+		"messages":              6100,
+		"free_space":            928600,
+		"autocompact_buffer":    33000,
 	}
 	for k, want := range wants {
 		if got := cats[k]; got != want {
@@ -162,12 +162,12 @@ func TestParseTokens(t *testing.T) {
 
 func TestNormalizeCategoryKey(t *testing.T) {
 	cases := map[string]string{
-		"System prompt":            "system_prompt",
-		"System tools (deferred)":  "system_tools_deferred",
-		"MCP tools (deferred)":     "mcp_tools_deferred",
-		"Custom agents":            "custom_agents",
-		"Free space":               "free_space",
-		"Autocompact buffer":       "autocompact_buffer",
+		"System prompt":           "system_prompt",
+		"System tools (deferred)": "system_tools_deferred",
+		"MCP tools (deferred)":    "mcp_tools_deferred",
+		"Custom agents":           "custom_agents",
+		"Free space":              "free_space",
+		"Autocompact buffer":      "autocompact_buffer",
 	}
 	for in, want := range cases {
 		if got := normalizeCategoryKey(in); got != want {
diff --git a/src/context_snapshot.go b/src/context_snapshot.go
index cff0ca6..92c2ec0 100644
--- a/src/context_snapshot.go
+++ b/src/context_snapshot.go
@@ -130,10 +130,10 @@ func buildContextSnapshot(state *State) map[string]interface{} {
 		}
 	}
 	return map[string]interface{}{
-		"categories":       cats,
-		"total_tokens":     alwaysOn, // ← matches /context and the API
-		"deferred_tokens":  deferred, // ← informational; loaded on demand
-		"source":           "estimated_sync",
+		"categories":      cats,
+		"total_tokens":    alwaysOn, // ← matches /context and the API
+		"deferred_tokens": deferred, // ← informational; loaded on demand
+		"source":          "estimated_sync",
 	}
 }
 
@@ -158,6 +158,7 @@ func cumulativeMessagesTokens(entries []TranscriptEntry) int {
 	skillBodies := skillBodyHashSet(entries)
 
 	total := 0
+	seenMsgs := map[string]bool{}
 	for _, e := range entries {
 		switch e.Type {
 		case "user":
@@ -179,8 +180,18 @@ func cumulativeMessagesTokens(entries []TranscriptEntry) int {
 			if e.Message == nil || e.Message.Usage == nil {
 				continue
 			}
-			// Anthropic's own count for this LLM call's output —
-			// exact, no estimation drift.
+			// Anthropic's own count for this LLM call's output — exact,
+			// no estimation drift. Counted once per message.id: multi-block
+			// messages repeat the same usage on every entry (one entry per
+			// content block), so a per-entry sum double-counts.
+			id := e.Message.ID
+			if id == "" {
+				id = e.UUID
+			}
+			if seenMsgs[id] {
+				continue
+			}
+			seenMsgs[id] = true
 			total += e.Message.Usage.OutputTokens
 		}
 	}
diff --git a/src/count_tokens.go b/src/count_tokens.go
new file mode 100644
index 0000000..badb7c8
--- /dev/null
+++ b/src/count_tokens.go
@@ -0,0 +1,461 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"sort"
+	"sync"
+	"time"
+)
+
+// count_tokens anchoring (OPIK-6873 follow-up).
+//
+// Anthropic's /v1/messages/count_tokens endpoint is free (no token billing)
+// and accepts Claude Code's own OAuth token, so we can measure the EXACT
+// token cost of any text we possess instead of relying on calibrated
+// chars/token ratios. Measurements are cached persistently keyed by
+// (model, sha256) — static config content (skill menu blocks, memory files,
+// agent blurbs, MCP instructions) is hash-stable, so it's measured once
+// ever and steady-state API traffic is ~zero.
+//
+// The measurement pass runs in a detached child at turn end (same pattern
+// as the /context fetcher): it re-derives the anchor candidates from the
+// transcript, measures the biggest cache misses under a per-turn budget,
+// and persists them. The NEXT flush picks them up via measuredOrEstimate.
+// Totals are unaffected (per-call usage is already exact); anchors improve
+// the split and shrink `unattributed` to genuinely unobserved content.
+//
+// As a bonus, on a session's cache-cold first call the bundled system
+// prompt + built-in tool schemas — the one block we can never read — is
+// derived as a residual: call₁ usage minus everything we CAN account for.
+// Stored per CC version, replacing the hand-maintained constant table
+// whenever a measurement exists.
+
+const (
+	countTokensEndpoint = "https://api.anthropic.com/v1/messages/count_tokens"
+	anthropicVersion    = "2023-06-01"
+	oauthBeta           = "oauth-2025-04-20"
+	defaultCountModel   = "claude-fable-5"
+
+	tokenCountBudget = 10 // API calls per measurement pass, biggest-first
+	oauthSkewMs      = 60_000
+)
+
+// ---------------------------------------------------------------------------
+// Persistent cache: { "<model>|<sha256>": tokens }
+
+var (
+	tokCacheOnce sync.Once
+	tokCacheMu   sync.Mutex
+	tokCacheMap  map[string]int
+)
+
+func tokenCachePath() string {
+	home, err := os.UserHomeDir()
+	if err != nil || home == "" {
+		return filepath.Join(os.TempDir(), "opik-token-counts.json")
+	}
+	return filepath.Join(home, ".opik-token-counts.json")
+}
+
+func loadTokenCache() map[string]int {
+	tokCacheOnce.Do(func() {
+		tokCacheMap = map[string]int{}
+		data, err := os.ReadFile(tokenCachePath())
+		if err == nil {
+			_ = json.Unmarshal(data, &tokCacheMap)
+		}
+	})
+	return tokCacheMap
+}
+
+func tokenCacheGet(key string) (int, bool) {
+	tokCacheMu.Lock()
+	defer tokCacheMu.Unlock()
+	v, ok := loadTokenCache()[key]
+	return v, ok
+}
+
+// saveTokenCounts merges updates into the on-disk cache (reload + merge +
+// atomic rename, so concurrent sessions don't clobber each other's rows).
+func saveTokenCounts(updates map[string]int) {
+	if len(updates) == 0 {
+		return
+	}
+	tokCacheMu.Lock()
+	defer tokCacheMu.Unlock()
+	merged := map[string]int{}
+	if data, err := os.ReadFile(tokenCachePath()); err == nil {
+		_ = json.Unmarshal(data, &merged)
+	}
+	for k, v := range updates {
+		merged[k] = v
+	}
+	loadTokenCache() // ensure the Once ran before replacing the map
+	tokCacheMap = merged
+	data, err := json.Marshal(merged)
+	if err != nil {
+		return
+	}
+	tmp := tokenCachePath() + ".tmp"
+	if os.WriteFile(tmp, data, 0o600) == nil {
+		_ = os.Rename(tmp, tokenCachePath())
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Lookup used by the attribution code
+
+// billingModel is the model the current snapshot's measurements are keyed
+// by; set once per computeBillingSnapshot from the transcript.
+var billingModel = defaultCountModel
+
+func setBillingModel(entries []TranscriptEntry) {
+	if m := mostRecentModelFromEntries(entries); m != "" {
+		billingModel = m
+	}
+}
+
+// measuredOrEstimate returns the exact measured token count for text when
+// one is cached for the current model, falling back to the calibrated
+// ratio estimate.
+func measuredOrEstimate(text, contentType string) int {
+	if text == "" {
+		return 0
+	}
+	if v, ok := tokenCacheGet(billingModel + "|" + sha256hex(text)); ok {
+		return v
+	}
+	return tokEstimateAs(text, contentType)
+}
+
+// ---------------------------------------------------------------------------
+// Auth: Claude Code's own OAuth credential (or ANTHROPIC_API_KEY)
+
+func countTokensHeaders() map[string]string {
+	if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" {
+		return map[string]string{
+			"x-api-key":         key,
+			"anthropic-version": anthropicVersion,
+			"content-type":      "application/json",
+		}
+	}
+	if tok := claudeOAuthToken(); tok != "" {
+		return map[string]string{
+			"authorization":     "Bearer " + tok,
+			"anthropic-version": anthropicVersion,
+			"anthropic-beta":    oauthBeta,
+			"content-type":      "application/json",
+		}
+	}
+	return nil
+}
+
+func claudeOAuthToken() string {
+	if blob := oauthBlobFromFile(); blob != nil {
+		if tok := validOAuthToken(blob); tok != "" {
+			return tok
+		}
+	}
+	if runtime.GOOS == "darwin" {
+		if blob := oauthBlobFromKeychain(); blob != nil {
+			return validOAuthToken(blob)
+		}
+	}
+	return ""
+}
+
+type oauthBlob struct {
+	AccessToken string `json:"accessToken"`
+	ExpiresAt   int64  `json:"expiresAt"`
+}
+
+func validOAuthToken(b *oauthBlob) string {
+	if b.AccessToken == "" {
+		return ""
+	}
+	if b.ExpiresAt > 0 && time.Now().UnixMilli() >= b.ExpiresAt-oauthSkewMs {
+		return ""
+	}
+	return b.AccessToken
+}
+
+func oauthBlobFromFile() *oauthBlob {
+	dir := os.Getenv("CLAUDE_CONFIG_DIR")
+	if dir == "" {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			return nil
+		}
+		dir = filepath.Join(home, ".claude")
+	}
+	data, err := os.ReadFile(filepath.Join(dir, ".credentials.json"))
+	if err != nil {
+		return nil
+	}
+	return parseOAuthBlob(data)
+}
+
+func oauthBlobFromKeychain() *oauthBlob {
+	out, err := exec.Command("security", "find-generic-password",
+		"-s", "Claude Code-credentials", "-w").Output()
+	if err != nil {
+		return nil
+	}
+	return parseOAuthBlob(out)
+}
+
+func parseOAuthBlob(data []byte) *oauthBlob {
+	var wrapper struct {
+		ClaudeAiOauth *oauthBlob `json:"claudeAiOauth"`
+	}
+	if json.Unmarshal(data, &wrapper) != nil {
+		return nil
+	}
+	return wrapper.ClaudeAiOauth
+}
+
+// ---------------------------------------------------------------------------
+// The endpoint
+
+// countTokensHTTP is swappable for tests.
+var countTokensHTTP = func(payload []byte, headers map[string]string) (int, error) {
+	req, err := http.NewRequest("POST", countTokensEndpoint, bytes.NewReader(payload))
+	if err != nil {
+		return 0, err
+	}
+	for k, v := range headers {
+		req.Header.Set(k, v)
+	}
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+	var out struct {
+		InputTokens int `json:"input_tokens"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return 0, err
+	}
+	return out.InputTokens, nil
+}
+
+func countTokensFor(model, text string, headers map[string]string) (int, error) {
+	payload, err := json.Marshal(map[string]interface{}{
+		"model": model,
+		"messages": []map[string]string{
+			{"role": "user", "content": text},
+		},
+	})
+	if err != nil {
+		return 0, err
+	}
+	return countTokensHTTP(payload, headers)
+}
+
+// ---------------------------------------------------------------------------
+// Measurement pass (runs in the detached child)
+
+type anchorCandidate struct {
+	sha, text, contentType string
+}
+
+// anchorCandidates gathers every text whose exact size improves attribution:
+// transcript content (prompts, skill bodies, menu blocks, instruction
+// blocks, string tool results) plus on-disk config (memory files, agent
+// frontmatter is covered transitively by the agents extractor's content).
+func anchorCandidates(entries []TranscriptEntry) []anchorCandidate {
+	seen := map[string]bool{}
+	var out []anchorCandidate
+	add := func(text, contentType string) {
+		if len(text) < 200 { // tiny texts: ratio error is ≤ a few tokens
+			return
+		}
+		sha := sha256hex(text)
+		if seen[sha] {
+			return
+		}
+		seen[sha] = true
+		out = append(out, anchorCandidate{sha, text, contentType})
+	}
+
+	skillBodyNames := skillBodyNameBySHA(entries)
+	for _, e := range entries {
+		switch e.Type {
+		case "user":
+			if e.Message == nil {
+				continue
+			}
+			for _, c := range e.Message.Content {
+				switch c.Type {
+				case "text":
+					if _, ok := skillBodyNames[sha256hex(c.Text)]; ok {
+						add(c.Text, "skill_body")
+					} else {
+						add(c.Text, "user_prompt")
+					}
+				case "tool_result":
+					if s, ok := c.Content.(string); ok {
+						add(s, "tool_result")
+					}
+				}
+			}
+		case "attachment":
+			if e.Attachment == nil {
+				continue
+			}
+			switch e.Attachment.Type {
+			case "skill_listing":
+				for _, block := range parseSkillListingMenu(e.Attachment.ContentString(), e.Attachment.Names) {
+					add(block, "skill_listing_menu")
+				}
+			case "mcp_instructions_delta":
+				for _, b := range e.Attachment.AddedBlocks {
+					add(b, "prose")
+				}
+			}
+		}
+	}
+
+	// The reconstructed environment block — changes per session (git
+	// status), so it mostly rides the ratio estimate, but stable repos
+	// get anchored.
+	add(environmentBlockText(), "prose")
+
+	// On-disk config content: memory files + agent files.
+	if m := extractMemorySnapshot(); m != nil {
+		for _, f := range m["files"].([]map[string]interface{}) {
+			if body, err := os.ReadFile(f["path"].(string)); err == nil {
+				add(string(body), "memory_file")
+			}
+		}
+	}
+	if a := extractAgentsSnapshot(); a != nil {
+		for _, ag := range a["agents"].([]map[string]interface{}) {
+			if body, err := os.ReadFile(ag["path"].(string)); err == nil {
+				if meta := frontmatter(string(body)); meta != "" {
+					add(meta, "agent_frontmatter")
+				}
+			}
+		}
+	}
+	return out
+}
+
+// runTokenCountPass measures the biggest uncached candidates under budget
+// and persists them, then derives the cc_builtin cold-start residual when
+// the session's first call is available. Returns the number of API calls
+// spent (for tests/logging).
+func runTokenCountPass(entries []TranscriptEntry, budget int) int {
+	headers := countTokensHeaders()
+	if headers == nil {
+		debugLog("count_tokens: no credential — skipping")
+		return 0
+	}
+	model := mostRecentModelFromEntries(entries)
+	if model == "" {
+		model = defaultCountModel
+	}
+
+	candidates := anchorCandidates(entries)
+	var misses []anchorCandidate
+	for _, c := range candidates {
+		if _, ok := tokenCacheGet(model + "|" + c.sha); !ok {
+			misses = append(misses, c)
+		}
+	}
+	sort.Slice(misses, func(i, j int) bool {
+		return len(misses[i].text) > len(misses[j].text)
+	})
+
+	updates := map[string]int{}
+	spent := 0
+
+	// Baseline: the envelope overhead of a single one-token user message.
+	baselineKey := model + "|__baseline__"
+	baseline, ok := tokenCacheGet(baselineKey)
+	if !ok && len(misses) > 0 {
+		b, err := countTokensFor(model, ".", headers)
+		if err != nil {
+			debugLog("count_tokens: baseline failed: %v", err)
+			return 0
+		}
+		baseline = b
+		updates[baselineKey] = b
+		spent++
+	}
+
+	for _, c := range misses {
+		if spent >= budget {
+			break
+		}
+		n, err := countTokensFor(model, c.text, headers)
+		if err != nil {
+			debugLog("count_tokens: measure failed: %v", err)
+			break
+		}
+		spent++
+		tokens := n - baseline + 1 // "." ≈ 1 token of content
+		if tokens < 0 {
+			tokens = 0
+		}
+		updates[model+"|"+c.sha] = tokens
+	}
+
+	saveTokenCounts(updates)
+	debugLog("count_tokens: measured %d of %d misses (budget %d)", len(updates), len(misses), budget)
+	return spent
+}
+
+// ---------------------------------------------------------------------------
+// Detached child plumbing
+
+func spawnDetachedTokenCount(sessionID string) error {
+	if sessionID == "" || os.Getenv("OPIK_CC_DISABLE_TOKEN_COUNT") == "true" {
+		return nil
+	}
+	self, err := os.Executable()
+	if err != nil {
+		return err
+	}
+	cmd := exec.Command(self)
+	cmd.Env = append(os.Environ(),
+		"OPIK_CC_TOKEN_COUNT=1",
+		"OPIK_CC_FETCH_SESSION_ID="+sessionID,
+	)
+	detachProcess(cmd)
+	devnull, _ := os.Open(os.DevNull)
+	if devnull != nil {
+		cmd.Stdin = devnull
+		cmd.Stdout = devnull
+		cmd.Stderr = devnull
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	return cmd.Process.Release()
+}
+
+// runTokenCountMode is the detached child's entry point.
+func runTokenCountMode() {
+	sid := os.Getenv("OPIK_CC_FETCH_SESSION_ID")
+	if sid == "" {
+		return
+	}
+	state, err := LoadState(sid)
+	if err != nil || state == nil || state.Transcript == "" {
+		return
+	}
+	entries, err := ReadTranscript(state.Transcript, 0)
+	if err != nil || len(entries) == 0 {
+		return
+	}
+	runTokenCountPass(entries, tokenCountBudget)
+}
diff --git a/src/count_tokens_test.go b/src/count_tokens_test.go
new file mode 100644
index 0000000..0ebd0f5
--- /dev/null
+++ b/src/count_tokens_test.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"encoding/json"
+	"os"
+	"strings"
+	"testing"
+)
+
+// resetTokenCache points the cache at a fresh temp HOME and clears the
+// in-memory state between tests (the production code loads once per
+// process).
+func resetTokenCache(t *testing.T) {
+	t.Helper()
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir())
+	tokCacheMu.Lock()
+	tokCacheMap = map[string]int{}
+	tokCacheMu.Unlock()
+	billingModel = defaultCountModel
+}
+
+func TestMeasuredOrEstimatePrefersAnchor(t *testing.T) {
+	resetTokenCache(t)
+	text := strings.Repeat("some skill menu line\n", 30)
+
+	est := measuredOrEstimate(text, "skill_listing_menu")
+	if est != tokEstimateAs(text, "skill_listing_menu") {
+		t.Fatalf("without anchor, want ratio estimate %d, got %d",
+			tokEstimateAs(text, "skill_listing_menu"), est)
+	}
+
+	saveTokenCounts(map[string]int{billingModel + "|" + sha256hex(text): 4242})
+	if got := measuredOrEstimate(text, "skill_listing_menu"); got != 4242 {
+		t.Errorf("with anchor, want 4242, got %d", got)
+	}
+}
+
+func TestSaveTokenCountsMergesOnDisk(t *testing.T) {
+	resetTokenCache(t)
+	saveTokenCounts(map[string]int{"m|a": 1})
+	saveTokenCounts(map[string]int{"m|b": 2})
+
+	// Reload from disk into a fresh map to prove both rows persisted.
+	var onDisk map[string]int
+	data, err := jsonReadFile(tokenCachePath())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := json.Unmarshal(data, &onDisk); err != nil {
+		t.Fatal(err)
+	}
+	if onDisk["m|a"] != 1 || onDisk["m|b"] != 2 {
+		t.Errorf("on-disk cache = %v, want both rows", onDisk)
+	}
+}
+
+func TestRunTokenCountPassBudgetAndBaseline(t *testing.T) {
+	resetTokenCache(t)
+	t.Setenv("ANTHROPIC_API_KEY", "test-key") // auth via env, no keychain
+
+	// Mock the endpoint: baseline (".") costs 8; any text costs 8 - 1 + its
+	// length/4 so measured = len/4 exactly after baseline subtraction.
+	calls := 0
+	old := countTokensHTTP
+	countTokensHTTP = func(payload []byte, headers map[string]string) (int, error) {
+		calls++
+		var req struct {
+			Messages []struct {
+				Content string `json:"content"`
+			} `json:"messages"`
+		}
+		_ = json.Unmarshal(payload, &req)
+		text := req.Messages[0].Content
+		if text == "." {
+			return 8, nil
+		}
+		return 8 - 1 + len(text)/4, nil
+	}
+	defer func() { countTokensHTTP = old }()
+
+	big := strings.Repeat("a long tool result line\n", 100)     // 2400 chars
+	small := strings.Repeat("a smaller prompt body here\n", 10) // 270 chars
+	entries := []TranscriptEntry{
+		userPromptEntry(small),
+		toolResultEntry("tu1", big),
+		assistantCall(t, "m1", &Usage{InputTokens: 50, OutputTokens: 5},
+			Content{Type: "tool_use", ID: "tu1", Name: "Read",
+				Input: map[string]interface{}{"f": "x"}})[0],
+	}
+
+	// Budget of 2 = baseline + ONE measurement; biggest-first → the big
+	// tool result gets measured, the small prompt does not.
+	spent := runTokenCountPass(entries, 2)
+	if spent != 2 {
+		t.Fatalf("spent = %d, want 2 (baseline + 1 text)", spent)
+	}
+	if got, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(big)); !ok || got != len(big)/4 {
+		t.Errorf("big text anchor = %d/%v, want %d", got, ok, len(big)/4)
+	}
+	if _, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(small)); ok {
+		t.Error("small text should not have been measured under budget")
+	}
+
+	// Second pass: baseline + big are cached; remaining budget measures small.
+	spent = runTokenCountPass(entries, 2)
+	if _, ok := tokenCacheGet(defaultCountModel + "|" + sha256hex(small)); !ok {
+		t.Error("second pass should measure the remaining miss")
+	}
+	_ = spent
+}
+
+func jsonReadFile(path string) ([]byte, error) {
+	return os.ReadFile(path)
+}
diff --git a/src/cumulative_test.go b/src/cumulative_test.go
new file mode 100644
index 0000000..b2abb80
--- /dev/null
+++ b/src/cumulative_test.go
@@ -0,0 +1,129 @@
+package main
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+// Shared fixtures + regressions that guard the billing block's attribution
+// rules (the composition emitters they originally covered were consolidated
+// into cc.billing — see billing.go).
+
+func userPromptEntry(text string) TranscriptEntry {
+	return TranscriptEntry{Type: "user", Message: &Message{Content: ContentSlice{
+		{Type: "text", Text: text},
+	}}}
+}
+
+func toolResultEntry(toolUseID, payload string) TranscriptEntry {
+	return TranscriptEntry{Type: "user", Message: &Message{Content: ContentSlice{
+		{Type: "tool_result", ToolUseID: toolUseID, Content: payload},
+	}}}
+}
+
+func fileAttachmentEntry(path, content string) TranscriptEntry {
+	raw, _ := json.Marshal(map[string]interface{}{
+		"file": map[string]string{"path": path, "content": content},
+	})
+	return TranscriptEntry{Type: "attachment", Attachment: &Attachment{Type: "file", Content: raw}}
+}
+
+func TestBillingNoUsageDoubleCountAcrossMultiBlockEntries(t *testing.T) {
+	// The transcript repeats the same message.usage on EVERY entry of a
+	// multi-block message. Regression for the 2x inflation found in the
+	// OPIK-6873 audit: usage must be booked once per message.id.
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir())
+
+	usage := &Usage{InputTokens: 100, OutputTokens: 1000}
+	entries := []TranscriptEntry{
+		userPromptEntry("go"),
+		{Type: "assistant", UUID: "u1", Message: &Message{ID: "m1", Usage: usage, Content: ContentSlice{
+			{Type: "thinking", Thinking: "redacted"},
+		}}},
+		{Type: "assistant", UUID: "u1#1", Message: &Message{ID: "m1", Usage: usage, Content: ContentSlice{
+			{Type: "text", Text: strings.Repeat("answer ", 50)},
+		}}},
+	}
+
+	snap := computeBillingSnapshot(entries, entries)
+	if got := snap["llm_calls"].(int); got != 1 {
+		t.Errorf("llm_calls = %d, want 1 (two entries, one message)", got)
+	}
+	totals := snap["totals"].(map[string]interface{})
+	if totals["output"].(int) != 1000 {
+		t.Errorf("output total = %d, want 1000 booked once", totals["output"])
+	}
+	if totals["input"].(int) != 100 {
+		t.Errorf("fresh total = %d, want 100 booked once", totals["input"])
+	}
+}
+
+func TestBillingExcludesSkillBodiesFromUserPrompts(t *testing.T) {
+	// A slash-loaded skill body is a user text block in the transcript; it
+	// must be attributed to the skills lane, never to user_prompts.
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("CLAUDE_PROJECT_DIR", t.TempDir())
+
+	body := "Base directory for this skill: /tmp/skills/opik\n\n" + strings.Repeat("skill body ", 300)
+	bodyTok := tokEstimateAs(body, "skill_body")
+	promptTok := tokEstimateAs("real question", "user_prompt")
+
+	turn1 := []TranscriptEntry{
+		userPromptEntry("<command-message>opik:opik</command-message>\n<command-name>/opik:opik</command-name>"),
+		userPromptEntry(body),
+	}
+	entries := append(turn1, userPromptEntry("real question"))
+	call := assistantCall(t, "m1", &Usage{InputTokens: 5_000, OutputTokens: 10},
+		Content{Type: "text", Text: "ok"})
+	entries = append(entries, call...)
+
+	snap := computeBillingSnapshot(entries, entries)
+	lanes := snap["lanes"].(map[string]interface{})
+
+	skills := lanes["skills"].(map[string]interface{})
+	if skills["total"].(int) < bodyTok {
+		t.Errorf("skills total = %d, want >= body %d", skills["total"], bodyTok)
+	}
+	up := lanes["user_prompts"].(map[string]interface{})
+	// user_prompts must hold only the preamble + real question, not the body.
+	if up["total"].(int) >= bodyTok {
+		t.Errorf("user_prompts total = %d — skill body leaked in (body=%d, prompt=%d)",
+			up["total"], bodyTok, promptTok)
+	}
+
+	// And the load is counted once, on the skills item.
+	for _, v := range skills["items"].([]map[string]interface{}) {
+		if v["name"] == "opik:opik" && v["kind"] == kindUsage {
+			if v["count"].(int) != 1 {
+				t.Errorf("skill load count = %d, want 1", v["count"])
+			}
+			return
+		}
+	}
+	t.Error("expected a skills usage item for opik:opik")
+}
+
+func TestRepeatSkillLoadsKeepDistinctEvents(t *testing.T) {
+	slashLoad := func(name, body string) []TranscriptEntry {
+		return []TranscriptEntry{
+			userPromptEntry("<command-message>" + name + "</command-message>\n<command-name>/" + name + "</command-name>"),
+			userPromptEntry("Base directory for this skill: /tmp/skills/x\n\n" + body),
+		}
+	}
+	entries := append(slashLoad("opik:opik", "body one"), slashLoad("opik:opik", "body two")...)
+
+	loads := buildLoadedSkillBodies(entries)
+	if len(loads) != 2 {
+		t.Fatalf("want 2 load events for 2 slash loads, got %d", len(loads))
+	}
+	if loads[0].ToolUseID == loads[1].ToolUseID {
+		t.Errorf("load events must have distinct ids, both %q", loads[0].ToolUseID)
+	}
+	for _, l := range loads {
+		if !strings.HasPrefix(l.ToolUseID, "slash:") {
+			t.Errorf("slash load id = %q, want slash:<idx>", l.ToolUseID)
+		}
+	}
+}
diff --git a/src/dryrun_test.go b/src/dryrun_test.go
index 3e5a1be..cd265de 100644
--- a/src/dryrun_test.go
+++ b/src/dryrun_test.go
@@ -18,17 +18,13 @@ func TestDryRunOnTestThread(t *testing.T) {
 		t.Fatal(err)
 	}
 	snaps := domainSnapshotsFromEntries(entries, entries)
-	for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments", "output_tokens"} {
+	for _, domain := range []string{"billing", "cc_builtin"} {
 		fmt.Printf("--- %s ---\n", domain)
 		if snaps[domain] == nil {
 			fmt.Println("(nil)")
 			continue
 		}
-		out := snaps[domain]
-		if domain == "tools" {
-			out = map[string]interface{}{"summary": snaps[domain]["summary"]}
-		}
-		b, _ := json.MarshalIndent(out, "", "  ")
+		b, _ := json.MarshalIndent(snaps[domain], "", "  ")
 		fmt.Println(string(b))
 	}
 }
diff --git a/src/environment.go b/src/environment.go
new file mode 100644
index 0000000..5f112f5
--- /dev/null
+++ b/src/environment.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"fmt"
+	"os/exec"
+	"runtime"
+	"strings"
+)
+
+// environmentBlockText approximates the dynamic environment block Claude
+// Code injects into its system prompt every session: working directory,
+// platform/OS, and a git status snapshot with recent commits. We never see
+// the real text, but its inputs are all local, so a faithful reconstruction
+// sized via measuredOrEstimate (anchored when stable) lets cc.billing show
+// the env block as its own static_overhead item — and surfaces that a repo
+// with a long status/commit log pays for it on every request.
+func environmentBlockText() string {
+	cwd := inferCwd()
+	if cwd == "" {
+		return ""
+	}
+	var b strings.Builder
+	fmt.Fprintf(&b, "Primary working directory: %s\n", cwd)
+	fmt.Fprintf(&b, "Platform: %s\n", runtime.GOOS)
+	if out, err := exec.Command("uname", "-sr").Output(); err == nil {
+		fmt.Fprintf(&b, "OS Version: %s\n", strings.TrimSpace(string(out)))
+	}
+
+	if branch := git(cwd, "branch", "--show-current"); branch != "" {
+		fmt.Fprintf(&b, "Is a git repository: true\n")
+		fmt.Fprintf(&b, "Current branch: %s\n", branch)
+		if status := git(cwd, "status", "--porcelain"); status != "" {
+			fmt.Fprintf(&b, "Status:\n%s\n", status)
+		} else {
+			b.WriteString("Status:\n(clean)\n")
+		}
+		if log := git(cwd, "log", "--oneline", "-5"); log != "" {
+			fmt.Fprintf(&b, "Recent commits:\n%s\n", log)
+		}
+	}
+	return b.String()
+}
diff --git a/src/extractors.go b/src/extractors.go
index 1a86a0f..85e92eb 100644
--- a/src/extractors.go
+++ b/src/extractors.go
@@ -76,7 +76,7 @@ func extractMemorySnapshot() map[string]interface{} {
 		// tokenize as separate short tokens — denser than a skill body
 		// (3.5) or prose (3.9). Calibrated against /context's "Memory
 		// files" rows across 7 rule files (mean 2.37 chars/token).
-		tokens := tokEstimateAs(s, "memory_file")
+		tokens := measuredOrEstimate(s, "memory_file")
 		files = append(files, map[string]interface{}{
 			"path":        p,
 			"sha256":      sha256hex(s),
@@ -182,7 +182,7 @@ func extractAgentsSnapshot() map[string]interface{} {
 		if displayName == "" {
 			displayName = f.basename
 		}
-		tokens := tokEstimateAs(meta, "agent_frontmatter")
+		tokens := measuredOrEstimate(meta, "agent_frontmatter")
 		agents = append(agents, map[string]interface{}{
 			"name":        f.nsPrefix + displayName,
 			"path":        f.path,
@@ -290,242 +290,8 @@ func readInstalledPluginPaths(home string) map[string]string {
 	return out
 }
 
-// extractThinkingSnapshot aggregates thinking-block tokens bucketed by effort
-// level. Level is derived from actual thinking tokens per LLM call (the
-// transcript does not expose the requested budget_tokens).
-//
-// Buckets: minimal ≤500, light 501–3 000, medium 3 001–10 000, heavy >10 000.
-//
-// `cc.thinking.{summary, by_level}`.
-func extractThinkingSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} {
-	if parsed == nil {
-		parsed = ParseAssistantMessages(entries)
-		DeduplicateUsage(parsed)
-	}
-
-	// Sum thinking tokens per LLM call (MessageID).
-	callThinking := map[string]int{}
-	anonTokens := 0
-	for _, p := range parsed {
-		if p.ContentType != "thinking" {
-			continue
-		}
-		if p.MessageID == "" {
-			anonTokens += p.AttributedOutputTokens
-			continue
-		}
-		callThinking[p.MessageID] += p.AttributedOutputTokens
-	}
-	if anonTokens > 0 {
-		callThinking["__anon"] = anonTokens
-	}
-	if len(callThinking) == 0 {
-		return nil
-	}
-
-	type levelGroup struct{ calls, tokens int }
-	byLevel := map[string]*levelGroup{}
-	totalTokens, totalCalls := 0, 0
-
-	for _, tok := range callThinking {
-		l := thinkingLevel(tok)
-		g, ok := byLevel[l]
-		if !ok {
-			g = &levelGroup{}
-			byLevel[l] = g
-		}
-		g.calls++
-		g.tokens += tok
-		totalTokens += tok
-		totalCalls++
-	}
-
-	order := []string{"minimal", "light", "medium", "heavy"}
-	byLevelOut := make([]map[string]interface{}, 0, len(byLevel))
-	for _, l := range order {
-		g, ok := byLevel[l]
-		if !ok {
-			continue
-		}
-		byLevelOut = append(byLevelOut, map[string]interface{}{
-			"level":      l,
-			"tokens":     g.tokens,
-			"call_count": g.calls,
-		})
-	}
-
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": totalTokens,
-			"call_count":   totalCalls,
-		},
-		"by_level": byLevelOut,
-	}
-}
-
-func thinkingLevel(tokens int) string {
-	switch {
-	case tokens > 10000:
-		return "heavy"
-	case tokens > 3000:
-		return "medium"
-	case tokens > 500:
-		return "light"
-	default:
-		return "minimal"
-	}
-}
-
-// extractToolResultsSnapshot aggregates tool_result bytes grouped by the
-// tool that produced them. `cc.tool_results.{summary, by_tool}`.
-//
-// Two result-delivery shapes need special handling:
-//   - Normal tool_result: user message content block with type="tool_result".
-//   - ToolSearch: result is delivered as a `deferred_tools_delta` attachment
-//     (addedLines + addedNames). We attribute that delta's addedLines size
-//     to the ToolSearch tool_use that immediately preceded it.
-func extractToolResultsSnapshot(entries []TranscriptEntry) map[string]interface{} {
-	toolNames := map[string]string{}
-	for _, e := range entries {
-		if e.Type != "assistant" || e.Message == nil {
-			continue
-		}
-		for _, c := range e.Message.Content {
-			if c.Type == "tool_use" && c.ID != "" {
-				toolNames[c.ID] = c.Name
-			}
-		}
-	}
-
-	type group struct {
-		tokens, count int
-	}
-	byTool := map[string]*group{}
-	totalTokens, totalCount := 0, 0
-
-	// Walk entries in order. Pair ToolSearch tool_uses with the
-	// `deferred_tools_delta` that immediately follows them (no intervening
-	// non-delta event). Two protections against mis-attribution:
-	//
-	//   1. Pending IDs are a FIFO queue so back-to-back ToolSearches each
-	//      get their own delta — neither is silently overwritten.
-	//   2. Any event between the ToolSearch and the delta — a regular
-	//      tool_result, a non-delta attachment, an assistant message —
-	//      drains the queue, so an unrelated delta (e.g. one triggered by
-	//      an MCP toggle) can't be mis-attributed to a stale ToolSearch.
-	var pendingToolSearches []string // tool_use IDs awaiting a delta
-	resultedIDs := map[string]bool{}
-	for _, e := range entries {
-		switch e.Type {
-		case "assistant":
-			if e.Message == nil {
-				// Empty assistant entry — drain pending queue (no
-				// immediate delta means the ToolSearch's result must have
-				// already arrived as a normal tool_result, handled below).
-				pendingToolSearches = pendingToolSearches[:0]
-				continue
-			}
-			drainedThisEntry := false
-			for _, c := range e.Message.Content {
-				if c.Type == "tool_use" && c.Name == "ToolSearch" {
-					if !drainedThisEntry {
-						pendingToolSearches = pendingToolSearches[:0]
-						drainedThisEntry = true
-					}
-					pendingToolSearches = append(pendingToolSearches, c.ID)
-				} else if c.Type == "tool_use" {
-					// Some other tool_use sits between ToolSearch and a
-					// future delta — drain.
-					pendingToolSearches = pendingToolSearches[:0]
-				}
-			}
-		case "user":
-			if e.Message == nil {
-				pendingToolSearches = pendingToolSearches[:0]
-				continue
-			}
-			for _, c := range e.Message.Content {
-				if c.Type != "tool_result" {
-					continue
-				}
-				name := toolNames[c.ToolUseID]
-				if name == "" {
-					name = "unknown"
-				}
-				tokens := resultTokens(c.Content)
-				g, exists := byTool[name]
-				if !exists {
-					g = &group{}
-					byTool[name] = g
-				}
-				g.tokens += tokens
-				g.count++
-				totalTokens += tokens
-				totalCount++
-				resultedIDs[c.ToolUseID] = true
-			}
-			// Any user message (with tool_result OR otherwise) breaks the
-			// ToolSearch→delta adjacency, so drain.
-			pendingToolSearches = pendingToolSearches[:0]
-		case "attachment":
-			if e.Attachment == nil {
-				continue
-			}
-			if e.Attachment.Type != "deferred_tools_delta" {
-				// Non-delta attachment between ToolSearch and a delta —
-				// breaks adjacency.
-				pendingToolSearches = pendingToolSearches[:0]
-				continue
-			}
-			if len(pendingToolSearches) == 0 {
-				continue
-			}
-			// Synthesize a ToolSearch tool_result from the delta payload —
-			// the addedLines text is exactly what the model sees.
-			payload := strings.Join(e.Attachment.AddedLines, "\n")
-			if payload == "" {
-				payload = strings.Join(e.Attachment.AddedNames, "\n")
-			}
-			pendingID := pendingToolSearches[0]
-			pendingToolSearches = pendingToolSearches[1:]
-			if payload == "" {
-				continue
-			}
-			tokens := tokEstimateAs(payload, "deferred_tools_payload")
-			g, exists := byTool["ToolSearch"]
-			if !exists {
-				g = &group{}
-				byTool["ToolSearch"] = g
-			}
-			g.tokens += tokens
-			g.count++
-			totalTokens += tokens
-			totalCount++
-			resultedIDs[pendingID] = true
-		}
-	}
-	if totalCount == 0 {
-		return nil
-	}
-
-	byToolOut := make([]map[string]interface{}, 0, len(byTool))
-	for name, g := range byTool {
-		byToolOut = append(byToolOut, map[string]interface{}{
-			"name":   name,
-			"tokens": g.tokens,
-			"count":  g.count,
-		})
-	}
-	sort.Slice(byToolOut, func(i, j int) bool {
-		return byToolOut[i]["tokens"].(int) > byToolOut[j]["tokens"].(int)
-	})
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": totalTokens,
-			"count":        totalCount,
-		},
-		"by_tool": byToolOut,
-	}
+type toolResultGroup struct {
+	tokens, count int
 }
 
 // resultTokens estimates the token cost of a tool_result.content payload,
@@ -555,43 +321,6 @@ func resultTokens(content interface{}) int {
 	}
 }
 
-// extractUserPromptsSnapshot returns the per-turn user-text contribution.
-// Tool results don't count here (they're under cc.tool_results); skill
-// bodies don't count either (they're under cc.skills.loaded). Without
-// excluding skill bodies, a `Skill` tool_use would inflate user_prompts
-// by the entire skill body — 100K+ tokens for claude-api.
-// `cc.user_prompts.summary`.
-func extractUserPromptsSnapshot(entries []TranscriptEntry) map[string]interface{} {
-	skillBodyHashes := skillBodyHashSet(entries)
-
-	totalTokens, count := 0, 0
-	for _, e := range entries {
-		if e.Type != "user" || e.Message == nil {
-			continue
-		}
-		for _, c := range e.Message.Content {
-			if c.Type != "text" {
-				continue
-			}
-			if skillBodyHashes[sha256hex(c.Text)] {
-				continue
-			}
-			totalTokens += tokEstimateAs(c.Text, "user_prompt")
-			count++
-		}
-	}
-	if count == 0 {
-		return nil
-	}
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": totalTokens,
-			"count":        count,
-			"bucket":       promptBucket(totalTokens),
-		},
-	}
-}
-
 // skillBodyHashSet returns the set of sha256 hashes of every skill body
 // loaded this session — across both Skill-tool-use loads and slash-command
 // loads. Comparing by hash (rather than the raw string) is safer against
@@ -623,196 +352,4 @@ func promptBucket(tokens int) string {
 	}
 }
 
-// extractFileAttachmentsSnapshot returns @-mentioned + system-injected file
-// attachments this turn grouped by file extension. Skill bodies are NOT here —
-// they go under cc.skills.loaded. `cc.file_attachments.{summary, by_type}`.
-func extractFileAttachmentsSnapshot(entries []TranscriptEntry) map[string]interface{} {
-	type group struct{ tokens, count int }
-	byExt := map[string]*group{}
-	total, fileCount := 0, 0
-
-	for _, e := range entries {
-		if e.Type != "attachment" || e.Attachment == nil {
-			continue
-		}
-		if e.Attachment.Type != "file" {
-			continue
-		}
-		var wrapper struct {
-			File struct {
-				Path    string `json:"path,omitempty"`
-				Content string `json:"content,omitempty"`
-			} `json:"file"`
-		}
-		if err := json.Unmarshal(e.Attachment.Content, &wrapper); err != nil {
-			continue
-		}
-		tokens := tokEstimate(wrapper.File.Content)
-
-		ext := strings.ToLower(filepath.Ext(wrapper.File.Path))
-		if ext == "" {
-			ext = "other"
-		}
-
-		g, ok := byExt[ext]
-		if !ok {
-			g = &group{}
-			byExt[ext] = g
-		}
-		g.tokens += tokens
-		g.count++
-		total += tokens
-		fileCount++
-	}
-
-	if fileCount == 0 {
-		return nil
-	}
-
-	byTypeOut := make([]map[string]interface{}, 0, len(byExt))
-	for ext, g := range byExt {
-		byTypeOut = append(byTypeOut, map[string]interface{}{
-			"ext":        ext,
-			"tokens":     g.tokens,
-			"file_count": g.count,
-		})
-	}
-	sort.Slice(byTypeOut, func(i, j int) bool {
-		return byTypeOut[i]["tokens"].(int) > byTypeOut[j]["tokens"].(int)
-	})
-
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": total,
-			"file_count":   fileCount,
-		},
-		"by_type": byTypeOut,
-	}
-}
-
-// extractPriorAssistantSnapshot is the cumulative cost of prior assistant
-// output in the session — what gets replayed every turn.
-// `cc.prior_assistant.summary`.
-func extractPriorAssistantSnapshot(fullEntries, turnEntries []TranscriptEntry) map[string]interface{} {
-	sessionTokens, sessionMsgs := assistantOutputTotals(fullEntries)
-	turnTokens, turnMsgs := assistantOutputTotals(turnEntries)
-	priorTokens := sessionTokens - turnTokens
-	priorMsgs := sessionMsgs - turnMsgs
-	if priorTokens < 0 {
-		priorTokens = 0
-	}
-	if priorMsgs < 0 {
-		priorMsgs = 0
-	}
-	if priorTokens == 0 && priorMsgs == 0 {
-		return nil
-	}
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens":  priorTokens,
-			"message_count": priorMsgs,
-		},
-	}
-}
-
-func assistantOutputTotals(entries []TranscriptEntry) (tokens, msgs int) {
-	for _, e := range entries {
-		if e.Type != "assistant" || e.Message == nil || e.Message.Usage == nil {
-			continue
-		}
-		tokens += e.Message.Usage.OutputTokens
-		msgs++
-	}
-	return
-}
-
-// extractAssistantTextSnapshot returns the per-turn text-block contribution.
-// `cc.assistant_text.summary`.
-func extractAssistantTextSnapshot(entries []TranscriptEntry) map[string]interface{} {
-	total, count := 0, 0
-	for _, e := range entries {
-		if e.Type != "assistant" || e.Message == nil {
-			continue
-		}
-		for _, c := range e.Message.Content {
-			if c.Type == "text" {
-				total += tokEstimateAs(c.Text, "assistant_text")
-				count++
-			}
-		}
-	}
-	if count == 0 {
-		return nil
-	}
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": total,
-			"block_count":  count,
-		},
-	}
-}
-
-// extractOutputTokensSnapshot aggregates attributed output tokens by category
-// at the trace level. This lets the Sankey visualization use
-// sum(metadata.cc.output_tokens.by_category.*) directly without span
-// aggregation. `cc.output_tokens.{summary, by_category}`.
-//
-// Categories:
-//   - thinking         — extended thinking blocks
-//   - assistant_text   — visible text responses
-//   - builtin_tool_use — CC built-in tools (Bash, Read, Edit, …)
-//   - mcp_tool_use     — MCP tool calls (name prefix "mcp__")
-//   - skill_invocations — Skill tool invocations
-//
-// `parsed` should be the dedup-applied output of ParseAssistantMessages +
-// DeduplicateUsage. Pass nil to reparse from entries.
-func extractOutputTokensSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} {
-	if parsed == nil {
-		parsed = ParseAssistantMessages(entries)
-		DeduplicateUsage(parsed)
-	}
-
-	var (
-		thinking         int
-		assistantText    int
-		builtinToolUse   int
-		mcpToolUse       int
-		skillInvocations int
-	)
-
-	for _, p := range parsed {
-		tok := p.AttributedOutputTokens
-		switch p.ContentType {
-		case "thinking":
-			thinking += tok
-		case "text":
-			assistantText += tok
-		case "tool_use":
-			switch {
-			case strings.HasPrefix(p.Content.Name, "mcp__"):
-				mcpToolUse += tok
-			case p.Content.Name == "Skill":
-				skillInvocations += tok
-			default:
-				builtinToolUse += tok
-			}
-		}
-	}
-
-	total := thinking + assistantText + builtinToolUse + mcpToolUse + skillInvocations
-	if total == 0 {
-		return nil
-	}
-	return map[string]interface{}{
-		"summary": map[string]interface{}{
-			"total_tokens": total,
-		},
-		"by_category": map[string]interface{}{
-			"thinking":          thinking,
-			"assistant_text":    assistantText,
-			"builtin_tool_use":  builtinToolUse,
-			"mcp_tool_use":      mcpToolUse,
-			"skill_invocations": skillInvocations,
-		},
-	}
-}
+type fileAttachmentGroup struct{ tokens, count int }
diff --git a/src/extractors_test.go b/src/extractors_test.go
index b7a7967..36a6c4c 100644
--- a/src/extractors_test.go
+++ b/src/extractors_test.go
@@ -6,197 +6,6 @@ import (
 	"testing"
 )
 
-func TestExtractOutputTokensSnapshot(t *testing.T) {
-	// One LLM call with thinking + text + builtin tool + MCP tool + Skill.
-	// All blocks share the same message.id so DeduplicateUsage can attribute.
-	const msgID = "msg_abc123"
-	entries := []TranscriptEntry{
-		{
-			Type: "assistant",
-			UUID: "u1",
-			Message: &Message{
-				ID:    msgID,
-				Model: "claude-opus-4-8",
-				Usage: &Usage{OutputTokens: 1000},
-				Content: ContentSlice{
-					{Type: "thinking", Thinking: "..."},
-					{Type: "text", Text: "hello world"},
-					{Type: "tool_use", ID: "t1", Name: "Bash", Input: map[string]interface{}{"command": "ls"}},
-					{Type: "tool_use", ID: "t2", Name: "mcp__slack__send", Input: map[string]interface{}{}},
-					{Type: "tool_use", ID: "t3", Name: "Skill", Input: map[string]interface{}{}},
-				},
-			},
-		},
-	}
-
-	parsed := ParseAssistantMessages(entries)
-	DeduplicateUsage(parsed)
-
-	snap := extractOutputTokensSnapshot(entries, parsed)
-	if snap == nil {
-		t.Fatal("expected non-nil snapshot")
-	}
-
-	summary, _ := snap["summary"].(map[string]interface{})
-	if summary == nil {
-		t.Fatal("missing summary")
-	}
-	total, _ := summary["total_tokens"].(int)
-	if total != 1000 {
-		t.Errorf("total_tokens = %d, want 1000", total)
-	}
-
-	cat, _ := snap["by_category"].(map[string]interface{})
-	if cat == nil {
-		t.Fatal("missing by_category")
-	}
-
-	// Sum of all categories must equal total.
-	catSum := 0
-	for _, key := range []string{"thinking", "assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
-		v, _ := cat[key].(int)
-		catSum += v
-	}
-	if catSum != total {
-		t.Errorf("sum(by_category) = %d, want %d (total_tokens)", catSum, total)
-	}
-
-	// thinking must be > 0 (leftover after non-thinking blocks).
-	if thinking, _ := cat["thinking"].(int); thinking == 0 {
-		t.Error("thinking should be > 0")
-	}
-
-	// Each non-thinking category must have been assigned something.
-	for _, key := range []string{"assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
-		if v, _ := cat[key].(int); v == 0 {
-			t.Errorf("by_category[%s] = 0, expected > 0", key)
-		}
-	}
-}
-
-func TestExtractOutputTokensSnapshotNilOnEmpty(t *testing.T) {
-	snap := extractOutputTokensSnapshot(nil, nil)
-	if snap != nil {
-		t.Errorf("expected nil on empty entries, got %v", snap)
-	}
-}
-
-func TestExtractThinkingSnapshotByLevel(t *testing.T) {
-	// Three LLM calls with different thinking budgets:
-	//   msg1 → 200 tokens  (minimal)
-	//   msg2 → 2000 tokens (light)
-	//   msg3 → 15000 tokens (heavy)
-	makeEntry := func(msgID string, thinkingTokens int) TranscriptEntry {
-		return TranscriptEntry{
-			Type: "assistant",
-			Message: &Message{
-				ID:    msgID,
-				Model: "claude-sonnet-4-6",
-				Usage: &Usage{OutputTokens: thinkingTokens},
-				Content: ContentSlice{
-					{Type: "thinking", Thinking: "..."},
-				},
-			},
-		}
-	}
-	entries := []TranscriptEntry{
-		makeEntry("msg1", 200),
-		makeEntry("msg2", 2000),
-		makeEntry("msg3", 15000),
-	}
-	parsed := ParseAssistantMessages(entries)
-	DeduplicateUsage(parsed)
-
-	snap := extractThinkingSnapshot(entries, parsed)
-	if snap == nil {
-		t.Fatal("expected non-nil snapshot")
-	}
-
-	summary := snap["summary"].(map[string]interface{})
-	if total := summary["total_tokens"].(int); total != 17200 {
-		t.Errorf("total_tokens = %d, want 17200", total)
-	}
-	if calls := summary["call_count"].(int); calls != 3 {
-		t.Errorf("call_count = %d, want 3", calls)
-	}
-
-	byLevel := snap["by_level"].([]map[string]interface{})
-	levels := map[string]map[string]interface{}{}
-	for _, l := range byLevel {
-		levels[l["level"].(string)] = l
-	}
-
-	if _, ok := levels["minimal"]; !ok {
-		t.Error("expected minimal level")
-	}
-	if _, ok := levels["light"]; !ok {
-		t.Error("expected light level")
-	}
-	if _, ok := levels["heavy"]; !ok {
-		t.Error("expected heavy level")
-	}
-	if _, ok := levels["medium"]; ok {
-		t.Error("unexpected medium level")
-	}
-
-	if levels["minimal"]["call_count"].(int) != 1 {
-		t.Errorf("minimal call_count = %d, want 1", levels["minimal"]["call_count"])
-	}
-	if levels["heavy"]["tokens"].(int) != 15000 {
-		t.Errorf("heavy tokens = %d, want 15000", levels["heavy"]["tokens"])
-	}
-}
-
-func TestExtractFileAttachmentsSnapshotByType(t *testing.T) {
-	makeAttachment := func(path, content string) TranscriptEntry {
-		raw, _ := json.Marshal(map[string]interface{}{
-			"file": map[string]string{"path": path, "content": content},
-		})
-		return TranscriptEntry{
-			Type: "attachment",
-			Attachment: &Attachment{
-				Type:    "file",
-				Content: raw,
-			},
-		}
-	}
-	entries := []TranscriptEntry{
-		makeAttachment("/repo/main.go", "package main\nfunc main() {}"),
-		makeAttachment("/repo/util.go", "package main"),
-		makeAttachment("/repo/README.md", "# Hello"),
-		makeAttachment("/repo/Makefile", "build:"),  // no extension → "other"
-	}
-
-	snap := extractFileAttachmentsSnapshot(entries)
-	if snap == nil {
-		t.Fatal("expected non-nil snapshot")
-	}
-
-	summary := snap["summary"].(map[string]interface{})
-	if fc := summary["file_count"].(int); fc != 4 {
-		t.Errorf("file_count = %d, want 4", fc)
-	}
-
-	byType := snap["by_type"].([]map[string]interface{})
-	exts := map[string]map[string]interface{}{}
-	for _, row := range byType {
-		exts[row["ext"].(string)] = row
-	}
-
-	if _, ok := exts[".go"]; !ok {
-		t.Error("expected .go entry")
-	}
-	if _, ok := exts[".md"]; !ok {
-		t.Error("expected .md entry")
-	}
-	if _, ok := exts["other"]; !ok {
-		t.Error("expected other entry for Makefile")
-	}
-	if exts[".go"]["file_count"].(int) != 2 {
-		t.Errorf(".go file_count = %d, want 2", exts[".go"]["file_count"])
-	}
-}
-
 func TestExtractAgentsSnapshotPrefersFrontmatterName(t *testing.T) {
 	home := t.TempDir()
 	cwd := t.TempDir()
@@ -280,4 +89,3 @@ description: Should not show up.
 		t.Errorf("bad:ghost must be filtered out (disabled plugin), got %v", names)
 	}
 }
-
diff --git a/src/main.go b/src/main.go
index 8afae26..da8e7bf 100644
--- a/src/main.go
+++ b/src/main.go
@@ -25,8 +25,8 @@ type HookInput struct {
 	Prompt              string `json:"prompt"`
 	AgentID             string `json:"agent_id"`
 	AgentType           string `json:"agent_type"`
-	AgentTranscriptPath  string `json:"agent_transcript_path"`
-	CustomInstructions   string `json:"custom_instructions"`
+	AgentTranscriptPath string `json:"agent_transcript_path"`
+	CustomInstructions  string `json:"custom_instructions"`
 }
 
 var (
@@ -49,6 +49,14 @@ func main() {
 	// background subprocess to ask claude `/context` and PATCH the result
 	// onto the trace. This path never reads stdin; it shells out to claude
 	// and exits.
+	// Detached token-count mode: measure exact token counts for anchor
+	// candidates via the free count_tokens endpoint and persist them for
+	// the next flush (see count_tokens.go).
+	if os.Getenv("OPIK_CC_TOKEN_COUNT") == "1" {
+		runTokenCountMode()
+		os.Exit(0)
+	}
+
 	if os.Getenv("OPIK_CC_CONTEXT_FETCH") == "1" {
 		var err error
 		config, err = LoadConfig()
@@ -233,6 +241,9 @@ func onStop() {
 	if err := spawnDetachedContextFetch(state.SessionID, state.TraceID, state.Cwd); err != nil {
 		debugLog("spawn context fetch: %v", err)
 	}
+	if err := spawnDetachedTokenCount(state.SessionID); err != nil {
+		debugLog("spawn token count: %v", err)
+	}
 
 	debugLog("done")
 }
@@ -727,12 +738,12 @@ func processTranscriptEntries(traceID string, entries []TranscriptEntry, parentS
 
 		if p.Usage != nil && span.Usage == nil {
 			span.Usage = map[string]int{
-				"prompt_tokens":     p.Usage.InputTokens,
-				"completion_tokens": p.Usage.OutputTokens,
-				"total_tokens":      p.Usage.InputTokens + p.Usage.OutputTokens,
-				"original_usage.input_tokens":               p.Usage.InputTokens,
-				"original_usage.output_tokens":              p.Usage.OutputTokens,
-				"original_usage.cache_read_input_tokens":    p.Usage.CacheReadInputTokens,
+				"prompt_tokens":                              p.Usage.InputTokens,
+				"completion_tokens":                          p.Usage.OutputTokens,
+				"total_tokens":                               p.Usage.InputTokens + p.Usage.OutputTokens,
+				"original_usage.input_tokens":                p.Usage.InputTokens,
+				"original_usage.output_tokens":               p.Usage.OutputTokens,
+				"original_usage.cache_read_input_tokens":     p.Usage.CacheReadInputTokens,
 				"original_usage.cache_creation_input_tokens": p.Usage.CacheCreationInputTokens,
 			}
 			span.Provider = "anthropic"
@@ -747,34 +758,25 @@ func processTranscriptEntries(traceID string, entries []TranscriptEntry, parentS
 	return spans
 }
 
-// domainSnapshotsFromEntries returns every per-domain snapshot keyed by
+// domainSnapshotsFromEntries returns the per-domain snapshots keyed by
 // domain name. Called by postTraceMetrics for the trace-level write and
-// by dryrun_test for offline validation. The parsed+deduped slice is
-// computed once and passed to the few extractors that need it
-// (extractThinkingSnapshot), avoiding redundant work.
+// by dryrun_test for offline validation.
 func domainSnapshotsFromEntries(fullEntries, turnEntries []TranscriptEntry) map[string]map[string]interface{} {
-	parsedTurn := ParseAssistantMessages(turnEntries)
-	DeduplicateUsage(parsedTurn)
-
 	return map[string]map[string]interface{}{
-		"skills":           BuildSkillsSnapshot(fullEntries),
-		"tools":            extractToolsSnapshot(fullEntries),
-		"memory":           extractMemorySnapshot(),
-		"agents":           extractAgentsSnapshot(),
-		"thinking":         extractThinkingSnapshot(turnEntries, parsedTurn),
-		"tool_results":     extractToolResultsSnapshot(turnEntries),
-		"user_prompts":     extractUserPromptsSnapshot(turnEntries),
-		"file_attachments": extractFileAttachmentsSnapshot(turnEntries),
-		"prior_assistant":  extractPriorAssistantSnapshot(fullEntries, turnEntries),
-		"assistant_text":   extractAssistantTextSnapshot(turnEntries),
-		"output_tokens":    extractOutputTokensSnapshot(turnEntries, parsedTurn),
+		// The product schema: per-call positional cache-tier attribution.
+		// Tier tokens are per-call billing events — additive across traces
+		// and exactly reconciled to API usage (see billing.go). Every UI
+		// lane value, breakdown item, stacked definition/usage segment and
+		// count comes from this block.
+		"billing": computeBillingSnapshot(fullEntries, turnEntries),
 		// cc_builtin covers the bundled system-prompt + tool-catalog cost
 		// /context reports under "System prompt" / "System tools" /
 		// "System tools (deferred)". These never appear in the transcript
 		// (the binary holds the schemas internally), so values are
 		// version-keyed approximations — marked `estimated: true` in the
-		// payload so the FE can render them as such.
-		"cc_builtin":       extractCCBuiltinSnapshot(fullEntries),
+		// payload so the FE can render them as such. Kept for the Static
+		// overhead disclaimer and calibration diagnostics.
+		"cc_builtin": extractCCBuiltinSnapshot(fullEntries),
 	}
 }
 
@@ -1068,4 +1070,3 @@ func debugLog(format string, args ...interface{}) {
 	fmt.Fprintf(f, "[%s] ", ts)
 	fmt.Fprintf(f, format+"\n", args...)
 }
-
diff --git a/src/metrics.go b/src/metrics.go
index 1d9adff..4f0bc7a 100644
--- a/src/metrics.go
+++ b/src/metrics.go
@@ -245,7 +245,6 @@ func postTraceMetrics(state *State) {
 		state.TraceID[:8], repo, branch, commits, insC+delC, files, authored, overwritten, len(metrics))
 }
 
-
 // mergeMetadataCC reads the trace's current metadata, merges new keys into
 // the `cc` block (preserving identity already written at trace creation and
 // any non-cc metadata Opik may have added), and PATCHes the trace.
@@ -293,4 +292,4 @@ func inferCwd() string {
 		return d
 	}
 	return ""
-}
\ No newline at end of file
+}
diff --git a/src/plugin_catalog.go b/src/plugin_catalog.go
index f45d363..c240309 100644
--- a/src/plugin_catalog.go
+++ b/src/plugin_catalog.go
@@ -34,7 +34,7 @@ type pluginCatalogTokens struct {
 }
 
 type pluginCatalogComponent struct {
-	Name  string `json:"name"`
+	Name  string              `json:"name"`
 	Chars pluginCatalogTokens `json:"chars"`
 }
 
diff --git a/src/settings.go b/src/settings.go
index ddeda84..fb69d24 100644
--- a/src/settings.go
+++ b/src/settings.go
@@ -20,11 +20,11 @@ import (
 //
 // Resolution order (Claude Code's documented layering — later wins):
 //
-//   1. Managed (org-pushed)         OS-specific (see managedSettingsPaths)
-//   2. User                         ~/.claude/settings.json
-//   3. User local                   ~/.claude/settings.local.json
-//   4. Project                      <cwd>/.claude/settings.json
-//   5. Project local                <cwd>/.claude/settings.local.json
+//  1. Managed (org-pushed)         OS-specific (see managedSettingsPaths)
+//  2. User                         ~/.claude/settings.json
+//  3. User local                   ~/.claude/settings.local.json
+//  4. Project                      <cwd>/.claude/settings.json
+//  5. Project local                <cwd>/.claude/settings.local.json
 //
 // A plugin counts as enabled iff the merged map's value is true (boolean
 // or "true"/"1" string). Anything else — including unset, false, or an
diff --git a/src/settings_test.go b/src/settings_test.go
index 2e954a5..794fb4a 100644
--- a/src/settings_test.go
+++ b/src/settings_test.go
@@ -20,10 +20,10 @@ func TestReadEnabledPluginsField(t *testing.T) {
 	}`)
 	got := readEnabledPluginsField(path)
 	wantEnabled := map[string]bool{
-		"on@mp":      true,
-		"off@mp":     false,
-		"strtrue@mp": true,
-		"str1@mp":    true,
+		"on@mp":       true,
+		"off@mp":      false,
+		"strtrue@mp":  true,
+		"str1@mp":     true,
 		"strfalse@mp": false,
 	}
 	for k, want := range wantEnabled {
diff --git a/src/skill_hash.go b/src/skill_hash.go
index 3031f6e..bb48a29 100644
--- a/src/skill_hash.go
+++ b/src/skill_hash.go
@@ -6,6 +6,7 @@ import (
 	"os"
 	"path/filepath"
 	"sort"
+	"strconv"
 	"strings"
 )
 
@@ -169,32 +170,24 @@ func extractLoadedSkills(entries []TranscriptEntry) []SkillEvent {
 	if len(loads) == 0 {
 		return nil
 	}
-	// Latest-wins per skill name (mirrors how the model sees the most
-	// recent body). FirstSeenIdx tracks the first appearance for ordering
-	// stability across the FE.
-	firstIdx := map[string]int{}
-	latest := map[string]loadedSkillBody{}
-	order := []string{}
+	// One event per load (OPIK-6873): repeat loads of the same skill each
+	// inject their body into context and each replays from then on, so
+	// collapsing to latest-wins under-counted both tokens and load counts.
+	// Every event carries a stable unique ToolUseID (toolu_… or
+	// "slash:<idx>") so consumers can dedupe events across the cumulative
+	// per-trace loaded[] arrays.
+	out := make([]SkillEvent, 0, len(loads))
 	for _, l := range loads {
-		if _, ok := firstIdx[l.Name]; !ok {
-			firstIdx[l.Name] = l.Index
-			order = append(order, l.Name)
-		}
-		latest[l.Name] = l
-	}
-	out := make([]SkillEvent, 0, len(order))
-	for _, name := range order {
-		l := latest[name]
-		path, _ := resolveSkillBody(name)
+		path, _ := resolveSkillBody(l.Name)
 		source := "bundled"
 		if path != "" {
 			source = "listing"
 		}
 		ev := SkillEvent{
-			Name:         name,
+			Name:         l.Name,
 			SHA256:       sha256hex(l.Body),
 			BodyTokens:   tokEstimateAs(l.Body, "skill_body"),
-			FirstSeenIdx: firstIdx[name],
+			FirstSeenIdx: l.Index,
 			Source:       source,
 			Path:         path,
 			ToolUseID:    l.ToolUseID,
@@ -205,7 +198,7 @@ func extractLoadedSkills(entries []TranscriptEntry) []SkillEvent {
 		// cross-check — CC's own tokenizer produced that number when it
 		// built the cache, so it's a tighter signal than our 3.5 ratio
 		// for skills with rich code blocks or markdown tables.
-		if pluginShort, leaf, ok := splitNamespacedSkillName(name); ok {
+		if pluginShort, leaf, ok := splitNamespacedSkillName(l.Name); ok {
 			home, _ := os.UserHomeDir()
 			model := mostRecentModelFromEntries(entries)
 			if comp, fullKey, hit := pluginCatalogLookup(home, pluginShort, "skill", leaf); hit {
@@ -249,8 +242,9 @@ func mostRecentModelFromEntries(entries []TranscriptEntry) string {
 
 // loadedSkillBody is one resolved skill load. Index is the transcript entry
 // index of the user message that carried the body — used by FirstSeenIdx
-// in extractLoadedSkills for stable ordering. ToolUseID is populated for
-// Skill-tool-use loads, empty for slash-command loads.
+// in extractLoadedSkills for stable ordering. ToolUseID is the toolu_… id
+// for Skill-tool-use loads and a synthetic "slash:<entry index>" id for
+// slash-command loads, so every load event has a stable unique identity.
 type loadedSkillBody struct {
 	Name      string
 	Body      string
@@ -411,9 +405,16 @@ func slashCommandSkillLoads(entries []TranscriptEntry) []loadedSkillBody {
 			}
 			if strings.HasPrefix(c.Text, slashCommandPrefix) {
 				out = append(out, loadedSkillBody{
-					Name:  pendingName,
-					Body:  c.Text,
-					Index: i,
+					Name: pendingName,
+					Body: c.Text,
+					// Synthetic load id (OPIK-6873): slash loads have no
+					// tool_use, but consumers dedupe load events across the
+					// cumulative per-trace loaded[] arrays via this id.
+					// The transcript entry index is stable for the session
+					// (append-only file), so the same load keeps the same
+					// id on every later trace.
+					ToolUseID: "slash:" + strconv.Itoa(i),
+					Index:     i,
 				})
 			}
 			// Whether we matched a body or saw an unrelated text block,
diff --git a/src/skill_hash_test.go b/src/skill_hash_test.go
index 3a450b6..e8f1615 100644
--- a/src/skill_hash_test.go
+++ b/src/skill_hash_test.go
@@ -90,8 +90,8 @@ func TestBuildLoadedSkillBodiesSlashCommand(t *testing.T) {
 	if !strings.HasPrefix(loads[0].Body, "Base directory") {
 		t.Errorf("body should start with the prefix the model actually sees; got %q", loads[0].Body[:30])
 	}
-	if loads[0].ToolUseID != "" {
-		t.Errorf("slash-command load should have empty ToolUseID, got %q", loads[0].ToolUseID)
+	if !strings.HasPrefix(loads[0].ToolUseID, "slash:") {
+		t.Errorf("slash-command load should have a synthetic slash:<idx> ToolUseID, got %q", loads[0].ToolUseID)
 	}
 }
 
diff --git a/src/state.go b/src/state.go
index db2e450..abd6a7e 100644
--- a/src/state.go
+++ b/src/state.go
@@ -10,8 +10,8 @@ import (
 type State struct {
 	TraceID      string `json:"trace_id"`
 	StartTime    string `json:"start_time"`
-	StartUnix    int64  `json:"start_unix,omitempty"`     // wall-clock seconds the current trace started
-	PromptHash   string `json:"prompt_hash,omitempty"`    // sha256 of the user prompt for the current trace
+	StartUnix    int64  `json:"start_unix,omitempty"`  // wall-clock seconds the current trace started
+	PromptHash   string `json:"prompt_hash,omitempty"` // sha256 of the user prompt for the current trace
 	SessionID    string `json:"session_id"`
 	Transcript   string `json:"transcript"`
 	StartLine    int    `json:"start_line"`
diff --git a/src/tools_extract.go b/src/tools_extract.go
index f95bc64..315cc8d 100644
--- a/src/tools_extract.go
+++ b/src/tools_extract.go
@@ -95,9 +95,9 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} {
 	}
 	var avail []toolEntry
 	builtinCount, mcpCount := 0, 0
-	serverTools := map[string][]string{}    // server → tool names
-	serverLines := map[string]string{}      // server → joined description lines
-	builtinLines, mcpLines := "", ""        // sources → joined lines
+	serverTools := map[string][]string{} // server → tool names
+	serverLines := map[string]string{}   // server → joined description lines
+	builtinLines, mcpLines := "", ""     // sources → joined lines
 
 	for name, line := range available {
 		// SplitN("__", 3) — assumes server names contain no `__`. Tool
@@ -156,12 +156,12 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} {
 		mcpInstructionsTokensTotal += instrTokens
 		mcpEstimatedSchemaTokensTotal += estSchema
 		byServer = append(byServer, map[string]interface{}{
-			"server":                    server,
-			"tool_count":                len(tools),
-			"schema_tokens":             tokEstimateAs(serverLines[server], "deferred_tools_payload"),
-			"instructions_tokens":       instrTokens,
-			"estimated_schema_tokens":   estSchema,
-			"estimated_total_tokens":    instrTokens + estSchema,
+			"server":                  server,
+			"tool_count":              len(tools),
+			"schema_tokens":           tokEstimateAs(serverLines[server], "deferred_tools_payload"),
+			"instructions_tokens":     instrTokens,
+			"estimated_schema_tokens": estSchema,
+			"estimated_total_tokens":  instrTokens + estSchema,
 		})
 	}
 	sort.Slice(byServer, func(i, j int) bool {
@@ -191,18 +191,18 @@ func extractToolsSnapshot(entries []TranscriptEntry) map[string]interface{} {
 				"schema_tokens":   builtinSchemaTokens,
 			},
 			"mcp": map[string]interface{}{
-				"available_count":             mcpCount,
-				"schema_tokens":               mcpSchemaTokens,
-				"instructions_tokens":         mcpInstructionsTokensTotal,
-				"estimated_schema_tokens":     mcpEstimatedSchemaTokensTotal,
+				"available_count":         mcpCount,
+				"schema_tokens":           mcpSchemaTokens,
+				"instructions_tokens":     mcpInstructionsTokensTotal,
+				"estimated_schema_tokens": mcpEstimatedSchemaTokensTotal,
 				// estimated_deferred_tokens is the closest equivalent to
 				// /context's "MCP tools (deferred)" row — addedLines we saw
 				// + estimated full-schema overhead per tool. The
 				// instructions block lives at by_source.mcp.instructions_tokens
 				// because /context buckets it under "System tools (deferred)"
 				// on some CC versions; surfacing both lets the FE pick.
-				"estimated_deferred_tokens":   mcpSchemaTokens + mcpEstimatedSchemaTokensTotal,
-				"estimated":                   mcpCount > 0,
+				"estimated_deferred_tokens": mcpSchemaTokens + mcpEstimatedSchemaTokensTotal,
+				"estimated":                 mcpCount > 0,
 			},
 		},
 		"by_server": byServer,
diff --git a/src/transcript.go b/src/transcript.go
index 7a864fc..c9d925c 100644
--- a/src/transcript.go
+++ b/src/transcript.go
@@ -30,21 +30,21 @@ type TranscriptEntry struct {
 // readdedNames (names re-enabled after a prior removal), and
 // pendingMcpServers (servers connecting; their tools not yet visible).
 type Attachment struct {
-	Type              string          `json:"type"`
-	Content           json.RawMessage `json:"content,omitempty"`
-	Names             []string        `json:"names,omitempty"`
-	SkillCount        int             `json:"skillCount,omitempty"`
-	IsInitial         bool            `json:"isInitial,omitempty"`
-	AddedNames        []string        `json:"addedNames,omitempty"`
-	AddedLines        []string        `json:"addedLines,omitempty"`
+	Type       string          `json:"type"`
+	Content    json.RawMessage `json:"content,omitempty"`
+	Names      []string        `json:"names,omitempty"`
+	SkillCount int             `json:"skillCount,omitempty"`
+	IsInitial  bool            `json:"isInitial,omitempty"`
+	AddedNames []string        `json:"addedNames,omitempty"`
+	AddedLines []string        `json:"addedLines,omitempty"`
 	// AddedBlocks carries multi-line text payloads. `mcp_instructions_delta`
 	// uses it for per-server instructions (the always-on context CC injects
 	// for each connected MCP server: capabilities, constraints, etc.). One
 	// block per name, parallel arrays.
-	AddedBlocks       []string        `json:"addedBlocks,omitempty"`
-	RemovedNames      []string        `json:"removedNames,omitempty"`
-	ReaddedNames      []string        `json:"readdedNames,omitempty"`
-	PendingMcpServers []string        `json:"pendingMcpServers,omitempty"`
+	AddedBlocks       []string `json:"addedBlocks,omitempty"`
+	RemovedNames      []string `json:"removedNames,omitempty"`
+	ReaddedNames      []string `json:"readdedNames,omitempty"`
+	PendingMcpServers []string `json:"pendingMcpServers,omitempty"`
 }
 
 // ContentString decodes Content as a string. Returns "" if Content is missing