From b49d720a618f14d8b16978bdee0bef564568978a Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 12:08:12 +0300
Subject: [PATCH 01/13] codex adapter chunk A: line parser + byte-offset cursor
 (foundation)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First slice of the codex adapter (SOW-0004): the pure JSONL line parser
and the durable resume cursor, mirroring the claude_code adapter's
structure.

- parser.go: RolloutLine envelope {timestamp,type,payload} → typed record,
  per-discriminator dispatch, and dual unknown-variant tolerance with
  distinct sentinels for an unknown top-level type vs an unknown nested
  payload.type (non-colliding dedup keys so the scanner emits exactly one
  SourceError per variant per session). A skip flag separates silent
  no-ops (empty lines, ghost_snapshot, absent payload) from real errors.
- types.go: typed payload bodies plus the polymorphic source classifier
  (root / sub_agent / tool_internal / forward-compat other) with
  thread_spawn parent extraction, and the known/no-op nested-type sets
  across Limited and Extended persistence modes.
- cursor.go: byte-offset Files map with claude_code's After/truncation
  semantics verbatim; drops the sub-agent-deferral fields codex does not
  need; adds a LegacyJSON suppression map (one informational SourceError
  per legacy .json file, default off) and a version-gated ParseCursor.

Standalone-compilable; mapper/scanner/tailer/adapter wiring land in later
chunks. Gates green: gofmt/vet/golangci(0)/gosec(0)/race tests pass at
86.5% coverage; FuzzParseLine 0 crashes; no sibling-adapter regression.
---
 internal/adapters/codex/cursor.go           | 197 ++++++++++
 internal/adapters/codex/cursor_test.go      | 204 ++++++++++
 internal/adapters/codex/doc.go              |  30 ++
 internal/adapters/codex/parser.go           | 225 +++++++++++
 internal/adapters/codex/parser_fuzz_test.go | 159 ++++++++
 internal/adapters/codex/parser_test.go      | 390 ++++++++++++++++++++
 internal/adapters/codex/types.go            | 252 +++++++++++++
 7 files changed, 1457 insertions(+)
 create mode 100644 internal/adapters/codex/cursor.go
 create mode 100644 internal/adapters/codex/cursor_test.go
 create mode 100644 internal/adapters/codex/doc.go
 create mode 100644 internal/adapters/codex/parser.go
 create mode 100644 internal/adapters/codex/parser_fuzz_test.go
 create mode 100644 internal/adapters/codex/parser_test.go
 create mode 100644 internal/adapters/codex/types.go

diff --git a/internal/adapters/codex/cursor.go b/internal/adapters/codex/cursor.go
new file mode 100644
index 0000000..5e0d1d7
--- /dev/null
+++ b/internal/adapters/codex/cursor.go
@@ -0,0 +1,197 @@
+package codex
+
+import (
+	"encoding/json"
+	"fmt"
+	"maps"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// cursorVersion is the on-disk version of the persisted cursor. Bumped if the
+// shape ever changes; ParseCursor refuses unknown versions.
+const cursorVersion = 1
+
+// Cursor is the resume token persisted in sources.cursor for the codex
+// adapter. Keys in Files are paths RELATIVE to the configured sessions root
+// (e.g. "YYYY/MM/DD/rollout-...UUIDv7.jsonl"), so the cursor survives a move of
+// $CODEX_HOME. Keys in LegacyJSON are the basenames of legacy flat .json files
+// directly under sessions/. See adapter-codex.md §"Cursor".
+//
+// Unlike claude_code's cursor, codex has no sidecar/sub-agent deferral (forks
+// and sub-agents are separate top-level rollout files linked by id), so the
+// MetaSeen/Parked/Finalized fields are intentionally absent here. The codex
+// addition is LegacyJSON: a per-file suppression map so an unsupported legacy
+// .json file emits exactly one informational SourceError and is then quiet.
+type Cursor struct {
+	// Files maps a rollout's relative path to its consumption state.
+	Files map[string]FileCursor `json:"files,omitempty"`
+	// LegacyJSON records which legacy flat .json files have already been seen
+	// (and a single informational SourceError emitted). Default off: a file
+	// absent from this map has not been reported yet. Observability/suppression
+	// only — not part of After() ordering.
+	LegacyJSON map[string]LegacyFile `json:"legacy_json,omitempty"`
+	// Version is the on-disk format version. Defaults to cursorVersion on
+	// construction; ParseCursor refuses anything else.
+	Version int `json:"version"`
+}
+
+// FileCursor tracks consumption of a single rollout file. The shape mirrors
+// claude_code's FileCursor (byte-offset + truncation-defense size) with one
+// codex-specific addition (MtimeUs) matching the cursor JSON in
+// adapter-codex.md §"Cursor".
+type FileCursor struct {
+	// Offset is the byte offset of the next unread byte. Always points to the
+	// start of a line; trailing partial lines are held back (spec §"Atomicity").
+	Offset int64 `json:"offset"`
+	// Size is the file size at which Offset was last recorded. Used to detect
+	// truncation on resume (spec §"Cursor" restart logic).
+	Size int64 `json:"size,omitempty"`
+	// MtimeUs is the file mtime when Offset was last recorded, in microseconds
+	// since the UNIX epoch. Observability + staleness heuristic (rule #23).
+	MtimeUs int64 `json:"mtime_us,omitempty"`
+	// LastTsUs is the timestamp of the last record consumed, in microseconds
+	// since the UNIX epoch. Observability only.
+	LastTsUs int64 `json:"last_ts_us,omitempty"`
+}
+
+// LegacyFile is the per-legacy-file suppression record. Ingested is a misnomer
+// kept for cursor-JSON stability with the spec example (adapter-codex.md
+// §"Cursor"): for v1 it records that the file has been SEEN and its one-time
+// informational SourceError emitted, not that its content was ingested.
+type LegacyFile struct {
+	Ingested bool `json:"ingested"`
+}
+
+// newCursor builds the first-run Cursor: current version, both maps allocated.
+func newCursor() Cursor {
+	fresh := Cursor{Version: cursorVersion}
+	// Allocate both maps up front so callers can write without nil checks.
+	fresh.Files = make(map[string]FileCursor)
+	fresh.LegacyJSON = make(map[string]LegacyFile)
+	return fresh
+}
+
+// String implements canonical.Cursor. It renders the cursor as JSON for
+// persistence; encoding/json sorts map keys, so equal cursors encode equally.
+func (c Cursor) String() string {
+	snapshot := c
+	if snapshot.Version == 0 {
+		snapshot.Version = cursorVersion
+	}
+	if snapshot.Files == nil {
+		snapshot.Files = map[string]FileCursor{}
+	}
+	encoded, err := json.Marshal(snapshot)
+	if err == nil {
+		return string(encoded)
+	}
+	// Marshal of these plain field types is not expected to fail; if it ever
+	// does, return a recognizable error object rather than an empty string so
+	// the failure is visible in whatever gets persisted.
+	return fmt.Sprintf(`{"error":%q}`, err.Error())
+}
+
+// After implements canonical.Cursor. Reports whether c is strictly after other
+// on at least one file's byte offset, with no file regressing. A regression
+// (lower Offset on any shared file, or a file the other has progress on that c
+// lacks) defeats After. LegacyJSON is observability-only and does not
+// participate in ordering. A non-codex other is compared as an empty Cursor.
+func (c Cursor) After(other canonical.Cursor) bool {
+	// A different concrete cursor type has no comparable offsets: the failed
+	// assertion leaves o as the zero Cursor, so the loops below compare c
+	// against "empty" and c is After it iff some file has Offset > 0. This
+	// keeps "progress" meaning Offset > 0 on every path, instead of counting
+	// a file that is merely tracked at Offset 0.
+	o, _ := other.(Cursor)
+	advancedOne := false
+	for name, mine := range c.Files {
+		theirs, present := o.Files[name]
+		if !present {
+			if mine.Offset > 0 {
+				advancedOne = true
+			}
+			continue
+		}
+		if mine.Offset < theirs.Offset {
+			return false
+		}
+		if mine.Offset > theirs.Offset {
+			advancedOne = true
+		}
+	}
+	// Missing any file the other has progress on is a regression.
+	for name, theirs := range o.Files {
+		if _, present := c.Files[name]; present {
+			continue
+		}
+		if theirs.Offset > 0 {
+			return false
+		}
+	}
+	return advancedOne
+}
+
+// ParseCursor decodes a stored cursor JSON blob into a Cursor. Empty input
+// yields an empty Cursor (first run); a missing version is read as current.
+// Any other version is rejected so a schema mismatch is never misinterpreted.
+func ParseCursor(stored string) (Cursor, error) {
+	if stored == "" {
+		return newCursor(), nil
+	}
+	var c Cursor
+	if err := json.Unmarshal([]byte(stored), &c); err != nil {
+		return Cursor{}, fmt.Errorf("codex: decode cursor: %w", err)
+	}
+	if c.Version == 0 {
+		c.Version = cursorVersion
+	} else if c.Version != cursorVersion {
+		return Cursor{}, fmt.Errorf("codex: unsupported cursor version %d (want %d)", c.Version, cursorVersion)
+	}
+	if c.Files == nil {
+		c.Files = map[string]FileCursor{}
+	}
+	if c.LegacyJSON == nil {
+		c.LegacyJSON = map[string]LegacyFile{}
+	}
+	return c, nil
+}
+
+// withFile returns a copy of c in which rel maps to fc, adding or overwriting
+// that entry. The receiver's maps are left untouched.
+func (c Cursor) withFile(rel string, fc FileCursor) Cursor {
+	next := c.clone()
+	next.Files[rel] = fc
+	return next
+}
+
+// legacyIngested reports whether basename is recorded in LegacyJSON with its
+// Ingested flag set. A file that was never recorded reports false.
+func (c Cursor) legacyIngested(basename string) bool {
+	// Indexing a nil map is safe in Go and yields the zero value, so a missing
+	// map and a missing key are handled by the same lookup.
+	entry, recorded := c.LegacyJSON[basename]
+	return recorded && entry.Ingested
+}
+
+// withLegacyIngested returns a copy of c whose LegacyJSON marks basename as
+// seen (Ingested: true), so its one informational SourceError is not repeated.
+// The receiver's maps are left untouched.
+func (c Cursor) withLegacyIngested(basename string) Cursor {
+	next := c.clone()
+	next.LegacyJSON[basename] = LegacyFile{Ingested: true}
+	return next
+}
+
+// clone returns a Cursor with freshly allocated copies of both maps, so writes
+// to the result never reach the receiver. Version is reset to cursorVersion.
+func (c Cursor) clone() Cursor {
+	// Reserve one extra slot: the with* helpers add one entry after cloning.
+	files := make(map[string]FileCursor, len(c.Files)+1)
+	maps.Copy(files, c.Files)
+	legacy := make(map[string]LegacyFile, len(c.LegacyJSON)+1)
+	maps.Copy(legacy, c.LegacyJSON)
+	return Cursor{
+		Files: files, LegacyJSON: legacy, Version: cursorVersion,
+	}
+}
diff --git a/internal/adapters/codex/cursor_test.go b/internal/adapters/codex/cursor_test.go
new file mode 100644
index 0000000..38599cf
--- /dev/null
+++ b/internal/adapters/codex/cursor_test.go
@@ -0,0 +1,204 @@
+package codex
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+func TestParseCursor_Empty(t *testing.T) {
+	t.Parallel()
+	c, err := ParseCursor("")
+	if err != nil {
+		t.Fatalf("ParseCursor(\"\"): %v", err)
+	}
+	if len(c.Files) != 0 || c.Version != cursorVersion {
+		t.Fatalf("empty cursor wrong: %+v", c)
+	}
+}
+
+func TestParseCursor_RoundTrip(t *testing.T) {
+	t.Parallel()
+	rel := "2025/11/20/rollout-2025-11-20T18-59-09-019aa234.jsonl"
+	orig := newCursor().
+		withFile(rel, FileCursor{Offset: 100, Size: 100, MtimeUs: 42, LastTsUs: 123}).
+		withLegacyIngested("rollout-2025-06-26-5556f03d.json")
+	encoded := orig.String()
+	got, err := ParseCursor(encoded)
+	if err != nil {
+		t.Fatalf("ParseCursor: %v", err)
+	}
+	if got.Files[rel].Offset != 100 || got.Files[rel].MtimeUs != 42 {
+		t.Fatalf("offset/mtime lost: %+v", got.Files[rel])
+	}
+	if !got.legacyIngested("rollout-2025-06-26-5556f03d.json") {
+		t.Fatalf("legacyJSON ingested flag lost: %+v", got.LegacyJSON)
+	}
+}
+
+// TestParseCursor_OldCursorWithoutLegacyJSON verifies a cursor persisted BEFORE
+// the legacy_json field existed still parses (additive omitempty, unchanged
+// version), yielding an empty (non-nil) legacy map — not an error.
+func TestParseCursor_OldCursorWithoutLegacyJSON(t *testing.T) {
+	t.Parallel()
+	old := `{"version":1,"files":{"2025/11/20/r.jsonl":{"offset":10,"size":10}}}`
+	got, err := ParseCursor(old)
+	if err != nil {
+		t.Fatalf("ParseCursor(old cursor): %v", err)
+	}
+	if got.Files["2025/11/20/r.jsonl"].Offset != 10 {
+		t.Fatalf("old cursor offset lost: %+v", got)
+	}
+	if len(got.LegacyJSON) != 0 {
+		t.Fatalf("old cursor must yield empty legacyJSON, got %+v", got.LegacyJSON)
+	}
+}
+
+// TestCursor_LegacyDefaultOff asserts the suppression flag defaults off: a
+// legacy file never seen is not reported as ingested.
+func TestCursor_LegacyDefaultOff(t *testing.T) {
+	t.Parallel()
+	c := newCursor()
+	if c.legacyIngested("never-seen.json") {
+		t.Fatal("unseen legacy file must not report ingested")
+	}
+}
+
+func TestParseCursor_RejectsUnknownVersion(t *testing.T) {
+	t.Parallel()
+	_, err := ParseCursor(`{"version":99}`)
+	if err == nil {
+		t.Fatal("ParseCursor(version 99): want error")
+	}
+}
+
+func TestParseCursor_Malformed(t *testing.T) {
+	t.Parallel()
+	_, err := ParseCursor(`{not json`)
+	if err == nil {
+		t.Fatal("ParseCursor(malformed): want error")
+	}
+}
+
+func TestCursor_StringStableSortedKeys(t *testing.T) {
+	t.Parallel()
+	c := newCursor().
+		withFile("2025/11/20/b.jsonl", FileCursor{Offset: 2}).
+		withFile("2025/11/20/a.jsonl", FileCursor{Offset: 1})
+	first := c.String()
+	second := c.String()
+	if first != second {
+		t.Fatalf("cursor String() not stable:\n first:  %s\n second: %s", first, second)
+	}
+	var probe struct {
+		Files map[string]json.RawMessage `json:"files"`
+	}
+	if err := json.Unmarshal([]byte(c.String()), &probe); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if len(probe.Files) != 2 {
+		t.Fatalf("want 2 files, got %d", len(probe.Files))
+	}
+}
+
+func TestCursor_After(t *testing.T) {
+	t.Parallel()
+	rel := "2025/11/20/r.jsonl"
+	base := newCursor().withFile(rel, FileCursor{Offset: 50, Size: 50})
+	ahead := newCursor().withFile(rel, FileCursor{Offset: 100, Size: 100})
+	behind := newCursor().withFile(rel, FileCursor{Offset: 10, Size: 10})
+
+	if !ahead.After(base) {
+		t.Error("ahead.After(base) = false, want true")
+	}
+	if base.After(ahead) {
+		t.Error("base.After(ahead) = true, want false")
+	}
+	if behind.After(base) {
+		t.Error("behind.After(base) = true, want false")
+	}
+	if newCursor().After(base) {
+		t.Error("empty.After(base) = true, want false")
+	}
+	if !base.After(newCursor()) {
+		t.Error("base.After(empty) = false, want true")
+	}
+}
+
+// TestCursor_AfterMultiFile asserts After requires at least one file to advance
+// with NO file regressing (verbatim semantics from claude_code).
+func TestCursor_AfterMultiFile(t *testing.T) {
+	t.Parallel()
+	a := "2025/11/20/a.jsonl"
+	b := "2025/11/20/b.jsonl"
+	base := newCursor().
+		withFile(a, FileCursor{Offset: 50}).
+		withFile(b, FileCursor{Offset: 50})
+	// One file advances, the other holds: After.
+	oneAdvances := newCursor().
+		withFile(a, FileCursor{Offset: 60}).
+		withFile(b, FileCursor{Offset: 50})
+	if !oneAdvances.After(base) {
+		t.Error("oneAdvances.After(base) = false, want true")
+	}
+	// One advances but the other regresses: NOT After.
+	mixed := newCursor().
+		withFile(a, FileCursor{Offset: 60}).
+		withFile(b, FileCursor{Offset: 40})
+	if mixed.After(base) {
+		t.Error("mixed (one regresses).After(base) = true, want false")
+	}
+	// Missing a file the other has progress on: regression, NOT After.
+	missing := newCursor().withFile(a, FileCursor{Offset: 100})
+	if missing.After(base) {
+		t.Error("missing-file.After(base) = true, want false")
+	}
+}
+
+func TestCursor_AfterAlienType(t *testing.T) {
+	t.Parallel()
+	type alien struct{ canonical.Cursor }
+	c := newCursor().withFile("2025/11/20/r.jsonl", FileCursor{Offset: 1})
+	if !c.After(alien{}) {
+		t.Error("cursor with progress should be After an alien cursor type")
+	}
+	if newCursor().After(alien{}) {
+		t.Error("empty cursor should not be After an alien cursor type")
+	}
+}
+
+// TestCursor_LegacyNotPartOfAfter asserts the legacyJSON suppression map is
+// observability-only and does NOT participate in After ordering (mirrors how
+// claude_code excludes MetaSeen/Parked/Finalized from After).
+func TestCursor_LegacyNotPartOfAfter(t *testing.T) {
+	t.Parallel()
+	rel := "2025/11/20/r.jsonl"
+	a := newCursor().withFile(rel, FileCursor{Offset: 50, Size: 50})
+	// Same byte progress, but b additionally marked a legacy file ingested.
+	b := newCursor().
+		withFile(rel, FileCursor{Offset: 50, Size: 50}).
+		withLegacyIngested("legacy.json")
+	if a.After(b) || b.After(a) {
+		t.Errorf("legacyJSON must not affect After ordering: a.After(b)=%v b.After(a)=%v", a.After(b), b.After(a))
+	}
+}
+
+// TestCursor_CloneIndependent asserts clone produces independent maps so
+// mutating the clone never affects the receiver (truncation-defense + tail
+// rely on this immutability, verbatim from claude_code).
+func TestCursor_CloneIndependent(t *testing.T) {
+	t.Parallel()
+	rel := "2025/11/20/r.jsonl"
+	orig := newCursor().withFile(rel, FileCursor{Offset: 10})
+	derived := orig.withFile(rel, FileCursor{Offset: 20}).withLegacyIngested("x.json")
+	if orig.Files[rel].Offset != 10 {
+		t.Errorf("receiver mutated: orig offset = %d, want 10", orig.Files[rel].Offset)
+	}
+	if orig.legacyIngested("x.json") {
+		t.Error("receiver mutated: orig should not have legacy flag")
+	}
+	if derived.Files[rel].Offset != 20 {
+		t.Errorf("derived offset = %d, want 20", derived.Files[rel].Offset)
+	}
+}
diff --git a/internal/adapters/codex/doc.go b/internal/adapters/codex/doc.go
new file mode 100644
index 0000000..b060aed
--- /dev/null
+++ b/internal/adapters/codex/doc.go
@@ -0,0 +1,30 @@
+// Package codex implements the canonical.Adapter for the OpenAI Codex CLI
+// rollout format.
+//
+// Codex stores one conversation as an append-only JSONL "rollout" file under
+// $CODEX_HOME/sessions/YYYY/MM/DD/rollout-YYYY-MM-DDTHH-MM-SS-<ThreadId>.jsonl
+// (default $CODEX_HOME is ~/.codex). The directory shards use local time; the
+// per-line timestamp field is UTC and is the canonical time source. Each line
+// is one RolloutLine envelope {timestamp, type, payload}; the top-level type is
+// one of session_meta, turn_context, response_item, event_msg, compacted, and
+// the nested payload carries its own type discriminator. Files are written
+// pure-append (no rename, no fsync), so a byte-offset tail with last-newline
+// seek-back is the correct watch strategy.
+//
+// A pre-mid-2025 legacy flat layout (rollout-YYYY-MM-DD-<uuid>.json directly
+// under sessions/) is recognized but not ingested by default: the adapter emits
+// one informational SourceError per legacy file and suppresses it thereafter
+// via the cursor's LegacyJSON map.
+//
+// Codex rollouts carry no native turn/op rollups, no cost data, and no
+// per-session terminal signal, so the adapter is a state machine over the line
+// stream (later chunks): it synthesizes turn boundaries from turn_context /
+// task_started / task_complete, computes cost downstream from the pricing
+// catalog, and never emits a clean SessionFinalizedEvent (codex sessions stay
+// status='running' and are resumable, like claude-code).
+//
+// See .agents/sow/specs/adapter-codex.md for the full format reference
+// (filesystem layout, wire format, record schema, sub-agent/fork linkage,
+// compaction, cursor design, edge cases) and
+// .agents/sow/specs/adapter-contract.md for the universal adapter rules.
+package codex
diff --git a/internal/adapters/codex/parser.go b/internal/adapters/codex/parser.go
new file mode 100644
index 0000000..25deed5
--- /dev/null
+++ b/internal/adapters/codex/parser.go
@@ -0,0 +1,225 @@
+package codex
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+)
+
+// errUnknownRecordType is the sentinel wrapped by unknownTypeError so callers
+// can detect the "skip and surface as parse error" case via errors.Is for an
+// unknown TOP-LEVEL RolloutItem.type. The concrete unknownTypeError carries the
+// offending type string so the scanner (a later chunk) emits exactly one
+// SourceError per distinct variant per session (spec adapter-codex.md:220).
+var errUnknownRecordType = errors.New("codex: unknown record type")
+
+// errUnknownPayloadType is the sibling sentinel for an unknown NESTED
+// payload.type inside a known top-level type (spec adapter-codex.md:221). It is
+// deliberately distinct from errUnknownRecordType so dedup keys for top-level
+// and nested unknowns never collide.
+var errUnknownPayloadType = errors.New("codex: unknown payload type")
+
+// unknownTypeError reports a line whose top-level `type` discriminator is not a
+// documented codex RolloutItem type. It wraps errUnknownRecordType (for
+// errors.Is) and exposes the raw Type so callers dedup one SourceError per
+// distinct unknown variant per session.
+type unknownTypeError struct {
+	Type string
+}
+
+func (e *unknownTypeError) Error() string {
+	return fmt.Sprintf("%s: %q", errUnknownRecordType.Error(), e.Type)
+}
+
+// Unwrap lets errors.Is(err, errUnknownRecordType) match.
+func (e *unknownTypeError) Unwrap() error { return errUnknownRecordType }
+
+// unknownPayloadTypeError reports a nested payload.type that is not documented
+// for its owning top-level type. Owner is the top-level type (e.g.
+// "response_item") and Type is the offending nested discriminator, so the dedup
+// key is "<Owner>/<Type>" and a nested name never collides across owners.
+type unknownPayloadTypeError struct {
+	Owner string
+	Type  string
+}
+
+func (e *unknownPayloadTypeError) Error() string {
+	return fmt.Sprintf("%s: %s/%q", errUnknownPayloadType.Error(), e.Owner, e.Type)
+}
+
+// Unwrap lets errors.Is(err, errUnknownPayloadType) match.
+func (e *unknownPayloadTypeError) Unwrap() error { return errUnknownPayloadType }
+
+// recordType discriminates the top-level RolloutItem variants. Values are
+// verbatim from the producer's serde tag (openai/codex protocol.rs:2705-2734,
+// 2849-2854; serde tag="type", content="payload", rename_all="snake_case").
+type recordType string
+
+const (
+	recSessionMeta  recordType = "session_meta"
+	recTurnContext  recordType = "turn_context"
+	recResponseItem recordType = "response_item"
+	recEventMsg     recordType = "event_msg"
+	recCompacted    recordType = "compacted"
+)
+
+// envelope is the shared RolloutLine wrapper present on every line. The flatten
+// in Rust (struct RolloutLine { timestamp, #[serde(flatten)] item }) lands the
+// tag at top level (`type`) and the content under `payload`.
+type envelope struct {
+	TS      string          `json:"timestamp"`
+	Type    recordType      `json:"type"`
+	Payload json.RawMessage `json:"payload"`
+}
+
+// record is the parsed codex line. Env is always populated; exactly one typed
+// payload pointer is non-nil for the variants the mapper (later chunk)
+// consumes. Raw is a private copy of the whitespace-trimmed line bytes, kept so
+// the mapper can build PayloadRefs without re-typing every nested variant.
+type record struct {
+	Env          envelope
+	SessionMeta  *sessionMetaPayload
+	TurnContext  *turnContextPayload
+	ResponseItem *responseItemPayload
+	EventMsg     *eventMsgPayload
+	Compacted    *compactedPayload
+	Raw          []byte
+}
+
+// Type returns the top-level RolloutItem discriminator.
+func (r record) Type() recordType { return r.Env.Type }
+
+// Timestamp returns the envelope timestamp (RFC3339 UTC) — the canonical time
+// source for the line (spec adapter-codex.md:56-60).
+func (r record) Timestamp() string { return r.Env.TS }
+
+// parseLine decodes one JSONL line into a record; the bool result means "skip
+// silently". Blank or whitespace-only lines return (record{}, true, nil).
+// Malformed JSON returns a wrapped decode error; a missing top-level type
+// returns a plain error. An unknown top-level type returns *unknownTypeError
+// (matches errUnknownRecordType via errors.Is). response_item and event_msg are
+// handed to decodeResponseItem / decodeEventMsg, which may also skip or fail.
+func parseLine(line []byte) (record, bool, error) {
+	body := bytes.TrimSpace(line)
+	if len(body) == 0 {
+		return record{}, true, nil
+	}
+
+	var hdr envelope
+	if err := json.Unmarshal(body, &hdr); err != nil {
+		return record{}, false, fmt.Errorf("decode envelope: %w", err)
+	}
+	if hdr.Type == "" {
+		return record{}, false, errors.New("record.type is required")
+	}
+
+	parsed := record{Env: hdr, Raw: bytes.Clone(body)}
+	switch hdr.Type {
+	case recResponseItem:
+		return decodeResponseItem(parsed)
+	case recEventMsg:
+		return decodeEventMsg(parsed)
+	case recSessionMeta:
+		var meta sessionMetaPayload
+		if err := decodePayload(hdr.Payload, &meta); err != nil {
+			return record{}, false, fmt.Errorf("decode session_meta: %w", err)
+		}
+		parsed.SessionMeta = &meta
+	case recTurnContext:
+		var turn turnContextPayload
+		if err := decodePayload(hdr.Payload, &turn); err != nil {
+			return record{}, false, fmt.Errorf("decode turn_context: %w", err)
+		}
+		parsed.TurnContext = &turn
+	case recCompacted:
+		var compact compactedPayload
+		if err := decodePayload(hdr.Payload, &compact); err != nil {
+			return record{}, false, fmt.Errorf("decode compacted: %w", err)
+		}
+		parsed.Compacted = &compact
+	default:
+		return record{}, false, &unknownTypeError{Type: string(hdr.Type)}
+	}
+	return parsed, false, nil
+}
+
+// decodePayload unmarshals payload into dst. An absent body (empty after
+// trimming) or a bare JSON null is tolerated: dst is untouched, nil returned.
+func decodePayload(payload json.RawMessage, dst any) error {
+	switch body := bytes.TrimSpace(payload); string(body) {
+	case "", "null":
+		return nil
+	default:
+		return json.Unmarshal(body, dst)
+	}
+}
+
+// nestedType peeks at the payload's own "type" field without decoding the rest
+// of the body. An absent payload, a bare JSON null, or a body with no type all
+// yield ("", nil); a body the probe cannot unmarshal yields that error.
+func nestedType(payload json.RawMessage) (string, error) {
+	var probe struct {
+		Type string `json:"type"`
+	}
+	body := bytes.TrimSpace(payload)
+	if len(body) == 0 || string(body) == "null" {
+		return "", nil
+	}
+	if err := json.Unmarshal(body, &probe); err != nil {
+		return "", err
+	}
+	return probe.Type, nil
+}
+
+// decodeResponseItem handles the response_item top-level type. It peeks at the
+// nested payload.type and routes on it: an absent type or a catch-all no-op
+// variant (responseItemNoOp) is skipped silently, a type missing from
+// responseItemTypes surfaces *unknownPayloadTypeError, and only a known type is
+// fully decoded into rec.ResponseItem.
+func decodeResponseItem(rec record) (record, bool, error) {
+	kind, err := nestedType(rec.Env.Payload)
+	if err != nil {
+		return record{}, false, fmt.Errorf("decode response_item payload type: %w", err)
+	}
+	// Look both sets up once; the cases below are checked in priority order.
+	_, noOp := responseItemNoOp[kind]
+	_, known := responseItemTypes[kind]
+	switch {
+	case kind == "" || noOp:
+		return rec, true, nil
+	case !known:
+		return record{}, false, &unknownPayloadTypeError{Owner: string(recResponseItem), Type: kind}
+	}
+	var item responseItemPayload
+	if err := decodePayload(rec.Env.Payload, &item); err != nil {
+		return record{}, false, fmt.Errorf("decode response_item: %w", err)
+	}
+	rec.ResponseItem = &item
+	return rec, false, nil
+}
+
+// decodeEventMsg handles the event_msg top-level type, mirroring
+// decodeResponseItem: an absent or no-op (eventMsgNoOp) nested type skips
+// silently, one missing from eventMsgTypes is *unknownPayloadTypeError.
+func decodeEventMsg(rec record) (record, bool, error) {
+	kind, err := nestedType(rec.Env.Payload)
+	if err != nil {
+		return record{}, false, fmt.Errorf("decode event_msg payload type: %w", err)
+	}
+	// Look both sets up once; the cases below are checked in priority order.
+	_, noOp := eventMsgNoOp[kind]
+	_, known := eventMsgTypes[kind]
+	switch {
+	case kind == "" || noOp:
+		return rec, true, nil
+	case !known:
+		return record{}, false, &unknownPayloadTypeError{Owner: string(recEventMsg), Type: kind}
+	}
+	var msg eventMsgPayload
+	if err := decodePayload(rec.Env.Payload, &msg); err != nil {
+		return record{}, false, fmt.Errorf("decode event_msg: %w", err)
+	}
+	rec.EventMsg = &msg
+	return rec, false, nil
+}
diff --git a/internal/adapters/codex/parser_fuzz_test.go b/internal/adapters/codex/parser_fuzz_test.go
new file mode 100644
index 0000000..5ce56ee
--- /dev/null
+++ b/internal/adapters/codex/parser_fuzz_test.go
@@ -0,0 +1,159 @@
+package codex
+
+import (
+	"testing"
+)
+
+// FuzzParseLine exercises the JSONL line parser against arbitrary bytes. The
+// contract under fuzz is: parseLine never panics regardless of input. It
+// returns either a parsed record, a skip, or a wrapped error. Seeds cover every
+// top-level RolloutItem type, a representative spread of nested response_item /
+// event_msg variants, the polymorphic session_meta source shapes, the
+// sandbox_policy variants, malformed JSON, and unknown top-level / nested types.
+// Satisfies SOW-0004 acceptance #7 (fuzz target on the JSONL parser). All seeds
+// are synthetic with placeholder identities (no real session content).
+func FuzzParseLine(f *testing.F) {
+	seeds := [][]byte{
+		// session_meta, line 1, bare-string source + git block.
+		[]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"019aa234-a2a1-75c3-a9bf-d8425e1785f5","timestamp":"2025-11-20T16:59:09.857Z","cwd":"<ROOT>","originator":"codex_exec","cli_version":"0.125.0","source":"exec","model_provider":"openai","git":{"commit_hash":"abc","branch":"main","repository_url":"git@github.com:example/example.git"}}}`),
+		// session_meta, {custom} source.
+		[]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"sid","source":{"custom":"my_tool"}}}`),
+		// session_meta, {internal} source (memory consolidation).
+		[]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"sid","source":{"internal":"memory_consolidation"},"thread_source":"memory_consolidation"}}`),
+		// session_meta, sub-agent thread_spawn source with parent link.
+		[]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"child","thread_source":"subagent","agent_role":"explorer","source":{"subagent":{"thread_spawn":{"parent_thread_id":"parent-uuid","depth":1,"agent_nickname":"Tesla","agent_role":"explorer"}}}}}`),
+		// session_meta, fork.
+		[]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"forked","forked_from_id":"origin-uuid","source":"cli"}}`),
+		// turn_context, new format, workspace-write.
+		[]byte(`{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{"turn_id":"turn-1","cwd":"<ROOT>","model":"gpt-5.1-codex-max","effort":"high","approval_policy":"on-request","sandbox_policy":{"type":"workspace-write"}}}`),
+		// turn_context, danger-full-access, no turn_id (old format).
+		[]byte(`{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{"cwd":"<ROOT>","model":"gpt-5.5","sandbox_policy":{"type":"danger-full-access"}}}`),
+		// response_item, assistant message.
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi"}],"phase":"final_answer"}}`),
+		// response_item, user message.
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"do x"}]}}`),
+		// response_item, reasoning (summary + encrypted).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"reasoning","summary":[{"type":"summary_text","text":"thinking"}],"content":null,"encrypted_content":"AAAA"}}`),
+		// response_item, function_call (arguments is a JSON-encoded string).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{\"command\":[\"ls\",\"-la\"]}","call_id":"call_1"}}`),
+		// response_item, function_call_output (output is a string).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_1","output":"total 0"}}`),
+		// response_item, function_call_output (output is an object form).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_1","output":{"content":"ok"}}}`),
+		// response_item, custom_tool_call.
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"custom_tool_call","call_id":"call_2","name":"apply_patch","input":"*** Begin Patch","status":"completed"}}`),
+		// response_item, web_search_call.
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"web_search_call","call_id":"ws_1","status":"completed","action":{"type":"search","query":"q"}}}`),
+		// response_item, compaction / context_compaction.
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"compaction","encrypted_content":"BBBB"}}`),
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"context_compaction","encrypted_content":null}}`),
+		// response_item, ghost_snapshot (catch-all no-op).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"ghost_snapshot","data":{}}}`),
+		// response_item, legacy local_shell_call.
+		[]byte(`{"timestamp":"2025-06-26T10:00:00.000Z","type":"response_item","payload":{"type":"local_shell_call","call_id":"ls_1"}}`),
+		// event_msg, user_message with images.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"user_message","message":"hi","images":["a.png"],"local_images":["/x/a.png"]}}`),
+		// event_msg, agent_message final_answer.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"agent_message","message":"answer","phase":"final_answer"}}`),
+		// event_msg, agent_reasoning + raw content.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"agent_reasoning","text":"summary"}}`),
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"agent_reasoning_raw_content","text":"raw cot"}}`),
+		// event_msg, token_count.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"total_tokens":100,"input_tokens":80,"output_tokens":20},"last_token_usage":{"input_tokens":10,"output_tokens":5}},"model_context_window":200000}}`),
+		// event_msg, task_started / task_complete (new format turn boundary).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"task_started","turn_id":"turn-1","started_at":1763664000,"model_context_window":200000}}`),
+		[]byte(`{"timestamp":"2025-11-20T17:00:00.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"turn-1","last_agent_message":"done","completed_at":"2025-11-20T17:00:00.000Z","duration_ms":1000,"time_to_first_token_ms":120}}`),
+		// event_msg, turn_aborted (interrupted).
+		[]byte(`{"timestamp":"2025-11-20T17:00:00.000Z","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"turn-1","reason":"interrupted","duration_ms":500}}`),
+		// event_msg, exec_command_end (Extended; aggregated_output only).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_1","turn_id":"turn-1","command":["ls"],"exit_code":0,"aggregated_output":"out","duration":{"secs":0,"nanos":1},"status":"completed"}}`),
+		// event_msg, mcp_tool_call_end.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"mcp_tool_call_end","call_id":"call_1","invocation":{"server":"github","tool":"create_issue","arguments":{}}}}`),
+		// event_msg, patch_apply_end.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"call_1","turn_id":"turn-1","success":true,"status":"completed","changes":{}}}`),
+		// event_msg, context_compacted (unit struct).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"context_compacted"}}`),
+		// event_msg, web_search_end.
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"web_search_end","call_id":"ws_1","query":"q","action":{"type":"search"}}}`),
+		// event_msg, error (Extended).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"error","message":"boom"}}`),
+		// compacted top-level line with replacement_history.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"compacted","payload":{"message":"summary","replacement_history":[{"type":"message","role":"user","content":[]}]}}`),
+		// compacted, null replacement_history.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"compacted","payload":{"message":"summary","replacement_history":null}}`),
+		// unknown top-level type.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"totally_unknown","payload":{}}`),
+		// unknown nested payload type (response_item).
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":{"type":"brand_new_variant","x":1}}`),
+		// unknown nested payload type (event_msg).
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"event_msg","payload":{"type":"brand_new_event"}}`),
+		// known top-level, absent payload.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"event_msg"}`),
+		// known top-level, null payload.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":null}`),
+		// nested type absent: payload lacks the discriminator.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":{"role":"assistant"}}`),
+		// missing top-level type.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","payload":{}}`),
+		// malformed JSON.
+		[]byte(`{not json`),
+		// blank / whitespace.
+		[]byte(""),
+		[]byte("  \t\n "),
+		// embedded control char as a JSON \uXXXX escape in a tool output
+		// string (edge case #11): codex serializes ANSI escapes this way and
+		// encoding/json must accept them. \u001b is a literal 6-char escape
+		// inside this raw-string seed, exactly as it appears on disk.
+		[]byte(`{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"c","output":"line1\u001b[0mline2"}}`),
+		// sandbox_policy with an unknown future type (must not hard-fail).
+		[]byte(`{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{"model":"m","sandbox_policy":{"type":"brand-new-mode"}}}`),
+	}
+	for _, s := range seeds {
+		f.Add(s)
+	}
+	f.Fuzz(func(t *testing.T, data []byte) {
+		// MUST NOT panic on any input. Returned values are not asserted, but the
+		// typed helpers reachable from a parsed record must also not panic.
+		rec, skip, err := parseLine(data)
+		if err == nil && !skip {
+			if rec.SessionMeta != nil {
+				_, _ = rec.SessionMeta.classifySource()
+			}
+			if rec.TurnContext != nil {
+				_ = rec.TurnContext.sandboxType()
+			}
+			if rec.Compacted != nil {
+				_ = rec.Compacted.replacementHistorySize()
+			}
+		}
+	})
+}
+
+// FuzzParseCursor exercises ParseCursor against arbitrary cursor blobs.
+// Contract: never panics; either returns a Cursor or a wrapped error.
+func FuzzParseCursor(f *testing.F) {
+	seeds := []string{
+		"",
+		"{}",
+		`{"version":1}`,
+		`{"version":1,"files":{}}`,
+		`{"version":1,"files":{"2025/11/20/r.jsonl":{"offset":7,"size":7,"mtime_us":42}}}`,
+		`{"version":1,"files":{"2025/11/20/r.jsonl":{"offset":1}},"legacy_json":{"rollout-2025-06-26-x.json":{"ingested":true}}}`,
+		`{"version":99}`,
+		`{"files":{"x":{}}}`,
+		"not json",
+		`{"version":1,"files":null,"legacy_json":null}`,
+	}
+	for _, s := range seeds {
+		f.Add(s)
+	}
+	f.Fuzz(func(t *testing.T, s string) {
+		c, err := ParseCursor(s)
+		if err == nil {
+			// A successfully parsed cursor must round-trip and answer After
+			// against itself without panicking.
+			_ = c.String()
+			_ = c.After(c)
+		}
+	})
+}
diff --git a/internal/adapters/codex/parser_test.go b/internal/adapters/codex/parser_test.go
new file mode 100644
index 0000000..0db8dcd
--- /dev/null
+++ b/internal/adapters/codex/parser_test.go
@@ -0,0 +1,390 @@
+package codex
+
+import (
+	"errors"
+	"testing"
+)
+
+func TestParseLine_BlankAndWhitespaceSkip(t *testing.T) {
+	t.Parallel()
+	for _, in := range []string{"", "   ", "\t\n", "  \r\n  "} {
+		rec, skip, err := parseLine([]byte(in))
+		if err != nil {
+			t.Errorf("parseLine(%q): unexpected err %v", in, err)
+		}
+		if !skip {
+			t.Errorf("parseLine(%q): want skip=true", in)
+		}
+		_ = rec
+	}
+}
+
+func TestParseLine_MalformedJSON(t *testing.T) {
+	t.Parallel()
+	_, _, err := parseLine([]byte(`{not json`))
+	if err == nil {
+		t.Fatal("parseLine(malformed): want error")
+	}
+}
+
+func TestParseLine_MissingType(t *testing.T) {
+	t.Parallel()
+	_, _, err := parseLine([]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","payload":{}}`))
+	if err == nil {
+		t.Fatal("parseLine(no type): want error")
+	}
+}
+
+// TestParseLine_UnknownTopLevelType asserts an unknown RolloutItem.type is
+// surfaced (not silently dropped) and detectable via errors.Is so the caller
+// can dedup one SourceError per distinct variant (spec adapter-codex.md:220).
+func TestParseLine_UnknownTopLevelType(t *testing.T) {
+	t.Parallel()
+	rec, skip, err := parseLine([]byte(`{"timestamp":"2025-11-20T16:59:09.857Z","type":"totally_made_up","payload":{}}`))
+	if err == nil {
+		t.Fatal("parseLine(unknown top-level type): want error")
+	}
+	if !errors.Is(err, errUnknownRecordType) {
+		t.Fatalf("parseLine(unknown top-level type): want errUnknownRecordType, got %v", err)
+	}
+	var ute *unknownTypeError
+	if !errors.As(err, &ute) || ute.Type != "totally_made_up" {
+		t.Fatalf("unknownTypeError.Type = %v, want totally_made_up", err)
+	}
+	_ = rec
+	_ = skip
+}
+
+// TestParseLine_UnknownNestedPayloadType asserts that an unknown nested
+// payload.type (inside a known top-level type) is surfaced via a SEPARATE
+// sentinel so the caller can dedup one SourceError per distinct nested variant
+// (spec adapter-codex.md:221). It must be distinguishable from the top-level
+// unknown so dedup keys never collide.
+func TestParseLine_UnknownNestedPayloadType(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"response_item","payload":{"type":"brand_new_variant","foo":1}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err == nil {
+		t.Fatal("parseLine(unknown nested payload type): want error")
+	}
+	if !errors.Is(err, errUnknownPayloadType) {
+		t.Fatalf("want errUnknownPayloadType, got %v", err)
+	}
+	var upe *unknownPayloadTypeError
+	if !errors.As(err, &upe) {
+		t.Fatalf("want *unknownPayloadTypeError, got %T (%v)", err, err)
+	}
+	// The dedup key embeds both the owner top-level type and the nested type
+	// so "response_item/brand_new_variant" never collides with a different
+	// owner carrying the same nested name.
+	if upe.Owner != "response_item" || upe.Type != "brand_new_variant" {
+		t.Fatalf("unknownPayloadTypeError = {Owner:%q Type:%q}, want {response_item brand_new_variant}", upe.Owner, upe.Type)
+	}
+	// The two unknown sentinels must be distinct so dedup never conflates a
+	// top-level unknown with a nested unknown of the same string.
+	if errors.Is(err, errUnknownRecordType) {
+		t.Fatal("nested-unknown must NOT match errUnknownRecordType")
+	}
+	_ = rec
+	_ = skip
+}
+
+// TestParseLine_SessionMeta decodes line 1 of a real rollout: a session_meta
+// carrying id, originator, cli_version, cwd, source, and a flattened git block.
+func TestParseLine_SessionMeta(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{` +
+		`"id":"019aa234-a2a1-75c3-a9bf-d8425e1785f5",` +
+		`"timestamp":"2025-11-20T16:59:09.857Z",` +
+		`"cwd":"<ROOT>","originator":"codex_exec","cli_version":"0.125.0",` +
+		`"source":"exec","model_provider":"openai",` +
+		`"git":{"commit_hash":"abc123","branch":"main","repository_url":"git@github.com:example/example.git"}}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil || skip {
+		t.Fatalf("parseLine(session_meta): err=%v skip=%v", err, skip)
+	}
+	if rec.Type() != recSessionMeta {
+		t.Fatalf("Type() = %q, want %q", rec.Type(), recSessionMeta)
+	}
+	if rec.SessionMeta == nil {
+		t.Fatal("SessionMeta payload not decoded")
+	}
+	if rec.SessionMeta.ID != "019aa234-a2a1-75c3-a9bf-d8425e1785f5" {
+		t.Errorf("id = %q", rec.SessionMeta.ID)
+	}
+	if rec.SessionMeta.Originator != "codex_exec" || rec.SessionMeta.CLIVersion != "0.125.0" {
+		t.Errorf("originator/cli_version wrong: %+v", rec.SessionMeta)
+	}
+	if rec.SessionMeta.Git == nil || rec.SessionMeta.Git.Branch != "main" {
+		t.Errorf("git not decoded: %+v", rec.SessionMeta.Git)
+	}
+}
+
+// TestParseLine_SessionMetaSourceVariants exercises the polymorphic
+// SessionSource enum: bare strings, {custom}, {internal}, and the nested
+// {subagent:{thread_spawn:{parent_thread_id,...}}} (spec adapter-codex.md:114-122).
+func TestParseLine_SessionMetaSourceVariants(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name       string
+		sourceJSON string
+		wantKind   sourceKind
+		wantParent string
+	}{
+		{"string-exec", `"exec"`, sourceRoot, ""},
+		{"string-cli", `"cli"`, sourceRoot, ""},
+		{"string-unknown", `"unknown"`, sourceRoot, ""},
+		{"custom", `{"custom":"my_tool"}`, sourceRoot, ""},
+		{"internal", `{"internal":"memory_consolidation"}`, sourceInternal, ""},
+		{"subagent-string", `{"subagent":"review"}`, sourceSubagent, ""},
+		{"subagent-threadspawn", `{"subagent":{"thread_spawn":{"parent_thread_id":"parent-uuid","depth":1,"agent_role":"explorer"}}}`, sourceSubagent, "parent-uuid"},
+		{"other-forward-compat", `{"brand_new":"x"}`, sourceOther, ""},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"sid","source":` + c.sourceJSON + `}}`
+			rec, _, err := parseLine([]byte(line))
+			if err != nil {
+				t.Fatalf("parseLine: %v", err)
+			}
+			if rec.SessionMeta == nil {
+				t.Fatal("SessionMeta nil")
+			}
+			gotKind, gotParent := rec.SessionMeta.classifySource()
+			if gotKind != c.wantKind {
+				t.Errorf("classifySource kind = %q, want %q", gotKind, c.wantKind)
+			}
+			if gotParent != c.wantParent {
+				t.Errorf("classifySource parent = %q, want %q", gotParent, c.wantParent)
+			}
+		})
+	}
+}
+
+// TestParseLine_TurnContext decodes a turn_context with model, sandbox_policy,
+// approval_policy, effort and turn_id.
+func TestParseLine_TurnContext(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{` +
+		`"turn_id":"turn-1","cwd":"<ROOT>","model":"gpt-5.1-codex-max","effort":"high",` +
+		`"approval_policy":"on-request","sandbox_policy":{"type":"workspace-write"}}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil || skip {
+		t.Fatalf("parseLine(turn_context): err=%v skip=%v", err, skip)
+	}
+	if rec.TurnContext == nil {
+		t.Fatal("TurnContext payload not decoded")
+	}
+	if rec.TurnContext.TurnID != "turn-1" || rec.TurnContext.Model != "gpt-5.1-codex-max" {
+		t.Errorf("turn_context fields wrong: %+v", rec.TurnContext)
+	}
+	if rec.TurnContext.Effort != "high" || rec.TurnContext.ApprovalPolicy != "on-request" {
+		t.Errorf("policy fields wrong: %+v", rec.TurnContext)
+	}
+	if rec.TurnContext.sandboxType() != "workspace-write" {
+		t.Errorf("sandboxType = %q, want workspace-write", rec.TurnContext.sandboxType())
+	}
+}
+
+// TestParseLine_TurnContextSandboxVariants covers the three observed sandbox
+// modes plus an unknown one (must NOT hard-fail; forward-compat per
+// adapter-codex.md:222).
+func TestParseLine_TurnContextSandboxVariants(t *testing.T) {
+	t.Parallel()
+	for _, mode := range []string{"workspace-write", "danger-full-access", "read-only", "brand-new-mode"} {
+		line := `{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{"model":"m","sandbox_policy":{"type":"` + mode + `"}}}`
+		rec, _, err := parseLine([]byte(line))
+		if err != nil {
+			t.Fatalf("parseLine(sandbox %q): %v", mode, err)
+		}
+		if rec.TurnContext.sandboxType() != mode {
+			t.Errorf("sandboxType = %q, want %q", rec.TurnContext.sandboxType(), mode)
+		}
+	}
+}
+
+// TestParseLine_ResponseItemVariants covers the persisted ResponseItem nested
+// payload.type variants (spec adapter-codex.md:149-163). Each must decode into
+// a record with ResponseItem populated and the nested type recoverable.
+func TestParseLine_ResponseItemVariants(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name        string
+		payloadJSON string
+		wantType    string
+	}{
+		{"message", `{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi"}]}`, "message"},
+		{"reasoning", `{"type":"reasoning","summary":[{"type":"summary_text","text":"thinking"}],"encrypted_content":"AAAA"}`, "reasoning"},
+		{"function_call", `{"type":"function_call","name":"shell","arguments":"{\"cmd\":\"ls\"}","call_id":"c1"}`, "function_call"},
+		{"function_call_output", `{"type":"function_call_output","call_id":"c1","output":"done"}`, "function_call_output"},
+		{"custom_tool_call", `{"type":"custom_tool_call","call_id":"c2","name":"x","input":"i","status":"completed"}`, "custom_tool_call"},
+		{"custom_tool_call_output", `{"type":"custom_tool_call_output","call_id":"c2","output":"o"}`, "custom_tool_call_output"},
+		{"web_search_call", `{"type":"web_search_call","call_id":"w1","status":"completed","action":{"type":"search","query":"q"}}`, "web_search_call"},
+		{"image_generation_call", `{"type":"image_generation_call","id":"i1","status":"completed"}`, "image_generation_call"},
+		{"compaction", `{"type":"compaction","encrypted_content":"BBBB"}`, "compaction"},
+		{"context_compaction", `{"type":"context_compaction","encrypted_content":null}`, "context_compaction"},
+		{"local_shell_call-legacy", `{"type":"local_shell_call","call_id":"l1"}`, "local_shell_call"},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			line := `{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":` + c.payloadJSON + `}`
+			rec, skip, err := parseLine([]byte(line))
+			if err != nil || skip {
+				t.Fatalf("parseLine(%s): err=%v skip=%v", c.name, err, skip)
+			}
+			if rec.ResponseItem == nil {
+				t.Fatalf("ResponseItem nil for %s", c.name)
+			}
+			if rec.ResponseItem.Type != c.wantType {
+				t.Errorf("ResponseItem.Type = %q, want %q", rec.ResponseItem.Type, c.wantType)
+			}
+		})
+	}
+}
+
+// TestParseLine_GhostSnapshotSkippedSilently asserts the Rust #[serde(other)]
+// ResponseItem::Other catch-all behavior: a known-as-catch-all variant
+// (ghost_snapshot) is NOT a hard fail and NOT an unknown-payload error — it
+// decodes into Other and is skipped silently (spec adapter-codex.md:163-165,
+// rule #21 strip-and-ignore).
+func TestParseLine_GhostSnapshotSkippedSilently(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"ghost_snapshot","data":{}}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil {
+		t.Fatalf("ghost_snapshot must not error, got %v", err)
+	}
+	if !skip {
+		t.Fatal("ghost_snapshot must be skipped silently (skip=true)")
+	}
+	_ = rec
+}
+
+// TestParseLine_EventMsgVariants covers a representative spread of the
+// persisted EventMsg variants across both Limited and Extended modes
+// (spec adapter-codex.md:173-204).
+func TestParseLine_EventMsgVariants(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name        string
+		payloadJSON string
+		wantType    string
+	}{
+		{"user_message", `{"type":"user_message","message":"hi"}`, "user_message"},
+		{"agent_message", `{"type":"agent_message","message":"answer","phase":"final_answer"}`, "agent_message"},
+		{"agent_reasoning", `{"type":"agent_reasoning","text":"reason"}`, "agent_reasoning"},
+		{"agent_reasoning_raw_content", `{"type":"agent_reasoning_raw_content","text":"raw cot"}`, "agent_reasoning_raw_content"},
+		{"token_count", `{"type":"token_count","info":{"total_token_usage":{"total_tokens":100},"last_token_usage":{"input_tokens":10,"output_tokens":5}},"model_context_window":200000}`, "token_count"},
+		{"task_started", `{"type":"task_started","turn_id":"turn-1","started_at":1763664000}`, "task_started"},
+		{"task_complete", `{"type":"task_complete","turn_id":"turn-1","completed_at":"2025-11-20T17:00:00.000Z","duration_ms":1000}`, "task_complete"},
+		{"turn_aborted", `{"type":"turn_aborted","turn_id":"turn-1","reason":"interrupted"}`, "turn_aborted"},
+		{"exec_command_end", `{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"out"}`, "exec_command_end"},
+		{"mcp_tool_call_end", `{"type":"mcp_tool_call_end","call_id":"c1","invocation":{"server":"gh","tool":"list"}}`, "mcp_tool_call_end"},
+		{"patch_apply_end", `{"type":"patch_apply_end","call_id":"c1","success":true,"status":"completed"}`, "patch_apply_end"},
+		{"context_compacted", `{"type":"context_compacted"}`, "context_compacted"},
+		{"web_search_end", `{"type":"web_search_end","call_id":"w1","query":"q"}`, "web_search_end"},
+		{"error", `{"type":"error","message":"boom"}`, "error"},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			line := `{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":` + c.payloadJSON + `}`
+			rec, skip, err := parseLine([]byte(line))
+			if err != nil || skip {
+				t.Fatalf("parseLine(%s): err=%v skip=%v", c.name, err, skip)
+			}
+			if rec.EventMsg == nil {
+				t.Fatalf("EventMsg nil for %s", c.name)
+			}
+			if rec.EventMsg.Type != c.wantType {
+				t.Errorf("EventMsg.Type = %q, want %q", rec.EventMsg.Type, c.wantType)
+			}
+		})
+	}
+}
+
+// TestParseLine_Compacted decodes the top-level compacted line.
+func TestParseLine_Compacted(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:13.000Z","type":"compacted","payload":{"message":"summary text","replacement_history":[{"type":"message","role":"user","content":[]}]}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil || skip {
+		t.Fatalf("parseLine(compacted): err=%v skip=%v", err, skip)
+	}
+	if rec.Compacted == nil {
+		t.Fatal("Compacted payload not decoded")
+	}
+	if rec.Compacted.Message != "summary text" {
+		t.Errorf("compacted.message = %q", rec.Compacted.Message)
+	}
+	if rec.Compacted.replacementHistorySize() != 1 {
+		t.Errorf("replacementHistorySize = %d, want 1", rec.Compacted.replacementHistorySize())
+	}
+}
+
+// TestParseLine_TimestampPreserved asserts the envelope timestamp is captured
+// verbatim on every record (it is the canonical Ts source per
+// adapter-codex.md:56-60, 100-101).
+func TestParseLine_TimestampPreserved(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"event_msg","payload":{"type":"user_message","message":"hi"}}`
+	rec, _, err := parseLine([]byte(line))
+	if err != nil {
+		t.Fatalf("parseLine: %v", err)
+	}
+	if rec.Timestamp() != "2025-11-20T16:59:09.857Z" {
+		t.Errorf("Timestamp() = %q", rec.Timestamp())
+	}
+}
+
+// TestParseLine_RawPreserved asserts the verbatim line bytes are retained so a
+// later chunk's mapper can build a file://...#L<line> PayloadRef without
+// re-typing every nested variant (mirrors claude_code rec.Raw).
+func TestParseLine_RawPreserved(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"event_msg","payload":{"type":"user_message","message":"hi"}}`
+	rec, _, err := parseLine([]byte(line))
+	if err != nil {
+		t.Fatalf("parseLine: %v", err)
+	}
+	if string(rec.Raw) != line {
+		t.Errorf("Raw = %q, want verbatim line", rec.Raw)
+	}
+}
+
+// TestParseLine_EmptyPayloadTolerated asserts a known top-level type with an
+// absent payload does not panic and does not hard-fail (some metadata-only
+// appends may carry an empty body).
+func TestParseLine_EmptyPayloadTolerated(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"event_msg"}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil {
+		t.Fatalf("parseLine(empty payload): %v", err)
+	}
+	// An event_msg with no payload carries no nested type; the parser must not
+	// crash. It surfaces as a skip (nothing actionable) rather than an error.
+	if !skip {
+		t.Fatalf("empty-payload event_msg: want skip=true, got rec=%+v", rec)
+	}
+}
+
+// TestParseLine_MissingNestedType asserts a known top-level type whose payload
+// lacks the nested discriminator is tolerated (skip, no panic) rather than
+// being reported as an unknown nested variant.
+func TestParseLine_MissingNestedType(t *testing.T) {
+	t.Parallel()
+	line := `{"timestamp":"2025-11-20T16:59:09.857Z","type":"response_item","payload":{"role":"assistant"}}`
+	rec, skip, err := parseLine([]byte(line))
+	if err != nil {
+		t.Fatalf("parseLine(missing nested type): %v", err)
+	}
+	if !skip {
+		t.Fatalf("missing nested type: want skip=true, got rec=%+v", rec)
+	}
+}
diff --git a/internal/adapters/codex/types.go b/internal/adapters/codex/types.go
new file mode 100644
index 0000000..902a6fa
--- /dev/null
+++ b/internal/adapters/codex/types.go
@@ -0,0 +1,252 @@
+package codex
+
+import (
+	"bytes"
+	"encoding/json"
+)
+
+// This file defines the typed payload bodies for each top-level RolloutItem
+// variant and the known-nested-type sets the parser dispatches against. Only
+// the fields the parser-level contract and the later mapper consume are
+// decoded; every struct tolerates unknown sibling fields (encoding/json drops
+// them) so a newer codex CLI never hard-fails a line (spec adapter-codex.md
+// §"Versioning / Forward Compatibility").
+
+// sourceKind classifies a session_meta source enum into the canonical session
+// kind buckets (spec adapter-codex.md:292): root, sub_agent, tool_internal,
+// plus an explicit forward-compat "other" for unrecognized object shapes.
+type sourceKind string
+
+const (
+	sourceRoot     sourceKind = "root"
+	sourceSubagent sourceKind = "sub_agent"
+	sourceInternal sourceKind = "tool_internal"
+	sourceOther    sourceKind = "other"
+)
+
+// stringSourceKinds maps the bare-string SessionSource variants to their kind.
+// Unrecognized bare strings fall through to sourceOther (forward-compat).
+var stringSourceKinds = map[string]sourceKind{
+	"cli":     sourceRoot,
+	"vscode":  sourceRoot,
+	"exec":    sourceRoot,
+	"mcp":     sourceRoot,
+	"unknown": sourceRoot,
+}
+
+// gitInfo is the optional git block flattened onto session_meta
+// (protocol.rs:2856-2867). repository_url is sensitive in real files; fixtures
+// sanitize it to git@github.com:example/example.git.
+type gitInfo struct {
+	CommitHash    string `json:"commit_hash"`
+	Branch        string `json:"branch"`
+	RepositoryURL string `json:"repository_url"`
+}
+
+// sessionMetaPayload is the SessionMetaLine body (protocol.rs:2638-2703). Only
+// the load-bearing fields are typed; the rest stay accessible via the record's
+// Raw for the mapper's Extras path.
+type sessionMetaPayload struct {
+	ID            string          `json:"id"`
+	ForkedFromID  string          `json:"forked_from_id"`
+	Timestamp     string          `json:"timestamp"`
+	Cwd           string          `json:"cwd"`
+	Originator    string          `json:"originator"`
+	CLIVersion    string          `json:"cli_version"`
+	ThreadSource  string          `json:"thread_source"`
+	AgentNickname string          `json:"agent_nickname"`
+	AgentRole     string          `json:"agent_role"`
+	ModelProvider string          `json:"model_provider"`
+	Source        json.RawMessage `json:"source"`
+	Git           *gitInfo        `json:"git"`
+}
+
+// classifySource resolves the polymorphic source enum into a canonical
+// sourceKind and, for a sub-agent thread_spawn, the parent ThreadId (else "").
+// Tolerates every shape in adapter-codex.md:114-122; unknown → sourceOther.
+func (p *sessionMetaPayload) classifySource() (sourceKind, string) {
+	body := bytes.TrimSpace(p.Source)
+	if len(body) == 0 || bytes.Equal(body, []byte("null")) {
+		return sourceRoot, ""
+	}
+	// Bare string form: "cli" | "exec" | "vscode" | "mcp" | "unknown".
+	var s string
+	if json.Unmarshal(body, &s) == nil {
+		if k, ok := stringSourceKinds[s]; ok {
+			return k, ""
+		}
+		return sourceOther, ""
+	}
+	// Object form: exactly one of custom / internal / subagent / other.
+	var obj struct {
+		Custom   json.RawMessage `json:"custom"`
+		Internal json.RawMessage `json:"internal"`
+		Subagent json.RawMessage `json:"subagent"`
+	}
+	if json.Unmarshal(body, &obj) != nil {
+		return sourceOther, ""
+	}
+	// A key set to JSON null decodes to the literal "null"; treat it as absent.
+	present := func(v json.RawMessage) bool { return len(v) > 0 && string(v) != "null" }
+	switch {
+	case present(obj.Custom):
+		return sourceRoot, ""
+	case present(obj.Internal):
+		return sourceInternal, ""
+	case present(obj.Subagent):
+		return sourceSubagent, parentFromSubagent(obj.Subagent)
+	}
+	return sourceOther, ""
+}
+
+// parentFromSubagent returns the parent_thread_id carried by a subagent source
+// of the object form {thread_spawn:{parent_thread_id,...}}. The bare-string
+// form ("review"|"compact"|...) carries no parent and yields "".
+func parentFromSubagent(raw json.RawMessage) string {
+	var spawn struct {
+		Thread struct {
+			Parent string `json:"parent_thread_id"`
+		} `json:"thread_spawn"`
+	}
+	if err := json.Unmarshal(raw, &spawn); err != nil {
+		return ""
+	}
+	return spawn.Thread.Parent
+}
+
+// turnContextPayload is the TurnContextItem body (protocol.rs:2745-2776). Rich
+// sandbox/approval/network policy is preserved opaquely via the record's Raw;
+// only the discriminating/load-bearing fields are typed here.
+type turnContextPayload struct {
+	TurnID         string          `json:"turn_id"`
+	Cwd            string          `json:"cwd"`
+	Model          string          `json:"model"`
+	Effort         string          `json:"effort"`
+	ApprovalPolicy string          `json:"approval_policy"`
+	SandboxPolicy  json.RawMessage `json:"sandbox_policy"`
+}
+
+// sandboxType extracts the "type" discriminator from sandbox_policy
+// (workspace-write | danger-full-access | read-only | newer values). Values
+// pass through verbatim; an absent, null, or undecodable policy yields "".
+func (p *turnContextPayload) sandboxType() string {
+	policy := bytes.TrimSpace(p.SandboxPolicy)
+	if len(policy) == 0 || bytes.Equal(policy, []byte("null")) {
+		return ""
+	}
+	var disc struct {
+		Type string `json:"type"`
+	}
+	if err := json.Unmarshal(policy, &disc); err != nil {
+		return ""
+	}
+	return disc.Type
+}
+
+// responseItemPayload is the ResponseItem body (models.rs:750-903). The mapper
+// reads the remaining variant-specific fields off the record's Raw; the parser
+// only needs the discriminator plus the correlation keys it shares broadly.
+type responseItemPayload struct {
+	Type             string          `json:"type"`
+	Role             string          `json:"role"`
+	Name             string          `json:"name"`
+	CallID           string          `json:"call_id"`
+	Arguments        string          `json:"arguments"`
+	EncryptedContent json.RawMessage `json:"encrypted_content"`
+	Summary          json.RawMessage `json:"summary"`
+	Content          json.RawMessage `json:"content"`
+	Output           json.RawMessage `json:"output"`
+}
+
+// eventMsgPayload is the EventMsg body (protocol.rs:1133-1328). As with
+// responseItemPayload, only the discriminator plus the broadly-shared
+// correlation keys are typed; the mapper reads the rest off Raw.
+type eventMsgPayload struct {
+	Type    string          `json:"type"`
+	TurnID  string          `json:"turn_id"`
+	CallID  string          `json:"call_id"`
+	Message string          `json:"message"`
+	Text    string          `json:"text"`
+	Reason  string          `json:"reason"`
+	Info    json.RawMessage `json:"info"`
+}
+
+// compactedPayload is the top-level CompactedItem body (protocol.rs:2705-2734).
+type compactedPayload struct {
+	Message            string            `json:"message"`
+	ReplacementHistory []json.RawMessage `json:"replacement_history"`
+}
+
+// replacementHistorySize returns the count of replacement_history entries (0
+// when null/absent). The mapper surfaces this in the compaction op's Extras.
+func (p *compactedPayload) replacementHistorySize() int { return len(p.ReplacementHistory) }
+
+// responseItemTypes is the set of nested response_item payload.type values the
+// adapter recognizes (spec adapter-codex.md:149-162; the persisted allowlist in
+// policy.rs:67-85 plus the legacy local_shell_* pair for old .json files).
+var responseItemTypes = map[string]struct{}{
+	"message":                 {},
+	"reasoning":               {},
+	"function_call":           {},
+	"function_call_output":    {},
+	"custom_tool_call":        {},
+	"custom_tool_call_output": {},
+	"tool_search_call":        {},
+	"tool_search_output":      {},
+	"web_search_call":         {},
+	"image_generation_call":   {},
+	"compaction":              {},
+	"context_compaction":      {},
+	"local_shell_call":        {}, // legacy .json only (adapter-codex.md:153)
+	"local_shell_call_output": {}, // legacy .json only
+}
+
+// responseItemNoOp are nested response_item variants the upstream
+// ResponseItem::Other catch-all (#[serde(other)], models.rs:901) absorbs and
+// the adapter intentionally strips without surfacing (rule #21,
+// adapter-codex.md:163,378). Distinct from truly unknown types, which DO
+// surface one SourceError per variant.
+var responseItemNoOp = map[string]struct{}{
+	"ghost_snapshot": {},
+}
+
+// eventMsgTypes is the set of nested event_msg payload.type values the adapter
+// recognizes across Limited and Extended persistence modes (spec
+// adapter-codex.md:173-204; policy.rs:135-220). Aliases turn_started /
+// turn_complete are included alongside task_started / task_complete.
+var eventMsgTypes = map[string]struct{}{
+	"user_message":                {},
+	"agent_message":               {},
+	"agent_reasoning":             {},
+	"agent_reasoning_raw_content": {},
+	"patch_apply_end":             {},
+	"token_count":                 {},
+	"thread_goal_updated":         {},
+	"context_compacted":           {},
+	"entered_review_mode":         {},
+	"exited_review_mode":          {},
+	"mcp_tool_call_end":           {},
+	"thread_rolled_back":          {},
+	"turn_aborted":                {},
+	"task_started":                {},
+	"turn_started":                {}, // alias of task_started
+	"task_complete":               {},
+	"turn_complete":               {}, // alias of task_complete
+	"web_search_end":              {},
+	"image_generation_end":        {},
+	"item_completed":              {},
+	// Extended mode (policy.rs:135-220).
+	"error":                      {},
+	"guardian_assessment":        {},
+	"exec_command_end":           {},
+	"view_image_tool_call":       {},
+	"dynamic_tool_call_request":  {},
+	"dynamic_tool_call_response": {},
+}
+
+// eventMsgNoOp are nested event_msg variants the adapter recognizes but
+// intentionally ignores without surfacing (rule #21 ghost_snapshot).
+// NOTE(review): realtime_conversation_* (adapter-codex.md:507) is not listed here — confirm.
+var eventMsgNoOp = map[string]struct{}{
+	"ghost_snapshot": {},
+}

From 92d837d2bc5b339a14c6ea24e9fa644e776653b7 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 12:38:04 +0300
Subject: [PATCH 02/13] codex adapter chunk B: per-file mapper + ops state
 machine

Implements the codex per-file state machine (spec rules #1-24): turn
open/finalize across the dual boundary formats (turn_context-only on old
CLIs vs task_started/task_complete+turn_id on new), the reasoning split
(response_item.reasoning -> reasoning op with summary/raw kind;
event_msg.agent_reasoning* -> LogEntry only, never a duplicate op),
tool-call pairing by call_id with the namespace heuristic, web_search /
image_generation ops, enrichment-only events (exec_command_end,
mcp_tool_call_end, patch_apply_end add op Extras, never a second op),
sub_agent/fork/tool_internal session linkage, and compaction ops.

Token accounting follows the SOW C#1 decision: per-turn TokensIn/Out are
the sum of per-call last_token_usage over the turn's attributed
token_count events; cumulative total_token_usage feeds the preceding LLM
op's CtxUsed only. Session finalize follows C#3: no clean-EOF completed
finalize (sessions stay running like claude-code); the scanner-callable
finalizeStale(nowUs) emits the synthetic failed/incomplete finalize only
for a hanging turn on a >=1h-stale file.

Spec alignment (same commit): document that turns.extras_json is
unreachable today (no Extras on canonical turn events; writer does not
populate it), so codex surfaces codex_turn_id/sandbox/ttft_ms via a
turn_meta LogEntry in v1, with the real column deferred to a follow-up
SOW that adds a turn-extras carrier to the canonical event + writer.

Pure mapper (no I/O); scanner/tailer/adapter wiring land in later chunks.
Gates green: gofmt/vet/golangci(0)/gosec(0); race tests pass at 94.3%
new-code coverage; whole-repo tests pass with no sibling regression.
---
 .agents/sow/specs/adapter-codex.md            |  10 +
 internal/adapters/codex/helpers_unit_test.go  | 351 +++++++
 internal/adapters/codex/mapper.go             | 430 ++++++++
 .../adapters/codex/mapper_coverage_test.go    | 464 +++++++++
 internal/adapters/codex/mapper_finalize.go    | 104 ++
 .../adapters/codex/mapper_helpers_test.go     | 112 +++
 internal/adapters/codex/mapper_test.go        | 950 ++++++++++++++++++
 internal/adapters/codex/mapper_turn.go        | 315 ++++++
 internal/adapters/codex/ops.go                | 238 +++++
 internal/adapters/codex/ops_enrich.go         | 326 ++++++
 internal/adapters/codex/ops_event.go          | 372 +++++++
 internal/adapters/codex/ops_response.go       | 244 +++++
 internal/adapters/codex/ops_tools.go          | 230 +++++
 13 files changed, 4146 insertions(+)
 create mode 100644 internal/adapters/codex/helpers_unit_test.go
 create mode 100644 internal/adapters/codex/mapper.go
 create mode 100644 internal/adapters/codex/mapper_coverage_test.go
 create mode 100644 internal/adapters/codex/mapper_finalize.go
 create mode 100644 internal/adapters/codex/mapper_helpers_test.go
 create mode 100644 internal/adapters/codex/mapper_test.go
 create mode 100644 internal/adapters/codex/mapper_turn.go
 create mode 100644 internal/adapters/codex/ops.go
 create mode 100644 internal/adapters/codex/ops_enrich.go
 create mode 100644 internal/adapters/codex/ops_event.go
 create mode 100644 internal/adapters/codex/ops_response.go
 create mode 100644 internal/adapters/codex/ops_tools.go

diff --git a/.agents/sow/specs/adapter-codex.md b/.agents/sow/specs/adapter-codex.md
index 77d2482..db6eb21 100644
--- a/.agents/sow/specs/adapter-codex.md
+++ b/.agents/sow/specs/adapter-codex.md
@@ -482,6 +482,16 @@ Real observation: 8 distinct sub-agent sessions in the sampled set, all `depth=1
 
 Items in codex that don't map cleanly to canonical-events.md:
 
+> **v1 `turns.extras_json` reachability (gaps #2, #3, #8).** The `turns` table has an
+> `extras_json` column (data-model.md), but no canonical turn event carries an `Extras`
+> field today (`TurnStartedEvent`/`TurnFinalizedEvent` in `internal/canonical/events.go`
+> have none) and the ingest writer never populates `turns.extras_json`. So `codex_turn_id`,
+> `turns.extras_json.sandbox`, and `ttft_ms` are structurally unreachable from any adapter
+> as of SOW-0004. The codex adapter therefore surfaces these per-turn values via a single
+> informational `turn_meta` LogEntry at turn finalize (no silent loss), and populating the
+> real `turns.extras_json` column is deferred to a follow-up SOW that adds a turn-extras
+> carrier to the canonical event + writer (shared infrastructure benefiting every adapter).
+
 1. **Reasoning op as first-class**: covered (`OpKind = 'reasoning'` exists). However, codex distinguishes `agent_reasoning` (visible summary) from `agent_reasoning_raw_content` (full CoT) — canonical model has no field for that distinction. Stash in Extras: `{reasoning_kind: "summary"|"raw"}`.
 
 2. **No "turn" concept in codex pre-0.93**: older sessions infer turns from `turn_context` boundaries. Canonical `turn.seq` becomes a synthesized 1-based counter that may not match any codex-internal id. Store the codex `turn_id` (UUID) in `turns.extras_json.codex_turn_id` for cross-reference.
diff --git a/internal/adapters/codex/helpers_unit_test.go b/internal/adapters/codex/helpers_unit_test.go
new file mode 100644
index 0000000..186d5a3
--- /dev/null
+++ b/internal/adapters/codex/helpers_unit_test.go
@@ -0,0 +1,351 @@
+package codex
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// These unit tests exercise the pure helper functions' edge and error branches
+// directly — the malformed-JSON guards, empty-input fallbacks, and numeric
+// coercions that valid record streams do not reach. They pin the defensive
+// behavior (no panic, sane defaults) the forward-compat contract requires.
+
+func TestPayloadNumber_Variants(t *testing.T) {
+	t.Parallel()
+	// integer
+	if got := payloadNumber([]byte(`{"payload":{"n":42}}`), "n"); got != 42 {
+		t.Errorf("int = %d, want 42", got)
+	}
+	// float coerced to int
+	if got := payloadNumber([]byte(`{"payload":{"n":42.9}}`), "n"); got != 42 {
+		t.Errorf("float = %d, want 42", got)
+	}
+	// absent field
+	if got := payloadNumber([]byte(`{"payload":{}}`), "n"); got != 0 {
+		t.Errorf("absent = %d, want 0", got)
+	}
+	// non-numeric value
+	if got := payloadNumber([]byte(`{"payload":{"n":"x"}}`), "n"); got != 0 {
+		t.Errorf("string = %d, want 0", got)
+	}
+	// malformed JSON
+	if got := payloadNumber([]byte(`{not json`), "n"); got != 0 {
+		t.Errorf("malformed = %d, want 0", got)
+	}
+}
+
+func TestCompletedAtMicros_Variants(t *testing.T) {
+	t.Parallel()
+	// RFC3339 string
+	if got := completedAtMicros([]byte(`{"payload":{"completed_at":"2025-11-20T17:00:00.000Z"}}`)); got == 0 {
+		t.Error("rfc3339 completed_at not parsed")
+	}
+	// unix seconds
+	if got := completedAtMicros([]byte(`{"payload":{"completed_at":1763664600}}`)); got != 1763664600*1_000_000 {
+		t.Errorf("unix = %d, want %d", got, int64(1763664600)*1_000_000)
+	}
+	// absent
+	if got := completedAtMicros([]byte(`{"payload":{}}`)); got != 0 {
+		t.Errorf("absent = %d, want 0", got)
+	}
+	// invalid string
+	if got := completedAtMicros([]byte(`{"payload":{"completed_at":"not-a-date"}}`)); got != 0 {
+		t.Errorf("bad string = %d, want 0", got)
+	}
+	// malformed
+	if got := completedAtMicros([]byte(`{bad`)); got != 0 {
+		t.Errorf("malformed = %d, want 0", got)
+	}
+}
+
+func TestStartedAtMicros(t *testing.T) {
+	t.Parallel()
+	if got := startedAtMicros([]byte(`{"payload":{"started_at":100}}`)); got != 100_000_000 {
+		t.Errorf("started_at = %d, want 100000000", got)
+	}
+	if got := startedAtMicros([]byte(`{"payload":{}}`)); got != 0 {
+		t.Errorf("absent started_at = %d, want 0", got)
+	}
+}
+
+func TestMcpResultStatus_Variants(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name       string
+		raw        string
+		wantStatus string
+		wantErr    string
+	}{
+		{"ok", `{"payload":{"result":{"Ok":{"is_error":false}}}}`, "completed", ""},
+		{"ok-is-error", `{"payload":{"result":{"Ok":{"is_error":true}}}}`, "failed", "tool_error"},
+		{"err", `{"payload":{"result":{"Err":"boom"}}}`, "failed", "tool_error"},
+		{"absent", `{"payload":{}}`, "completed", ""},
+		{"malformed", `{bad`, "completed", ""},
+		{"unparseable-result", `{"payload":{"result":123}}`, "completed", ""},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			s, e := mcpResultStatus([]byte(c.raw))
+			if s != c.wantStatus || e != c.wantErr {
+				t.Errorf("mcpResultStatus = {%q %q}, want {%q %q}", s, e, c.wantStatus, c.wantErr)
+			}
+		})
+	}
+}
+
+func TestPatchApplyStatus_Variants(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		raw        string
+		wantStatus string
+	}{
+		{`{"payload":{"success":true}}`, "completed"},
+		{`{"payload":{"success":false}}`, "failed"},
+		{`{"payload":{"status":"error"}}`, "failed"},
+		{`{"payload":{"status":"completed"}}`, "completed"},
+		{`{"payload":{}}`, "completed"},
+		{`{bad`, "completed"},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.wantStatus+"_"+c.raw, func(t *testing.T) {
+			t.Parallel()
+			s, _ := patchApplyStatus([]byte(c.raw))
+			if s != c.wantStatus {
+				t.Errorf("patchApplyStatus(%s) = %q, want %q", c.raw, s, c.wantStatus)
+			}
+		})
+	}
+}
+
+func TestEnrichStatus_Variants(t *testing.T) {
+	t.Parallel()
+	if s, _ := enrichStatus([]byte(`{"payload":{"exit_code":0}}`)); s != "completed" {
+		t.Errorf("exit 0 status = %q, want completed", s)
+	}
+	if s, e := enrichStatus([]byte(`{"payload":{"exit_code":1}}`)); s != "failed" || e != "command_failed" {
+		t.Errorf("exit 1 = {%q %q}, want {failed command_failed}", s, e)
+	}
+	if s, _ := enrichStatus([]byte(`{"payload":{}}`)); s != "" {
+		t.Errorf("no exit_code status = %q, want empty", s)
+	}
+	if s, _ := enrichStatus([]byte(`{bad`)); s != "" {
+		t.Errorf("malformed status = %q, want empty", s)
+	}
+}
+
+func TestOutputStatusAndText(t *testing.T) {
+	t.Parallel()
+	// bare-string success
+	if s, _ := outputStatus(json.RawMessage(`"all good"`)); s != "completed" {
+		t.Errorf("ok string status = %q, want completed", s)
+	}
+	// empty/null
+	if s, _ := outputStatus(json.RawMessage(`null`)); s != "completed" {
+		t.Errorf("null status = %q, want completed", s)
+	}
+	// sandbox denial
+	if s, e := outputStatus(json.RawMessage(`"operation not permitted"`)); s != "failed" || e != "sandbox_denied" {
+		t.Errorf("denial = {%q %q}, want {failed sandbox_denied}", s, e)
+	}
+	// object with output field carrying error
+	if s, e := outputStatus(json.RawMessage(`{"output":"error: nope"}`)); s != "failed" || e != "tool_error" {
+		t.Errorf("obj output error = {%q %q}, want {failed tool_error}", s, e)
+	}
+	// object with neither output nor content → scans raw bytes (no error markers)
+	if s, _ := outputStatus(json.RawMessage(`{"misc":1}`)); s != "completed" {
+		t.Errorf("obj-no-fields status = %q, want completed", s)
+	}
+	// scalarOrJSON: a non-string JSON value returns its raw form
+	if v := scalarOrJSON(json.RawMessage(`{"a":1}`)); v == "" {
+		t.Error("scalarOrJSON(object) returned empty")
+	}
+	if v := scalarOrJSON(json.RawMessage(`null`)); v != "" {
+		t.Errorf("scalarOrJSON(null) = %q, want empty", v)
+	}
+}
+
+func TestMessageText_And_ReasoningKindEdge(t *testing.T) {
+	t.Parallel()
+	if got := messageText(json.RawMessage(`[{"type":"input_text","text":"a"},{"text":"b"}]`)); got != "ab" {
+		t.Errorf("messageText = %q, want ab", got)
+	}
+	if got := messageText(json.RawMessage(`null`)); got != "" {
+		t.Errorf("messageText(null) = %q, want empty", got)
+	}
+	if got := messageText(json.RawMessage(`"notarray"`)); got != "" {
+		t.Errorf("messageText(non-array) = %q, want empty", got)
+	}
+	// reasoningKind with NO summary, NO content, NO encrypted → defaults raw.
+	p := &responseItemPayload{}
+	if k, f := reasoningKind(p); k != "raw" || f != "json" {
+		t.Errorf("empty reasoningKind = {%q %q}, want {raw json}", k, f)
+	}
+}
+
+func TestSourceStringAndDepth(t *testing.T) {
+	t.Parallel()
+	// bare string
+	if got := sourceString(json.RawMessage(`"exec"`)); got != "exec" {
+		t.Errorf("sourceString(bare) = %q, want exec", got)
+	}
+	// object → key name
+	if got := sourceString(json.RawMessage(`{"subagent":{"thread_spawn":{}}}`)); got != "subagent" {
+		t.Errorf("sourceString(subagent) = %q, want subagent", got)
+	}
+	// null/absent
+	if got := sourceString(json.RawMessage(`null`)); got != "" {
+		t.Errorf("sourceString(null) = %q, want empty", got)
+	}
+	// unrecognized object shape
+	if got := sourceString(json.RawMessage(`{"weird":1}`)); got != "" {
+		t.Errorf("sourceString(weird) = %q, want empty", got)
+	}
+	// valid JSON that is neither a string nor an object (a number) → empty
+	if got := sourceString(json.RawMessage(`12`)); got != "" {
+		t.Errorf("sourceString(number) = %q, want empty", got)
+	}
+	// subagentDepth
+	if d := subagentDepth(json.RawMessage(`{"subagent":{"thread_spawn":{"depth":3}}}`)); d != 3 {
+		t.Errorf("subagentDepth = %d, want 3", d)
+	}
+	if d := subagentDepth(json.RawMessage(`null`)); d != 0 {
+		t.Errorf("subagentDepth(null) = %d, want 0", d)
+	}
+	if d := subagentDepth(json.RawMessage(`{bad`)); d != 0 {
+		t.Errorf("subagentDepth(malformed) = %d, want 0", d)
+	}
+}
+
+func TestMcpInvocationAndExecExtras(t *testing.T) {
+	t.Parallel()
+	s, tool := mcpInvocation([]byte(`{"payload":{"invocation":{"server":"gh","tool":"list"}}}`))
+	if s != "gh" || tool != "list" {
+		t.Errorf("mcpInvocation = {%q %q}, want {gh list}", s, tool)
+	}
+	if s, tool := mcpInvocation([]byte(`{bad`)); s != "" || tool != "" {
+		t.Errorf("mcpInvocation(malformed) = {%q %q}, want empty", s, tool)
+	}
+	// execCommandExtras: all fields
+	ex := execCommandExtras([]byte(`{"payload":{"exit_code":0,"cwd":"<ROOT>","source":"model","aggregated_output":"abc"}}`))
+	if ex["exec_exit_code"] != int64(0) || ex["exec_cwd"] != "<ROOT>" || ex["exec_source"] != "model" || ex["exec_output_bytes"] != 3 {
+		t.Errorf("execCommandExtras = %+v", ex)
+	}
+	// empty payload → nil
+	if ex := execCommandExtras([]byte(`{"payload":{}}`)); ex != nil {
+		t.Errorf("execCommandExtras(empty) = %+v, want nil", ex)
+	}
+	// malformed → nil
+	if ex := execCommandExtras([]byte(`{bad`)); ex != nil {
+		t.Errorf("execCommandExtras(malformed) = %+v, want nil", ex)
+	}
+	// webSearchExtras
+	if w := webSearchExtras([]byte(`{"payload":{"query":"q"}}`)); w["query"] != "q" {
+		t.Errorf("webSearchExtras = %+v", w)
+	}
+	if w := webSearchExtras([]byte(`{"payload":{}}`)); w != nil {
+		t.Errorf("webSearchExtras(empty) = %+v, want nil", w)
+	}
+	if w := webSearchExtras([]byte(`{bad`)); w != nil {
+		t.Errorf("webSearchExtras(malformed) = %+v, want nil", w)
+	}
+}
+
+func TestDecodeTokenCount_Placements(t *testing.T) {
+	t.Parallel()
+	// model_context_window as sibling of info (older shape).
+	info := decodeTokenCount([]byte(`{"payload":{"info":{"last_token_usage":{"input_tokens":3}},"model_context_window":128000}}`))
+	if info.last.InputTokens != 3 || info.modelContextWindow != 128000 {
+		t.Errorf("decodeTokenCount sibling mcw = %+v", info)
+	}
+	// malformed
+	if info := decodeTokenCount([]byte(`{bad`)); info.modelContextWindow != 0 {
+		t.Errorf("decodeTokenCount(malformed) = %+v, want zero", info)
+	}
+}
+
+func TestSmallHelpers(t *testing.T) {
+	t.Parallel()
+	// userFingerprint: empty in → empty out (never deduped).
+	if userFingerprint("  ") != "" {
+		t.Error("blank userFingerprint not empty")
+	}
+	if userFingerprint(" hi ") != "hi" {
+		t.Errorf("userFingerprint trim = %q, want hi", userFingerprint(" hi "))
+	}
+	// firstSeenUser: empty always first; non-empty dedups.
+	m := newTestMapper("sid")
+	if !m.firstSeenUser("") {
+		t.Error("empty fp should be first")
+	}
+	if !m.firstSeenUser("x") || m.firstSeenUser("x") {
+		t.Error("firstSeenUser dedup broken")
+	}
+	// trimPreview: max<=0 → empty; truncation.
+	if trimPreview("abc", 0) != "" {
+		t.Error("trimPreview max=0 not empty")
+	}
+	if trimPreview("abcdef", 3) != "abc" {
+		t.Errorf("trimPreview trunc = %q, want abc", trimPreview("abcdef", 3))
+	}
+	if trimPreview("ab", 5) != "ab" {
+		t.Errorf("trimPreview short = %q, want ab", trimPreview("ab", 5))
+	}
+	// parseTsToMicros: invalid → error.
+	if _, err := parseTsToMicros("not-a-ts"); err == nil {
+		t.Error("parseTsToMicros(bad) should error")
+	}
+	// mergeExtras nil-op paths.
+	mergeExtras(nil, map[string]any{"a": 1})
+	op := &openOp{}
+	mergeExtras(op, nil)
+	mergeExtras(op, map[string]any{"a": 1})
+	if op.extras["a"] != 1 {
+		t.Error("mergeExtras did not merge")
+	}
+	// trackOp empty call_id is not tracked.
+	m.trackOp("", "t", 1, 1, canonical.OpTool, "x")
+	if len(m.openOps) != 0 {
+		t.Error("trackOp tracked an empty call_id")
+	}
+	// sortByOpSeq single element (degenerate).
+	xs := []int{5}
+	sortByOpSeq(xs, func(v int) int { return v })
+	if xs[0] != 5 {
+		t.Error("sortByOpSeq mangled single element")
+	}
+	// agentNameFromMeta role fallback + bare codex.
+	if got := agentNameFromMeta(&sessionMetaPayload{AgentRole: "explorer"}); got != "explorer" {
+		t.Errorf("agentNameFromMeta role = %q, want explorer", got)
+	}
+	if got := agentNameFromMeta(&sessionMetaPayload{}); got != "codex" {
+		t.Errorf("agentNameFromMeta empty = %q, want codex", got)
+	}
+	// phaseFromRaw malformed → empty.
+	if phaseFromRaw([]byte(`{bad`)) != "" {
+		t.Error("phaseFromRaw(malformed) not empty")
+	}
+}
+
+func TestSessionExtras_EmptyAndProvider(t *testing.T) {
+	t.Parallel()
+	// A meta with only model_provider yields extras carrying it.
+	ex := sessionExtras(&sessionMetaPayload{ModelProvider: "openai"}, "")
+	if ex["model_provider"] != "openai" {
+		t.Errorf("sessionExtras model_provider = %+v", ex)
+	}
+	// A fully-empty meta yields nil extras.
+	if ex := sessionExtras(&sessionMetaPayload{}, ""); ex != nil {
+		t.Errorf("sessionExtras(empty) = %+v, want nil", ex)
+	}
+	// Git block surfaces only the non-empty git fields.
+	ex = sessionExtras(&sessionMetaPayload{Git: &gitInfo{Branch: "main"}}, "")
+	git, _ := ex["git"].(map[string]any)
+	if git["branch"] != "main" {
+		t.Errorf("sessionExtras git = %+v", ex["git"])
+	}
+}
diff --git a/internal/adapters/codex/mapper.go b/internal/adapters/codex/mapper.go
new file mode 100644
index 0000000..4117791
--- /dev/null
+++ b/internal/adapters/codex/mapper.go
@@ -0,0 +1,430 @@
+package codex
+
+import (
+	"fmt"
+	"path/filepath"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// subEventBits bounds the number of sub-events a single record may emit. A
+// codex record fans out to at most a handful of events (a function_call emits
+// an OpStarted + tool_request PayloadRef; a message emits an OpStarted +
+// OpFinalized + PayloadRef); 12 bits (4096) is ample headroom. SourceSeq is an
+// observability counter only — the durable resume key is the byte offset in the
+// cursor (cursor.go), so the exact packing is never load-bearing. Mirrors
+// claude_code/mapper.go.
+const subEventBits = 12
+
+// maxSubEventsPerRecord is the per-record sub-event cap subEventBits implies (1<<12 = 4096).
+const maxSubEventsPerRecord = 1 << subEventBits
+
+// Format is the stable adapter identifier (SOW-0004 decision C#3: Format =
+// "codex"). It is the source name the mapper stamps onto every LogEntry.Source
+// and the name Chunk D's adapter.go registers with the adapter registry; it is
+// defined here (the mapper is the first non-test consumer) so Chunk B compiles
+// and is testable in isolation. Chunk D references this const for registration
+// rather than redefining it (mirrors claude_code, where one Format const is
+// shared by mapper.go and adapter.go).
+const Format = "codex"
+
+// provider is constant for codex: every LLM op is an OpenAI Responses-API call
+// (spec adapter-codex.md:322, "Cost calculation"). The pricing catalog is keyed
+// on (provider="openai", model=turn_context.model).
+const provider = "openai"
+
+// fileMapper holds the per-file inference state needed to project one codex
+// rollout's RolloutItem line stream onto the canonical session/turn/op model.
+// One fileMapper processes exactly one rollout file (a root, a fork, or a
+// sub-agent thread) start-to-finish; it is NOT reused across files because turn
+// and op numbering is per-session.
+//
+// The mapper is PURE with respect to I/O: it reads no files and watches no
+// directories. The scanner/tailer (Chunk C) drives it line-by-line via
+// mapRecord and, at EOF, asks finalizeStale whether a hanging turn must be
+// synthetically failed (the scanner owns file mtime; the mapper owns the
+// open-turn state — spec rule #23, SOW C#3).
+//
+// State persists across Scan→Tail of the same file via the rebuild path
+// (mirrors claude_code): the scanner replays the chain from offset 0 to
+// reconstruct the per-file turn/op counters deterministically, gating emission
+// to records at or after the resume offset, so a resume yields the SAME Seqs as
+// a one-shot pass.
+type fileMapper struct {
+	sourceID string
+	// nativeID is the canonical session id for THIS file: session_meta.id
+	// (spec rule #1). For a fork or sub-agent it is still the child's own id;
+	// the parent linkage is carried by parentNativeID.
+	nativeID string
+	// parentNativeID is empty for a root session; the parent thread id for a
+	// sub-agent (source.subagent.thread_spawn.parent_thread_id) or the
+	// forked_from_id for a fork (spec rule #1, "Sub-Agent Linkage").
+	parentNativeID string
+	// kind is root, sub_agent, fork, or tool_internal (spec rule #1).
+	kind canonical.SessionKind
+	// agentName seeds SessionStartedEvent.AgentName (sub-agent: agent_nickname
+	// or agent_role; else "codex:" + originator) (spec rule #1).
+	agentName string
+	// absPath is the rollout's absolute path on disk, used to build the
+	// PayloadRef LocationURI ("file://<abs>#L<line>") for inline bodies that
+	// live in the jsonl (user/assistant messages, reasoning, tool I/O,
+	// compaction summaries) and for log attribution. Empty in mapper-only unit
+	// tests; the URI then carries the line anchor without an absolute prefix.
+	absPath string
+
+	// lineNo is the 1-based file line number of the record currently being
+	// mapped. The scanner (Chunk C) sets it via setLineNo before each mapRecord
+	// so the PayloadRef LocationURI can anchor "#L<line>" at the owning record
+	// (spec rule #6/#7/#8, edge #7 — large bodies are referenced, never
+	// inlined). 0 disables the anchor (mapper-only tests that do not set it).
+	lineNo int
+
+	// recordIdx is the 0-based ordinal of the record being mapped, used to
+	// derive a stable per-file SourceSeq. Monotone within a streaming pass.
+	recordIdx uint64
+
+	// sessionStarted guards the once-per-file SessionStartedEvent (spec rule #1).
+	sessionStarted bool
+	// modelSeen records that a SessionUpdatedEvent(Model) has been emitted, so
+	// the model is announced exactly once when first learned from a turn_context
+	// (spec rule #2).
+	modelSeen bool
+	// model is the active turn's model (latest turn_context.model). Stamped onto
+	// every LLM/reasoning op so cost can be computed downstream (spec rule #2,
+	// #7). Empty until the first turn_context carrying a model.
+	model string
+
+	// turns maps a codex turn_id (UUID) to the synthesized 1-based turn state.
+	// A turn_id of "" is the absent-turn_id fallback bucket (old CLI without
+	// turn_id — spec edge #3); it shares the same map under the empty key.
+	turns map[string]*turnState
+	// turnOrder lists turn_ids in open order so finalizeStale can find the most
+	// recent still-open turn deterministically (spec rule #23).
+	turnOrder []string
+	// turnSeqCounter is the last assigned 1-based turn Seq. Monotone per file.
+	turnSeqCounter int
+	// activeTurnID is the turn_id of the most-recently-opened turn, used to
+	// attribute a session-level token_count (no turn_id) to the right turn
+	// (spec rule #17, "Token accounting nuance") and to drive the absent-turn_id
+	// fallback (spec edge #3).
+	activeTurnID string
+	// haveActiveTurn distinguishes activeTurnID=="" "no turn yet" from
+	// activeTurnID=="" "the absent-turn_id fallback turn is active".
+	haveActiveTurn bool
+
+	// openOps maps an in-flight tool/llm op's call_id to where it was emitted so
+	// the matching *_output (or enrichment event) finalizes/enriches the same
+	// op (spec rule #9, #14, #15, #16). A call_id of "" is never tracked.
+	openOps map[string]*openOp
+
+	// seenUser dedups user input across response_item.message(role=user)
+	// and event_msg.user_message (spec rule #6, #18). Keyed on a content
+	// fingerprint so the second arrival is suppressed regardless of order.
+	seenUser map[string]struct{}
+
+	// staleFinalized guards finalizeStale so the synthetic finalize is emitted
+	// at most once per file even if the scanner calls it more than once.
+	staleFinalized bool
+
+	// lastTsUs is the timestamp (micros) of the most recent record carrying one.
+	// Observability only (cursor LastTsUs). Stays 0 for a file whose records all
+	// lack timestamps.
+	lastTsUs int64
+}
+
+// turnState tracks one synthesized turn's accumulation between its open
+// (turn_context or task_started) and its close (task_complete / turn_aborted /
+// stale finalize). Token rollup follows SOW decision C#1: TokensIn/Out are the SUM of
+// per-call last_token_usage over the token_count events attributed to this turn
+// — never a delta of the cumulative total_token_usage (spec rule #4, #17).
+type turnState struct {
+	// seq is the canonical 1-based turn Seq.
+	seq int
+	// codexTurnID is the source turn_id (UUID), surfaced in
+	// turns.extras_json.codex_turn_id (spec "Canonical Model Gaps" #2). Empty
+	// for the absent-turn_id fallback turn.
+	codexTurnID string
+	// opSeq is the 1-based op counter within this turn.
+	opSeq int
+	// started reports whether a TurnStartedEvent was already emitted for this
+	// turn (idempotency across turn_context + task_started — spec rule #2, #3).
+	started bool
+	// finalized reports whether a TurnFinalizedEvent was already emitted, so a
+	// duplicate task_complete / a later stale finalize does not double-close.
+	finalized bool
+	// startTsUs is the turn's open timestamp (micros), used as a floor for the
+	// synthetic stale-finalize EndTs (spec rule #23).
+	startTsUs int64
+	// tokensIn / tokensOut accumulate the SOW C#1 per-call last_token_usage rollup
+	// (spec rule #4, #17).
+	tokensIn  int64
+	tokensOut int64
+	// tokensCacheRead / tokensCacheWrite accumulate the per-call cached-token
+	// split when newer rollouts report it (canonical-events.md codex cache row).
+	tokensCacheRead  int64
+	tokensCacheWrite int64
+	// ctxMax is the model_context_window stashed from task_started /
+	// token_count, applied to the turn's last LLM op at finalize (spec rule #3,
+	// #17).
+	ctxMax int64
+	// sandbox is the sandbox_policy.type snapshotted from the turn's
+	// turn_context, surfaced in turns.extras_json.sandbox (spec rule #2,
+	// "Canonical Model Gaps" #3).
+	sandbox string
+	// effort / approvalPolicy are turn_context policy snapshots for turn extras.
+	effort         string
+	approvalPolicy string
+	// ttftMs is task_complete.time_to_first_token_ms, surfaced in
+	// turns.extras_json.ttft_ms (spec "Canonical Model Gaps" #8).
+	ttftMs int64
+	// lastAgentMessage is event_msg.agent_message.message, surfaced in
+	// TurnFinalized extras as the UI "latest answer" preview (spec rule #19).
+	lastAgentMessage string
+	// lastLLMOpSeq is the op Seq of the most recent LLM op in this turn, so a
+	// trailing token_count attaches CtxUsed/CtxMax to it (spec rule #17). 0 when
+	// no LLM op has been emitted yet.
+	lastLLMOpSeq int
+	// lastLLMEndTs is the EndTs of the turn's last LLM op, preserved so a
+	// token_count re-finalize that adds CtxUsed/CtxMax does not clobber the op's
+	// real end timestamp (the ingester reconciles fields on the (turn,seq)
+	// upsert — canonical-events.md §Idempotency).
+	lastLLMEndTs int64
+	// lastLLMCtxUsed is the cumulative total_token_usage observed for the turn's
+	// last LLM op (spec rule #17). The op's CtxUsed is set from this at finalize.
+	lastLLMCtxUsed int64
+}
+
+// openOp records where an in-flight op was emitted so its finalize / enrichment
+// lands under the same turn/op (spec rule #9, #14-16).
+type openOp struct {
+	turnID  string           // source turn_id the op was emitted under
+	turnSeq int              // canonical Seq of that turn
+	opSeq   int              // op Seq within that turn
+	kind    canonical.OpKind // op kind, e.g. canonical.OpTool
+	name    string           // op name as emitted (presumably the tool name — confirm)
+	// extras accumulates enrichment (exec_command_end, mcp_tool_call_end,
+	// patch_apply_end) merged onto the op's OpFinalized (spec rule #14-16). The
+	// adapter does NOT emit a second op for an enrichment event.
+	extras map[string]any
+	// finalized guards against a second *_output finalizing the same op.
+	finalized bool
+}
+
+// mapperConfig bundles the per-file inputs newFileMapper copies onto a fileMapper.
+type mapperConfig struct {
+	sourceID       string                // stamped onto every EventBase.SourceID
+	absPath        string                // rollout's absolute path; empty in mapper-only tests
+	nativeID       string                // canonical session id of this file
+	parentNativeID string                // parent session id; empty for a root session
+	kind           canonical.SessionKind // root, sub_agent, fork, or tool_internal
+	agentName      string                // seeds SessionStartedEvent.AgentName
+}
+
+// newFileMapper constructs the mapper for one rollout file. The identity
+// fields are copied from cfg; the turn, open-op, and user-dedup maps start
+// empty but non-nil so they can be written to immediately.
+func newFileMapper(cfg mapperConfig) *fileMapper {
+	m := &fileMapper{
+		turns:    make(map[string]*turnState),
+		openOps:  make(map[string]*openOp),
+		seenUser: make(map[string]struct{}),
+	}
+	m.sourceID, m.absPath = cfg.sourceID, cfg.absPath
+	m.nativeID, m.parentNativeID = cfg.nativeID, cfg.parentNativeID
+	m.kind, m.agentName = cfg.kind, cfg.agentName
+	return m
+}
+
+// setLineNo stores the 1-based line number of the record the scanner (Chunk
+// C) feeds to mapRecord next, anchoring "#L<line>" in PayloadRefs; 0 = unset.
+func (m *fileMapper) setLineNo(n int) {
+	m.lineNo = n
+}
+
+// mapRecord converts one parsed record into canonical events, advancing the
+// mapper's inference state. Pure with respect to I/O; mutates only the
+// receiver. The first call on any file emits the SessionStartedEvent (spec
+// rule #1). Records that produce nothing actionable (e.g. an event_msg the
+// adapter only uses for enrichment) return an empty slice.
+func (m *fileMapper) mapRecord(rec record) ([]canonical.Event, error) {
+	idx := m.recordIdx
+	m.recordIdx++
+	if ts := m.recordTs(rec); ts > m.lastTsUs {
+		m.lastTsUs = ts
+	}
+
+	out := make([]canonical.Event, 0, 4)
+	sub := uint64(0)
+	advance := func(tsUs int64) canonical.EventBase {
+		b := canonical.EventBase{
+			SourceID:  m.sourceID,
+			SourceSeq: packSeq(idx, sub),
+			Ts:        tsUs,
+		}
+		sub++
+		return b
+	}
+
+	// Bootstrap the session on the first record (spec rule #1). session_meta is
+	// always line 1, but the bootstrap keys on sessionStarted (not on the record
+	// type) so a corrupt file missing session_meta still anchors its events to a
+	// session row (the scanner surfaces the missing-meta SourceError per rule
+	// #24 before any mapping).
+	if !m.sessionStarted {
+		out = append(out, m.sessionStarted0(rec, advance))
+		m.sessionStarted = true
+	}
+
+	switch rec.Type() {
+	case recSessionMeta:
+		// SessionStarted already emitted by the bootstrap above; a session_meta
+		// arriving later (metadata-only append, recorder.rs:1610) carries no new
+		// turn/op data the mapper acts on.
+	case recTurnContext:
+		out = append(out, m.mapTurnContext(rec, advance)...)
+	case recResponseItem:
+		evs, err := m.mapResponseItem(rec, advance)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, evs...)
+	case recEventMsg:
+		evs, err := m.mapEventMsg(rec, advance)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, evs...)
+	case recCompacted:
+		out = append(out, m.mapCompacted(rec, advance)...)
+	default:
+		// Unreachable: parseLine refuses unknown top-level types and skips
+		// known no-op nested types before mapRecord is reached.
+		return nil, fmt.Errorf("codex: unhandled record type %q", rec.Type())
+	}
+	return out, nil
+}
+
+// sessionStarted0 assembles the once-per-file SessionStartedEvent (spec rule
+// #1) from the mapper's identity fields. When the first record IS a
+// session_meta (the normal case) applySessionMeta fills in the rest from its
+// payload; for a non-meta first record (corrupt file) with no kind configured
+// the session falls back to a minimal root so its events still attach.
+func (m *fileMapper) sessionStarted0(rec record, advance func(int64) canonical.EventBase) canonical.SessionStartedEvent {
+	ev := canonical.SessionStartedEvent{
+		EventBase:      advance(m.recordTs(rec)),
+		NativeID:       m.nativeID,
+		RootNativeID:   m.rootNativeID(),
+		ParentNativeID: m.parentNativeID,
+		Kind:           m.kind,
+		AgentName:      m.agentName,
+	}
+	switch {
+	case rec.SessionMeta != nil:
+		applySessionMeta(&ev, rec.SessionMeta, m)
+	case ev.Kind == "":
+		// Corrupt-file fallback (rule #24): the first record is not a
+		// session_meta, so record a minimal root session on both the mapper and
+		// the event. The scanner reports the missing-meta SourceError itself.
+		m.kind = canonical.KindRoot
+		ev.Kind = m.kind
+	}
+	return ev
+}
+
+// rootNativeID reports the root of this session's tree: the parent id for a
+// child (fork or sub-agent), so the ingester resolver has a meaningful root
+// pointer even before the parent file lands (mirrors claude_code), and the
+// session's own id for a root session (spec rule #1, "Sub-Agent Linkage").
+func (m *fileMapper) rootNativeID() string {
+	if m.parentNativeID == "" {
+		return m.nativeID
+	}
+	return m.parentNativeID
+}
+
+// recordTs converts the record's envelope timestamp to micros. It yields 0
+// both when the record carries no timestamp and when the timestamp fails to
+// parse. The envelope timestamp is the canonical time source (spec adapter-codex.md:56-60).
+func (m *fileMapper) recordTs(rec record) int64 {
+	raw := rec.Timestamp()
+	if raw == "" {
+		return 0
+	}
+	if micros, err := parseTsToMicros(raw); err == nil {
+		return micros
+	}
+	return 0
+}
+
+// logEntry builds a LogEntryEvent on this session and its active turn (TurnSeq
+// 0 if none); message is a short stable label, severity DBG/INF/WRN/ERR.
+func (m *fileMapper) logEntry(base canonical.EventBase, severity, message string, extras map[string]any) canonical.LogEntryEvent {
+	ev := canonical.LogEntryEvent{
+		EventBase:       base,
+		SessionNativeID: m.nativeID,
+		TurnSeq:         m.activeTurnSeq(),
+		Severity:        severity,
+		Source:          Format,
+		Message:         message,
+		Extras:          extras,
+	}
+	if ev.Extras == nil {
+		ev.Extras = map[string]any{}
+	}
+	return ev
+}
+
+// activeTurnSeq reports the Seq of the most-recently-opened turn. It yields 0
+// when no turn is active or when activeTurnID has no entry in the turns map.
+// Used to scope LogEntry rows.
+func (m *fileMapper) activeTurnSeq() int {
+	if m.haveActiveTurn {
+		if active, ok := m.turns[m.activeTurnID]; ok {
+			return active.seq
+		}
+	}
+	return 0
+}
+
+// payloadURI builds the PayloadRef LocationURI for a body inline in this
+// rollout file at the given 1-based line number (spec rule #6/#7/#8, edge #7).
+// The form is "file://<clean-abs>#L<line>" so the presenter reads the exact
+// record on demand without ai-viewer ever copying the body into SQLite.
+//
+// NOTE (Chunk B↔D seam): this mapper-side builder is the minimal contract
+// Chunk B needs to compile and be tested in isolation — it cleans the path and
+// appends the line anchor but does NOT do symlink containment. Chunk D replaces
+// it with the claude_code-verbatim containment version (payloads.go:
+// resolveWithinRoot + EvalSymlinks), keeping the SAME "#L<line>" anchor so the
+// emitted event stream is unchanged. When absPath is empty (mapper-only tests)
+// the URI is just the line anchor.
+func (m *fileMapper) payloadURI(lineNo int) string {
+	anchor := ""
+	if lineNo > 0 {
+		anchor = fmt.Sprintf("#L%d", lineNo)
+	}
+	if m.absPath == "" {
+		return anchor
+	}
+	cleaned := filepath.ToSlash(filepath.Clean(m.absPath))
+	return "file://" + cleaned + anchor
+}
+
+// payloadRef builds a PayloadRefEvent for a body inline in this rollout at the
+// record currently being mapped (m.lineNo). It is scoped to the owning op
+// (turnSeq/opSeq) so it references an op that EXISTS — payload_refs.op_id is NOT
+// NULL REFERENCES ops(id), so an orphan ref would FK-roll-back the ingest batch
+// (mirrors claude_code's P1.1a discipline). OriginalBytes is the byte length of
+// the verbatim line so the presenter can budget a read; -1 when unknown.
+func (m *fileMapper) payloadRef(base canonical.EventBase, turnSeq, opSeq int, kind, format string, originalBytes int64) canonical.PayloadRefEvent {
+	return canonical.PayloadRefEvent{
+		EventBase:       base,
+		SessionNativeID: m.nativeID,
+		TurnSeq:         turnSeq,
+		OpSeq:           opSeq,
+		PayloadKind:     kind,
+		Format:          format,
+		LocationURI:     m.payloadURI(m.lineNo),
+		OriginalBytes:   originalBytes,
+	}
+}
diff --git a/internal/adapters/codex/mapper_coverage_test.go b/internal/adapters/codex/mapper_coverage_test.go
new file mode 100644
index 0000000..a6134a7
--- /dev/null
+++ b/internal/adapters/codex/mapper_coverage_test.go
@@ -0,0 +1,464 @@
+package codex
+
+import (
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestMapper_ImageGenerationOp feeds an image_generation_call and its matching
+// image_generation_end and expects exactly one media tool op (spec rule #12).
+func TestMapper_ImageGenerationOp(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"image_generation_call","id":"i1","status":"completed","call_id":"g1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"image_generation_end","call_id":"g1"}}`,
+	}
+	// Count the started ops that are the media-namespace image_generation tool.
+	mediaOps := 0
+	for _, op := range opStarts(runLines(t, m, rollout)) {
+		if op.ToolNamespace == "media" && op.Name == "image_generation" {
+			mediaOps++
+		}
+	}
+	if mediaOps != 1 {
+		t.Fatalf("image_generation op count = %d, want 1", mediaOps)
+	}
+}
+
+// TestMapper_CustomToolCall covers the custom namespace branch (spec rule #10).
+// NOTE(review): only ops that start are checked; a run with no my_tool op passes.
+func TestMapper_CustomToolCall(t *testing.T) {
+	t.Parallel()
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"custom_tool_call","call_id":"c1","name":"my_tool","input":"i"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"custom_tool_call_output","call_id":"c1","output":"o"}}`,
+	}
+	for _, op := range opStarts(runLines(t, newTestMapper("sid"), rollout)) {
+		isMyTool := op.Kind == canonical.OpTool && op.Name == "my_tool"
+		if isMyTool && op.ToolNamespace != "custom" {
+			t.Errorf("custom_tool_call namespace = %q, want custom", op.ToolNamespace)
+		}
+	}
+}
+
+// TestMapper_LocalShellLegacy exercises the legacy local_shell_call path (spec
+// rule #13) and expects exactly one tool op in the shell namespace.
+func TestMapper_LocalShellLegacy(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"local_shell_call","call_id":"l1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"local_shell_call_output","call_id":"l1","output":"done"}}`,
+	}
+	// Count the started tool ops that landed in the shell namespace.
+	shellOps := 0
+	for _, op := range opStarts(runLines(t, m, rollout)) {
+		if op.ToolNamespace == "shell" && op.Kind == canonical.OpTool {
+			shellOps++
+		}
+	}
+	if shellOps != 1 {
+		t.Fatalf("local_shell op (shell namespace) count = %d, want 1", shellOps)
+	}
+}
+
+// TestMapper_ToolSearchOp feeds a tool_search_call/output pair (spec
+// adapter-codex.md:158) and expects exactly one op named tool_search.
+func TestMapper_ToolSearchOp(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"tool_search_call","call_id":"ts1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"tool_search_output","call_id":"ts1","output":"results"}}`,
+	}
+	// Count the started ops carrying the tool_search name.
+	searchOps := 0
+	for _, op := range opStarts(runLines(t, m, rollout)) {
+		if op.Name == "tool_search" {
+			searchOps++
+		}
+	}
+	if searchOps != 1 {
+		t.Fatalf("tool_search op count = %d, want 1", searchOps)
+	}
+}
+
+// TestMapper_FsToolNamespaces covers the fs-namespace heuristic for the read/
+// write/edit/list_dir/view_image/apply_patch names (spec rule #9).
+// NOTE(review): only ops that start are checked; a run with no such op passes.
+func TestMapper_FsToolNamespaces(t *testing.T) {
+	t.Parallel()
+	fsTools := []string{"read", "write", "edit", "list_dir", "view_image", "apply_patch"}
+	for _, tool := range fsTools {
+		tool := tool
+		t.Run(tool, func(t *testing.T) {
+			t.Parallel()
+			rollout := []string{
+				metaLine("sid", `"exec"`),
+				`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+				`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"` + tool + `","arguments":"{}","call_id":"c1"}}`,
+			}
+			for _, op := range opStarts(runLines(t, newTestMapper("sid"), rollout)) {
+				if op.Kind == canonical.OpTool && op.Name == tool && op.ToolNamespace != "fs" {
+					t.Errorf("%s namespace = %q, want fs", tool, op.ToolNamespace)
+				}
+			}
+		})
+	}
+}
+
+// TestMapper_ExecPrefixAndDefaultNamespace covers the "exec*" → shell branch
+// and the default "custom" branch (spec rule #9).
+// NOTE(review): only ops that start are checked; a run with no such op passes.
+func TestMapper_ExecPrefixAndDefaultNamespace(t *testing.T) {
+	t.Parallel()
+	wantByTool := map[string]string{
+		"exec_command":  "shell",
+		"shell_command": "shell",
+		"weird_tool":    "custom",
+	}
+	for tool, want := range wantByTool {
+		tool, want := tool, want
+		t.Run(tool, func(t *testing.T) {
+			t.Parallel()
+			m := newTestMapper("sid")
+			rollout := []string{
+				metaLine("sid", `"exec"`),
+				`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+				`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"` + tool + `","arguments":"{}","call_id":"c1"}}`,
+			}
+			for _, op := range opStarts(runLines(t, m, rollout)) {
+				if op.Kind == canonical.OpTool && op.Name == tool && op.ToolNamespace != want {
+					t.Errorf("%s namespace = %q, want %q", tool, op.ToolNamespace, want)
+				}
+			}
+		})
+	}
+}
+
+// TestMapper_ObjectOutputErrorStatus feeds an object-shaped function_call_output
+// carrying an error and expects a failed/tool_error finalize (rule #9, edge #5).
+func TestMapper_ObjectOutputErrorStatus(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":{"content":"command failed: exit code 1"}}}`,
+	}
+	// Any finalized op reporting failed + tool_error satisfies the check.
+	sawToolError := false
+	for _, fin := range opFinals(runLines(t, m, rollout)) {
+		if fin.ErrorClass == "tool_error" && fin.Status == "failed" {
+			sawToolError = true
+		}
+	}
+	if !sawToolError {
+		t.Errorf("object-output error did not finalize failed/tool_error")
+	}
+}
+
+// TestMapper_AgentMessageStashedAndDeduped checks the agent_message handling
+// (spec rule #19): the assistant op comes from response_item.message alone,
+// while agent_message only contributes the preview seen on the turn_meta log.
+func TestMapper_AgentMessageStashedAndDeduped(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the answer"}]}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"agent_message","message":"the answer","phase":"final_answer"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}
+	evs := runLines(t, m, rollout)
+	// agent_message must not add a second LLM op next to the message's own.
+	llmOps := 0
+	for _, op := range opStarts(evs) {
+		if op.Kind == canonical.OpLLM {
+			llmOps++
+		}
+	}
+	if llmOps != 1 {
+		t.Fatalf("LLM op count = %d, want 1 (agent_message must not add a second)", llmOps)
+	}
+	// The last turn_meta log entry must carry the stashed last_agent_message.
+	var turnMeta canonical.LogEntryEvent
+	for _, ev := range evs {
+		if entry, isLog := ev.(canonical.LogEntryEvent); isLog && entry.Message == "turn_meta" {
+			turnMeta = entry
+		}
+	}
+	if preview := turnMeta.Extras["last_agent_message"]; preview != "the answer" {
+		t.Errorf("last_agent_message = %v, want 'the answer'", preview)
+	}
+}
+
+// TestMapper_EventError checks that event_msg.error surfaces as an ERR LogEntry.
+func TestMapper_EventError(t *testing.T) {
+	t.Parallel()
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"error","message":"boom happened"}}`,
+	}
+	sawErr := false
+	for _, ev := range runLines(t, newTestMapper("sid"), rollout) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if !isLog || entry.Severity != "ERR" || entry.Message != "error" {
+			continue
+		}
+		sawErr = true
+		if got := entry.Extras["message"]; got != "boom happened" {
+			t.Errorf("error extras message = %v, want 'boom happened'", got)
+		}
+	}
+	if !sawErr {
+		t.Errorf("event_msg.error did not surface an ERR log")
+	}
+}
+
+// TestMapper_ModelLearnedOnceAcrossTurns feeds two turn_context records with
+// different models and expects a single SessionUpdated event: the model is
+// announced only the FIRST time it is learned (spec rule #2).
+func TestMapper_ModelLearnedOnceAcrossTurns(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"turn_context","payload":{"turn_id":"t2","model":"gpt-5.6"}}`,
+	}
+	updates := countKind(runLines(t, m, rollout), canonical.EvSessionUpdated)
+	if updates != 1 {
+		t.Fatalf("SessionUpdated count = %d, want 1 (model announced once)", updates)
+	}
+}
+
+// TestMapper_EnrichOnAlreadyFinalizedOpLogs checks the late-enrichment path: an
+// exec_command_end arriving after its op was finalized by function_call_output
+// must surface an enrich_exec_command_end log (spec rule #14 telemetry).
+func TestMapper_EnrichOnAlreadyFinalizedOpLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		// The output record finalizes c1 before any enrichment shows up.
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok"}}`,
+		// The exec_command_end then targets the op that is already finalized.
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"ok","source":"model"}}`,
+	}
+	logged := false
+	for _, ev := range runLines(t, m, rollout) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if isLog && entry.Message == "enrich_exec_command_end" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("late exec_command_end on finalized op did not surface a DBG enrichment log")
+	}
+}
+
+// TestMapper_McpEndUnmatchedLogs: an mcp_tool_call_end with no matching op logs.
+func TestMapper_McpEndUnmatchedLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"mcp_tool_call_end","call_id":"orphan","invocation":{"server":"gh","tool":"x"}}}`,
+	}
+	logged := false
+	for _, ev := range runLines(t, m, rollout) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if isLog && entry.Message == "enrich_mcp_tool_call_end" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("unmatched mcp_tool_call_end did not surface a DBG log")
+	}
+}
+
+// TestMapper_PatchApplyEndUnmatchedLogs: a patch_apply_end whose call_id matches
+// no op must still surface an enrich_patch_apply_end log entry.
+func TestMapper_PatchApplyEndUnmatchedLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"orphan","success":true}}`,
+	}
+	logged := false
+	for _, ev := range runLines(t, m, rollout) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if isLog && entry.Message == "enrich_patch_apply_end" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("unmatched patch_apply_end did not surface a DBG log")
+	}
+}
+
+// TestMapper_TaskCompleteNoTurnWarns: a task_complete for an unknown turn logs.
+func TestMapper_TaskCompleteNoTurnWarns(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	rollout := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"ghost","completed_at":"` + tsDone + `"}}`,
+	}
+	warned := false
+	for _, ev := range runLines(t, m, rollout) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if isLog && entry.Message == "task_complete_no_turn" {
+			warned = true
+		}
+	}
+	if !warned {
+		t.Errorf("stray task_complete did not surface a WRN log")
+	}
+}
+
+// TestMapper_TurnAbortedNoTurnWarns covers the stray-turn_aborted branch: a
+// turn_aborted naming turn "ghost" arrives although the fixture never opened a
+// turn, and a "turn_aborted_no_turn" log entry must be emitted.
+func TestMapper_TurnAbortedNoTurnWarns(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"ghost","reason":"interrupted"}}`,
+	}
+	// One matching log row anywhere in the stream is sufficient.
+	for _, ev := range runLines(t, mapper, input) {
+		entry, isLog := ev.(canonical.LogEntryEvent)
+		if isLog && entry.Message == "turn_aborted_no_turn" {
+			return
+		}
+	}
+	t.Errorf("stray turn_aborted did not surface a WRN log")
+}
+
+// TestMapper_ReasoningContentRaw covers the content[]-non-empty raw branch of
+// reasoningKind (Format=json) (spec rule #8); summary[] is empty in the fixture.
+func TestMapper_ReasoningContentRaw(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"reasoning","content":[{"type":"reasoning_text","text":"chain"}],"summary":[]}}`,
+	})
+	got := firstReasoning(t, events).ReasoningKind
+	if got != "raw" {
+		t.Errorf("ReasoningKind = %q, want raw (content[] non-empty)", got)
+	}
+}
+
+// TestMapper_TokenCountBeforeAnyTurnDropped covers mapTokenCount's nil-turn path
+// (a token_count record that arrives before any turn has been opened).
+func TestMapper_TokenCountBeforeAnyTurnDropped(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":5}}}}`,
+	})
+	// runLines already fails on a mapper error; only the absence of a rollup is asserted here.
+	finalized := countKind(events, canonical.EvTurnFinalized)
+	if finalized != 0 {
+		t.Errorf("TurnFinalized = %d, want 0", finalized)
+	}
+}
+
+// TestMapper_DeveloperMessageIsLLM covers the message branch for a role that is
+// neither user nor assistant: a developer message must yield exactly one LLM op.
+func TestMapper_DeveloperMessageIsLLM(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"developer","content":[{"type":"input_text","text":"sys"}]}}`,
+	})
+	var llmOps int
+	for _, started := range opStarts(events) {
+		if started.Kind != canonical.OpLLM {
+			continue
+		}
+		llmOps++
+	}
+	if llmOps != 1 {
+		t.Fatalf("developer message LLM op count = %d, want 1", llmOps)
+	}
+}
+
+// TestMapper_CompletedAtUnixSeconds covers completedAtMicros' unix-seconds
+// branch: completed_at is a JSON number and EndTs must be that value in micros.
+func TestMapper_CompletedAtUnixSeconds(t *testing.T) {
+	t.Parallel()
+	const wantEndUs = 1763664600 * 1_000_000
+	mapper := newTestMapper("sid")
+	finals := turnFinals(runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":1763664600}}`,
+	}))
+	if len(finals) != 1 {
+		t.Fatalf("TurnFinalized count = %d, want 1", len(finals))
+	}
+	if got := finals[0].EndTs; got != wantEndUs {
+		t.Errorf("EndTs = %d, want %d (unix-seconds completed_at)", got, int64(wantEndUs))
+	}
+}
+
+// TestMapper_MissingSessionMetaStillAnchors covers mapRecord's bootstrap when
+// the first record is NOT a session_meta (corrupt file, rule #24): a minimal
+// root session must still be started, carrying the mapper's configured native
+// id, so that later events have something to attach to.
+func TestMapper_MissingSessionMetaStillAnchors(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid-nometa")
+	// The stream opens with a turn_context; no session_meta precedes it.
+	events := runLines(t, mapper, []string{
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+	})
+	started := firstStarted(t, events)
+	if started.NativeID != "sid-nometa" || started.Kind != canonical.KindRoot {
+		t.Errorf("fallback session = {%q %q}, want {sid-nometa root}", started.NativeID, started.Kind)
+	}
+}
+
+// TestMapper_LateSessionMetaNoSecondStart covers the recSessionMeta arm of
+// mapRecord when a session_meta arrives after bootstrap (metadata-only append):
+// the stream must contain exactly one SessionStarted in total.
+func TestMapper_LateSessionMetaNoSecondStart(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		metaLine("sid", `"exec"`), // a second session_meta (append-after-end)
+	})
+	starts := countKind(events, canonical.EvSessionStarted)
+	if starts != 1 {
+		t.Fatalf("SessionStarted count = %d, want 1 (no second start on late meta)", starts)
+	}
+}
diff --git a/internal/adapters/codex/mapper_finalize.go b/internal/adapters/codex/mapper_finalize.go
new file mode 100644
index 0000000..aedb780
--- /dev/null
+++ b/internal/adapters/codex/mapper_finalize.go
@@ -0,0 +1,104 @@
+package codex
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// finalizeStale is the EOF-finalize entry point the scanner (Chunk C) invokes
+// once a rollout file sits at EOF AND its mtime is stale (>= 1 h; the scanner
+// owns that check — spec rule #23, SOW C#3). nowUs is the synthetic end
+// timestamp in micros (the scanner passes the file mtime); it is raised to the
+// turn's start when it would otherwise precede it.
+// If the newest turn is still open (no task_complete / turn_aborted) the result
+// is: its dangling ops finalized "cancelled", a failed/incomplete turn
+// finalize, an optional extras log, and a failed/incomplete
+// SessionFinalizedEvent — the ONLY one codex ever emits. With no open turn it
+// returns nil and the session stays running (no clean-EOF completed finalize).
+// The first call latches staleFinalized, so any later call returns nil.
+//
+// The scanner MUST NOT call this on a fresh (mtime < 1 h) file: an in-flight
+// turn there is legitimately still running (spec rule #23 "keep turn open").
+func (m *fileMapper) finalizeStale(nowUs int64) []canonical.Event {
+	if m.staleFinalized {
+		return nil
+	}
+	m.staleFinalized = true
+	hung := m.mostRecentOpenTurn()
+	if hung == nil {
+		// Every turn is closed (or none was opened). Codex has no per-session
+		// terminal signal, so the session is left running.
+		return nil
+	}
+	// NOTE(review): the latch above is set even on the no-open-turn path — confirm a later append never needs this.
+	endUs := hung.startTsUs
+	if nowUs > endUs {
+		endUs = nowUs
+	}
+	stamp := func() canonical.EventBase {
+		return canonical.EventBase{SourceID: m.sourceID, SourceSeq: 0, Ts: endUs}
+	}
+	// Emission order: the hung turn's dangling ops (cancelled), the turn itself
+	// (failed/incomplete), its optional extras log, then the session
+	// (failed/incomplete) — spec rule #23, SOW C#3.
+	events := m.finalizeDanglingOps(hung.codexTurnID, stamp, endUs, "cancelled")
+	events = append(events, m.finalizeTurn(hung, stamp(), endUs, "failed", "incomplete"))
+	if extras := m.turnExtrasLog(hung, stamp()); extras != nil {
+		events = append(events, extras)
+	}
+	sessionEnd := canonical.SessionFinalizedEvent{
+		EventBase:  stamp(),
+		NativeID:   m.nativeID,
+		Status:     canonical.StatusFailed,
+		ErrorClass: "incomplete",
+		EndTs:      endUs,
+	}
+	return append(events, sessionEnd)
+}
+
+// mostRecentOpenTurn walks turnOrder back to front and returns the first turn
+// found in turns that is not finalized, or nil if none. Used by finalizeStale.
+func (m *fileMapper) mostRecentOpenTurn() *turnState {
+	for idx := len(m.turnOrder); idx > 0; idx-- {
+		if turn, known := m.turns[m.turnOrder[idx-1]]; known && !turn.finalized {
+			return turn
+		}
+	}
+	return nil
+}
+
+// packSeq folds (recordIdx, subIdx) into one uint64 that is monotone per file:
+// recordIdx is shifted up by subEventBits, subIdx is masked. Mirrors claude_code.
+func packSeq(recordIdx, subIdx uint64) uint64 {
+	return (subIdx & (maxSubEventsPerRecord - 1)) | recordIdx<<subEventBits
+}
+
+// parseTsToMicros converts an RFC3339 timestamp string into UNIX microseconds.
+// Codex writes UTC RFC3339 at millisecond precision (spec adapter-codex.md:56);
+// nanosecond precision parses too. On failure it returns 0 and a wrapped error.
+func parseTsToMicros(s string) (int64, error) {
+	parsed, err := time.Parse(time.RFC3339Nano, s)
+	if err == nil {
+		return parsed.UnixMicro(), nil
+	}
+	return 0, fmt.Errorf("ts %q: %w", s, err)
+}
+
+// trimPreview returns at most max runes of s with surrounding whitespace
+// removed, for a non-sensitive Extras preview (e.g. compaction message,
+// last_agent_message); a non-positive max yields "". Bodies are never inlined
+// wholesale (spec edge #7), so the cut point is found by walking runes in place
+// instead of copying a possibly large body into a []rune.
+func trimPreview(s string, max int) string {
+	s = strings.TrimSpace(s)
+	for i := range s {
+		if max <= 0 {
+			return s[:i]
+		}
+		max--
+	}
+	return s
+}
diff --git a/internal/adapters/codex/mapper_helpers_test.go b/internal/adapters/codex/mapper_helpers_test.go
new file mode 100644
index 0000000..4258d30
--- /dev/null
+++ b/internal/adapters/codex/mapper_helpers_test.go
@@ -0,0 +1,112 @@
+package codex
+
+import (
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// runLines drives the mapper the way the scanner (Chunk C) will: each JSONL
+// line is parsed in order and, unless parseLine reports skip, mapped with its
+// 1-based line number set first (so PayloadRef anchors are realistic). A parse
+// or mapping error fails the test immediately — the synthetic fixtures are
+// well-formed. Returns every canonical event emitted, in order.
+func runLines(t *testing.T, m *fileMapper, lines []string) []canonical.Event {
+	t.Helper()
+	var stream []canonical.Event
+	for idx := range lines {
+		lineNo, raw := idx+1, lines[idx]
+		rec, skip, err := parseLine([]byte(raw))
+		switch {
+		case err != nil:
+			t.Fatalf("line %d parseLine(%q): %v", lineNo, raw, err)
+		case skip:
+			continue
+		}
+		m.setLineNo(lineNo)
+		mapped, mapErr := m.mapRecord(rec)
+		if mapErr != nil {
+			t.Fatalf("line %d mapRecord: %v", lineNo, mapErr)
+		}
+		stream = append(stream, mapped...)
+	}
+	return stream
+}
+
+// newTestMapper returns a mapper configured with synthetic ids for nativeID. An
+// absPath is supplied so PayloadRef URIs are exercised (containment: Chunk D).
+func newTestMapper(nativeID string) *fileMapper {
+	return newFileMapper(mapperConfig{
+		nativeID: nativeID,
+		sourceID: "codex:/test/sessions",
+		absPath:  "/test/sessions/2025/11/20/rollout-" + nativeID + ".jsonl",
+	})
+}
+
+// countKind tallies the events in the stream whose EventKind() equals kind.
+func countKind(events []canonical.Event, kind canonical.EventKind) int {
+	var matches int
+	for i := range events {
+		if events[i].EventKind() == kind {
+			matches++
+		}
+	}
+	return matches
+}
+
+// opStarts filters the stream down to its OpStartedEvent values, in order.
+func opStarts(events []canonical.Event) []canonical.OpStartedEvent {
+	var started []canonical.OpStartedEvent
+	for i := range events {
+		if op, isStart := events[i].(canonical.OpStartedEvent); isStart {
+			started = append(started, op)
+		}
+	}
+	return started
+}
+
+// opFinals filters the stream down to its OpFinalizedEvent values, in order.
+func opFinals(events []canonical.Event) []canonical.OpFinalizedEvent {
+	var finalized []canonical.OpFinalizedEvent
+	for i := range events {
+		if op, isFinal := events[i].(canonical.OpFinalizedEvent); isFinal {
+			finalized = append(finalized, op)
+		}
+	}
+	return finalized
+}
+
+// turnFinals filters the stream down to its TurnFinalizedEvent values, in order.
+func turnFinals(events []canonical.Event) []canonical.TurnFinalizedEvent {
+	var finalized []canonical.TurnFinalizedEvent
+	for i := range events {
+		if turn, isFinal := events[i].(canonical.TurnFinalizedEvent); isFinal {
+			finalized = append(finalized, turn)
+		}
+	}
+	return finalized
+}
+
+// firstStarted yields the earliest SessionStartedEvent; t.Fatal if there is none.
+func firstStarted(t *testing.T, events []canonical.Event) canonical.SessionStartedEvent {
+	t.Helper()
+	for i := range events {
+		if started, found := events[i].(canonical.SessionStartedEvent); found {
+			return started
+		}
+	}
+	t.Fatal("no SessionStartedEvent in stream")
+	return canonical.SessionStartedEvent{}
+}
+
+// firstReasoning yields the earliest OpStartedEvent of kind OpReasoning; t.Fatal if none.
+func firstReasoning(t *testing.T, events []canonical.Event) canonical.OpStartedEvent {
+	t.Helper()
+	for _, started := range opStarts(events) {
+		if started.Kind == canonical.OpReasoning {
+			return started
+		}
+	}
+	t.Fatal("no reasoning op in stream")
+	return canonical.OpStartedEvent{}
+}
diff --git a/internal/adapters/codex/mapper_test.go b/internal/adapters/codex/mapper_test.go
new file mode 100644
index 0000000..cb1ece5
--- /dev/null
+++ b/internal/adapters/codex/mapper_test.go
@@ -0,0 +1,950 @@
+package codex
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+const (
+	tsMeta  = "2025-11-20T16:59:09.857Z"
+	tsCtx   = "2025-11-20T16:59:10.000Z"
+	tsItem  = "2025-11-20T16:59:11.000Z"
+	tsEvent = "2025-11-20T16:59:12.000Z"
+	tsDone  = "2025-11-20T17:00:00.000Z"
+)
+
+// metaLine renders a session_meta JSONL record stamped tsMeta for id; sourceJSON is spliced in raw as "source".
+func metaLine(id, sourceJSON string) string {
+	return `{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"` + id + `","cwd":"<ROOT>","originator":"codex_exec","cli_version":"0.125.0","model_provider":"openai","source":` + sourceJSON + `}}`
+}
+
+// TestMapper_SessionStartedRoot asserts that a lone session_meta record emits
+// exactly one SessionStartedEvent of Kind root whose root id is its own id,
+// with no parent, AgentName derived from originator, the record's cwd, and
+// cli_version/originator in Extras (spec rule #1).
+func TestMapper_SessionStartedRoot(t *testing.T) {
+	t.Parallel()
+	events := runLines(t, newTestMapper("sid-root"), []string{metaLine("sid-root", `"exec"`)})
+	if n := countKind(events, canonical.EvSessionStarted); n != 1 {
+		t.Fatalf("SessionStarted count = %d, want 1", n)
+	}
+	started := firstStarted(t, events)
+	if started.NativeID != "sid-root" || started.Kind != canonical.KindRoot {
+		t.Errorf("session = {NativeID:%q Kind:%q}, want {sid-root root}", started.NativeID, started.Kind)
+	}
+	if started.RootNativeID != "sid-root" || started.ParentNativeID != "" {
+		t.Errorf("root/parent = {%q %q}, want {sid-root \"\"}", started.RootNativeID, started.ParentNativeID)
+	}
+	if started.AgentName != "codex:codex_exec" {
+		t.Errorf("AgentName = %q, want codex:codex_exec", started.AgentName)
+	}
+	if started.Cwd != "<ROOT>" {
+		t.Errorf("Cwd = %q, want <ROOT>", started.Cwd)
+	}
+	extras := started.Extras
+	if extras["cli_version"] != "0.125.0" || extras["originator"] != "codex_exec" {
+		t.Errorf("extras missing cli_version/originator: %+v", extras)
+	}
+}
+
+// TestMapper_SubAgentLinkage asserts a thread_spawn sub-agent session is linked
+// to its parent thread and stamped Kind=sub_agent with relationship=sub_agent
+// (spec rule #1, "Sub-Agent Linkage", gap #5/#10).
+func TestMapper_SubAgentLinkage(t *testing.T) {
+	t.Parallel()
+	spawn := `{"subagent":{"thread_spawn":{"parent_thread_id":"parent-uuid","depth":1,"agent_nickname":"Tesla","agent_role":"explorer"}}}`
+	meta := `{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"child-uuid","originator":"codex_exec","agent_nickname":"Tesla","agent_role":"explorer","thread_source":"subagent","source":` + spawn + `}}`
+	mapper := newFileMapper(mapperConfig{sourceID: "codex:/t", absPath: "/t/child.jsonl", nativeID: "child-uuid"})
+	started := firstStarted(t, runLines(t, mapper, []string{meta}))
+	if started.Kind != canonical.KindSubAgent {
+		t.Errorf("Kind = %q, want sub_agent", started.Kind)
+	}
+	// Parent and root are both expected to be the spawning thread's id.
+	if started.ParentNativeID != "parent-uuid" || started.RootNativeID != "parent-uuid" {
+		t.Errorf("parent/root = {%q %q}, want {parent-uuid parent-uuid}", started.ParentNativeID, started.RootNativeID)
+	}
+	if started.AgentName != "Tesla" {
+		t.Errorf("AgentName = %q, want Tesla", started.AgentName)
+	}
+	// Extras carry the relationship tag and the spawn depth.
+	if started.Extras["relationship"] != "sub_agent" {
+		t.Errorf("relationship = %v, want sub_agent", started.Extras["relationship"])
+	}
+	if started.Extras["subagent_depth"] != 1 {
+		t.Errorf("subagent_depth = %v, want 1", started.Extras["subagent_depth"])
+	}
+}
+
+// TestMapper_ForkLinkage asserts a session_meta carrying forked_from_id yields
+// Kind=fork, with parent and root both taken from forked_from_id and
+// relationship=fork in Extras (spec rule #1, gap #5).
+func TestMapper_ForkLinkage(t *testing.T) {
+	t.Parallel()
+	meta := `{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"fork-uuid","forked_from_id":"origin-uuid","originator":"codex_cli_rs","source":"cli"}}`
+	mapper := newFileMapper(mapperConfig{sourceID: "codex:/t", absPath: "/t/fork.jsonl", nativeID: "fork-uuid"})
+	started := firstStarted(t, runLines(t, mapper, []string{meta}))
+	if started.Kind != canonical.KindFork {
+		t.Errorf("Kind = %q, want fork", started.Kind)
+	}
+	// Parent and root are both expected to be the fork origin.
+	if started.ParentNativeID != "origin-uuid" || started.RootNativeID != "origin-uuid" {
+		t.Errorf("parent/root = {%q %q}, want {origin-uuid origin-uuid}", started.ParentNativeID, started.RootNativeID)
+	}
+	if started.Extras["relationship"] != "fork" {
+		t.Errorf("relationship = %v, want fork", started.Extras["relationship"])
+	}
+}
+
+// TestMapper_ToolInternalLinkage asserts a session whose source is an
+// {"internal": ...} object is classified Kind=tool_internal (spec rule #1, gap #6).
+func TestMapper_ToolInternalLinkage(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("internal-uuid")
+	meta := metaLine("internal-uuid", `{"internal":"memory_consolidation"}`)
+	started := firstStarted(t, runLines(t, mapper, []string{meta}))
+	if started.Kind != canonical.KindToolInternal {
+		t.Errorf("Kind = %q, want tool_internal", started.Kind)
+	}
+}
+
+// TestMapper_TurnContextOpensTurnAndModel asserts that a single turn_context
+// opens turn 1 (one TurnStarted, Seq 1) and announces the model exactly once
+// through a SessionUpdated event (spec rule #2).
+func TestMapper_TurnContextOpensTurnAndModel(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"turn-1","model":"gpt-5.1-codex-max","effort":"high","approval_policy":"on-request","sandbox_policy":{"type":"workspace-write"}}}`,
+	})
+	if n := countKind(events, canonical.EvTurnStarted); n != 1 {
+		t.Fatalf("TurnStarted count = %d, want 1", n)
+	}
+	if n := countKind(events, canonical.EvSessionUpdated); n != 1 {
+		t.Fatalf("SessionUpdated count = %d, want 1", n)
+	}
+	// The counts are pinned above; now check the payload of each of the two
+	// events of interest wherever they sit in the stream.
+	for _, ev := range events {
+		if upd, isUpd := ev.(canonical.SessionUpdatedEvent); isUpd && upd.Model != "gpt-5.1-codex-max" {
+			t.Errorf("SessionUpdated.Model = %q, want gpt-5.1-codex-max", upd.Model)
+		}
+		if opened, isTurn := ev.(canonical.TurnStartedEvent); isTurn && opened.Seq != 1 {
+			t.Errorf("TurnStarted.Seq = %d, want 1", opened.Seq)
+		}
+	}
+}
+
+// TestMapper_TurnBoundaryNewFormat asserts that the new format (turn_context +
+// task_started + task_complete for one turn_id) produces exactly one
+// TurnStarted and one TurnFinalized(completed) (spec rule #2,#3,#4; acceptance #3).
+func TestMapper_TurnBoundaryNewFormat(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"turn-1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsCtx + `","type":"event_msg","payload":{"type":"task_started","turn_id":"turn-1","started_at":1763664000,"model_context_window":200000}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"turn-1","completed_at":"` + tsDone + `","duration_ms":1000}}`,
+	})
+	if n := countKind(events, canonical.EvTurnStarted); n != 1 {
+		t.Fatalf("TurnStarted count = %d, want 1 (idempotent across turn_context+task_started)", n)
+	}
+	finals := turnFinals(events)
+	if len(finals) != 1 {
+		t.Fatalf("TurnFinalized count = %d, want 1", len(finals))
+	}
+	// The single finalize must close turn 1 as completed.
+	first := finals[0]
+	if first.Status != "completed" || first.Seq != 1 {
+		t.Errorf("TurnFinalized = {Status:%q Seq:%d}, want {completed 1}", first.Status, first.Seq)
+	}
+}
+
+// TestMapper_TurnBoundaryOldFormat asserts that the old format (turn_context
+// only, no task_started/task_complete) opens one turn per distinct turn_id and
+// that nothing is finalized when the lines simply run out (no clean finalize —
+// SOW C#3) (spec rule #2,#22; edge #3; acceptance #3).
+func TestMapper_TurnBoundaryOldFormat(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid-old")
+	events := runLines(t, mapper, []string{
+		`{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"sid-old","originator":"codex-tui","cli_version":"0.61.0","source":"cli"}}`,
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.0"}}`,
+		`{"timestamp":"` + tsItem + `","type":"turn_context","payload":{"turn_id":"t2","model":"gpt-5.0"}}`,
+	})
+	// Two distinct turn_ids in the fixture, so two turns must open.
+	if n := countKind(events, canonical.EvTurnStarted); n != 2 {
+		t.Fatalf("TurnStarted count = %d, want 2 (one per turn_id)", n)
+	}
+	// Clean EOF: neither a TurnFinalized nor a SessionFinalized may appear
+	// (SOW C#3).
+	if n := countKind(events, canonical.EvTurnFinalized); n != 0 {
+		t.Errorf("TurnFinalized count = %d, want 0 at clean EOF", n)
+	}
+	if n := countKind(events, canonical.EvSessionFinalized); n != 0 {
+		t.Errorf("SessionFinalized count = %d, want 0 at clean EOF", n)
+	}
+}
+
+// TestMapper_AbsentTurnIDFallback asserts that an old-CLI rollout whose
+// turn_context has no turn_id still opens exactly one fallback turn under the
+// empty key (spec edge #3 "user message → next user message" fallback bucket).
+func TestMapper_AbsentTurnIDFallback(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid-noturn")
+	events := runLines(t, mapper, []string{
+		`{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"sid-noturn","source":"cli"}}`,
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"model":"gpt-5.0"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi"}]}}`,
+	})
+	opened := countKind(events, canonical.EvTurnStarted)
+	if opened != 1 {
+		t.Fatalf("TurnStarted count = %d, want 1 (single fallback turn)", opened)
+	}
+}
+
+// TestMapper_ReasoningKindSummary asserts that a reasoning item carrying only
+// summary[] (no content[], no encrypted_content) is classified
+// reasoning_kind=summary (spec rule #8, acceptance #4).
+func TestMapper_ReasoningKindSummary(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"reasoning","summary":[{"type":"summary_text","text":"thinking"}]}}`,
+	})
+	got := firstReasoning(t, events).ReasoningKind
+	if got != "summary" {
+		t.Errorf("ReasoningKind = %q, want summary", got)
+	}
+}
+
+// TestMapper_ReasoningKindRaw asserts that a reasoning item with
+// encrypted_content (the fixture's summary[] is empty) is classified
+// reasoning_kind=raw (spec rule #8, acceptance #4).
+func TestMapper_ReasoningKindRaw(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"reasoning","summary":[],"encrypted_content":"AAAABBBB"}}`,
+	})
+	got := firstReasoning(t, events).ReasoningKind
+	if got != "raw" {
+		t.Errorf("ReasoningKind = %q, want raw", got)
+	}
+}
+
+// TestMapper_EventReasoningIsLogOnlyNoDupOp asserts that event_msg
+// agent_reasoning* records become LogEntry rows and never reasoning ops, so the
+// only reasoning op is the response_item.reasoning one (spec rule #8, acceptance #4).
+func TestMapper_EventReasoningIsLogOnlyNoDupOp(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"reasoning","summary":[{"type":"summary_text","text":"s"}]}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"agent_reasoning","text":"the visible summary"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"agent_reasoning_raw_content","text":"raw cot"}}`,
+	})
+	var reasoningOps int
+	for _, started := range opStarts(events) {
+		if started.Kind != canonical.OpReasoning {
+			continue
+		}
+		reasoningOps++
+	}
+	if reasoningOps != 1 {
+		t.Fatalf("reasoning ops = %d, want 1 (only response_item.reasoning)", reasoningOps)
+	}
+	// Both event_msg reasoning records must have landed as LogEntry rows.
+	logs := countKind(events, canonical.EvLogEntry)
+	if logs < 2 {
+		t.Errorf("LogEntry count = %d, want >= 2 (agent_reasoning + raw)", logs)
+	}
+}
+
+// TestMapper_ToolCallPairing asserts that a function_call and its
+// function_call_output (same call_id) produce one tool op in the shell
+// namespace, a completed finalize, and two PayloadRefs (spec rule #9).
+func TestMapper_ToolCallPairing(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{\"cmd\":\"ls\"}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"file.txt"}}`,
+	})
+	// Exactly one tool op must start, named and namespaced "shell".
+	toolStarts := 0
+	for _, started := range opStarts(events) {
+		if started.Kind != canonical.OpTool {
+			continue
+		}
+		toolStarts++
+		if started.Name != "shell" || started.ToolNamespace != "shell" {
+			t.Errorf("tool op = {Name:%q NS:%q}, want {shell shell}", started.Name, started.ToolNamespace)
+		}
+	}
+	if toolStarts != 1 {
+		t.Fatalf("tool op starts = %d, want 1", toolStarts)
+	}
+	// At least one completed op finalize with a positive Seq must exist.
+	completed := 0
+	for _, fin := range opFinals(events) {
+		if fin.Status == "completed" && fin.Seq >= 1 {
+			completed++
+		}
+	}
+	if completed < 1 {
+		t.Errorf("no completed op finalize for the tool")
+	}
+	// One PayloadRef for the request and one for the response.
+	if refs := countKind(events, canonical.EvPayloadRef); refs != 2 {
+		t.Errorf("PayloadRef count = %d, want 2 (request+response)", refs)
+	}
+}
+
+// TestMapper_DanglingOpCancelledOnAbort asserts that a function_call with no
+// matching output is finalized cancelled when its turn is aborted, and that the
+// turn itself closes failed/user_interrupt (spec edge #9, rule #5).
+func TestMapper_DanglingOpCancelledOnAbort(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"t1","reason":"interrupted"}}`,
+	})
+	cancelled := 0
+	for _, fin := range opFinals(events) {
+		if fin.Status != "cancelled" {
+			continue
+		}
+		cancelled++
+	}
+	if cancelled != 1 {
+		t.Fatalf("cancelled op finalize count = %d, want 1", cancelled)
+	}
+	finals := turnFinals(events)
+	if len(finals) != 1 || finals[0].Status != "failed" || finals[0].ErrorClass != "user_interrupt" {
+		t.Fatalf("turn finalize = %+v, want failed/user_interrupt", finals)
+	}
+}
+
+// TestMapper_AbortErrorClasses runs one parallel subtest per turn_aborted
+// reason and checks that the single resulting TurnFinalized carries the
+// expected ErrorClass (spec rule #5).
+func TestMapper_AbortErrorClasses(t *testing.T) {
+	t.Parallel()
+	cases := []struct{ reason, wantClass string }{
+		{"interrupted", "user_interrupt"},
+		{"replaced", "replaced"},
+		{"review_ended", "review_ended"},
+		{"budget_limited", "rate_limit"},
+	}
+	for _, tc := range cases {
+		tc := tc // pin the loop variable for the parallel closure
+		t.Run(tc.reason, func(t *testing.T) {
+			t.Parallel()
+			mapper := newTestMapper("sid")
+			finals := turnFinals(runLines(t, mapper, []string{
+				metaLine("sid", `"exec"`),
+				`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+				`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"t1","reason":"` + tc.reason + `"}}`,
+			}))
+			if len(finals) != 1 || finals[0].ErrorClass != tc.wantClass {
+				t.Fatalf("reason %q → %+v, want ErrorClass %q", tc.reason, finals, tc.wantClass)
+			}
+		})
+	}
+}
+
+// TestMapper_TokenRollup asserts the C#1 rollup: TurnFinalized.TokensIn/Out are
+// the SUM of per-call last_token_usage across the turn's token_count events,
+// while the cumulative total_token_usage only feeds the LLM op's CtxUsed and
+// never the per-turn tokens (spec rule #4, #17; SOW C#1).
+func TestMapper_TokenRollup(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"a"}]}}`,
+		// Two token_count events for this turn: last usage 10/5 then 20/8 → sum 30/13.
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","turn_id":"t1","info":{"total_token_usage":{"total_tokens":100},"last_token_usage":{"input_tokens":10,"output_tokens":5}},"model_context_window":200000}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","turn_id":"t1","info":{"total_token_usage":{"total_tokens":250},"last_token_usage":{"input_tokens":20,"output_tokens":8}},"model_context_window":200000}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	})
+	finals := turnFinals(events)
+	if len(finals) != 1 {
+		t.Fatalf("turn finalize count = %d, want 1", len(finals))
+	}
+	// Per-turn tokens are the sum of the two last_token_usage samples.
+	if tokIn, tokOut := finals[0].TokensIn, finals[0].TokensOut; tokIn != 30 || tokOut != 13 {
+		t.Errorf("rollup = {In:%d Out:%d}, want {30 13} (sum of last_token_usage)", tokIn, tokOut)
+	}
+	// CtxUsed = cumulative total (250), CtxMax = model_context_window (200000);
+	// the last op finalize that carries either value wins.
+	var ctxUsed, ctxMax int64
+	for _, fin := range opFinals(events) {
+		if fin.CtxUsed <= 0 && fin.CtxMax <= 0 {
+			continue
+		}
+		ctxUsed, ctxMax = fin.CtxUsed, fin.CtxMax
+	}
+	if ctxUsed != 250 || ctxMax != 200000 {
+		t.Errorf("LLM op ctx = {Used:%d Max:%d}, want {250 200000}", ctxUsed, ctxMax)
+	}
+}
+
+// TestMapper_SessionLevelTokenCountAttributedToActiveTurn asserts that a
+// token_count carrying NO turn_id is attributed to the most-recently-active
+// turn: its last_token_usage (7 in / 3 out) must show up on that turn's
+// finalize (spec rule #17, "Token accounting nuance").
+func TestMapper_SessionLevelTokenCountAttributedToActiveTurn(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	finals := turnFinals(runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"total_tokens":50},"last_token_usage":{"input_tokens":7,"output_tokens":3}}}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}))
+	if len(finals) != 1 || finals[0].TokensIn != 7 || finals[0].TokensOut != 3 {
+		t.Fatalf("session-level token_count rollup = %+v, want In7/Out3 on the active turn", finals)
+	}
+}
+
+// TestMapper_UserDedup asserts that the same user input arriving as BOTH an
+// event_msg.user_message and a response_item.message(role=user) produces
+// exactly one user_input op — the second sighting is deduplicated
+// (spec rule #6, #18).
+func TestMapper_UserDedup(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	events := runLines(t, mapper, []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"user_message","message":"do the thing"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"do the thing"}]}}`,
+	})
+	userOps := 0
+	for _, started := range opStarts(events) {
+		if started.Kind != canonical.OpInternal || started.Name != "user_input" {
+			continue
+		}
+		userOps++
+	}
+	if userOps != 1 {
+		t.Fatalf("user_input ops = %d, want 1 (deduped)", userOps)
+	}
+}
+
+// TestMapper_Compaction checks that a top-level compacted line yields exactly
+// one compaction op-start whose Extras hold the history size and preview.
+func TestMapper_Compaction(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"compacted","payload":{"message":"summary text","replacement_history":[{"type":"message"},{"type":"message"}]}}`,
+	}
+	events := runLines(t, mapper, input)
+
+	found := 0
+	for _, start := range opStarts(events) {
+		if start.Kind == canonical.OpCompaction {
+			found++
+			if start.Extras["message_preview"] != "summary text" {
+				t.Errorf("message_preview = %v, want 'summary text'", start.Extras["message_preview"])
+			}
+			if start.Extras["replacement_history_size"] != 2 {
+				t.Errorf("replacement_history_size = %v, want 2", start.Extras["replacement_history_size"])
+			}
+		}
+	}
+	if found != 1 {
+		t.Fatalf("compaction ops = %d, want 1", found)
+	}
+}
+
+// TestMapper_ContextCompactionVariants asserts response_item.compaction,
+// response_item.context_compaction, and event_msg.context_compacted each
+// produce exactly one compaction op-start (spec rule #20, gap #4).
+func TestMapper_ContextCompactionVariants(t *testing.T) {
+	t.Parallel()
+	for _, line := range []string{
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"compaction","encrypted_content":"X"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"context_compaction","encrypted_content":null}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"context_compacted"}}`,
+	} {
+		m := newTestMapper("sid")
+		events := runLines(t, m, []string{metaLine("sid", `"exec"`), line})
+		// Count with a plain loop so a failure can report the observed count.
+		got := 0
+		for _, s := range opStarts(events) {
+			if s.Kind == canonical.OpCompaction {
+				got++
+			}
+		}
+		if got != 1 {
+			t.Errorf("compaction op count for %q = %d, want 1", line, got)
+		}
+	}
+}
+
+// TestMapper_ExecCommandEndEnrichesNoSecondOp checks that an exec_command_end
+// for an already-started call_id adds no second tool op-start, and that some op
+// in the stream is finalized "completed" for exit_code 0 (spec rule #14).
+func TestMapper_ExecCommandEndEnrichesNoSecondOp(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"out","cwd":"<ROOT>","source":"model"}}`,
+	}
+	events := runLines(t, mapper, input)
+
+	starts := 0
+	for _, start := range opStarts(events) {
+		if start.Kind == canonical.OpTool {
+			starts++
+		}
+	}
+	if starts != 1 {
+		t.Fatalf("tool op starts = %d, want 1 (exec_command_end must not add a second op)", starts)
+	}
+	// exit_code 0 must produce at least one "completed" op finalize.
+	sawCompleted := false
+	for _, fin := range opFinals(events) {
+		if fin.Status == "completed" {
+			sawCompleted = true
+		}
+	}
+	if !sawCompleted {
+		t.Errorf("tool op not finalized completed by exec_command_end exit_code 0")
+	}
+}
+
+// TestMapper_ExecCommandEndNonZeroExitFails checks that an exec_command_end with
+// a non-zero exit_code yields an op finalize of failed/command_failed (rule #14).
+func TestMapper_ExecCommandEndNonZeroExitFails(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":2,"aggregated_output":""}}`,
+	}
+	events := runLines(t, mapper, input)
+	sawFailed := false
+	for _, fin := range opFinals(events) {
+		if fin.ErrorClass == "command_failed" && fin.Status == "failed" {
+			sawFailed = true
+		}
+	}
+	if !sawFailed {
+		t.Errorf("non-zero exit did not finalize failed/command_failed")
+	}
+}
+
+// TestMapper_McpToolCallEndSetsNamespace checks that after mcp_tool_call_end an
+// op-start with namespace mcp:<server> and the bare tool name exists (rule #15).
+func TestMapper_McpToolCallEndSetsNamespace(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"github.list","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"mcp_tool_call_end","call_id":"c1","invocation":{"server":"github","tool":"list"},"result":{"Ok":{"is_error":false}}}}`,
+	}
+	events := runLines(t, mapper, input)
+
+	restamped := false
+	for _, start := range opStarts(events) {
+		if start.Name == "list" && start.ToolNamespace == "mcp:github" {
+			restamped = true
+		}
+	}
+	if !restamped {
+		t.Errorf("mcp_tool_call_end did not set namespace mcp:github / name list")
+	}
+}
+
+// TestMapper_PatchApplyEndFinalizes drives an apply_patch call followed by a
+// patch_apply_end with success=false and checks both the fs namespace on the
+// op-start and a failed/patch_failed op finalize (spec rule #16).
+func TestMapper_PatchApplyEndFinalizes(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"apply_patch","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"c1","success":false,"status":"failed"}}`,
+	}
+	events := runLines(t, mapper, input)
+	// Two independent observations: namespace on the start, status on the end.
+	var sawFsNamespace, sawPatchFailed bool
+	for _, start := range opStarts(events) {
+		if start.ToolNamespace == "fs" && start.Name == "apply_patch" {
+			sawFsNamespace = true
+		}
+	}
+	for _, fin := range opFinals(events) {
+		if fin.ErrorClass == "patch_failed" && fin.Status == "failed" {
+			sawPatchFailed = true
+		}
+	}
+	if !sawFsNamespace {
+		t.Errorf("apply_patch op missing fs namespace")
+	}
+	if !sawPatchFailed {
+		t.Errorf("patch_apply_end success=false did not finalize failed/patch_failed")
+	}
+}
+
+// TestMapper_SandboxDeniedOutput checks that a function_call_output whose text
+// signals a sandbox denial yields a failed/sandbox_denied finalize (edge #5).
+func TestMapper_SandboxDeniedOutput(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m","sandbox_policy":{"type":"read-only"}}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"operation denied by sandbox"}}`,
+	}
+	events := runLines(t, mapper, input)
+	sawDenied := false
+	for _, fin := range opFinals(events) {
+		if fin.ErrorClass == "sandbox_denied" && fin.Status == "failed" {
+			sawDenied = true
+		}
+	}
+	if !sawDenied {
+		t.Errorf("sandbox-denial output did not finalize failed/sandbox_denied")
+	}
+}
+
+// TestMapper_WebSearchOp checks that a web_search_call followed by its
+// web_search_end yields exactly one web_search op-start in the web namespace.
+func TestMapper_WebSearchOp(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","call_id":"w1","status":"completed","action":{"type":"search","query":"q"}}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"w1","query":"q"}}`,
+	}
+	events := runLines(t, mapper, input)
+	searches := 0
+	for _, start := range opStarts(events) {
+		if start.ToolNamespace == "web" && start.Name == "web_search" {
+			searches++
+		}
+	}
+	if searches != 1 {
+		t.Fatalf("web_search op count = %d, want 1", searches)
+	}
+}
+
+// TestMapper_NoCleanFinalize checks that a session whose last event is a
+// task_complete emits no SessionFinalizedEvent, and that finalizeStale adds
+// nothing for it either (SOW C#3, spec rule #23, tabular "clean EOF" row).
+func TestMapper_NoCleanFinalize(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}
+	events := runLines(t, mapper, input)
+	if finalized := countKind(events, canonical.EvSessionFinalized); finalized != 0 {
+		t.Fatalf("SessionFinalized count = %d, want 0 (no clean-EOF finalize)", finalized)
+	}
+	// With no open turn left, the stale sweep must be a no-op as well.
+	if swept := mapper.finalizeStale(1_700_000_000_000_000); len(swept) != 0 {
+		t.Fatalf("finalizeStale on clean session emitted %d events, want 0", len(swept))
+	}
+}
+
+// TestMapper_SyntheticStaleFinalize leaves a turn hanging (no task_complete) and
+// checks that terminal events appear only once finalizeStale is called: one
+// TurnFinalized(failed,incomplete), one SessionFinalized(failed,incomplete), and
+// one cancelled op finalize; a repeat call emits nothing (rule #23, SOW C#3).
+func TestMapper_SyntheticStaleFinalize(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid-crash")
+	input := []string{
+		`{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"sid-crash","source":"cli"}}`,
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+	}
+	events := runLines(t, mapper, input)
+	// While the turn is still in flight nothing terminal may be emitted.
+	if early := countKind(events, canonical.EvTurnFinalized); early != 0 {
+		t.Fatalf("pre-EOF TurnFinalized = %d, want 0", early)
+	}
+
+	// Stand in for the scanner deciding the file is stale and sweeping it.
+	const staleUs = 1_763_700_000_000_000
+	swept := mapper.finalizeStale(staleUs)
+
+	if n := countKind(swept, canonical.EvTurnFinalized); n != 1 {
+		t.Fatalf("stale finalize: TurnFinalized = %d, want 1", n)
+	}
+	turnFin := turnFinals(swept)
+	if turnFin[0].Status != "failed" || turnFin[0].ErrorClass != "incomplete" {
+		t.Errorf("stale turn finalize = {%q %q}, want {failed incomplete}", turnFin[0].Status, turnFin[0].ErrorClass)
+	}
+	sessionFinals := 0
+	for _, ev := range swept {
+		if fin, ok := ev.(canonical.SessionFinalizedEvent); ok {
+			sessionFinals++
+			if fin.Status != canonical.StatusFailed || fin.ErrorClass != "incomplete" {
+				t.Errorf("stale session finalize = {%q %q}, want {failed incomplete}", fin.Status, fin.ErrorClass)
+			}
+		}
+	}
+	if sessionFinals != 1 {
+		t.Fatalf("stale SessionFinalized = %d, want 1", sessionFinals)
+	}
+	// The shell op never received an output, so it must end up cancelled.
+	cancelledOps := 0
+	for _, fin := range opFinals(swept) {
+		if fin.Status == "cancelled" {
+			cancelledOps++
+		}
+	}
+	if cancelledOps != 1 {
+		t.Errorf("stale dangling op finalize cancelled = %d, want 1", cancelledOps)
+	}
+	// A second sweep with the same timestamp must emit nothing.
+	if repeat := mapper.finalizeStale(staleUs); len(repeat) != 0 {
+		t.Fatalf("second finalizeStale emitted %d events, want 0 (idempotent)", len(repeat))
+	}
+}
+
+// TestMapper_ReplacedTurnThenNewTurn feeds a turn t1 that is started and then
+// closed by turn_aborted(reason=replaced), followed by a turn t2 that runs to
+// task_complete (spec edge #2). It pins that the mapper opens a distinct turn
+// per turn_id: two TurnStarted events and two TurnFinalized events, the first
+// failed/replaced with Seq 1 and the second completed with Seq 2, so the two
+// turns are never conflated.
+func TestMapper_ReplacedTurnThenNewTurn(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"t1","reason":"replaced"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"turn_context","payload":{"turn_id":"t2","model":"m"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t2","started_at":2}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t2","completed_at":"` + tsDone + `"}}`,
+	}
+	events := runLines(t, mapper, input)
+	if started := countKind(events, canonical.EvTurnStarted); started != 2 {
+		t.Fatalf("TurnStarted = %d, want 2 (t1 + t2)", started)
+	}
+	finals := turnFinals(events)
+	if len(finals) != 2 {
+		t.Fatalf("TurnFinalized = %d, want 2", len(finals))
+	}
+	// Finalize order follows the input: t1 (replaced) first, then t2.
+	if finals[0].Seq != 1 || finals[0].Status != "failed" || finals[0].ErrorClass != "replaced" {
+		t.Errorf("turn1 finalize = %+v, want seq1 failed replaced", finals[0])
+	}
+	if finals[1].Seq != 2 || finals[1].Status != "completed" {
+		t.Errorf("turn2 finalize = %+v, want seq2 completed", finals[1])
+	}
+}
+
+// TestMapper_PayloadRefLineAnchor checks the LocationURI of the last PayloadRef
+// in the stream: it must start with the test sessions file:// root and end with
+// the "#L<line>" anchor of the owning record, here line 3 (the Chunk-B↔D
+// contract; spec rule #6/#7/#8).
+func TestMapper_PayloadRefLineAnchor(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		// Third input line: the assistant message that must anchor at #L3.
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi"}]}}`,
+	}
+	events := runLines(t, mapper, input)
+	var lastRef canonical.PayloadRefEvent
+	haveRef := false
+	for _, ev := range events {
+		if candidate, ok := ev.(canonical.PayloadRefEvent); ok {
+			lastRef = candidate
+			haveRef = true
+		}
+	}
+	if !haveRef {
+		t.Fatal("no PayloadRef emitted for the assistant message")
+	}
+	if !strings.HasSuffix(lastRef.LocationURI, "#L3") {
+		t.Errorf("PayloadRef LocationURI = %q, want suffix #L3", lastRef.LocationURI)
+	}
+	if !strings.HasPrefix(lastRef.LocationURI, "file:///test/sessions/") {
+		t.Errorf("PayloadRef LocationURI = %q, want file:// prefix", lastRef.LocationURI)
+	}
+}
+
+// TestMapper_SeqMonotone walks the emitted stream in order and fails as soon as
+// an event's SourceSeq is lower than its predecessor's (non-decreasing check).
+func TestMapper_SeqMonotone(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}
+	events := runLines(t, mapper, input)
+	var prev uint64
+	for idx, ev := range events {
+		if ev.EventSourceSeq() < prev {
+			t.Fatalf("event %d SourceSeq %d < previous %d (not monotone)", idx, ev.EventSourceSeq(), prev)
+		}
+		prev = ev.EventSourceSeq()
+	}
+}
+
+// TestMapper_FinalAnswerLog checks that an assistant message with
+// phase=final_answer produces a LogEntry whose Message is "final_answer".
+func TestMapper_FinalAnswerLog(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","phase":"final_answer","content":[{"type":"output_text","text":"done"}]}}`,
+	}
+	events := runLines(t, mapper, input)
+	logged := false
+	for _, ev := range events {
+		if entry, ok := ev.(canonical.LogEntryEvent); ok && entry.Message == "final_answer" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("no final_answer LogEntry emitted")
+	}
+}
+
+// TestMapper_TurnExtrasLog checks the turn_meta LogEntry emitted for a closed
+// turn: its Extras must hold codex_turn_id, sandbox, and ttft_ms, and its
+// TurnSeq must be 1 (the canonical-gap workaround; spec gaps #2/#3/#8).
+func TestMapper_TurnExtrasLog(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"turn-abc","model":"gpt-5.5","effort":"high","sandbox_policy":{"type":"workspace-write"}}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"turn-abc","started_at":1}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"turn-abc","completed_at":"` + tsDone + `","time_to_first_token_ms":250}}`,
+	}
+	events := runLines(t, mapper, input)
+	var turnMeta canonical.LogEntryEvent
+	haveMeta := false
+	for _, ev := range events {
+		if entry, ok := ev.(canonical.LogEntryEvent); ok && entry.Message == "turn_meta" {
+			turnMeta = entry
+			haveMeta = true
+		}
+	}
+	if !haveMeta {
+		t.Fatal("no turn_meta LogEntry emitted")
+	}
+	if turnMeta.Extras["codex_turn_id"] != "turn-abc" {
+		t.Errorf("codex_turn_id = %v, want turn-abc", turnMeta.Extras["codex_turn_id"])
+	}
+	if turnMeta.Extras["sandbox"] != "workspace-write" {
+		t.Errorf("sandbox = %v, want workspace-write", turnMeta.Extras["sandbox"])
+	}
+	if turnMeta.Extras["ttft_ms"] != int64(250) {
+		t.Errorf("ttft_ms = %v, want 250", turnMeta.Extras["ttft_ms"])
+	}
+	if turnMeta.TurnSeq != 1 {
+		t.Errorf("turn_meta TurnSeq = %d, want 1", turnMeta.TurnSeq)
+	}
+}
+
+// TestMapper_UnmatchedToolOutputLogged checks that a function_call_output with
+// an unknown call_id yields a WRN tool_output_unmatched log and no op finalize.
+func TestMapper_UnmatchedToolOutputLogged(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call_output","call_id":"orphan","output":"x"}}`,
+	}
+	events := runLines(t, mapper, input)
+	warned := false
+	for _, ev := range events {
+		if entry, ok := ev.(canonical.LogEntryEvent); ok && entry.Severity == "WRN" && entry.Message == "tool_output_unmatched" {
+			warned = true
+		}
+	}
+	if !warned {
+		t.Errorf("unmatched tool output did not surface a WRN log")
+	}
+	if finalizes := countKind(events, canonical.EvOpFinalized); finalizes != 0 {
+		t.Errorf("unmatched output emitted %d op finalizes, want 0", finalizes)
+	}
+}
+
+// TestMapper_TwoTurnsOpCounters runs two complete turns with one assistant
+// message each and checks that the LLM op recorded under TurnSeq 1 and the one
+// under TurnSeq 2 both carry op Seq 1 (spec rule #2, UNIQUE(turn_id,seq)).
+func TestMapper_TwoTurnsOpCounters(t *testing.T) {
+	t.Parallel()
+	mapper := newTestMapper("sid")
+	input := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"a"}]}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+		`{"timestamp":"` + tsDone + `","type":"turn_context","payload":{"turn_id":"t2","model":"m"}}`,
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"b"}]}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t2","completed_at":"` + tsDone + `"}}`,
+	}
+	events := runLines(t, mapper, input)
+
+	// Map each turn Seq to the op Seq of the last LLM op started under it.
+	opSeqByTurn := map[int]int{}
+	for _, start := range opStarts(events) {
+		if start.Kind == canonical.OpLLM {
+			opSeqByTurn[start.TurnSeq] = start.Seq
+		}
+	}
+	if opSeqByTurn[1] != 1 || opSeqByTurn[2] != 1 {
+		t.Errorf("LLM op Seq per turn = %v, want turn1→1 turn2→1", opSeqByTurn)
+	}
+}
diff --git a/internal/adapters/codex/mapper_turn.go b/internal/adapters/codex/mapper_turn.go
new file mode 100644
index 0000000..786f256
--- /dev/null
+++ b/internal/adapters/codex/mapper_turn.go
@@ -0,0 +1,315 @@
+package codex
+
+import (
+	"encoding/json"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// applySessionMeta completes a SessionStartedEvent from a session_meta payload:
+// it resolves Kind and the parent linkage, then sets RootNativeID, AgentName,
+// Cwd, and Extras (spec rule #1, "Sub-Agent Linkage", "Canonical Model Gaps"
+// #3/#5/#10). A ParentNativeID or AgentName already present on ev is kept, and
+// a pre-set Kind survives when the payload names no relationship, so the event
+// is complete whether or not the caller pre-seeded it. The resolved kind, any
+// non-empty parent, and a derived agent name are mirrored onto the mapper m.
+func applySessionMeta(ev *canonical.SessionStartedEvent, p *sessionMetaPayload, m *fileMapper) {
+	srcKind, srcParent := p.classifySource()
+	// Precedence: sub-agent source, then forked_from_id, then internal source; a
+	// fork and a sub-agent are mutually exclusive shapes (spec rule #1).
+	var relationship string
+	switch {
+	case srcKind == sourceSubagent:
+		relationship = "sub_agent"
+		ev.Kind = canonical.KindSubAgent
+		if ev.ParentNativeID == "" && srcParent != "" {
+			ev.ParentNativeID = srcParent
+		}
+	case p.ForkedFromID != "":
+		relationship = "fork"
+		ev.Kind = canonical.KindFork
+		if ev.ParentNativeID == "" {
+			ev.ParentNativeID = p.ForkedFromID
+		}
+	case srcKind == sourceInternal:
+		relationship = "tool_internal"
+		ev.Kind = canonical.KindToolInternal
+	case ev.Kind == "":
+		ev.Kind = canonical.KindRoot
+	}
+	// thread_source is a secondary signal consulted only for sessions still
+	// classified as root (spec rule #1; memory_consolidation is spec gap #6).
+	if ev.Kind == canonical.KindRoot {
+		switch p.ThreadSource {
+		case "subagent":
+			ev.Kind = canonical.KindSubAgent
+			relationship = "sub_agent"
+		case "memory_consolidation":
+			ev.Kind = canonical.KindToolInternal
+			relationship = "tool_internal"
+		}
+	}
+	m.kind = ev.Kind
+	ev.RootNativeID = rootOf(ev.NativeID, ev.ParentNativeID)
+	if ev.ParentNativeID != "" {
+		m.parentNativeID = ev.ParentNativeID
+	}
+
+	if ev.AgentName == "" {
+		ev.AgentName = agentNameFromMeta(p)
+		m.agentName = ev.AgentName
+	}
+	if p.Cwd != "" {
+		ev.Cwd = p.Cwd
+	}
+	ev.Extras = sessionExtras(p, relationship)
+}
+
+// rootOf picks the root native id recorded for a session: its own id when it has
+// no parent, otherwise the parent id (the resolver walks the chain from there).
+func rootOf(nativeID, parentNativeID string) string {
+	if parentNativeID == "" {
+		return nativeID
+	}
+	return parentNativeID
+}
+
+// agentNameFromMeta derives the session AgentName (spec rule #1) by taking the
+// first non-empty candidate in this order: agent_nickname, agent_role, then
+// "codex:" + originator. When all three are empty the name is the bare
+// "codex".
+func agentNameFromMeta(p *sessionMetaPayload) string {
+	switch {
+	case p.AgentNickname != "":
+		return p.AgentNickname
+	case p.AgentRole != "":
+		return p.AgentRole
+	case p.Originator != "":
+		return "codex:" + p.Originator
+	default:
+		return "codex"
+	}
+}
+
+// sessionExtras builds sessions.extras_json from session_meta (spec rule #1,
+// "Versioning / Forward Compatibility"): cli_version, originator, source, cwd,
+// model_provider, a nested git map, the relationship tag (spec gap #5), and the
+// sub-agent depth. Empty values are omitted and nil is returned when nothing
+// was set. Sandbox is not recorded here; it lands per-turn (spec gap #3).
+func sessionExtras(p *sessionMetaPayload, relationship string) map[string]any {
+	out := map[string]any{}
+	if relationship != "" {
+		out["relationship"] = relationship
+	}
+	if p.Originator != "" {
+		out["originator"] = p.Originator
+	}
+	if p.CLIVersion != "" {
+		out["cli_version"] = p.CLIVersion
+	}
+	if p.ModelProvider != "" {
+		out["model_provider"] = p.ModelProvider
+	}
+	if p.Cwd != "" {
+		out["cwd"] = p.Cwd
+	}
+	if src := sourceString(p.Source); src != "" {
+		out["source"] = src
+	}
+	if depth := subagentDepth(p.Source); depth > 0 {
+		out["subagent_depth"] = depth
+	}
+	if repo := p.Git; repo != nil {
+		git := map[string]any{}
+		if repo.Branch != "" {
+			git["branch"] = repo.Branch
+		}
+		if repo.CommitHash != "" {
+			git["commit_hash"] = repo.CommitHash
+		}
+		if repo.RepositoryURL != "" {
+			git["repository_url"] = repo.RepositoryURL
+		}
+		if len(git) > 0 {
+			out["git"] = git
+		}
+	}
+	if len(out) == 0 {
+		return nil
+	}
+	return out
+}
+
+// sourceString renders the raw source enum as a compact string for extras: a
+// JSON string is returned verbatim; for an object the first key present among
+// subagent/internal/custom/other is returned. Anything else yields "".
+func sourceString(raw json.RawMessage) string {
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 {
+		return ""
+	}
+	var bare string
+	if err := json.Unmarshal(trimmed, &bare); err == nil {
+		return bare
+	}
+	var keyed map[string]json.RawMessage
+	if err := json.Unmarshal(trimmed, &keyed); err != nil {
+		return ""
+	}
+	for _, name := range [...]string{"subagent", "internal", "custom", "other"} {
+		if _, present := keyed[name]; present {
+			return name
+		}
+	}
+	return ""
+}
+
+// subagentDepth extracts source.subagent.thread_spawn.depth (spec gap #10),
+// returning 0 when the source is absent, undecodable, or has no such field.
+func subagentDepth(raw json.RawMessage) int {
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 {
+		return 0
+	}
+	var shape struct {
+		Subagent struct {
+			ThreadSpawn struct {
+				Depth int `json:"depth"`
+			} `json:"thread_spawn"`
+		} `json:"subagent"`
+	}
+	if err := json.Unmarshal(trimmed, &shape); err != nil {
+		return 0
+	}
+	return shape.Subagent.ThreadSpawn.Depth
+}
+
+// openTurn returns the turn state for a codex turn_id, creating it with the next
+// turn Seq when the id has not been seen before. In both cases the turn becomes
+// the active turn used for token attribution and the absent-turn_id fallback
+// (spec rule #2, #3, #17). No TurnStartedEvent is emitted here; emitTurnStarted
+// does that idempotently so turn_context and task_started can both call this.
+func (m *fileMapper) openTurn(turnID string, startTsUs int64) *turnState {
+	ts, known := m.turns[turnID]
+	if !known {
+		// First sighting of this turn_id: allocate the next session-wide turn
+		// Seq and remember the arrival order.
+		m.turnSeqCounter++
+		ts = &turnState{
+			seq:         m.turnSeqCounter,
+			codexTurnID: turnID,
+			startTsUs:   startTsUs,
+		}
+		m.turns[turnID] = ts
+		m.turnOrder = append(m.turnOrder, turnID)
+	}
+	m.activeTurnID = turnID
+	m.haveActiveTurn = true
+	return ts
+}
+
+// emitTurnStarted returns the TurnStartedEvent for ts on the first call and nil
+// on every later call, so turn_context and task_started may both invoke it.
+func (m *fileMapper) emitTurnStarted(ts *turnState, base canonical.EventBase) canonical.Event {
+	if ts.started {
+		return nil
+	}
+	ts.started = true
+	started := canonical.TurnStartedEvent{
+		EventBase:       base,
+		SessionNativeID: m.nativeID,
+		Seq:             ts.seq,
+	}
+	return started
+}
+
+// finalizeTurn marks ts finalized and returns its TurnFinalizedEvent, copying
+// the turn's accumulated token rollup (in/out/cache read/cache write) together
+// with the caller-supplied status, errClass, and close timestamp endUs (spec
+// rule #4, #5, #23). It attaches no per-turn extras and emits no dangling-op
+// finalizes; both are left to the caller, which can then stamp them with the
+// same close timestamp (spec rule #4, edge #9).
+func (m *fileMapper) finalizeTurn(ts *turnState, base canonical.EventBase, endUs int64, status, errClass string) canonical.TurnFinalizedEvent {
+	fin := canonical.TurnFinalizedEvent{
+		EventBase:        base,
+		SessionNativeID:  m.nativeID,
+		Seq:              ts.seq,
+		Status:           status,
+		ErrorClass:       errClass,
+		EndTs:            endUs,
+		TokensIn:         ts.tokensIn,
+		TokensOut:        ts.tokensOut,
+		TokensCacheRead:  ts.tokensCacheRead,
+		TokensCacheWrite: ts.tokensCacheWrite,
+	}
+	ts.finalized = true
+	return fin
+}
+
+// finalizeDanglingOps finalizes every op still open under the given turn at turn
+// close, with the supplied status (spec rule #4 — "completed inferred or unknown
+// if no output ever arrived" at task_complete; edge #9 — "cancelled" at abort/
+// interrupt). openOps is a map, so the matching ops are collected first and then
+// ordered by op Seq via sortByOpSeq to keep the emitted stream stable. base is
+// invoked once per finalized op to stamp that op's event, and endUs becomes
+// every event's EndTs. Each finalized op is marked finalized and removed from
+// openOps. The returned slice is non-nil, and empty when nothing was open.
+func (m *fileMapper) finalizeDanglingOps(turnID string, base func() canonical.EventBase, endUs int64, status string) []canonical.Event {
+	type pending struct {
+		callID string
+		op     *openOp
+	}
+	var ops []pending
+	for callID, op := range m.openOps {
+		if op.turnID == turnID && !op.finalized {
+			ops = append(ops, pending{callID: callID, op: op})
+		}
+	}
+	sortByOpSeq(ops, func(p pending) int { return p.op.opSeq })
+	out := make([]canonical.Event, 0, len(ops))
+	for _, p := range ops {
+		p.op.finalized = true
+		fin := canonical.OpFinalizedEvent{
+			EventBase:       base(),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         p.op.turnSeq,
+			Seq:             p.op.opSeq,
+			Status:          status,
+			EndTs:           endUs,
+		}
+		// Enrichment already merged onto the op carries no canonical finalize
+		// field beyond status; the extras live on the OpStarted's Extras (set
+		// at enrichment time), so p.op.extras is deliberately not read here.
+		out = append(out, fin)
+		delete(m.openOps, p.callID)
+	}
+	return out
+}
+
+// addTokenUsage folds one token_count event into this turn's rollup. The
+// per-call last_token_usage is added to the in/out/cache-read/cache-write
+// totals. A positive cumulative total_token_usage.total_tokens replaces the
+// CtxUsed candidate and a positive model_context_window replaces CtxMax (spec
+// rule #4, #17, "Token accounting nuance"); the cumulative total is never added
+// to the per-turn token counts.
+func (ts *turnState) addTokenUsage(info tokenCountInfo) {
+	ts.tokensCacheWrite += info.last.CacheCreationInputTokens
+	ts.tokensCacheRead += info.last.CachedInputTokens
+	ts.tokensOut += info.last.OutputTokens
+	ts.tokensIn += info.last.InputTokens
+	if window := info.modelContextWindow; window > 0 {
+		ts.ctxMax = window
+	}
+	if used := info.total.TotalTokens; used > 0 {
+		ts.lastLLMCtxUsed = used
+	}
+}
+
+// jsonTrim runs raw through bytesTrimSpace; an empty or bare-null result is nil.
+func jsonTrim(raw json.RawMessage) json.RawMessage {
+	trimmed := bytesTrimSpace(raw)
+	if len(trimmed) > 0 && string(trimmed) != "null" {
+		return trimmed
+	}
+	return nil
+}
diff --git a/internal/adapters/codex/ops.go b/internal/adapters/codex/ops.go
new file mode 100644
index 0000000..be18062
--- /dev/null
+++ b/internal/adapters/codex/ops.go
@@ -0,0 +1,238 @@
+package codex
+
+import (
+	"bytes"
+	"encoding/json"
+	"sort"
+	"strings"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// previewMax bounds the rune length of a non-sensitive Extras preview string
+// (compaction message, last_agent_message). Full bodies live behind PayloadRefs.
+const previewMax = 200
+
+// mapTurnContext maps a turn_context record (spec rule #2). A nil payload maps
+// to nothing. Otherwise it resolves the turn for turn_id via openTurn, appends
+// whatever emitTurnStarted yields for it, copies the non-empty sandbox / effort /
+// approval-policy values onto the turn (spec gap #3), records the model on the
+// mapper, and emits a SessionUpdatedEvent(Model) only the first time a non-empty
+// model is seen on this mapper.
+func (m *fileMapper) mapTurnContext(rec record, advance func(int64) canonical.EventBase) []canonical.Event {
+	tc := rec.TurnContext
+	if tc == nil {
+		return nil
+	}
+	tsUs := m.recordTs(rec)
+	turn := m.openTurn(tc.TurnID, tsUs)
+	events := make([]canonical.Event, 0, 2)
+	if started := m.emitTurnStarted(turn, advance(tsUs)); started != nil {
+		events = append(events, started)
+	}
+	// Per-turn policy snapshot: only non-empty values overwrite what is stored.
+	if sandbox := tc.sandboxType(); sandbox != "" {
+		turn.sandbox = sandbox
+	}
+	if tc.Effort != "" {
+		turn.effort = tc.Effort
+	}
+	if tc.ApprovalPolicy != "" {
+		turn.approvalPolicy = tc.ApprovalPolicy
+	}
+	if tc.Model == "" {
+		return events
+	}
+	// Always track the latest model; announce it only once per mapper.
+	m.model = tc.Model
+	if m.modelSeen {
+		return events
+	}
+	m.modelSeen = true
+	return append(events, canonical.SessionUpdatedEvent{
+		EventBase: advance(tsUs),
+		NativeID:  m.nativeID,
+		Model:     tc.Model,
+	})
+}
+
+// mapCompacted maps a top-level compacted line (spec rule #20) onto a single
+// compaction op via emitCompactionOp, using PayloadRef format "json". Extras
+// always carry trigger="auto"; with a payload present they also carry
+// replacement_history_size and, when the trimmed message preview is non-empty,
+// message_preview. The response_item / event_msg compaction signals reach the
+// same emitCompactionOp from their own dispatchers (spec gap #4).
+func (m *fileMapper) mapCompacted(rec record, advance func(int64) canonical.EventBase) []canonical.Event {
+	tsUs := m.recordTs(rec)
+	extras := map[string]any{"trigger": "auto"}
+	if payload := rec.Compacted; payload != nil {
+		extras["replacement_history_size"] = payload.replacementHistorySize()
+		preview := trimPreview(payload.Message, previewMax)
+		if preview != "" {
+			extras["message_preview"] = preview
+		}
+	}
+	return m.emitCompactionOp(advance, tsUs, extras, "json")
+}
+
+// emitCompactionOp emits the compaction op for the current turn (spec rule #20,
+// gap #4): an optional TurnStarted (whatever emitTurnStarted yields), then
+// OpStarted(Kind=OpCompaction, Name="compaction", ParentOpSeq=-1) carrying
+// extras, an OpFinalized with Status "completed" and EndTs=tsUs, and a PayloadRef
+// built with the given format. The turn comes from ensureTurn, so a compaction
+// that precedes any turn boundary still attaches to a turn. Every event draws a
+// fresh EventBase from advance(tsUs), in emission order.
+func (m *fileMapper) emitCompactionOp(advance func(int64) canonical.EventBase, tsUs int64, extras map[string]any, format string) []canonical.Event {
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 3)
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	ts.opSeq++
+	opSeq := ts.opSeq
+	out = append(out,
+		canonical.OpStartedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         ts.seq,
+			Seq:             opSeq,
+			ParentOpSeq:     -1,
+			Kind:            canonical.OpCompaction,
+			Name:            "compaction",
+			Extras:          extras,
+		},
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         ts.seq,
+			Seq:             opSeq,
+			Status:          "completed",
+			EndTs:           tsUs,
+		},
+		m.payloadRef(advance(tsUs), ts.seq, opSeq, "log", format, -1),
+	)
+	return out
+}
+
+// ensureTurn returns the active turn when one is flagged active and present in
+// m.turns; otherwise it opens a turn under the empty turn_id bucket "". Used by
+// ops that may precede a turn boundary (compaction, stray message — spec edge #3).
+func (m *fileMapper) ensureTurn(tsUs int64) *turnState {
+	if !m.haveActiveTurn {
+		return m.openTurn("", tsUs)
+	}
+	if active, found := m.turns[m.activeTurnID]; found {
+		return active
+	}
+	return m.openTurn("", tsUs)
+}
+
+// nextOp increments the turn's op counter and returns (ts.seq, the new ts.opSeq).
+func (m *fileMapper) nextOp(ts *turnState) (int, int) {
+	ts.opSeq++
+	return ts.seq, ts.opSeq
+}
+
+// trackOp registers an in-flight op in m.openOps under its call_id so a later
+// *_output or enrichment event can look up the SAME op (spec rule #9, #14-16).
+// An empty call_id is ignored: such an op is never tracked.
+func (m *fileMapper) trackOp(callID, turnID string, turnSeq, opSeq int, kind canonical.OpKind, name string) {
+	if callID == "" {
+		return
+	}
+	tracked := new(openOp)
+	tracked.turnID = turnID
+	tracked.turnSeq = turnSeq
+	tracked.opSeq = opSeq
+	tracked.kind = kind
+	tracked.name = name
+	tracked.extras = map[string]any{}
+	m.openOps[callID] = tracked
+}
+
+// sortByOpSeq sorts s in place, ascending by key(element), using sort.Slice.
+func sortByOpSeq[T any](s []T, key func(T) int) {
+	ascending := func(i, j int) bool { return key(s[i]) < key(s[j]) }
+	sort.Slice(s, ascending)
+}
+
+// bytesTrimSpace returns b without leading/trailing white space (Unicode-defined,
+// per bytes.TrimSpace); it exists so mapper_turn.go callers need not import bytes.
+func bytesTrimSpace(b []byte) []byte { return bytes.TrimSpace(b) }
+
+// tokenUsage is the subset of a TokenUsage block the rollup consumes
+// (protocol.rs:1895-1979). Each field decodes from the snake_case JSON key in
+// its tag; keys without a matching field are ignored by encoding/json.
+type tokenUsage struct {
+	InputTokens              int64 `json:"input_tokens"`
+	OutputTokens             int64 `json:"output_tokens"`
+	CachedInputTokens        int64 `json:"cached_input_tokens"`
+	CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"`
+	TotalTokens              int64 `json:"total_tokens"`
+}
+
+// tokenCountInfo is the result of decodeTokenCount (spec rule #17). last is the
+// per-call usage that addTokenUsage adds to the turn rollup (C#1); total is the
+// cumulative usage whose TotalTokens only feeds lastLLMCtxUsed; and
+// modelContextWindow is the context window size (0 when the line carried none).
+type tokenCountInfo struct {
+	last               tokenUsage
+	total              tokenUsage
+	modelContextWindow int64
+}
+
+// decodeTokenCount extracts last_token_usage / total_token_usage /
+// model_context_window from an event_msg.token_count line (spec rule #17). raw
+// is the verbatim envelope line; the fields are read from payload.info. When
+// payload.info.model_context_window is zero or absent, the sibling
+// payload.model_context_window is used instead (some versions place it there).
+// A line that fails to decode yields the zero tokenCountInfo.
+func decodeTokenCount(raw []byte) tokenCountInfo {
+	type infoBlock struct {
+		Total              tokenUsage `json:"total_token_usage"`
+		Last               tokenUsage `json:"last_token_usage"`
+		ModelContextWindow int64      `json:"model_context_window"`
+	}
+	var line struct {
+		Payload struct {
+			Info               infoBlock `json:"info"`
+			ModelContextWindow int64     `json:"model_context_window"`
+		} `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err != nil {
+		return tokenCountInfo{}
+	}
+	info := line.Payload.Info
+	decoded := tokenCountInfo{last: info.Last, total: info.Total, modelContextWindow: info.ModelContextWindow}
+	if decoded.modelContextWindow == 0 {
+		decoded.modelContextWindow = line.Payload.ModelContextWindow
+	}
+	return decoded
+}
+
+// userFingerprint builds the dedup key for user input shared across
+// response_item.message(role=user) and event_msg.user_message (spec rule #6,
+// #18). The key is the message text itself with surrounding whitespace removed
+// — no hashing is applied — so two forms of the same input compare equal
+// regardless of which arrives first. A body that is empty or whitespace-only
+// fingerprints to "", which firstSeenUser treats as never deduped.
+// NOTE(review): keys are retained verbatim in the seen-set, so memory grows with
+// message size; hash here if that becomes a concern.
+func userFingerprint(text string) string {
+	// TrimSpace already maps an all-whitespace body to "", the never-deduped key.
+	return strings.TrimSpace(text)
+}
+
+// firstSeenUser reports whether fp has not been recorded on this mapper yet and
+// records it, so the companion form of the same input is suppressed (spec rule
+// #6, #18). The empty fingerprint is never recorded and always reports true.
+func (m *fileMapper) firstSeenUser(fp string) bool {
+	if fp == "" {
+		return true
+	}
+	_, seen := m.seenUser[fp]
+	if !seen {
+		m.seenUser[fp] = struct{}{}
+	}
+	return !seen
+}
diff --git a/internal/adapters/codex/ops_enrich.go b/internal/adapters/codex/ops_enrich.go
new file mode 100644
index 0000000..e1a0db7
--- /dev/null
+++ b/internal/adapters/codex/ops_enrich.go
@@ -0,0 +1,326 @@
+package codex
+
+import (
+	"encoding/json"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// enrichOp merges telemetry from an event_msg end-event onto the op matched by
+// call_id, emitting an OpFinalizedEvent that re-states the op's terminal status
+// (spec rule #14 exec_command_end, #11 web_search_end), followed by a DBG
+// "op_enrichment" LogEntry when the op holds extras. It does NOT emit a second
+// op — the ingester reconciles this finalize with the op's existing (turn,seq)
+// row (idempotent upsert). When no tracked op matches the call_id, it delegates
+// to enrichFinalizedOrLog, which surfaces a DBG log instead.
+//
+// extractor builds the Extras map from the raw payload (nil → no extras, e.g.
+// image_generation_end which only marks completion). A blanked-output
+// exec_command_end (Limited mode clears stdout/stderr) is NOT an error — the
+// status stays the op's derived terminal status (spec rule #14).
+func (m *fileMapper) enrichOp(rec record, advance func(int64) canonical.EventBase, tsUs int64, extractor func([]byte) map[string]any) []canonical.Event {
+	p := rec.EventMsg
+	op, ok := m.openOps[p.CallID]
+	if !ok {
+		// No tracked op for this call_id (e.g. its *_output already finalized
+		// it): hand off to the untracked path, which only logs the enrichment.
+		return m.enrichFinalizedOrLog(rec, advance, tsUs, extractor)
+	}
+	var extras map[string]any
+	if extractor != nil {
+		extras = extractor(rec.Raw)
+	}
+	status, errClass := enrichStatus(rec.Raw)
+	if status == "" {
+		// No explicit status/exit_code on the end-event: leave the op's terminal
+		// status to its *_output (or turn-close inference). Emit nothing here and
+		// only merge the extras onto the tracked op. NOTE(review): confirm a later
+		// finalize path actually surfaces op.extras, or they are dropped silently.
+		mergeExtras(op, extras)
+		return nil
+	}
+	op.finalized = true
+	mergeExtras(op, extras)
+	fin := canonical.OpFinalizedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         op.turnSeq,
+		Seq:             op.opSeq,
+		Status:          status,
+		ErrorClass:      errClass,
+		EndTs:           tsUs,
+	}
+	delete(m.openOps, p.CallID)
+	return withExtrasLog(m, advance, tsUs, fin, op.extras)
+}
+
+// enrichFinalizedOrLog handles an end-event whose call_id matches no tracked op
+// (its *_output already finalized it, or its start was below a resume offset).
+// Because a finalized op is deleted from openOps, the (turn,seq) cannot be
+// recovered here, so this path never emits an OpFinalizedEvent: it always emits
+// one DBG LogEntry named "enrich_<event type>" whose extras hold the call_id
+// plus whatever extractor returns (extractor keys win on collision; a nil
+// extractor adds nothing).
+func (m *fileMapper) enrichFinalizedOrLog(rec record, advance func(int64) canonical.EventBase, tsUs int64, extractor func([]byte) map[string]any) []canonical.Event {
+	callID, kind := rec.EventMsg.CallID, rec.EventMsg.Type
+	fields := map[string]any{"call_id": callID}
+	if extractor != nil {
+		for key, value := range extractor(rec.Raw) {
+			fields[key] = value
+		}
+	}
+	entry := m.logEntry(advance(tsUs), "DBG", "enrich_"+kind, fields)
+	return []canonical.Event{entry}
+}
+
+// enrichMcp handles event_msg.mcp_tool_call_end (spec rule #15).
+//
+// When the call_id matches a tracked op it emits two events: an OpStarted update
+// for the same (turn,seq) with Kind=OpTool, ToolNamespace "mcp:<server>"
+// ("custom" when the invocation names no server) and Name set to the invocation
+// tool (the op's existing name when the invocation names none), then an
+// OpFinalized with the status/ErrorClass from mcpResultStatus. The op is then
+// dropped from openOps. When no op matches, it emits a single DBG LogEntry
+// carrying the call_id and any server/tool found.
+func (m *fileMapper) enrichMcp(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	callID := rec.EventMsg.CallID
+	server, tool := mcpInvocation(rec.Raw)
+	op, tracked := m.openOps[callID]
+	if !tracked {
+		fields := map[string]any{"call_id": callID}
+		if server != "" {
+			fields["server"] = server
+		}
+		if tool != "" {
+			fields["tool"] = tool
+		}
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_mcp_tool_call_end", fields)}
+	}
+	// Prefer the invocation's tool name; keep the existing name when it is absent.
+	if tool != "" {
+		op.name = tool
+	}
+	namespace := "custom"
+	if server != "" {
+		namespace = "mcp:" + server
+	}
+	status, errClass := mcpResultStatus(rec.Raw)
+	op.finalized = true
+	restamp := canonical.OpStartedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         op.turnSeq,
+		Seq:             op.opSeq,
+		ParentOpSeq:     -1,
+		Kind:            canonical.OpTool,
+		Name:            op.name,
+		ToolNamespace:   namespace,
+	}
+	finalize := canonical.OpFinalizedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         op.turnSeq,
+		Seq:             op.opSeq,
+		Status:          status,
+		ErrorClass:      errClass,
+		EndTs:           tsUs,
+	}
+	delete(m.openOps, callID)
+	return []canonical.Event{restamp, finalize}
+}
+
+// enrichPatchApply handles event_msg.patch_apply_end (spec rule #16). It
+// finalizes the tracked op for call_id with the status from patchApplyStatus and
+// drops it from openOps; with no tracked op it emits a DBG log (call_id, status).
+func (m *fileMapper) enrichPatchApply(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	callID := rec.EventMsg.CallID
+	status, errClass := patchApplyStatus(rec.Raw)
+	op, tracked := m.openOps[callID]
+	if !tracked {
+		fields := map[string]any{"call_id": callID, "status": status}
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_patch_apply_end", fields)}
+	}
+	op.finalized = true
+	delete(m.openOps, callID)
+	return []canonical.Event{canonical.OpFinalizedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         op.turnSeq,
+		Seq:             op.opSeq,
+		Status:          status,
+		ErrorClass:      errClass,
+		EndTs:           tsUs,
+	}}
+}
+
+// mergeExtras copies every key of extras onto op.extras (allocating the map on
+// first use), overwriting existing keys. A nil op or empty extras is a no-op.
+func mergeExtras(op *openOp, extras map[string]any) {
+	switch {
+	case op == nil, len(extras) == 0:
+		return
+	case op.extras == nil:
+		op.extras = make(map[string]any, len(extras))
+	}
+	for key, value := range extras {
+		op.extras[key] = value
+	}
+}
+
+// withExtrasLog returns the finalize event, followed by a DBG "op_enrichment"
+// LogEntry carrying extras when extras is non-empty. The LogEntry exists
+// because the canonical OpFinalized carries no Extras field, so exec/web
+// telemetry would otherwise not be visible in the Logs tab. advance is invoked
+// only when the LogEntry is produced.
+func withExtrasLog(m *fileMapper, advance func(int64) canonical.EventBase, tsUs int64, fin canonical.OpFinalizedEvent, extras map[string]any) []canonical.Event {
+	if len(extras) == 0 {
+		return []canonical.Event{fin}
+	}
+	return []canonical.Event{fin, m.logEntry(advance(tsUs), "DBG", "op_enrichment", extras)}
+}
+
+// execCommandExtras extracts the exec_command_end telemetry merged into the op
+// (spec rule #14): exec_exit_code, exec_cwd, exec_source, and exec_output_bytes
+// (the byte length of aggregated_output — the body itself is never copied).
+// Absent/empty fields are omitted; nil is returned when nothing was extracted or
+// the line does not decode. NOTE(review): payload.duration is not surfaced yet.
+func execCommandExtras(raw []byte) map[string]any {
+	var env struct {
+		Payload struct {
+			ExitCode         *int64 `json:"exit_code"`
+			Cwd              string `json:"cwd"`
+			Source           string `json:"source"`
+			AggregatedOutput string `json:"aggregated_output"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return nil
+	}
+	extras := map[string]any{}
+	if env.Payload.ExitCode != nil {
+		extras["exec_exit_code"] = *env.Payload.ExitCode
+	}
+	if env.Payload.Cwd != "" {
+		extras["exec_cwd"] = env.Payload.Cwd
+	}
+	if env.Payload.Source != "" {
+		extras["exec_source"] = env.Payload.Source
+	}
+	if env.Payload.AggregatedOutput != "" {
+		extras["exec_output_bytes"] = len(env.Payload.AggregatedOutput)
+	}
+	if len(extras) == 0 {
+		return nil
+	}
+	return extras
+}
+
+// webSearchExtras extracts the query of an event_msg.web_search_end (spec rule
+// #11) as {"query": preview}, trimmed via trimPreview to previewMax. It returns
+// nil when the line does not decode or carries no query.
+func webSearchExtras(raw []byte) map[string]any {
+	var line struct {
+		Payload struct {
+			Query string `json:"query"`
+		} `json:"payload"`
+	}
+	err := json.Unmarshal(raw, &line)
+	if err != nil || line.Payload.Query == "" {
+		return nil
+	}
+	return map[string]any{"query": trimPreview(line.Payload.Query, previewMax)}
+}
+
+// enrichStatus derives (status, ErrorClass) from an end-event's payload.exit_code
+// (spec rule #14): 0 → ("completed", ""); non-zero → ("failed", "command_failed");
+// missing exit_code or an undecodable line → ("", ""), leaving status to *_output.
+func enrichStatus(raw []byte) (status, errClass string) {
+	var line struct {
+		Payload struct {
+			ExitCode *int64 `json:"exit_code"`
+		} `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err != nil {
+		return "", ""
+	}
+	switch code := line.Payload.ExitCode; {
+	case code == nil:
+		return "", ""
+	case *code == 0:
+		return "completed", ""
+	default:
+		return "failed", "command_failed"
+	}
+}
+
+// mcpInvocation extracts mcp_tool_call_end's payload.invocation.{server,tool}
+// (spec rule #15); both are "" when absent or when the line does not decode.
+func mcpInvocation(raw []byte) (server, tool string) {
+	var line struct {
+		Payload struct {
+			Invocation struct {
+				Server string `json:"server"`
+				Tool   string `json:"tool"`
+			} `json:"invocation"`
+		} `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err == nil {
+		server, tool = line.Payload.Invocation.Server, line.Payload.Invocation.Tool
+	}
+	return server, tool
+}
+
+// mcpResultStatus derives status from mcp_tool_call_end.result, serialized as
+// {"Ok":...} or {"Err":"..."} (spec rule #15, protocol.rs:2191-2228). A non-null
+// Err or Ok.is_error=true → ("failed","tool_error"); anything else → completed.
+func mcpResultStatus(raw []byte) (status, errClass string) {
+	var line struct {
+		Payload struct {
+			Result json.RawMessage `json:"result"`
+		} `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err != nil {
+		return "completed", ""
+	}
+	result := jsonTrim(line.Payload.Result)
+	if len(result) == 0 {
+		return "completed", ""
+	}
+	var outcome struct {
+		Err json.RawMessage `json:"Err"`
+		Ok  struct {
+			IsError bool `json:"is_error"`
+		} `json:"Ok"`
+	}
+	if err := json.Unmarshal(result, &outcome); err != nil {
+		return "completed", ""
+	}
+	failed := outcome.Ok.IsError || len(jsonTrim(outcome.Err)) > 0
+	if !failed {
+		return "completed", ""
+	}
+	return "failed", "tool_error"
+}
+
+// patchApplyStatus derives (status, ErrorClass) from patch_apply_end (spec rule
+// #16). The result is ("failed", "patch_failed") when payload.success is
+// explicitly false or payload.status is "failed" or "error"; every other case —
+// including an undecodable line or a missing success/status — yields
+// ("completed", "").
+func patchApplyStatus(raw []byte) (status, errClass string) {
+	var line struct {
+		Payload struct {
+			Success *bool  `json:"success"`
+			Status  string `json:"status"`
+		} `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err != nil {
+		return "completed", ""
+	}
+	explicitFailure := line.Payload.Success != nil && !*line.Payload.Success
+	reportedFailure := line.Payload.Status == "failed" || line.Payload.Status == "error"
+	if explicitFailure || reportedFailure {
+		return "failed", "patch_failed"
+	}
+	return "completed", ""
+}
diff --git a/internal/adapters/codex/ops_event.go b/internal/adapters/codex/ops_event.go
new file mode 100644
index 0000000..edd9080
--- /dev/null
+++ b/internal/adapters/codex/ops_event.go
@@ -0,0 +1,372 @@
+package codex
+
+import (
+	"encoding/json"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// mapEventMsg dispatches an event_msg record to its per-variant handler (spec
+// rules #3, #4, #5, #8, #14-20, #22); a nil payload maps to nothing. The *_end
+// variants (exec_command, mcp_tool_call, patch_apply, web_search,
+// image_generation) are routed to the enrich* helpers and never start a new op.
+// agent_message / agent_reasoning* produce only a LogEntry, and any variant not
+// listed falls through to a DBG "event_msg:<type>" LogEntry. The error is always nil.
+func (m *fileMapper) mapEventMsg(rec record, advance func(int64) canonical.EventBase) ([]canonical.Event, error) {
+	p := rec.EventMsg
+	if p == nil {
+		return nil, nil
+	}
+	tsUs := m.recordTs(rec)
+	switch p.Type {
+	case "task_started", "turn_started":
+		return m.mapTaskStarted(rec, advance, tsUs), nil
+	case "task_complete", "turn_complete":
+		return m.mapTaskComplete(rec, advance, tsUs), nil
+	case "turn_aborted":
+		return m.mapTurnAborted(rec, advance, tsUs), nil
+	case "user_message":
+		return m.emitUserInput(advance, tsUs, p.Message, "json", int64(len(rec.Raw))), nil
+	case "agent_message":
+		// Dedup companion to response_item.message(assistant) (spec rule #19):
+		// no op; stash the message as the turn's last_agent_message preview and
+		// surface a DBG log so the UI reasoning/answer panel can show it.
+		m.stashAgentMessage(p.Message)
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "agent_message", nil)}, nil
+	case "agent_reasoning", "agent_reasoning_raw_content":
+		// Reasoning UI summary (spec rule #8): LogEntry ONLY — the canonical
+		// reasoning op is emitted from response_item.reasoning so the UI never
+		// sees a duplicate.
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "agent_reasoning", nil)}, nil
+	case "token_count":
+		return m.mapTokenCount(rec, advance, tsUs), nil
+	case "exec_command_end":
+		return m.enrichOp(rec, advance, tsUs, execCommandExtras), nil
+	case "mcp_tool_call_end":
+		return m.enrichMcp(rec, advance, tsUs), nil
+	case "patch_apply_end":
+		return m.enrichPatchApply(rec, advance, tsUs), nil
+	case "web_search_end":
+		return m.enrichOp(rec, advance, tsUs, webSearchExtras), nil
+	case "image_generation_end":
+		return m.enrichOp(rec, advance, tsUs, nil), nil
+	case "context_compacted":
+		// event_msg.context_compacted → OpCompaction (spec rule #20, gap #4).
+		return m.emitCompactionOp(advance, tsUs, map[string]any{"trigger": "auto"}, "json"), nil
+	case "error":
+		return []canonical.Event{m.logEntry(advance(tsUs), "ERR", "error", errorExtras(p))}, nil
+	case "thread_rolled_back":
+		return []canonical.Event{m.logEntry(advance(tsUs), "INF", "thread_rolled_back", nil)}, nil
+	case "entered_review_mode", "exited_review_mode":
+		return []canonical.Event{m.logEntry(advance(tsUs), "INF", p.Type, nil)}, nil
+	case "item_completed":
+		// Plan items (spec gap #11): INF log for now.
+		return []canonical.Event{m.logEntry(advance(tsUs), "INF", "item_completed", nil)}, nil
+	default:
+		// thread_goal_updated, guardian_assessment, view_image_tool_call,
+		// dynamic_tool_call_*, and any future persisted variant: keep visible.
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "event_msg:"+p.Type, nil)}, nil
+	}
+}
+
+// mapTaskStarted handles event_msg.task_started (alias turn_started) (spec rule
+// #3, #22). The turn start time is the later of the wire timestamp and
+// started_at (unix seconds, converted to micros). It opens the turn for turn_id
+// with that time, appends whatever emitTurnStarted yields (idempotent with
+// turn_context), and stores a positive model_context_window as the turn's
+// ctxMax (spec rule #3, #17).
+func (m *fileMapper) mapTaskStarted(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	startUs := startedAtMicros(rec.Raw)
+	if startUs <= tsUs {
+		startUs = tsUs
+	}
+	turn := m.openTurn(rec.EventMsg.TurnID, startUs)
+	events := make([]canonical.Event, 0, 1)
+	if started := m.emitTurnStarted(turn, advance(startUs)); started != nil {
+		events = append(events, started)
+	}
+	if window := modelContextWindow(rec.Raw); window > 0 {
+		turn.ctxMax = window
+	}
+	return events
+}
+
+// mapTaskComplete handles event_msg.task_complete (alias turn_complete) (spec
+// rule #4). For an unknown or already-finalized turn it emits only a WRN
+// LogEntry. Otherwise, in order, it: applies the accumulated CtxUsed/CtxMax to
+// the turn's last LLM op (spec rule #17); finalizes every dangling op of the
+// turn with the inferred status "completed" (edge #9); finalizes the turn as
+// "completed"; and appends the turn_meta LogEntry when there is one. EndTs is
+// completed_at when present (> 0), else the record timestamp; a positive
+// time_to_first_token_ms is stored as the turn's ttft_ms.
+func (m *fileMapper) mapTaskComplete(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	turnID := rec.EventMsg.TurnID
+	turn, known := m.turns[turnID]
+	if !known || turn.finalized {
+		// A stray or repeated completion must not double-close the turn: warn and skip.
+		warn := m.logEntry(advance(tsUs), "WRN", "task_complete_no_turn", map[string]any{"turn_id": turnID})
+		return []canonical.Event{warn}
+	}
+	endUs := tsUs
+	if completed := completedAtMicros(rec.Raw); completed > 0 {
+		endUs = completed
+	}
+	if ttft := ttftMillis(rec.Raw); ttft > 0 {
+		turn.ttftMs = ttft
+	}
+	nextBase := func() canonical.EventBase { return advance(endUs) }
+	events := make([]canonical.Event, 0, 4)
+	// Ctx first (spec rule #17), then dangling ops, then the turn close itself.
+	if ctxEvent, emitted := m.applyLLMCtx(turn, nextBase); emitted {
+		events = append(events, ctxEvent)
+	}
+	events = append(events, m.finalizeDanglingOps(turnID, nextBase, endUs, "completed")...)
+	events = append(events, m.finalizeTurn(turn, nextBase(), endUs, "completed", ""))
+	if meta := m.turnExtrasLog(turn, nextBase()); meta != nil {
+		events = append(events, meta)
+	}
+	return events
+}
+
+// mapTurnAborted handles event_msg.turn_aborted (spec rule #5, edge #2): dangling
+// ops finalize as "cancelled" (edge #9), the turn as "failed" with the reason's
+// ErrorClass; EndTs is completed_at when > 0. Unknown/closed turn → WRN log only.
+func (m *fileMapper) mapTurnAborted(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	turnID := rec.EventMsg.TurnID
+	turn, known := m.turns[turnID]
+	if !known || turn.finalized {
+		warn := m.logEntry(advance(tsUs), "WRN", "turn_aborted_no_turn", map[string]any{"turn_id": turnID})
+		return []canonical.Event{warn}
+	}
+	endUs := tsUs
+	if completed := completedAtMicros(rec.Raw); completed > 0 {
+		endUs = completed
+	}
+	nextBase := func() canonical.EventBase { return advance(endUs) }
+	events := make([]canonical.Event, 0, 3)
+	events = append(events, m.finalizeDanglingOps(turnID, nextBase, endUs, "cancelled")...)
+	events = append(events, m.finalizeTurn(turn, nextBase(), endUs, "failed", abortErrorClass(rec.EventMsg.Reason)))
+	if meta := m.turnExtrasLog(turn, nextBase()); meta != nil {
+		events = append(events, meta)
+	}
+	return events
+}
+
+// mapTokenCount handles event_msg.token_count (spec rule #17, C#1). It folds the
+// per-call last_token_usage into the attributed turn's rollup and stashes the
+// cumulative total / model_context_window on that turn (see addTokenUsage). A
+// token_count carrying a known turn_id attributes to that turn; otherwise it
+// attributes to the most-recently-active turn ("Token accounting nuance").
+//
+// token_count itself never emits an event: the return value is always nil, and
+// advance/tsUs are accepted only to match the sibling handlers' signature. The
+// line is decoded only after a turn has been resolved, so an unattributable
+// token_count costs no JSON decode.
+func (m *fileMapper) mapTokenCount(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	ts := m.tokenTurn(rec.EventMsg.TurnID)
+	if ts == nil {
+		// No turn to attribute to yet (token_count before any turn opened): the
+		// counts cannot be attributed, so they are dropped silently — no event
+		// is emitted (rare and not load-bearing).
+		return nil
+	}
+	ts.addTokenUsage(decodeTokenCount(rec.Raw))
+	return nil
+}
+
+// tokenTurn resolves the turn a token_count attributes to (spec rule #17,
+// "Token accounting nuance"). A non-empty turnID present in m.turns wins;
+// otherwise the active turn is used when one is flagged active and present in
+// m.turns. Returns nil when neither lookup succeeds.
+func (m *fileMapper) tokenTurn(turnID string) *turnState {
+	if ts, known := m.turns[turnID]; known && turnID != "" {
+		return ts
+	}
+	if !m.haveActiveTurn {
+		return nil
+	}
+	if ts, known := m.turns[m.activeTurnID]; known {
+		return ts
+	}
+	return nil
+}
+
+// applyLLMCtx builds the OpFinalizedEvent that stamps CtxUsed/CtxMax on the
+// turn's last LLM op (spec rule #17); EndTs falls back to the turn start when no
+// LLM end time is stored. Returns (zero, false) without an LLM op or any ctx value.
+func (m *fileMapper) applyLLMCtx(ts *turnState, base func() canonical.EventBase) (canonical.OpFinalizedEvent, bool) {
+	hasLLMOp := ts.lastLLMOpSeq != 0
+	hasCtx := ts.lastLLMCtxUsed != 0 || ts.ctxMax != 0
+	if !hasLLMOp || !hasCtx {
+		return canonical.OpFinalizedEvent{}, false
+	}
+	ev := canonical.OpFinalizedEvent{
+		EventBase:       base(),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         ts.seq,
+		Seq:             ts.lastLLMOpSeq,
+		Status:          "completed",
+		EndTs:           ts.lastLLMEndTs,
+		CtxUsed:         ts.lastLLMCtxUsed,
+		CtxMax:          ts.ctxMax,
+	}
+	if ev.EndTs == 0 {
+		ev.EndTs = ts.startTsUs
+	}
+	return ev, true
+}
+
+// stashAgentMessage records event_msg.agent_message.message as the active
+// turn's last_agent_message preview (spec rule #19), trimmed via trimPreview to
+// previewMax. It does nothing when no turn is active or the preview is empty, so
+// an earlier stashed preview is never cleared.
+func (m *fileMapper) stashAgentMessage(msg string) {
+	active, found := m.turns[m.activeTurnID]
+	if !m.haveActiveTurn || !found {
+		return
+	}
+	if preview := trimPreview(msg, previewMax); preview != "" {
+		active.lastAgentMessage = preview
+	}
+}
+
+// abortErrorClass maps a turn_aborted reason to a canonical ErrorClass (spec
+// rule #5):
+//
+//	interrupted    → user_interrupt
+//	budget_limited → rate_limit
+//	anything else  → the reason verbatim (replaced, review_ended, future values)
+func abortErrorClass(reason string) string {
+	switch reason {
+	case "interrupted":
+		return "user_interrupt"
+	case "budget_limited":
+		return "rate_limit"
+	}
+	// "replaced", "review_ended" and unknown reasons map to themselves.
+	return reason
+}
+
+// turnExtrasLog emits an INF "turn_meta" LogEntry carrying the turn's metadata
+// that the spec routes to turns.extras_json — codex_turn_id, sandbox, effort,
+// approval_policy, ttft_ms, last_agent_message (spec "Canonical Model Gaps" #2,
+// #3, #8; rule #19). Only non-empty (ttft_ms: positive) values are included, and
+// the entry's TurnSeq is set to the turn's seq so it is scoped to that turn.
+//
+// IMPORTANT (canonical-model gap surfaced in Chunk B): the canonical
+// TurnFinalizedEvent has NO Extras field and the ingest writer's turns INSERT
+// (internal/ingest/writer.go) does not populate turns.extras_json from any
+// event, so these values cannot reach turns.extras_json today. Emitting them as
+// a turn-scoped LogEntry keeps the data DURABLE and VISIBLE (no silent loss)
+// without touching the canonical schema or the writer, both out of Chunk B
+// scope. A follow-up SOW should add a turn-extras carrier (a TurnFinalized
+// Extras field or a turn-scoped SessionUpdated-style event) so the data lands in
+// turns.extras_json as the spec intends. Returns nil when the turn carried no
+// surfaced metadata.
+func (m *fileMapper) turnExtrasLog(ts *turnState, base canonical.EventBase) canonical.Event {
+	extras := map[string]any{}
+	if ts.codexTurnID != "" {
+		extras["codex_turn_id"] = ts.codexTurnID
+	}
+	if ts.sandbox != "" {
+		extras["sandbox"] = ts.sandbox
+	}
+	if ts.effort != "" {
+		extras["effort"] = ts.effort
+	}
+	if ts.approvalPolicy != "" {
+		extras["approval_policy"] = ts.approvalPolicy
+	}
+	if ts.ttftMs > 0 {
+		extras["ttft_ms"] = ts.ttftMs
+	}
+	if ts.lastAgentMessage != "" {
+		extras["last_agent_message"] = ts.lastAgentMessage
+	}
+	if len(extras) == 0 {
+		return nil
+	}
+	le := m.logEntry(base, "INF", "turn_meta", extras)
+	le.TurnSeq = ts.seq
+	return le
+}
+
+// errorExtras returns {"message": preview} for a non-empty error message, else nil.
+func errorExtras(p *eventMsgPayload) map[string]any {
+	if msg := p.Message; msg != "" {
+		return map[string]any{"message": trimPreview(msg, previewMax)}
+	}
+	return nil
+}
+
+// startedAtMicros reads payload.started_at (unix seconds, task_started) from the
+// raw envelope and converts it to micros; 0 when absent or zero (spec rule #3).
+func startedAtMicros(raw []byte) int64 {
+	const microsPerSecond = 1_000_000
+	if secs := payloadNumber(raw, "started_at"); secs != 0 {
+		return secs * microsPerSecond
+	}
+	return 0
+}
+
+// completedAtMicros reads task_complete/turn_aborted.completed_at and returns it
+// in micros (0 when absent, unparseable, or implausibly large). It accepts an
+// RFC3339 string or a unix-seconds number, integral or fractional (rule #4, #5).
+func completedAtMicros(raw []byte) int64 {
+	var env struct {
+		Payload struct {
+			CompletedAt json.RawMessage `json:"completed_at"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return 0
+	}
+	body := jsonTrim(env.Payload.CompletedAt)
+	if len(body) == 0 {
+		return 0
+	}
+	var s string
+	if json.Unmarshal(body, &s) == nil {
+		if us, err := parseTsToMicros(s); err == nil {
+			return us
+		}
+		return 0
+	}
+	var secs float64
+	if json.Unmarshal(body, &secs) == nil && secs > -1e12 && secs < 1e12 {
+		return int64(secs * 1_000_000)
+	}
+	return 0
+}
+
+// ttftMillis reads payload.time_to_first_token_ms of task_complete; 0 when absent (spec gap #8).
+func ttftMillis(raw []byte) int64 { return payloadNumber(raw, "time_to_first_token_ms") }
+
+// modelContextWindow reads payload.model_context_window (task_started /
+// token_count); 0 when absent or non-numeric (spec rule #3, #17).
+func modelContextWindow(raw []byte) int64 { return payloadNumber(raw, "model_context_window") }
+
+// payloadNumber extracts a numeric payload field as int64 (fractions truncate).
+// Returns 0 when absent, null, non-numeric, or outside the int64 range.
+func payloadNumber(raw []byte, field string) int64 {
+	var env struct {
+		Payload map[string]json.RawMessage `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return 0
+	}
+	body := jsonTrim(env.Payload[field])
+	if len(body) == 0 {
+		return 0
+	}
+	var n int64
+	if json.Unmarshal(body, &n) == nil {
+		return n
+	}
+	// Float fallback; out-of-range float→int conversion is implementation-defined.
+	var f float64
+	if json.Unmarshal(body, &f) == nil && f >= -0x1p63 && f < 0x1p63 {
+		return int64(f)
+	}
+	return 0
+}
diff --git a/internal/adapters/codex/ops_response.go b/internal/adapters/codex/ops_response.go
new file mode 100644
index 0000000..9bc3f01
--- /dev/null
+++ b/internal/adapters/codex/ops_response.go
@@ -0,0 +1,244 @@
+package codex
+
+import (
+	"encoding/json"
+	"strings"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// mapResponseItem routes one response_item record to the emitter for its nested
+// payload.type (spec rules #6-13, #20). The parser has already validated that
+// discriminator and skips empty types, so none reaches this switch. Emitters
+// get the record timestamp plus the verbatim line length (used to size
+// PayloadRef.OriginalBytes); mapMessage also gets the record to read Raw.
+func (m *fileMapper) mapResponseItem(rec record, advance func(int64) canonical.EventBase) ([]canonical.Event, error) {
+	item := rec.ResponseItem
+	if item == nil {
+		return nil, nil
+	}
+	ts, size := m.recordTs(rec), int64(len(rec.Raw))
+	var events []canonical.Event
+	switch item.Type {
+	case "message":
+		events = m.mapMessage(rec, advance, ts, size)
+	case "reasoning":
+		events = m.mapReasoning(item, advance, ts, size)
+	case "function_call", "custom_tool_call", "local_shell_call",
+		"tool_search_call":
+		events = m.mapToolCall(item, advance, ts, size)
+	case "function_call_output", "custom_tool_call_output", "local_shell_call_output",
+		"tool_search_output":
+		events = m.mapToolOutput(item, advance, ts, size)
+	case "web_search_call":
+		events = m.mapWebSearchCall(item, advance, ts, size)
+	case "image_generation_call":
+		events = m.mapImageGenCall(item, advance, ts, size)
+	case "compaction", "context_compaction":
+		// Both compaction variants become a single OpCompaction (spec rule #20,
+		// gap #4); encrypted_content is opaque, so no preview is attached.
+		events = m.emitCompactionOp(advance, ts, map[string]any{"trigger": "auto"}, "json")
+	default:
+		// Unreachable under the parser allowlist; DBG-log any future unmapped variant.
+		events = []canonical.Event{m.logEntry(advance(ts), "DBG", "response_item:"+item.Type, nil)}
+	}
+	return events, nil
+}
+
+// mapMessage handles response_item.message (spec rule #6 user, #7 assistant). A
+// user-role message is delegated to emitUserInput (deduped user_input op). Any
+// other role emits, in order: TurnStarted (only if emitTurnStarted returns one),
+// an OpLLM started/finalized pair stamped with m.model, an llm_response
+// PayloadRef, and an INF "final_answer" LogEntry when payload.phase says so.
+func (m *fileMapper) mapMessage(rec record, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	p := rec.ResponseItem
+	if p.Role == "user" {
+		return m.emitUserInput(advance, tsUs, messageText(p.Content), "json", bodyBytes)
+	}
+	// assistant / system / developer → llm op (the assistant is the LLM output;
+	// system/developer messages are rare inline instructions, still llm-kind so
+	// they show on the timeline with the model).
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 4) // NOTE(review): up to 5 events are appended below
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	ts.lastLLMOpSeq = opSeq
+	ts.lastLLMEndTs = tsUs
+	out = append(out,
+		canonical.OpStartedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			ParentOpSeq:     -1,
+			Kind:            canonical.OpLLM,
+			Name:            "message",
+			Model:           m.model,
+			Provider:        provider,
+		},
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			Status:          "completed",
+			EndTs:           tsUs,
+		},
+		m.payloadRef(advance(tsUs), turnSeq, opSeq, "llm_response", "json", bodyBytes),
+	)
+	if phaseFromRaw(rec.Raw) == "final_answer" {
+		out = append(out, m.logEntry(advance(tsUs), "INF", "final_answer", nil))
+	}
+	return out
+}
+
+// emitUserInput emits the internal user_input op pair + body PayloadRef, deduped
+// against the companion event_msg.user_message form (spec rule #6, #18). The
+// dedup key is userFingerprint(text): when firstSeenUser reports it was already
+// seen, nil is returned so the UI sees exactly one user op per logical input.
+// Otherwise ensureTurn supplies the turn (old-CLI boundary, spec edge #3).
+func (m *fileMapper) emitUserInput(advance func(int64) canonical.EventBase, tsUs int64, text, format string, bodyBytes int64) []canonical.Event {
+	if !m.firstSeenUser(userFingerprint(text)) {
+		return nil
+	}
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 4)
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	out = append(out,
+		canonical.OpStartedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			ParentOpSeq:     -1,
+			Kind:            canonical.OpInternal,
+			Name:            "user_input",
+		},
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			Status:          "completed",
+			EndTs:           tsUs,
+		},
+		m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", format, bodyBytes),
+	)
+	return out
+}
+
+// mapReasoning handles response_item.reasoning (spec rule #8, acceptance #4). It
+// emits an OpReasoning started/finalized pair (both at tsUs, status "completed")
+// whose ReasoningKind and PayloadRef format come from reasoningKind: "summary"/
+// text when only summary[] is non-empty, otherwise "raw"/json. The body goes to
+// an llm_reasoning PayloadRef. event_msg.agent_reasoning* is a LogEntry only
+// (ops_event.go) so the UI never sees a duplicate reasoning op.
+func (m *fileMapper) mapReasoning(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	kind, format := reasoningKind(p)
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 4)
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	out = append(out,
+		canonical.OpStartedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			ParentOpSeq:     -1,
+			Kind:            canonical.OpReasoning,
+			Name:            "reasoning",
+			ReasoningKind:   kind,
+		},
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			Status:          "completed",
+			EndTs:           tsUs,
+		},
+		m.payloadRef(advance(tsUs), turnSeq, opSeq, "llm_reasoning", format, bodyBytes),
+	)
+	return out
+}
+
+// reasoningKind labels a reasoning item and picks its PayloadRef format (spec
+// rule #8, acceptance #4). A non-empty content[] or a set encrypted_content
+// means ("raw", "json") and takes precedence over a summary, since the durable
+// model state is the fuller record. A non-empty summary[] on its own means
+// ("summary", "text").
+func reasoningKind(p *responseItemPayload) (kind, format string) {
+	summarized := jsonArrayNonEmpty(p.Summary)
+	hasBody := jsonArrayNonEmpty(p.Content)
+	encrypted := len(jsonTrim(p.EncryptedContent)) != 0
+	if hasBody || encrypted {
+		return "raw", "json"
+	}
+	if summarized {
+		return "summary", "text"
+	}
+	// Nothing discernible (rare). Report raw rather than labeling the op as a
+	// summary it does not carry; this is also where an otherwise opaque item
+	// ends up.
+	return "raw", "json"
+}
+
+// jsonArrayNonEmpty reports whether raw decodes as a JSON array holding at
+// least one element. Absent/empty input and non-array values report false.
+func jsonArrayNonEmpty(raw json.RawMessage) bool {
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 {
+		return false
+	}
+	var elems []json.RawMessage
+	err := json.Unmarshal(trimmed, &elems)
+	// Anything that fails to decode as an array (object, string, number, or
+	// malformed JSON) counts as empty.
+	return err == nil && len(elems) != 0
+}
+
+// messageText joins the text of every element of a message content[] array, in
+// order and with no separator, to feed the user-dedup fingerprint (spec rule
+// #6). Elements look like {type:"input_text"|"output_text"|..., text}. The
+// result is "" when content is absent, fails to decode, or carries no text.
+func messageText(raw json.RawMessage) string {
+	type textPart struct {
+		Text string `json:"text"`
+	}
+	var parts []textPart
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 || json.Unmarshal(trimmed, &parts) != nil {
+		return ""
+	}
+	// Elements without a text field contribute the empty string, so they
+	// vanish from the concatenation.
+	texts := make([]string, len(parts))
+	for i := range parts {
+		texts[i] = parts[i].Text
+	}
+	return strings.Join(texts, "")
+}
+
+// phaseFromRaw returns the optional message.phase ("commentary" |
+// "final_answer", spec rule #7). The typed responseItemPayload does not keep
+// it, so it is decoded from the verbatim line; "" when absent or unparseable.
+func phaseFromRaw(raw []byte) string {
+	type phasePayload struct {
+		Phase string `json:"phase"`
+	}
+	var line struct {
+		Payload phasePayload `json:"payload"`
+	}
+	if err := json.Unmarshal(raw, &line); err != nil {
+		return ""
+	}
+	return line.Payload.Phase
+}
diff --git a/internal/adapters/codex/ops_tools.go b/internal/adapters/codex/ops_tools.go
new file mode 100644
index 0000000..b9c6ad6
--- /dev/null
+++ b/internal/adapters/codex/ops_tools.go
@@ -0,0 +1,230 @@
+package codex
+
+import (
+	"encoding/json"
+	"strings"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// mapToolCall handles function_call / custom_tool_call / local_shell_call /
+// tool_search_call (spec rule #9, #10, #13). It emits an OpStarted (Kind=tool)
+// named and namespaced by toolNameNamespace, with call_id copied into Extras
+// when non-empty, then a tool_request PayloadRef, and registers the op via
+// trackOp under call_id. No OpFinalized is emitted here: mapToolOutput does
+// that on the matching *_output (or turn close, as dangling — spec edge #9).
+func (m *fileMapper) mapToolCall(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 2) // NOTE(review): up to 3 events are appended below
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	name, namespace := toolNameNamespace(p)
+	extras := map[string]any{}
+	if p.CallID != "" {
+		extras["call_id"] = p.CallID
+	}
+	out = append(out, canonical.OpStartedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         turnSeq,
+		Seq:             opSeq,
+		ParentOpSeq:     -1,
+		Kind:            canonical.OpTool,
+		Name:            name,
+		ToolNamespace:   namespace,
+		Extras:          extras,
+	})
+	// Arguments string → tool_request PayloadRef (spec rule #9). The ref is
+	// sized by bodyBytes and skipped only when there is no body to point at.
+	if bodyBytes > 0 {
+		out = append(out, m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", "json", bodyBytes))
+	}
+	m.trackOp(p.CallID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name)
+	return out
+}
+
+// mapToolOutput handles function_call_output / custom_tool_call_output /
+// local_shell_call_output / tool_search_output (spec rule #9). It looks up the
+// open op by call_id, finalizes it with the status/ErrorClass that outputStatus
+// derives from the output body (spec edge #5), attaches a tool_response
+// PayloadRef, and drops the op from m.openOps. An output with no matching op,
+// or an already-finalized one, is surfaced as a WRN "tool_output_unmatched"
+// LogEntry instead (spec edge #10): the mapper has no SourceError channel, so
+// the WRN log keeps it visible without dropping silently.
+func (m *fileMapper) mapToolOutput(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	op, ok := m.openOps[p.CallID]
+	if !ok || op.finalized {
+		// Unmatched / already-finalized output: surface and skip (spec edge #10).
+		return []canonical.Event{m.logEntry(advance(tsUs), "WRN", "tool_output_unmatched", map[string]any{"call_id": p.CallID})}
+	}
+	op.finalized = true
+	status, errClass := outputStatus(p.Output)
+	out := []canonical.Event{
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         op.turnSeq,
+			Seq:             op.opSeq,
+			Status:          status,
+			ErrorClass:      errClass,
+			EndTs:           tsUs,
+		},
+	}
+	if bodyBytes > 0 {
+		out = append(out, m.payloadRef(advance(tsUs), op.turnSeq, op.opSeq, "tool_response", "json", bodyBytes))
+	}
+	delete(m.openOps, p.CallID)
+	return out
+}
+
+// mapWebSearchCall handles response_item.web_search_call (spec rule #11) by
+// delegating to emitSingleToolOp: a tool op with Name=web_search and
+// namespace=web, tracked by call_id. The companion event_msg.web_search_end
+// enriches it with the query/action (ops_event.go); per that design the op is
+// finalized at turn close if no end arrives.
+func (m *fileMapper) mapWebSearchCall(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	return m.emitSingleToolOp(p.CallID, "web_search", "web", advance, tsUs, bodyBytes)
+}
+
+// mapImageGenCall handles response_item.image_generation_call (spec rule #12)
+// by delegating to emitSingleToolOp: a tool op Name=image_generation,
+// namespace=media, keyed for the event_msg.image_generation_end enrichment.
+func (m *fileMapper) mapImageGenCall(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	// image_generation_call uses `id`, not `call_id`, and the typed payload keeps
+	// only call_id. So p.CallID is passed as the tracking key; when it is empty
+	// the op is left untracked, the end event enriches by the same empty key
+	// path, and the op finalizes at turn close.
+	return m.emitSingleToolOp(p.CallID, "image_generation", "media", advance, tsUs, bodyBytes)
+}
+
+// emitSingleToolOp emits a tool OpStarted (ParentOpSeq -1) for name/namespace,
+// a tool_request PayloadRef when bodyBytes > 0, and registers the op via trackOp
+// under callID so a later event_msg end-event can enrich it. Shared by
+// web_search and image_generation (spec rule #11, #12).
+func (m *fileMapper) emitSingleToolOp(callID, name, namespace string, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 2) // NOTE(review): up to 3 events are appended below
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	out = append(out, canonical.OpStartedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         turnSeq,
+		Seq:             opSeq,
+		ParentOpSeq:     -1,
+		Kind:            canonical.OpTool,
+		Name:            name,
+		ToolNamespace:   namespace,
+	})
+	if bodyBytes > 0 {
+		out = append(out, m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", "json", bodyBytes))
+	}
+	m.trackOp(callID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name)
+	return out
+}
+
+// toolNameNamespace picks the canonical op Name and ToolNamespace for a tool
+// call (spec rule #9). Codex does not pre-namespace tools on disk: custom_tool_call,
+// local_shell_call and tool_search_call imply their namespace by type, and any
+// other call is classified from its name by namespaceForName.
+func toolNameNamespace(p *responseItemPayload) (name, namespace string) {
+	// nameOr substitutes a default when the payload carries no tool name.
+	nameOr := func(fallback string) string {
+		if p.Name == "" {
+			return fallback
+		}
+		return p.Name
+	}
+	switch p.Type {
+	case "custom_tool_call":
+		return p.Name, "custom"
+	case "local_shell_call":
+		// Legacy .json shell op (spec rule #13).
+		return nameOr("shell"), "shell"
+	case "tool_search_call":
+		return nameOr("tool_search"), "custom"
+	}
+	return p.Name, namespaceForName(p.Name)
+}
+
+// namespaceForName classifies a function_call tool by name (spec rule #9
+// heuristic). mcp routing is not decided here: event_msg.mcp_tool_call_end
+// later sets tool_namespace="mcp:<server>" on the matching op (ops_event.go).
+func namespaceForName(name string) string {
+	// Prefix rule first: every exec* tool is a shell tool.
+	if strings.HasPrefix(name, "exec") {
+		return "shell"
+	}
+	// The remaining rules are exact, case-sensitive matches.
+	switch name {
+	case "shell", "shell_command":
+		return "shell"
+	case "apply_patch", "read", "write", "edit", "list_dir", "view_image":
+		return "fs"
+	}
+	return "custom"
+}
+
+// outputStatus maps a tool output body to the op's terminal status (spec rule
+// #9, edge #5). The body (a bare string, or an {output} / {content} object) is
+// reduced to a lower-cased scan string: a sandbox-denial phrase gives ("failed",
+// "sandbox_denied"), a generic error signal gives ("failed", "tool_error"), and
+// everything else, including an empty body, gives ("completed", "").
+func outputStatus(raw json.RawMessage) (status, errClass string) {
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 {
+		return "completed", ""
+	}
+	scan := strings.ToLower(outputText(trimmed))
+	has := func(needle string) bool { return strings.Contains(scan, needle) }
+	if has("denied by sandbox") || has("operation not permitted") || has("sandbox deny") {
+		return "failed", "sandbox_denied"
+	}
+	// Sandbox denial is checked first, so it wins over a generic error signal.
+	if has("\"error\"") || strings.HasPrefix(scan, "error:") || has("exit code 1") || has("command failed") {
+		return "failed", "tool_error"
+	}
+	return "completed", ""
+}
+
+// outputText flattens a tool output body into the string the status heuristic
+// scans; it is never surfaced as content. A bare JSON string yields its value,
+// an object yields its first non-empty `output`/`content` field (string value
+// or raw JSON), and anything else yields the raw bytes verbatim.
+func outputText(body json.RawMessage) string {
+	var plain string
+	if err := json.Unmarshal(body, &plain); err == nil {
+		return plain
+	}
+	var wrapper struct {
+		Output  json.RawMessage `json:"output"`
+		Content json.RawMessage `json:"content"`
+	}
+	if err := json.Unmarshal(body, &wrapper); err != nil {
+		return string(body)
+	}
+	for _, field := range []json.RawMessage{wrapper.Output, wrapper.Content} {
+		if text := scalarOrJSON(field); text != "" {
+			return text
+		}
+	}
+	return string(body)
+}
+
+// scalarOrJSON renders one JSON value as text: the decoded value when it is a
+// JSON string, the raw JSON otherwise, and "" when the value is absent.
+func scalarOrJSON(raw json.RawMessage) string {
+	trimmed := jsonTrim(raw)
+	if len(trimmed) == 0 {
+		return ""
+	}
+	var decoded string
+	if err := json.Unmarshal(trimmed, &decoded); err != nil {
+		return string(trimmed)
+	}
+	return decoded
+}

From d5d753f77f8438c6738a9d6559298ffa3833293a Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 12:39:10 +0300
Subject: [PATCH 03/13] SOW-0021: file turn-extras-carrier follow-up
 (turns.extras_json unreachable)

Track the infra gap found during SOW-0004 Chunk B: the turns table has an
extras_json column but no canonical turn event carries Extras and the
writer never populates it, so per-turn metadata (codex_turn_id, sandbox,
ttft_ms; claude-code turn_duration) is structurally unreachable. Codex
ships an interim no-loss turn_meta LogEntry; this follow-up adds the real
carrier to the canonical event + ingest writer.
---
 .../SOW-0021-20260530-turn-extras-carrier.md  | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 .agents/sow/pending/SOW-0021-20260530-turn-extras-carrier.md

diff --git a/.agents/sow/pending/SOW-0021-20260530-turn-extras-carrier.md b/.agents/sow/pending/SOW-0021-20260530-turn-extras-carrier.md
new file mode 100644
index 0000000..d96d672
--- /dev/null
+++ b/.agents/sow/pending/SOW-0021-20260530-turn-extras-carrier.md
@@ -0,0 +1,77 @@
+# SOW-0021 - turn-extras carrier (populate turns.extras_json)
+
+## Status
+
+Status: open
+
+Sub-state: proposed follow-up, awaiting operator prioritization. Discovered during SOW-0004 (codex adapter) Chunk B. Not blocking SOW-0004 — codex ships an interim no-loss workaround.
+
+## Requirements
+
+### Purpose
+
+Make `turns.extras_json` reachable. The `turns` table defines an `extras_json TEXT` column (data-model.md:112, with `codex_turn_id` cited as the canonical example value), but **no canonical turn event carries an `Extras` field** (`TurnStartedEvent`/`TurnFinalizedEvent` in `internal/canonical/events.go:220,233` have none) and the ingest writer never populates `turns.extras_json`. So every per-turn extra the specs promise is structurally unreachable from any adapter. This SOW adds a turn-extras carrier to the canonical event model + ingest writer so adapters can populate `turns.extras_json`, and migrates the codex adapter's interim surface onto it.
+
+### User Request
+
+Implied by the data-model + adapter specs, which document `turns.extras_json.{codex_turn_id,sandbox,ttft_ms}` (adapter-codex.md "Canonical Model Gaps" #2/#3/#8) and `claude-code system.subtype='turn_duration'` (data-model.md:112) as the durable home for per-turn metadata. SOW-0004 surfaced that this home is unwired.
+
+### Assistant Understanding
+
+Facts:
+
+- `internal/canonical/events.go`: `TurnStartedEvent` = {EventBase, SessionNativeID, Seq}; `TurnFinalizedEvent` = {EventBase, SessionNativeID, Seq, Status, ErrorClass, EndTs, Tokens*, CostUSD}. Neither carries `Extras`.
+- `internal/ingest/writer.go`: the `turns` UPSERT paths never write `extras_json`; the `graftAiViewerExtras` extras handling is ops/sessions-only.
+- `data-model.md:98-112`: the `turns` table has `extras_json TEXT`.
+- SOW-0004 codex adapter computes per-turn `codex_turn_id`, `sandbox`, and `ttft_ms` on its `turnState` and currently surfaces them via a single informational `turn_meta` LogEntry at turn finalize (no silent loss) — `internal/adapters/codex/ops_event.go` (`turnExtrasLog`).
+
+Inferences:
+
+- The cleanest carrier is an `Extras map[string]any` field on `TurnFinalizedEvent` (turn extras are known by the time the turn finalizes), mirroring how ops/sessions carry `Extras`; the writer then marshals it into `turns.extras_json` on the turn UPSERT, mirroring the ops/sessions extras write. A `TurnUpdatedEvent` is an alternative if mid-turn extras are ever needed, but no current adapter needs that.
+- This is shared infrastructure: claude-code (`turn_duration`), codex, and future adapters all benefit. It is deliberately out of SOW-0004's codex-only-additive blast radius.
+
+Unknowns:
+
+- Whether `turns.extras_json` needs the same per-key graft protection the ops/sessions paths use (re-emit safety). Turn finalize is terminal and single-shot per (session,seq), so a wholesale write is likely safe — confirm against the idempotent-write model (SOW-0015) during the gate.
+
+### Acceptance Criteria
+
+1. `TurnFinalizedEvent` carries an `Extras` field (or an equivalent carrier); `internal/canonical` tests cover it. **Verification**: `go build`/`go test` for canonical.
+2. The ingest writer marshals turn `Extras` into `turns.extras_json` on the turn UPSERT, idempotently. **Verification**: an ingester test asserts a `TurnFinalizedEvent{Extras:{...}}` lands in `turns.extras_json`, and a re-emit does not corrupt it.
+3. The codex adapter populates `turns.extras_json.{codex_turn_id,sandbox,ttft_ms}` via the carrier and **removes** the interim `turn_meta` LogEntry. **Verification**: codex golden/mapper tests assert the turn extras on the event; the `turn_meta` LogEntry is gone.
+4. Specs reconciled: adapter-codex.md "Canonical Model Gaps" v1-reachability note removed/updated; data-model.md + canonical-events.md describe the turn-extras carrier. **Verification**: spec-drift sweep clean.
+
+## Analysis
+
+Sources checked: `internal/canonical/events.go`, `internal/ingest/writer.go`, `.agents/sow/specs/{data-model.md,canonical-events.md,adapter-codex.md}`, `internal/adapters/codex/ops_event.go`.
+
+Current state: discovered 2026-05-30 during SOW-0004 Chunk B. Codex ships an interim no-loss `turn_meta` LogEntry; this SOW migrates to the real column.
+
+Risks:
+
+- **R1 — Shared-surface change.** Touches `internal/canonical` + `internal/ingest`, used by every adapter. Mitigation: additive field (no existing adapter sets it → no behavior change for v2/v3/claude-code until they opt in); full gate + external review.
+- **R2 — Idempotency.** Re-emitted turn finalize must not corrupt `turns.extras_json`. Mitigation: confirm against SOW-0015 idempotent-write model in the gate; test the re-emit path.
+
+## Pre-Implementation Gate
+
+(To be filled by the assistant picking this SOW up. Required before moving to `current/`.)
+
+## Implementation
+
+(Empty placeholder.)
+
+## Validation
+
+(Empty placeholder.)
+
+## Reviews
+
+(Empty placeholder.)
+
+## Outcome
+
+Pending.
+
+## Lessons / Follow-Ups
+
+Pending.

From ed72bd2d5ee92e5e5b8e4e38f07aecb4d6429313 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 13:16:18 +0300
Subject: [PATCH 04/13] codex adapter chunk C: shard-tree scanner + fsnotify
 tailer

Drives the Chunk-B mapper over the codex sessions tree.

- discovery.go: walks the YYYY/MM/DD shard tree, matches modern
  rollout-*.jsonl at any depth, classifies legacy flat rollout-*.json at
  the root, prunes archived_sessions/ and sqlite/history/index files.
  Fail-soft per entry (onError + skip + continue); only an unreadable
  root is fatal.
- scanner.go: scanAll (per-file read driver + legacy one-shot SourceError
  + progress cadence) and readRollout (containment open of the resolved
  path, truncation re-scan, rule-#24 first-line session_meta probe with
  offset held at 0 for retry, byte-offset stream, EOF stale-finalize only
  when fully read AND mtime >= 1h). Resume replays from offset 0 with an
  emit-gate so the per-file turn/op counters rebuild deterministically
  (zero-dup / zero-gap on restart, acceptance #6).
- stream.go: byte-offset line streaming reused verbatim from claude_code
  (partial-line hold-back, oversized-line skip-not-EOF), the symlink
  containment helpers, and the per-file unknown-variant dedup.
- tailer.go: fsnotify loop mirroring claude_code (debounce/tick/forced
  flush/catch-up) plus codex new-date-shard-dir handling (a new
  YYYY/MM/DD dir is added to the watch and its rollouts scanned).

scanner/tailer are internal drivers; the canonical.Adapter wiring +
auto-discovery land in Chunk D. Gates green: gofmt/vet/golangci(0)/
gosec(0); race tests pass; package coverage 91.9%; whole-repo green.
---
 .../adapters/codex/containment_branch_test.go | 104 +++
 .../adapters/codex/coverage_branch_test.go    | 164 +++++
 internal/adapters/codex/discovery.go          | 197 ++++++
 internal/adapters/codex/final_branch_test.go  | 139 ++++
 internal/adapters/codex/scanner.go            | 295 ++++++++
 .../adapters/codex/scanner_branch_test.go     | 217 ++++++
 internal/adapters/codex/scanner_test.go       | 645 ++++++++++++++++++
 internal/adapters/codex/stream.go             | 320 +++++++++
 internal/adapters/codex/stream_test.go        | 373 ++++++++++
 internal/adapters/codex/tailer.go             | 360 ++++++++++
 internal/adapters/codex/tailer_branch_test.go | 228 +++++++
 internal/adapters/codex/tailer_test.go        | 197 ++++++
 internal/adapters/codex/tailer_unit_test.go   | 309 +++++++++
 13 files changed, 3548 insertions(+)
 create mode 100644 internal/adapters/codex/containment_branch_test.go
 create mode 100644 internal/adapters/codex/coverage_branch_test.go
 create mode 100644 internal/adapters/codex/discovery.go
 create mode 100644 internal/adapters/codex/final_branch_test.go
 create mode 100644 internal/adapters/codex/scanner.go
 create mode 100644 internal/adapters/codex/scanner_branch_test.go
 create mode 100644 internal/adapters/codex/scanner_test.go
 create mode 100644 internal/adapters/codex/stream.go
 create mode 100644 internal/adapters/codex/stream_test.go
 create mode 100644 internal/adapters/codex/tailer.go
 create mode 100644 internal/adapters/codex/tailer_branch_test.go
 create mode 100644 internal/adapters/codex/tailer_test.go
 create mode 100644 internal/adapters/codex/tailer_unit_test.go

diff --git a/internal/adapters/codex/containment_branch_test.go b/internal/adapters/codex/containment_branch_test.go
new file mode 100644
index 0000000..bb43ec5
--- /dev/null
+++ b/internal/adapters/codex/containment_branch_test.go
@@ -0,0 +1,104 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestWithinResolvedRoot_ResolveError covers the EvalSymlinks-error branch of
+// withinResolvedRoot (and evalSymlinksAllowingTail's non-IsNotExist return) via
+// a path under a chmod-0o000 directory (EACCES, not IsNotExist). Skipped when
+// running as root, when chmod fails, or when the filesystem ignores 0o000.
+func TestWithinResolvedRoot_ResolveError(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root) // error ignored: root was just created by t.TempDir
+	blocked := filepath.Join(resolved, "blocked")
+	if err := os.MkdirAll(blocked, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if err := os.Chmod(blocked, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(blocked, 0o755) })
+	// A path UNDER the unreadable dir cannot be EvalSymlinks-resolved (EACCES on
+	// the existing-but-unreadable ancestor → non-IsNotExist error). Probe first.
+	under := filepath.Join(blocked, "child", "r.jsonl")
+	if _, err := filepath.EvalSymlinks(filepath.Dir(under)); err == nil {
+		t.Skip("filesystem allowed resolving under a 0o000 dir; resolve-error seam not exercised")
+	}
+
+	_, ok, err := withinResolvedRoot(resolved, under)
+	if err == nil {
+		t.Fatalf("withinResolvedRoot under unreadable dir = (ok=%v,nil), want a resolve error", ok)
+	}
+
+	// withinSourceRoot must report that resolve error via onError and return false.
+	var errs []string
+	if withinSourceRoot(resolved, under, func(e error) { errs = append(errs, e.Error()) }) {
+		t.Error("withinSourceRoot under unreadable dir should return false")
+	}
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "cannot resolve") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("resolve error not surfaced; errs=%v", errs)
+	}
+}
+
+// TestTailLoop_ResolveErrorDisablesCleanly covers tailLoop's handling of a root
+// it cannot reach: with an ancestor chmod-ed to 0o000, tailLoop must report the
+// failure through onError (either the resolve or the "not present" message) and
+// return nil. Skipped where 0o000 is not enforced.
+func TestTailLoop_ResolveErrorDisablesCleanly(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	parent := t.TempDir()
+	mid := filepath.Join(parent, "mid")
+	root := filepath.Join(mid, "sessions")
+	if err := os.MkdirAll(root, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	// Make the MIDDLE path component unsearchable so EvalSymlinks(root) fails
+	// with EACCES. os.Stat(root) may fail for the same reason, which is why the
+	// assertion below accepts either the resolve or the "not present" message.
+	if err := os.Chmod(mid, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(mid, 0o755) })
+	if _, err := filepath.EvalSymlinks(root); err == nil {
+		t.Skip("filesystem allowed resolving under a 0o000 dir; resolve-error seam not exercised")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+	out := make(chan canonical.Event, 4)
+	var errs []string
+	err := tailLoop(ctx, root, "codex:"+root, newCursor(), out, func(e error) { errs = append(errs, e.Error()) })
+	if err != nil {
+		t.Fatalf("tailLoop with resolve error = %v, want nil (disabled cleanly)", err)
+	}
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "cannot resolve sessions root") || strings.Contains(e, "not present") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("resolve/stat error not surfaced; errs=%v", errs)
+	}
+}
diff --git a/internal/adapters/codex/coverage_branch_test.go b/internal/adapters/codex/coverage_branch_test.go
new file mode 100644
index 0000000..5827aae
--- /dev/null
+++ b/internal/adapters/codex/coverage_branch_test.go
@@ -0,0 +1,164 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestStreamLines_SkipAndReplaySuppress covers two streamLines branches: a
+// skip=true line (ghost_snapshot) is passed over, and a resume with emitFrom>0
+// replays early lines to rebuild state but emits nothing for them (the !emit
+// branch).
+func TestStreamLines_SkipAndReplaySuppress(t *testing.T) {
+	t.Parallel()
+	src := strings.Join([]string{
+		metaLine("sid-s", `"exec"`),
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"ghost_snapshot"}}`, // skip
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+	}, "\n") + "\n"
+
+	// First, a full pass (emitFrom=0) to learn the byte offset after line 1.
+	m := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid-s"})
+	out := make(chan canonical.Event, 64)
+	full, err := streamLines(context.Background(), strings.NewReader(src), 0, "r.jsonl", m, newUnknownDedup(), out, func(error) {})
+	if err != nil {
+		t.Fatalf("full streamLines: %v", err)
+	}
+	_ = drainBuffered(out)
+	if full.advanced != int64(len(src)) {
+		t.Fatalf("full advanced = %d, want %d", full.advanced, len(src))
+	}
+
+	// Now replay with emitFrom past the session_meta line so it is rebuilt but
+	// NOT re-emitted (the !emit branch), and the ghost_snapshot skip still runs.
+	metaLen := int64(len(metaLine("sid-s", `"exec"`)) + 1)
+	m2 := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid-s"})
+	out2 := make(chan canonical.Event, 64)
+	res, err := streamLines(context.Background(), strings.NewReader(src), metaLen, "r.jsonl", m2, newUnknownDedup(), out2, func(error) {})
+	if err != nil {
+		t.Fatalf("replay streamLines: %v", err)
+	}
+	got := drainBuffered(out2)
+	// The session_meta line was below emitFrom → no SessionStarted re-emitted.
+	if countKind(got, canonical.EvSessionStarted) != 0 {
+		t.Errorf("replay re-emitted SessionStarted (%d); want 0 (below emitFrom)", countKind(got, canonical.EvSessionStarted))
+	}
+	// But the turn_context above emitFrom did emit a TurnStarted.
+	if countKind(got, canonical.EvTurnStarted) == 0 {
+		t.Error("replay did not emit the above-emitFrom TurnStarted")
+	}
+	if res.advanced != int64(len(src)) {
+		t.Errorf("replay advanced = %d, want %d", res.advanced, len(src))
+	}
+}
+
+// TestRelPath_Error covers relPath's error branch via a relative base (Rel of
+// an absolute target against a relative base fails).
+func TestRelPath_Error(t *testing.T) {
+	t.Parallel()
+	if _, err := relPath("relative-base", "/absolute/target"); err == nil {
+		t.Error("relPath(relative base, absolute target) should error")
+	}
+}
+
+// TestFileCursor_NilMap covers fileCursor's nil-map branch.
+func TestFileCursor_NilMap(t *testing.T) {
+	t.Parallel()
+	var c Cursor // Files is nil
+	if fc := c.fileCursor("any"); fc.Offset != 0 {
+		t.Errorf("nil-map fileCursor = %+v, want zero", fc)
+	}
+}
+
+// TestFirstRecordIsSessionMeta_BlankOnly covers the blank-line-then-EOF path:
+// a file of only blank lines has no parseable record → false.
+func TestFirstRecordIsSessionMeta_BlankOnly(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "r.jsonl")
+	if err := os.WriteFile(path, []byte("\n\n   \n"), 0o644); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	f, err := os.Open(path) // #nosec G304 -- test temp path
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer func() { _ = f.Close() }()
+	info, _ := f.Stat()
+	got, perr := firstRecordIsSessionMeta(f, info.Size())
+	if perr != nil || got {
+		t.Errorf("blank-only probe = (%v,%v), want (false,nil)", got, perr)
+	}
+}
+
+// TestMarkExistingDirty_WalkErrorSurfaced covers markExistingDirty's
+// non-IsNotExist walk-error branch via a chmod-000 subtree. Skipped where 0o000
+// is ignored.
+func TestMarkExistingDirty_WalkErrorSurfaced(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	newDir := filepath.Join(resolved, "2025", "12", "01")
+	deep := filepath.Join(newDir, "deep")
+	if err := os.MkdirAll(deep, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if err := os.Chmod(deep, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(deep, 0o755) })
+	if _, derr := os.ReadDir(deep); derr == nil {
+		t.Skip("filesystem allowed reading a 0o000 dir; walk-error seam not exercised")
+	}
+
+	dirty := map[string]struct{}{}
+	var errs []string
+	markExistingDirty(resolved, newDir, dirty, func(e error) { errs = append(errs, e.Error()) })
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "walk new dir") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("markExistingDirty walk-error not surfaced; errs=%v", errs)
+	}
+}
+
+// TestMarkExistingDirty_NilOnError asserts the nil-onError default is installed
+// (no panic when called with a nil callback).
+func TestMarkExistingDirty_NilOnError(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	shard := filepath.Join(resolved, "2025", "11", "20")
+	writeFileBytes(t, filepath.Join(shard, "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl"), completeSession("sid"))
+	dirty := map[string]struct{}{}
+	markExistingDirty(resolved, shard, dirty, nil) // nil onError must not panic
+	if len(dirty) != 1 {
+		t.Errorf("dirty count = %d, want 1", len(dirty))
+	}
+}
+
+// TestDiscoverRollouts_NilOnError asserts the nil-onError default (no panic) and
+// returns the modern file.
+func TestDiscoverRollouts_NilOnError(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	writeFileBytes(t, shardPath(root, uuid7(1)), completeSession("sid"))
+	disc, err := discoverRollouts(root, nil)
+	if err != nil {
+		t.Fatalf("discoverRollouts: %v", err)
+	}
+	if len(disc.modern) != 1 {
+		t.Errorf("modern = %d, want 1", len(disc.modern))
+	}
+}
diff --git a/internal/adapters/codex/discovery.go b/internal/adapters/codex/discovery.go
new file mode 100644
index 0000000..6956c90
--- /dev/null
+++ b/internal/adapters/codex/discovery.go
@@ -0,0 +1,197 @@
+package codex
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// modernExt is the required extension for a modern codex rollout file.
+const modernExt = ".jsonl"
+
+// rolloutPrefix is the required filename prefix for both modern and legacy
+// rollout files (openai/codex codex-rs/rollout/src/list.rs:898 filters on
+// starts_with("rollout-")).
+const rolloutPrefix = "rollout-"
+
+// archivedSessionsDir is the codex session archive, explicitly out of scope for
+// ingest (spec adapter-codex.md §"Filesystem Layout").
+const archivedSessionsDir = "archived_sessions"
+
+// modernNameRe matches a modern rollout filename: "rollout-*.jsonl" (the strict
+// upstream filter, codex-rs/rollout/src/list.rs:898,918,932). Anchored so a
+// name like "x-rollout-….jsonl" or "rollout-….jsonl.bak" does not match.
+var modernNameRe = regexp.MustCompile(`^rollout-.*\.jsonl$`)
+
+// legacyNameRe matches a legacy flat rollout filename: "rollout-*.json" (no
+// time component, directly under sessions/; spec §"Legacy `.json` layout").
+var legacyNameRe = regexp.MustCompile(`^rollout-.*\.json$`)
+
+// rollout describes one modern rollout file discovered under the sessions root.
+type rollout struct {
+	// rel is the path relative to the root (the cursor key),
+	// "YYYY/MM/DD/rollout-….jsonl", forward-slashed.
+	rel string
+	// abs is the absolute path on disk.
+	abs string
+}
+
+// discovered is the result of one discovery walk: the modern rollout files
+// (sorted by rel for deterministic replay) plus the basenames of the legacy
+// flat .json files found directly under the root.
+type discovered struct {
+	modern []rollout
+	legacy []string
+}
+
+// discoverRollouts walks the sessions root and returns every modern rollout
+// file (sorted by relative path) plus the legacy flat .json basenames found
+// directly under the root. Discovery is fail-soft per entry (SOW gate: a
+// non-IsNotExist error reading one shard/file surfaces a SourceError via
+// onError and is skipped so the walk continues); ONLY the configured root being
+// unresolvable or unreadable is fatal, and an absent root yields an empty
+// result. Modern files are matched by ^rollout-.*\.jsonl$ at ANY depth (the
+// YYYY/MM/DD shape is not checked); legacy files by ^rollout-.*\.json$ at the
+// root only. archived_sessions/ is pruned; any other name is ignored.
+//
+// Every matched modern path must pass withinSourceRoot before it is returned
+// (security.md §6 "No symlink traversal escape"); a path that fails the check
+// is skipped, with the error reporting left to withinSourceRoot via onError.
+func discoverRollouts(root string, onError func(error)) (discovered, error) {
+	if onError == nil {
+		onError = func(error) {}
+	}
+	resolvedRoot, rerr := filepath.EvalSymlinks(filepath.Clean(root))
+	if rerr != nil {
+		if os.IsNotExist(rerr) {
+			return discovered{}, nil
+		}
+		return discovered{}, fmt.Errorf("resolve sessions root %s: %w", root, rerr)
+	}
+	// ReadDir-probe the root: a non-IsNotExist failure (e.g. unreadable) is fatal
+	// (the source is broken), an absent root is benign-empty (first run).
+	if _, serr := os.ReadDir(root); serr != nil {
+		if os.IsNotExist(serr) {
+			return discovered{}, nil
+		}
+		return discovered{}, fmt.Errorf("read sessions root %s: %w", root, serr)
+	}
+	var out discovered
+	// Walk the RESOLVED root: filepath.WalkDir does not descend INTO a symlinked
+	// walk-root, so walking the unresolved root would yield nothing under a
+	// legitimately-symlinked sessions dir. Keys are rel to resolvedRoot, which
+	// equals rel to root for the same subtree (the tail handleEvent keys the
+	// same way, so scan and tail cursor keys match).
+	_ = filepath.WalkDir(resolvedRoot, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			if os.IsNotExist(err) {
+				if d != nil && d.IsDir() {
+					return filepath.SkipDir
+				}
+				return nil
+			}
+			// Fail-soft: surface the unreadable entry and continue past it. SkipDir
+			// on a directory prunes just that subtree; a file error is reported and
+			// the walk resumes with the next sibling.
+			onError(fmt.Errorf("codex: walk sessions tree %s: %w; skipping", path, err))
+			if d != nil && d.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if d.IsDir() {
+			// Prune the archive subtree (out of scope for ingest).
+			if d.Name() == archivedSessionsDir && path != resolvedRoot {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		name := d.Name()
+		atRoot := filepath.Dir(path) == resolvedRoot
+		switch {
+		case modernNameRe.MatchString(name):
+			if !withinSourceRoot(resolvedRoot, path, onError) {
+				return nil
+			}
+			rel, rrErr := relPath(resolvedRoot, path)
+			if rrErr != nil {
+				onError(fmt.Errorf("codex: relpath rollout %s: %w; skipping", path, rrErr))
+				return nil
+			}
+			out.modern = append(out.modern, rollout{rel: rel, abs: path})
+		case atRoot && legacyNameRe.MatchString(name):
+			// Legacy flat .json directly under the root: recorded by basename so a
+			// single informational SourceError is emitted once (rule #24/§Legacy).
+			out.legacy = append(out.legacy, name)
+		}
+		return nil
+	})
+	sort.Slice(out.modern, func(i, j int) bool { return out.modern[i].rel < out.modern[j].rel })
+	sort.Strings(out.legacy)
+	return out, nil
+}
+
+// relPath returns abs relative to root with forward slashes, the canonical
+// cursor key form. Mirrors claude_code.
+func relPath(root, abs string) (string, error) {
+	rel, err := filepath.Rel(root, abs)
+	if err != nil {
+		return "", fmt.Errorf("relpath %s under %s: %w", abs, root, err)
+	}
+	return filepath.ToSlash(rel), nil
+}
+
+// nativeIDForRollout derives the fallback session NativeID for a rollout file
+// from its filename (the UUIDv7 ThreadId tail of "rollout-<ts>-<ThreadId>"),
+// used as mapperConfig.nativeID. The mapper overrides this from the
+// session_meta.id when the meta is read (mapper.go applySessionMeta), so this
+// is only the pre-meta anchor; a file without a parseable id tail falls back to
+// the basename so events still attach to a stable id. Rule #24 ensures a
+// session_meta is present before this file is streamed, so the override
+// normally wins; the fallback only matters for the degenerate "meta present but
+// id empty" case.
+func nativeIDForRollout(r rollout) string {
+	base := strings.TrimSuffix(filepath.Base(r.abs), modernExt)
+	base = strings.TrimPrefix(base, rolloutPrefix)
+	// base is "YYYY-MM-DDTHH-MM-SS-<ThreadId>"; the ThreadId is a UUIDv7 whose
+	// five hyphen-separated groups are the last 5 of the dash-split. Extract the
+	// trailing UUID (8-4-4-4-12) when present; else use the whole tail.
+	if id := uuidTail(base); id != "" {
+		return id
+	}
+	return base
+}
+
+// uuidTail extracts the 8-4-4-4-12 UUID that ends a dash-joined filename stem
+// of the form "YYYY-MM-DDTHH-MM-SS-<8>-<4>-<4>-<4>-<12>". The last five dash
+// groups must each have the expected length and be hex; otherwise (or when
+// the stem has fewer than five groups) it returns "".
+func uuidTail(stem string) string {
+	groupLens := [...]int{8, 4, 4, 4, 12}
+	fields := strings.Split(stem, "-")
+	first := len(fields) - len(groupLens)
+	if first < 0 {
+		return ""
+	}
+	for i, wantLen := range groupLens {
+		if g := fields[first+i]; len(g) != wantLen || !isHex(g) {
+			return ""
+		}
+	}
+	return strings.Join(fields[first:], "-")
+}
+
+// isHex reports whether s is a non-empty run of ASCII hex digits (either case).
+func isHex(s string) bool {
+	for _, r := range s {
+		digit := '0' <= r && r <= '9'
+		letter := ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
+		if !digit && !letter {
+			return false
+		}
+	}
+	return s != ""
+}
diff --git a/internal/adapters/codex/final_branch_test.go b/internal/adapters/codex/final_branch_test.go
new file mode 100644
index 0000000..290085e
--- /dev/null
+++ b/internal/adapters/codex/final_branch_test.go
@@ -0,0 +1,139 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestStreamLines_CtxCancelAtLoopTop covers streamLines' ctx.Err() check at the
+// top of the loop (a context cancelled before any line is read returns
+// immediately with the cancel error and a zero advanced offset).
+func TestStreamLines_CtxCancelAtLoopTop(t *testing.T) {
+	t.Parallel()
+	m := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid"})
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	out := make(chan canonical.Event, 4)
+	res, err := streamLines(ctx, nil, 0, "r.jsonl", m, newUnknownDedup(), out, func(error) {})
+	if err == nil {
+		t.Fatal("streamLines on a cancelled ctx should return the cancel error at the loop top")
+	}
+	if res.advanced != 0 {
+		t.Errorf("advanced = %d, want 0 (nothing read before cancel)", res.advanced)
+	}
+}
+
+// TestFirstRecordIsSessionMeta_SeekError covers the Seek-error branch of the
+// rule-#24 probe: a closed file fails to Seek, surfacing the error.
+func TestFirstRecordIsSessionMeta_SeekError(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "r.jsonl")
+	if err := os.WriteFile(path, completeSession("sid"), 0o644); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	f, err := os.Open(path) // #nosec G304 -- test temp path
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	size := int64(100)
+	_ = f.Close() // close BEFORE probing so Seek fails on the closed fd
+	if _, perr := firstRecordIsSessionMeta(f, size); perr == nil {
+		t.Fatal("firstRecordIsSessionMeta on a closed file should return a Seek error")
+	}
+}
+
+// TestScan_CancelMidWalkReturnsCursor covers scanAll's ctx.Err() check between
+// files: a context cancelled before the walk reaches a file returns the cursor
+// and the cancel error path (scanAll returns nil from Scan via the adapter, but
+// scanAll itself returns the ctx error so the caller can decide).
+func TestScan_CancelMidWalkReturnsCursor(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	for i := 0; i < 3; i++ {
+		p := filepath.Join(root, "2025", "11", "20", "rollout-2025-11-20T10-00-"+pad2(i)+"-"+uuid7(i)+".jsonl")
+		writeFileBytes(t, p, completeSession("sid-"+pad2(i)))
+		setMtime(t, p, time.Minute)
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	out := make(chan canonical.Event, 256)
+	cur, err := scanAll(ctx, root, "codex:"+root, newCursor(), out, func(error) {})
+	if err == nil {
+		t.Fatal("scanAll with cancelled ctx should return the cancel error")
+	}
+	// The returned cursor is the best-effort resume point (empty here since the
+	// walk was cancelled before reading).
+	_ = cur
+}
+
+// TestScan_NilFilesCursor covers scanAll's `cur.Files == nil` initialisation
+// branch (a zero Cursor handed in is upgraded to a fresh cursor).
+func TestScan_NilFilesCursor(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(1))
+	writeFileBytes(t, path, completeSession("sid-z"))
+	setMtime(t, path, time.Minute)
+	events, _, final := scanCollect(t, root, "codex:"+root, Cursor{}) // nil Files
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("scan with a nil-Files cursor must still ingest")
+	}
+	if final.Files == nil {
+		t.Error("scanAll must initialise the cursor's Files map")
+	}
+}
+
+// TestReadRollout_ContainmentRefusesEscape covers readRollout's own containment
+// guard (the withinResolvedRoot check before os.Open): a descriptor whose abs
+// is a symlink escaping the root (as can reach readRollout via the Tail flush
+// path, which has no prior discovery check) is refused and never opened.
+func TestReadRollout_ContainmentRefusesEscape(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	outside := t.TempDir()
+	secret := filepath.Join(outside, "secret.jsonl")
+	writeFileBytes(t, secret, completeSession("sid-secret"))
+	shardDir := filepath.Join(resolved, "2025", "11", "20")
+	if err := os.MkdirAll(shardDir, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	rel := "2025/11/20/rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"
+	link := filepath.Join(shardDir, "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	if err := os.Symlink(secret, link); err != nil {
+		t.Skipf("symlink unsupported: %v", err)
+	}
+
+	out := make(chan canonical.Event, 8)
+	r := rollout{rel: rel, abs: link}
+	_, n, err := readRollout(context.Background(), resolved, r, "codex:"+root, FileCursor{}, out, func(error) {})
+	if err == nil {
+		t.Fatal("readRollout must refuse a symlink escaping the root")
+	}
+	if n != 0 {
+		t.Errorf("escaped rollout emitted %d events, want 0", n)
+	}
+}
+
+// TestReadRollout_OpenError covers readRollout's open-error branch (the
+// os.Open failure path): a descriptor pointing at a non-existent (but in-root)
+// path returns an error. (Containment is expected to tolerate a not-yet-created
+// tail, so the open is what fails.)
+func TestReadRollout_OpenError(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	rel := "2025/11/20/rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"
+	r := rollout{rel: rel, abs: filepath.Join(resolved, filepath.FromSlash(rel))} // not created
+	out := make(chan canonical.Event, 4)
+	_, _, err := readRollout(context.Background(), resolved, r, "codex:"+root, FileCursor{}, out, func(error) {})
+	if err == nil {
+		t.Fatal("readRollout on a non-existent in-root path should return an open error")
+	}
+}
diff --git a/internal/adapters/codex/scanner.go b/internal/adapters/codex/scanner.go
new file mode 100644
index 0000000..daa1e8e
--- /dev/null
+++ b/internal/adapters/codex/scanner.go
@@ -0,0 +1,295 @@
+package codex
+
+import (
+	"bufio"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// staleAfter is the file-mtime age beyond which a hanging open turn at EOF is
+// synthetically finalized failed/incomplete (spec rule #23, SOW C#3: "≥ 1 h").
+// A fresher file keeps its open turn running for the next append.
+const staleAfter = time.Hour
+
+// progressEveryEvents bounds how frequently SourceProgress is emitted by record
+// count (spec §"Watch Strategy"; mirrors claude_code's progress cadence).
+const progressEveryEvents = 200
+
+// progressEveryDuration bounds SourceProgress emission by wall-clock.
+const progressEveryDuration = 5 * time.Second
+
+// scanAll walks the sessions root and reads every modern rollout from its
+// cursor offset to EOF, emitting events and periodic SourceProgress. Legacy
+// flat .json files emit exactly one informational SourceError each (first
+// sight) and are then suppressed via the cursor's LegacyJSON map (spec
+// §"Legacy"; rule #24 deferral). A modern file with no session_meta on its
+// first parseable line is skipped with a SourceError and its offset held at 0
+// so a later append retries (rule #24). At EOF a hanging open turn is finalized
+// failed/incomplete ONLY when the file mtime is stale ≥ 1 h (rule #23).
+// Returns the final cursor.
+func scanAll(ctx context.Context, root, sourceID string, start Cursor, out chan<- canonical.Event, onError func(error)) (Cursor, error) {
+	disc, err := discoverRollouts(root, onError)
+	if err != nil {
+		return start, err
+	}
+	cur := start
+	if cur.Files == nil {
+		cur = newCursor()
+	}
+
+	// Pre-resolve the root ONCE so the per-file containment open does not re-run
+	// EvalSymlinks on the root for every file. A resolve failure here is
+	// non-fatal: fall back to the unresolved root (the files were already
+	// discovered, so the root exists; a degenerate resolve only loses the perf
+	// optimisation, not correctness). Mirrors claude_code/scanAll.
+	resolvedRoot := root
+	if rr, rrErr := filepath.EvalSymlinks(filepath.Clean(root)); rrErr == nil {
+		resolvedRoot = rr
+	}
+
+	// Emit one informational SourceError per legacy file the first time it is
+	// seen, then record it in the cursor so it stays quiet thereafter (R1 / spec
+	// §"Legacy"). The content is NOT ingested (Phase-2.5 follow-up).
+	cur = reportLegacy(cur, disc.legacy, onError)
+
+	emittedSinceProgress := 0
+	lastProgress := time.Now()
+	for _, r := range disc.modern {
+		if ctx.Err() != nil {
+			return cur, ctx.Err()
+		}
+		fc := cur.fileCursor(r.rel)
+		updated, n, rerr := readRollout(ctx, resolvedRoot, r, sourceID, fc, out, onError)
+		if rerr != nil {
+			if errors.Is(rerr, context.Canceled) || errors.Is(rerr, context.DeadlineExceeded) {
+				return cur, rerr
+			}
+			onError(rerr)
+			continue
+		}
+		cur = cur.withFile(r.rel, updated)
+		emittedSinceProgress += n
+		if emittedSinceProgress >= progressEveryEvents || time.Since(lastProgress) >= progressEveryDuration {
+			if perr := emitProgress(ctx, sourceID, cur, out); perr != nil {
+				return cur, perr
+			}
+			emittedSinceProgress = 0
+			lastProgress = time.Now()
+		}
+	}
+
+	if perr := emitProgress(ctx, sourceID, cur, out); perr != nil {
+		return cur, perr
+	}
+	return cur, nil
+}
+
+// fileCursor returns the FileCursor for rel, or a zero cursor when absent.
+func (c Cursor) fileCursor(rel string) FileCursor {
+	if c.Files == nil {
+		return FileCursor{}
+	}
+	return c.Files[rel]
+}
+
+// reportLegacy reports, via onError, each legacy basename the cursor has not
+// seen yet and returns a cursor that records them as seen (suppression). Files
+// are reported in slice order, which discoverRollouts has already sorted.
+func reportLegacy(cur Cursor, legacy []string, onError func(error)) Cursor {
+	for _, base := range legacy {
+		if cur.legacyIngested(base) {
+			continue
+		}
+		onError(fmt.Errorf("codex: legacy flat .json rollout %q is not ingested in v1 (legacy_json_format=false); a Phase-2.5 follow-up may add support", base))
+		cur = cur.withLegacyIngested(base)
+	}
+	return cur
+}
+
+// readRollout parses one modern rollout from its cursor offset to EOF, emits
+// canonical events, and returns the updated FileCursor and emitted-event count.
+// Partial trailing lines are held back (offset advances only past complete
+// lines, spec "Atomicity"). Truncation (size < cursor.size) re-scans from 0
+// with a SourceError (spec §"Cursor" restart logic). A file with no
+// session_meta on its first parseable line is skipped with a SourceError and
+// its offset held at 0 (rule #24). At EOF a hanging open turn is finalized
+// failed/incomplete ONLY when the mtime is stale ≥ 1 h (rule #23); a fresh file
+// leaves the turn open. resolvedRoot is the symlink-resolved sessions root,
+// threaded into the containment open.
+func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID string, start FileCursor, out chan<- canonical.Event, onError func(error)) (FileCursor, int, error) {
+	// Containment guard on EVERY rollout open (security.md §6): a *.jsonl symlink
+	// planted in a watched shard dir after Tail starts would otherwise be opened.
+	// Open the RESOLVED path, not the original (no TOCTOU). A refused path
+	// surfaces a SourceError (the caller logs the returned error) and is skipped.
+	resolvedAbs, ok, cerr := withinResolvedRoot(resolvedRoot, r.abs)
+	if cerr != nil {
+		return start, 0, fmt.Errorf("codex: cannot resolve %s for containment; skipping: %w", r.abs, cerr)
+	} else if !ok {
+		return start, 0, fmt.Errorf("codex: %s resolves outside the sessions root; skipping (symlink escape)", r.rel)
+	}
+	f, err := os.Open(resolvedAbs) // #nosec G304 -- opening the containment-checked RESOLVED path (withinResolvedRoot) from a filtered scan under the configured read-only sessions root
+	if err != nil {
+		return start, 0, fmt.Errorf("open %s: %w", r.abs, err)
+	}
+	defer func() { _ = f.Close() }()
+
+	info, err := f.Stat()
+	if err != nil {
+		return start, 0, fmt.Errorf("stat %s: %w", r.abs, err)
+	}
+	size := info.Size()
+	mtimeUs := info.ModTime().UnixMicro()
+	cur := start
+
+	// Truncation defense (spec §"Cursor"): a shrunken file is re-scanned from 0;
+	// SQL-layer idempotent upserts absorb any re-emitted rows. Codex never
+	// truncates, so this means a manual operator delete + recreate.
+	if cur.Size > 0 && size < cur.Size {
+		onError(fmt.Errorf("rollout %s shrank (size=%d, cursor.size=%d); rescanning from 0", r.rel, size, cur.Size))
+		cur = FileCursor{}
+	}
+
+	// Rule #24: require a session_meta on the file's first parseable line. Codex
+	// always writes session_meta first (recorder.rs), so a first record that is
+	// NOT session_meta means the file is corrupt / a pre-write crash. Skip it
+	// with a SourceError and hold the offset at 0 so a later append retries. The
+	// probe reads from absolute offset 0 (independent of the resume offset) since
+	// session_meta is line 1 and may already be below the cursor on a resume.
+	hasMeta, probeErr := firstRecordIsSessionMeta(f, size)
+	if probeErr != nil {
+		return start, 0, fmt.Errorf("probe %s: %w", r.rel, probeErr)
+	}
+	if !hasMeta {
+		onError(fmt.Errorf("rollout %s has no session_meta on its first line; skipping (rule #24, offset held at 0)", r.rel))
+		// Return cur, not start: after a truncation reset cur is the zero cursor.
+		return cur, 0, nil
+	}
+	if _, serr := f.Seek(0, io.SeekStart); serr != nil {
+		return start, 0, fmt.Errorf("seek %s: %w", r.rel, serr)
+	}
+
+	emitFrom := cur.Offset
+	mapper := newFileMapper(mapperConfig{
+		sourceID: sourceID,
+		absPath:  r.abs,
+		nativeID: nativeIDForRollout(r),
+	})
+	dedup := newUnknownDedup()
+
+	// Even when the file is fully consumed (offset >= size) we replay from offset
+	// 0 with the emit-gate set to size (emit NOTHING) so the per-file turn/op
+	// inference counters are rebuilt deterministically — codex has no native
+	// turn/op numbers, so a resume produces the SAME Seqs only by replaying the
+	// chain from the start (acceptance #6). emitFrom is clamped to <= size.
+	if emitFrom > size {
+		emitFrom = size
+	}
+
+	res, perr := streamLines(ctx, f, emitFrom, r.rel, mapper, dedup, out, onError)
+	if perr != nil {
+		// Record the offset reached even on cancellation so a follow-up resumes
+		// from completed work (only fully-consumed lines advance the offset).
+		cur.Offset = res.advanced
+		return cur, res.emitted, perr
+	}
+	cur.Offset = res.advanced
+	cur.Size = size
+	cur.MtimeUs = mtimeUs
+	if mapper.lastTsUs > 0 {
+		cur.LastTsUs = mapper.lastTsUs
+	}
+
+	// EOF-finalize (rule #23): finalize a hanging open turn failed/incomplete
+	// ONLY when the file is fully read AND its mtime is stale ≥ 1 h. The mapper
+	// owns the open-turn decision (finalizeStale is a no-op for a cleanly-ended
+	// session — SOW C#3: no clean-EOF completed finalize). A fresh file leaves
+	// the turn open for the next append. The synthetic end timestamp is the file
+	// mtime in micros (mapper_finalize.go).
+	fullyRead := res.advanced >= size
+	if fullyRead && time.Since(info.ModTime()) >= staleAfter {
+		for _, ev := range mapper.finalizeStale(mtimeUs) {
+			select {
+			case <-ctx.Done():
+				return cur, res.emitted, ctx.Err()
+			case out <- ev:
+				res.emitted++
+			}
+		}
+	}
+	return cur, res.emitted, nil
+}
+
+// firstRecordIsSessionMeta reports whether the file's first non-skipped line
+// is a session_meta record (rule #24). It seeks to absolute offset 0 and reads
+// through its own buffered reader (the caller seeks back afterwards). Lines
+// parseLine flags as skip, and lines readOneLine rejects as errLineTooLong, are
+// passed over; the first remaining line decides, and a parse error on it means
+// false. EOF before any deciding line (or an empty line slice) is also false,
+// so an empty/corrupt file is treated as "no session_meta". size is consulted
+// only for the empty-file fast path; it does not bound how much is read here.
+func firstRecordIsSessionMeta(f *os.File, size int64) (bool, error) {
+	if size == 0 {
+		return false, nil
+	}
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
+		return false, err
+	}
+	br := bufio.NewReaderSize(f, streamReaderSize)
+	for {
+		line, _, err := readOneLine(br)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				return false, nil
+			}
+			if errors.Is(err, errLineTooLong) {
+				// An oversized first line is not a session_meta the adapter can use;
+				// keep probing past it for a later session_meta (none expected, but
+				// be forgiving rather than abort the whole file).
+				continue
+			}
+			return false, err
+		}
+		if len(line) == 0 {
+			return false, nil
+		}
+		rec, skip, perr := parseLine(line[:len(line)-1])
+		if perr != nil {
+			// A malformed first line is not a usable session_meta; the file is
+			// corrupt for rule-#24 purposes.
+			return false, nil
+		}
+		if skip {
+			continue
+		}
+		return rec.Type() == recSessionMeta, nil
+	}
+}
+
+// emitProgress publishes a SourceProgressEvent with the current cursor. Mirrors
+// claude_code.
+func emitProgress(ctx context.Context, sourceID string, cur Cursor, out chan<- canonical.Event) error {
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	ev := canonical.SourceProgressEvent{
+		EventBase: canonical.EventBase{
+			SourceID:  sourceID,
+			SourceSeq: 0,
+			Ts:        time.Now().UnixMicro(),
+		},
+		Cursor: cur.String(),
+	}
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case out <- ev:
+		return nil
+	}
+}
diff --git a/internal/adapters/codex/scanner_branch_test.go b/internal/adapters/codex/scanner_branch_test.go
new file mode 100644
index 0000000..5d2d480
--- /dev/null
+++ b/internal/adapters/codex/scanner_branch_test.go
@@ -0,0 +1,217 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestDiscover_UnreadableRootFatal checks that a sessions root which exists but
+// cannot be read makes discoverRollouts fail, unlike the benign absent-root
+// case. Skipped as root or where a 0o000 directory is still readable.
+func TestDiscover_UnreadableRootFatal(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	sessions := filepath.Join(t.TempDir(), "sessions")
+	if mkErr := os.MkdirAll(sessions, 0o755); mkErr != nil {
+		t.Fatalf("mkdir: %v", mkErr)
+	}
+	if chErr := os.Chmod(sessions, 0o000); chErr != nil {
+		t.Skipf("chmod unsupported: %v", chErr)
+	}
+	// Put the permissions back at cleanup time.
+	t.Cleanup(func() { _ = os.Chmod(sessions, 0o755) })
+	if _, readErr := os.ReadDir(sessions); readErr == nil {
+		t.Skip("filesystem allowed reading a 0o000 dir; fatal-root seam not exercised")
+	}
+
+	_, err := discoverRollouts(sessions, func(error) {})
+	if err == nil {
+		t.Fatal("unreadable root must be a fatal error")
+	}
+	if msg := err.Error(); !strings.Contains(msg, "read sessions root") {
+		t.Errorf("fatal error = %v, want 'read sessions root'", err)
+	}
+}
+
+// TestScan_DiscoverFatalPropagates checks that scanAll hands the fatal
+// discovery error back to its caller instead of swallowing it.
+func TestScan_DiscoverFatalPropagates(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	sessions := filepath.Join(t.TempDir(), "sessions")
+	if mkErr := os.MkdirAll(sessions, 0o755); mkErr != nil {
+		t.Fatalf("mkdir: %v", mkErr)
+	}
+	if chErr := os.Chmod(sessions, 0o000); chErr != nil {
+		t.Skipf("chmod unsupported: %v", chErr)
+	}
+	t.Cleanup(func() { _ = os.Chmod(sessions, 0o755) })
+	if _, readErr := os.ReadDir(sessions); readErr == nil {
+		t.Skip("filesystem allowed reading a 0o000 dir; fatal-root seam not exercised")
+	}
+
+	// A small buffer is used; the scan is expected to fail during discovery.
+	events := make(chan canonical.Event, 4)
+	_, err := scanAll(context.Background(), sessions, "codex:"+sessions, newCursor(), events, func(error) {})
+	if err == nil {
+		t.Fatal("scanAll must propagate the fatal discovery error")
+	}
+}
+
+// TestScan_ReadErrorContinues checks fail-soft handling of a rollout that cannot
+// be opened: the failure is reported through onError and the scan carries on
+// with the remaining files (scanAll's onError(rerr)+continue branch). Skipped
+// as root or where 0o000 file permissions are not enforced.
+func TestScan_ReadErrorContinues(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	shard := filepath.Join(root, "2025", "11", "20")
+	locked := filepath.Join(shard, "rollout-2025-11-20T09-00-00-"+uuid7(1)+".jsonl")
+	healthy := filepath.Join(shard, "rollout-2025-11-20T10-00-00-"+uuid7(2)+".jsonl")
+	writeFileBytes(t, locked, completeSession("sid-bad"))
+	writeFileBytes(t, healthy, completeSession("sid-good"))
+	setMtime(t, healthy, time.Minute)
+	if chErr := os.Chmod(locked, 0o000); chErr != nil {
+		t.Skipf("chmod unsupported: %v", chErr)
+	}
+	t.Cleanup(func() { _ = os.Chmod(locked, 0o644) })
+	if probe, openErr := os.Open(locked); openErr == nil { // #nosec G304 -- test probe
+		_ = probe.Close()
+		t.Skip("filesystem allowed opening a 0o000 file; read-error seam not exercised")
+	}
+
+	events, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("the good file must ingest while the bad one errors")
+	}
+	sawOpenErr := false
+	// Look for the open failure among the reported errors.
+	for i := 0; i < len(errs) && !sawOpenErr; i++ {
+		sawOpenErr = strings.Contains(errs[i], "open ")
+	}
+	if !sawOpenErr {
+		t.Errorf("unreadable rollout did not surface an open SourceError; errs=%v", errs)
+	}
+}
+
+// TestScan_ProgressCheckpointMidWalk feeds scanAll more than progressEveryEvents
+// events spread over many files, so an intermediate SourceProgress must be
+// emitted in addition to the final one (the mid-walk checkpoint branch).
+func TestScan_ProgressCheckpointMidWalk(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	shard := filepath.Join(root, "2025", "11", "20")
+	// Each session emits ~5 events (SessionStarted, TurnStarted, user op trio,
+	// TurnFinalized). 60 files * ~5 > 200 → at least one mid-walk checkpoint.
+	for n := 0; n < 60; n++ {
+		// pad2(n) in the seconds field keeps every file name distinct.
+		file := filepath.Join(shard, "rollout-2025-11-20T10-00-"+pad2(n)+"-"+uuid7(n)+".jsonl")
+		writeFileBytes(t, file, busySession("sid-"+pad2(n)))
+		setMtime(t, file, time.Minute)
+	}
+	events, _, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if got := countKind(events, canonical.EvSourceProgress); got < 2 {
+		t.Errorf("SourceProgress count = %d, want >= 2 (mid-walk + final)", got)
+	}
+}
+
+// busySession builds a rollout made of the metaLine header for id, a
+// turn_context, a user_message and a task_complete, each newline-terminated.
+func busySession(id string) []byte {
+	records := strings.Join([]string{
+		metaLine(id, `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"user_message","message":"hello there"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}, "\n")
+	return []byte(records + "\n")
+}
+
+// pad2 renders non-negative n in decimal, zero-padded to at least two digits.
+func pad2(n int) string {
+	if n >= 100 {
+		return pad2(n/10) + string(rune('0'+n%10)) // peel digits beyond two
+	}
+	return string(rune('0'+n/10)) + string(rune('0'+n%10))
+}
+
+// TestReadRollout_StaleFinalizeCancel targets the ctx.Done branch of readRollout's
+// stale-finalize emit loop. A stale hanging file is handed to readRollout with a
+// live context and an unbuffered channel nobody reads, so an emit has to block;
+// a timer then cancels the context and readRollout must return an error.
+// NOTE(review): with an undrained unbuffered channel the first emit of any kind
+// blocks, so an earlier select may be the one exercised — confirm.
+func TestReadRollout_StaleFinalizeCancel(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	id := uuid7(1)
+	file := shardPath(root, id)
+	writeFileBytes(t, file, hangingSession("sid-cancel"))
+	setMtime(t, file, 2*time.Hour)
+	rel := "2025/11/20/rollout-2025-11-20T16-59-09-" + id + ".jsonl"
+	target := rollout{rel: rel, abs: filepath.Join(resolved, filepath.FromSlash(rel))}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	// Unbuffered and never drained, so a send on it blocks.
+	sink := make(chan canonical.Event)
+
+	// Cancel shortly after the call starts; the blocked emit should then
+	// observe the cancellation.
+	time.AfterFunc(50*time.Millisecond, cancel)
+	_, _, err := readRollout(ctx, resolved, target, "codex:"+root, FileCursor{}, sink, func(error) {})
+	if err == nil {
+		t.Fatal("readRollout with ctx cancelled during finalize emit should return ctx err")
+	}
+}
+
+// TestReadRollout_TruncationResetReemits asserts the truncation-reset path: a
+// cursor recording a larger size than the file makes readRollout report a
+// "shrank" error, emit events again (n > 0) and return a non-zero offset.
+func TestReadRollout_TruncationResetReemits(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	path := shardPath(root, uuid7(2))
+	writeFileBytes(t, path, completeSession("sid-tr"))
+	setMtime(t, path, time.Minute)
+	rel := "2025/11/20/rollout-2025-11-20T16-59-09-" + uuid7(2) + ".jsonl"
+	r := rollout{rel: rel, abs: filepath.Join(resolved, filepath.FromSlash(rel))}
+
+	out := make(chan canonical.Event, 64)
+	// The cursor below claims Offset/Size 99999, far larger than the file.
+	var errs []string
+	updated, n, err := readRollout(context.Background(), resolved, r, "codex:"+root, FileCursor{Offset: 99999, Size: 99999}, out, func(e error) { errs = append(errs, e.Error()) })
+	if err != nil {
+		t.Fatalf("readRollout: %v", err)
+	}
+	if n == 0 {
+		t.Error("truncation reset should re-emit the session from 0")
+	}
+	if updated.Offset == 0 {
+		t.Error("offset should advance after re-scan")
+	}
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "shrank") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("truncation SourceError not surfaced; errs=%v", errs)
+	}
+}
diff --git a/internal/adapters/codex/scanner_test.go b/internal/adapters/codex/scanner_test.go
new file mode 100644
index 0000000..5cf7519
--- /dev/null
+++ b/internal/adapters/codex/scanner_test.go
@@ -0,0 +1,645 @@
+package codex
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// --- test helpers (shared by scanner_test.go and tailer_test.go) ---
+
+// writeFileBytes writes b to path after creating its parent directories. Test-only.
+func writeFileBytes(t *testing.T, path string, b []byte) {
+	t.Helper()
+	parent := filepath.Dir(path)
+	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
+		t.Fatalf("mkdir %s: %v", parent, mkErr)
+	} else if wrErr := os.WriteFile(path, b, 0o644); wrErr != nil {
+		t.Fatalf("write %s: %v", path, wrErr)
+	}
+}
+
+// appendFileBytes appends b to path, creating the file and its parents; models resume/tail appends.
+func appendFileBytes(t *testing.T, path string, b []byte) {
+	t.Helper()
+	parent := filepath.Dir(path)
+	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
+		t.Fatalf("mkdir %s: %v", parent, mkErr)
+	}
+	file, openErr := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
+	if openErr != nil {
+		t.Fatalf("open %s: %v", path, openErr)
+	}
+	defer func() { _ = file.Close() }()
+	if _, wrErr := file.Write(b); wrErr != nil {
+		t.Fatalf("append %s: %v", path, wrErr)
+	}
+}
+
+// drainBuffered returns every event that can be received from ch without
+// blocking, stopping at the first empty poll or when ch is closed. Test-only.
+func drainBuffered(ch chan canonical.Event) []canonical.Event {
+	collected := make([]canonical.Event, 0, cap(ch))
+	for open := true; open; {
+		select {
+		case ev, ok := <-ch:
+			if open = ok; ok {
+				collected = append(collected, ev)
+			}
+		default:
+			open = false
+		}
+	}
+	return collected
+}
+
+// scanCollect runs scanAll over root starting from since and returns the events
+// it emitted, the messages of the errors it reported, and the resulting cursor.
+func scanCollect(t *testing.T, root, sourceID string, since Cursor) ([]canonical.Event, []string, Cursor) {
+	t.Helper()
+	var guard sync.Mutex
+	var messages []string
+	record := func(e error) {
+		guard.Lock()
+		defer guard.Unlock()
+		messages = append(messages, e.Error())
+	}
+	sink := make(chan canonical.Event, 16384)
+	cursor, err := scanAll(context.Background(), root, sourceID, since, sink, record)
+	if err != nil {
+		t.Fatalf("scanAll: %v", err)
+	}
+	return drainBuffered(sink), messages, cursor
+}
+
+// shardPath returns root/2025/11/20/rollout-2025-11-20T16-59-09-<id>.jsonl, a synthetic modern rollout path.
+func shardPath(root, id string) string {
+	name := "rollout-2025-11-20T16-59-09-" + id + ".jsonl"
+	return filepath.Join(root, "2025", "11", "20", name)
+}
+
+// uuid7 returns a fixed UUID-shaped string whose last two digits are n%100, so
+// small distinct n give distinct ids for the nativeIDForRollout uuidTail path.
+func uuid7(n int) string {
+	return "019aa234-a2a1-75c3-a9bf-d8425e1785" + fmt.Sprintf("%02d", n%100)
+}
+
+// completeSession builds a minimal modern rollout of three newline-terminated
+// records: the metaLine header for id, a turn_context for turn t1, and the
+// task_complete that closes it.
+func completeSession(id string) []byte {
+	joined := strings.Join([]string{
+		metaLine(id, `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.1-codex-max"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","last_agent_message":"done","completed_at":"` + tsDone + `"}}`,
+	}, "\n")
+	return []byte(joined + "\n")
+}
+
+// hangingSession builds a modern rollout whose only turn is opened
+// (turn_context + task_started) and never closed by a task_complete — the
+// rule #23 stale-finalize candidate.
+func hangingSession(id string) []byte {
+	joined := strings.Join([]string{
+		metaLine(id, `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.1-codex-max"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
+	}, "\n")
+	return []byte(joined + "\n")
+}
+
+// setMtime backdates path's access and modification times to age before now.
+func setMtime(t *testing.T, path string, age time.Duration) {
+	t.Helper()
+	backdated := time.Now().Add(-age)
+	if chErr := os.Chtimes(path, backdated, backdated); chErr != nil {
+		t.Fatalf("chtimes %s: %v", path, chErr)
+	}
+}
+
+// hasKind reports whether at least one event in events is of the given kind.
+func hasKind(events []canonical.Event, kind canonical.EventKind) bool {
+	return countKind(events, kind) != 0
+}
+
+// --- discovery tests ---
+
+// TestDiscover_MultiShardSorted checks that modern rollouts in different
+// YYYY/MM/DD shard dirs are all discovered and come back ordered by rel, while
+// archived_sessions/, sqlite, history, and session_index.jsonl are skipped.
+func TestDiscover_MultiShardSorted(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	// One rollout in each of two shard dirs.
+	first := filepath.Join(root, "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	second := filepath.Join(root, "2025", "11", "21", "rollout-2025-11-21T10-00-00-"+uuid7(2)+".jsonl")
+	writeFileBytes(t, first, completeSession("sid-a"))
+	writeFileBytes(t, second, completeSession("sid-b"))
+	// Decoys that discovery has to leave out.
+	writeFileBytes(t, filepath.Join(root, "archived_sessions", "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(3)+".jsonl"), completeSession("sid-arch"))
+	writeFileBytes(t, filepath.Join(root, "session_index.jsonl"), []byte("{}\n"))
+	writeFileBytes(t, filepath.Join(root, "state_5.sqlite"), []byte("x"))
+	writeFileBytes(t, filepath.Join(root, "history.jsonl"), []byte("x"))
+	// A .jsonl in a shard dir whose name lacks the rollout prefix.
+	writeFileBytes(t, filepath.Join(root, "2025", "11", "20", "notes.jsonl"), []byte("{}\n"))
+
+	found, err := discoverRollouts(root, nil)
+	if err != nil {
+		t.Fatalf("discoverRollouts: %v", err)
+	}
+	modern := found.modern
+	if len(modern) != 2 {
+		t.Fatalf("modern count = %d, want 2; got %+v", len(modern), modern)
+	}
+	if modern[0].rel >= modern[1].rel {
+		t.Errorf("not sorted: %q then %q", modern[0].rel, modern[1].rel)
+	}
+	if want := "2025/11/20/rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"; modern[0].rel != want {
+		t.Errorf("rel[0] = %q, want %q", modern[0].rel, want)
+	}
+}
+
+// TestDiscover_LegacyClassifiedSeparately asserts legacy flat .json files
+// directly under the root are returned in disc.legacy, not disc.modern, and
+// that a legacy-named .json inside a shard dir is NOT treated as legacy.
+func TestDiscover_LegacyClassifiedSeparately(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	writeFileBytes(t, shardPath(root, uuid7(1)), completeSession("sid-a"))
+	legacy := "rollout-2025-06-26-5556f03d-348c-4463-987c-053ccd0b1df5.json"
+	writeFileBytes(t, filepath.Join(root, legacy), []byte(`{"session":{},"items":[]}`))
+	// A legacy-shaped name inside a shard dir is NOT a root legacy file.
+	writeFileBytes(t, filepath.Join(root, "2025", "11", "20", "rollout-x.json"), []byte("{}"))
+
+	disc, err := discoverRollouts(root, nil)
+	if err != nil {
+		t.Fatalf("discoverRollouts: %v", err)
+	}
+	if len(disc.modern) != 1 {
+		t.Errorf("modern count = %d, want 1", len(disc.modern))
+	}
+	if len(disc.legacy) != 1 || disc.legacy[0] != legacy {
+		t.Errorf("legacy = %v, want [%s]", disc.legacy, legacy)
+	}
+}
+
+// TestDiscover_MissingRootBenign checks that a sessions root which does not
+// exist (first run) yields an empty result and no error.
+func TestDiscover_MissingRootBenign(t *testing.T) {
+	t.Parallel()
+	found, err := discoverRollouts(filepath.Join(t.TempDir(), "does-not-exist"), nil)
+	if err != nil {
+		t.Fatalf("missing root should be benign, got %v", err)
+	}
+	if len(found.modern)+len(found.legacy) != 0 {
+		t.Errorf("missing root should yield empty, got %+v", found)
+	}
+}
+
+// --- scanAll behavior tests ---
+
+// TestScan_HappyPathEmitsSession checks that a complete session yields
+// SessionStarted, TurnStarted and TurnFinalized but no SessionFinalized, with
+// no reported errors, and that the file's cursor offset equals its size.
+func TestScan_HappyPathEmitsSession(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	id := uuid7(1)
+	file := shardPath(root, id)
+	writeFileBytes(t, file, completeSession("sid-1"))
+	events, errs, final := scanCollect(t, root, "codex:"+root, newCursor())
+	if len(errs) != 0 {
+		t.Fatalf("unexpected errors: %v", errs)
+	}
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("no SessionStarted emitted")
+	}
+	if !(hasKind(events, canonical.EvTurnStarted) && hasKind(events, canonical.EvTurnFinalized)) {
+		t.Error("turn boundary events missing")
+	}
+	if hasKind(events, canonical.EvSessionFinalized) {
+		t.Error("clean session must NOT emit SessionFinalized (SOW C#3)")
+	}
+	stat, _ := os.Stat(file)
+	offset := final.Files["2025/11/20/rollout-2025-11-20T16-59-09-"+id+".jsonl"].Offset
+	if offset == 0 || offset != stat.Size() {
+		t.Errorf("cursor offset = %d, want file size %d", offset, stat.Size())
+	}
+}
+
+// TestScan_ResumeNoDupNoGap is acceptance #6: scan a partial file, carry the
+// cursor forward, append the remainder and scan again. Across both scans there
+// must be exactly one SessionStarted, the appended close must show up in the
+// second scan, and the TurnFinalized total must match a one-shot scan.
+func TestScan_ResumeNoDupNoGap(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	closing := `{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`
+	// Opening part: session_meta + turn_context (turn opened, not closed).
+	opening := []string{
+		metaLine("sid-r", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+	}
+	writeFileBytes(t, file, []byte(strings.Join(opening, "\n")+"\n"))
+	// A recent mtime keeps the still-open turn from being stale-finalized.
+	setMtime(t, file, time.Minute)
+
+	before, errsBefore, resume := scanCollect(t, root, "codex:"+root, newCursor())
+	if len(errsBefore) != 0 {
+		t.Fatalf("phase1 errors: %v", errsBefore)
+	}
+	// Add the task_complete that closes the turn.
+	appendFileBytes(t, file, []byte(closing+"\n"))
+	setMtime(t, file, time.Minute)
+
+	after, errsAfter, _ := scanCollect(t, root, "codex:"+root, resume)
+	if len(errsAfter) != 0 {
+		t.Fatalf("phase2 errors: %v", errsAfter)
+	}
+
+	// Reference run: the same content scanned in one go from a fresh root.
+	refRoot := t.TempDir()
+	refFile := shardPath(refRoot, uuid7(1))
+	whole := append(append([]string{}, opening...), closing)
+	writeFileBytes(t, refFile, []byte(strings.Join(whole, "\n")+"\n"))
+	setMtime(t, refFile, time.Minute)
+	oneShot, _, _ := scanCollect(t, refRoot, "codex:"+refRoot, newCursor())
+
+	// Resume must not duplicate SessionStarted.
+	if got := countKind(before, canonical.EvSessionStarted) + countKind(after, canonical.EvSessionStarted); got != 1 {
+		t.Errorf("SessionStarted across resume = %d, want exactly 1 (no dup)", got)
+	}
+	// The appended line must yield its TurnFinalized in the second scan (no gap).
+	if got := countKind(after, canonical.EvTurnFinalized); got != 1 {
+		t.Errorf("phase2 TurnFinalized = %d, want 1 (the appended close)", got)
+	}
+	// Both phases together must finalize as many turns as the one-shot scan.
+	combined := countKind(before, canonical.EvTurnFinalized) + countKind(after, canonical.EvTurnFinalized)
+	if want := countKind(oneShot, canonical.EvTurnFinalized); combined != want {
+		t.Errorf("combined TurnFinalized = %d, one-shot = %d", combined, want)
+	}
+}
+
+// TestScan_TruncationRescans is acceptance #6 (truncation): a cursor recording
+// a larger size than the on-disk file must surface the "shrank ... rescanning
+// from 0" SourceError. Only the error is asserted; events are not inspected.
+func TestScan_TruncationRescans(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(1))
+	writeFileBytes(t, path, completeSession("sid-t"))
+	setMtime(t, path, time.Minute)
+	rel := "2025/11/20/rollout-2025-11-20T16-59-09-" + uuid7(1) + ".jsonl"
+
+	// First full scan.
+	_, _, cur1 := scanCollect(t, root, "codex:"+root, newCursor())
+	if cur1.Files[rel].Offset == 0 {
+		t.Fatalf("phase1 cursor not advanced")
+	}
+	// Simulate truncation: shrink the file on disk but keep the (larger) cursor.
+	writeFileBytes(t, path, completeSession("sid-t")[:20])
+	setMtime(t, path, time.Minute)
+
+	// Events are discarded: only the truncation SourceError is asserted below.
+	_, errs2, _ := scanCollect(t, root, "codex:"+root, cur1)
+	foundTrunc := false
+	for _, e := range errs2 {
+		if strings.Contains(e, "shrank") && strings.Contains(e, "rescanning from 0") {
+			foundTrunc = true
+		}
+	}
+	if !foundTrunc {
+		t.Errorf("truncation SourceError not surfaced; errs=%v", errs2)
+	}
+}
+
+// TestScan_LegacyOneShotSourceError is R1: a legacy flat .json file emits
+// exactly one informational SourceError on first scan and is suppressed on the
+// next scan via the cursor's LegacyJSON map.
+func TestScan_LegacyOneShotSourceError(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	legacy := "rollout-2025-06-26-5556f03d-348c-4463-987c-053ccd0b1df5.json"
+	writeFileBytes(t, filepath.Join(root, legacy), []byte(`{"session":{},"items":[]}`))
+
+	_, errs1, cur1 := scanCollect(t, root, "codex:"+root, newCursor())
+	legacyErrs := 0
+	for _, e := range errs1 {
+		if strings.Contains(e, "legacy flat .json") {
+			legacyErrs++
+		}
+	}
+	if legacyErrs != 1 {
+		t.Fatalf("legacy SourceError count = %d, want exactly 1; errs=%v", legacyErrs, errs1)
+	}
+	if !cur1.legacyIngested(legacy) {
+		t.Fatal("legacy file not recorded as seen in cursor")
+	}
+	// Second scan with the carried cursor must be quiet.
+	_, errs2, _ := scanCollect(t, root, "codex:"+root, cur1)
+	for _, e := range errs2 {
+		if strings.Contains(e, "legacy flat .json") {
+			t.Fatalf("legacy SourceError re-emitted after suppression: %v", errs2)
+		}
+	}
+}
+
+// TestScan_NoSessionMetaSkips is rule #24: a modern file that does not start
+// with a session_meta produces a "no session_meta" SourceError and no
+// SessionStarted, and its cursor offset remains 0 so a later scan retries it.
+func TestScan_NoSessionMetaSkips(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	id := uuid7(1)
+	// The only record is a turn_context; there is no session_meta anywhere.
+	orphan := `{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`
+	writeFileBytes(t, shardPath(root, id), []byte(orphan+"\n"))
+
+	events, errs, final := scanCollect(t, root, "codex:"+root, newCursor())
+	if hasKind(events, canonical.EvSessionStarted) {
+		t.Error("rule #24 file must not emit a SessionStarted")
+	}
+	reported := false
+	for _, msg := range errs {
+		reported = reported || strings.Contains(msg, "no session_meta")
+	}
+	if !reported {
+		t.Errorf("rule #24 SourceError not surfaced; errs=%v", errs)
+	}
+	// The offset must stay at 0 so the file is retried later.
+	offset := final.Files["2025/11/20/rollout-2025-11-20T16-59-09-"+id+".jsonl"].Offset
+	if offset != 0 {
+		t.Errorf("rule #24 offset = %d, want 0 (retry on next append)", offset)
+	}
+}
+
+// TestScan_NoSessionMetaThenMetaAppended checks that the rule #24 retry works:
+// a file first scanned without a session_meta is rewritten to start with one,
+// and the next scan from the carried cursor ingests it. The rewrite models a
+// delayed first-line write rather than a truncation.
+func TestScan_NoSessionMetaThenMetaAppended(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	orphan := `{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`
+	writeFileBytes(t, file, []byte(orphan+"\n"))
+	_, _, carried := scanCollect(t, root, "codex:"+root, newCursor())
+	// Replace the content with a proper file that begins with session_meta.
+	writeFileBytes(t, file, completeSession("sid-late"))
+	setMtime(t, file, time.Minute)
+	events, _, _ := scanCollect(t, root, "codex:"+root, carried)
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("after session_meta present, file must ingest")
+	}
+}
+
+// TestScan_FailSoftUnreadableShard is the fail-soft requirement: a chmod-000
+// shard subtree surfaces a SourceError (onError fires) AND healthy files in
+// sibling shards still ingest. Skipped as root or on filesystems that still
+// allow descending a 0o000 dir.
+func TestScan_FailSoftUnreadableShard(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	// Healthy file in one shard.
+	good := filepath.Join(root, "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	writeFileBytes(t, good, completeSession("sid-good"))
+	setMtime(t, good, time.Minute)
+	// A second shard subtree we will block.
+	blockedDir := filepath.Join(root, "2025", "11", "21")
+	writeFileBytes(t, filepath.Join(blockedDir, "rollout-2025-11-21T10-00-00-"+uuid7(2)+".jsonl"), completeSession("sid-blocked"))
+	if err := os.Chmod(blockedDir, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(blockedDir, 0o755) })
+	// If the FS still lets us read the dir and open a file in it, skip; close the probe.
+	if _, derr := os.ReadDir(blockedDir); derr == nil {
+		if f, oerr := os.Open(filepath.Join(blockedDir, "rollout-2025-11-21T10-00-00-"+uuid7(2)+".jsonl")); oerr == nil { // #nosec G304 -- test probe
+			_ = f.Close()
+			t.Skip("filesystem allowed descending an unreadable dir; fail-soft seam not exercised")
+		}
+	}
+
+	events, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("healthy file did not ingest while a sibling shard was unreadable")
+	}
+	if len(errs) == 0 {
+		t.Errorf("unreadable shard did not surface any SourceError; events=%d", len(events))
+	}
+}
+
+// TestScan_StaleFinalizes is acceptance #5h: a hanging-turn file whose mtime is
+// two hours old gets one failed TurnFinalized plus a SessionFinalized.
+func TestScan_StaleFinalizes(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	writeFileBytes(t, file, hangingSession("sid-crash"))
+	// Two hours old, i.e. stale.
+	setMtime(t, file, 2*time.Hour)
+
+	events, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if len(errs) != 0 {
+		t.Fatalf("unexpected errors: %v", errs)
+	}
+	if finals := turnFinals(events); len(finals) != 1 || finals[0].Status != "failed" {
+		t.Fatalf("stale finalize TurnFinalized = %+v, want one failed", finals)
+	}
+	if !hasKind(events, canonical.EvSessionFinalized) {
+		t.Error("stale hanging session must emit SessionFinalized (rule #23)")
+	}
+}
+
+// TestScan_FreshDoesNotFinalize is the rule #23 lower bound: a hanging-turn file
+// modified two minutes ago keeps its turn open — nothing synthetic is emitted.
+func TestScan_FreshDoesNotFinalize(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	writeFileBytes(t, file, hangingSession("sid-live"))
+	setMtime(t, file, 2*time.Minute) // recent, so not stale
+
+	events, _, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if hasKind(events, canonical.EvSessionFinalized) {
+		t.Error("fresh hanging session must NOT emit SessionFinalized")
+	}
+	if finals := turnFinals(events); len(finals) != 0 {
+		t.Errorf("fresh hanging session must NOT emit a synthetic TurnFinalized; got %+v", finals)
+	}
+}
+
+// TestScan_UnknownTypeDedup is acceptance #2: three distinct unknown top-level
+// `type` strings, each seen three times, yield exactly three "unknown record
+// type" SourceErrors, and the surrounding valid session is still ingested.
+func TestScan_UnknownTypeDedup(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	variants := []string{"frobnicate", "wibble", "splort"}
+	records := []string{metaLine("sid-u", `"exec"`)}
+	// Three passes over the variants → every unknown type appears 3×.
+	for pass := 0; pass < 3; pass++ {
+		for _, kind := range variants {
+			records = append(records, `{"timestamp":"`+tsItem+`","type":"`+kind+`","payload":{}}`)
+		}
+	}
+	writeFileBytes(t, file, []byte(strings.Join(records, "\n")+"\n"))
+	setMtime(t, file, time.Minute)
+	events, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("valid session must still ingest alongside unknown variants")
+	}
+	reported := 0
+	for _, msg := range errs {
+		if strings.Contains(msg, "unknown record type") {
+			reported++
+		}
+	}
+	if reported != len(variants) {
+		t.Errorf("unknown-type SourceError count = %d, want 3 (one per distinct variant); errs=%v", reported, errs)
+	}
+}
+
+// TestScan_UnknownPayloadTypeDedup is acceptance #2 for the nested family: two
+// distinct unknown payload.type values under the known response_item type, each
+// seen twice, yield exactly two "unknown payload type" SourceErrors.
+func TestScan_UnknownPayloadTypeDedup(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	records := []string{metaLine("sid-up", `"exec"`)}
+	for pass := 0; pass < 2; pass++ {
+		for _, nested := range []string{"mystery_item", "ufo_call"} {
+			records = append(records, `{"timestamp":"`+tsItem+`","type":"response_item","payload":{"type":"`+nested+`"}}`)
+		}
+	}
+	writeFileBytes(t, file, []byte(strings.Join(records, "\n")+"\n"))
+	setMtime(t, file, time.Minute)
+
+	_, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	reported := 0
+	for _, msg := range errs {
+		if strings.Contains(msg, "unknown payload type") {
+			reported++
+		}
+	}
+	if reported != 2 {
+		t.Errorf("unknown-payload SourceError count = %d, want 2; errs=%v", reported, errs)
+	}
+}
+
+// TestScan_OversizedLineSkippedNotEOF checks that a line longer than
+// scanBufferMax is reported via a SourceError and then skipped: the record
+// before it is ingested and the cursor still ends at EOF (not a jump-to-EOF).
+func TestScan_OversizedLineSkippedNotEOF(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	file := shardPath(root, uuid7(1))
+	filler := strings.Repeat("x", scanBufferMax+10)
+	content := strings.Join([]string{
+		metaLine("sid-big", `"exec"`),
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"agent_message","message":"` + filler + `"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"` + tsDone + `"}}`,
+	}, "\n") + "\n"
+	writeFileBytes(t, file, []byte(content))
+	setMtime(t, file, time.Minute)
+
+	events, errs, final := scanCollect(t, root, "codex:"+root, newCursor())
+	sawOversize := false
+	for _, msg := range errs {
+		if strings.Contains(msg, "exceeds") && strings.Contains(msg, "bytes; skipping") {
+			sawOversize = true
+		}
+	}
+	if !sawOversize {
+		t.Errorf("oversized line did not surface a SourceError; errs=%v", errs)
+	}
+	if !hasKind(events, canonical.EvSessionStarted) {
+		t.Error("records before the oversized line must still ingest")
+	}
+	// The whole file must have been consumed, so the offset equals its size.
+	stat, _ := os.Stat(file)
+	offset := final.Files["2025/11/20/rollout-2025-11-20T16-59-09-"+uuid7(1)+".jsonl"].Offset
+	if offset != stat.Size() {
+		t.Errorf("cursor offset = %d, want EOF %d after skipping oversized line", offset, stat.Size())
+	}
+}
+
+// TestScan_SymlinkEscapeRefused checks that a *.jsonl symlink in a shard dir
+// whose target lies outside the sessions root is rejected with a SourceError
+// and produces no session (security.md §6).
+func TestScan_SymlinkEscapeRefused(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	elsewhere := t.TempDir()
+	target := filepath.Join(elsewhere, "secret.jsonl")
+	writeFileBytes(t, target, completeSession("sid-secret"))
+	shard := filepath.Join(root, "2025", "11", "20")
+	if mkErr := os.MkdirAll(shard, 0o755); mkErr != nil {
+		t.Fatalf("mkdir shard: %v", mkErr)
+	}
+	alias := filepath.Join(shard, "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	if lnErr := os.Symlink(target, alias); lnErr != nil {
+		t.Skipf("symlink unsupported: %v", lnErr)
+	}
+
+	events, errs, _ := scanCollect(t, root, "codex:"+root, newCursor())
+	if hasKind(events, canonical.EvSessionStarted) {
+		t.Error("symlink escaping the root must not be ingested")
+	}
+	refused := false
+	for _, msg := range errs {
+		if strings.Contains(msg, "outside the sessions root") {
+			refused = true
+		}
+	}
+	if !refused {
+		t.Errorf("symlink escape not refused with a SourceError; errs=%v", errs)
+	}
+}
+
+// TestScan_ContextCancelStops checks that scanAll, given an already-cancelled
+// context, returns either nil or a context.Canceled error and does not panic.
+func TestScan_ContextCancelStops(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	writeFileBytes(t, shardPath(root, uuid7(1)), completeSession("sid-c"))
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancelled before the scan even starts
+	sink := make(chan canonical.Event, 1)
+	_, err := scanAll(ctx, root, "codex:"+root, newCursor(), sink, func(error) {})
+	if !(err == nil || isCanceled(err)) {
+		t.Fatalf("scanAll on cancelled ctx = %v, want nil or context.Canceled", err)
+	}
+}
+
+func isCanceled(err error) bool { // nil-safe: a nil error is never a cancellation
+	return err != nil && strings.Contains(err.Error(), context.Canceled.Error())
+}
+
+// TestNativeIDForRollout checks that nativeIDForRollout returns the UUID tail
+// of a rollout filename, or the stem after "rollout-" when no UUID is present.
+func TestNativeIDForRollout(t *testing.T) {
+	t.Parallel()
+	want := uuid7(7)
+	withUUID := rollout{abs: "/x/2025/11/20/rollout-2025-11-20T16-59-09-" + want + ".jsonl"}
+	if got := nativeIDForRollout(withUUID); got != want {
+		t.Errorf("nativeIDForRollout = %q, want %q", got, want)
+	}
+	// Without a UUID tail the whole stem after the prefix is returned.
+	plain := rollout{abs: "/x/2025/11/20/rollout-weird.jsonl"}
+	if got := nativeIDForRollout(plain); got != "weird" {
+		t.Errorf("nativeIDForRollout(no-uuid) = %q, want weird", got)
+	}
+}
diff --git a/internal/adapters/codex/stream.go b/internal/adapters/codex/stream.go
new file mode 100644
index 0000000..d06a002
--- /dev/null
+++ b/internal/adapters/codex/stream.go
@@ -0,0 +1,320 @@
+package codex
+
+import (
+	"bufio"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// scanBufferMax bounds a single rollout line. Real codex lines can be large
+// (base64 encrypted_content reasoning blocks, big tool I/O); 8 MiB is ample
+// while bounding pathological allocations. Mirrors claude_code/scanner.go.
+const scanBufferMax = 8 * 1024 * 1024
+
+// streamReaderSize is the bufio.Reader buffer size in bytes; matches claude_code.
+const streamReaderSize = 64 * 1024
+
+// errLineTooLong signals that a single rollout line exceeded scanBufferMax.
+// readOneLine drains that line up to its terminating newline; streamLines
+// surfaces it via onError, advances past it, and CONTINUES reading the rest of
+// the file (spec adapter-codex.md "Atomicity" partial-line handling; edge #7)
+// — it does NOT skip to EOF. Reused verbatim from claude_code.
+var errLineTooLong = errors.New("codex: line exceeds scan buffer")
+
+// streamResult bundles what one rollout-file stream produced: the emitted-event
+// count and the absolute offset just past the last complete line consumed (the
+// durable resume key). The caller derives EOF-fullness from advanced >= size
+// and the rule-#24 session_meta check from a separate first-record probe, so no
+// further fields are carried here.
+type streamResult struct {
+	emitted  int
+	advanced int64
+}
+
+// streamLines reads '\n'-terminated JSON records from r (positioned at offset
+// 0), mapping each via the file's mapper to rebuild turn/op inference state
+// deterministically. Events are emitted ONLY for records whose line begins at
+// or after emitFrom, so a resume replays prior bytes to rebuild counters but
+// emits nothing already seen (acceptance #6). It returns the emitted-event
+// count and, in advanced, the absolute offset just past the last line whose
+// events were all handed off. A partial trailing line (no final '\n') is held
+// back so the offset only ever advances past complete lines (spec "Atomicity");
+// a cancellation mid-line reports that line's start, so a resume re-emits the
+// line (absorbed by idempotent ingestion) instead of dropping its tail events.
+//
+// dedup is the per-file unknown-variant seen-set (rule #2: exactly one
+// SourceError per distinct unknown top-level `type` OR nested `payload.type`
+// per session); it lives here because the parser returns the variant via a
+// typed error before the mapper is reached. Rule #24 (session_meta presence) is
+// applied by the caller via a separate first-record probe, not tracked here.
+// lineNo counts every physical line, including skipped and oversized ones.
+func streamLines(ctx context.Context, r io.Reader, emitFrom int64, rel string, mapper *fileMapper, dedup *unknownDedup, out chan<- canonical.Event, onError func(error)) (streamResult, error) {
+	br := bufio.NewReaderSize(r, streamReaderSize)
+	var res streamResult
+	off := int64(0)
+	lineNo := 0
+	for {
+		if err := ctx.Err(); err != nil {
+			res.advanced = off
+			return res, err
+		}
+		line, consumed, err := readOneLine(br)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				res.advanced = off
+				return res, nil
+			}
+			if errors.Is(err, errLineTooLong) {
+				// Surface one SourceError for the oversized line, advance past the
+				// drained bytes, and CONTINUE reading subsequent records (edge #7).
+				// The skipped line still occupies a physical line, so it is counted
+				// to keep later "#L<line>" PayloadRef anchors aligned with the file.
+				if off >= emitFrom {
+					onError(fmt.Errorf("rollout %s @%d: line exceeds %d bytes; skipping", rel, off, scanBufferMax))
+				}
+				off += consumed
+				lineNo++
+				continue
+			}
+			res.advanced = off
+			return res, fmt.Errorf("read %s @%d: %w", rel, off, err)
+		}
+		if len(line) == 0 {
+			res.advanced = off
+			return res, nil
+		}
+		recBytes := line[:len(line)-1]
+		lineStart := off
+		off += int64(len(line))
+		lineNo++
+		emit := lineStart >= emitFrom
+
+		rec, skip, perr := parseLine(recBytes)
+		if perr != nil {
+			if emit && shouldSurfaceParseError(dedup, perr) {
+				onError(fmt.Errorf("rollout %s @%d: %w", rel, lineStart, perr))
+			}
+			continue
+		}
+		if skip {
+			continue
+		}
+		// mapRecord always runs so the per-file turn/op inference counters
+		// advance during a resume replay; only events at/after emitFrom are
+		// sent. setLineNo anchors PayloadRef "#L<line>" at the owning record.
+		mapper.setLineNo(lineNo)
+		events, mErr := mapper.mapRecord(rec)
+		if mErr != nil {
+			if emit {
+				onError(fmt.Errorf("rollout %s @%d: map: %w", rel, lineStart, mErr))
+			}
+			continue
+		}
+		if !emit {
+			continue
+		}
+		for _, ev := range events {
+			select {
+			case <-ctx.Done():
+				// This line is only partly emitted: report its start so a
+				// resume replays it rather than skipping the unsent events.
+				res.advanced = lineStart
+				return res, ctx.Err()
+			case out <- ev:
+				res.emitted++
+			}
+		}
+	}
+}
+
+// readOneLine returns the next complete record from br, INCLUDING its
+// terminating '\n', plus the number of bytes consumed, so the caller can
+// advance its offset by that count. A trailing fragment with no '\n' is never
+// returned: it yields (nil, 0, io.EOF), which is the hold-back invariant (spec
+// "Atomicity"). A line longer than scanBufferMax yields errLineTooLong with
+// consumed covering everything drained through the next '\n' (or through EOF
+// when the oversized line is the file's tail), letting the caller skip it and
+// continue. consumed is 0 for io.EOF and for any other read error. Mirrors
+// claude_code's readOneLine.
+func readOneLine(br *bufio.Reader) ([]byte, int64, error) {
+	acc := make([]byte, 0, 256)
+	for {
+		part, rerr := br.ReadSlice('\n')
+		switch {
+		case rerr == nil:
+			acc = append(acc, part...)
+			if len(acc) > scanBufferMax {
+				return nil, int64(len(acc)), errLineTooLong
+			}
+			return acc, int64(len(acc)), nil
+		case errors.Is(rerr, bufio.ErrBufferFull):
+			acc = append(acc, part...)
+			if len(acc) <= scanBufferMax {
+				continue
+			}
+			// Over the limit with no newline yet: discard the remainder of the
+			// line and report the total so the caller can step past it.
+			rest, derr := drainToNewline(br)
+			if derr != nil && !errors.Is(derr, io.EOF) {
+				return nil, 0, derr
+			}
+			return nil, int64(len(acc)) + rest, errLineTooLong
+		case errors.Is(rerr, io.EOF):
+			// Partial line at EOF: hold it back rather than return it.
+			return nil, 0, io.EOF
+		default:
+			return nil, 0, rerr
+		}
+	}
+}
+
+// drainToNewline discards bytes from br through the next '\n' (inclusive) and
+// returns how many bytes it consumed. When the input ends first (the oversized
+// line runs to the end of the file with no trailing newline) it returns the
+// count so far together with io.EOF, so the caller can advance its offset to
+// EOF and the next read reports io.EOF cleanly. Any other read error is
+// returned the same way. Mirrors claude_code's drainToNewline.
+func drainToNewline(br *bufio.Reader) (int64, error) {
+	var total int64
+	for {
+		part, rerr := br.ReadSlice('\n')
+		total += int64(len(part))
+		if errors.Is(rerr, bufio.ErrBufferFull) {
+			// No newline in this buffer-full of data yet: keep discarding.
+			continue
+		}
+		// Either the newline was found (rerr == nil) or the read ended
+		// (io.EOF or a hard error); both report the bytes discarded so far.
+		return total, rerr
+	}
+}
+
+// unknownDedup is the per-file seen-set that limits unknown-variant
+// SourceErrors to one per distinct unknown top-level `type` OR nested
+// "<owner>/<payload.type>" per session (spec rule #2, acceptance #2). It sits
+// in the scanner because parseLine reports the offending variant as a typed
+// error (parser.go: unknownTypeError / unknownPayloadTypeError) before the
+// mapper runs. Callers prefix keys per family ("type:" vs "payload:") so a
+// top-level name never collides with a nested one.
+type unknownDedup struct {
+	seen map[string]struct{}
+}
+
+// newUnknownDedup returns an empty per-file dedup set ready for use.
+func newUnknownDedup() *unknownDedup {
+	return &unknownDedup{seen: make(map[string]struct{})}
+}
+
+// first records key and reports whether this was its first occurrence. A nil
+// receiver remembers nothing (always true); a zero value allocates lazily.
+func (d *unknownDedup) first(key string) bool {
+	switch {
+	case d == nil:
+		return true
+	case d.seen == nil:
+		d.seen = make(map[string]struct{})
+	}
+	_, dup := d.seen[key]
+	if !dup {
+		d.seen[key] = struct{}{}
+	}
+	return !dup
+}
+
+// shouldSurfaceParseError decides whether a per-line parse error is forwarded
+// to onError. An unknown top-level `type` or unknown nested `payload.type` is
+// reported once per distinct variant per file (spec rule #2, acceptance #2) via
+// dedup; every other parse error (malformed JSON, missing type, decode failure)
+// is reported each time, since each one describes a different broken line.
+// Mirrors claude_code's helper, extended for codex's nested unknown family.
+func shouldSurfaceParseError(dedup *unknownDedup, perr error) bool {
+	var (
+		topLevel *unknownTypeError
+		nested   *unknownPayloadTypeError
+	)
+	switch {
+	case errors.As(perr, &topLevel):
+		return dedup.first("type:" + topLevel.Type)
+	case errors.As(perr, &nested):
+		return dedup.first("payload:" + nested.Owner + "/" + nested.Type)
+	}
+	return true
+}
+
+// withinResolvedRoot reports whether abs, once its symlinks are resolved, lies
+// inside resolvedRoot (security.md §6 "No symlink traversal escape"). It is
+// meant for callers that have ALREADY resolved the sessions root once (the
+// directory-walk hot path, where every rollout shares one resolved root):
+// resolvedRoot MUST be filepath.EvalSymlinks of the configured root, and only
+// abs is resolved here. Returns:
+//   - (resolvedAbs, true, nil)  — abs resolves to a path under the root.
+//   - ("", false, nil)          — abs resolves outside the root (escape).
+//   - ("", false, err)          — the path could not be resolved.
+//
+// Mirrors claude_code (Chunk D's payloadURI adds the single-shot
+// resolveWithinRoot wrapper for when the root must be resolved per call).
+func withinResolvedRoot(resolvedRoot, abs string) (string, bool, error) {
+	target, err := evalSymlinksAllowingTail(filepath.Clean(abs))
+	if err != nil {
+		return "", false, fmt.Errorf("resolve path %q: %w", abs, err)
+	}
+	rel, err := filepath.Rel(resolvedRoot, target)
+	if err != nil {
+		return "", false, fmt.Errorf("relative %q under %q: %w", target, resolvedRoot, err)
+	}
+	escapes := rel == ".." || filepath.IsAbs(rel) || strings.HasPrefix(rel, ".."+string(filepath.Separator))
+	if escapes {
+		return "", false, nil
+	}
+	return target, true, nil
+}
+
+// evalSymlinksAllowingTail resolves symlinks in abs while tolerating a leaf (or
+// longer tail) that does not exist yet: it resolves the deepest existing
+// ancestor and re-joins the missing remainder. A path that does not exist
+// cannot itself be a symlink, so judging it by its resolved parent is sound.
+func evalSymlinksAllowingTail(abs string) (string, error) {
+	resolved, err := filepath.EvalSymlinks(abs)
+	switch {
+	case err == nil:
+		return resolved, nil
+	case !os.IsNotExist(err):
+		return "", err
+	}
+	// abs does not exist: resolve its parent and re-attach the missing leaf.
+	dir, leaf := filepath.Dir(abs), filepath.Base(abs)
+	if dir == abs {
+		// Reached the filesystem root without an existing ancestor.
+		return abs, nil
+	}
+	resolvedDir, err := evalSymlinksAllowingTail(dir)
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(resolvedDir, leaf), nil
+}
+
+// withinSourceRoot reports whether abs resolves (through symlinks) to a path
+// inside resolvedRoot. On escape or on a resolve error it surfaces a
+// SourceError via onError and returns false, so a path that cannot be safely
+// resolved is skipped rather than read. Mirrors claude_code's withinSourceRoot.
+func withinSourceRoot(resolvedRoot, abs string, onError func(error)) bool {
+	_, ok, err := withinResolvedRoot(resolvedRoot, abs)
+	if err != nil {
+		onError(fmt.Errorf("codex: cannot resolve %s for containment; skipping: %w", abs, err))
+		return false
+	}
+	if !ok {
+		target, _ := evalSymlinksAllowingTail(filepath.Clean(abs)) // message only: withinResolvedRoot returns "" on escape
+		onError(fmt.Errorf("codex: %s resolves to %s outside the sessions root; skipping (symlink escape)", abs, target))
+		return false
+	}
+	return true
+}
diff --git a/internal/adapters/codex/stream_test.go b/internal/adapters/codex/stream_test.go
new file mode 100644
index 0000000..81ad491
--- /dev/null
+++ b/internal/adapters/codex/stream_test.go
@@ -0,0 +1,373 @@
+package codex
+
+import (
+	"bufio"
+	"context"
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestReadOneLine_CompleteAndPartial covers the nil-error (complete line) and
+// io.EOF hold-back (partial trailing line) branches of readOneLine.
+func TestReadOneLine_CompleteAndPartial(t *testing.T) {
+	t.Parallel()
+	br := bufio.NewReaderSize(strings.NewReader("ab\ncd"), streamReaderSize)
+	line, n, err := readOneLine(br)
+	if err != nil || string(line) != "ab\n" || n != 3 {
+		t.Fatalf("first line = (%q,%d,%v), want (\"ab\\n\",3,nil)", line, n, err)
+	}
+	// "cd" has no trailing newline → held back as io.EOF, consumed 0.
+	line, n, err = readOneLine(br)
+	if !errors.Is(err, io.EOF) || n != 0 || line != nil {
+		t.Fatalf("partial line = (%q,%d,%v), want (nil,0,EOF)", line, n, err)
+	}
+}
+
+// TestReadOneLine_OversizedWithNewline exercises the errLineTooLong +
+// drainToNewline path: a line longer than scanBufferMax that DOES terminate in
+// a '\n' (so the rest is drained and the reader is positioned at the next line).
+func TestReadOneLine_OversizedWithNewline(t *testing.T) {
+	t.Parallel()
+	big := strings.Repeat("x", scanBufferMax+(2*streamReaderSize))
+	src := big + "\n" + "next\n"
+	br := bufio.NewReaderSize(strings.NewReader(src), streamReaderSize)
+	_, consumed, err := readOneLine(br)
+	if !errors.Is(err, errLineTooLong) {
+		t.Fatalf("oversized line err = %v, want errLineTooLong", err)
+	}
+	// consumed must cover the whole oversized line up to AND including its '\n'.
+	if consumed != int64(len(big)+1) {
+		t.Fatalf("consumed = %d, want %d (drained to newline)", consumed, len(big)+1)
+	}
+	// The reader is now positioned at "next\n".
+	line, _, err := readOneLine(br)
+	if err != nil || string(line) != "next\n" {
+		t.Fatalf("post-drain line = (%q,%v), want (\"next\\n\",nil)", line, err)
+	}
+}
+
+// TestReadOneLine_OversizedNoNewline exercises the drainToNewline io.EOF branch:
+// an oversized line that runs to EOF with no trailing newline. consumed covers
+// the rest of the file and the next read reports io.EOF.
+func TestReadOneLine_OversizedNoNewline(t *testing.T) {
+	t.Parallel()
+	big := strings.Repeat("y", scanBufferMax+(2*streamReaderSize))
+	br := bufio.NewReaderSize(strings.NewReader(big), streamReaderSize)
+	_, consumed, err := readOneLine(br)
+	if !errors.Is(err, errLineTooLong) {
+		t.Fatalf("oversized-noeol err = %v, want errLineTooLong", err)
+	}
+	if consumed != int64(len(big)) {
+		t.Fatalf("consumed = %d, want %d (drained to EOF)", consumed, len(big))
+	}
+	if _, _, err := readOneLine(br); !errors.Is(err, io.EOF) {
+		t.Fatalf("post-drain read = %v, want io.EOF", err)
+	}
+}
+
+// TestReadOneLine_SingleSliceOversized exercises the err==nil oversized branch:
+// a buffer large enough that ReadSlice returns the whole line in one call but it
+// still exceeds scanBufferMax.
+func TestReadOneLine_SingleSliceOversized(t *testing.T) {
+	t.Parallel()
+	big := strings.Repeat("z", scanBufferMax+5) + "\n"
+	// Reader buffer larger than the line so ReadSlice returns it whole (err==nil).
+	br := bufio.NewReaderSize(strings.NewReader(big), len(big)+16)
+	_, consumed, err := readOneLine(br)
+	if !errors.Is(err, errLineTooLong) {
+		t.Fatalf("single-slice oversized err = %v, want errLineTooLong", err)
+	}
+	if consumed != int64(len(big)) {
+		t.Fatalf("consumed = %d, want %d", consumed, len(big))
+	}
+}
+
+// TestDrainToNewline_ErrorPropagates asserts drainToNewline returns a non-EOF
+// read error from the underlying reader.
+func TestDrainToNewline_ErrorPropagates(t *testing.T) {
+	t.Parallel()
+	br := bufio.NewReaderSize(&errReader{after: []byte("nonewline")}, 4)
+	_, err := drainToNewline(br)
+	if err == nil || errors.Is(err, io.EOF) {
+		t.Fatalf("drainToNewline error = %v, want a non-EOF error", err)
+	}
+}
+
+// errReader returns `after` bytes once, then a hard error (not io.EOF) so the
+// drain/read error branches are exercised.
+type errReader struct {
+	after []byte
+	done  bool
+}
+
+func (e *errReader) Read(p []byte) (int, error) {
+	if !e.done && len(e.after) > 0 {
+		n := copy(p, e.after)
+		e.after = e.after[n:]
+		if len(e.after) == 0 {
+			e.done = true
+		}
+		return n, nil
+	}
+	return 0, errors.New("synthetic read failure")
+}
+
+// TestStreamLines_ReadErrorSurfaces asserts a hard read error (not EOF) from the
+// reader is returned wrapped, not swallowed.
+func TestStreamLines_ReadErrorSurfaces(t *testing.T) {
+	t.Parallel()
+	m := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid"})
+	out := make(chan canonical.Event, 16)
+	res, err := streamLines(context.Background(), &errReader{after: []byte("nonewline")}, 0, "r.jsonl", m, newUnknownDedup(), out, func(error) {})
+	if err == nil {
+		t.Fatalf("streamLines with read failure = nil err, want error; res=%+v", res)
+	}
+}
+
+// TestStreamLines_ParseErrorSurfaced asserts a malformed (non-unknown-type)
+// line surfaces a SourceError, while the stream continues past it.
+func TestStreamLines_ParseErrorSurfaced(t *testing.T) {
+	t.Parallel()
+	m := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid"})
+	out := make(chan canonical.Event, 64)
+	var errs []string
+	onError := func(e error) { errs = append(errs, e.Error()) }
+	src := metaLine("sid", `"exec"`) + "\n" + `{"type":}` + "\n" // 2nd line malformed JSON
+	res, err := streamLines(context.Background(), strings.NewReader(src), 0, "r.jsonl", m, newUnknownDedup(), out, onError)
+	if err != nil {
+		t.Fatalf("streamLines = %v", err)
+	}
+	if len(errs) == 0 {
+		t.Error("malformed line did not surface a SourceError")
+	}
+	if res.emitted == 0 {
+		t.Error("session_meta before the malformed line should still emit")
+	}
+}
+
+// TestStreamLines_ContextCancelMidEmit asserts streamLines returns a non-nil
+// error when its context is cancelled; the advanced offset is not checked.
+func TestStreamLines_ContextCancelMidEmit(t *testing.T) {
+	t.Parallel()
+	m := newFileMapper(mapperConfig{sourceID: "codex:/t", nativeID: "sid"})
+	// NOTE(review): ctx is cancelled before the call, so streamLines returns at its loop-top ctx.Err() check; no emit is attempted.
+	out := make(chan canonical.Event)
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	src := metaLine("sid", `"exec"`) + "\n"
+	_, err := streamLines(ctx, strings.NewReader(src), 0, "r.jsonl", m, newUnknownDedup(), out, func(error) {})
+	if err == nil {
+		t.Fatal("streamLines on cancelled ctx during emit = nil, want ctx err")
+	}
+}
+
+// TestShouldSurfaceParseError_Families covers all three branches: unknown
+// top-level type (deduped), unknown nested payload type (deduped), and a
+// generic error (always surfaced).
+func TestShouldSurfaceParseError_Families(t *testing.T) {
+	t.Parallel()
+	d := newUnknownDedup()
+	ute := &unknownTypeError{Type: "frob"}
+	if !shouldSurfaceParseError(d, ute) || shouldSurfaceParseError(d, ute) {
+		t.Error("unknown top-level type dedup broken")
+	}
+	upe := &unknownPayloadTypeError{Owner: "response_item", Type: "ufo"}
+	if !shouldSurfaceParseError(d, upe) || shouldSurfaceParseError(d, upe) {
+		t.Error("unknown nested payload type dedup broken")
+	}
+	// A top-level "frob" and a nested ".../frob" must not collide (distinct key spaces).
+	upe2 := &unknownPayloadTypeError{Owner: "response_item", Type: "frob"}
+	if !shouldSurfaceParseError(d, upe2) {
+		t.Error("nested key collided with top-level key space")
+	}
+	generic := errors.New("malformed json")
+	// A generic (non-unknown-variant) error surfaces every time — calling twice
+	// must both report true (no dedup).
+	if !shouldSurfaceParseError(d, generic) {
+		t.Error("generic parse error must surface on first sight")
+	}
+	if !shouldSurfaceParseError(d, generic) {
+		t.Error("generic parse error must surface again (not deduped)")
+	}
+	// Nil dedup is tolerated (always first).
+	if !shouldSurfaceParseError(nil, ute) {
+		t.Error("nil dedup should report first=true")
+	}
+}
+
+// TestWithinResolvedRoot_EscapeAndTail covers the escape branch and the
+// not-yet-exist tail recursion (evalSymlinksAllowingTail) of withinResolvedRoot.
+func TestWithinResolvedRoot_EscapeAndTail(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, err := filepath.EvalSymlinks(root)
+	if err != nil {
+		t.Fatalf("evalsymlinks: %v", err)
+	}
+	// A path under the root that does NOT exist yet → judged by its parent,
+	// returns inside=true (exercises evalSymlinksAllowingTail recursion).
+	notYet := filepath.Join(resolved, "2025", "11", "20", "rollout-x.jsonl")
+	got, ok, err := withinResolvedRoot(resolved, notYet)
+	if err != nil || !ok {
+		t.Fatalf("non-existent in-root path = (%q,%v,%v), want inside", got, ok, err)
+	}
+	// A path clearly outside the root → escape (inside=false, no error).
+	outside := filepath.Join(filepath.Dir(resolved), "elsewhere", "x.jsonl")
+	_, ok, err = withinResolvedRoot(resolved, outside)
+	if err != nil {
+		t.Fatalf("outside path resolve err = %v", err)
+	}
+	if ok {
+		t.Error("path outside the root must report inside=false")
+	}
+}
+
+// TestWithinSourceRoot_SurfacesEscape covers withinSourceRoot's onError escape
+// branch.
+func TestWithinSourceRoot_SurfacesEscape(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	outside := filepath.Join(filepath.Dir(resolved), "elsewhere", "x.jsonl")
+	var errs []string
+	ok := withinSourceRoot(resolved, outside, func(e error) { errs = append(errs, e.Error()) })
+	if ok {
+		t.Error("withinSourceRoot must reject an out-of-root path")
+	}
+	if len(errs) == 0 || !strings.Contains(errs[0], "outside the sessions root") {
+		t.Errorf("escape not surfaced; errs=%v", errs)
+	}
+}
+
+// TestUnknownDedup_NilSafe asserts the dedup helper tolerates a nil receiver and
+// a nil map.
+func TestUnknownDedup_NilSafe(t *testing.T) {
+	t.Parallel()
+	var d *unknownDedup
+	if !d.first("k") {
+		t.Error("nil dedup.first should report true")
+	}
+	d2 := &unknownDedup{}
+	if !d2.first("k") || d2.first("k") {
+		t.Error("zero-value dedup.first broken")
+	}
+}
+
+// TestFirstRecordIsSessionMeta_Branches covers the empty-file, blank-line-skip,
+// meta-first, non-meta-first, and malformed-first-line cases of the rule #24
+// probe (the oversized first line has its own test below).
+func TestFirstRecordIsSessionMeta_Branches(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name    string
+		content string
+		want    bool
+	}{
+		{"empty", "", false},
+		{"blank-then-meta", "\n\n" + metaLine("sid", `"exec"`) + "\n", true},
+		{"meta-first", metaLine("sid", `"exec"`) + "\n", true},
+		{"turn-context-first", `{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{}}` + "\n", false},
+		{"malformed-first", `{not json` + "\n", false},
+	}
+	for _, c := range cases {
+		c := c
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			dir := t.TempDir()
+			path := filepath.Join(dir, "r.jsonl")
+			if err := os.WriteFile(path, []byte(c.content), 0o644); err != nil {
+				t.Fatalf("write: %v", err)
+			}
+			f, err := os.Open(path) // #nosec G304 -- test-controlled temp path
+			if err != nil {
+				t.Fatalf("open: %v", err)
+			}
+			defer func() { _ = f.Close() }()
+			info, _ := f.Stat()
+			got, perr := firstRecordIsSessionMeta(f, info.Size())
+			if perr != nil {
+				t.Fatalf("probe err: %v", perr)
+			}
+			if got != c.want {
+				t.Errorf("firstRecordIsSessionMeta(%s) = %v, want %v", c.name, got, c.want)
+			}
+		})
+	}
+}
+
+// TestFirstRecordIsSessionMeta_OversizedFirstLine asserts a first line longer
+// than the scan buffer is skipped over (not a usable session_meta) and the
+// probe keeps reading: it finds the session_meta on the next line (true).
+func TestFirstRecordIsSessionMeta_OversizedFirstLine(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "r.jsonl")
+	big := strings.Repeat("x", scanBufferMax+(2*streamReaderSize))
+	content := big + "\n" + metaLine("sid", `"exec"`) + "\n"
+	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	f, err := os.Open(path) // #nosec G304 -- test-controlled temp path
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer func() { _ = f.Close() }()
+	info, _ := f.Stat()
+	// The oversized first line is drained; the SECOND line is a session_meta, so
+	// the probe finds it and returns true (forgiving past the unusable line).
+	got, perr := firstRecordIsSessionMeta(f, info.Size())
+	if perr != nil {
+		t.Fatalf("probe err: %v", perr)
+	}
+	if !got {
+		t.Error("probe should find the session_meta after an oversized first line")
+	}
+}
+
+// TestUUIDTailAndIsHex covers the non-UUID and bad-hex branches.
+func TestUUIDTailAndIsHex(t *testing.T) {
+	t.Parallel()
+	if uuidTail("too-few-parts") != "" {
+		t.Error("uuidTail with <5 groups should be empty")
+	}
+	// Right group count, wrong lengths.
+	if uuidTail("a-b-c-d-e") != "" {
+		t.Error("uuidTail with wrong group lengths should be empty")
+	}
+	// Right shape but non-hex.
+	if uuidTail("2025-11-20T16-zzzzzzzz-a2a1-75c3-a9bf-d8425e1785f5") != "" {
+		t.Error("uuidTail with non-hex group should be empty")
+	}
+	if !isHex("00ff") || isHex("") || isHex("xy") {
+		t.Error("isHex wrong")
+	}
+}
+
+// TestEmitProgress_CancelledCtx covers emitProgress's ctx.Err early return.
+func TestEmitProgress_CancelledCtx(t *testing.T) {
+	t.Parallel()
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	out := make(chan canonical.Event, 1)
+	if err := emitProgress(ctx, "codex:/t", newCursor(), out); err == nil {
+		t.Error("emitProgress on cancelled ctx should return ctx err")
+	}
+}
+
+// TestRelPath covers relPath's happy path and its forward-slash result. The
+// error branch (a path that cannot be made relative to the root, e.g. a
+// different volume root on Windows) is not exercised: on POSIX an absolute
+// path vs. the root produces a clean rel.
+func TestRelPath(t *testing.T) {
+	t.Parallel()
+	got, err := relPath("/a/b", "/a/b/c/d.jsonl")
+	if err != nil || got != "c/d.jsonl" {
+		t.Fatalf("relPath = (%q,%v), want (\"c/d.jsonl\",nil)", got, err)
+	}
+}
diff --git a/internal/adapters/codex/tailer.go b/internal/adapters/codex/tailer.go
new file mode 100644
index 0000000..d80a64d
--- /dev/null
+++ b/internal/adapters/codex/tailer.go
@@ -0,0 +1,360 @@
+package codex
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/fsnotify/fsnotify"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// debounceWindow coalesces rapid Write events per flush cycle (spec
+// §"Watch Strategy"). Mirrors claude_code.
+const debounceWindow = 50 * time.Millisecond
+
+// debounceMaxEntries bounds the dirty set before a forced flush.
+const debounceMaxEntries = 4096
+
+// tailTickInterval drives periodic SourceProgress emission and rescans for new
+// shard directories (new YYYY/MM/DD created daily) that fsnotify
+// (non-recursive on Linux) may have missed. Spec §"Watch Strategy" specifies a
+// periodic full sweep; 5 s matches claude_code's cadence (a new-date-dir is
+// also picked up immediately via the Create event handler — the tick is the
+// slow-filesystem backstop).
+const tailTickInterval = 5 * time.Second
+
+// tailLoop runs the fsnotify event loop until ctx is cancelled. fsnotify is not
+// recursive on Linux, so the loop walks the tree at startup and Add()s the root
+// plus every YYYY, YYYY/MM, YYYY/MM/DD shard directory, and re-walks on a tick
+// to pick up new date dirs created since the last walk (spec §"Watch
+// Strategy"). The adapter owns the watcher lifecycle.
+func tailLoop(ctx context.Context, root, sourceID string, cur Cursor, out chan<- canonical.Event, onError func(error)) error {
+	if cur.Files == nil {
+		cur = newCursor()
+	}
+	watcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		return fmt.Errorf("codex: fsnotify watcher: %w", err)
+	}
+	defer func() { _ = watcher.Close() }()
+
+	// security.md §"Hard Rules" — read-only on sources, never mkdir. A missing
+	// root surfaces a SourceError and returns cleanly so the daemon keeps running
+	// for other sources.
+	if _, statErr := os.Stat(root); statErr != nil {
+		onError(fmt.Errorf("codex: sessions root %s not present (read-only on sources, no mkdir): %w", root, statErr))
+		return nil
+	}
+	// Resolve the root through symlinks ONCE so every watched dir can be checked
+	// for containment against it (security.md §6): a symlinked directory inside
+	// the tree that points outside the resolved root is never Add()ed.
+	resolvedRoot, rrErr := filepath.EvalSymlinks(filepath.Clean(root))
+	if rrErr != nil {
+		onError(fmt.Errorf("codex: cannot resolve sessions root %s; tail disabled for this source: %w", root, rrErr))
+		return nil
+	}
+	watched := map[string]struct{}{}
+	// Walk the RESOLVED root: filepath.WalkDir does not descend INTO a symlinked
+	// walk-root, so walking the unresolved root would Add() zero directories
+	// under a legitimately-symlinked sessions root. handleEvent still passes
+	// newly-created dirs (real paths) as they appear; fsnotify dedups overlap.
+	addWatchTree(watcher, resolvedRoot, resolvedRoot, watched, onError)
+
+	dirty := make(map[string]struct{}, 16)
+
+	// Initial catch-up (spec §"Watch Strategy"): the watch is now established,
+	// but bytes appended to a known file BETWEEN Scan finishing and this point
+	// arrived before the watch and would otherwise only be read on the next WRITE
+	// event (which may never come for an idle session). Read every known file
+	// from its cursor offset to current EOF once, up front. Re-emission of an
+	// already-consumed line is absorbed by the ingester's idempotent upserts.
+	if perr := catchUpFromCursor(ctx, resolvedRoot, root, sourceID, &cur, out, onError); perr != nil {
+		return perr
+	}
+
+	debounce := time.NewTimer(debounceWindow)
+	defer debounce.Stop()
+	if !debounce.Stop() { // start disarmed: nothing is dirty yet
+		<-debounce.C
+	}
+	tick := time.NewTicker(tailTickInterval)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil // cancellation is a clean shutdown, not an error
+		case ev, ok := <-watcher.Events:
+			if !ok {
+				return nil // Events channel closed
+			}
+			handleEvent(watcher, resolvedRoot, ev, watched, dirty, onError)
+			if len(dirty) >= debounceMaxEntries {
+				if perr := flushDirty(ctx, resolvedRoot, root, sourceID, dirty, &cur, out, onError); perr != nil {
+					return perr
+				}
+				dirty = make(map[string]struct{}, 16)
+				continue
+			}
+			if len(dirty) > 0 {
+				resetDebounce(debounce)
+			}
+		case werr, ok := <-watcher.Errors:
+			if !ok {
+				return nil // Errors channel closed
+			}
+			onError(fmt.Errorf("codex: watcher: %w", werr))
+		case <-debounce.C:
+			if perr := flushDirty(ctx, resolvedRoot, root, sourceID, dirty, &cur, out, onError); perr != nil {
+				return perr
+			}
+			dirty = make(map[string]struct{}, 16)
+		case <-tick.C:
+			// Re-walk to Add() any shard directory created since startup (new
+			// YYYY/MM/DD date dir). codex creates a new date dir daily; the Create
+			// handler picks it up immediately, and this tick is the slow-filesystem
+			// backstop. Walk the RESOLVED root so a symlinked sessions root is fully
+			// descended.
+			addWatchTree(watcher, resolvedRoot, resolvedRoot, watched, onError)
+			if perr := emitProgress(ctx, sourceID, cur, out); perr != nil {
+				return perr
+			}
+		}
+	}
+}
+
+// catchUpFromCursor runs once at Tail startup and reads every modern rollout
+// that discovery currently finds, from its cursor offset to current EOF (spec
+// §"Watch Strategy"). This closes the Scan→Tail window: bytes appended before
+// the watch existed are read now instead of waiting for a future WRITE event.
+// It delegates to flushDirty so offset advance, partial-line hold-back, and the
+// SourceProgress checkpoint match the steady-state path. A file Scan already
+// consumed fully re-reads zero new bytes (offset == size) and emits nothing.
+// Legacy files are NOT re-reported here (Scan already emitted the one-time
+// SourceError; the cursor suppresses them).
+func catchUpFromCursor(ctx context.Context, resolvedRoot, root, sourceID string, cur *Cursor, out chan<- canonical.Event, onError func(error)) error {
+	found, err := discoverRollouts(root, onError)
+	if err != nil {
+		// Non-fatal for Tail: surface the discovery failure and fall through to
+		// the watch loop, where steady-state WRITE events still drive reads.
+		onError(fmt.Errorf("codex: tail catch-up discovery: %w", err))
+		return nil
+	}
+	pending := make(map[string]struct{}, len(found.modern))
+	for _, ro := range found.modern {
+		pending[ro.rel] = struct{}{}
+	}
+	if len(pending) == 0 {
+		return nil
+	}
+	return flushDirty(ctx, resolvedRoot, root, sourceID, pending, cur, out, onError)
+}
+
+// handleEvent classifies one fsnotify event. New directories (new date shards)
+// are added to the watch set AND walked for any rollout files already present
+// (the create-race window). Rollout writes mark the relative path dirty.
+// Removes/renames are logged and never touch the cursor (spec edge #13: codex
+// does not rename; a manual rename leaves the old cursor entry stale), but the
+// path is dropped from watched so a directory that reappears is Add()ed again.
+//
+// Cursor keys are derived against the RESOLVED root: every watched dir is
+// Add()ed from the resolved-root walk, so fsnotify reports event paths under
+// the resolved root and relPath(resolvedRoot, …) yields the SAME key the scan
+// side records for one file, regardless of root symlinking.
+func handleEvent(watcher *fsnotify.Watcher, resolvedRoot string, ev fsnotify.Event, watched, dirty map[string]struct{}, onError func(error)) {
+	// A newly created directory must be watched (fsnotify is non-recursive).
+	// Files written into it BEFORE we Add() the watch would be missed, so we also
+	// walk the new dir and mark any rollouts already present as dirty.
+	if ev.Op&fsnotify.Create != 0 {
+		if info, err := os.Stat(ev.Name); err == nil && info.IsDir() {
+			// Prune the archive subtree; never watch it.
+			if filepath.Base(ev.Name) == archivedSessionsDir {
+				return
+			}
+			addWatchTree(watcher, resolvedRoot, ev.Name, watched, onError)
+			markExistingDirty(resolvedRoot, ev.Name, dirty, onError)
+			return
+		}
+	}
+	base := filepath.Base(ev.Name)
+	if ev.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
+		// A removed/renamed dir is no longer watched under this path; forget it so
+		// addWatchTree re-Add()s the path if it reappears (a no-op for files).
+		delete(watched, ev.Name)
+		if modernNameRe.MatchString(base) {
+			onError(fmt.Errorf("codex: %s removed/renamed", relOrBase(resolvedRoot, ev.Name)))
+		}
+		return
+	}
+	// Only modern rollout names are tailed; legacy flat .json at the root and
+	// all ignored names (sqlite, history, session_index) are dropped here.
+	if !modernNameRe.MatchString(base) {
+		return
+	}
+	rel, err := relPath(resolvedRoot, ev.Name)
+	if err != nil {
+		return
+	}
+	dirty[rel] = struct{}{}
+}
+
+// markExistingDirty walks dir (a directory that has just appeared) and marks
+// every modern rollout file found in it as dirty, so the next flush picks up
+// content written before the watch was added (the create-race window).
+// Directory watching itself is addWatchTree's job; this covers the files. base
+// is the RESOLVED root, so the recorded keys match the scan cursor keys. Walk
+// errors other than IsNotExist go to onError (nil is tolerated) and the walk
+// carries on past the failing entry.
+func markExistingDirty(base, dir string, dirty map[string]struct{}, onError func(error)) {
+	report := onError
+	if report == nil {
+		report = func(error) {}
+	}
+	// visit is the WalkDir callback; it only ever returns nil or SkipDir.
+	visit := func(path string, d os.DirEntry, err error) error {
+		switch {
+		case err != nil:
+			// A vanished entry is expected churn; anything else is surfaced.
+			if !os.IsNotExist(err) {
+				report(fmt.Errorf("codex: walk new dir %s: %w", path, err))
+			}
+			if d != nil && d.IsDir() {
+				return filepath.SkipDir
+			}
+		case d.IsDir():
+			// Never descend into the archive subtree (dir itself is exempt).
+			if path != dir && d.Name() == archivedSessionsDir {
+				return filepath.SkipDir
+			}
+		case modernNameRe.MatchString(d.Name()):
+			// A path relPath cannot express against base is skipped silently.
+			if rel, rerr := relPath(base, path); rerr == nil {
+				dirty[rel] = struct{}{}
+			}
+		}
+		return nil
+	}
+	_ = filepath.WalkDir(dir, visit)
+}
+
+// relOrBase renders abs for log messages: the path relative to base when
+// relPath succeeds, otherwise just the basename of abs.
+func relOrBase(base, abs string) string {
+	rel, err := relPath(base, abs)
+	if err != nil {
+		return filepath.Base(abs)
+	}
+	return rel
+}
+
+// addWatchTree walks dir and Add()s every directory not yet in watched; a
+// successful Add is recorded in watched so later calls (every tick) skip the
+// syscall even though fsnotify de-duplicates an already-watched path itself.
+// An Add failure for one dir is reported via onError without aborting the
+// walk. resolvedRoot is the symlink-resolved sessions root: a directory for
+// which withinSourceRoot reports false (a planted symlink escaping the
+// source, security.md §6) is skipped along with its subtree. The archive
+// subtree is pruned (never watched). Walk errors other than IsNotExist go to
+// onError and the walk continues past the failing entry. Mirrors claude_code.
+func addWatchTree(watcher *fsnotify.Watcher, resolvedRoot, dir string, watched map[string]struct{}, onError func(error)) {
+	visit := func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			if !os.IsNotExist(err) {
+				onError(fmt.Errorf("codex: walk watch tree %s: %w", path, err))
+			}
+			if d != nil && d.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		switch {
+		case !d.IsDir():
+			return nil // only directories are watched
+		case path != dir && d.Name() == archivedSessionsDir:
+			return filepath.SkipDir
+		case !withinSourceRoot(resolvedRoot, path, onError):
+			return filepath.SkipDir
+		}
+		if _, seen := watched[path]; seen {
+			return nil
+		}
+		if addErr := watcher.Add(path); addErr != nil {
+			onError(fmt.Errorf("codex: watch %s: %w", path, addErr))
+		} else {
+			watched[path] = struct{}{}
+		}
+		return nil
+	}
+	_ = filepath.WalkDir(dir, visit)
+}
+
+// resetDebounce re-arms the debounce timer for one full debounceWindow.
+func resetDebounce(t *time.Timer) {
+	if stopped := t.Stop(); !stopped {
+		select {
+		case <-t.C: // discard a tick left pending, without blocking if none is
+		default:
+		}
+	}
+	t.Reset(debounceWindow)
+}
+
+// flushDirty re-reads each dirty rollout from its cursor offset in sorted rel
+// order, folding every successful read back into the shared cursor, and ends
+// with a SourceProgress checkpoint. Reads go through readRollout, the same
+// reader the Scan path uses. A rel that rolloutForRel does not recognize is
+// skipped; a non-context read error is reported via onError and skipped too.
+func flushDirty(ctx context.Context, resolvedRoot, root, sourceID string, dirty map[string]struct{}, cur *Cursor, out chan<- canonical.Event, onError func(error)) error {
+	if len(dirty) == 0 {
+		return nil
+	}
+	// Map iteration order is random; sort the keys for a stable read order.
+	rels := make([]string, 0, len(dirty))
+	for rel := range dirty {
+		rels = append(rels, rel)
+	}
+	sort.Strings(rels)
+
+	for _, rel := range rels {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		ro, known := rolloutForRel(resolvedRoot, rel)
+		if !known {
+			continue
+		}
+		next, _, err := readRollout(ctx, resolvedRoot, ro, sourceID, cur.fileCursor(rel), out, onError)
+		switch {
+		case err == nil:
+			*cur = cur.withFile(rel, next)
+		case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded):
+			return err
+		default:
+			onError(err)
+		}
+	}
+	return emitProgress(ctx, sourceID, *cur, out)
+}
+
+// rolloutForRel turns a root-relative, slash-separated path back into a rollout
+// descriptor. A modern rollout looks like "YYYY/MM/DD/rollout-….jsonl"; only
+// the final path element is matched against modernNameRe. The abs path is
+// joined under the RESOLVED root. The second result is false when the final
+// element is not a modern rollout name (a legacy .json or an ignored name).
+func rolloutForRel(resolvedRoot, rel string) (rollout, bool) {
+	// LastIndex is -1 when rel has no "/", so the slice is then rel itself.
+	name := rel[strings.LastIndex(rel, "/")+1:]
+	if modernNameRe.MatchString(name) {
+		return rollout{
+			rel: rel,
+			abs: filepath.Join(resolvedRoot, filepath.FromSlash(rel)),
+		}, true
+	}
+	return rollout{}, false
+}
diff --git a/internal/adapters/codex/tailer_branch_test.go b/internal/adapters/codex/tailer_branch_test.go
new file mode 100644
index 0000000..1e4abee
--- /dev/null
+++ b/internal/adapters/codex/tailer_branch_test.go
@@ -0,0 +1,228 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// TestFlushDirty_DirectCoversBranches calls flushDirty with a mixed dirty set:
+// the modern rollout is read (its cursor offset advances), the unrecognized and
+// legacy names are skipped, and a closing SourceProgress is emitted.
+func TestFlushDirty_DirectCoversBranches(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	rolloutPath := shardPath(root, uuid7(1))
+	writeFileBytes(t, rolloutPath, completeSession("sid-fd"))
+	setMtime(t, rolloutPath, time.Minute)
+	rel := "2025/11/20/rollout-2025-11-20T16-59-09-" + uuid7(1) + ".jsonl"
+
+	events := make(chan canonical.Event, 64)
+	cursor := newCursor()
+	pending := map[string]struct{}{
+		rel:                         {},
+		"session_index.jsonl":       {}, // unrecognized → skipped
+		"rollout-2025-06-26-x.json": {}, // legacy → skipped
+	}
+	if err := flushDirty(context.Background(), resolved, root, "codex:"+root, pending, &cursor, events, func(error) {}); err != nil {
+		t.Fatalf("flushDirty: %v", err)
+	}
+	if cursor.Files[rel].Offset == 0 {
+		t.Error("flushDirty did not advance the modern rollout cursor")
+	}
+	emitted := drainBuffered(events)
+	if !hasKind(emitted, canonical.EvSessionStarted) {
+		t.Error("flushDirty did not emit the session")
+	}
+	if !hasKind(emitted, canonical.EvSourceProgress) {
+		t.Error("flushDirty did not emit a final SourceProgress")
+	}
+}
+
+// TestFlushDirty_Empty asserts an empty dirty set returns nil and emits nothing.
+func TestFlushDirty_Empty(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	events, cursor := make(chan canonical.Event, 4), newCursor()
+	err := flushDirty(context.Background(), resolved, root, "codex:"+root, map[string]struct{}{}, &cursor, events, func(error) {})
+	if err != nil {
+		t.Fatalf("flushDirty(empty): %v", err)
+	}
+	if len(drainBuffered(events)) != 0 {
+		t.Error("empty flushDirty should emit nothing")
+	}
+}
+
+// TestFlushDirty_CancelledCtx asserts flushDirty errors out on a cancelled context.
+func TestFlushDirty_CancelledCtx(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	id := uuid7(1)
+	writeFileBytes(t, shardPath(root, id), completeSession("sid-x"))
+	pending := map[string]struct{}{"2025/11/20/rollout-2025-11-20T16-59-09-" + id + ".jsonl": {}}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancelled up front, so the ctx is already done when flushDirty runs
+	events := make(chan canonical.Event, 4)
+	cursor := newCursor()
+	err := flushDirty(ctx, resolved, root, "codex:"+root, pending, &cursor, events, func(error) {})
+	if err == nil {
+		t.Error("flushDirty on cancelled ctx should return ctx err")
+	}
+}
+
+// TestFlushDirty_ReadErrorContinues asserts that a per-file open error is
+// reported through onError while flushDirty itself still returns nil. Skipped
+// when a 0o000 mode does not actually block opening the file.
+func TestFlushDirty_ReadErrorContinues(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	unreadable := shardPath(root, uuid7(1))
+	writeFileBytes(t, unreadable, completeSession("sid-bad"))
+	unreadableRel := "2025/11/20/rollout-2025-11-20T16-59-09-" + uuid7(1) + ".jsonl"
+	if err := os.Chmod(unreadable, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(unreadable, 0o644) })
+	if f, oerr := os.Open(unreadable); oerr == nil { // #nosec G304 -- test probe
+		_ = f.Close()
+		t.Skip("filesystem allowed opening a 0o000 file; seam not exercised")
+	}
+
+	events, cursor := make(chan canonical.Event, 8), newCursor()
+	var reported []string
+	record := func(e error) { reported = append(reported, e.Error()) }
+	err := flushDirty(context.Background(), resolved, root, "codex:"+root, map[string]struct{}{unreadableRel: {}}, &cursor, events, record)
+	if err != nil {
+		t.Fatalf("flushDirty should not fatal on a per-file read error: %v", err)
+	}
+	if len(reported) == 0 {
+		t.Error("per-file read error not surfaced")
+	}
+}
+
+// TestCatchUpFromCursor_NoFiles asserts catchUpFromCursor returns nil and emits
+// nothing when discovery finds no modern rollouts under an empty root.
+func TestCatchUpFromCursor_NoFiles(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	events, cursor := make(chan canonical.Event, 4), newCursor()
+	err := catchUpFromCursor(context.Background(), resolved, root, "codex:"+root, &cursor, events, func(error) {})
+	if err != nil {
+		t.Fatalf("catchUpFromCursor(empty): %v", err)
+	}
+	if len(drainBuffered(events)) != 0 {
+		t.Error("catch-up with no files should emit nothing")
+	}
+}
+
+// TestTail_DebounceFlushAndTick drives a real append after the watch is live
+// and asserts the debounce-flush path emits the appended turn (covers the
+// watcher.Events → resetDebounce → debounce.C → flushDirty cycle), then a tick
+// fires (covers the tick branch's progress emit).
+func TestTail_DebounceFlushAndTick(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(1))
+	writeFileBytes(t, path, []byte(metaLine("sid-deb", `"exec"`)+"\n"))
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+
+	// Drain the catch-up SessionStarted.
+	if _, ok := waitForKind(out, canonical.EvSessionStarted, 5*time.Second); !ok {
+		t.Fatal("catch-up SessionStarted missing")
+	}
+	// Append a complete turn → Write event → debounce → flush.
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsCtx+`","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`+"\n"))
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsDone+`","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"`+tsDone+`"}}`+"\n"))
+	if _, ok := waitForKind(out, canonical.EvTurnFinalized, 5*time.Second); !ok {
+		t.Fatal("debounce flush did not emit the appended TurnFinalized")
+	}
+	// Drain everything currently pending, then wait QUIETLY (no writes) for >1
+	// tick interval so the ONLY remaining SourceProgress can come from the tick
+	// arm of tailLoop, not a debounce flush — deterministically covering it.
+	drainFor(out, 200*time.Millisecond)
+	if _, ok := waitForKind(out, canonical.EvSourceProgress, tailTickInterval+3*time.Second); !ok {
+		t.Fatal("tick did not emit a SourceProgress during a quiet window")
+	}
+}
+
+// drainFor throws away whatever arrives on out until d has elapsed. It is used
+// to clear debounce-driven events so a following wait isolates the tick arm.
+func drainFor(out chan canonical.Event, d time.Duration) {
+	for stop := time.After(d); ; {
+		select {
+		case <-out:
+			continue
+		case <-stop:
+			return
+		}
+	}
+}
+
+// TestTail_ForcedFlushAtMaxEntries covers tailLoop's forced-flush arm: a single
+// new-date-shard Create event whose dir already contains more than
+// debounceMaxEntries rollout files dirties them all via markExistingDirty, so
+// the loop's `len(dirty) >= debounceMaxEntries` branch trips an immediate flush
+// (before the debounce timer), ingesting them. This is the inotify-queue-burst
+// protection path.
+func TestTail_ForcedFlushAtMaxEntries(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	// Pre-create the year AND month dirs: the day-shard Create lands in 2025/12.
+	if err := os.MkdirAll(filepath.Join(root, "2025", "12"), 0o755); err != nil {
+		t.Fatalf("seed: %v", err)
+	}
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+	time.Sleep(150 * time.Millisecond) // let the watch establish
+
+	// Build a day-shard dir with > debounceMaxEntries rollouts, then move it into
+	// place so a SINGLE Create event fires for the dir (markExistingDirty then
+	// dirties all of them at once → forced flush).
+	staging := filepath.Join(t.TempDir(), "28")
+	n := debounceMaxEntries + 5
+	for i := 0; i < n; i++ {
+		p := filepath.Join(staging, "rollout-2025-12-28T10-"+pad2(i/60)+"-"+pad2(i%60)+"-"+manyUUID(i)+".jsonl")
+		writeFileBytes(t, p, completeSession("sid-many"))
+	}
+	dest := filepath.Join(root, "2025", "12", "28")
+	if err := os.Rename(staging, dest); err != nil {
+		t.Fatalf("rename staging into place: %v", err)
+	}
+
+	// At least one burst session must ingest (any flush of the burst satisfies it).
+	if _, ok := waitForKind(out, canonical.EvSessionStarted, 15*time.Second); !ok {
+		t.Fatal("forced-flush did not ingest the burst of rollouts")
+	}
+}
+
+// manyUUID returns a UUID-shaped id whose trailing 12 hex digits encode i.
+func manyUUID(i int) string {
+	const prefix = "019aa234-a2a1-75c3-a9bf-"
+	return prefix + leftPadHex(i, 12)
+}
+
+// leftPadHex renders the low 4*w bits of i as exactly w lowercase hex digits,
+// zero-padded on the left.
+func leftPadHex(i, w int) string {
+	const digits = "0123456789abcdef"
+	out := make([]byte, w)
+	for pos, rest := w-1, i; pos >= 0; pos, rest = pos-1, rest>>4 {
+		out[pos] = digits[rest&0xf]
+	}
+	return string(out)
+}
diff --git a/internal/adapters/codex/tailer_test.go b/internal/adapters/codex/tailer_test.go
new file mode 100644
index 0000000..1b24b81
--- /dev/null
+++ b/internal/adapters/codex/tailer_test.go
@@ -0,0 +1,197 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// runTail launches tailLoop over root in a goroutine, starting from cur. It
+// returns the cancel func, a wait func that blocks until tailLoop has returned
+// and yields a copy of the errors reported via onError, and the event channel.
+func runTail(t *testing.T, root, sourceID string, cur Cursor) (context.CancelFunc, func() []string, chan canonical.Event) {
+	t.Helper()
+	ctx, cancel := context.WithCancel(context.Background())
+	out := make(chan canonical.Event, 4096)
+	var mu sync.Mutex
+	var errs []string
+	record := func(e error) {
+		mu.Lock()
+		defer mu.Unlock()
+		errs = append(errs, e.Error())
+	}
+	// done is closed when tailLoop returns; wait blocks on it.
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		_ = tailLoop(ctx, root, sourceID, cur, out, record)
+	}()
+	wait := func() []string {
+		<-done
+		mu.Lock()
+		defer mu.Unlock()
+		cp := make([]string, len(errs))
+		copy(cp, errs)
+		return cp
+	}
+	return cancel, wait, out
+}
+
+// waitForKind reads from out until an event of the wanted kind shows up or d
+// elapses. It returns every event read so far and whether the kind was seen.
+func waitForKind(out chan canonical.Event, kind canonical.EventKind, d time.Duration) ([]canonical.Event, bool) {
+	var seen []canonical.Event
+	// One timer for the whole wait: d bounds the total, not each receive.
+	for timeout := time.After(d); ; {
+		select {
+		case <-timeout:
+			return seen, false
+		case ev := <-out:
+			seen = append(seen, ev)
+			if ev.EventKind() == kind {
+				return seen, true
+			}
+		}
+	}
+}
+
+// TestTail_PicksUpAppendedRecords verifies the fsnotify tail loop emits events
+// for records appended to an existing rollout after Tail starts (the realtime
+// path). The seeded session_meta is consumed by the catch-up read and must NOT
+// be re-emitted as a duplicate; only the appended turn produces new turn
+// events, so exactly one SessionStarted is expected across both phases.
+func TestTail_PicksUpAppendedRecords(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(1))
+	// Seed with session_meta only so the catch-up reads it and advances the
+	// cursor; we then append a turn.
+	writeFileBytes(t, path, []byte(metaLine("sid-tail", `"exec"`)+"\n"))
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+
+	// Catch-up should emit the SessionStarted from the seeded meta.
+	seen, ok := waitForKind(out, canonical.EvSessionStarted, 5*time.Second)
+	if !ok {
+		t.Fatal("catch-up did not emit SessionStarted for seeded meta")
+	}
+
+	// Append a turn_context + task_complete after the watch is live.
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsCtx+`","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`+"\n"))
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsDone+`","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":"`+tsDone+`"}}`+"\n"))
+
+	got, ok := waitForKind(out, canonical.EvTurnFinalized, 5*time.Second)
+	if !ok {
+		t.Fatal("tail did not emit TurnFinalized for appended turn")
+	}
+	// The catch-up SessionStarted was drained into seen above, so count it with
+	// the tail events: exactly one in total means the meta was not re-emitted.
+	if n := countKind(append(seen, got...), canonical.EvSessionStarted); n != 1 {
+		t.Errorf("SessionStarted across catch-up + tail = %d, want exactly 1", n)
+	}
+}
+
+// TestTail_NewDateShardDir is the codex-specific requirement: a brand-new
+// YYYY/MM/DD shard directory created AFTER Tail starts is added to the watch
+// and the rollout written into it is ingested.
+func TestTail_NewDateShardDir(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	// Pre-create only the YYYY level, so only the root and the year dir exist
+	// when Tail starts; the new MM/DD dirs are created live.
+	if err := mkdirAll(t, filepath.Join(root, "2025")); err != nil {
+		t.Fatalf("seed year dir: %v", err)
+	}
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+
+	// Give Tail a moment to establish the watch on the root + year dir.
+	time.Sleep(150 * time.Millisecond)
+
+	// Write a complete rollout under a NEW month/day shard (writeFileBytes is
+	// assumed to create the missing parent dirs — confirm in the helper).
+	path := filepath.Join(root, "2025", "12", "25", "rollout-2025-12-25T09-00-00-"+uuid7(2)+".jsonl")
+	writeFileBytes(t, path, completeSession("sid-newday"))
+
+	if _, ok := waitForKind(out, canonical.EvSessionStarted, 8*time.Second); !ok {
+		t.Fatal("rollout in a newly-created date shard dir was not ingested")
+	}
+}
+
+// TestTail_CreateFileInWatchedShard asserts a brand-new rollout file created in
+// a shard dir that already existed when Tail started is ingested (the file
+// Create/Write path, not the new-directory path).
+func TestTail_CreateFileInWatchedShard(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	shardDir := filepath.Join(root, "2025", "11", "20")
+	if err := mkdirAll(t, shardDir); err != nil {
+		t.Fatalf("seed shard dir: %v", err)
+	}
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+	time.Sleep(150 * time.Millisecond) // let the watch establish
+
+	path := filepath.Join(shardDir, "rollout-2025-11-20T12-00-00-"+uuid7(3)+".jsonl")
+	writeFileBytes(t, path, completeSession("sid-created"))
+
+	if _, ok := waitForKind(out, canonical.EvSessionStarted, 8*time.Second); !ok {
+		t.Fatal("rollout created in a watched shard dir was not ingested")
+	}
+}
+
+// TestTail_MissingRootBenign asserts that tailLoop over a sessions root that
+// does not exist reports at least one error through onError and still returns
+// nil (the daemon keeps running for other sources).
+func TestTail_MissingRootBenign(t *testing.T) {
+	t.Parallel()
+	root := filepath.Join(t.TempDir(), "no-such-sessions")
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+	out := make(chan canonical.Event, 4)
+	var mu sync.Mutex
+	var errs []string
+	record := func(e error) {
+		mu.Lock()
+		defer mu.Unlock()
+		errs = append(errs, e.Error())
+	}
+	if err := tailLoop(ctx, root, "codex:"+root, newCursor(), out, record); err != nil {
+		t.Fatalf("tailLoop on missing root = %v, want nil", err)
+	}
+	if len(errs) == 0 {
+		t.Error("missing root should surface a SourceError")
+	}
+}
+
+// TestTail_CatchUpStaleFinalizes asserts the catch-up path applies the rule #23
+// stale-finalize: a hanging-turn file with a stale mtime present at Tail start
+// gets its synthetic SessionFinalized via catchUpFromCursor → flushDirty →
+// readRollout.
+func TestTail_CatchUpStaleFinalizes(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(4))
+	writeFileBytes(t, path, hangingSession("sid-stale"))
+	setMtime(t, path, 2*time.Hour) // NOTE(review): presumably back-dates mtime by 2h — confirm in setMtime
+
+	cancel, wait, out := runTail(t, root, "codex:"+root, newCursor())
+	defer func() { cancel(); wait() }()
+
+	if _, ok := waitForKind(out, canonical.EvSessionFinalized, 5*time.Second); !ok {
+		t.Fatal("catch-up did not stale-finalize a hanging session present at startup")
+	}
+}
+
+// mkdirAll creates dir and any missing parents (mode 0o755) and returns the error.
+func mkdirAll(t *testing.T, dir string) error {
+	t.Helper()
+	return os.MkdirAll(dir, 0o755)
+}
diff --git a/internal/adapters/codex/tailer_unit_test.go b/internal/adapters/codex/tailer_unit_test.go
new file mode 100644
index 0000000..aed22aa
--- /dev/null
+++ b/internal/adapters/codex/tailer_unit_test.go
@@ -0,0 +1,309 @@
+package codex
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/fsnotify/fsnotify"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// newWatcherT returns a fresh fsnotify watcher and registers its Close with t.Cleanup.
+func newWatcherT(t *testing.T) *fsnotify.Watcher {
+	t.Helper()
+	watcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		t.Fatalf("fsnotify watcher: %v", err)
+	}
+	t.Cleanup(func() { _ = watcher.Close() })
+	return watcher
+}
+
+// TestHandleEvent_WriteMarksModernDirty feeds handleEvent three Write events:
+// the one on a modern rollout must mark its rel dirty, while the ones on a
+// legacy .json and on a sqlite file must leave the dirty set unchanged.
+func TestHandleEvent_WriteMarksModernDirty(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	watched, dirty := map[string]struct{}{}, map[string]struct{}{}
+	w := newWatcherT(t)
+	write := func(name string) {
+		handleEvent(w, resolved, fsnotify.Event{Name: name, Op: fsnotify.Write}, watched, dirty, func(error) {})
+	}
+	name := "rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"
+	write(filepath.Join(resolved, "2025", "11", "20", name))
+	rel := "2025/11/20/" + name
+	if _, ok := dirty[rel]; !ok {
+		t.Fatalf("modern write did not mark %q dirty; dirty=%v", rel, dirty)
+	}
+
+	// A legacy .json at the root is NOT a modern rollout → not dirtied.
+	write(filepath.Join(resolved, "rollout-2025-06-26-abc.json"))
+	// A sqlite/history file → not dirtied.
+	write(filepath.Join(resolved, "state_5.sqlite"))
+	if len(dirty) != 1 {
+		t.Errorf("non-modern writes dirtied something; dirty=%v", dirty)
+	}
+}
+
+// TestHandleEvent_RemoveRenameLogged asserts a Remove on a modern rollout is
+// reported via onError without dirtying anything; a non-rollout Rename is silent.
+func TestHandleEvent_RemoveRenameLogged(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	watched, dirty := map[string]struct{}{}, map[string]struct{}{}
+	w := newWatcherT(t)
+	var errs []string
+	record := func(e error) { errs = append(errs, e.Error()) }
+
+	modern := filepath.Join(resolved, "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	handleEvent(w, resolved, fsnotify.Event{Name: modern, Op: fsnotify.Remove}, watched, dirty, record)
+	if len(dirty) != 0 {
+		t.Errorf("remove dirtied a file; dirty=%v", dirty)
+	}
+	if len(errs) == 0 || !strings.Contains(errs[0], "removed/renamed") {
+		t.Errorf("remove not logged; errs=%v", errs)
+	}
+	// A Rename on a non-rollout name logs nothing.
+	errs = nil
+	handleEvent(w, resolved, fsnotify.Event{Name: filepath.Join(resolved, "notes.txt"), Op: fsnotify.Rename}, watched, dirty, record)
+	if len(errs) != 0 {
+		t.Errorf("rename of non-rollout logged; errs=%v", errs)
+	}
+}
+
+// TestHandleEvent_CreateDirWatchesAndDirties asserts that a directory Create
+// puts the dir in the watch set and dirties a rollout already inside it (the
+// create-race window), and that a Create for the archive dir is ignored.
+func TestHandleEvent_CreateDirWatchesAndDirties(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	watched, dirty := map[string]struct{}{}, map[string]struct{}{}
+	w := newWatcherT(t)
+	create := func(dir string) {
+		handleEvent(w, resolved, fsnotify.Event{Name: dir, Op: fsnotify.Create}, watched, dirty, func(error) {})
+	}
+	// Create a new shard dir on disk with a rollout already inside it.
+	name := "rollout-2025-12-31T10-00-00-" + uuid7(2) + ".jsonl"
+	shard := filepath.Join(resolved, "2025", "12", "31")
+	writeFileBytes(t, filepath.Join(shard, name), completeSession("sid-x"))
+	create(shard)
+	if _, ok := watched[shard]; !ok {
+		t.Errorf("created dir not watched; watched=%v", watched)
+	}
+	if _, ok := dirty["2025/12/31/"+name]; !ok {
+		t.Errorf("rollout inside created dir not dirtied; dirty=%v", dirty)
+	}
+
+	// An archived_sessions dir Create is ignored (never watched).
+	arch := filepath.Join(resolved, archivedSessionsDir)
+	if err := os.MkdirAll(arch, 0o755); err != nil {
+		t.Fatalf("mkdir arch: %v", err)
+	}
+	create(arch)
+	if _, ok := watched[arch]; ok {
+		t.Error("archived_sessions dir must not be watched")
+	}
+}
+
+// TestRolloutForRel checks three inputs: a modern rel maps to an abs path under
+// the root, while a legacy .json and an ignored name are both rejected.
+func TestRolloutForRel(t *testing.T) {
+	t.Parallel()
+	const resolved = "/sessions"
+	r, ok := rolloutForRel(resolved, "2025/11/20/rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	if want := filepath.Join(resolved, "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl"); !ok || r.abs != want {
+		t.Fatalf("rolloutForRel modern = (%+v,%v)", r, ok)
+	}
+	if _, ok := rolloutForRel(resolved, "rollout-2025-06-26-abc.json"); ok {
+		t.Error("legacy .json must not map to a modern rollout")
+	}
+	if _, ok := rolloutForRel(resolved, "session_index.jsonl"); ok {
+		t.Error("ignored name must not map to a rollout")
+	}
+}
+
+// TestRelOrBase asserts the in-base case (rel returned) and smoke-calls the
+// function with a path outside base; the basename fallback is not asserted.
+func TestRelOrBase(t *testing.T) {
+	t.Parallel()
+	if got := relOrBase("/a/b", "/a/b/c.jsonl"); got != "c.jsonl" {
+		t.Errorf("relOrBase in-base = %q, want c.jsonl", got)
+	}
+	// A path on a different absolute subtree still produces a rel via filepath.Rel
+	// on POSIX ("../x"); the basename fallback only triggers on a Rel error
+	// (cross-volume), which is platform-specific. Assert the in-base case and
+	// that the function never panics on an unrelated path.
+	_ = relOrBase("/a/b", "/totally/other/x.jsonl")
+}
+
+// TestMarkExistingDirty_PrunesArchiveAndIgnores fills a shard with one modern
+// rollout, two ignored names, and an archived rollout, then asserts that
+// markExistingDirty dirties the modern rollout only.
+func TestMarkExistingDirty_PrunesArchiveAndIgnores(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	shard := filepath.Join(resolved, "2025", "11", "20")
+	modern := "rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"
+	writeFileBytes(t, filepath.Join(shard, modern), completeSession("sid-a"))
+	writeFileBytes(t, filepath.Join(shard, "rollout-x.json"), []byte("{}")) // ignored (legacy ext inside shard)
+	writeFileBytes(t, filepath.Join(shard, "notes.jsonl"), []byte("{}"))    // ignored (wrong prefix)
+	writeFileBytes(t, filepath.Join(shard, archivedSessionsDir, "rollout-2025-11-20T10-00-00-"+uuid7(2)+".jsonl"), completeSession("sid-arch"))
+
+	dirty := map[string]struct{}{}
+	markExistingDirty(resolved, shard, dirty, func(error) {})
+	if len(dirty) != 1 {
+		t.Fatalf("markExistingDirty dirtied %d, want 1 (only the modern rollout); dirty=%v", len(dirty), dirty)
+	}
+	if _, ok := dirty["2025/11/20/"+modern]; !ok {
+		t.Errorf("modern rollout not dirtied; dirty=%v", dirty)
+	}
+}
+
+// TestAddWatchTree_AddsDirsSkipsSymlink asserts addWatchTree Add()s the real
+// shard dirs under the root and does NOT descend a symlinked entry. WalkDir does
+// not follow symlinks, so an escaping symlink is never watched; a real dir whose
+// resolved path escapes is instead caught by the per-path containment guard,
+// which is exercised directly in TestWithinSourceRoot_SurfacesEscape rather
+// than here.
+func TestAddWatchTree_AddsDirsSkipsSymlink(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	shard := filepath.Join(resolved, "2025", "11", "20")
+	if err := os.MkdirAll(shard, 0o755); err != nil {
+		t.Fatalf("mkdir shard: %v", err)
+	}
+	// A symlinked dir at the root pointing OUTSIDE the sessions root.
+	outside := t.TempDir()
+	escape := filepath.Join(resolved, "escape")
+	if err := os.Symlink(outside, escape); err != nil {
+		t.Skipf("symlink unsupported: %v", err)
+	}
+
+	w := newWatcherT(t)
+	watched := map[string]struct{}{}
+	addWatchTree(w, resolved, resolved, watched, func(error) {})
+	if _, ok := watched[shard]; !ok {
+		t.Errorf("shard dir not watched; watched=%v", watched)
+	}
+	// The symlinked entry is not a directory to WalkDir, so it is never watched.
+	if _, ok := watched[escape]; ok {
+		t.Error("symlinked entry must not be watched (WalkDir does not follow symlinks)")
+	}
+}
+
+// TestAddWatchTree_PruneArchiveAndDedup covers addWatchTree's archive-prune
+// branch (an archived_sessions subtree is never watched) and the
+// already-watched dedup branch (a second call adds nothing new).
+func TestAddWatchTree_PruneArchiveAndDedup(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	shard := filepath.Join(resolved, "2025", "11", "20")
+	arch := filepath.Join(resolved, archivedSessionsDir, "2025", "11", "20")
+	for _, d := range []string{shard, arch} {
+		if err := os.MkdirAll(d, 0o755); err != nil {
+			t.Fatalf("mkdir %s: %v", d, err)
+		}
+	}
+	w := newWatcherT(t)
+	watched := map[string]struct{}{}
+	addWatchTree(w, resolved, resolved, watched, func(error) {})
+	if _, ok := watched[shard]; !ok {
+		t.Error("shard dir not watched")
+	}
+	if _, ok := watched[arch]; ok {
+		t.Error("archived_sessions subtree must not be watched")
+	}
+	// A second call must not re-Add (dedup branch); watched set size unchanged.
+	before := len(watched)
+	addWatchTree(w, resolved, resolved, watched, func(error) {})
+	if len(watched) != before {
+		t.Errorf("second addWatchTree changed watched set: %d → %d", before, len(watched))
+	}
+}
+
+// TestCatchUpFromCursor_DiscoveryError covers catchUpFromCursor's non-fatal
+// discovery-error branch: an unreadable root surfaces a SourceError and returns
+// nil (the watch loop still runs). Skipped where 0o000 is ignored.
+func TestCatchUpFromCursor_DiscoveryError(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	parent := t.TempDir()
+	root := filepath.Join(parent, "sessions")
+	if err := os.MkdirAll(root, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if err := os.Chmod(root, 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(root, 0o755) })
+	if _, derr := os.ReadDir(root); derr == nil {
+		t.Skip("filesystem allowed reading a 0o000 dir; discovery-error seam not exercised")
+	}
+
+	resolved := root // resolve will fail inside; pass root as the resolved arg too
+	var errs []string
+	cur := newCursor()
+	err := catchUpFromCursor(context.Background(), resolved, root, "codex:"+root, &cur, make(chan canonical.Event, 4), func(e error) { errs = append(errs, e.Error()) })
+	if err != nil {
+		t.Fatalf("catchUpFromCursor discovery error should be non-fatal, got %v", err)
+	}
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "tail catch-up discovery") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("catch-up discovery error not surfaced; errs=%v", errs)
+	}
+}
+
+// TestAddWatchTree_WalkErrorSurfaced asserts a non-IsNotExist walk error over an
+// unreadable subtree is surfaced and pruned (fail-soft). Skipped on filesystems
+// that allow descending a 0o000 dir.
+func TestAddWatchTree_WalkErrorSurfaced(t *testing.T) {
+	t.Parallel()
+	if os.Geteuid() == 0 {
+		t.Skip("running as root; chmod 0o000 does not block reads")
+	}
+	root := t.TempDir()
+	resolved, _ := filepath.EvalSymlinks(root)
+	blocked := filepath.Join(resolved, "2025", "11", "20", "deep")
+	if err := os.MkdirAll(blocked, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if err := os.Chmod(filepath.Dir(blocked), 0o000); err != nil {
+		t.Skipf("chmod unsupported: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(filepath.Dir(blocked), 0o755) })
+	if _, derr := os.ReadDir(filepath.Dir(blocked)); derr == nil {
+		t.Skip("filesystem allowed descending an unreadable dir; walk-error seam not exercised")
+	}
+
+	w := newWatcherT(t)
+	watched := map[string]struct{}{}
+	var errs []string
+	addWatchTree(w, resolved, resolved, watched, func(e error) { errs = append(errs, e.Error()) })
+	found := false
+	for _, e := range errs {
+		if strings.Contains(e, "walk watch tree") {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("unreadable subtree walk error not surfaced; errs=%v", errs)
+	}
+}

From 0b611ecc8f2ac08e1be16e41e96e068bbafab366 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 13:46:16 +0300
Subject: [PATCH 05/13] codex adapter chunk D: payloads + adapter wiring +
 auto-discovery (live)

Makes the codex adapter live end-to-end.

- payloads.go: containment-checked file://<resolved-abs>#L<line> builder
  (resolveWithinRoot + EvalSymlinks), mirroring claude_code; the mapper's
  placeholder payloadURI now resolves through it (a root field threaded
  from the scanner's already-resolved root), keeping the identical
  #L<line> anchor.
- adapter.go: the canonical.Adapter implementation (New/Factory/Name/
  Format/Scan/Tail/ParseCursor + coerceCursor + snapshotCursor) and
  init()->adapters.Register(Format, Factory). Scan records the final
  cursor on the instance even on cancellation so a following Tail resumes
  from completed work (Scan->Tail handoff); a cold Tail snapshots current
  EOF. Faithful mirror of claude_code.
- cmd/ai-viewer-ingest/sources.go: 4th auto-discovery probe
  (codexSessionsDir = $CODEX_HOME/sessions, default ~/.codex/sessions) +
  countRolloutFiles/countLegacyJSON helpers reporting modern and legacy
  counts separately (acceptance #8); codex blank-import so the binary
  registers it. Existing three probes untouched.
- registry_init_test.go: asserts "codex" is registered + enumerable at
  real package init alongside the other adapters.
- deployment.md: reconcile the auto-discovery table with reality
  (claude-code + codex now live; correct the hyphenated claude-code
  format key; $CODEX_HOME probe).

Gates green whole-repo: gofmt/vet/golangci(0)/gosec(0); race tests pass
(codex + all siblings + cmd, no regression); new adapter/payloads code
95.9% covered.
---
 .agents/sow/specs/deployment.md         |  13 +-
 cmd/ai-viewer-ingest/sources.go         |  99 ++++-
 cmd/ai-viewer-ingest/sources_test.go    | 206 +++++++++++
 internal/adapters/codex/adapter.go      | 205 +++++++++++
 internal/adapters/codex/adapter_test.go | 456 ++++++++++++++++++++++++
 internal/adapters/codex/mapper.go       |  48 ++-
 internal/adapters/codex/payloads.go     |  58 +++
 internal/adapters/codex/scanner.go      |   5 +
 internal/adapters/registry_init_test.go |  10 +-
 9 files changed, 1080 insertions(+), 20 deletions(-)
 create mode 100644 cmd/ai-viewer-ingest/sources_test.go
 create mode 100644 internal/adapters/codex/adapter.go
 create mode 100644 internal/adapters/codex/adapter_test.go
 create mode 100644 internal/adapters/codex/payloads.go

diff --git a/.agents/sow/specs/deployment.md b/.agents/sow/specs/deployment.md
index 351d833..435c071 100644
--- a/.agents/sow/specs/deployment.md
+++ b/.agents/sow/specs/deployment.md
@@ -113,17 +113,18 @@ the default locations of every adapter the binary was compiled
 against. Each existing location becomes a source; missing locations
 are silently skipped.
 
-Phase 1 (Chunk 11 onward) ships only the `aiagent_v3` and `aiagent_v2`
-adapters, so only the first two rows of the table below are wired into
-the binary. The remaining rows are reserved for future Phase 2 SOWs
-that introduce the matching adapter packages.
+Phase 1 shipped the `aiagent_v3` and `aiagent_v2` adapters; Phase 2 added
+`claude-code` (SOW-0003) and `codex` (SOW-0004), both now wired into the
+binary. The `opencode` row is reserved for its Phase 2 SOW (SOW-0005); its
+adapter package is not yet compiled in. The `Format` column is the registry
+key the adapter registers under (note `claude-code` is hyphenated).
 
 | Format | Probe | Status |
 |---|---|---|
 | aiagent_v3 | `~/.ai-agent/sessions/session/` exists | live (Chunk 11) |
 | aiagent_v2 | `~/.ai-agent/sessions/` exists | live (Chunk 11) |
-| claude_code | `~/.claude/projects/` exists | adapter pending (Phase 2 SOW) |
-| codex | `~/.codex/sessions/` exists | adapter pending (Phase 2 SOW) |
+| claude-code | `~/.claude/projects/` (or `$CLAUDE_CONFIG_DIR/projects/`) exists | live (SOW-0003) |
+| codex | `$CODEX_HOME/sessions/` (default `~/.codex/sessions/`) exists | live (SOW-0004) |
 | opencode | `~/.local/share/opencode/opencode.db` exists | adapter pending (Phase 2 SOW) |
 
 The Chunk 11 v2 probe checks for the parent `sessions/` directory
diff --git a/cmd/ai-viewer-ingest/sources.go b/cmd/ai-viewer-ingest/sources.go
index 35e5c05..de00686 100644
--- a/cmd/ai-viewer-ingest/sources.go
+++ b/cmd/ai-viewer-ingest/sources.go
@@ -21,6 +21,12 @@ import (
 	"sync"
 
 	"github.com/netdata/ai-viewer/internal/adapters"
+	// Side-effect import: the codex adapter registers its factory with
+	// internal/adapters via init() so the auto-discovery probe added below can
+	// construct it. main.go blank-imports the other adapters in the same way;
+	// codex is registered from here to keep this chunk's change additive and
+	// co-located with its probe.
+	_ "github.com/netdata/ai-viewer/internal/adapters/codex"
 	"github.com/netdata/ai-viewer/internal/canonical"
 	"github.com/netdata/ai-viewer/internal/ingest"
 )
@@ -105,6 +111,11 @@ func autoDiscoverSources(logger *slog.Logger) []configuredSource {
 			location: claudeProjectsDir(home),
 			probe:    claudeProjectsDir(home),
 		},
+		{
+			format:   "codex",
+			location: codexSessionsDir(home),
+			probe:    codexSessionsDir(home),
+		},
 	}
 
 	var out []configuredSource
@@ -120,11 +131,21 @@ func autoDiscoverSources(logger *slog.Logger) []configuredSource {
 		seen[key] = struct{}{}
 		out = append(out, configuredSource{id: key, format: p.format, location: p.location})
 		attrs := []any{"format", p.format, "location", p.location}
-		if p.format == "claude-code" {
+		switch p.format {
+		case "claude-code":
 			// Surface the project-dir count so the operator sees the source
 			// is non-empty (acceptance #8). The count is the number of
 			// immediate subdirectories under the projects root.
 			attrs = append(attrs, "project_dirs", countProjectDirs(p.location))
+		case "codex":
+			// Surface modern + legacy counts SEPARATELY (SOW-0004 acceptance
+			// #8): modern sharded rollouts are ingested; legacy flat .json files
+			// are only logged (one SourceError each, not ingested in v1). Reporting
+			// them apart lets the operator see how many sessions the source will
+			// actually surface vs how many remain on the deferred legacy path.
+			attrs = append(attrs,
+				"modern_rollouts", countRolloutFiles(p.location),
+				"legacy_json", countLegacyJSON(p.location))
 		}
 		logger.Info("ai-viewer-ingest: auto-discovered source", attrs...)
 	}
@@ -158,6 +179,82 @@ func countProjectDirs(root string) int {
 	return n
 }
 
+// codexSessionsDir returns the codex sessions root, honoring $CODEX_HOME
+// (SOW-0004 C#3). When the env var is set, the root is "$CODEX_HOME/sessions";
+// otherwise "~/.codex/sessions". This is the directory the adapter walks and
+// tails; the probe checks it for existence.
+func codexSessionsDir(home string) string {
+	if ch := os.Getenv("CODEX_HOME"); ch != "" {
+		return filepath.Join(ch, "sessions")
+	}
+	return filepath.Join(home, ".codex", "sessions")
+}
+
+// codexRolloutPrefix is the shared filename prefix for both modern and legacy
+// codex rollouts (openai/codex codex-rs/rollout/src/list.rs filters on
+// starts_with("rollout-")). Duplicated here as a lightweight observability
+// predicate; the adapter's discovery.go holds the authoritative anchored
+// regexes used for actual ingest.
+const codexRolloutPrefix = "rollout-"
+
+// codexArchivedDir is the codex session archive, pruned from both ingest and
+// these observability counts (spec adapter-codex.md §"Filesystem Layout").
+const codexArchivedDir = "archived_sessions"
+
+// countRolloutFiles returns the number of modern sharded codex rollouts
+// ("rollout-*.jsonl") under the sessions root, walking the YYYY/MM/DD shards at
+// any depth and pruning archived_sessions/. Best-effort: an unreadable subtree
+// is skipped (the rest is still counted) and a missing root yields 0; the count
+// is observability for acceptance #8, not a gate. Mirrors discovery.go's modern
+// match (^rollout-.*\.jsonl$) without importing the adapter package.
+func countRolloutFiles(root string) int {
+	n := 0
+	_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			if d != nil && d.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if d.IsDir() {
+			if d.Name() == codexArchivedDir && path != root {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		name := d.Name()
+		if strings.HasPrefix(name, codexRolloutPrefix) && strings.HasSuffix(name, ".jsonl") {
+			n++
+		}
+		return nil
+	})
+	return n
+}
+
+// countLegacyJSON returns the number of legacy flat codex rollouts
+// ("rollout-*.json") directly under the sessions root (NOT in shards). These are
+// recognized but NOT ingested in v1 (one informational SourceError per file);
+// the count is surfaced separately so the operator sees the deferred-legacy
+// volume (acceptance #8). Returns 0 on any read error. Mirrors discovery.go's
+// legacy match (^rollout-.*\.json$, root-only).
+func countLegacyJSON(root string) int {
+	entries, err := os.ReadDir(root)
+	if err != nil {
+		return 0
+	}
+	n := 0
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+		name := e.Name()
+		if strings.HasPrefix(name, codexRolloutPrefix) && strings.HasSuffix(name, ".json") {
+			n++
+		}
+	}
+	return n
+}
+
 // cursorLookup is the minimal contract startSource needs to resume from
 // the durable cursor. The production wiring uses *sql.DB through
 // sqlCursorLookup; tests inject a fake to verify the round-trip without
diff --git a/cmd/ai-viewer-ingest/sources_test.go b/cmd/ai-viewer-ingest/sources_test.go
new file mode 100644
index 0000000..5845384
--- /dev/null
+++ b/cmd/ai-viewer-ingest/sources_test.go
@@ -0,0 +1,206 @@
+// Tests for the codex auto-discovery probe and its observability counters
+// (SOW-0004 acceptance #8). They pin:
+//
+//   - the probe registers a source at $CODEX_HOME/sessions (default
+//     ~/.codex/sessions) when the directory exists, with location = the
+//     walked sessions dir;
+//   - $CODEX_HOME overrides the default location;
+//   - an absent sessions dir registers no codex source;
+//   - countRolloutFiles / countLegacyJSON report the modern (sharded .jsonl)
+//     and legacy (root .json) volumes SEPARATELY;
+//   - the discovery log line carries both counts as distinct keys.
+package main
+
+import (
+	"bytes"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/adapters"
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// plantCodexLayout writes a sessions tree under root with `modern` sharded
+// rollout-*.jsonl files (in a YYYY/MM/DD shard), `legacy` root rollout-*.json
+// files, and three decoys that must NOT be counted (a rollout inside an
+// archived_sessions shard, a non-rollout .jsonl in the shard, one at the root).
+func plantCodexLayout(t *testing.T, root string, modern, legacy int) {
+	t.Helper()
+	shard := filepath.Join(root, "2025", "11", "20")
+	if err := os.MkdirAll(shard, 0o755); err != nil {
+		t.Fatalf("mkdir shard: %v", err)
+	}
+	for i := 0; i < modern; i++ {
+		name := filepath.Join(shard, "rollout-2025-11-20T10-00-0"+itoa(i)+"-uuid.jsonl")
+		if err := os.WriteFile(name, []byte(`{"type":"session_meta"}`+"\n"), 0o644); err != nil {
+			t.Fatalf("write modern rollout: %v", err)
+		}
+	}
+	for i := 0; i < legacy; i++ {
+		name := filepath.Join(root, "rollout-2025-06-0"+itoa(i)+"-uuid.json")
+		if err := os.WriteFile(name, []byte(`{}`), 0o644); err != nil {
+			t.Fatalf("write legacy rollout: %v", err)
+		}
+	}
+	// Decoys: an archived shard rollout (pruned), a non-rollout .jsonl, and a
+	// non-rollout file at the root. None of these must be counted.
+	arch := filepath.Join(root, "archived_sessions", "2025", "11", "20")
+	if err := os.MkdirAll(arch, 0o755); err != nil {
+		t.Fatalf("mkdir archive: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(arch, "rollout-archived-uuid.jsonl"), []byte("{}"), 0o644); err != nil {
+		t.Fatalf("write archived: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(shard, "not-a-rollout.jsonl"), []byte("{}"), 0o644); err != nil {
+		t.Fatalf("write decoy jsonl: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(root, "history.jsonl"), []byte("{}"), 0o644); err != nil {
+		t.Fatalf("write decoy root file: %v", err)
+	}
+}
+
+// itoa is a tiny single-digit int→string helper so plantCodexLayout stays free
+// of strconv for the small counts the tests use.
+func itoa(i int) string { return string(rune('0' + i)) }
+
+// TestAutoDiscover_CodexProbe verifies acceptance #8: a tmpdir
+// ~/.codex/sessions tree with modern sharded rollouts is auto-discovered as a
+// codex source whose location is the sessions root, and the registered factory
+// can construct it.
+func TestAutoDiscover_CodexProbe(t *testing.T) {
+	// Not parallel: t.Setenv mutates process-wide HOME / CODEX_HOME.
+	tmp := t.TempDir()
+	t.Setenv("HOME", tmp)
+	t.Setenv("CODEX_HOME", "")
+	sessions := filepath.Join(tmp, ".codex", "sessions")
+	plantCodexLayout(t, sessions, 2, 3)
+
+	got, err := resolveSources(nil, silentLogger())
+	if err != nil {
+		t.Fatalf("resolveSources: %v", err)
+	}
+	var cdx *configuredSource
+	for i := range got {
+		if got[i].format == "codex" {
+			cdx = &got[i]
+		}
+	}
+	if cdx == nil {
+		t.Fatalf("codex source not auto-discovered; got %+v", got)
+	}
+	if cdx.location != sessions {
+		t.Fatalf("codex location = %q, want %q", cdx.location, sessions)
+	}
+	// The discovered source must be constructable via the registry, proving the
+	// adapter's init() ran (acceptance #1).
+	factory, ok := adapters.Get("codex")
+	if !ok {
+		t.Fatal("codex factory not registered")
+	}
+	if _, err := factory(cdx.location, canonical.AdapterOptions{Logger: silentLogger()}); err != nil {
+		t.Fatalf("codex factory(%q): %v", cdx.location, err)
+	}
+}
+
+// TestAutoDiscover_CodexHomeOverride verifies the probe honors $CODEX_HOME
+// (SOW-0004 C#3): the sessions root is "$CODEX_HOME/sessions", not ~/.codex.
+func TestAutoDiscover_CodexHomeOverride(t *testing.T) {
+	// Not parallel: mutates process-wide env.
+	tmp := t.TempDir()
+	t.Setenv("HOME", tmp) // no ~/.codex here
+	codexHome := filepath.Join(tmp, "custom-codex")
+	t.Setenv("CODEX_HOME", codexHome)
+	sessions := filepath.Join(codexHome, "sessions")
+	plantCodexLayout(t, sessions, 1, 0)
+
+	got, err := resolveSources(nil, silentLogger())
+	if err != nil {
+		t.Fatalf("resolveSources: %v", err)
+	}
+	var loc string
+	for _, s := range got {
+		if s.format == "codex" {
+			loc = s.location
+		}
+	}
+	if loc != sessions {
+		t.Fatalf("codex location = %q, want %q (CODEX_HOME honored)", loc, sessions)
+	}
+}
+
+// TestAutoDiscover_NoCodexWhenAbsent verifies a workstation without
+// ~/.codex/sessions does not register a codex source.
+func TestAutoDiscover_NoCodexWhenAbsent(t *testing.T) {
+	// Not parallel: mutates process-wide env.
+	tmp := t.TempDir()
+	t.Setenv("HOME", tmp)
+	t.Setenv("CODEX_HOME", "")
+
+	got, err := resolveSources(nil, silentLogger())
+	if err != nil {
+		t.Fatalf("resolveSources: %v", err)
+	}
+	for _, s := range got {
+		if s.format == "codex" {
+			t.Fatalf("codex registered with no sessions dir present: %+v", got)
+		}
+	}
+}
+
+// TestAutoDiscover_CodexProbeLogsBothCountsSeparately verifies the probe's
+// discovery log line carries the modern and legacy volumes as DISTINCT keys
+// (acceptance #8: "/api/sources reports both counts separately" — the structured
+// log is the operator-facing surface at discovery time).
+func TestAutoDiscover_CodexProbeLogsBothCountsSeparately(t *testing.T) {
+	// Not parallel: mutates process-wide env.
+	tmp := t.TempDir()
+	t.Setenv("HOME", tmp)
+	t.Setenv("CODEX_HOME", "")
+	sessions := filepath.Join(tmp, ".codex", "sessions")
+	plantCodexLayout(t, sessions, 2, 3)
+
+	var buf bytes.Buffer
+	logger := slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo}))
+	if _, err := resolveSources(nil, logger); err != nil {
+		t.Fatalf("resolveSources: %v", err)
+	}
+	out := buf.String()
+	if !bytes.Contains(buf.Bytes(), []byte("modern_rollouts=2")) {
+		t.Errorf("discovery log missing modern_rollouts=2; got:\n%s", out)
+	}
+	if !bytes.Contains(buf.Bytes(), []byte("legacy_json=3")) {
+		t.Errorf("discovery log missing legacy_json=3; got:\n%s", out)
+	}
+}
+
+// TestCountRolloutFiles verifies the modern-rollout counter mirrors discovery.go's
+// match: rollout-*.jsonl under shards, archived_sessions pruned, non-rollout
+// .jsonl and root non-rollout files ignored.
+func TestCountRolloutFiles(t *testing.T) {
+	t.Parallel()
+	tmp := t.TempDir()
+	plantCodexLayout(t, tmp, 4, 2)
+	if n := countRolloutFiles(tmp); n != 4 {
+		t.Fatalf("countRolloutFiles = %d, want 4 (archived + decoys excluded)", n)
+	}
+	if n := countRolloutFiles(filepath.Join(tmp, "missing")); n != 0 {
+		t.Fatalf("countRolloutFiles(missing) = %d, want 0", n)
+	}
+}
+
+// TestCountLegacyJSON verifies the legacy counter mirrors discovery.go's match:
+// rollout-*.json directly under the root only (not in shards), non-rollout root
+// files ignored.
+func TestCountLegacyJSON(t *testing.T) {
+	t.Parallel()
+	tmp := t.TempDir()
+	plantCodexLayout(t, tmp, 4, 2)
+	if n := countLegacyJSON(tmp); n != 2 {
+		t.Fatalf("countLegacyJSON = %d, want 2", n)
+	}
+	if n := countLegacyJSON(filepath.Join(tmp, "missing")); n != 0 {
+		t.Fatalf("countLegacyJSON(missing) = %d, want 0", n)
+	}
+}
diff --git a/internal/adapters/codex/adapter.go b/internal/adapters/codex/adapter.go
new file mode 100644
index 0000000..fec5b0b
--- /dev/null
+++ b/internal/adapters/codex/adapter.go
@@ -0,0 +1,205 @@
+package codex
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"os"
+
+	"github.com/netdata/ai-viewer/internal/adapters"
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// Format is declared in mapper.go (const Format = "codex"); it is the single
+// stable identifier shared by the mapper (which stamps it onto LogEntry.Source)
+// and this file (which registers it). Defining it once mirrors claude_code,
+// where one Format const is shared by mapper.go and adapter.go.
+
+// sourceIDPrefix is prepended to the configured sessions root to produce the
+// canonical events' SourceID. Used only for log attribution; idempotency is a
+// SQL-layer guarantee keyed on each table's natural identity (not SourceSeq).
+// Mirrors claude_code.
+const sourceIDPrefix = Format + ":"
+
+// Adapter is the codex source adapter. One instance corresponds to one sessions
+// root ($CODEX_HOME/sessions, default ~/.codex/sessions). The instance is safe
+// for a single Scan goroutine followed by a single Tail goroutine; concurrent
+// Scan+Tail on one instance is not part of the contract (see
+// specs/adapter-contract.md). Mirrors claude_code.Adapter.
+type Adapter struct {
+	root     string
+	sourceID string
+	logger   *slog.Logger
+	// onError surfaces non-fatal per-record parse errors. Never nil after
+	// construction; New and Factory substitute a no-op when nil so adapter code
+	// can call it unconditionally.
+	onError func(error)
+	// scanCursor holds the final per-file offsets recorded by the most recent
+	// Scan, so a following Tail on the SAME instance resumes from where Scan left
+	// off rather than snapshotting current EOF (closing the Scan→Tail data-loss
+	// window). Nil until Scan runs (a cold Tail then falls back to
+	// snapshotCursor). The ingester drives Scan→Tail on one instance
+	// (cmd/ai-viewer-ingest/sources.go runAdapter), single-threaded, so a plain
+	// field needs no synchronisation. Mirrors claude_code.
+	scanCursor *Cursor
+}
+
+// Compile-time conformance to the canonical.Adapter interface.
+var _ canonical.Adapter = (*Adapter)(nil)
+
+// New constructs an Adapter rooted at the given sessions directory with the
+// shared canonical.AdapterOptions bundle. An empty root is rejected so
+// misconfigured ingesters fail fast. Mirrors claude_code.New.
+func New(root string, opts canonical.AdapterOptions) (*Adapter, error) {
+	if root == "" {
+		return nil, errors.New("codex: root must be non-empty")
+	}
+	logger := opts.Logger
+	if logger == nil {
+		logger = slog.Default()
+	}
+	logger = logger.With("adapter", Format, "root", root)
+	onError := opts.OnError
+	if onError == nil {
+		onError = func(error) {}
+	}
+	return &Adapter{
+		root:     root,
+		sourceID: sourceIDPrefix + root,
+		logger:   logger,
+		onError:  onError,
+	}, nil
+}
+
+// Name implements canonical.Adapter.
+func (a *Adapter) Name() string { return Format }
+
+// Format implements canonical.Adapter.
+func (a *Adapter) Format() string { return Format }
+
+// Scan implements canonical.Adapter. Walks the sessions root, reads each modern
+// rollout from its cursor offset to EOF, and emits canonical events. Returns
+// when caught up or when ctx is cancelled. The caller owns `out`; Scan never
+// closes it. Mirrors claude_code.Scan: the final offsets are recorded on the
+// instance even on cancellation so a following Tail resumes from completed work.
+func (a *Adapter) Scan(ctx context.Context, since canonical.Cursor, out chan<- canonical.Event) error {
+	start := a.coerceCursor(since)
+	final, sErr := scanAll(ctx, a.root, a.sourceID, start, out, a.onError)
+	// Record the final offsets even on cancellation so a Tail that follows a
+	// context-cancelled Scan still resumes from the work that was completed (the
+	// cursor reflects only fully-consumed lines). On a hard error the cursor is
+	// still the best resume point we have.
+	cursorCopy := final
+	a.scanCursor = &cursorCopy
+	if sErr != nil {
+		if errors.Is(sErr, context.Canceled) || errors.Is(sErr, context.DeadlineExceeded) {
+			return nil
+		}
+		return fmt.Errorf("codex: scan: %w", sErr)
+	}
+	return nil
+}
+
+// Tail implements canonical.Adapter. Subscribes to fsnotify events on the
+// sessions tree and emits canonical events as rollouts grow. Returns when ctx is
+// cancelled. Same channel-ownership and cancellation rules as Scan. Tail resumes
+// from the per-file offsets the preceding Scan recorded on this instance,
+// closing the data-loss window where records appended BETWEEN Scan finishing and
+// Tail starting would be skipped if Tail snapshotted current EOF. Any re-emission
+// of an already-seen line is absorbed by the ingester's SQL-layer idempotent
+// upserts. A cold Tail with no preceding Scan falls back to current file sizes so
+// it follows from now rather than replaying full history. Mirrors
+// claude_code.Tail.
+func (a *Adapter) Tail(ctx context.Context, out chan<- canonical.Event) error {
+	var cur Cursor
+	if a.scanCursor != nil {
+		cur = a.coerceCursor(*a.scanCursor)
+	} else {
+		snap, err := a.snapshotCursor()
+		if err != nil {
+			return fmt.Errorf("codex: tail snapshot: %w", err)
+		}
+		cur = snap
+	}
+	return tailLoop(ctx, a.root, a.sourceID, cur, out, a.onError)
+}
+
+// ParseCursor implements canonical.Adapter. Empty input yields the zero Cursor;
+// non-empty input is decoded as JSON. The returned Cursor is opaque to the
+// ingester and used only via Cursor.String() and Cursor.After(). Mirrors
+// claude_code.ParseCursor.
+func (a *Adapter) ParseCursor(stored string) (canonical.Cursor, error) {
+	c, err := ParseCursor(stored)
+	if err != nil {
+		return nil, err
+	}
+	return c, nil
+}
+
+// coerceCursor accepts a Cursor produced by this adapter, a nil Cursor (first
+// run), or an alien cursor type (treated as empty so the ingester's "I lost
+// track" path re-scans from offset 0). Never returns nil. Mirrors
+// claude_code.coerceCursor; codex's cursor carries LegacyJSON (not MetaSeen), so
+// that is the map normalized here.
+func (a *Adapter) coerceCursor(c canonical.Cursor) Cursor {
+	if c == nil {
+		return newCursor()
+	}
+	if typed, ok := c.(Cursor); ok {
+		if typed.Files == nil {
+			typed.Files = map[string]FileCursor{}
+		}
+		if typed.LegacyJSON == nil {
+			typed.LegacyJSON = map[string]LegacyFile{}
+		}
+		if typed.Version == 0 {
+			typed.Version = cursorVersion
+		}
+		return typed
+	}
+	return newCursor()
+}
+
+// snapshotCursor builds a cursor from current on-disk rollout sizes so a cold
+// Tail does not re-emit historical events (Tail follows changes from now on;
+// existing content is Scan's job). Legacy flat .json files are not stat-tracked
+// here — they are not ingested in v1 and the cursor's LegacyJSON suppression is
+// Scan's concern. Mirrors claude_code.snapshotCursor.
+func (a *Adapter) snapshotCursor() (Cursor, error) {
+	disc, err := discoverRollouts(a.root, a.onError)
+	if err != nil {
+		return Cursor{}, err
+	}
+	cur := newCursor()
+	for _, r := range disc.modern {
+		info, sErr := os.Stat(r.abs)
+		if sErr != nil {
+			a.onError(fmt.Errorf("codex: snapshot size %s: %w", r.rel, sErr))
+			continue
+		}
+		size := info.Size()
+		cur = cur.withFile(r.rel, FileCursor{
+			Offset:  size,
+			Size:    size,
+			MtimeUs: info.ModTime().UnixMicro(),
+		})
+	}
+	return cur, nil
+}
+
+// Factory adapts New to canonical.AdapterFactory so the registry can construct
+// an Adapter from the generic (location, opts) pair. The location is treated as
+// the sessions root ($CODEX_HOME/sessions, default ~/.codex/sessions; SOW C#3).
+// Mirrors claude_code.Factory.
+func Factory(location string, opts canonical.AdapterOptions) (canonical.Adapter, error) {
+	a, err := New(location, opts)
+	if err != nil {
+		return nil, err
+	}
+	return a, nil
+}
+
+func init() {
+	adapters.Register(Format, Factory)
+}
diff --git a/internal/adapters/codex/adapter_test.go b/internal/adapters/codex/adapter_test.go
new file mode 100644
index 0000000..ca61720
--- /dev/null
+++ b/internal/adapters/codex/adapter_test.go
@@ -0,0 +1,456 @@
+package codex
+
+import (
+	"context"
+	"errors"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// silentOpts returns AdapterOptions with a discard logger and a recording
+// onError, plus the slice the errors land in (guarded by mu for the Tail
+// goroutine).
+func silentOpts() (canonical.AdapterOptions, *[]string, *sync.Mutex) {
+	var mu sync.Mutex
+	errs := &[]string{}
+	opts := canonical.AdapterOptions{
+		Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+		OnError: func(e error) {
+			mu.Lock()
+			*errs = append(*errs, e.Error())
+			mu.Unlock()
+		},
+	}
+	return opts, errs, &mu
+}
+
+// TestAdapter_NewRejectsEmptyRoot pins the fail-fast guard.
+func TestAdapter_NewRejectsEmptyRoot(t *testing.T) {
+	t.Parallel()
+	if _, err := New("", canonical.AdapterOptions{}); err == nil {
+		t.Fatal("New(\"\") = nil error, want non-nil")
+	}
+}
+
+// TestAdapter_NewDefaultsNilDeps verifies New tolerates a nil Logger and nil
+// OnError (substituting defaults) so adapter code can call them unconditionally.
+func TestAdapter_NewDefaultsNilDeps(t *testing.T) {
+	t.Parallel()
+	a, err := New(t.TempDir(), canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	if a.logger == nil {
+		t.Error("logger is nil; want default")
+	}
+	if a.onError == nil {
+		t.Error("onError is nil; want no-op default")
+	}
+	// The no-op onError must be callable.
+	a.onError(errors.New("x"))
+}
+
+// TestAdapter_NameAndFormat pins the registry identifiers.
+func TestAdapter_NameAndFormat(t *testing.T) {
+	t.Parallel()
+	a, err := New(t.TempDir(), canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	if a.Name() != "codex" {
+		t.Errorf("Name() = %q, want codex", a.Name())
+	}
+	if a.Format() != "codex" {
+		t.Errorf("Format() = %q, want codex", a.Format())
+	}
+	if a.Name() != Format || a.Format() != Format {
+		t.Errorf("Name/Format must equal Format const %q", Format)
+	}
+}
+
+// TestAdapter_Factory builds an Adapter through the registry factory and rejects
+// the empty location.
+func TestAdapter_Factory(t *testing.T) {
+	t.Parallel()
+	a, err := Factory(t.TempDir(), canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("Factory: %v", err)
+	}
+	if a == nil {
+		t.Fatal("Factory returned nil adapter")
+	}
+	if _, err := Factory("", canonical.AdapterOptions{}); err == nil {
+		t.Fatal("Factory(\"\") = nil error, want non-nil")
+	}
+}
+
+// TestAdapter_ParseCursor round-trips a cursor and rejects a bad version.
+func TestAdapter_ParseCursor(t *testing.T) {
+	t.Parallel()
+	a, err := New(t.TempDir(), canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	// Empty → zero cursor.
+	c, err := a.ParseCursor("")
+	if err != nil {
+		t.Fatalf("ParseCursor(\"\"): %v", err)
+	}
+	if c == nil {
+		t.Fatal("ParseCursor(\"\") = nil cursor")
+	}
+	// Round-trip a non-empty cursor.
+	seed := newCursor().withFile("2025/11/20/rollout-x.jsonl", FileCursor{Offset: 42, Size: 42})
+	got, err := a.ParseCursor(seed.String())
+	if err != nil {
+		t.Fatalf("ParseCursor(round-trip): %v", err)
+	}
+	if !got.After(newCursor()) {
+		t.Errorf("round-tripped cursor should be After the empty cursor")
+	}
+	// Bad version is rejected.
+	if _, err := a.ParseCursor(`{"version":999}`); err == nil {
+		t.Error("ParseCursor(bad version) = nil error, want non-nil")
+	}
+}
+
+// TestAdapter_CoerceCursor covers the nil, typed, and alien-type branches.
+func TestAdapter_CoerceCursor(t *testing.T) {
+	t.Parallel()
+	a, err := New(t.TempDir(), canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	// nil → fresh cursor with non-nil maps and the current version.
+	if c := a.coerceCursor(nil); c.Files == nil || c.LegacyJSON == nil || c.Version != cursorVersion {
+		t.Errorf("coerceCursor(nil) = %+v, want initialized maps + version", c)
+	}
+	// Typed cursor with nil maps + zero version comes back normalized (a copy).
+	typed := Cursor{}
+	c := a.coerceCursor(typed)
+	if c.Files == nil || c.LegacyJSON == nil || c.Version != cursorVersion {
+		t.Errorf("coerceCursor(typed-zero) = %+v, want normalized", c)
+	}
+	// Alien cursor type → fresh cursor.
+	if c := a.coerceCursor(alienCursor{}); c.Version != cursorVersion {
+		t.Errorf("coerceCursor(alien) = %+v, want fresh cursor", c)
+	}
+}
+
+// alienCursor is a foreign canonical.Cursor used to drive coerceCursor's
+// type-assertion-miss branch.
+type alienCursor struct{}
+
+func (alienCursor) String() string              { return "{}" }
+func (alienCursor) After(canonical.Cursor) bool { return false }
+
+// TestAdapter_ScanThenTailHandoff pins the load-bearing Scan→Tail cursor
+// handoff: Scan records the per-file offset on the instance, and a following
+// Tail resumes from it (not from current EOF), so a record appended between Scan
+// and Tail still surfaces. NOTE(review): "exactly once" is not asserted here.
+func TestAdapter_ScanThenTailHandoff(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(1))
+	writeFileBytes(t, path, completeSession("sid-a"))
+
+	opts, _, _ := silentOpts()
+	a, err := New(root, opts)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	scanOut := make(chan canonical.Event, 4096)
+	if err := a.Scan(context.Background(), nil, scanOut); err != nil {
+		t.Fatalf("Scan: %v", err)
+	}
+	scanEvents := drainBuffered(scanOut)
+	if !hasKind(scanEvents, canonical.EvSessionStarted) {
+		t.Fatal("Scan emitted no SessionStarted")
+	}
+	if a.scanCursor == nil {
+		t.Fatal("Scan did not record scanCursor on the instance")
+	}
+
+	// Append a fresh complete turn AFTER Scan, then run Tail. Tail is expected
+	// to resume from the recorded offset and emit the new turn (re-emission of
+	// the whole file is not checked). A turn materializes on its close, so append both.
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsCtx+`","type":"turn_context","payload":{"turn_id":"t2","model":"m"}}`+"\n"))
+	appendFileBytes(t, path, []byte(`{"timestamp":"`+tsDone+`","type":"event_msg","payload":{"type":"task_complete","turn_id":"t2","completed_at":"`+tsDone+`"}}`+"\n"))
+
+	tailOut := make(chan canonical.Event, 4096)
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan struct{})
+	go func() {
+		_ = a.Tail(ctx, tailOut)
+		close(done)
+	}()
+
+	// Wait until the appended turn surfaces, then cancel.
+	if _, ok := waitForKind(tailOut, canonical.EvTurnFinalized, 5*time.Second); !ok {
+		cancel()
+		<-done
+		t.Fatal("Tail did not emit the appended turn")
+	}
+	cancel()
+	<-done
+}
+
+// TestAdapter_TailColdSnapshot covers the cold-Tail path (no preceding Scan):
+// Tail snapshots current file sizes so it follows from now on. A brand-new
+// rollout file created AFTER the watch is live is picked up via the Create
+// handler (the snapshot races an append on the SAME file, so a new file keeps
+// the test deterministic). NOTE(review): only the arrival of a TurnFinalized is
+// asserted; a replay of the pre-existing session is not explicitly rejected.
+func TestAdapter_TailColdSnapshot(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	// A pre-existing complete session the cold snapshot must NOT replay.
+	writeFileBytes(t, shardPath(root, uuid7(2)), completeSession("sid-cold"))
+
+	opts, _, _ := silentOpts()
+	a, err := New(root, opts)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	tailOut := make(chan canonical.Event, 4096)
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan struct{})
+	go func() {
+		_ = a.Tail(ctx, tailOut)
+		close(done)
+	}()
+	defer func() { cancel(); <-done }()
+
+	// Give the watch a moment to establish, then write a NEW rollout file. The
+	// Create handler watches the (existing) shard dir and reads the new file
+	// from offset 0, so its full session surfaces. 150ms mirrors the watch-settle
+	// delay the sibling tailer_test.go tests use.
+	time.Sleep(150 * time.Millisecond)
+	newPath := shardPath(root, uuid7(7))
+	writeFileBytes(t, newPath, completeSession("sid-new"))
+
+	if _, ok := waitForKind(tailOut, canonical.EvTurnFinalized, 5*time.Second); !ok {
+		t.Fatal("cold Tail did not emit the new rollout's turn")
+	}
+}
+
+// TestAdapter_SnapshotCursor pins snapshotCursor: it records current sizes for
+// modern rollouts and skips a missing root cleanly.
+func TestAdapter_SnapshotCursor(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(3))
+	body := completeSession("sid-snap")
+	writeFileBytes(t, path, body)
+
+	opts, _, _ := silentOpts()
+	a, err := New(root, opts)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	cur, err := a.snapshotCursor()
+	if err != nil {
+		t.Fatalf("snapshotCursor: %v", err)
+	}
+	rel, rerr := relPath(mustResolve(t, root), path)
+	if rerr != nil {
+		t.Fatalf("relPath: %v", rerr)
+	}
+	fc := cur.fileCursor(rel)
+	if fc.Offset != int64(len(body)) || fc.Size != int64(len(body)) {
+		t.Errorf("snapshot FileCursor = %+v, want offset=size=%d", fc, len(body))
+	}
+
+	// A missing root resolves to an empty (non-error) discovery → empty cursor.
+	aMissing, err := New(filepath.Join(root, "does-not-exist"), opts)
+	if err != nil {
+		t.Fatalf("New(missing): %v", err)
+	}
+	emptyCur, err := aMissing.snapshotCursor()
+	if err != nil {
+		t.Fatalf("snapshotCursor(missing): %v", err)
+	}
+	if len(emptyCur.Files) != 0 {
+		t.Errorf("snapshotCursor(missing) = %+v, want empty", emptyCur.Files)
+	}
+}
+
+// TestAdapter_ScanContextCancelled verifies a cancelled Scan returns nil (not an
+// error) and still records the best-effort cursor on the instance.
+func TestAdapter_ScanContextCancelled(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	writeFileBytes(t, shardPath(root, uuid7(4)), completeSession("sid-cancel"))
+
+	opts, _, _ := silentOpts()
+	a, err := New(root, opts)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancel before Scan runs
+	out := make(chan canonical.Event, 4096)
+	if err := a.Scan(ctx, nil, out); err != nil {
+		t.Fatalf("cancelled Scan = %v, want nil", err)
+	}
+	if a.scanCursor == nil {
+		t.Fatal("cancelled Scan did not record a best-effort cursor")
+	}
+}
+
+// TestAdapter_ScanHardErrorAndTailSnapshotError drives the non-cancellation
+// failure branches: an unreadable sessions root makes discovery return a hard
+// (non-IsNotExist) error, which Scan wraps and returns, and which Tail's
+// snapshotCursor surfaces as a wrapped error. Skipped when running as root
+// (root bypasses the 0000 permission).
+func TestAdapter_ScanHardErrorAndTailSnapshotError(t *testing.T) {
+	if os.Geteuid() == 0 {
+		t.Skip("running as root bypasses directory permissions")
+	}
+	t.Parallel()
+	root := t.TempDir()
+	// Plant a file so the dir is non-empty, then make the root unreadable.
+	writeFileBytes(t, shardPath(root, uuid7(8)), completeSession("sid-perm"))
+	if err := os.Chmod(root, 0o000); err != nil {
+		t.Fatalf("chmod 000: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Chmod(root, 0o755) })
+
+	opts, _, _ := silentOpts()
+	a, err := New(root, opts)
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	out := make(chan canonical.Event, 16)
+	if err := a.Scan(context.Background(), nil, out); err == nil {
+		t.Error("Scan over unreadable root = nil error, want hard error")
+	}
+	// snapshotCursor (cold-Tail path) likewise surfaces the discovery error,
+	// which Tail wraps and returns before entering the watch loop. Asserting the
+	// snapshot directly avoids racing tailLoop (which would block on the watch).
+	if _, err := a.snapshotCursor(); err == nil {
+		t.Error("snapshotCursor over unreadable root = nil error, want error")
+	}
+}
+
+// --- payloads.go containment branch coverage ---
+
+// TestPayloadLocationURI_EmptyRootSkipsContainment pins the mapper-only path
+// (root == ""): the cleaned absolute path is returned without resolving.
+func TestPayloadLocationURI_EmptyRootSkipsContainment(t *testing.T) {
+	t.Parallel()
+	uri, err := payloadLocationURI("", "/test/sessions/2025/11/20/rollout-x.jsonl")
+	if err != nil {
+		t.Fatalf("payloadLocationURI(empty root): %v", err)
+	}
+	if uri != "file:///test/sessions/2025/11/20/rollout-x.jsonl" {
+		t.Errorf("uri = %q, want file:///test/sessions/...", uri)
+	}
+}
+
+// TestPayloadLocationURI_WithinRoot resolves a real file under a real root.
+func TestPayloadLocationURI_WithinRoot(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	path := shardPath(root, uuid7(5))
+	writeFileBytes(t, path, completeSession("sid-uri"))
+	uri, err := payloadLocationURI(root, path)
+	if err != nil {
+		t.Fatalf("payloadLocationURI: %v", err)
+	}
+	if !strings.HasPrefix(uri, "file://") || !strings.HasSuffix(uri, ".jsonl") {
+		t.Errorf("uri = %q, want file://...jsonl", uri)
+	}
+}
+
+// TestPayloadLocationURI_EscapeRejected plants a symlink inside the root that
+// points OUTSIDE it and asserts the URI builder refuses the escaping path.
+func TestPayloadLocationURI_EscapeRejected(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	outside := t.TempDir()
+	target := filepath.Join(outside, "secret.jsonl")
+	writeFileBytes(t, target, []byte("{}\n"))
+	link := filepath.Join(root, "escape.jsonl")
+	if err := os.Symlink(target, link); err != nil {
+		t.Skipf("symlink unsupported: %v", err)
+	}
+	_, err := payloadLocationURI(root, link)
+	if err == nil {
+		t.Fatal("payloadLocationURI(escaping symlink) = nil error, want escape error")
+	}
+	if !strings.Contains(err.Error(), "escapes root") {
+		t.Errorf("error = %v, want 'escapes root'", err)
+	}
+}
+
+// TestPayloadLocationURI_UnresolvableRoot drives resolveWithinRoot's root-resolve
+// error branch with a non-existent root.
+func TestPayloadLocationURI_UnresolvableRoot(t *testing.T) {
+	t.Parallel()
+	_, err := payloadLocationURI(filepath.Join(t.TempDir(), "nope"), "/x/y.jsonl")
+	if err == nil {
+		t.Fatal("payloadLocationURI(unresolvable root) = nil error, want error")
+	}
+}
+
+// TestMapperPayloadURI_FallsBackOnEscape verifies the mapper's payloadURI keeps
+// the #L<line> anchor and falls back to the cleaned abs path when containment
+// rejects the path, so the ref is never empty (the scanner is the real gate).
+func TestMapperPayloadURI_FallsBackOnEscape(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	outside := t.TempDir()
+	target := filepath.Join(outside, "secret.jsonl")
+	writeFileBytes(t, target, []byte("{}\n"))
+	link := filepath.Join(root, "escape.jsonl")
+	if err := os.Symlink(target, link); err != nil {
+		t.Skipf("symlink unsupported: %v", err)
+	}
+	m := newFileMapper(mapperConfig{sourceID: "codex:" + root, absPath: link, root: root, nativeID: "id"})
+	m.setLineNo(7)
+	uri := m.payloadURI(7)
+	if !strings.HasSuffix(uri, "#L7") {
+		t.Errorf("uri = %q, want #L7 anchor preserved", uri)
+	}
+	if !strings.HasPrefix(uri, "file://") {
+		t.Errorf("uri = %q, want file:// fallback", uri)
+	}
+}
+
+// TestMapperPayloadURI_NoAnchorWhenLineZero pins the lineNo<=0 branch (no anchor)
+// and the empty-absPath branch (anchor only).
+func TestMapperPayloadURI_NoAnchorWhenLineZero(t *testing.T) {
+	t.Parallel()
+	m := newFileMapper(mapperConfig{sourceID: "s", absPath: "/a/b.jsonl"})
+	if got := m.payloadURI(0); got != "file:///a/b.jsonl" {
+		t.Errorf("payloadURI(0) = %q, want no anchor", got)
+	}
+	mNoPath := newFileMapper(mapperConfig{sourceID: "s"})
+	if got := mNoPath.payloadURI(3); got != "#L3" {
+		t.Errorf("payloadURI(no absPath) = %q, want #L3 only", got)
+	}
+}
+
+// --- small test helpers ---
+
+// mustResolve resolves p through symlinks, failing the test on error. Used to
+// derive the cursor key the snapshot records (keyed against the resolved root).
+func mustResolve(t *testing.T, p string) string {
+	t.Helper()
+	r, err := filepath.EvalSymlinks(p)
+	if err != nil {
+		t.Fatalf("EvalSymlinks %s: %v", p, err)
+	}
+	return r
+}
diff --git a/internal/adapters/codex/mapper.go b/internal/adapters/codex/mapper.go
index 4117791..0e74169 100644
--- a/internal/adapters/codex/mapper.go
+++ b/internal/adapters/codex/mapper.go
@@ -71,6 +71,13 @@ type fileMapper struct {
 	// compaction summaries) and for log attribution. Empty in mapper-only unit
 	// tests; the URI then carries the line anchor without an absolute prefix.
 	absPath string
+	// root is the symlink-resolved sessions root (absolute), used to enforce
+	// symlink containment when building a PayloadRef LocationURI (security.md
+	// §6, spec edge #7). Empty in mapper-only unit tests; payloadURI then skips
+	// the containment resolve and emits the cleaned absolute path so URIs are
+	// still anchored. The scanner sets it to the already-resolved root it opened
+	// the file under (mirrors claude_code/mapper.go's root field).
+	root string
 
 	// lineNo is the 1-based file line number of the record currently being
 	// mapped. The scanner (Chunk C) sets it via setLineNo before each mapRecord
@@ -218,6 +225,9 @@ type mapperConfig struct {
 	parentNativeID string
 	kind           canonical.SessionKind
 	agentName      string
+	// root is the symlink-resolved sessions root for PayloadRef containment
+	// (security.md §6). Empty in mapper-only tests (no containment resolve).
+	root string
 }
 
 // newFileMapper constructs a mapper for one rollout file.
@@ -225,6 +235,7 @@ func newFileMapper(cfg mapperConfig) *fileMapper {
 	return &fileMapper{
 		sourceID:       cfg.sourceID,
 		absPath:        cfg.absPath,
+		root:           cfg.root,
 		nativeID:       cfg.nativeID,
 		parentNativeID: cfg.parentNativeID,
 		kind:           cfg.kind,
@@ -388,16 +399,25 @@ func (m *fileMapper) activeTurnSeq() int {
 
 // payloadURI builds the PayloadRef LocationURI for a body inline in this
 // rollout file at the given 1-based line number (spec rule #6/#7/#8, edge #7).
-// The form is "file://<clean-abs>#L<line>" so the presenter reads the exact
-// record on demand without ai-viewer ever copying the body into SQLite.
+// The form is "file://<symlink-resolved-abs>#L<line>" so the presenter reads the
+// exact record on demand without ai-viewer ever copying the body into SQLite.
 //
-// NOTE (Chunk B↔D seam): this mapper-side builder is the minimal contract
-// Chunk B needs to compile and be tested in isolation — it cleans the path and
-// appends the line anchor but does NOT do symlink containment. Chunk D replaces
-// it with the claude_code-verbatim containment version (payloads.go:
-// resolveWithinRoot + EvalSymlinks), keeping the SAME "#L<line>" anchor so the
-// emitted event stream is unchanged. When absPath is empty (mapper-only tests)
-// the URI is just the line anchor.
+// Containment (Chunk D, security.md §6): the absolute path is resolved through
+// symlinks and verified to stay inside the configured sessions root via
+// payloadLocationURI (payloads.go). The "#L<line>" anchor is appended AFTER the
+// file:// path is built so the anchor is identical to Chunk B's contract
+// (TestMapper_PayloadRefLineAnchor). When m.root is empty (mapper-only tests)
+// the containment resolve is skipped and the cleaned absolute path is used; when
+// m.absPath is empty the URI is just the line anchor.
+//
+// The scanner is the authoritative containment gate: readRollout (scanner.go)
+// refuses any file that resolves outside the root BEFORE a single line is
+// streamed, so by the time the mapper builds a ref the owning file is already
+// known to be contained. A resolve failure or apparent escape here (e.g. the
+// file removed between the scanner's open and this build — unlikely, though an
+// open fd does not pin its path; handled defensively) therefore falls back to the
+// cleaned absolute path rather than dropping the anchor, keeping the ref usable
+// and the op→payload linkage (payload_refs.op_id NOT NULL) intact.
 func (m *fileMapper) payloadURI(lineNo int) string {
 	anchor := ""
 	if lineNo > 0 {
@@ -406,8 +426,14 @@ func (m *fileMapper) payloadURI(lineNo int) string {
 	if m.absPath == "" {
 		return anchor
 	}
-	cleaned := filepath.ToSlash(filepath.Clean(m.absPath))
-	return "file://" + cleaned + anchor
+	uri, err := payloadLocationURI(m.root, m.absPath)
+	if err != nil {
+		// Containment resolve failed (escape or unresolvable). The scanner
+		// already vetted the file before streaming, so fall back to the cleaned
+		// absolute path rather than emit a lossy ref.
+		uri = "file://" + filepath.ToSlash(filepath.Clean(m.absPath))
+	}
+	return uri + anchor
 }
 
 // payloadRef builds a PayloadRefEvent for a body inline in this rollout at the
diff --git a/internal/adapters/codex/payloads.go b/internal/adapters/codex/payloads.go
new file mode 100644
index 0000000..60660de
--- /dev/null
+++ b/internal/adapters/codex/payloads.go
@@ -0,0 +1,58 @@
+package codex
+
+import (
+	"fmt"
+	"path/filepath"
+)
+
+// payloadLocationURI returns the "file://" location of a rollout file whose
+// inline bodies are referenced rather than copied (spec rule #6/#7/#8, edge
+// #7). With a non-empty root the cleaned path goes through resolveWithinRoot,
+// which resolves symlinks and checks the result stays under the sessions root
+// (security.md §6 "No symlink traversal escape"); the URI then carries the
+// RESOLVED path. Outcomes:
+//   - root == "" (mapper-only unit tests): no check, cleaned abs path is used.
+//   - resolve failure: ("", err) with the resolver's error passed through.
+//   - escape: ("", err) reading "payload path escapes root".
+//
+// Callers may fall back to the cleaned absolute path on error. Mirrors
+// claude_code/payloads.go; the shared helpers live in stream.go.
+func payloadLocationURI(root, abs string) (string, error) {
+	target := filepath.Clean(abs)
+	if root == "" {
+		return "file://" + filepath.ToSlash(target), nil
+	}
+	inside, contained, resolveErr := resolveWithinRoot(root, target)
+	switch {
+	case resolveErr != nil:
+		return "", resolveErr
+	case !contained:
+		return "", fmt.Errorf("payload path escapes root: %q", abs)
+	}
+	return "file://" + filepath.ToSlash(inside), nil
+}
+
+// resolveWithinRoot resolves both root AND abs through filepath.EvalSymlinks and
+// reports whether the fully-resolved path stays inside the fully-resolved root
+// (security.md §6 "No symlink traversal escape"). It resolves the root every
+// call, so per-file walk callers that share one root resolve it once and use
+// withinResolvedRoot (stream.go) instead — this single-shot wrapper is for the
+// mapper's payload-URI builder, which is handed only the absolute file path. A
+// legitimately symlinked sessions root (e.g. ~/.codex → an external volume)
+// still works: containment is judged against the RESOLVED root. A symlink inside
+// the tree that points outside the resolved root is refused (ok=false). Returns:
+//   - (resolvedAbs, true, nil)  — abs resolves to a path under the root.
+//   - ("", false, nil)          — abs resolves outside the root (escape).
+//   - ("", false, err)          — the path or root could not be resolved.
+//
+// When abs does not yet exist on disk, the deepest existing ancestor is resolved
+// and the non-existent tail re-appended, so a not-yet-created file is judged by
+// where it WOULD live (a non-existent path cannot itself be a symlink to
+// elsewhere). Mirrors claude_code/payloads.go's resolveWithinRoot.
+func resolveWithinRoot(root, abs string) (string, bool, error) {
+	resolvedRoot, err := filepath.EvalSymlinks(filepath.Clean(root))
+	if err != nil {
+		return "", false, fmt.Errorf("resolve root %q: %w", root, err)
+	}
+	return withinResolvedRoot(resolvedRoot, abs)
+}
diff --git a/internal/adapters/codex/scanner.go b/internal/adapters/codex/scanner.go
index daa1e8e..1ed3a21 100644
--- a/internal/adapters/codex/scanner.go
+++ b/internal/adapters/codex/scanner.go
@@ -179,6 +179,11 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 	mapper := newFileMapper(mapperConfig{
 		sourceID: sourceID,
 		absPath:  r.abs,
+		// root is the already-symlink-resolved sessions root this file was
+		// opened under (containment-checked above). The mapper hands it to
+		// payloadLocationURI, which still runs its own EvalSymlinks on it for
+		// every ref, to build containment-verified file:// URIs (security.md §6).
+		root:     resolvedRoot,
 		nativeID: nativeIDForRollout(r),
 	})
 	dedup := newUnknownDedup()
diff --git a/internal/adapters/registry_init_test.go b/internal/adapters/registry_init_test.go
index 9859701..14095c1 100644
--- a/internal/adapters/registry_init_test.go
+++ b/internal/adapters/registry_init_test.go
@@ -11,6 +11,7 @@ import (
 	_ "github.com/netdata/ai-viewer/internal/adapters/aiagent_v2"
 	_ "github.com/netdata/ai-viewer/internal/adapters/aiagent_v3"
 	_ "github.com/netdata/ai-viewer/internal/adapters/claude_code"
+	_ "github.com/netdata/ai-viewer/internal/adapters/codex"
 )
 
 // TestRegistry_BothAdaptersRegisteredAtInit verifies the init-time
@@ -25,7 +26,7 @@ import (
 // reorder tests across files; the resets in registry_test.go restore
 // state via t.Cleanup before this test runs.
 func TestRegistry_BothAdaptersRegisteredAtInit(t *testing.T) {
-	for _, format := range []string{"aiagent_v2", "aiagent_v3", "claude-code"} {
+	for _, format := range []string{"aiagent_v2", "aiagent_v3", "claude-code", "codex"} {
 		f, ok := adapters.Get(format)
 		if !ok {
 			t.Errorf("Get(%q): not registered", format)
@@ -41,7 +42,7 @@ func TestRegistry_BothAdaptersRegisteredAtInit(t *testing.T) {
 // every init-time registration sorted lexicographically.
 func TestRegistry_FormatsContainsBothAdapters(t *testing.T) {
 	got := adapters.Formats()
-	var sawV2, sawV3, sawCC bool
+	var sawV2, sawV3, sawCC, sawCodex bool
 	for _, name := range got {
 		switch name {
 		case "aiagent_v2":
@@ -50,6 +51,8 @@ func TestRegistry_FormatsContainsBothAdapters(t *testing.T) {
 			sawV3 = true
 		case "claude-code":
 			sawCC = true
+		case "codex":
+			sawCodex = true
 		}
 	}
 	if !sawV2 {
@@ -61,4 +64,7 @@ func TestRegistry_FormatsContainsBothAdapters(t *testing.T) {
 	if !sawCC {
 		t.Errorf("Formats() missing claude-code; got %v", got)
 	}
+	if !sawCodex {
+		t.Errorf("Formats() missing codex; got %v", got)
+	}
 }

From e7746cd1cc43e139fa088170d66089f93e327152 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 14:04:32 +0300
Subject: [PATCH 06/13] codex adapter chunk E: golden fixtures +
 restart/truncation integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Validation chunk — exercises the full parser->mapper->scanner->adapter
pipeline end-to-end against committed golden output.

- golden_test.go: auto-discovering harness mirroring claude_code (walks
  testdata/codex/<scenario>/INPUT, roots the adapter at the synthetic
  codex-home/sessions, drops SourceProgress, rewrites the absolute root
  to <ROOT>, diffs expected.jsonl, -update-golden regenerates). A fixed
  stale-mtime ager keeps the h_crash_stale synthetic-finalize EndTs
  deterministic.
- adapter_restart_test.go: acceptance #6 at the public API — ingest a
  turn, round-trip the cursor through ParseCursor/String as the ingester
  does, append + resume → identical end state (zero dup, zero gap); a
  truncation test asserts the shrink SourceError + full re-scan.
- 8 fully synthetic, sanitized golden scenarios (a_happy_new,
  b_old_turncontext, c_subagent_threadspawn, d_fork, e_compaction,
  f_exec_truncated, g_turn_aborted, h_crash_stale), each expected.jsonl
  line-checked against the spec mapping rules: turn-boundary dual format,
  reasoning-kind split, tool namespace heuristic, sum-of-last_token_usage
  rollup with cumulative total as CtxUsed only, sub_agent/fork linkage,
  compaction, exec enrichment (no second op), abort cancellation, and the
  synthetic stale finalize as the only SessionFinalized codex emits.

No adapter bug surfaced; every spec-vs-output check passed. Gates green:
golangci(0)/gosec(0)/vet/race; package coverage 92.5%; FuzzParseLine 0
crashes; secret + AI-attribution scans clean on all fixtures.
---
 .../adapters/codex/adapter_restart_test.go    | 296 ++++++++++++++++++
 internal/adapters/codex/golden_test.go        | 219 +++++++++++++
 ...019dfed4-0000-7000-8000-00000000000a.jsonl |  11 +
 testdata/codex/a_happy_new/expected.jsonl     |  21 ++
 ...019aa234-0000-7000-8000-00000000000b.jsonl |   7 +
 .../codex/b_old_turncontext/expected.jsonl    |  16 +
 ...019e35f0-0000-7000-8000-00000000000c.jsonl |   7 +
 .../c_subagent_threadspawn/expected.jsonl     |  12 +
 ...019e0462-0000-7000-8000-00000000000d.jsonl |   6 +
 testdata/codex/d_fork/expected.jsonl          |  12 +
 ...019cb33f-0000-7000-8000-00000000000e.jsonl |   9 +
 testdata/codex/e_compaction/expected.jsonl    |  21 ++
 ...019d452a-0000-7000-8000-00000000000f.jsonl |   9 +
 .../codex/f_exec_truncated/expected.jsonl     |  17 +
 ...019cdd2a-0000-7000-8000-00000000001a.jsonl |   6 +
 testdata/codex/g_turn_aborted/expected.jsonl  |  11 +
 ...019d4abc-0000-7000-8000-00000000001b.jsonl |   5 +
 testdata/codex/h_crash_stale/expected.jsonl   |  12 +
 18 files changed, 697 insertions(+)
 create mode 100644 internal/adapters/codex/adapter_restart_test.go
 create mode 100644 internal/adapters/codex/golden_test.go
 create mode 100644 testdata/codex/a_happy_new/INPUT/codex-home/sessions/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl
 create mode 100644 testdata/codex/a_happy_new/expected.jsonl
 create mode 100644 testdata/codex/b_old_turncontext/INPUT/codex-home/sessions/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl
 create mode 100644 testdata/codex/b_old_turncontext/expected.jsonl
 create mode 100644 testdata/codex/c_subagent_threadspawn/INPUT/codex-home/sessions/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl
 create mode 100644 testdata/codex/c_subagent_threadspawn/expected.jsonl
 create mode 100644 testdata/codex/d_fork/INPUT/codex-home/sessions/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl
 create mode 100644 testdata/codex/d_fork/expected.jsonl
 create mode 100644 testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
 create mode 100644 testdata/codex/e_compaction/expected.jsonl
 create mode 100644 testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
 create mode 100644 testdata/codex/f_exec_truncated/expected.jsonl
 create mode 100644 testdata/codex/g_turn_aborted/INPUT/codex-home/sessions/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl
 create mode 100644 testdata/codex/g_turn_aborted/expected.jsonl
 create mode 100644 testdata/codex/h_crash_stale/INPUT/codex-home/sessions/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl
 create mode 100644 testdata/codex/h_crash_stale/expected.jsonl

diff --git a/internal/adapters/codex/adapter_restart_test.go b/internal/adapters/codex/adapter_restart_test.go
new file mode 100644
index 0000000..a1f8c20
--- /dev/null
+++ b/internal/adapters/codex/adapter_restart_test.go
@@ -0,0 +1,296 @@
+package codex
+
+import (
+	"context"
+	"sort"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// This file holds the integration-level acceptance #6 tests for the codex
+// adapter: they drive the PUBLIC Adapter API (New / Scan / ParseCursor /
+// Cursor.String) and round-trip the cursor through its JSON serialization,
+// proving a daemon restart resumes in place with zero duplicate and zero gap,
+// and that a mid-stream truncation re-scans from offset 0 with a SourceError.
+// The internal scanAll-level resume/truncation tests (scanner_test.go
+// TestScan_ResumeNoDupNoGap, TestScan_TruncationRescans) are complementary; this
+// file pins the same properties through the exact code path the ingester drives
+// (cmd/ai-viewer-ingest/sources.go: ParseCursor → Scan → persist
+// SourceProgress.Cursor → ParseCursor → Scan).
+
+// twoTurnSession builds a deterministic two-turn rollout body and returns the
+// per-turn line groups so a test can write turn 1, resume, then append turn 2.
+// Each turn is the new (task_started/task_complete) format with a user input and
+// an assistant message so it exercises the full op chain. The id is the session
+// id (which must equal the filename UUID — the mapper derives nativeID from the
+// filename).
+func twoTurnSession(id string) (turn1, turn2 []string) {
+	meta := metaLine(id, `"exec"`) // session header line; metaLine lives elsewhere in the package (presumably a session_meta record — confirm)
+	turn1 = []string{ // header + turn t1: turn_context, task_started, user message, assistant message, token_count, task_complete
+		meta,
+		`{"timestamp":"2025-11-20T16:59:10.000Z","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5","sandbox_policy":{"type":"workspace-write"},"effort":"high","approval_policy":"never"}}`,
+		`{"timestamp":"2025-11-20T16:59:10.100Z","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763657950,"model_context_window":258400}}`,
+		`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"first request"}]}}`,
+		`{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"first answer"}]}}`,
+		`{"timestamp":"2025-11-20T16:59:13.500Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":100,"output_tokens":20,"total_tokens":120},"last_token_usage":{"input_tokens":100,"output_tokens":20,"total_tokens":120},"model_context_window":258400}}}`,
+		`{"timestamp":"2025-11-20T16:59:14.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1","completed_at":1763657954,"duration_ms":4000}}`,
+	}
+	turn2 = []string{ // turn t2 only (no session header): same records as t1 minus the token_count line
+		`{"timestamp":"2025-11-20T16:59:20.000Z","type":"turn_context","payload":{"turn_id":"t2","model":"gpt-5.5","sandbox_policy":{"type":"workspace-write"},"effort":"high","approval_policy":"never"}}`,
+		`{"timestamp":"2025-11-20T16:59:20.100Z","type":"event_msg","payload":{"type":"task_started","turn_id":"t2","started_at":1763657960,"model_context_window":258400}}`,
+		`{"timestamp":"2025-11-20T16:59:21.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"second request"}]}}`,
+		`{"timestamp":"2025-11-20T16:59:23.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"second answer"}]}}`,
+		`{"timestamp":"2025-11-20T16:59:24.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"t2","completed_at":1763657964,"duration_ms":4000}}`,
+	}
+	return turn1, turn2
+}
+
+// TestRestart_NoDupNoGap verifies acceptance #6: ingesting the first turn of a
+// rollout through the PUBLIC adapter, persisting the cursor by serializing it
+// through Cursor.String / ParseCursor (the ingester's exact round-trip), then
+// resuming over the full file produces the same end state — no duplicate, no gap
+// — as a single one-shot ingest.
+//
+// "Same end state" is compared on the canonical content the SQL layer keys on
+// (event kind + session/turn/op identity + the load-bearing payload fields), NOT
+// on SourceSeq — which is a byte-offset-derived observability counter that
+// intentionally differs across a split vs one-shot pass (mirrors claude_code's
+// content-key comparison; the ingester dedups on natural identity).
+func TestRestart_NoDupNoGap(t *testing.T) {
+	t.Parallel()
+
+	id := uuid7(1)
+	turn1, turn2 := twoTurnSession(id)
+	full := append(append([]string{}, turn1...), turn2...) // copy first so turn1's backing array is never appended into
+
+	// One-shot reference run over the full file, in its own temp root.
+	oneShot := scanFullSession(t, id, full)
+
+	// Split run: write turn 1, scan, persist cursor (JSON round-trip), append
+	// turn 2, resume from the parsed cursor.
+	root := t.TempDir()
+	path := shardPath(root, id)
+	writeFileBytes(t, path, []byte(strings.Join(turn1, "\n")+"\n"))
+	setMtime(t, path, time.Minute) // fresh: turn 1 closed by task_complete, no stale finalize
+
+	a, err := New(root, canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	out1 := make(chan canonical.Event, 512) // buffered: drained only after Scan has returned
+	if err := a.Scan(context.Background(), nil, out1); err != nil {
+		t.Fatalf("Scan #1: %v", err)
+	}
+	firstHalf := drainBuffered(out1)
+
+	// Persist + reload the cursor exactly as the ingester does: take the last
+	// SourceProgress cursor string and re-parse it through the public ParseCursor.
+	cursorJSON := lastCursor(t, firstHalf)
+	parsed, err := a.ParseCursor(cursorJSON)
+	if err != nil {
+		t.Fatalf("ParseCursor: %v", err)
+	}
+
+	appendFileBytes(t, path, []byte(strings.Join(turn2, "\n")+"\n"))
+	setMtime(t, path, time.Minute)
+	// NOTE(review): the resume reuses adapter a; a real restart would call New again — confirm Scan keeps no cross-call state.
+	out2 := make(chan canonical.Event, 512)
+	if err := a.Scan(context.Background(), parsed, out2); err != nil {
+		t.Fatalf("Scan #2 (resume): %v", err)
+	}
+	secondHalf := drainBuffered(out2)
+
+	combined := append(append([]canonical.Event{}, firstHalf...), secondHalf...)
+
+	gotKeys := contentKeys(combined) // sorted content keys, SourceProgress excluded
+	wantKeys := contentKeys(oneShot)
+	if len(gotKeys) != len(wantKeys) {
+		t.Fatalf("split produced %d content events, one-shot produced %d\nsplit:   %v\noneShot: %v",
+			len(gotKeys), len(wantKeys), gotKeys, wantKeys)
+	}
+	for i := range wantKeys {
+		if gotKeys[i] != wantKeys[i] {
+			t.Fatalf("content mismatch at %d:\n split:   %s\n oneShot: %s", i, gotKeys[i], wantKeys[i])
+		}
+	}
+
+	// Belt-and-braces on the load-bearing invariant: SessionStarted exactly once
+	// across the resume (no dup), both turns finalized exactly once (no gap).
+	if got := countKind(combined, canonical.EvSessionStarted); got != 1 {
+		t.Errorf("SessionStarted across resume = %d, want exactly 1 (no dup)", got)
+	}
+	if got := countKind(combined, canonical.EvTurnFinalized); got != 2 {
+		t.Errorf("TurnFinalized across resume = %d, want 2 (no gap)", got)
+	}
+	if got := countKind(secondHalf, canonical.EvSessionStarted); got != 0 {
+		t.Errorf("resume re-emitted SessionStarted %d times, want 0", got)
+	}
+}
+
+// TestRestart_TruncationReScansWithSourceError verifies acceptance #6
+// (truncation): after a clean ingest persists a cursor recording size N, an
+// operator delete+recreate that leaves the file SHORTER than N must trigger a
+// full re-scan from offset 0 with a SourceError surfaced (codex never truncates,
+// so a shrunken file is the only way size < cursor.size happens). The re-scan
+// re-emits the (now shorter) session; the SQL layer's idempotent upserts absorb
+// the re-emitted rows.
+func TestRestart_TruncationReScansWithSourceError(t *testing.T) {
+	t.Parallel()
+
+	id := uuid7(2)
+	turn1, turn2 := twoTurnSession(id)
+	full := append(append([]string{}, turn1...), turn2...)
+
+	root := t.TempDir()
+	path := shardPath(root, id)
+	writeFileBytes(t, path, []byte(strings.Join(full, "\n")+"\n"))
+	setMtime(t, path, time.Minute)
+
+	var errs []string // OnError messages; NOTE(review): appended without locking — assumes OnError fires on the Scan goroutine
+	a, err := New(root, canonical.AdapterOptions{OnError: func(e error) { errs = append(errs, e.Error()) }})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	// Full clean ingest → cursor records size == full file size.
+	out1 := make(chan canonical.Event, 512) // buffered: drained only after Scan has returned
+	if err := a.Scan(context.Background(), nil, out1); err != nil {
+		t.Fatalf("Scan #1: %v", err)
+	}
+	first := drainBuffered(out1)
+	if len(errs) != 0 {
+		t.Fatalf("phase1 unexpected errors: %v", errs)
+	}
+	if got := countKind(first, canonical.EvSessionStarted); got != 1 {
+		t.Fatalf("phase1 SessionStarted = %d, want 1", got)
+	}
+	cursorJSON := lastCursor(t, first)
+	parsed, err := a.ParseCursor(cursorJSON)
+	if err != nil {
+		t.Fatalf("ParseCursor: %v", err)
+	}
+
+	// Operator delete+recreate leaving the file SHORTER (only turn 1). The new
+	// size is below the cursor's recorded size → truncation defense fires.
+	writeFileBytes(t, path, []byte(strings.Join(turn1, "\n")+"\n"))
+	setMtime(t, path, time.Minute)
+	errs = nil // only errors raised by the post-truncation scan are checked below
+
+	out2 := make(chan canonical.Event, 512)
+	if err := a.Scan(context.Background(), parsed, out2); err != nil {
+		t.Fatalf("Scan #2 (after truncation): %v", err)
+	}
+	second := drainBuffered(out2)
+
+	// A SourceError naming the shrink must surface (no silent failure).
+	if !anyContains(errs, "shrank") {
+		t.Fatalf("truncation did not surface a 'shrank' SourceError; errs=%v", errs)
+	}
+	// The re-scan from 0 re-emits the session (SQL dedup absorbs it downstream).
+	if got := countKind(second, canonical.EvSessionStarted); got != 1 {
+		t.Errorf("re-scan after truncation SessionStarted = %d, want 1 (full re-scan from 0)", got)
+	}
+	// Only turn 1 remains on disk, so exactly its TurnFinalized re-emits.
+	if got := countKind(second, canonical.EvTurnFinalized); got != 1 {
+		t.Errorf("re-scan TurnFinalized = %d, want 1 (only turn 1 remains)", got)
+	}
+}
+
+// scanFullSession writes lines as a single rollout under a fresh temp root and
+// returns everything one public Scan emits (the one-shot reference stream).
+func scanFullSession(t *testing.T, id string, lines []string) []canonical.Event {
+	t.Helper()
+	sessionsDir := t.TempDir()
+	rollout := shardPath(sessionsDir, id)
+	writeFileBytes(t, rollout, []byte(strings.Join(lines, "\n")+"\n"))
+	setMtime(t, rollout, time.Minute)
+	adapter, newErr := New(sessionsDir, canonical.AdapterOptions{})
+	if newErr != nil {
+		t.Fatalf("New: %v", newErr)
+	}
+	sink := make(chan canonical.Event, 512)
+	if scanErr := adapter.Scan(context.Background(), nil, sink); scanErr != nil {
+		t.Fatalf("Scan: %v", scanErr)
+	}
+	return drainBuffered(sink)
+}
+
+// lastCursor returns the cursor JSON of the final SourceProgressEvent in the
+// stream (the checkpoint the ingester persists), failing the test if absent.
+func lastCursor(t *testing.T, events []canonical.Event) string {
+	t.Helper()
+	for i := len(events) - 1; i >= 0; i-- {
+		if progress, ok := events[i].(canonical.SourceProgressEvent); ok {
+			if progress.Cursor == "" {
+				break
+			}
+			return progress.Cursor
+		}
+	}
+	t.Fatal("no SourceProgressEvent in events (cannot persist cursor)")
+	return ""
+}
+
+// contentKeys maps every non-SourceProgress event in the stream to its
+// contentKey and returns the keys sorted; SourceSeq never enters a key. Equal
+// key slices mean equal end state after SQL-layer dedup (mirrors claude_code).
+func contentKeys(events []canonical.Event) []string {
+	ids := make([]string, 0, len(events))
+	for _, candidate := range events {
+		// Progress checkpoints are bookkeeping, not content: leave them out.
+		if _, isProgress := candidate.(canonical.SourceProgressEvent); !isProgress {
+			ids = append(ids, contentKey(candidate))
+		}
+	}
+	sort.Strings(ids)
+	return ids
+}
+
+// contentKey renders one event as a "|"-joined identity string: a short kind
+// tag, the session/turn/op coordinates, and the payload discriminators the SQL
+// layer keys on. SourceSeq is deliberately left out. Mirrors claude_code's
+// contentKey, extended with the codex op discriminators (kind, name, namespace,
+// reasoning_kind) and the payload-ref kind so a resume that changed any of them
+// would be caught. Event types not listed collapse to "other".
+func contentKey(ev canonical.Event) string {
+	key := func(parts ...string) string { return strings.Join(parts, "|") }
+	switch e := ev.(type) {
+	case canonical.SessionStartedEvent:
+		return key("ss", e.NativeID, string(e.Kind), e.ParentNativeID)
+	case canonical.SessionUpdatedEvent:
+		return key("su", e.NativeID, e.Model, e.AgentName)
+	case canonical.SessionFinalizedEvent:
+		return key("sf", e.NativeID, string(e.Status), e.ErrorClass)
+	case canonical.TurnStartedEvent:
+		return key("ts", e.SessionNativeID, itoa(e.Seq))
+	case canonical.TurnFinalizedEvent:
+		return key("tf", e.SessionNativeID, itoa(e.Seq), e.Status, itoa64(e.TokensIn), itoa64(e.TokensOut))
+	case canonical.OpStartedEvent:
+		return key("os", e.SessionNativeID, itoa(e.TurnSeq), itoa(e.Seq), string(e.Kind), e.Name, e.ToolNamespace, e.ReasoningKind)
+	case canonical.OpFinalizedEvent:
+		return key("of", e.SessionNativeID, itoa(e.TurnSeq), itoa(e.Seq), e.Status, itoa64(e.CtxUsed))
+	case canonical.PayloadRefEvent:
+		return key("pr", e.SessionNativeID, itoa(e.TurnSeq), itoa(e.OpSeq), e.PayloadKind, e.Format)
+	case canonical.LogEntryEvent:
+		return key("log", e.SessionNativeID, itoa(e.TurnSeq), e.Message)
+	}
+	return "other"
+}
+
+// itoa / itoa64 format integers in base 10 for contentKey.
+func itoa(i int) string     { return strconv.FormatInt(int64(i), 10) }
+func itoa64(i int64) string { return string(strconv.AppendInt(nil, i, 10)) }
+
+// anyContains reports whether at least one element of s has sub as a
+// substring; an empty or nil s yields false.
+func anyContains(s []string, sub string) bool {
+	matched := false
+	for i := 0; i < len(s) && !matched; i++ {
+		matched = strings.Contains(s[i], sub)
+	}
+	return matched
+}
diff --git a/internal/adapters/codex/golden_test.go b/internal/adapters/codex/golden_test.go
new file mode 100644
index 0000000..fcc29da
--- /dev/null
+++ b/internal/adapters/codex/golden_test.go
@@ -0,0 +1,219 @@
+package codex
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+var updateGolden = flag.Bool("update-golden", false, "rewrite golden expected.jsonl files for the codex adapter")
+
+// goldenEvent is the wire shape written into expected.jsonl: the kind
+// discriminator plus the concrete payload. Resilient to field additions on the
+// canonical types — updating with -update-golden picks up new fields. Mirrors
+// claude_code/golden_test.go.
+type goldenEvent struct {
+	Kind    string          `json:"kind"`    // event kind discriminator, taken from ev.EventKind()
+	Payload json.RawMessage `json:"payload"` // the marshaled concrete event after root-placeholder rewriting
+}
+
+// rootPlaceholder replaces the test's absolute root in SourceID and
+// PayloadRef.LocationURI strings so golden files are portable across
+// workstations and CI AND carry no operator filesystem path. Mirrors
+// claude_code.
+const rootPlaceholder = "<ROOT>"
+
+// TestGolden runs every scenario directory under testdata/codex/ that contains
+// an INPUT/ subtree and asserts Scan produces the canonical events recorded in
+// expected.jsonl. Run with -update-golden to refresh. Mirrors claude_code's
+// auto-discovering harness; the codex INPUT subtree is a $CODEX_HOME root whose
+// sessions/YYYY/MM/DD/rollout-*.jsonl files the adapter walks (the adapter is
+// rooted at INPUT/<codex-home>/sessions — see scenarioRoot).
+func TestGolden(t *testing.T) {
+	t.Parallel()
+
+	fixtures := filepath.Join("..", "..", "..", "testdata", "codex")
+	scenarios, readErr := os.ReadDir(fixtures)
+	if readErr != nil {
+		t.Fatalf("readdir %s: %v", fixtures, readErr)
+	}
+	for _, entry := range scenarios {
+		if entry.IsDir() {
+			scenario := entry.Name()
+			dir := filepath.Join(fixtures, scenario) // resolved before the parallel subtest starts
+			t.Run(scenario, func(t *testing.T) {
+				t.Parallel()
+				runGoldenScenario(t, dir)
+			})
+		}
+	}
+}
+
+// scenarioRoot picks the directory the adapter is rooted at for a scenario.
+// Fixtures mirror a real $CODEX_HOME: rollouts live under
+// INPUT/<codex-home>/sessions/YYYY/MM/DD/, and the adapter is constructed on
+// that sessions dir (SOW C#3: location = $CODEX_HOME/sessions). The codex-home
+// directory is not hard-coded: the first child directory of INPUT (in ReadDir
+// order) that contains a "sessions" directory wins. When no child qualifies,
+// INPUT itself is returned as a defensive fallback. An unreadable INPUT fails
+// the test.
+func scenarioRoot(t *testing.T, inputDir string) string {
+	t.Helper()
+	children, readErr := os.ReadDir(inputDir)
+	if readErr != nil {
+		t.Fatalf("readdir %s: %v", inputDir, readErr)
+	}
+	for _, child := range children {
+		if child.IsDir() {
+			candidate := filepath.Join(inputDir, child.Name(), "sessions")
+			info, statErr := os.Stat(candidate)
+			if statErr == nil && info.IsDir() {
+				return candidate
+			}
+		}
+	}
+	return inputDir
+}
+
+func runGoldenScenario(t *testing.T, scenarioDir string) {
+	t.Helper()
+	inputDir := filepath.Join(scenarioDir, "INPUT")
+	if _, err := os.Stat(inputDir); err != nil {
+		t.Skipf("INPUT directory missing: %v", err)
+		return // not reached: t.Skipf stops the test; kept as a visual terminator
+	}
+
+	// The crash/stale scenario needs a stale mtime so the synthetic
+	// failed/incomplete finalize fires (rule #23). Golden fixtures cannot carry
+	// an mtime in git, so the harness ages every rollout under a scenario whose
+	// directory name contains "crash_stale" (today: "h_crash_stale"). Aging in
+	// the test (not a fixture artifact) keeps the input bytes deterministic while
+	// exercising the stale path; a non-stale scenario leaves mtimes fresh so no
+	// clean-EOF finalize is asserted (SOW C#3).
+	if strings.Contains(filepath.Base(scenarioDir), "crash_stale") {
+		ageRolloutsStale(t, inputDir)
+	}
+
+	sessionsRoot := scenarioRoot(t, inputDir)
+	absRoot, err := filepath.Abs(sessionsRoot)
+	if err != nil {
+		t.Fatalf("abs: %v", err)
+	}
+	absRoot = filepath.Clean(absRoot)
+
+	a, err := New(absRoot, canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+	out := make(chan canonical.Event, 8192) // buffered: drained only after Scan has returned
+	if err := a.Scan(context.Background(), nil, out); err != nil {
+		t.Fatalf("Scan: %v", err)
+	}
+	events := drainBuffered(out)
+
+	filtered := make([]canonical.Event, 0, len(events)) // SourceProgress checkpoints are kept out of the golden
+	for _, ev := range events {
+		if _, ok := ev.(canonical.SourceProgressEvent); ok {
+			continue
+		}
+		filtered = append(filtered, ev)
+	}
+
+	encoded, err := encodeEvents(filtered, absRoot)
+	if err != nil {
+		t.Fatalf("encode: %v", err)
+	}
+
+	goldenPath := filepath.Join(scenarioDir, "expected.jsonl")
+	if *updateGolden {
+		if err := os.WriteFile(goldenPath, encoded, 0o644); err != nil {
+			t.Fatalf("write golden: %v", err)
+		}
+		t.Logf("updated golden: %s", goldenPath)
+		return // update mode rewrites the golden and skips the comparison
+	}
+
+	want, err := os.ReadFile(goldenPath)
+	if err != nil {
+		t.Fatalf("read golden %s: %v (run with -update-golden to create)", goldenPath, err)
+	}
+	if string(want) != string(encoded) {
+		t.Errorf("golden mismatch for %s\n--- want ---\n%s\n--- got ---\n%s",
+			goldenPath, string(want), string(encoded))
+	}
+}
+
+// staleMtime is the FIXED mtime the crash/stale scenario's rollout is aged to.
+// It must be (a) far enough in the past that "now - mtime >= 1 h" always holds
+// (rule #23) and (b) a constant so the synthetic-finalize EndTs (the scanner
+// stamps it from the file mtime, mapper_finalize.go) is DETERMINISTIC across
+// runs — a wall-clock-relative mtime would make the golden EndTs change every
+// run and never match. 2026-04-01T11:00:00Z is two hours after the fixture's
+// session start and a fixed absolute instant well over 1 h before any plausible
+// test run.
+var staleMtime = time.Date(2026, 4, 1, 11, 0, 0, 0, time.UTC)
+
+// ageRolloutsStale stamps every *.jsonl file under inputDir with the fixed
+// staleMtime (both atime and mtime) so the scanner's "mtime stale >= 1 h"
+// EOF-finalize path fires deterministically (rule #23). Only file times in the
+// working tree change; the committed bytes are untouched.
+func ageRolloutsStale(t *testing.T, inputDir string) {
+	t.Helper()
+	visit := func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			return nil //nolint:nilerr // walk best-effort; a stat error just leaves mtime fresh
+		}
+		if d.IsDir() || !strings.HasSuffix(path, ".jsonl") {
+			return nil
+		}
+		if cErr := os.Chtimes(path, staleMtime, staleMtime); cErr != nil {
+			t.Fatalf("chtimes %s: %v", path, cErr)
+		}
+		return nil
+	}
+	_ = filepath.WalkDir(inputDir, visit)
+}
+
+// encodeEvents serialises events one goldenEvent per line. Every embedding of
+// the absolute test-machine root is swapped for rootPlaceholder so goldens are
+// portable and carry no operator filesystem path (sensitive-data hygiene):
+// SourceID ("codex:<root>") and PayloadRef.LocationURI ("file://<root>/...",
+// which the adapter symlink-resolves, hence both forms). Mirrors claude_code.
+func encodeEvents(events []canonical.Event, absRoot string) ([]byte, error) {
+	resolvedRoot, symErr := filepath.EvalSymlinks(absRoot)
+	if symErr != nil {
+		resolvedRoot = absRoot
+	}
+	// Ordered from→to rewrites; the resolved file:// form goes before the raw one.
+	rewrites := [][2]string{
+		{sourceIDPrefix + absRoot, sourceIDPrefix + rootPlaceholder},
+		{"file://" + filepath.ToSlash(resolvedRoot), "file://" + rootPlaceholder},
+		{"file://" + filepath.ToSlash(absRoot), "file://" + rootPlaceholder},
+	}
+	var out strings.Builder
+	for _, ev := range events {
+		raw, err := json.Marshal(ev)
+		if err != nil {
+			return nil, fmt.Errorf("marshal %T: %w", ev, err)
+		}
+		text := string(raw)
+		for _, rw := range rewrites {
+			text = strings.ReplaceAll(text, rw[0], rw[1])
+		}
+		line, err := json.Marshal(goldenEvent{Kind: string(ev.EventKind()), Payload: json.RawMessage(text)})
+		if err != nil {
+			return nil, err
+		}
+		out.Write(line)
+		out.WriteByte('\n')
+	}
+	return []byte(out.String()), nil
+}
diff --git a/testdata/codex/a_happy_new/INPUT/codex-home/sessions/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl b/testdata/codex/a_happy_new/INPUT/codex-home/sessions/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl
new file mode 100644
index 0000000..da2cd2c
--- /dev/null
+++ b/testdata/codex/a_happy_new/INPUT/codex-home/sessions/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl
@@ -0,0 +1,11 @@
+{"timestamp":"2026-05-06T19:47:01.532Z","type":"session_meta","payload":{"id":"019dfed4-0000-7000-8000-00000000000a","timestamp":"2026-05-06T19:47:01.532Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","thread_source":null,"model_provider":"openai","git":{"commit_hash":"0000000000000000000000000000000000000000","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-06T19:47:01.533Z","type":"turn_context","payload":{"turn_id":"019dfed4-1111-7000-8000-00000000001a","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write","network_access":true},"summary":"none"}}
+{"timestamp":"2026-05-06T19:47:01.534Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019dfed4-1111-7000-8000-00000000001a","started_at":1778096821,"model_context_window":258400,"collaboration_mode_kind":"default"}}
+{"timestamp":"2026-05-06T19:47:02.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"please summarize the build script"}]}}
+{"timestamp":"2026-05-06T19:47:05.000Z","type":"response_item","payload":{"type":"reasoning","summary":[{"type":"summary_text","text":"plan the response"}],"content":null,"encrypted_content":null}}
+{"timestamp":"2026-05-06T19:47:06.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_a1","arguments":"{\"command\":[\"cat\",\"build.sh\"]}"}}
+{"timestamp":"2026-05-06T19:47:06.500Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_a1","output":"#!/usr/bin/env bash\necho build"}}
+{"timestamp":"2026-05-06T19:47:08.000Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":1200,"cached_input_tokens":300,"output_tokens":80,"reasoning_output_tokens":40,"total_tokens":1280},"last_token_usage":{"input_tokens":1200,"cached_input_tokens":300,"output_tokens":80,"reasoning_output_tokens":40,"total_tokens":1280},"model_context_window":258400}}}
+{"timestamp":"2026-05-06T19:47:10.000Z","type":"response_item","payload":{"type":"message","role":"assistant","phase":"final_answer","content":[{"type":"output_text","text":"the build script prints build"}]}}
+{"timestamp":"2026-05-06T19:47:10.100Z","type":"event_msg","payload":{"type":"agent_message","message":"the build script prints build"}}
+{"timestamp":"2026-05-06T19:47:10.200Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019dfed4-1111-7000-8000-00000000001a","completed_at":1778096830,"duration_ms":8666,"time_to_first_token_ms":3500,"last_agent_message":"the build script prints build"}}
diff --git a/testdata/codex/a_happy_new/expected.jsonl b/testdata/codex/a_happy_new/expected.jsonl
new file mode 100644
index 0000000..054b41e
--- /dev/null
+++ b/testdata/codex/a_happy_new/expected.jsonl
@@ -0,0 +1,21 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1778096821532000,"NativeID":"019dfed4-0000-7000-8000-00000000000a","RootNativeID":"019dfed4-0000-7000-8000-00000000000a","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"0000000000000000000000000000000000000000","repository_url":"git@github.com:example/example.git"},"model_provider":"openai","originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1778096821533000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1778096821533000,"NativeID":"019dfed4-0000-7000-8000-00000000000a","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1778096822000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1778096822000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778096822000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1778096822000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl#L4","OriginalBytes":183,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1778096825000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"reasoning","Name":"reasoning","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"summary","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1778096825000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778096825000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16387,"Ts":1778096825000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_reasoning","Format":"text","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl#L5","OriginalBytes":197,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1778096826000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_a1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1778096826000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":3,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl#L6","OriginalBytes":184,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1778096826500000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778096826500000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1778096826500000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":3,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl#L7","OriginalBytes":168,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778096830000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32771,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/06/rollout-2026-05-06T22-46-58-019dfed4-0000-7000-8000-00000000000a.jsonl#L9","OriginalBytes":208,"StoredBytes":0,"SHA256":""}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32772,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"final_answer","Extras":{}}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36864,"Ts":1778096830100000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":0,"Severity":"DBG","Source":"codex","Message":"agent_message","Extras":{}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":40960,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778096830000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":1280,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":40961,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1778096830000000,"TokensIn":1200,"TokensOut":80,"TokensCacheRead":300,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":40962,"Ts":1778096830000000,"SessionNativeID":"019dfed4-0000-7000-8000-00000000000a","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019dfed4-1111-7000-8000-00000000001a","effort":"high","last_agent_message":"the build script prints build","sandbox":"workspace-write","ttft_ms":3500}}}
diff --git a/testdata/codex/b_old_turncontext/INPUT/codex-home/sessions/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl b/testdata/codex/b_old_turncontext/INPUT/codex-home/sessions/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl
new file mode 100644
index 0000000..a80237b
--- /dev/null
+++ b/testdata/codex/b_old_turncontext/INPUT/codex-home/sessions/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl
@@ -0,0 +1,7 @@
+{"timestamp":"2025-11-20T16:59:09.857Z","type":"session_meta","payload":{"id":"019aa234-0000-7000-8000-00000000000b","timestamp":"2025-11-20T16:59:09.857Z","cwd":"<ROOT>/legacy-project","originator":"codex_cli_rs","cli_version":"0.61.0","source":"cli","git":{"commit_hash":"1111111111111111111111111111111111111111","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2025-11-20T16:59:09.858Z","type":"turn_context","payload":{"cwd":"<ROOT>/legacy-project","model":"gpt-5.1-codex-max","effort":"medium","approval_policy":"on-request","sandbox_policy":{"type":"read-only"},"summary":"auto"}}
+{"timestamp":"2025-11-20T16:59:10.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"list the files"}]}}
+{"timestamp":"2025-11-20T16:59:12.000Z","type":"response_item","payload":{"type":"reasoning","summary":[],"content":[{"type":"reasoning_text","text":"decide to list"}],"encrypted_content":null}}
+{"timestamp":"2025-11-20T16:59:13.000Z","type":"response_item","payload":{"type":"function_call","name":"list_dir","call_id":"call_b1","arguments":"{\"path\":\".\"}"}}
+{"timestamp":"2025-11-20T16:59:13.500Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_b1","output":"build.sh\nREADME.md"}}
+{"timestamp":"2025-11-20T16:59:15.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"there are two files"}]}}
diff --git a/testdata/codex/b_old_turncontext/expected.jsonl b/testdata/codex/b_old_turncontext/expected.jsonl
new file mode 100644
index 0000000..d4214c5
--- /dev/null
+++ b/testdata/codex/b_old_turncontext/expected.jsonl
@@ -0,0 +1,16 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1763657949857000,"NativeID":"019aa234-0000-7000-8000-00000000000b","RootNativeID":"019aa234-0000-7000-8000-00000000000b","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_cli_rs","Model":"","Cwd":"\u003cROOT\u003e/legacy-project","CallPath":"","Extras":{"cli_version":"0.61.0","cwd":"\u003cROOT\u003e/legacy-project","git":{"branch":"main","commit_hash":"1111111111111111111111111111111111111111","repository_url":"git@github.com:example/example.git"},"originator":"codex_cli_rs","source":"cli"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1763657949858000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1763657949858000,"NativeID":"019aa234-0000-7000-8000-00000000000b","AgentName":"","Model":"gpt-5.1-codex-max","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":8193,"Ts":1763657950000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":8194,"Ts":1763657950000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657950000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":8195,"Ts":1763657950000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L3","OriginalBytes":164,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1763657952000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"reasoning","Name":"reasoning","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"raw","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1763657952000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657952000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1763657952000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_reasoning","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L4","OriginalBytes":194,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1763657953000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"tool","Name":"list_dir","ToolNamespace":"fs","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_b1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1763657953000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":3,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L5","OriginalBytes":167,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1763657953500000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657953500000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1763657953500000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":3,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L6","OriginalBytes":156,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657955000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L7","OriginalBytes":175,"StoredBytes":0,"SHA256":""}}
diff --git a/testdata/codex/c_subagent_threadspawn/INPUT/codex-home/sessions/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl b/testdata/codex/c_subagent_threadspawn/INPUT/codex-home/sessions/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl
new file mode 100644
index 0000000..2a5ed9f
--- /dev/null
+++ b/testdata/codex/c_subagent_threadspawn/INPUT/codex-home/sessions/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl
@@ -0,0 +1,7 @@
+{"timestamp":"2026-05-17T12:37:16.100Z","type":"session_meta","payload":{"id":"019e35f0-0000-7000-8000-00000000000c","timestamp":"2026-05-17T12:37:16.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","thread_source":"subagent","agent_nickname":"Dewey","agent_role":"explorer","source":{"subagent":{"thread_spawn":{"parent_thread_id":"019e2e0a-0000-7000-8000-0000000000fe","depth":1,"agent_path":null,"agent_nickname":"Dewey","agent_role":"explorer"}}},"git":{"commit_hash":"2222222222222222222222222222222222222222","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-17T12:37:16.200Z","type":"turn_context","payload":{"turn_id":"019e35f0-1111-7000-8000-00000000001c","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"danger-full-access"},"summary":"none"}}
+{"timestamp":"2026-05-17T12:37:16.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019e35f0-1111-7000-8000-00000000001c","started_at":1779021436,"model_context_window":258400}}
+{"timestamp":"2026-05-17T12:37:17.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"explore the repository structure"}]}}
+{"timestamp":"2026-05-17T12:37:20.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the repository has a standard layout"}]}}
+{"timestamp":"2026-05-17T12:37:20.500Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":900,"cached_input_tokens":0,"output_tokens":60,"reasoning_output_tokens":20,"total_tokens":960},"last_token_usage":{"input_tokens":900,"cached_input_tokens":0,"output_tokens":60,"reasoning_output_tokens":20,"total_tokens":960},"model_context_window":258400}}}
+{"timestamp":"2026-05-17T12:37:21.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019e35f0-1111-7000-8000-00000000001c","completed_at":1779021441,"duration_ms":4900,"time_to_first_token_ms":2100}}
diff --git a/testdata/codex/c_subagent_threadspawn/expected.jsonl b/testdata/codex/c_subagent_threadspawn/expected.jsonl
new file mode 100644
index 0000000..fea556b
--- /dev/null
+++ b/testdata/codex/c_subagent_threadspawn/expected.jsonl
@@ -0,0 +1,12 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1779021436100000,"NativeID":"019e35f0-0000-7000-8000-00000000000c","RootNativeID":"019e2e0a-0000-7000-8000-0000000000fe","ParentNativeID":"019e2e0a-0000-7000-8000-0000000000fe","ParentOpKey":"","Kind":"sub_agent","AgentName":"Dewey","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"2222222222222222222222222222222222222222","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","relationship":"sub_agent","source":"subagent","subagent_depth":1}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1779021436200000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1779021436200000,"NativeID":"019e35f0-0000-7000-8000-00000000000c","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1779021437000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1779021437000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779021437000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1779021437000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl#L4","OriginalBytes":182,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1779021440000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1779021440000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779021440000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16387,"Ts":1779021440000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/17/rollout-2026-05-17T15-37-16-019e35f0-0000-7000-8000-00000000000c.jsonl#L5","OriginalBytes":192,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1779021441000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779021440000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":960,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1779021441000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1779021441000000,"TokensIn":900,"TokensOut":60,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1779021441000000,"SessionNativeID":"019e35f0-0000-7000-8000-00000000000c","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019e35f0-1111-7000-8000-00000000001c","effort":"high","sandbox":"danger-full-access","ttft_ms":2100}}}
diff --git a/testdata/codex/d_fork/INPUT/codex-home/sessions/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl b/testdata/codex/d_fork/INPUT/codex-home/sessions/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl
new file mode 100644
index 0000000..6626c1e
--- /dev/null
+++ b/testdata/codex/d_fork/INPUT/codex-home/sessions/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl
@@ -0,0 +1,6 @@
+{"timestamp":"2026-05-08T00:40:27.500Z","type":"session_meta","payload":{"id":"019e0462-0000-7000-8000-00000000000d","forked_from_id":"019e03c4-0000-7000-8000-0000000000fd","timestamp":"2026-05-08T00:40:27.500Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"3333333333333333333333333333333333333333","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-08T00:40:27.600Z","type":"turn_context","payload":{"turn_id":"019e0462-1111-7000-8000-00000000001d","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"medium","approval_policy":"on-request","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-05-08T00:40:27.700Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019e0462-1111-7000-8000-00000000001d","started_at":1778366427,"model_context_window":258400}}
+{"timestamp":"2026-05-08T00:40:28.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"continue from the previous session"}]}}
+{"timestamp":"2026-05-08T00:40:31.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"resuming the prior work"}]}}
+{"timestamp":"2026-05-08T00:40:31.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019e0462-1111-7000-8000-00000000001d","completed_at":1778366431,"duration_ms":3800,"time_to_first_token_ms":1500}}
diff --git a/testdata/codex/d_fork/expected.jsonl b/testdata/codex/d_fork/expected.jsonl
new file mode 100644
index 0000000..dce4210
--- /dev/null
+++ b/testdata/codex/d_fork/expected.jsonl
@@ -0,0 +1,12 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1778200827500000,"NativeID":"019e0462-0000-7000-8000-00000000000d","RootNativeID":"019e03c4-0000-7000-8000-0000000000fd","ParentNativeID":"019e03c4-0000-7000-8000-0000000000fd","ParentOpKey":"","Kind":"fork","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"3333333333333333333333333333333333333333","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","relationship":"fork","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1778200827600000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1778200827600000,"NativeID":"019e0462-0000-7000-8000-00000000000d","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1778200828000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1778200828000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778200828000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1778200828000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl#L4","OriginalBytes":184,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1778200831000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1778200831000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778200831000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16387,"Ts":1778200831000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/08/rollout-2026-05-08T00-40-27-019e0462-0000-7000-8000-00000000000d.jsonl#L5","OriginalBytes":179,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1778366431000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1778200831000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1778366431000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1778366431000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1778366431000000,"SessionNativeID":"019e0462-0000-7000-8000-00000000000d","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"on-request","codex_turn_id":"019e0462-1111-7000-8000-00000000001d","effort":"medium","sandbox":"workspace-write","ttft_ms":1500}}}
diff --git a/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl b/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
new file mode 100644
index 0000000..bdd9abf
--- /dev/null
+++ b/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
@@ -0,0 +1,9 @@
+{"timestamp":"2026-03-03T12:30:03.100Z","type":"session_meta","payload":{"id":"019cb33f-0000-7000-8000-00000000000e","timestamp":"2026-03-03T12:30:03.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.110.0","source":"exec","git":{"commit_hash":"4444444444444444444444444444444444444444","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-03-03T12:30:03.200Z","type":"turn_context","payload":{"turn_id":"019cb33f-1111-7000-8000-00000000001e","cwd":"<ROOT>/project","model":"gpt-5.1-codex-max","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-03-03T12:30:03.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019cb33f-1111-7000-8000-00000000001e","started_at":1772800203,"model_context_window":258400}}
+{"timestamp":"2026-03-03T12:30:04.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"work on a long task"}]}}
+{"timestamp":"2026-03-03T12:30:06.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"working on it"}]}}
+{"timestamp":"2026-03-03T12:30:08.000Z","type":"compacted","payload":{"message":"summary of the conversation so far","replacement_history":[{"type":"message","role":"user","content":[]},{"type":"message","role":"assistant","content":[]}]}}
+{"timestamp":"2026-03-03T12:30:09.000Z","type":"response_item","payload":{"type":"context_compaction","encrypted_content":"AAAABBBBCCCCDDDD"}}
+{"timestamp":"2026-03-03T12:30:10.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"continuing after compaction"}]}}
+{"timestamp":"2026-03-03T12:30:11.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019cb33f-1111-7000-8000-00000000001e","completed_at":1772800211,"duration_ms":8000}}
diff --git a/testdata/codex/e_compaction/expected.jsonl b/testdata/codex/e_compaction/expected.jsonl
new file mode 100644
index 0000000..a6a6f6a
--- /dev/null
+++ b/testdata/codex/e_compaction/expected.jsonl
@@ -0,0 +1,21 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1772541003100000,"NativeID":"019cb33f-0000-7000-8000-00000000000e","RootNativeID":"019cb33f-0000-7000-8000-00000000000e","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.110.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"4444444444444444444444444444444444444444","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1772541003200000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1772541003200000,"NativeID":"019cb33f-0000-7000-8000-00000000000e","AgentName":"","Model":"gpt-5.1-codex-max","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1772541004000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1772541004000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541004000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1772541004000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L4","OriginalBytes":169,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1772541006000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1772541006000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541006000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16387,"Ts":1772541006000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L5","OriginalBytes":169,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"compaction","Name":"compaction","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"message_preview":"summary of the conversation so far","replacement_history_size":2,"trigger":"auto"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541008000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20483,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":3,"PayloadKind":"log","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L6","OriginalBytes":-1,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"compaction","Name":"compaction","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"trigger":"auto"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541009000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":4,"PayloadKind":"log","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L7","OriginalBytes":-1,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":5,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L8","OriginalBytes":183,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1772800211000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019cb33f-1111-7000-8000-00000000001e","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl b/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
new file mode 100644
index 0000000..dfd6f75
--- /dev/null
+++ b/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
@@ -0,0 +1,9 @@
+{"timestamp":"2026-03-31T21:32:12.100Z","type":"session_meta","payload":{"id":"019d452a-0000-7000-8000-00000000000f","timestamp":"2026-03-31T21:32:12.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.120.0","source":"exec","git":{"commit_hash":"5555555555555555555555555555555555555555","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-03-31T21:32:12.200Z","type":"turn_context","payload":{"turn_id":"019d452a-1111-7000-8000-00000000001f","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"danger-full-access"},"summary":"none"}}
+{"timestamp":"2026-03-31T21:32:12.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d452a-1111-7000-8000-00000000001f","started_at":1774992732,"model_context_window":258400}}
+{"timestamp":"2026-03-31T21:32:13.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"run the long command"}]}}
+{"timestamp":"2026-03-31T21:32:14.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_f1","arguments":"{\"command\":[\"bash\",\"-lc\",\"cat big.txt\"]}"}}
+{"timestamp":"2026-03-31T21:32:18.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_f1","output":"line1\nline2"}}
+{"timestamp":"2026-03-31T21:32:18.100Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_f1","turn_id":"019d452a-1111-7000-8000-00000000001f","command":["bash","-lc","cat big.txt"],"cwd":"<ROOT>/project","stdout":"","stderr":"","formatted_output":"","aggregated_output":"line1\n[.. omitted 4096 bytes ..]\nlineN","exit_code":0,"duration":{"secs":4,"nanos":0},"source":"unified_exec","status":"completed"}}
+{"timestamp":"2026-03-31T21:32:19.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the file is large"}]}}
+{"timestamp":"2026-03-31T21:32:19.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d452a-1111-7000-8000-00000000001f","completed_at":1774992739,"duration_ms":7400}}
diff --git a/testdata/codex/f_exec_truncated/expected.jsonl b/testdata/codex/f_exec_truncated/expected.jsonl
new file mode 100644
index 0000000..8ccfb92
--- /dev/null
+++ b/testdata/codex/f_exec_truncated/expected.jsonl
@@ -0,0 +1,17 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1774992732100000,"NativeID":"019d452a-0000-7000-8000-00000000000f","RootNativeID":"019d452a-0000-7000-8000-00000000000f","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.120.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"5555555555555555555555555555555555555555","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1774992732200000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1774992732200000,"NativeID":"019d452a-0000-7000-8000-00000000000f","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1774992733000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1774992733000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992733000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1774992733000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L4","OriginalBytes":170,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_f1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L5","OriginalBytes":196,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1774992738000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992738000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1774992738000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L6","OriginalBytes":149,"StoredBytes":0,"SHA256":""}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":0,"Severity":"DBG","Source":"codex","Message":"enrich_exec_command_end","Extras":{"call_id":"call_f1","exec_cwd":"\u003cROOT\u003e/project","exec_exit_code":0,"exec_output_bytes":38,"exec_source":"unified_exec"}}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992739000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L8","OriginalBytes":173,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992739000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1774992739000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d452a-1111-7000-8000-00000000001f","effort":"high","sandbox":"danger-full-access"}}}
diff --git a/testdata/codex/g_turn_aborted/INPUT/codex-home/sessions/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl b/testdata/codex/g_turn_aborted/INPUT/codex-home/sessions/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl
new file mode 100644
index 0000000..335dffd
--- /dev/null
+++ b/testdata/codex/g_turn_aborted/INPUT/codex-home/sessions/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl
@@ -0,0 +1,6 @@
+{"timestamp":"2026-03-11T15:51:06.100Z","type":"session_meta","payload":{"id":"019cdd2a-0000-7000-8000-00000000001a","timestamp":"2026-03-11T15:51:06.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.115.0","source":"exec","git":{"commit_hash":"6666666666666666666666666666666666666666","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-03-11T15:51:06.200Z","type":"turn_context","payload":{"turn_id":"019cdd2a-1111-7000-8000-00000000002a","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-03-11T15:51:06.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019cdd2a-1111-7000-8000-00000000002a","started_at":1773589866,"model_context_window":258400}}
+{"timestamp":"2026-03-11T15:51:07.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"start a slow operation"}]}}
+{"timestamp":"2026-03-11T15:51:08.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_g1","arguments":"{\"command\":[\"sleep\",\"600\"]}"}}
+{"timestamp":"2026-03-11T15:51:30.000Z","type":"event_msg","payload":{"type":"turn_aborted","turn_id":"019cdd2a-1111-7000-8000-00000000002a","reason":"interrupted","completed_at":1773589890,"duration_ms":23700}}
diff --git a/testdata/codex/g_turn_aborted/expected.jsonl b/testdata/codex/g_turn_aborted/expected.jsonl
new file mode 100644
index 0000000..753a147
--- /dev/null
+++ b/testdata/codex/g_turn_aborted/expected.jsonl
@@ -0,0 +1,11 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1773244266100000,"NativeID":"019cdd2a-0000-7000-8000-00000000001a","RootNativeID":"019cdd2a-0000-7000-8000-00000000001a","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.115.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"6666666666666666666666666666666666666666","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1773244266200000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1773244266200000,"NativeID":"019cdd2a-0000-7000-8000-00000000001a","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1773244267000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1773244267000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1773244267000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1773244267000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl#L4","OriginalBytes":172,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1773244268000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_g1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1773244268000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/11/rollout-2026-03-11T15-51-06-019cdd2a-0000-7000-8000-00000000001a.jsonl#L5","OriginalBytes":181,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1773589890000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"Seq":2,"Status":"cancelled","ErrorClass":"","ErrorMessage":"","EndTs":1773589890000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1773589890000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","Seq":1,"Status":"failed","ErrorClass":"user_interrupt","EndTs":1773589890000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1773589890000000,"SessionNativeID":"019cdd2a-0000-7000-8000-00000000001a","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019cdd2a-1111-7000-8000-00000000002a","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/h_crash_stale/INPUT/codex-home/sessions/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl b/testdata/codex/h_crash_stale/INPUT/codex-home/sessions/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl
new file mode 100644
index 0000000..b8048d9
--- /dev/null
+++ b/testdata/codex/h_crash_stale/INPUT/codex-home/sessions/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl
@@ -0,0 +1,5 @@
+{"timestamp":"2026-04-01T09:00:00.100Z","type":"session_meta","payload":{"id":"019d4abc-0000-7000-8000-00000000001b","timestamp":"2026-04-01T09:00:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.121.0","source":"exec","git":{"commit_hash":"7777777777777777777777777777777777777777","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-04-01T09:00:00.200Z","type":"turn_context","payload":{"turn_id":"019d4abc-1111-7000-8000-00000000002b","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-04-01T09:00:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d4abc-1111-7000-8000-00000000002b","started_at":1775034000,"model_context_window":258400}}
+{"timestamp":"2026-04-01T09:00:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"do something heavy"}]}}
+{"timestamp":"2026-04-01T09:00:02.000Z","type":"response_item","payload":{"type":"function_call","name":"apply_patch","call_id":"call_h1","arguments":"{\"patch\":\"*** Begin Patch\"}"}}
diff --git a/testdata/codex/h_crash_stale/expected.jsonl b/testdata/codex/h_crash_stale/expected.jsonl
new file mode 100644
index 0000000..c42211d
--- /dev/null
+++ b/testdata/codex/h_crash_stale/expected.jsonl
@@ -0,0 +1,12 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775034000100000,"NativeID":"019d4abc-0000-7000-8000-00000000001b","RootNativeID":"019d4abc-0000-7000-8000-00000000001b","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.121.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"7777777777777777777777777777777777777777","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1775034000200000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1775034000200000,"NativeID":"019d4abc-0000-7000-8000-00000000001b","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1775034001000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1775034001000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775034001000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1775034001000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl#L4","OriginalBytes":168,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1775034002000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"apply_patch","ToolNamespace":"fs","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_h1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1775034002000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/01/rollout-2026-04-01T09-00-00-019d4abc-0000-7000-8000-00000000001b.jsonl#L5","OriginalBytes":185,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775041200000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"Seq":2,"Status":"cancelled","ErrorClass":"","ErrorMessage":"","EndTs":1775041200000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775041200000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","Seq":1,"Status":"failed","ErrorClass":"incomplete","EndTs":1775041200000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775041200000000,"SessionNativeID":"019d4abc-0000-7000-8000-00000000001b","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d4abc-1111-7000-8000-00000000002b","effort":"high","sandbox":"workspace-write"}}}
+{"kind":"session_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775041200000000,"NativeID":"019d4abc-0000-7000-8000-00000000001b","Status":"failed","ErrorClass":"incomplete","ErrorMessage":"","EndTs":1775041200000000}}

From 2b3a155e104fa8171820ab68b189b59d07feec01 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 15:19:59 +0300
Subject: [PATCH 07/13] codex adapter: fix turn lifecycle, enrichment, collab,
 compaction, web_search

Round-2 fixes after external review found real spec-conformance gaps that
the per-chunk goldens (built from the same assumptions as the code) missed.
Verified against the real ~/.codex corpus (2,660 files).

- Turn lifecycle (the P1): old-format turn_context-only sessions (1,006
  real files, 38%) ended cleanly but were mislabeled failed/incomplete.
  Track NEW- vs OLD-format per turn; a new turn now finalizes the prior
  one (NEW->failed/replaced per edge #2; OLD->completed per edge #3);
  finalizeStale becomes finalizeAtEOF(stale): OLD-format turns close
  completed at EOF regardless of staleness (no SessionFinalized), NEW-format
  only finalize failed/incomplete when >=1h stale. scanner calls it at
  every full-read EOF.
- exec/patch enrichment now reaches op Extras order-independently: a
  finalizedOps lookup + an idempotent OpStarted re-emit carry exec_exit_code/
  exec_cwd/exec_duration_ms/exec_source onto the op row (OpFinalizedEvent has
  no Extras field), instead of degrading to a DBG log. In the majority of the
  real corpus, exec_command_end arrives before function_call_output.
- collab_agent_spawn_end now emits Op Kind=session Name=spawn
  ChildSessionNativeID=new_thread_id (was treated as unknown); collab_close_end/
  collab_waiting_end recognized. Spec corrected: the link is
  sender_thread_id->new_thread_id, not agent_ref.thread_id.
- Compaction dedup: one op from the data-bearing top-level compacted line;
  the adjacent event_msg.context_compacted (same compaction) is suppressed.
  Spec corrected: response_item.compaction/context_compaction do not occur in
  real data (forward-compat only).
- web_search_call carries no correlation id; pair positionally with the
  following web_search_end. image_generation has no real data (forward-compat).
- token_count with no open turn now emits a DBG log instead of being silently
  dropped. Discovery enforces the YYYY/MM/DD shard depth. Softened a
  containment comment that overclaimed no-TOCTOU.

Regenerated b_old_turncontext / e_compaction / f_exec_truncated goldens to
the corrected behavior + real wire shapes; added i_collab_spawn /
j_replaced_turn / k_web_search fixtures, each line-checked against the spec.
Gates green: golangci(0)/gosec(0)/vet; go test -race ./... 13/13; codex
coverage 92.4%; FuzzParseLine 0 crashes; secret + AI-attribution scans clean.
---
 .../SOW-0004-20260526-codex-adapter.md        |  37 +-
 .agents/sow/specs/adapter-codex.md            |  50 ++-
 cmd/ai-viewer-ingest/sources.go               |  41 ++-
 internal/adapters/codex/discovery.go          |  40 +-
 internal/adapters/codex/helpers_unit_test.go  |   2 +-
 internal/adapters/codex/mapper.go             | 193 +++-------
 .../adapters/codex/mapper_coverage_test.go    | 243 ++++++++++++-
 internal/adapters/codex/mapper_finalize.go    |  99 +++--
 internal/adapters/codex/mapper_state.go       | 129 +++++++
 internal/adapters/codex/mapper_test.go        | 184 ++++++++--
 internal/adapters/codex/mapper_turn.go        |  73 +++-
 internal/adapters/codex/ops.go                |  54 ++-
 internal/adapters/codex/ops_collab.go         | 137 +++++++
 internal/adapters/codex/ops_enrich.go         | 344 ++++++++----------
 internal/adapters/codex/ops_enrich_decode.go  | 154 ++++++++
 internal/adapters/codex/ops_event.go          |  38 +-
 internal/adapters/codex/ops_tools.go          |  92 ++++-
 internal/adapters/codex/parser_fuzz_test.go   |  11 +-
 internal/adapters/codex/payloads.go           |  60 +++
 internal/adapters/codex/scanner.go            |  28 +-
 internal/adapters/codex/scanner_test.go       |  38 +-
 internal/adapters/codex/tailer.go             |   6 +-
 internal/adapters/codex/types.go              |   7 +
 .../codex/b_old_turncontext/expected.jsonl    |   2 +
 ...019cb33f-0000-7000-8000-00000000000e.jsonl |   2 +-
 testdata/codex/e_compaction/expected.jsonl    |  11 +-
 ...019d452a-0000-7000-8000-00000000000f.jsonl |   4 +-
 .../codex/f_exec_truncated/expected.jsonl     |   6 +-
 ...019e8a10-0000-7000-8000-000000000010.jsonl |   8 +
 testdata/codex/i_collab_spawn/expected.jsonl  |  14 +
 ...019e9b20-0000-7000-8000-000000000011.jsonl |  10 +
 testdata/codex/j_replaced_turn/expected.jsonl |  21 ++
 ...019eac30-0000-7000-8000-000000000012.jsonl |   9 +
 testdata/codex/k_web_search/expected.jsonl    |  16 +
 34 files changed, 1641 insertions(+), 522 deletions(-)
 create mode 100644 internal/adapters/codex/mapper_state.go
 create mode 100644 internal/adapters/codex/ops_collab.go
 create mode 100644 internal/adapters/codex/ops_enrich_decode.go
 create mode 100644 testdata/codex/i_collab_spawn/INPUT/codex-home/sessions/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl
 create mode 100644 testdata/codex/i_collab_spawn/expected.jsonl
 create mode 100644 testdata/codex/j_replaced_turn/INPUT/codex-home/sessions/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl
 create mode 100644 testdata/codex/j_replaced_turn/expected.jsonl
 create mode 100644 testdata/codex/k_web_search/INPUT/codex-home/sessions/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl
 create mode 100644 testdata/codex/k_web_search/expected.jsonl

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
index 0c27cc3..acdfee3 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
@@ -160,7 +160,20 @@ Producer of codex canonical rows: the new adapter's `Scan` (backfill) + `Tail` (
 
 ## Implementation
 
-(Empty placeholder. Filled as chunks complete.)
+Chunks A–E delivered the adapter (parser, cursor, mapper state machine, scanner/tailer, payloads, adapter wiring, discovery probe, fixtures). Round-2 review fixes (F1–F9) landed on top, all within `internal/adapters/codex/`, the additive `cmd/ai-viewer-ingest/sources.go` probe (F8 only), and the F3/F5/F7 spec corrections in `adapter-codex.md`:
+
+- **F1+F2 — turn lifecycle.** `turnState.sawTaskStarted` discriminates NEW-format (task_started) from OLD-format (turn_context-only) turns. `supersedePriorTurn` (mapper_turn.go) closes a prior open turn when a new turn_id opens via EITHER turn_context or task_started: NEW-format prior → failed/replaced (edge #2); OLD-format prior → completed (edge #3). `finalizeStale` was replaced by `finalizeAtEOF(stale bool, nowUs int64)` (mapper_finalize.go): OLD-format open turns close completed at EOF regardless of staleness; NEW-format open turns close failed/incomplete + SessionFinalized ONLY when stale; scanner.go now calls it UNCONDITIONALLY at full-read EOF passing the stale bool.
+- **F3 — collab.** `collab_agent_spawn_end`/`collab_close_end`/`collab_waiting_end` added to `eventMsgTypes`; `mapCollabSpawn` (ops_collab.go) emits Op Kind=session Name=spawn ChildSessionNativeID=new_thread_id; close/waiting → DBG log, no op. Spec corrected (sender_thread_id→new_thread_id; close/waiting documented).
+- **F4 — enrichment to op Extras (order-independent).** Late enrichment lands via a re-emitted OpStarted (idempotent UPDATE on (turn,seq)); a `finalizedOps` lookup re-emits onto already-finalized ops; exec-first ordering stashes extras+status on the open op and re-emits at the *_output finalize; an exec exit_code is authoritative over a benign output string. The always-log path is gone (only logs when the op truly can't be located).
+- **F5 — compaction dedup.** ONE op from the data-bearing top-level `compacted`; the adjacent `event_msg.context_compacted` (same timestamp) is suppressed; a lone context_compacted (defensive) still emits. Spec rule #20 + table + response_item rows corrected (response_item.compaction/context_compaction = 0 real files).
+- **F6 — token_count.** With no open turn, `mapTokenCount` now emits a DBG `token_count_no_turn` log instead of silently dropping the token_count; dead `_ = tsUs` removed.
+- **F7 — web_search positional pairing.** `web_search_call` (no id) tracked as the active turn's `openWebSearch`; `enrichWebSearch` pairs the following `web_search_end` positionally. image_generation kept forward-compat (no fixture). Spec rules #11/#12 corrected.
+- **F8 — shard depth.** `hasShardDepth` (discovery.go) requires three numeric path components before `rollout-*.jsonl`; applied in discoverRollouts, rolloutForRel (tailer.go), and countRolloutFiles (sources.go via codexAtShardDepth).
+- **F9 — TOCTOU comment.** scanner.go containment comment softened to state the check-then-open window is an accepted localhost read-only limitation.
+
+File splits to honor the ~400-line budget: `ops_collab.go` (F3), `ops_enrich_decode.go` (enrichment JSON decoders), `mapper_state.go` (turn/op state types), and `payloadURI`/`payloadRef` moved to `payloads.go`.
+
+Fixtures: regenerated `b_old_turncontext` (EOF-completed close), `e_compaction` (real compacted + adjacent context_compacted), `f_exec_truncated` (exec-first ordering); added `i_collab_spawn`, `j_replaced_turn`, `k_web_search`. All synthetic + sanitized (`<ROOT>`, `git@github.com:example/example.git`, synthetic UUIDs).
 
 ## Validation
 
@@ -168,7 +181,27 @@ Producer of codex canonical rows: the new adapter's `Scan` (backfill) + `Tail` (
 
 ## Reviews
 
-(Empty placeholder. Filled as external reviewers run.)
+### Round 1 (2026-05-30) — codex + glm + minimax, parallel, on the whole adapter
+
+- **minimax**: SAFE TO MERGE, 0 P1, 1 P2 (golden coverage of `role=developer`/`system` messages). Rubber-stamped — falsely claimed `mapTokenCount` logs the no-turn case (it does not; see F6).
+- **glm**: SAFE TO MERGE, 0 P1, 3 P2 (enrichment-to-log; `mapTokenCount` silent drop; spec #23 "5 min" wording) + 4 P3.
+- **codex**: NOT SAFE TO MERGE, 1 P1 + 5 P2 + 2 P3. The decisive reviewer; surfaced the real spec-conformance gaps the others missed.
+
+Adjudicated on ground truth (spec lines + a read-only investigation of the real `~/.codex/sessions/` corpus, 2,660 modern + 19 legacy files). Every finding was verified against code+spec+real-data, not taken on reviewer say-so. The real-data evidence **confirmed all of codex's findings AND corrected their details** (codex guessed some wire shapes wrong):
+
+| # | Sev | Finding | Ground-truth verdict |
+|---|---|---|---|
+| F1 | P1 | Old `turn_context`-only sessions never close their last turn → stale-finalize marks them `failed/incomplete` | CONFIRMED. spec edge #3 (adapter-codex.md:449) says close at EOF. **1,006 real files (38%)** are pure old-format, ending cleanly with no completion marker — all would be mislabeled crashes. `b_old_turncontext` golden hid it (fresh test mtime). |
+| F2 | P2 | `task_started` replacing an open turn doesn't finalize the prior `failed/replaced` | CONFIRMED. spec edge #2 (adapter-codex.md:447). `openTurn` (mapper_turn.go:192) doesn't close the prior turn. |
+| F3 | P2 | `collab_agent_spawn_end` treated as unknown → SourceError; loses parent→child spawn op | CONFIRMED real (5 files, 88 lines). **Spec is wrong**: real link is `sender_thread_id`→`new_thread_id`, NOT `agent_ref.thread_id` (adapter-codex.md:433). Also `collab_close_end` (72), `collab_waiting_end` (74) exist and are unhandled. |
+| F4 | P2 | exec/patch enrichment doesn't reach op Extras (OpFinalizedEvent has no Extras field) → degrades to a log | CONFIRMED. spec rule #14 (adapter-codex.md:354) requires merge into op Extras. Real order is `exec_command_end` BEFORE `function_call_output` in ~68-85% (the rest output-first) — enrichment is lost in BOTH orders. Fix must be order-independent. |
+| F5 | P2 | Compaction emits two ops; spec wants one | CONFIRMED. spec rule #20 (adapter-codex.md:375) + table (:414). Real pair is top-level `compacted` (293 files, data-bearing) + adjacent `event_msg.context_compacted` (258 files, bare marker, same timestamp) — **two representations of one event**. `response_item.compaction`/`context_compaction` have **0 real files** (the `e_compaction` fixture used a shape that never occurs). |
+| F6 | P2 | `mapTokenCount` silently drops a no-turn `token_count` with no log (dead `_ = tsUs`) | CONFIRMED (glm). ops_event.go:166-172 — comment promises a DBG log the code never emits; violates "no silent failures". |
+| F7 | P2 | `web_search_call`/`image_generation_call` won't pair with their end events | CONFIRMED + REFINED. codex guessed `id`; real `web_search_call` (483 files) carries **neither `id` nor `call_id`** — no call-side key, must pair positionally with the following `web_search_end` (which carries `call_id`). `image_generation_*` has **0 real files** (dead/forward-compat). |
+| F8 | P3 | Discovery matches `rollout-*.jsonl` at any depth, not just `YYYY/MM/DD` | CONFIRMED minor. discovery.go:115 / tailer.go:350 / sources.go:210. |
+| F9 | P3 | Symlink containment check-then-open TOCTOU; comment overclaims "no TOCTOU" | CONFIRMED minor (matches merged claude_code). Soften the overclaiming comment; full O_NOFOLLOW hardening deferred. |
+
+**Decided fix plan (round 2):** code fixes to match the (mostly already-correct) spec + spec corrections where the spec had wrong wire shapes (F3 collab fields, F5/F7 dead variants) + regenerated goldens (the round-1 goldens were partly circular — built by the same understanding as the code) + new real-shape fixtures (collab spawn, replaced turn, old-format-stale, realistic web_search + compaction + exec-first ordering). All code fixes stay within `internal/adapters/codex/` + the additive `sources.go` probe; no canonical/ingest/store change. F9 hardening and `image_generation` real-shape coverage (no real data exists) are documented as accepted limitations.
 
 ## Outcome
 
diff --git a/.agents/sow/specs/adapter-codex.md b/.agents/sow/specs/adapter-codex.md
index db6eb21..ba31474 100644
--- a/.agents/sow/specs/adapter-codex.md
+++ b/.agents/sow/specs/adapter-codex.md
@@ -157,9 +157,9 @@ References: `codex-rs/protocol/src/models.rs:750-903`. Tagged union; the variant
 | `custom_tool_call_output` | `CustomToolCallOutput` | `call_id`, `output` (same shape as function_call_output) |
 | `tool_search_call`, `tool_search_output` | tool-search subsystem | |
 | `web_search_call` | `WebSearchCall` | `call_id`, `status`, `action` (e.g. `{type:"search", query}`) |
-| `image_generation_call` | `ImageGenerationCall` | `id`, `status`, `revised_prompt`, `result` |
-| `compaction` | `Compaction` | `encrypted_content` (opaque) |
-| `context_compacted` | `ContextCompaction` | `encrypted_content`\|null — companion summary item |
+| `image_generation_call` | `ImageGenerationCall` | `id`, `status`, `revised_prompt`, `result` — **0 real files** (forward-compat only) |
+| `compaction` | `Compaction` | `encrypted_content` (opaque) — **0 real files** (forward-compat only) |
+| `context_compaction` | `ContextCompaction` | `encrypted_content`\|null — **0 real files** (forward-compat only). NOTE: distinct from the real `event_msg.context_compacted` bare marker (rule #20), which is the actual compaction companion that occurs in the corpus. |
 | `ghost_snapshot` | `Other` (catch-all) | observed in real files; not in the persisted-allowlist enum but slips through as `Other`; older lines stripped during reconstruction (`recorder.rs:836,926`) |
 
 The `ResponseItem` enum has `#[serde(other)] Other` (`models.rs:901`), so any unknown variant deserializes successfully — the adapter MUST be forgiving and emit `LogEntry` at most once per unknown variant.
@@ -342,10 +342,10 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
     - Same as function_call/output. `tool_namespace = "custom"`.
 
 11. **`response_item` payload `web_search_call` / `event_msg.web_search_end`:**
-    - Single op: Kind=`tool`, Name=`web_search`, ToolNamespace=`web`. Pair the `response_item` (start) with the `event_msg.web_search_end` carrying `query` and `action`.
+    - Single op: Kind=`tool`, Name=`web_search`, ToolNamespace=`web`. **`web_search_call` carries NEITHER `id` NOR `call_id`** (real corpus: 483 files, no call-side correlation key), so it CANNOT pair by key. Pair the `response_item` (start) POSITIONALLY with the next `event_msg.web_search_end` in the same turn — track the most-recent open web_search op per turn and finalize it on the next `web_search_end` (which DOES carry a `call_id`, but in a different correlation space). The end carries `query` and `action`, merged onto the op's Extras via an OpStarted re-emit (F7).
 
 12. **`response_item` payload `image_generation_call` / `event_msg.image_generation_end`:**
-    - Op: Kind=`tool`, Name=`image_generation`, ToolNamespace=`media`.
+    - Op: Kind=`tool`, Name=`image_generation`, ToolNamespace=`media`. **UNOBSERVED: 0 real files for both `image_generation_call` and `image_generation_end`** — this mapping is forward-compat only and has no fixture coverage (no real data exists to sanitize). `image_generation_call` would use `id` (not `call_id`); the code keeps the path but does not pair beyond the active-turn fallback (F7).
 
 13. **`response_item` payload `local_shell_call` / `local_shell_call_output`:**
     - LEGACY ONLY (does not occur in modern `.jsonl`). When ingesting legacy `.json` files: Kind=`tool`, Name=`shell`, ToolNamespace=`shell`.
@@ -372,8 +372,8 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
 19. **`event_msg` payload `agent_message`:**
     - Companion to the assistant `response_item.message`. Adapter emits only the `response_item` (see #7); uses `event_msg.agent_message` only to populate `TurnFinalized.LastAgentMessage` Extras (for the UI "latest answer" preview).
 
-20. **`event_msg` payload `context_compacted` AND top-level `compacted` line:**
-    - Both signal compaction. Emit a single Op Kind=`internal`, Name=`compaction`, Extras={`replacement_history_size`, `message_preview`}. The body goes to PayloadRef Format=`json`.
+20. **Top-level `compacted` line AND its companion `event_msg.context_compacted`:**
+    - These are TWO representations of ONE compaction, written as ADJACENT lines with IDENTICAL timestamps (real workstation corpus: 293 `compacted` + 258 `event_msg.context_compacted`). The top-level `compacted` is data-bearing (`{message, replacement_history}`); the `event_msg.context_compacted` is a bare `{type}` marker. Emit exactly ONE Op Kind=`compaction`, Name=`compaction`, Extras={`replacement_history_size`, `message_preview`} from the data-bearing `compacted` line; SUPPRESS the adjacent `event_msg.context_compacted` so it does NOT produce a second op. A lone `event_msg.context_compacted` with no preceding `compacted` (defensive) emits the op itself. The body goes to PayloadRef Format=`json`. (Note: `response_item.compaction` / `response_item.context_compaction` have ZERO real files — they are forward-compat only; if a future CLI emits one it converges on the same OpCompaction.)
 
 21. **`event_msg` payload `ghost_snapshot`:**
     - Codex internal book-keeping for resume — strip and ignore (`recorder.rs:836,926` shows upstream strips them during read).
@@ -382,9 +382,24 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
     - Some older sessions (e.g. cli 0.61.0) have `turn_context` only, no task_started/complete. Newer (>= ~0.93.0) emit both. Adapter must treat either as the turn boundary signal — whichever arrives first opens the turn, whichever last closes it.
 
 23. **EOF without any `task_complete` or `turn_aborted` for the most recent turn:**
-    - Turn is still in-flight (running session) OR codex crashed.
-    - If file mtime is recent (< 5 min): keep turn open, ingest more on next event.
-    - If file mtime is stale (>= 1 hour): emit `TurnFinalizedEvent(Status="failed", ErrorClass="incomplete")` and `SessionFinalizedEvent(Status="failed", ErrorClass="incomplete")`. (`turn_aborted` upstream uses similar logic when codex restarts on a crashed session.)
+    The EOF-finalize splits on the most-recent open turn's FORMAT (whether a
+    `task_started` was ever seen for it) — this is the F1 fix; an earlier draft
+    treated all formats identically and mislabeled the large pure-old-format
+    corpus as crashes:
+    - **OLD-format turn (turn_context-only, no `task_started` — cli < ~0.93):**
+      close `TurnFinalizedEvent(Status="completed")` at EOF, REGARDLESS of
+      staleness (edge #3 "close at EOF"; ~38% of the real corpus is pure
+      old-format ending cleanly with no completion marker). NO
+      `SessionFinalizedEvent` — codex has no per-session terminal signal (C#3);
+      the session stays `running`.
+    - **NEW-format turn (a `task_started` opened it, no `task_complete`):** the
+      turn is still in-flight on a fresh file — keep it open and ingest more on
+      the next event. Only when the file mtime is stale (≥ 1 hour) is it treated
+      as a crash: emit `TurnFinalizedEvent(Status="failed", ErrorClass="incomplete")`
+      and `SessionFinalizedEvent(Status="failed", ErrorClass="incomplete")`.
+      (`turn_aborted` upstream uses similar logic when codex restarts on a crashed
+      session.)
+    - **No open turn (clean end, or none opened):** nothing — stays `running`.
 
 24. **No `session_meta` line ever seen (corrupt file or pre-write crash):**
     - Emit `SourceError` and skip the file. Cursor.offset stays 0 so it is retried on next CREATE-style event.
@@ -411,9 +426,13 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
 | `event_msg.token_count` | turn rollups + LLM op ctx_used/ctx_max |
 | `event_msg.user_message` | dedup with response_item.message(user); use as canonical user op |
 | `event_msg.agent_message` | dedup with response_item.message(assistant); populate TurnFinalized.LastAgentMessage |
-| `compacted` line / `response_item.context_compaction` / `event_msg.context_compacted` | one Op Kind=internal Name=compaction |
-| EOF without task_complete + file mtime-stale ≥ 1 h | synthetic `TurnFinalizedEvent(failed,incomplete)` + `SessionFinalizedEvent(failed,incomplete)` |
-| EOF clean (most recent event is task_complete) | **no `SessionFinalizedEvent`** — session stays `running` (codex has no per-session terminal signal; rollouts are resumable and metadata-appendable per `recorder.rs:1610`). UI uses `last_activity_ts` for staleness, identical to claude-code. |
+| `compacted` line (+ adjacent `event_msg.context_compacted` companion, suppressed) | one Op Kind=compaction Name=compaction |
+| lone `event_msg.context_compacted` (no preceding `compacted`) | one Op Kind=compaction Name=compaction (defensive) |
+| `response_item.compaction` / `response_item.context_compaction` | forward-compat only (0 real files); converges on one OpCompaction if ever emitted |
+| EOF, OLD-format open turn (turn_context-only, no task_started) | `TurnFinalizedEvent(completed)` at EOF regardless of staleness (edge #3); **no `SessionFinalizedEvent`** (F1) |
+| EOF, NEW-format open turn (saw task_started), file mtime-stale ≥ 1 h | synthetic `TurnFinalizedEvent(failed,incomplete)` + `SessionFinalizedEvent(failed,incomplete)` |
+| EOF, NEW-format open turn, file FRESH (< 1 h) | turn stays open (still in-flight); no finalize (F1) |
+| EOF clean (most recent event is task_complete / no open turn) | **no `SessionFinalizedEvent`** — session stays `running` (codex has no per-session terminal signal; rollouts are resumable and metadata-appendable per `recorder.rs:1610`). UI uses `last_activity_ts` for staleness, identical to claude-code. |
 | unknown `type` or unknown `payload.type` | `SourceError` (once per variant) + `LogEntry` |
 
 ### Cost calculation
@@ -430,12 +449,13 @@ Codex supports sub-agents (`SubAgentSource::ThreadSpawn`) and forks (`forked_fro
 
 - **Sub-agent**: `session_meta.payload.source = {"subagent": {"thread_spawn": {"parent_thread_id": "<uuid>", "depth": N, "agent_nickname": "...", "agent_role": "..."}}}` and `thread_source = "subagent"`. The parent session's rollout file does NOT inline the child; it appears separately and the parent is identified via `parent_thread_id`.
 - **Fork**: `session_meta.payload.forked_from_id = "<uuid>"` — branched/resumed from another session.
-- **`event_msg.collab_agent_spawn_begin`/`_end`** in the PARENT rollout name the spawn but the `_begin` event is NOT persisted (`policy.rs:215`). Only `_end` is. The `_end` event carries `agent_ref.thread_id` linking parent→child.
+- **`event_msg.collab_agent_spawn_begin`/`_end`** in the PARENT rollout name the spawn but the `_begin` event is NOT persisted (`policy.rs:215`). Only `_end` is. The `_end` event carries the parent→child link as `sender_thread_id` (parent) → `new_thread_id` (child), alongside `new_agent_nickname`, `new_agent_role`, `model`, `reasoning_effort`, and `status`. (Real workstation corpus: 5 `collab_agent_spawn_end` files; the field is `new_thread_id`, NOT `agent_ref.thread_id` as an earlier draft of this spec wrongly stated.)
+- **`event_msg.collab_close_end`** (72 files) and **`event_msg.collab_waiting_end`** (74 files) also appear in collab sessions. They carry no parent→child edge the topology view needs, so the adapter recognizes them (no `SourceError`) and surfaces each as a `LogEntry` only — no canonical op.
 
 Adapter behavior:
 - Emit `SessionStartedEvent.ParentNativeID = parent_thread_id` when the child's `session_meta.source` is `subagent`.
 - Emit `SessionStartedEvent.ParentNativeID = forked_from_id` otherwise when `forked_from_id` is present.
-- In the parent, when an `event_msg.collab_agent_spawn_end` line appears, emit an Op Kind=`session`, Name=`spawn`, ChildSessionNativeID=`agent_ref.thread_id`. (If the child rollout file doesn't yet exist at that moment, the ingester's foreign-key constraint must be relaxed temporarily — the canonical-events spec allows out-of-order child arrival.)
+- In the parent, when an `event_msg.collab_agent_spawn_end` line appears, emit an Op Kind=`session`, Name=`spawn`, ChildSessionNativeID=`new_thread_id`. (If the child rollout file doesn't yet exist at that moment, the ingester's foreign-key constraint must be relaxed temporarily — the canonical-events spec allows out-of-order child arrival.)
 - A sub-agent rollout file with `parent_thread_id` referring to an unknown session is recorded with `parent_session_id` set to NULL and a `LogEntry` warning; reconciled when the parent appears.
 
 Real observation: 8 distinct sub-agent sessions in the sampled set, all `depth=1`, with named nicknames (Raman, Tesla, Nash, Boyle, etc.) and role `"explorer"`.
diff --git a/cmd/ai-viewer-ingest/sources.go b/cmd/ai-viewer-ingest/sources.go
index de00686..4d3dd78 100644
--- a/cmd/ai-viewer-ingest/sources.go
+++ b/cmd/ai-viewer-ingest/sources.go
@@ -202,11 +202,12 @@ const codexRolloutPrefix = "rollout-"
 const codexArchivedDir = "archived_sessions"
 
 // countRolloutFiles returns the number of modern sharded codex rollouts
-// ("rollout-*.jsonl") under the sessions root, walking the YYYY/MM/DD shards at
-// any depth and pruning archived_sessions/. Returns 0 on any walk error — the
-// count is observability for acceptance #8, not a gate, so it is read
-// best-effort and never blocks discovery. Mirrors discovery.go's modern match
-// (^rollout-.*\.jsonl$) without importing the adapter package.
+// ("rollout-*.jsonl") under the sessions root, counting ONLY files at the
+// YYYY/MM/DD shard depth and pruning archived_sessions/. Returns 0 on any walk
+// error — the count is observability for acceptance #8, not a gate, so it is
+// read best-effort and never blocks discovery. Mirrors discovery.go's modern
+// match (^rollout-.*\.jsonl$) AND its shard-depth requirement (F8) without
+// importing the adapter package, so the surfaced count matches what is ingested.
 func countRolloutFiles(root string) int {
 	n := 0
 	_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
@@ -223,7 +224,7 @@ func countRolloutFiles(root string) int {
 			return nil
 		}
 		name := d.Name()
-		if strings.HasPrefix(name, codexRolloutPrefix) && strings.HasSuffix(name, ".jsonl") {
+		if strings.HasPrefix(name, codexRolloutPrefix) && strings.HasSuffix(name, ".jsonl") && codexAtShardDepth(root, path) {
 			n++
 		}
 		return nil
@@ -231,6 +232,34 @@ func countRolloutFiles(root string) int {
 	return n
 }
 
+// codexAtShardDepth reports whether path sits at the YYYY/MM/DD shard depth
+// relative to root: exactly four root-relative components, the first three
+// non-empty and made only of ASCII digits, then the basename (F8). The basename
+// is not inspected here. Mirrors discovery.go's hasShardDepth without importing
+// the adapter package, so countRolloutFiles never counts a stray rollout-*.jsonl
+// at the wrong depth. A filepath.Rel failure returns false (best-effort count).
+func codexAtShardDepth(root, path string) bool {
+	rel, err := filepath.Rel(root, path)
+	if err != nil {
+		return false
+	}
+	parts := strings.Split(filepath.ToSlash(rel), "/")
+	if len(parts) != 4 { // three shard directories plus the basename
+		return false
+	}
+	for _, p := range parts[:3] {
+		if len(p) == 0 { // an empty component is not a numeric shard
+			return false
+		}
+		for _, c := range p {
+			if c < '0' || c > '9' { // ASCII digits only
+				return false
+			}
+		}
+	}
+	return true
+}
+
 // countLegacyJSON returns the number of legacy flat codex rollouts
 // ("rollout-*.json") directly under the sessions root (NOT in shards). These are
 // recognized but NOT ingested in v1 (one informational SourceError per file);
diff --git a/internal/adapters/codex/discovery.go b/internal/adapters/codex/discovery.go
index 6956c90..74bb900 100644
--- a/internal/adapters/codex/discovery.go
+++ b/internal/adapters/codex/discovery.go
@@ -26,6 +26,34 @@ const archivedSessionsDir = "archived_sessions"
 // name like "x-rollout-….jsonl" or "rollout-….jsonl.bak" does not match.
 var modernNameRe = regexp.MustCompile(`^rollout-.*\.jsonl$`)
 
+// shardComponentRe matches one numeric component (one or more ASCII digits) of
+// a YYYY/MM/DD shard path. The upstream layout is strictly numeric date shards
+// (codex-rs/rollout/src/recorder.rs:1325-1354), so a rollout under a non-numeric
+// directory (e.g. "scratch/rollout-….jsonl") is NOT a real rollout location and
+// must not be ingested (F8); wrong-depth files are rejected by hasShardDepth.
+var shardComponentRe = regexp.MustCompile(`^[0-9]+$`)
+
+// hasShardDepth reports whether rel (root-relative; it is split on "/" only, so
+// it must already be forward-slashed) is at the required YYYY/MM/DD shard depth:
+// exactly three leading numeric path components followed by one more component,
+// the "rollout-….jsonl" basename (four components total). A stray
+// "rollout-….jsonl" directly under the sessions root, or at the wrong depth,
+// returns false so discovery, the tailer, and the observability counts all agree
+// on what is ingestible (F8; spec §"Filesystem Layout", recorder.rs:1325-1354).
+// The basename is NOT matched here (caller's job); only prefix and count are.
+func hasShardDepth(rel string) bool {
+	parts := strings.Split(rel, "/")
+	if len(parts) != 4 { // three shard directories plus the basename
+		return false
+	}
+	for _, p := range parts[:3] {
+		if !shardComponentRe.MatchString(p) { // must be one or more digits
+			return false
+		}
+	}
+	return true
+}
+
 // legacyNameRe matches a legacy flat rollout filename: "rollout-*.json" (no
 // time component, directly under sessions/; spec §"Legacy `.json` layout").
 var legacyNameRe = regexp.MustCompile(`^rollout-.*\.json$`)
@@ -113,14 +141,20 @@ func discoverRollouts(root string, onError func(error)) (discovered, error) {
 		atRoot := filepath.Dir(path) == resolvedRoot
 		switch {
 		case modernNameRe.MatchString(name):
-			if !withinSourceRoot(resolvedRoot, path, onError) {
-				return nil
-			}
 			rel, rrErr := relPath(resolvedRoot, path)
 			if rrErr != nil {
 				onError(fmt.Errorf("codex: relpath rollout %s: %w; skipping", path, rrErr))
 				return nil
 			}
+			// Require the YYYY/MM/DD shard depth (F8): a stray rollout-*.jsonl
+			// directly under the root, or at the wrong depth, is not a real codex
+			// rollout location (recorder.rs:1325-1354) and is silently ignored.
+			if !hasShardDepth(rel) {
+				return nil
+			}
+			if !withinSourceRoot(resolvedRoot, path, onError) {
+				return nil
+			}
 			out.modern = append(out.modern, rollout{rel: rel, abs: path})
 		case atRoot && legacyNameRe.MatchString(name):
 			// Legacy flat .json directly under the root: recorded by basename so a
diff --git a/internal/adapters/codex/helpers_unit_test.go b/internal/adapters/codex/helpers_unit_test.go
index 186d5a3..f7b57ea 100644
--- a/internal/adapters/codex/helpers_unit_test.go
+++ b/internal/adapters/codex/helpers_unit_test.go
@@ -308,7 +308,7 @@ func TestSmallHelpers(t *testing.T) {
 		t.Error("mergeExtras did not merge")
 	}
 	// trackOp empty call_id is not tracked.
-	m.trackOp("", "t", 1, 1, canonical.OpTool, "x")
+	m.trackOp("", "t", 1, 1, canonical.OpTool, "x", "shell")
 	if len(m.openOps) != 0 {
 		t.Error("trackOp tracked an empty call_id")
 	}
diff --git a/internal/adapters/codex/mapper.go b/internal/adapters/codex/mapper.go
index 0e74169..4090fba 100644
--- a/internal/adapters/codex/mapper.go
+++ b/internal/adapters/codex/mapper.go
@@ -2,7 +2,6 @@ package codex
 
 import (
 	"fmt"
-	"path/filepath"
 
 	"github.com/netdata/ai-viewer/internal/canonical"
 )
@@ -41,9 +40,11 @@ const provider = "openai"
 //
 // The mapper is PURE with respect to I/O: it reads no files and watches no
 // directories. The scanner/tailer (Chunk C) drives it line-by-line via
-// mapRecord and, at EOF, asks finalizeStale whether a hanging turn must be
-// synthetically failed (the scanner owns file mtime; the mapper owns the
-// open-turn state — spec rule #23, SOW C#3).
+// mapRecord and, at full-read EOF, calls finalizeAtEOF (passing the file's stale
+// bool) so the mapper can close a hanging turn: OLD-format turns close completed
+// at EOF, NEW-format turns close failed/incomplete only when stale (the scanner
+// owns file mtime; the mapper owns the open-turn state — spec rule #23, edge #3,
+// SOW C#3, F1).
 //
 // State persists across Scan→Tail of the same file via the rebuild path
 // (mirrors claude_code): the scanner replays the chain from offset 0 to
@@ -105,8 +106,9 @@ type fileMapper struct {
 	// A turn_id of "" is the absent-turn_id fallback bucket (old CLI without
 	// turn_id — spec edge #3); it shares the same map under the empty key.
 	turns map[string]*turnState
-	// turnOrder lists turn_ids in open order so finalizeStale can find the most
-	// recent still-open turn deterministically (spec rule #23).
+	// turnOrder lists turn_ids in open order so finalizeAtEOF / the
+	// replaced/superseded-turn helpers can find the most recent still-open turn
+	// deterministically (spec rule #23, edge #2/#3).
 	turnOrder []string
 	// turnSeqCounter is the last assigned 1-based turn Seq. Monotone per file.
 	turnSeqCounter int
@@ -124,14 +126,46 @@ type fileMapper struct {
 	// op (spec rule #9, #14, #15, #16). A call_id of "" is never tracked.
 	openOps map[string]*openOp
 
+	// finalizedOps maps a finalized op's call_id to where it was emitted so a
+	// LATE enrichment event (exec_command_end / patch_apply_end arriving AFTER the
+	// op's *_output already finalized it) can still merge its Extras onto the op
+	// via an idempotent OpStarted re-emit (F4, spec rule #14). Real exec ordering
+	// is exec_command_end BEFORE function_call_output in ~68-85% of files but the
+	// other ~15-32% put it after — the enrichment must reach the op in BOTH
+	// orders. Entries persist for the life of the file (small; one per tool op).
+	finalizedOps map[string]finalizedOp
+
+	// openWebSearch is the most-recently-opened, not-yet-paired web_search op in
+	// the active turn (F7). web_search_call carries NEITHER id NOR call_id, so its
+	// companion event_msg.web_search_end (which DOES carry call_id) cannot pair by
+	// key — it pairs POSITIONALLY with the most-recent open web_search op in the
+	// same turn. nil when no web_search awaits an end. Cleared on pairing or at
+	// turn close (the op then finalizes as a dangling op).
+	openWebSearch *openWebSearchRef
+
 	// seenUserCallIDs dedups user input across response_item.message(role=user)
 	// and event_msg.user_message (spec rule #6, #18). Keyed on a content
 	// fingerprint so the second arrival is suppressed regardless of order.
 	seenUser map[string]struct{}
 
-	// finalized guards finalizeStale so the synthetic finalize is emitted at
-	// most once per file even if the scanner calls it more than once.
-	staleFinalized bool
+	// eofFinalized guards finalizeAtEOF so the EOF-finalize is emitted at most
+	// once per file even if the scanner calls it more than once. It is set only
+	// when a terminal decision is made (an OLD-format turn closed completed, a
+	// stale NEW-format turn closed failed, or no open turn); a FRESH new-format
+	// file with an open turn does NOT set it, so a later stale sweep can still
+	// close that turn (F1).
+	eofFinalized bool
+
+	// compactedSeen reports whether any top-level `compacted` line has emitted a
+	// compaction op (F5), and compactedRecordIdx is the recordIdx of the most
+	// recent such line. The real wire shape is a data-bearing top-level `compacted`
+	// line IMMEDIATELY followed by a bare event_msg.context_compacted marker with
+	// the SAME timestamp — two representations of ONE compaction. The op is emitted
+	// from `compacted`; the adjacent context_compacted is suppressed (no second op)
+	// when it is the very next record. A context_compacted with no preceding
+	// compacted (defensive) emits the op itself (spec rule #20).
+	compactedSeen      bool
+	compactedRecordIdx uint64
 
 	// lastTsUs is the timestamp (micros) of the most recent record carrying one.
 	// Observability only (cursor LastTsUs). Stays 0 for a file whose records all
@@ -139,83 +173,8 @@ type fileMapper struct {
 	lastTsUs int64
 }
 
-// turnState tracks one synthesized turn's accumulation between its open
-// (turn_context or task_started) and its close (task_complete / turn_aborted /
-// stale finalize). Token rollup is the C#1 model: TokensIn/Out are the SUM of
-// per-call last_token_usage over the token_count events attributed to this turn
-// — never a delta of the cumulative total_token_usage (spec rule #4, #17).
-type turnState struct {
-	// seq is the canonical 1-based turn Seq.
-	seq int
-	// codexTurnID is the source turn_id (UUID), surfaced in
-	// turns.extras_json.codex_turn_id (spec "Canonical Model Gaps" #2). Empty
-	// for the absent-turn_id fallback turn.
-	codexTurnID string
-	// opSeq is the 1-based op counter within this turn.
-	opSeq int
-	// started reports whether a TurnStartedEvent was already emitted for this
-	// turn (idempotency across turn_context + task_started — spec rule #2, #3).
-	started bool
-	// finalized reports whether a TurnFinalizedEvent was already emitted, so a
-	// duplicate task_complete / a later stale finalize does not double-close.
-	finalized bool
-	// startTsUs is the turn's open timestamp (micros), used as a floor for the
-	// synthetic stale-finalize EndTs (spec rule #23).
-	startTsUs int64
-	// tokensIn / tokensOut accumulate the C#1 per-call last_token_usage rollup
-	// (spec rule #4, #17).
-	tokensIn  int64
-	tokensOut int64
-	// tokensCacheRead / tokensCacheWrite accumulate the per-call cached-token
-	// split when newer rollouts report it (canonical-events.md codex cache row).
-	tokensCacheRead  int64
-	tokensCacheWrite int64
-	// ctxMax is the model_context_window stashed from task_started /
-	// token_count, applied to the turn's last LLM op at finalize (spec rule #3,
-	// #17).
-	ctxMax int64
-	// sandbox is the sandbox_policy.type snapshotted from the turn's
-	// turn_context, surfaced in turns.extras_json.sandbox (spec rule #2,
-	// "Canonical Model Gaps" #3).
-	sandbox string
-	// effort / approvalPolicy are turn_context policy snapshots for turn extras.
-	effort         string
-	approvalPolicy string
-	// ttftMs is task_complete.time_to_first_token_ms, surfaced in
-	// turns.extras_json.ttft_ms (spec "Canonical Model Gaps" #8).
-	ttftMs int64
-	// lastAgentMessage is event_msg.agent_message.message, surfaced in
-	// TurnFinalized extras as the UI "latest answer" preview (spec rule #19).
-	lastAgentMessage string
-	// lastLLMOpSeq is the op Seq of the most recent LLM op in this turn, so a
-	// trailing token_count attaches CtxUsed/CtxMax to it (spec rule #17). 0 when
-	// no LLM op has been emitted yet.
-	lastLLMOpSeq int
-	// lastLLMEndTs is the EndTs of the turn's last LLM op, preserved so a
-	// token_count re-finalize that adds CtxUsed/CtxMax does not clobber the op's
-	// real end timestamp (the ingester reconciles fields on the (turn,seq)
-	// upsert — canonical-events.md §Idempotency).
-	lastLLMEndTs int64
-	// lastLLMCtxUsed is the cumulative total_token_usage observed for the turn's
-	// last LLM op (spec rule #17). The op's CtxUsed is set from this at finalize.
-	lastLLMCtxUsed int64
-}
-
-// openOp records where an in-flight op was emitted so its finalize / enrichment
-// lands under the same turn/op (spec rule #9, #14-16).
-type openOp struct {
-	turnID  string
-	turnSeq int
-	opSeq   int
-	kind    canonical.OpKind
-	name    string
-	// extras accumulates enrichment (exec_command_end, mcp_tool_call_end,
-	// patch_apply_end) merged onto the op's OpFinalized (spec rule #14-16). The
-	// adapter does NOT emit a second op for an enrichment event.
-	extras map[string]any
-	// finalized guards against a second *_output finalizing the same op.
-	finalized bool
-}
+// The per-file inference STATE TYPES (turnState, openOp, finalizedOp,
+// openWebSearchRef) live in mapper_state.go.
 
 // mapperConfig bundles the per-file inputs newFileMapper needs.
 type mapperConfig struct {
@@ -242,6 +201,7 @@ func newFileMapper(cfg mapperConfig) *fileMapper {
 		agentName:      cfg.agentName,
 		turns:          map[string]*turnState{},
 		openOps:        map[string]*openOp{},
+		finalizedOps:   map[string]finalizedOp{},
 		seenUser:       map[string]struct{}{},
 	}
 }
@@ -397,60 +357,7 @@ func (m *fileMapper) activeTurnSeq() int {
 	return 0
 }
 
-// payloadURI builds the PayloadRef LocationURI for a body inline in this
-// rollout file at the given 1-based line number (spec rule #6/#7/#8, edge #7).
-// The form is "file://<symlink-resolved-abs>#L<line>" so the presenter reads the
-// exact record on demand without ai-viewer ever copying the body into SQLite.
-//
-// Containment (Chunk D, security.md §6): the absolute path is resolved through
-// symlinks and verified to stay inside the configured sessions root via
-// payloadLocationURI (payloads.go). The "#L<line>" anchor is appended AFTER the
-// file:// path is built so the anchor is identical to Chunk B's contract
-// (TestMapper_PayloadRefLineAnchor). When m.root is empty (mapper-only tests)
-// the containment resolve is skipped and the cleaned absolute path is used; when
-// m.absPath is empty the URI is just the line anchor.
-//
-// The scanner is the authoritative containment gate: readRollout (scanner.go)
-// refuses any file that resolves outside the root BEFORE a single line is
-// streamed, so by the time the mapper builds a ref the owning file is already
-// known to be contained. A resolve failure or apparent escape here (e.g. the
-// file removed between the scanner's open and this build — impossible while the
-// scanner holds the fd, but handled defensively) therefore falls back to the
-// cleaned absolute path rather than dropping the anchor, keeping the ref usable
-// and the op→payload linkage (payload_refs.op_id NOT NULL) intact.
-func (m *fileMapper) payloadURI(lineNo int) string {
-	anchor := ""
-	if lineNo > 0 {
-		anchor = fmt.Sprintf("#L%d", lineNo)
-	}
-	if m.absPath == "" {
-		return anchor
-	}
-	uri, err := payloadLocationURI(m.root, m.absPath)
-	if err != nil {
-		// Containment resolve failed (escape or unresolvable). The scanner
-		// already vetted the file before streaming, so fall back to the cleaned
-		// absolute path rather than emit a lossy ref.
-		uri = "file://" + filepath.ToSlash(filepath.Clean(m.absPath))
-	}
-	return uri + anchor
-}
-
-// payloadRef builds a PayloadRefEvent for a body inline in this rollout at the
-// record currently being mapped (m.lineNo). It is scoped to the owning op
-// (turnSeq/opSeq) so it references an op that EXISTS — payload_refs.op_id is NOT
-// NULL REFERENCES ops(id), so an orphan ref would FK-roll-back the ingest batch
-// (mirrors claude_code's P1.1a discipline). OriginalBytes is the byte length of
-// the verbatim line so the presenter can budget a read; -1 when unknown.
-func (m *fileMapper) payloadRef(base canonical.EventBase, turnSeq, opSeq int, kind, format string, originalBytes int64) canonical.PayloadRefEvent {
-	return canonical.PayloadRefEvent{
-		EventBase:       base,
-		SessionNativeID: m.nativeID,
-		TurnSeq:         turnSeq,
-		OpSeq:           opSeq,
-		PayloadKind:     kind,
-		Format:          format,
-		LocationURI:     m.payloadURI(m.lineNo),
-		OriginalBytes:   originalBytes,
-	}
-}
+// payloadURI and payloadRef (the PayloadRef LocationURI builder + the
+// op-scoped PayloadRefEvent constructor) live in payloads.go alongside the
+// symlink-containment helper they call, keeping mapper.go focused on the per-file
+// inference state and dispatch.
diff --git a/internal/adapters/codex/mapper_coverage_test.go b/internal/adapters/codex/mapper_coverage_test.go
index a6134a7..b55e773 100644
--- a/internal/adapters/codex/mapper_coverage_test.go
+++ b/internal/adapters/codex/mapper_coverage_test.go
@@ -246,31 +246,42 @@ func TestMapper_ModelLearnedOnceAcrossTurns(t *testing.T) {
 	}
 }
 
-// TestMapper_EnrichOnAlreadyFinalizedOpLogs covers enrichFinalizedOrLog: an
-// exec_command_end whose op was ALREADY finalized by its function_call_output
-// surfaces a DBG enrichment log (spec rule #14 supplementary telemetry).
-func TestMapper_EnrichOnAlreadyFinalizedOpLogs(t *testing.T) {
+// TestMapper_EnrichOnAlreadyFinalizedOpReemits covers the output-first exec
+// ordering (~15-32% of real files, F4): an exec_command_end whose op was ALREADY
+// finalized by its function_call_output re-emits an OpStarted carrying the exec
+// Extras onto the SAME (turn,seq) — an idempotent UPDATE, NOT a DBG log (spec
+// rule #14). The enrichment must land in ops.extras_json regardless of order.
+func TestMapper_EnrichOnAlreadyFinalizedOpReemits(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid")
 	lines := []string{
 		metaLine("sid", `"exec"`),
 		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
 		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
-		// output finalizes c1 first (deletes it from openOps).
+		// output finalizes c1 first (deletes it from openOps) — output-first order.
 		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok"}}`,
 		// exec_command_end now arrives for the already-finalized op.
 		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"ok","source":"model"}}`,
 	}
 	events := runLines(t, m, lines)
-	dbg := false
+	// The enrichment must arrive as a re-emitted OpStarted (same turn/seq as the
+	// shell op) carrying exec_* Extras — NOT a DBG log.
+	reemit := false
+	for _, s := range opStarts(events) {
+		if s.Name == "shell" && s.Seq == 1 {
+			if code, ok := s.Extras["exec_exit_code"]; ok && code == int64(0) {
+				reemit = true
+			}
+		}
+	}
+	if !reemit {
+		t.Errorf("late exec_command_end on finalized op did not re-emit an OpStarted carrying exec Extras (F4)")
+	}
 	for _, ev := range events {
 		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "enrich_exec_command_end" {
-			dbg = true
+			t.Errorf("late exec_command_end logged instead of re-emitting onto the op (F4 regression)")
 		}
 	}
-	if !dbg {
-		t.Errorf("late exec_command_end on finalized op did not surface a DBG enrichment log")
-	}
 }
 
 // TestMapper_McpEndUnmatchedLogs covers the mcp_tool_call_end no-op-match branch.
@@ -372,20 +383,222 @@ func TestMapper_ReasoningContentRaw(t *testing.T) {
 	}
 }
 
-// TestMapper_TokenCountBeforeAnyTurnDropped covers mapTokenCount's nil-turn path
-// (token_count before any turn opened).
-func TestMapper_TokenCountBeforeAnyTurnDropped(t *testing.T) {
+// TestMapper_TokenCountBeforeAnyTurnLogs covers mapTokenCount's nil-turn path
+// (token_count before any turn opened): it must surface a DBG
+// "token_count_no_turn" log, NOT drop silently (F6, spec rule #6 "no silent
+// failures").
+func TestMapper_TokenCountBeforeAnyTurnLogs(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid")
 	lines := []string{
 		metaLine("sid", `"exec"`),
-		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":5}}}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"token_count","turn_id":"ghost","info":{"last_token_usage":{"input_tokens":5}}}}`,
 	}
 	events := runLines(t, m, lines)
-	// No turn → no rollup, no crash, no token_count-derived event.
+	// No turn → no rollup, no crash, no token_count-derived turn finalize.
 	if got := countKind(events, canonical.EvTurnFinalized); got != 0 {
 		t.Errorf("TurnFinalized = %d, want 0", got)
 	}
+	// But a DBG log MUST surface the dropped count (F6).
+	logged := false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "token_count_no_turn" {
+			logged = true
+			if le.Extras["turn_id"] != "ghost" {
+				t.Errorf("token_count_no_turn log turn_id = %v, want ghost", le.Extras["turn_id"])
+			}
+		}
+	}
+	if !logged {
+		t.Errorf("token_count with no open turn was dropped silently (F6 regression: must DBG-log)")
+	}
+}
+
+// TestMapper_CollabSpawnSessionOp covers collab_agent_spawn_end (F3): a
+// session/spawn op whose ChildSessionNativeID is new_thread_id (NOT
+// agent_ref.thread_id), carrying the spawned agent metadata in Extras.
+func TestMapper_CollabSpawnSessionOp(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("parent-sid")
+	lines := []string{
+		metaLine("parent-sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"collab_agent_spawn_end","sender_thread_id":"parent-sid","new_thread_id":"child-uuid","new_agent_nickname":"Dewey","new_agent_role":"explorer","status":"completed"}}`,
+	}
+	events := runLines(t, m, lines)
+	var spawn *canonical.OpStartedEvent
+	for i := range events {
+		if s, ok := events[i].(canonical.OpStartedEvent); ok && s.Kind == canonical.OpSession && s.Name == "spawn" {
+			sc := s
+			spawn = &sc
+		}
+	}
+	if spawn == nil {
+		t.Fatalf("no session/spawn op emitted for collab_agent_spawn_end (F3)")
+	}
+	if spawn.ChildSessionNativeID != "child-uuid" {
+		t.Errorf("ChildSessionNativeID = %q, want child-uuid (new_thread_id, NOT agent_ref)", spawn.ChildSessionNativeID)
+	}
+	if spawn.Extras["relationship"] != "sub_agent" || spawn.Extras["new_agent_nickname"] != "Dewey" {
+		t.Errorf("spawn extras = %v, want relationship=sub_agent + nickname Dewey", spawn.Extras)
+	}
+}
+
+// TestMapper_CollabCloseAndWaitingRecognized covers collab_close_end and
+// collab_waiting_end (F3): recognized (runLines would Fatalf on an unknown
+// payload type via parseLine), surfaced as a DBG log, and producing NO canonical
+// op.
+func TestMapper_CollabCloseAndWaitingRecognized(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"collab_close_end","call_id":"x"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"collab_waiting_end","call_id":"y"}}`,
+	}
+	// runLines Fatalf's on a parse error, so reaching here proves the types are
+	// recognized (no errUnknownPayloadType).
+	events := runLines(t, m, lines)
+	// No session/spawn or other op from these markers.
+	for _, s := range opStarts(events) {
+		if s.Kind == canonical.OpSession {
+			t.Errorf("collab_close_end/waiting_end wrongly produced a session op")
+		}
+	}
+	closeLogged, waitLogged := false, false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok {
+			if le.Message == "event_msg:collab_close_end" {
+				closeLogged = true
+			}
+			if le.Message == "event_msg:collab_waiting_end" {
+				waitLogged = true
+			}
+		}
+	}
+	if !closeLogged || !waitLogged {
+		t.Errorf("collab_close_end/waiting_end not surfaced as DBG logs (close=%v wait=%v)", closeLogged, waitLogged)
+	}
+}
+
+// TestMapper_CollabSpawnNoChildLogs covers mapCollabSpawn's no-child branch (F3):
+// a collab_agent_spawn_end with no new_thread_id surfaces a DBG log and emits no
+// session op. Also covers spawnStatus's failed branch via a "failed" status.
+func TestMapper_CollabSpawnNoChildLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"collab_agent_spawn_end","sender_thread_id":"p","status":"failed"}}`,
+	}
+	events := runLines(t, m, lines)
+	for _, s := range opStarts(events) {
+		if s.Kind == canonical.OpSession {
+			t.Errorf("collab_agent_spawn_end with no new_thread_id wrongly produced a session op")
+		}
+	}
+	logged := false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "collab_agent_spawn_end_no_child" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("collab_agent_spawn_end with no child did not surface a DBG log (F3)")
+	}
+	// spawnStatus failed branch (a spawned child + status=failed → op finalized failed).
+	if got := spawnStatus("failed"); got != "failed" {
+		t.Errorf("spawnStatus(failed) = %q, want failed", got)
+	}
+	if got := spawnStatus("completed"); got != "completed" {
+		t.Errorf("spawnStatus(completed) = %q, want completed", got)
+	}
+}
+
+// TestMapper_WebSearchEndOrphanLogs covers enrichWebSearch's no-call branch (F7):
+// a web_search_end with no preceding web_search_call surfaces a DBG log.
+func TestMapper_WebSearchEndOrphanLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"orphan","query":"q"}}`,
+	}
+	events := runLines(t, m, lines)
+	logged := false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "web_search_end_no_call" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("orphan web_search_end did not surface a DBG log (F7)")
+	}
+}
+
+// TestMapper_LateEnrichOrphanLogs covers enrichFinalizedOp's not-locatable branch
+// (F4): an exec_command_end whose call_id matches NO op (neither open nor
+// finalized) surfaces a DBG log rather than inventing an op reference.
+func TestMapper_LateEnrichOrphanLogs(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"ghost","exit_code":0,"aggregated_output":"x"}}`,
+	}
+	events := runLines(t, m, lines)
+	logged := false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "enrich_exec_command_end" {
+			logged = true
+		}
+	}
+	if !logged {
+		t.Errorf("orphan exec_command_end did not surface a DBG log (F4 not-locatable path)")
+	}
+}
+
+// TestMapper_OutputFirstExecEnrich covers the output-first ordering where the
+// function_call_output finalizes the op BEFORE the exec_command_end, so the late
+// exec_command_end re-emits onto the finalized op via finalizedOps (F4). Also
+// covers mapToolOutput's finalizedOps PayloadRef-attach branch via a duplicate
+// output line.
+func TestMapper_OutputFirstExecEnrich(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"ok","cwd":"<ROOT>"}}`,
+		// A second (duplicate) output for the now-finalized op: its tool_response
+		// PayloadRef should still attach via finalizedOps, not warn.
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok-again"}}`,
+	}
+	events := runLines(t, m, lines)
+	// The late exec_command_end re-emitted onto the shell op carrying exec_exit_code.
+	reemit := false
+	for _, s := range opStarts(events) {
+		if s.Name == "shell" {
+			if _, ok := s.Extras["exec_exit_code"]; ok {
+				reemit = true
+			}
+		}
+	}
+	if !reemit {
+		t.Errorf("output-first late exec_command_end did not re-emit exec Extras onto the op (F4)")
+	}
+	// No tool_output_unmatched warn for the duplicate output (it attaches via finalizedOps).
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "tool_output_unmatched" {
+			t.Errorf("duplicate output on a finalized op wrongly warned tool_output_unmatched (F4)")
+		}
+	}
 }
 
 // TestMapper_DeveloperMessageIsLLM covers the assistant/system/developer message
diff --git a/internal/adapters/codex/mapper_finalize.go b/internal/adapters/codex/mapper_finalize.go
index aedb780..c66270a 100644
--- a/internal/adapters/codex/mapper_finalize.go
+++ b/internal/adapters/codex/mapper_finalize.go
@@ -8,31 +8,62 @@ import (
 	"github.com/netdata/ai-viewer/internal/canonical"
 )
 
-// finalizeStale is the EOF-finalize surface the scanner (Chunk C) calls when a
-// rollout file has reached EOF AND its mtime is stale (>= 1 h — the scanner owns
-// the mtime check; spec rule #23, SOW C#3). nowUs is the synthetic end
-// timestamp (the scanner passes the file mtime in micros). When the most-recent
-// turn is still open (no task_complete / turn_aborted) the mapper emits a
-// synthetic TurnFinalizedEvent(failed, incomplete) for it AND a
-// SessionFinalizedEvent(failed, incomplete) for the session — the ONLY
-// SessionFinalizedEvent codex ever emits. A cleanly-ended session (most recent
-// turn already finalized) returns no events and stays running (SOW C#3:
-// no clean-EOF completed finalize). Idempotent: a second call is a no-op.
+// finalizeAtEOF is the EOF-finalize surface the scanner (Chunk C) calls
+// UNCONDITIONALLY when a rollout file has been fully read to EOF (F1). The
+// behavior splits on the most-recent open turn's format and on staleness:
 //
-// The scanner MUST NOT call this on a fresh (mtime < 1 h) file — an in-flight
-// turn there is legitimately still running and must stay open for the next
-// append (spec rule #23 "keep turn open").
-func (m *fileMapper) finalizeStale(nowUs int64) []canonical.Event {
-	if m.staleFinalized {
+//   - OLD-format open turn (turn_context-only, no task_started — cli < ~0.93):
+//     finalize the turn COMPLETED, REGARDLESS of staleness (spec edge #3 "close
+//     at EOF"). Real corpus: 1,006 files (38%) are pure old-format ending cleanly
+//     with no completion marker — without this they would be mislabeled crashes.
+//     NO SessionFinalizedEvent (codex has no per-session terminal signal — SOW
+//     C#3); the session stays running and the UI uses last_activity_ts.
+//   - NEW-format open turn (saw a task_started but no task_complete/turn_aborted):
+//     finalize the turn FAILED/incomplete AND emit SessionFinalizedEvent(failed,
+//     incomplete) ONLY when `stale` (mtime ≥ 1 h — the scanner owns the check;
+//     spec rule #23, SOW C#3). On a FRESH new-format file the turn is left open
+//     (it is legitimately still running; the next append continues it).
+//   - No open turn (clean end, or never opened a turn): nothing — stays running.
+//
+// nowUs is the synthetic end timestamp (the scanner passes the file mtime in
+// micros). Repeat calls are no-ops, except that a fresh new-format turn left
+// open can still be closed by a later stale call. The scanner calls this at EVERY
+// full-read EOF with the stale bool, so OLD-format closes on fresh files too (F1).
+func (m *fileMapper) finalizeAtEOF(stale bool, nowUs int64) []canonical.Event {
+	if m.eofFinalized {
 		return nil
 	}
-	m.staleFinalized = true
 	ts := m.mostRecentOpenTurn()
 	if ts == nil {
 		// No open turn: the session ended cleanly (or never opened a turn).
-		// Codex has no per-session terminal signal, so it stays running.
+		// Codex has no per-session terminal signal, so it stays running. Mark
+		// finalized so a later call is a no-op.
+		m.eofFinalized = true
+		return nil
+	}
+	if !ts.sawTaskStarted {
+		// OLD-format: close COMPLETED at EOF regardless of staleness (spec edge #3).
+		m.eofFinalized = true
+		return m.closeOpenTurnAtEOF(ts, nowUs, "completed", "", false)
+	}
+	// NEW-format: only a stale file's hanging turn is a crash (rule #23). A fresh
+	// file's turn is still running — leave it open (do NOT set eofFinalized, so a
+	// later stale sweep can still close it).
+	if !stale {
 		return nil
 	}
+	m.eofFinalized = true
+	return m.closeOpenTurnAtEOF(ts, nowUs, "failed", "incomplete", true)
+}
+
+// closeOpenTurnAtEOF finalizes a hanging turn at EOF: its dangling ops are closed
+// (cancelled for a crashed new-format turn, completed for a cleanly-ended
+// old-format turn), the turn is finalized with the supplied status/errClass, its
+// turn-extras log is emitted, and — only when withSessionFinalize is set (the
+// stale new-format crash path) — a SessionFinalizedEvent(failed, incomplete) is
+// appended (the ONLY SessionFinalizedEvent codex emits; SOW C#3). endUs is
+// floored at the turn's start so the synthetic close never predates the open.
+func (m *fileMapper) closeOpenTurnAtEOF(ts *turnState, nowUs int64, status, errClass string, withSessionFinalize bool) []canonical.Event {
 	endUs := nowUs
 	if endUs < ts.startTsUs {
 		endUs = ts.startTsUs
@@ -40,27 +71,33 @@ func (m *fileMapper) finalizeStale(nowUs int64) []canonical.Event {
 	base := func() canonical.EventBase {
 		return canonical.EventBase{SourceID: m.sourceID, SourceSeq: 0, Ts: endUs}
 	}
-	// Finalize the hanging turn's dangling ops as cancelled (the process died
-	// mid-turn, so in-flight tool/llm ops never completed), then close the turn
-	// failed/incomplete and the session failed/incomplete (the ONLY
-	// SessionFinalizedEvent codex emits — spec rule #23, SOW C#3).
-	out := m.finalizeDanglingOps(ts.codexTurnID, base, endUs, "cancelled")
-	out = append(out, m.finalizeTurn(ts, base(), endUs, "failed", "incomplete"))
+	danglingStatus := "cancelled"
+	if status == "completed" {
+		// An old-format turn that ended cleanly: its in-flight ops (e.g. a final
+		// assistant message with no explicit output) are treated as completed, not
+		// cancelled — the session did not crash.
+		danglingStatus = "completed"
+	}
+	out := m.finalizeDanglingOps(ts.codexTurnID, base, endUs, danglingStatus)
+	out = append(out, m.finalizeTurn(ts, base(), endUs, status, errClass))
 	if ev := m.turnExtrasLog(ts, base()); ev != nil {
 		out = append(out, ev)
 	}
-	out = append(out, canonical.SessionFinalizedEvent{
-		EventBase:  base(),
-		NativeID:   m.nativeID,
-		Status:     canonical.StatusFailed,
-		ErrorClass: "incomplete",
-		EndTs:      endUs,
-	})
+	if withSessionFinalize {
+		out = append(out, canonical.SessionFinalizedEvent{
+			EventBase:  base(),
+			NativeID:   m.nativeID,
+			Status:     canonical.StatusFailed,
+			ErrorClass: "incomplete",
+			EndTs:      endUs,
+		})
+	}
 	return out
 }
 
 // mostRecentOpenTurn returns the latest-opened turn that has not been finalized,
-// or nil when every turn is closed (or none exist). Used by finalizeStale.
+// or nil when every turn is closed (or none exist). Used by finalizeAtEOF and
+// the replaced/superseded-turn helpers.
 func (m *fileMapper) mostRecentOpenTurn() *turnState {
 	for i := len(m.turnOrder) - 1; i >= 0; i-- {
 		if ts, ok := m.turns[m.turnOrder[i]]; ok && !ts.finalized {
diff --git a/internal/adapters/codex/mapper_state.go b/internal/adapters/codex/mapper_state.go
new file mode 100644
index 0000000..9e0187f
--- /dev/null
+++ b/internal/adapters/codex/mapper_state.go
@@ -0,0 +1,129 @@
+package codex
+
+import "github.com/netdata/ai-viewer/internal/canonical"
+
+// This file holds the per-file inference STATE TYPES the mapper threads through
+// one rollout: the synthesized turn, the in-flight and finalized op trackers, and
+// the positional web_search ref. The mapper's dispatch and bootstrap live in
+// mapper.go; these types are split out to keep that file focused.
+
+// turnState tracks one synthesized turn's accumulation between its open
+// (turn_context or task_started) and its close (task_complete / turn_aborted /
+// superseding turn / EOF finalize). Token rollup is the C#1 model: TokensIn/Out
+// are the SUM of per-call last_token_usage over the token_count events attributed
+// to this turn — never a delta of cumulative total_token_usage (spec rule #4, #17).
+type turnState struct {
+	// seq is the canonical 1-based turn Seq.
+	seq int
+	// codexTurnID is the source turn_id (UUID), surfaced in
+	// turns.extras_json.codex_turn_id (spec "Canonical Model Gaps" #2). Empty
+	// for the absent-turn_id fallback turn.
+	codexTurnID string
+	// opSeq is the 1-based op counter within this turn.
+	opSeq int
+	// started reports whether a TurnStartedEvent was already emitted for this
+	// turn (idempotency across turn_context + task_started — spec rule #2, #3).
+	started bool
+	// finalized reports whether a TurnFinalizedEvent was already emitted, so a
+	// duplicate task_complete / a later EOF finalize does not double-close.
+	finalized bool
+	// sawTaskStarted reports whether an event_msg.task_started ever opened/touched
+	// this turn (F1/F2). It discriminates NEW-format turns (task_started present —
+	// close failed/incomplete only at stale EOF, and a replacing task_started
+	// closes the prior failed/replaced) from OLD-format turns (turn_context only,
+	// cli < ~0.93 — close completed at EOF or when a different turn_context opens,
+	// spec edge #2/#3). Set by mapTaskStarted; never cleared.
+	sawTaskStarted bool
+	// startTsUs is the turn's open timestamp (micros), used as a floor for the
+	// synthetic EOF-finalize EndTs, stale or not (spec rule #23, edge #3).
+	startTsUs int64
+	// tokensIn / tokensOut accumulate the C#1 per-call last_token_usage rollup
+	// (spec rule #4, #17).
+	tokensIn  int64
+	tokensOut int64
+	// tokensCacheRead / tokensCacheWrite accumulate the per-call cached-token
+	// split when newer rollouts report it (canonical-events.md codex cache row).
+	tokensCacheRead  int64
+	tokensCacheWrite int64
+	// ctxMax is the model_context_window stashed from task_started /
+	// token_count, applied to the turn's last LLM op at finalize (spec rule #3,
+	// #17).
+	ctxMax int64
+	// sandbox is the sandbox_policy.type snapshotted from the turn's
+	// turn_context, surfaced in turns.extras_json.sandbox (spec rule #2,
+	// "Canonical Model Gaps" #3).
+	sandbox string
+	// effort / approvalPolicy are turn_context policy snapshots for turn extras.
+	effort         string
+	approvalPolicy string
+	// ttftMs is task_complete.time_to_first_token_ms, surfaced in
+	// turns.extras_json.ttft_ms (spec "Canonical Model Gaps" #8).
+	ttftMs int64
+	// lastAgentMessage is event_msg.agent_message.message, surfaced in
+	// TurnFinalized extras as the UI "latest answer" preview (spec rule #19).
+	lastAgentMessage string
+	// lastLLMOpSeq is the op Seq of the most recent LLM op in this turn, so a
+	// trailing token_count attaches CtxUsed/CtxMax to it (spec rule #17). 0 when
+	// no LLM op has been emitted yet.
+	lastLLMOpSeq int
+	// lastLLMEndTs is the EndTs of the turn's last LLM op, preserved so a
+	// token_count re-finalize that adds CtxUsed/CtxMax does not clobber the op's
+	// real end timestamp (the ingester reconciles fields on the (turn,seq)
+	// upsert — canonical-events.md §Idempotency).
+	lastLLMEndTs int64
+	// lastLLMCtxUsed is the cumulative total_token_usage observed for the turn's
+	// last LLM op (spec rule #17). The op's CtxUsed is set from this at finalize.
+	lastLLMCtxUsed int64
+}
+
+// openOp records where an in-flight op was emitted so its finalize / enrichment
+// lands under the same turn/op (spec rule #9, #14-16).
+type openOp struct {
+	turnID    string
+	turnSeq   int
+	opSeq     int
+	kind      canonical.OpKind
+	name      string
+	namespace string
+	// extras accumulates enrichment (exec_command_end, mcp_tool_call_end,
+	// patch_apply_end) merged onto the op via an OpStarted re-emit (spec rule
+	// #14-16). The adapter does NOT emit a SECOND op for an enrichment event — the
+	// re-emit is an idempotent UPDATE on (turn,seq).
+	extras map[string]any
+	// enrichStatus / enrichErrClass carry a terminal status derived from an
+	// enrichment event (exec_command_end exit_code) that arrived BEFORE the op's
+	// *_output (the ~68-85% exec-first ordering, F4). The op stays open so its
+	// *_output finalizes it, but the *_output's finalize PREFERS this exec-derived
+	// status (a non-zero exit_code is authoritative over a benign-looking output
+	// string). Empty when no enrichment status was observed.
+	enrichStatus   string
+	enrichErrClass string
+	// finalized guards against a second *_output finalizing the same op.
+	finalized bool
+}
+
+// finalizedOp records where a now-finalized op lives so a LATE enrichment event
+// (F4, spec rule #14) can merge Extras onto it via an idempotent OpStarted
+// re-emit. The kind/name/namespace are preserved so the re-emit restates the op
+// faithfully (the writer's ON CONFLICT UPDATE keeps the original start_ts via
+// MIN and grafts the resolver stash, so re-emitting with the op's known
+// identity only adds the enrichment Extras).
+type finalizedOp struct {
+	turnSeq   int
+	opSeq     int
+	kind      canonical.OpKind
+	name      string
+	namespace string
+}
+
+// openWebSearchRef records the most-recent open web_search op in the active turn
+// for POSITIONAL pairing with event_msg.web_search_end (F7). web_search_call
+// carries no correlation key, so the end pairs by position, not call_id.
+// syntheticCallID is the openOps key the call was tracked under, so the end can
+// finalize the SAME op and remove it from the dangling set.
+type openWebSearchRef struct {
+	turnID          string
+	turnSeq         int
+	opSeq           int
+	syntheticCallID string
+}
diff --git a/internal/adapters/codex/mapper_test.go b/internal/adapters/codex/mapper_test.go
index cb1ece5..c2f8f42 100644
--- a/internal/adapters/codex/mapper_test.go
+++ b/internal/adapters/codex/mapper_test.go
@@ -164,9 +164,11 @@ func TestMapper_TurnBoundaryNewFormat(t *testing.T) {
 }
 
 // TestMapper_TurnBoundaryOldFormat asserts the old format (turn_context only, no
-// task_started/complete) opens a turn per turn_id; the running turn stays open
-// at clean EOF (no clean finalize — SOW C#3) (spec rule #2,#22; edge #3;
-// acceptance #3).
+// task_started/complete) opens a turn per turn_id, and a NEW turn_context closes
+// the prior OLD-format turn COMPLETED (F1, spec edge #3 — there is no task_complete
+// to close it). The last turn stays open until EOF (closed by finalizeAtEOF, not
+// exercised in this mapper-only test). No SessionFinalized (SOW C#3) (spec rule
+// #2,#22; edge #3; acceptance #3).
 func TestMapper_TurnBoundaryOldFormat(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid-old")
@@ -180,12 +182,18 @@ func TestMapper_TurnBoundaryOldFormat(t *testing.T) {
 	if got := countKind(events, canonical.EvTurnStarted); got != 2 {
 		t.Fatalf("TurnStarted count = %d, want 2 (one per turn_id)", got)
 	}
-	// Clean EOF: no TurnFinalized, no SessionFinalized (SOW C#3).
-	if got := countKind(events, canonical.EvTurnFinalized); got != 0 {
-		t.Errorf("TurnFinalized count = %d, want 0 at clean EOF", got)
+	// F1: the second turn_context closes the first OLD-format turn COMPLETED; the
+	// second turn stays open (no further boundary, no EOF-close in this test).
+	tf := turnFinals(events)
+	if len(tf) != 1 {
+		t.Fatalf("TurnFinalized count = %d, want 1 (turn t1 superseded by t2)", len(tf))
 	}
+	if tf[0].Status != "completed" || tf[0].ErrorClass != "" || tf[0].Seq != 1 {
+		t.Errorf("superseded turn finalize = {Status:%q ErrorClass:%q Seq:%d}, want {completed  1}", tf[0].Status, tf[0].ErrorClass, tf[0].Seq)
+	}
+	// No SessionFinalized (codex has no per-session terminal signal — SOW C#3).
 	if got := countKind(events, canonical.EvSessionFinalized); got != 0 {
-		t.Errorf("SessionFinalized count = %d, want 0 at clean EOF", got)
+		t.Errorf("SessionFinalized count = %d, want 0 (stays running)", got)
 	}
 }
 
@@ -479,9 +487,12 @@ func TestMapper_Compaction(t *testing.T) {
 	}
 }
 
-// TestMapper_ContextCompactionVariants asserts response_item.compaction,
-// response_item.context_compaction, and event_msg.context_compacted all converge
-// on a compaction op (spec rule #20, gap #4).
+// TestMapper_ContextCompactionVariants asserts each compaction representation, in
+// ISOLATION, converges on exactly one compaction op (spec rule #20, gap #4). The
+// response_item forms are forward-compat (0 real files); a LONE
+// event_msg.context_compacted (no preceding `compacted`) emits the op via the F5
+// defensive path. The adjacent-companion SUPPRESSION (compacted + context_compacted
+// pair → ONE op) is asserted separately in TestMapper_CompactionPairSuppressed.
 func TestMapper_ContextCompactionVariants(t *testing.T) {
 	t.Parallel()
 	for _, line := range []string{
@@ -504,28 +515,72 @@ func TestMapper_ContextCompactionVariants(t *testing.T) {
 	}
 }
 
+// TestMapper_CompactionPairSuppressed asserts the REAL compaction wire shape — a
+// data-bearing top-level `compacted` line IMMEDIATELY followed by a bare
+// event_msg.context_compacted marker (same timestamp) — produces EXACTLY ONE
+// compaction op (F5, spec rule #20). The op comes from `compacted` (carries the
+// message preview); the adjacent context_compacted is suppressed.
+func TestMapper_CompactionPairSuppressed(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"compacted","payload":{"message":"summary so far","replacement_history":[{"type":"message"}]}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"context_compacted"}}`,
+	}
+	events := runLines(t, m, lines)
+	compactions := 0
+	var op canonical.OpStartedEvent
+	for _, s := range opStarts(events) {
+		if s.Kind == canonical.OpCompaction {
+			compactions++
+			op = s
+		}
+	}
+	if compactions != 1 {
+		t.Fatalf("compaction op count = %d, want 1 (the adjacent context_compacted must be suppressed; F5)", compactions)
+	}
+	// The single op must be the data-bearing one (from `compacted`).
+	if op.Extras["message_preview"] != "summary so far" {
+		t.Errorf("compaction op did not carry the data-bearing `compacted` preview; extras=%v", op.Extras)
+	}
+}
+
 // TestMapper_ExecCommandEndEnrichesNoSecondOp asserts exec_command_end enriches
-// the matching tool op (it finalizes via exit_code) without emitting a second
-// op-start (spec rule #14).
+// the matching tool op (it finalizes via exit_code) without creating a second op
+// ROW (F4, spec rule #14). The enrichment lands via an OpStarted re-emit on the
+// SAME (turn,seq) — an idempotent UPDATE — so op count is measured by DISTINCT op
+// seq, and the exec Extras must reach that op.
 func TestMapper_ExecCommandEndEnrichesNoSecondOp(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid")
+	// exec-first ordering: exec_command_end (enrichment) arrives BEFORE the
+	// function_call_output (which finalizes the op carrying the stashed Extras).
 	lines := []string{
 		metaLine("sid", `"exec"`),
 		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.5"}}`,
 		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
 		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"out","cwd":"<ROOT>","source":"model"}}`,
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"out"}}`,
 	}
 	events := runLines(t, m, lines)
 
-	toolStarts := 0
+	toolSeqs := map[int]struct{}{}
+	enriched := false
 	for _, s := range opStarts(events) {
 		if s.Kind == canonical.OpTool {
-			toolStarts++
+			toolSeqs[s.Seq] = struct{}{}
+			if code, ok := s.Extras["exec_exit_code"]; ok && code == int64(0) {
+				enriched = true
+			}
 		}
 	}
-	if toolStarts != 1 {
-		t.Fatalf("tool op starts = %d, want 1 (exec_command_end must not add a second op)", toolStarts)
+	if len(toolSeqs) != 1 {
+		t.Fatalf("distinct tool op seqs = %d, want 1 (exec_command_end must not add a second op row)", len(toolSeqs))
+	}
+	if !enriched {
+		t.Errorf("exec_command_end Extras did not reach the tool op via the OpStarted re-emit (F4)")
 	}
 	// The op is finalized completed (exit_code 0).
 	completed := false
@@ -539,8 +594,10 @@ func TestMapper_ExecCommandEndEnrichesNoSecondOp(t *testing.T) {
 	}
 }
 
-// TestMapper_ExecCommandEndNonZeroExitFails asserts a non-zero exit_code
-// finalizes the op failed/command_failed (spec rule #14).
+// TestMapper_ExecCommandEndNonZeroExitFails asserts a non-zero exit_code is
+// authoritative over a benign-looking output: the op finalizes failed/
+// command_failed even though the function_call_output text ("done") does not
+// itself look like an error (F4, spec rule #14).
 func TestMapper_ExecCommandEndNonZeroExitFails(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid")
@@ -549,6 +606,7 @@ func TestMapper_ExecCommandEndNonZeroExitFails(t *testing.T) {
 		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
 		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
 		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":2,"aggregated_output":""}}`,
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"done"}}`,
 	}
 	events := runLines(t, m, lines)
 	failed := false
@@ -558,7 +616,7 @@ func TestMapper_ExecCommandEndNonZeroExitFails(t *testing.T) {
 		}
 	}
 	if !failed {
-		t.Errorf("non-zero exit did not finalize failed/command_failed")
+		t.Errorf("non-zero exit did not finalize failed/command_failed (exit_code must win over benign output)")
 	}
 }
 
@@ -643,25 +701,35 @@ func TestMapper_SandboxDeniedOutput(t *testing.T) {
 }
 
 // TestMapper_WebSearchOp asserts a web_search_call + web_search_end produces one
-// web tool op enriched by the end event (spec rule #11).
+// web tool op enriched by the end event (spec rule #11, F7). web_search_call
+// carries no correlation key, so the end pairs POSITIONALLY; the enrichment lands
+// via an idempotent OpStarted re-emit on the SAME (turn,seq), so the op count is
+// measured by DISTINCT op seq, not by raw OpStarted-event count.
 func TestMapper_WebSearchOp(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid")
 	lines := []string{
 		metaLine("sid", `"exec"`),
 		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
-		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","call_id":"w1","status":"completed","action":{"type":"search","query":"q"}}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","status":"completed","action":{"type":"search","query":"q"}}}`,
 		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"w1","query":"q"}}`,
 	}
 	events := runLines(t, m, lines)
-	web := 0
+	webSeqs := map[int]struct{}{}
+	enriched := false
 	for _, s := range opStarts(events) {
 		if s.Name == "web_search" && s.ToolNamespace == "web" {
-			web++
+			webSeqs[s.Seq] = struct{}{}
+			if q, ok := s.Extras["query"]; ok && q == "q" {
+				enriched = true
+			}
 		}
 	}
-	if web != 1 {
-		t.Fatalf("web_search op count = %d, want 1", web)
+	if len(webSeqs) != 1 {
+		t.Fatalf("distinct web_search op seqs = %d, want 1", len(webSeqs))
+	}
+	if !enriched {
+		t.Fatalf("web_search op was not enriched with the query from web_search_end (F7)")
 	}
 }
 
@@ -681,16 +749,21 @@ func TestMapper_NoCleanFinalize(t *testing.T) {
 	if got := countKind(events, canonical.EvSessionFinalized); got != 0 {
 		t.Fatalf("SessionFinalized count = %d, want 0 (no clean-EOF finalize)", got)
 	}
-	// And finalizeStale on a clean session (no open turn) emits nothing.
-	if extra := m.finalizeStale(1_700_000_000_000_000); len(extra) != 0 {
-		t.Fatalf("finalizeStale on clean session emitted %d events, want 0", len(extra))
+	// And finalizeAtEOF on a clean session (no open turn) emits nothing whether or
+	// not the file is stale (F1).
+	if extra := m.finalizeAtEOF(false, 1_700_000_000_000_000); len(extra) != 0 {
+		t.Fatalf("finalizeAtEOF(fresh) on clean session emitted %d events, want 0", len(extra))
+	}
+	if extra := m.finalizeAtEOF(true, 1_700_000_000_000_000); len(extra) != 0 {
+		t.Fatalf("finalizeAtEOF(stale) on clean session emitted %d events, want 0", len(extra))
 	}
 }
 
-// TestMapper_SyntheticStaleFinalize asserts a hanging turn (no task_complete)
-// yields a synthetic TurnFinalized(failed,incomplete) + SessionFinalized(failed,
-// incomplete) ONLY when the scanner calls finalizeStale (spec rule #23, SOW
-// C#3, acceptance #5h).
+// TestMapper_SyntheticStaleFinalize asserts a hanging NEW-format turn (saw a
+// task_started, no task_complete) yields a synthetic TurnFinalized(failed,
+// incomplete) + SessionFinalized(failed,incomplete) ONLY when the scanner calls
+// finalizeAtEOF with stale=true (spec rule #23, SOW C#3, acceptance #5h). A fresh
+// new-format file (stale=false) leaves the turn open (F1).
 func TestMapper_SyntheticStaleFinalize(t *testing.T) {
 	t.Parallel()
 	m := newTestMapper("sid-crash")
@@ -706,9 +779,15 @@ func TestMapper_SyntheticStaleFinalize(t *testing.T) {
 		t.Fatalf("pre-EOF TurnFinalized = %d, want 0", got)
 	}
 
-	// Scanner determines mtime is stale ≥ 1h and calls finalizeStale.
+	// A FRESH new-format file (stale=false) leaves the hanging turn open: no
+	// finalize, and a later stale sweep can still close it (F1).
+	if fresh := m.finalizeAtEOF(false, 1_763_700_000_000_000); len(fresh) != 0 {
+		t.Fatalf("finalizeAtEOF(fresh) on a hanging new-format turn emitted %d events, want 0", len(fresh))
+	}
+
+	// Scanner later determines mtime is stale ≥ 1h and calls finalizeAtEOF(true).
 	const staleUs = 1_763_700_000_000_000
-	stale := m.finalizeStale(staleUs)
+	stale := m.finalizeAtEOF(true, staleUs)
 
 	if countKind(stale, canonical.EvTurnFinalized) != 1 {
 		t.Fatalf("stale finalize: TurnFinalized = %d, want 1", countKind(stale, canonical.EvTurnFinalized))
@@ -740,8 +819,41 @@ func TestMapper_SyntheticStaleFinalize(t *testing.T) {
 		t.Errorf("stale dangling op finalize cancelled = %d, want 1", cancelled)
 	}
 	// Idempotent: a second call emits nothing.
-	if again := m.finalizeStale(staleUs); len(again) != 0 {
-		t.Fatalf("second finalizeStale emitted %d events, want 0 (idempotent)", len(again))
+	if again := m.finalizeAtEOF(true, staleUs); len(again) != 0 {
+		t.Fatalf("second finalizeAtEOF emitted %d events, want 0 (idempotent)", len(again))
+	}
+}
+
+// TestMapper_OldFormatClosesCompletedAtEOF asserts an OLD-format session
+// (turn_context-only, no task_started) closes its last turn COMPLETED at EOF
+// regardless of staleness, and emits NO SessionFinalizedEvent (F1, spec edge #3).
+func TestMapper_OldFormatClosesCompletedAtEOF(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid-old")
+	lines := []string{
+		`{"timestamp":"` + tsMeta + `","type":"session_meta","payload":{"id":"sid-old","source":"cli","cli_version":"0.61.0"}}`,
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.1-codex-max"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"hi"}]}}`,
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hello"}]}}`,
+	}
+	events := runLines(t, m, lines)
+	if got := countKind(events, canonical.EvTurnFinalized); got != 0 {
+		t.Fatalf("pre-EOF TurnFinalized = %d, want 0", got)
+	}
+	// FRESH file: the old-format turn still closes completed at EOF (F1 — the bug
+	// was that only stale files were finalized, mislabeling 38% of the corpus).
+	const eofUs = 1_763_700_000_000_000
+	out := m.finalizeAtEOF(false, eofUs)
+	tf := turnFinals(out)
+	if len(tf) != 1 || tf[0].Status != "completed" || tf[0].ErrorClass != "" {
+		t.Fatalf("old-format EOF finalize = %+v, want one {completed }", tf)
+	}
+	if got := countKind(out, canonical.EvSessionFinalized); got != 0 {
+		t.Fatalf("old-format EOF SessionFinalized = %d, want 0 (stays running, SOW C#3)", got)
+	}
+	// Idempotent.
+	if again := m.finalizeAtEOF(true, eofUs); len(again) != 0 {
+		t.Fatalf("second finalizeAtEOF emitted %d events, want 0 (idempotent)", len(again))
 	}
 }
 
diff --git a/internal/adapters/codex/mapper_turn.go b/internal/adapters/codex/mapper_turn.go
index 786f256..377470f 100644
--- a/internal/adapters/codex/mapper_turn.go
+++ b/internal/adapters/codex/mapper_turn.go
@@ -246,6 +246,41 @@ func (m *fileMapper) finalizeTurn(ts *turnState, base canonical.EventBase, endUs
 	}
 }
 
+// supersedePriorTurn closes the most-recent still-open turn when a NEW turn_id
+// opens (via turn_context OR task_started), deciding the close status from the
+// PRIOR turn's OWN format (F1/F2, spec edge #2/#3):
+//   - NEW-format prior (it saw a task_started but no task_complete/turn_aborted):
+//     close FAILED/replaced — the user interrupted and re-prompted (edge #2).
+//     Dangling ops on it are cancelled.
+//   - OLD-format prior (turn_context-only, never saw a task_started): close
+//     COMPLETED — an old-format session has no task_complete, so the next
+//     turn_context boundary is its only close signal (edge #3). Dangling ops on it
+//     are completed (it finished cleanly).
+//
+// It is a no-op when there is no prior open turn or the prior turn IS this
+// turn_id (a re-announce / a turn_context re-emitted post-compaction for the same
+// turn). Called from BOTH the turn_context and task_started handlers; in a
+// new-format session task_started follows turn_context, so the turn_context call
+// supersedes the prior turn and the task_started call is then a same-id no-op.
+// turnExtrasLog is emitted for the closed turn so its metadata is not lost.
+func (m *fileMapper) supersedePriorTurn(newTurnID string, advance func(int64) canonical.EventBase, atUs int64) []canonical.Event {
+	prior := m.mostRecentOpenTurn()
+	if prior == nil || prior.codexTurnID == newTurnID {
+		return nil
+	}
+	status, errClass, danglingStatus := "completed", "", "completed"
+	if prior.sawTaskStarted {
+		status, errClass, danglingStatus = "failed", "replaced", "cancelled"
+	}
+	base := func() canonical.EventBase { return advance(atUs) }
+	out := m.finalizeDanglingOps(prior.codexTurnID, base, atUs, danglingStatus)
+	out = append(out, m.finalizeTurn(prior, base(), atUs, status, errClass))
+	if ev := m.turnExtrasLog(prior, base()); ev != nil {
+		out = append(out, ev)
+	}
+	return out
+}
+
 // finalizeDanglingOps finalizes every op still open under the given turn at turn
 // close, with the supplied status (spec rule #4 — "completed inferred or unknown
 // if no output ever arrived" at task_complete; edge #9 — "cancelled" at abort/
@@ -266,22 +301,40 @@ func (m *fileMapper) finalizeDanglingOps(turnID string, base func() canonical.Ev
 	out := make([]canonical.Event, 0, len(ops))
 	for _, p := range ops {
 		p.op.finalized = true
-		fin := canonical.OpFinalizedEvent{
+		// If a prior enrichment (e.g. exec_command_end) stashed Extras on this
+		// still-open op, re-emit an OpStarted carrying them so they reach
+		// ops.extras_json before the dangling finalize (F4, spec rule #14). The
+		// writer upserts (turn,seq), so this is an idempotent UPDATE.
+		if len(p.op.extras) > 0 {
+			out = append(out, canonical.OpStartedEvent{
+				EventBase:       base(),
+				SessionNativeID: m.nativeID,
+				TurnSeq:         p.op.turnSeq,
+				Seq:             p.op.opSeq,
+				ParentOpSeq:     -1,
+				Kind:            p.op.kind,
+				Name:            p.op.name,
+				ToolNamespace:   p.op.namespace,
+				Extras:          p.op.extras,
+			})
+		}
+		// An exec-derived status (exit_code) from an exec_command_end with no
+		// following *_output is authoritative over the generic dangling status (F4).
+		opStatus, opErrClass := status, ""
+		if p.op.enrichStatus != "" {
+			opStatus, opErrClass = p.op.enrichStatus, p.op.enrichErrClass
+		}
+		out = append(out, canonical.OpFinalizedEvent{
 			EventBase:       base(),
 			SessionNativeID: m.nativeID,
 			TurnSeq:         p.op.turnSeq,
 			Seq:             p.op.opSeq,
-			Status:          status,
+			Status:          opStatus,
+			ErrorClass:      opErrClass,
 			EndTs:           endUs,
-		}
-		if len(p.op.extras) > 0 {
-			// Enrichment already merged onto the op carries no canonical
-			// finalize field beyond status; the extras live on the OpStarted's
-			// Extras (set at enrichment time), so nothing extra to attach here.
-			_ = p.op.extras
-		}
-		out = append(out, fin)
+		})
 		delete(m.openOps, p.callID)
+		m.recordFinalizedOp(p.callID, p.op)
 	}
 	return out
 }
diff --git a/internal/adapters/codex/ops.go b/internal/adapters/codex/ops.go
index be18062..2485f34 100644
--- a/internal/adapters/codex/ops.go
+++ b/internal/adapters/codex/ops.go
@@ -25,8 +25,13 @@ func (m *fileMapper) mapTurnContext(rec record, advance func(int64) canonical.Ev
 		return nil
 	}
 	tsUs := m.recordTs(rec)
+	out := make([]canonical.Event, 0, 3)
+	// A new turn_id opening supersedes the prior open turn (F1/F2, spec edge #2/#3).
+	// supersedePriorTurn decides the prior turn's close status from ITS OWN format
+	// (NEW-format → failed/replaced; OLD-format → completed) and is a no-op for a
+	// re-activating turn_context with the SAME turn_id (post-compaction).
+	out = append(out, m.supersedePriorTurn(p.TurnID, advance, tsUs)...)
 	ts := m.openTurn(p.TurnID, tsUs)
-	out := make([]canonical.Event, 0, 2)
 	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
 		out = append(out, ev)
 	}
@@ -59,9 +64,12 @@ func (m *fileMapper) mapTurnContext(rec record, advance func(int64) canonical.Ev
 // mapCompacted handles a top-level compacted line (spec rule #20). It emits a
 // single compaction op (Kind=compaction, Name=compaction) with the
 // replacement_history size and a message preview in Extras; the full summary
-// body goes to a PayloadRef. response_item.compaction / context_compaction and
-// event_msg.context_compacted are handled the same way in ops_response.go /
-// ops_event.go so all compaction signals converge on OpCompaction (spec gap #4).
+// body goes to a PayloadRef. This is the data-bearing representation; the
+// adjacent event_msg.context_compacted bare marker (same timestamp) is its
+// companion and is SUPPRESSED so ONE op is emitted per compaction (F5, spec rule
+// #20). recordIdx-1 is the current record's index (mapRecord pre-incremented
+// recordIdx); recording it lets the immediately-following context_compacted
+// recognize itself as the companion.
 func (m *fileMapper) mapCompacted(rec record, advance func(int64) canonical.EventBase) []canonical.Event {
 	p := rec.Compacted
 	tsUs := m.recordTs(rec)
@@ -72,9 +80,30 @@ func (m *fileMapper) mapCompacted(rec record, advance func(int64) canonical.Even
 			extras["message_preview"] = prev
 		}
 	}
+	// recordIdx was pre-incremented in mapRecord, so recordIdx-1 is THIS line's
+	// index; recording it lets the immediately-following context_compacted detect
+	// adjacency (F5). recordIdx is always >= 1 here (this record was counted).
+	m.compactedSeen = true
+	m.compactedRecordIdx = m.recordIdx - 1
 	return m.emitCompactionOp(advance, tsUs, extras, "json")
 }
 
+// suppressContextCompacted reports whether an event_msg.context_compacted record
+// is the bare companion marker of the immediately-preceding top-level `compacted`
+// line and must NOT emit a second compaction op (F5). The real wire pair is two
+// adjacent lines (compacted then context_compacted) with identical timestamps;
+// only the data-bearing `compacted` produces the op. A context_compacted with no
+// preceding compacted (defensive) is NOT suppressed and emits the op itself.
+// recordIdx-1 is the current record's index; the companion is suppressed when the
+// recorded `compacted` index is exactly one before it (adjacent).
+func (m *fileMapper) suppressContextCompacted() bool {
+	// recordIdx-1 is THIS context_compacted's index (mapRecord pre-incremented
+	// recordIdx), so this record is the companion exactly when the recorded
+	// `compacted` index is the one immediately before it. compactedSeen guards
+	// the zero-value compactedRecordIdx when no `compacted` line was seen yet.
+	return m.compactedSeen && m.compactedRecordIdx+1 == m.recordIdx-1
+}
+
 // emitCompactionOp emits the OpStarted+OpFinalized compaction pair plus a
 // PayloadRef for the summary body (spec rule #20, gap #4). It opens a turn 0
 // fallback when compaction precedes any turn_context so the op attaches to a
@@ -136,17 +165,20 @@ func (m *fileMapper) nextOp(ts *turnState) (int, int) {
 // trackOp records an in-flight op by call_id so its matching *_output (or an
 // enrichment event) finalizes/enriches the SAME op (spec rule #9, #14-16). A
 // call_id of "" is not tracked (an unpaired op finalizes inline or at turn end).
-func (m *fileMapper) trackOp(callID, turnID string, turnSeq, opSeq int, kind canonical.OpKind, name string) {
+// namespace is stored so a late-enrichment OpStarted re-emit (F4) restates the
+// op's tool_namespace faithfully.
+func (m *fileMapper) trackOp(callID, turnID string, turnSeq, opSeq int, kind canonical.OpKind, name, namespace string) {
 	if callID == "" {
 		return
 	}
 	m.openOps[callID] = &openOp{
-		turnID:  turnID,
-		turnSeq: turnSeq,
-		opSeq:   opSeq,
-		kind:    kind,
-		name:    name,
-		extras:  map[string]any{},
+		turnID:    turnID,
+		turnSeq:   turnSeq,
+		opSeq:     opSeq,
+		kind:      kind,
+		name:      name,
+		namespace: namespace,
+		extras:    map[string]any{},
 	}
 }
 
diff --git a/internal/adapters/codex/ops_collab.go b/internal/adapters/codex/ops_collab.go
new file mode 100644
index 0000000..ce4199d
--- /dev/null
+++ b/internal/adapters/codex/ops_collab.go
@@ -0,0 +1,137 @@
+package codex
+
+import (
+	"encoding/json"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// This file handles the codex sub-agent collab lifecycle event_msgs (F3): the
+// parent→child spawn link (collab_agent_spawn_end) and the recognized-but-op-less
+// markers (collab_close_end / collab_waiting_end, dispatched in ops_event.go).
+// Kept separate from ops_enrich.go so the enrichment path stays focused.
+
+// mapCollabSpawn handles event_msg.collab_agent_spawn_end (spec "Sub-Agent
+// Linkage", F3). It emits a session op (Kind=session, Name=spawn) whose
+// ChildSessionNativeID is the spawned thread's new_thread_id, so the topology
+// view links the parent rollout to the child sub-agent rollout. The real wire
+// link is sender_thread_id→new_thread_id (NOT agent_ref.thread_id, which the
+// earlier spec wrongly named). The child rollout lands as its own file with
+// source.subagent.thread_spawn.parent_thread_id; the ingester relaxes the FK and
+// re-links when the child arrives (canonical-events.md out-of-order child). The
+// spawn metadata (nickname/role/model) goes into the op Extras for the UI. When
+// new_thread_id is absent the op is suppressed and a DBG log keeps it visible.
+func (m *fileMapper) mapCollabSpawn(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	sp := decodeCollabSpawn(rec.Raw)
+	if sp.newThreadID == "" {
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "collab_agent_spawn_end_no_child", nil)}
+	}
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 3)
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	extras := collabSpawnExtras(sp)
+	out = append(out,
+		canonical.OpStartedEvent{
+			EventBase:            advance(tsUs),
+			SessionNativeID:      m.nativeID,
+			TurnSeq:              turnSeq,
+			Seq:                  opSeq,
+			ParentOpSeq:          -1,
+			Kind:                 canonical.OpSession,
+			Name:                 "spawn",
+			ChildSessionNativeID: sp.newThreadID,
+			Extras:               extras,
+		},
+		canonical.OpFinalizedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         turnSeq,
+			Seq:             opSeq,
+			Status:          spawnStatus(sp.status),
+			EndTs:           tsUs,
+		},
+	)
+	return out
+}
+
+// collabSpawn is the decoded subset of a collab_agent_spawn_end payload (F3).
+// Field names match the real wire form (5 real files; keys sender_thread_id,
+// new_thread_id, new_agent_nickname, new_agent_role, model, reasoning_effort,
+// status).
+type collabSpawn struct {
+	senderThreadID  string
+	newThreadID     string
+	newAgentNick    string
+	newAgentRole    string
+	model           string
+	reasoningEffort string
+	status          string
+}
+
+// decodeCollabSpawn reads the collab_agent_spawn_end payload fields off the
+// verbatim envelope (F3). Unknown siblings are dropped (forward-compat).
+func decodeCollabSpawn(raw []byte) collabSpawn {
+	var env struct {
+		Payload struct {
+			SenderThreadID  string `json:"sender_thread_id"`
+			NewThreadID     string `json:"new_thread_id"`
+			NewAgentNick    string `json:"new_agent_nickname"`
+			NewAgentRole    string `json:"new_agent_role"`
+			Model           string `json:"model"`
+			ReasoningEffort string `json:"reasoning_effort"`
+			Status          string `json:"status"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return collabSpawn{}
+	}
+	p := env.Payload
+	return collabSpawn{
+		senderThreadID:  p.SenderThreadID,
+		newThreadID:     p.NewThreadID,
+		newAgentNick:    p.NewAgentNick,
+		newAgentRole:    p.NewAgentRole,
+		model:           p.Model,
+		reasoningEffort: p.ReasoningEffort,
+		status:          p.Status,
+	}
+}
+
+// collabSpawnExtras builds the spawn op's Extras from the decoded fields (F3):
+// the unconditional relationship=sub_agent marker plus each non-empty one of
+// sender_thread_id, new_agent_nickname, new_agent_role, model and
+// reasoning_effort, so the UI can label the sub-agent edge. Never returns empty.
+func collabSpawnExtras(sp collabSpawn) map[string]any {
+	extras := map[string]any{"relationship": "sub_agent"}
+	if sp.senderThreadID != "" {
+		extras["sender_thread_id"] = sp.senderThreadID
+	}
+	if sp.newAgentNick != "" {
+		extras["new_agent_nickname"] = sp.newAgentNick
+	}
+	if sp.newAgentRole != "" {
+		extras["new_agent_role"] = sp.newAgentRole
+	}
+	if sp.model != "" {
+		extras["model"] = sp.model
+	}
+	if sp.reasoningEffort != "" {
+		extras["reasoning_effort"] = sp.reasoningEffort
+	}
+	return extras
+}
+
+// spawnStatus maps a collab_agent_spawn_end.status to a canonical op status: a
+// "failed"/"error" reported status is failed; anything else (including the
+// common "completed"/"") is completed (F3).
+func spawnStatus(status string) string {
+	switch status {
+	case "failed", "error":
+		return "failed"
+	default:
+		return "completed"
+	}
+}
diff --git a/internal/adapters/codex/ops_enrich.go b/internal/adapters/codex/ops_enrich.go
index e1a0db7..db8cb7c 100644
--- a/internal/adapters/codex/ops_enrich.go
+++ b/internal/adapters/codex/ops_enrich.go
@@ -1,18 +1,23 @@
 package codex
 
-import (
-	"encoding/json"
-
-	"github.com/netdata/ai-viewer/internal/canonical"
-)
+import "github.com/netdata/ai-viewer/internal/canonical"
 
 // enrichOp merges telemetry from an event_msg end-event onto the op matched by
-// call_id, emitting an OpFinalizedEvent that re-states the op's terminal status
-// and carries the enrichment Extras (spec rule #14 exec_command_end, #11
-// web_search_end). It does NOT emit a second op — the ingester reconciles this
-// finalize with the op's existing (turn,seq) row (idempotent upsert). When no
-// op matches the call_id (the start was below a resume offset, or the end is
-// orphaned), it surfaces a DBG log so the enrichment is not silently lost.
+// call_id, so the enrichment Extras reach the op's ops.extras_json (F4, spec rule
+// #14 exec_command_end). The merge is ORDER-INDEPENDENT — real exec ordering is
+// exec_command_end BEFORE function_call_output in ~68-85% of files and after it
+// in the rest:
+//   - op still OPEN (exec-first, the common case): merge the extras onto the
+//     tracked op and stash any exec-derived terminal status; the op STAYS OPEN so
+//     its *_output finalizes it (mapToolOutput re-emits an OpStarted carrying the
+//     merged Extras at that point — OpFinalizedEvent has no Extras field, and the
+//     writer upserts (turn,seq), so the re-emit is an idempotent UPDATE, not a
+//     second op, satisfying rule #14 "do not emit a second op"). If no *_output
+//     ever arrives, the turn-close dangling finalize re-emits the Extras.
+//   - op already FINALIZED (output-first): look it up in finalizedOps and emit an
+//     OpStarted carrying the merged Extras to UPDATE the existing row.
+//   - op NOT locatable (start below a resume offset, or orphaned): a DBG log is
+//     the only honest surface (no op row to attach to).
 //
 // extractor builds the Extras map from the raw payload (nil → no extras, e.g.
 // image_generation_end which only marks completion). A blanked-output
@@ -20,28 +25,74 @@ import (
 // status stays the op's derived terminal status (spec rule #14).
 func (m *fileMapper) enrichOp(rec record, advance func(int64) canonical.EventBase, tsUs int64, extractor func([]byte) map[string]any) []canonical.Event {
 	p := rec.EventMsg
-	op, ok := m.openOps[p.CallID]
-	if !ok {
-		// The op may have already been finalized by its *_output before this
-		// end-event; re-state with the enrichment so the Extras still land.
-		return m.enrichFinalizedOrLog(rec, advance, tsUs, extractor)
-	}
 	var extras map[string]any
 	if extractor != nil {
 		extras = extractor(rec.Raw)
 	}
 	status, errClass := enrichStatus(rec.Raw)
-	if status == "" {
-		// No explicit status/exit_code on the end-event: leave the op's terminal
-		// status to its *_output (or turn-close inference). Emit nothing here but
-		// record the extras on the tracked op so its eventual finalize carries
-		// them (the finalize path reads op.extras when present).
-		mergeExtras(op, extras)
-		return nil
+
+	op, ok := m.openOps[p.CallID]
+	if !ok {
+		// The op may have already been finalized by its *_output before this
+		// end-event (the ~15-32% output-first ordering): re-emit an OpStarted onto
+		// the finalized op so the Extras still land (F4).
+		return m.enrichFinalizedOp(p.CallID, advance, tsUs, p.Type, extras)
 	}
-	op.finalized = true
+	// Op still open (exec-first): stash extras + the exec-derived status on the op
+	// and leave it open. Its *_output (or the turn-close dangling finalize) emits
+	// the canonical OpFinalized AND re-emits an OpStarted carrying these Extras.
 	mergeExtras(op, extras)
-	fin := canonical.OpFinalizedEvent{
+	if status != "" {
+		op.enrichStatus = status
+		op.enrichErrClass = errClass
+	}
+	return nil
+}
+
+// enrichFinalizedOp handles an end-event whose op was already finalized by its
+// *_output (output-first ordering, ~15-32% of exec files) (F4). It re-emits an
+// OpStarted carrying the enrichment Extras to UPDATE the existing op row
+// (idempotent on (turn,seq) — NOT a second op), or nothing when Extras is empty.
+// When the op is not in finalizedOps (start below a resume offset, or orphaned),
+// a DBG log is the only honest surface. The end-event's status is NOT re-applied:
+// the *_output already produced the canonical finalize; enrichment is supplementary.
+func (m *fileMapper) enrichFinalizedOp(callID string, advance func(int64) canonical.EventBase, tsUs int64, evType string, extras map[string]any) []canonical.Event {
+	fop, ok := m.finalizedOps[callID]
+	if !ok {
+		log := map[string]any{"call_id": callID}
+		for k, v := range extras {
+			log[k] = v
+		}
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_"+evType, log)}
+	}
+	if len(extras) == 0 {
+		return nil
+	}
+	return []canonical.Event{m.reemitOpStarted(fop, advance, tsUs, extras)}
+}
+
+// finalizeWithExtras emits the op's OpFinalized AND, when the op accumulated
+// enrichment Extras, a re-emitted OpStarted that carries them onto (turn,seq)
+// (F4, spec rule #14). The OpStarted is emitted FIRST so the op row exists with
+// its Extras before the finalize updates its terminal status; both upsert the
+// same (turn,seq) row. The op's kind/name/namespace are restated so the re-emit
+// is faithful (the writer keeps the original start_ts via MIN).
+func (m *fileMapper) finalizeWithExtras(op *openOp, advance func(int64) canonical.EventBase, tsUs int64, status, errClass string) []canonical.Event {
+	out := make([]canonical.Event, 0, 2)
+	if len(op.extras) > 0 {
+		out = append(out, canonical.OpStartedEvent{
+			EventBase:       advance(tsUs),
+			SessionNativeID: m.nativeID,
+			TurnSeq:         op.turnSeq,
+			Seq:             op.opSeq,
+			ParentOpSeq:     -1,
+			Kind:            op.kind,
+			Name:            op.name,
+			ToolNamespace:   op.namespace,
+			Extras:          op.extras,
+		})
+	}
+	out = append(out, canonical.OpFinalizedEvent{
 		EventBase:       advance(tsUs),
 		SessionNativeID: m.nativeID,
 		TurnSeq:         op.turnSeq,
@@ -49,28 +100,72 @@ func (m *fileMapper) enrichOp(rec record, advance func(int64) canonical.EventBas
 		Status:          status,
 		ErrorClass:      errClass,
 		EndTs:           tsUs,
+	})
+	return out
+}
+
+// reemitOpStarted builds an idempotent OpStarted that carries enrichment Extras
+// onto an already-finalized op's (turn,seq) row (F4). The writer's ON CONFLICT
+// UPDATE merges the Extras and keeps the original start_ts (MIN) and status
+// (the finalize already set it), so this only adds the late telemetry.
+func (m *fileMapper) reemitOpStarted(fop finalizedOp, advance func(int64) canonical.EventBase, tsUs int64, extras map[string]any) canonical.Event {
+	return canonical.OpStartedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         fop.turnSeq,
+		Seq:             fop.opSeq,
+		ParentOpSeq:     -1,
+		Kind:            fop.kind,
+		Name:            fop.name,
+		ToolNamespace:   fop.namespace,
+		Extras:          extras,
 	}
-	delete(m.openOps, p.CallID)
-	return withExtrasLog(m, advance, tsUs, fin, op.extras)
 }
 
-// enrichFinalizedOrLog handles an end-event whose op is no longer tracked (its
-// *_output already finalized it, or its start was below a resume offset). It
-// re-emits an OpFinalizedEvent ONLY when the end-event carries an explicit
-// status AND the op can be located in a turn — otherwise it surfaces a DBG log
-// so the enrichment is visible without inventing an op reference. Because a
-// finalized op was deleted from openOps, this path cannot recover the (turn,seq)
-// and therefore always logs (the *_output already produced the canonical
-// finalize; the enrichment is supplementary telemetry).
-func (m *fileMapper) enrichFinalizedOrLog(rec record, advance func(int64) canonical.EventBase, tsUs int64, extractor func([]byte) map[string]any) []canonical.Event {
-	p := rec.EventMsg
-	extras := map[string]any{"call_id": p.CallID}
-	if extractor != nil {
-		for k, v := range extractor(rec.Raw) {
-			extras[k] = v
+// recordFinalizedOp records a now-finalized op so a LATE enrichment event can
+// merge Extras onto it via reemitOpStarted (F4). The op's kind/name/namespace are
+// preserved so the re-emit restates the op faithfully.
+func (m *fileMapper) recordFinalizedOp(callID string, op *openOp) {
+	if callID == "" {
+		return
+	}
+	m.finalizedOps[callID] = finalizedOp{
+		turnSeq:   op.turnSeq,
+		opSeq:     op.opSeq,
+		kind:      op.kind,
+		name:      op.name,
+		namespace: op.namespace,
+	}
+}
+
+// enrichWebSearch handles event_msg.web_search_end (F7, spec rule #11). It pairs
+// POSITIONALLY with the active turn's most-recent open web_search op
+// (openWebSearch), because web_search_call carries no correlation key. It
+// finalizes that op completed and re-emits an OpStarted carrying the query Extras
+// (OpFinalized has no Extras field). When no web_search is open (the end is
+// orphaned, or its call was below a resume offset), a DBG log keeps it visible.
+func (m *fileMapper) enrichWebSearch(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
+	ws := m.openWebSearch
+	if ws == nil {
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "web_search_end_no_call", nil)}
+	}
+	m.openWebSearch = nil
+	extras := webSearchExtras(rec.Raw)
+	op, ok := m.openOps[ws.syntheticCallID]
+	if !ok {
+		// The op was already finalized (e.g. at a prior turn close) — re-emit onto
+		// its row via the positional ref.
+		fop := finalizedOp{turnSeq: ws.turnSeq, opSeq: ws.opSeq, kind: canonical.OpTool, name: "web_search", namespace: "web"}
+		if len(extras) == 0 {
+			return nil
 		}
+		return []canonical.Event{m.reemitOpStarted(fop, advance, tsUs, extras)}
 	}
-	return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_"+p.Type, extras)}
+	op.finalized = true
+	mergeExtras(op, extras)
+	delete(m.openOps, ws.syntheticCallID)
+	m.recordFinalizedOp(ws.syntheticCallID, op)
+	return m.finalizeWithExtras(op, advance, tsUs, "completed", "")
 }
 
 // enrichMcp handles event_msg.mcp_tool_call_end (spec rule #15). It re-stamps
@@ -102,6 +197,7 @@ func (m *fileMapper) enrichMcp(rec record, advance func(int64) canonical.EventBa
 		namespace = "mcp:" + server
 	}
 	op.name = name
+	op.namespace = namespace
 	status, errClass := mcpResultStatus(rec.Raw)
 	op.finalized = true
 	out := []canonical.Event{
@@ -126,6 +222,7 @@ func (m *fileMapper) enrichMcp(rec record, advance func(int64) canonical.EventBa
 		},
 	}
 	delete(m.openOps, p.CallID)
+	m.recordFinalizedOp(p.CallID, op)
 	return out
 }
 
@@ -150,6 +247,7 @@ func (m *fileMapper) enrichPatchApply(rec record, advance func(int64) canonical.
 		EndTs:           tsUs,
 	}
 	delete(m.openOps, p.CallID)
+	m.recordFinalizedOp(p.CallID, op)
 	return []canonical.Event{fin}
 }
 
@@ -167,160 +265,6 @@ func mergeExtras(op *openOp, extras map[string]any) {
 	}
 }
 
-// withExtrasLog appends a DBG LogEntry carrying the op's enrichment extras after
-// its finalize, so exec/web telemetry is visible in the Logs tab even though the
-// canonical OpFinalized carries no Extras field. Returns the finalize alone when
-// there are no extras.
-func withExtrasLog(m *fileMapper, advance func(int64) canonical.EventBase, tsUs int64, fin canonical.OpFinalizedEvent, extras map[string]any) []canonical.Event {
-	out := []canonical.Event{fin}
-	if len(extras) > 0 {
-		out = append(out, m.logEntry(advance(tsUs), "DBG", "op_enrichment", extras))
-	}
-	return out
-}
-
-// execCommandExtras extracts the exec_command_end telemetry merged into the op
-// (spec rule #14): exit_code, duration, cwd, source, and the truncated
-// aggregated_output length (the body itself is blanked at the source in Limited
-// mode — only aggregated_output survives, truncated to 10 KB).
-func execCommandExtras(raw []byte) map[string]any {
-	var env struct {
-		Payload struct {
-			ExitCode         *int64 `json:"exit_code"`
-			Duration         any    `json:"duration"`
-			Cwd              string `json:"cwd"`
-			Source           string `json:"source"`
-			AggregatedOutput string `json:"aggregated_output"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return nil
-	}
-	extras := map[string]any{}
-	if env.Payload.ExitCode != nil {
-		extras["exec_exit_code"] = *env.Payload.ExitCode
-	}
-	if env.Payload.Cwd != "" {
-		extras["exec_cwd"] = env.Payload.Cwd
-	}
-	if env.Payload.Source != "" {
-		extras["exec_source"] = env.Payload.Source
-	}
-	if env.Payload.AggregatedOutput != "" {
-		extras["exec_output_bytes"] = len(env.Payload.AggregatedOutput)
-	}
-	if len(extras) == 0 {
-		return nil
-	}
-	return extras
-}
-
-// webSearchExtras extracts event_msg.web_search_end query/action (spec rule #11).
-func webSearchExtras(raw []byte) map[string]any {
-	var env struct {
-		Payload struct {
-			Query string `json:"query"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return nil
-	}
-	if env.Payload.Query == "" {
-		return nil
-	}
-	return map[string]any{"query": trimPreview(env.Payload.Query, previewMax)}
-}
-
-// enrichStatus derives a terminal status/ErrorClass from an end-event carrying
-// an exit_code (spec rule #14). exit_code 0 → completed; non-zero → failed
-// (command_failed). A blanked output is NOT an error (spec rule #14). Returns
-// ("", "") when the event carries no exit_code (status left to the *_output).
-func enrichStatus(raw []byte) (status, errClass string) {
-	var env struct {
-		Payload struct {
-			ExitCode *int64 `json:"exit_code"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return "", ""
-	}
-	if env.Payload.ExitCode == nil {
-		return "", ""
-	}
-	if *env.Payload.ExitCode == 0 {
-		return "completed", ""
-	}
-	return "failed", "command_failed"
-}
-
-// mcpInvocation extracts mcp_tool_call_end.invocation.{server,tool} (spec rule
-// #15). Returns ("","") when absent.
-func mcpInvocation(raw []byte) (server, tool string) {
-	var env struct {
-		Payload struct {
-			Invocation struct {
-				Server string `json:"server"`
-				Tool   string `json:"tool"`
-			} `json:"invocation"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return "", ""
-	}
-	return env.Payload.Invocation.Server, env.Payload.Invocation.Tool
-}
-
-// mcpResultStatus derives status from mcp_tool_call_end.result, a
-// Result<CallToolResult, String> serialized as {"Ok":...} or {"Err":"..."} (spec
-// rule #15, protocol.rs:2191-2228). An Err, or a CallToolResult with
-// is_error=true, is failed; anything else completed.
-func mcpResultStatus(raw []byte) (status, errClass string) {
-	var env struct {
-		Payload struct {
-			Result json.RawMessage `json:"result"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return "completed", ""
-	}
-	body := jsonTrim(env.Payload.Result)
-	if len(body) == 0 {
-		return "completed", ""
-	}
-	var res struct {
-		Err json.RawMessage `json:"Err"`
-		Ok  struct {
-			IsError bool `json:"is_error"`
-		} `json:"Ok"`
-	}
-	if json.Unmarshal(body, &res) != nil {
-		return "completed", ""
-	}
-	if len(jsonTrim(res.Err)) > 0 || res.Ok.IsError {
-		return "failed", "tool_error"
-	}
-	return "completed", ""
-}
-
-// patchApplyStatus derives status from patch_apply_end.success/status (spec rule
-// #16). success=false → failed; an explicit status string maps directly. Default
-// completed.
-func patchApplyStatus(raw []byte) (status, errClass string) {
-	var env struct {
-		Payload struct {
-			Success *bool  `json:"success"`
-			Status  string `json:"status"`
-		} `json:"payload"`
-	}
-	if json.Unmarshal(raw, &env) != nil {
-		return "completed", ""
-	}
-	if env.Payload.Success != nil && !*env.Payload.Success {
-		return "failed", "patch_failed"
-	}
-	switch env.Payload.Status {
-	case "failed", "error":
-		return "failed", "patch_failed"
-	}
-	return "completed", ""
-}
+// The narrow JSON decoders for the enrichment end-events (execCommandExtras,
+// webSearchExtras, enrichStatus, mcpInvocation, mcpResultStatus,
+// patchApplyStatus) live in ops_enrich_decode.go.
diff --git a/internal/adapters/codex/ops_enrich_decode.go b/internal/adapters/codex/ops_enrich_decode.go
new file mode 100644
index 0000000..53821eb
--- /dev/null
+++ b/internal/adapters/codex/ops_enrich_decode.go
@@ -0,0 +1,154 @@
+package codex
+
+import "encoding/json"
+
+// This file holds the narrow JSON decoders that pull telemetry fields off an
+// event_msg end-event's verbatim payload (exec_command_end, web_search_end,
+// mcp_tool_call_end, patch_apply_end). They are pure functions with no mapper
+// state, split from ops_enrich.go so the enrichment dispatch stays focused.
+
+// execCommandExtras extracts the exec_command_end telemetry merged into the op
+// (spec rule #14): exit_code, cwd, source, and the byte length of the truncated
+// aggregated_output (in Limited mode the body is blanked at the source and only
+// aggregated_output survives, cut to 10 KB). duration is decoded, not emitted.
+func execCommandExtras(raw []byte) map[string]any {
+	var env struct {
+		Payload struct {
+			ExitCode         *int64 `json:"exit_code"`
+			Duration         any    `json:"duration"`
+			Cwd              string `json:"cwd"`
+			Source           string `json:"source"`
+			AggregatedOutput string `json:"aggregated_output"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return nil
+	}
+	extras := map[string]any{}
+	if env.Payload.ExitCode != nil {
+		extras["exec_exit_code"] = *env.Payload.ExitCode
+	}
+	if env.Payload.Cwd != "" {
+		extras["exec_cwd"] = env.Payload.Cwd
+	}
+	if env.Payload.Source != "" {
+		extras["exec_source"] = env.Payload.Source
+	}
+	if env.Payload.AggregatedOutput != "" {
+		extras["exec_output_bytes"] = len(env.Payload.AggregatedOutput)
+	}
+	if len(extras) == 0 {
+		return nil
+	}
+	return extras
+}
+
+// webSearchExtras extracts event_msg.web_search_end's query only (spec rule #11).
+func webSearchExtras(raw []byte) map[string]any {
+	var env struct {
+		Payload struct {
+			Query string `json:"query"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return nil
+	}
+	if env.Payload.Query == "" {
+		return nil
+	}
+	return map[string]any{"query": trimPreview(env.Payload.Query, previewMax)}
+}
+
+// enrichStatus derives a terminal status/ErrorClass from an end-event carrying
+// an exit_code (spec rule #14). exit_code 0 → completed; non-zero → failed
+// (command_failed). A blanked output is NOT an error (spec rule #14). Returns
+// ("", "") when the event carries no exit_code (status left to the *_output).
+func enrichStatus(raw []byte) (status, errClass string) {
+	var env struct {
+		Payload struct {
+			ExitCode *int64 `json:"exit_code"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return "", ""
+	}
+	if env.Payload.ExitCode == nil {
+		return "", ""
+	}
+	if *env.Payload.ExitCode == 0 {
+		return "completed", ""
+	}
+	return "failed", "command_failed"
+}
+
+// mcpInvocation extracts mcp_tool_call_end.invocation.{server,tool} (spec rule
+// #15). Returns ("","") when absent.
+func mcpInvocation(raw []byte) (server, tool string) {
+	var env struct {
+		Payload struct {
+			Invocation struct {
+				Server string `json:"server"`
+				Tool   string `json:"tool"`
+			} `json:"invocation"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return "", ""
+	}
+	return env.Payload.Invocation.Server, env.Payload.Invocation.Tool
+}
+
+// mcpResultStatus derives status from mcp_tool_call_end.result, a
+// Result<CallToolResult, String> serialized as {"Ok":...} or {"Err":"..."} (spec
+// rule #15, protocol.rs:2191-2228). An Err, or a CallToolResult with
+// is_error=true, is failed; anything else completed.
+func mcpResultStatus(raw []byte) (status, errClass string) {
+	var env struct {
+		Payload struct {
+			Result json.RawMessage `json:"result"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return "completed", ""
+	}
+	body := jsonTrim(env.Payload.Result)
+	if len(body) == 0 {
+		return "completed", ""
+	}
+	var res struct {
+		Err json.RawMessage `json:"Err"`
+		Ok  struct {
+			IsError bool `json:"is_error"`
+		} `json:"Ok"`
+	}
+	if json.Unmarshal(body, &res) != nil {
+		return "completed", ""
+	}
+	if len(jsonTrim(res.Err)) > 0 || res.Ok.IsError {
+		return "failed", "tool_error"
+	}
+	return "completed", ""
+}
+
+// patchApplyStatus derives status from patch_apply_end.success/status (spec rule
+// #16). success=false, or status "failed"/"error" → failed (patch_failed); any
+// other value, or an undecodable payload, defaults to completed.
+func patchApplyStatus(raw []byte) (status, errClass string) {
+	var env struct {
+		Payload struct {
+			Success *bool  `json:"success"`
+			Status  string `json:"status"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return "completed", ""
+	}
+	if env.Payload.Success != nil && !*env.Payload.Success {
+		return "failed", "patch_failed"
+	}
+	switch env.Payload.Status {
+	case "failed", "error":
+		return "failed", "patch_failed"
+	}
+	return "completed", ""
+}
diff --git a/internal/adapters/codex/ops_event.go b/internal/adapters/codex/ops_event.go
index edd9080..a424a89 100644
--- a/internal/adapters/codex/ops_event.go
+++ b/internal/adapters/codex/ops_event.go
@@ -47,11 +47,28 @@ func (m *fileMapper) mapEventMsg(rec record, advance func(int64) canonical.Event
 	case "patch_apply_end":
 		return m.enrichPatchApply(rec, advance, tsUs), nil
 	case "web_search_end":
-		return m.enrichOp(rec, advance, tsUs, webSearchExtras), nil
+		return m.enrichWebSearch(rec, advance, tsUs), nil
 	case "image_generation_end":
 		return m.enrichOp(rec, advance, tsUs, nil), nil
+	case "collab_agent_spawn_end":
+		// Parent→child sub-agent spawn (spec "Sub-Agent Linkage", F3): emit a
+		// session op whose ChildSessionNativeID is new_thread_id (NOT
+		// agent_ref.thread_id — the real link is sender_thread_id→new_thread_id).
+		return m.mapCollabSpawn(rec, advance, tsUs), nil
+	case "collab_close_end", "collab_waiting_end":
+		// Recognized collab lifecycle markers (F3): keep visible as a DBG log; no
+		// canonical op (they carry no parent→child edge the topology view needs).
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "event_msg:"+p.Type, nil)}, nil
 	case "context_compacted":
-		// event_msg.context_compacted → OpCompaction (spec rule #20, gap #4).
+		// event_msg.context_compacted is the bare companion marker of the adjacent
+		// top-level `compacted` line (same timestamp) — TWO representations of ONE
+		// compaction (F5, spec rule #20). The op is emitted from the data-bearing
+		// `compacted`; this companion is suppressed so exactly ONE op is produced.
+		// A context_compacted with no preceding `compacted` (defensive) emits the
+		// op itself so a lone marker is not lost.
+		if m.suppressContextCompacted() {
+			return nil, nil
+		}
 		return m.emitCompactionOp(advance, tsUs, map[string]any{"trigger": "auto"}, "json"), nil
 	case "error":
 		return []canonical.Event{m.logEntry(advance(tsUs), "ERR", "error", errorExtras(p))}, nil
@@ -80,8 +97,15 @@ func (m *fileMapper) mapTaskStarted(rec record, advance func(int64) canonical.Ev
 	if sa := startedAtMicros(rec.Raw); sa > startUs {
 		startUs = sa
 	}
+	out := make([]canonical.Event, 0, 4)
+	// A new turn_id opening supersedes the prior open turn (F1/F2, spec edge #2/#3).
+	// In a new-format session task_started follows turn_context, so the prior turn
+	// is usually already superseded by the turn_context handler; this call covers
+	// the task_started-first ordering and is a no-op for the same turn_id. The
+	// prior turn's close status is decided by ITS OWN format inside the helper.
+	out = append(out, m.supersedePriorTurn(p.TurnID, advance, startUs)...)
 	ts := m.openTurn(p.TurnID, startUs)
-	out := make([]canonical.Event, 0, 1)
+	ts.sawTaskStarted = true
 	if ev := m.emitTurnStarted(ts, advance(startUs)); ev != nil {
 		out = append(out, ev)
 	}
@@ -165,10 +189,10 @@ func (m *fileMapper) mapTokenCount(rec record, advance func(int64) canonical.Eve
 	ts := m.tokenTurn(p.TurnID)
 	if ts == nil {
 		// No turn to attribute to yet (token_count before any turn opened):
-		// surface a DBG log so it is visible and drop the counts (they cannot be
-		// attributed; rare and not load-bearing).
-		_ = tsUs
-		return nil
+		// surface a DBG log so it is visible — never drop silently (spec rule #6
+		// "no silent failures"; F6). The counts cannot be attributed (rare and
+		// not load-bearing), but the drop is recorded with the offending turn_id.
+		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "token_count_no_turn", map[string]any{"turn_id": p.TurnID})}
 	}
 	ts.addTokenUsage(info)
 	return nil
diff --git a/internal/adapters/codex/ops_tools.go b/internal/adapters/codex/ops_tools.go
index b9c6ad6..8f14332 100644
--- a/internal/adapters/codex/ops_tools.go
+++ b/internal/adapters/codex/ops_tools.go
@@ -2,6 +2,7 @@ package codex
 
 import (
 	"encoding/json"
+	"fmt"
 	"strings"
 
 	"github.com/netdata/ai-viewer/internal/canonical"
@@ -41,7 +42,7 @@ func (m *fileMapper) mapToolCall(p *responseItemPayload, advance func(int64) can
 	if bodyBytes > 0 {
 		out = append(out, m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", "json", bodyBytes))
 	}
-	m.trackOp(p.CallID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name)
+	m.trackOp(p.CallID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name, namespace)
 	return out
 }
 
@@ -56,36 +57,97 @@ func (m *fileMapper) mapToolCall(p *responseItemPayload, advance func(int64) can
 func (m *fileMapper) mapToolOutput(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
 	op, ok := m.openOps[p.CallID]
 	if !ok || op.finalized {
-		// Unmatched / already-finalized output: surface and skip (spec edge #10).
+		// The op may already exist and be finalized because its enrichment
+		// end-event (exec_command_end) was the close signal in a rare path, OR the
+		// output is genuinely orphaned. If we can locate the op (finalizedOps) and
+		// the output has a body, attach the tool_response PayloadRef to it rather
+		// than warning (spec rule #9 — the body still belongs to the op). Anything
+		// else, including a located op with an empty body, surfaces a WRN (edge #10).
+		if fop, found := m.finalizedOps[p.CallID]; found && bodyBytes > 0 {
+			return []canonical.Event{m.payloadRef(advance(tsUs), fop.turnSeq, fop.opSeq, "tool_response", "json", bodyBytes)}
+		}
 		return []canonical.Event{m.logEntry(advance(tsUs), "WRN", "tool_output_unmatched", map[string]any{"call_id": p.CallID})}
 	}
 	op.finalized = true
 	status, errClass := outputStatus(p.Output)
-	out := []canonical.Event{
-		canonical.OpFinalizedEvent{
+	// A prior exec_command_end (exec-first ~68-85% ordering) may have stashed an
+	// authoritative exit_code-derived status; it WINS over a benign-looking output
+	// string (a non-zero exit with terse stdout must finalize failed) (F4).
+	if op.enrichStatus != "" {
+		status, errClass = op.enrichStatus, op.enrichErrClass
+	}
+	out := make([]canonical.Event, 0, 3)
+	// If a prior enrichment (exec_command_end BEFORE function_call_output — the
+	// ~68-85% exec ordering) stashed Extras on the op, re-emit an OpStarted
+	// carrying them so they reach ops.extras_json (F4, spec rule #14). The writer
+	// upserts (turn,seq), so this is an idempotent UPDATE, not a second op.
+	if len(op.extras) > 0 {
+		out = append(out, canonical.OpStartedEvent{
 			EventBase:       advance(tsUs),
 			SessionNativeID: m.nativeID,
 			TurnSeq:         op.turnSeq,
 			Seq:             op.opSeq,
-			Status:          status,
-			ErrorClass:      errClass,
-			EndTs:           tsUs,
-		},
+			ParentOpSeq:     -1,
+			Kind:            op.kind,
+			Name:            op.name,
+			ToolNamespace:   op.namespace,
+			Extras:          op.extras,
+		})
 	}
+	out = append(out, canonical.OpFinalizedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         op.turnSeq,
+		Seq:             op.opSeq,
+		Status:          status,
+		ErrorClass:      errClass,
+		EndTs:           tsUs,
+	})
 	if bodyBytes > 0 {
 		out = append(out, m.payloadRef(advance(tsUs), op.turnSeq, op.opSeq, "tool_response", "json", bodyBytes))
 	}
 	delete(m.openOps, p.CallID)
+	// Record the finalized op so a LATER exec_command_end (output-first ~15-32%
+	// ordering) can still merge its Extras via an OpStarted re-emit (F4).
+	m.recordFinalizedOp(p.CallID, op)
 	return out
 }
 
-// mapWebSearchCall handles response_item.web_search_call (spec rule #11). It
-// emits a tool op (Name=web_search, namespace=web). The companion
-// event_msg.web_search_end enriches it with the query/action (ops_event.go); the
-// op is tracked by call_id for that enrichment and finalized at turn close if no
-// end arrives.
+// mapWebSearchCall handles response_item.web_search_call (spec rule #11, F7). It
+// emits a tool op (Name=web_search, namespace=web). web_search_call carries
+// NEITHER id NOR call_id, so no real call_id can track the op; instead it is
+// recorded as the active turn's most-recent open web_search op (openWebSearch)
+// for POSITIONAL pairing with the companion event_msg.web_search_end (which DOES
+// carry a call_id, but for a DIFFERENT correlation space). If no end arrives the
+// op finalizes at turn close as a dangling op (it is tracked under a synthetic
+// per-op call_id so finalizeDanglingOps closes it).
 func (m *fileMapper) mapWebSearchCall(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
-	return m.emitSingleToolOp(p.CallID, "web_search", "web", advance, tsUs, bodyBytes)
+	ts := m.ensureTurn(tsUs)
+	out := make([]canonical.Event, 0, 2)
+	if ev := m.emitTurnStarted(ts, advance(tsUs)); ev != nil {
+		out = append(out, ev)
+	}
+	turnSeq, opSeq := m.nextOp(ts)
+	out = append(out, canonical.OpStartedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         turnSeq,
+		Seq:             opSeq,
+		ParentOpSeq:     -1,
+		Kind:            canonical.OpTool,
+		Name:            "web_search",
+		ToolNamespace:   "web",
+	})
+	if bodyBytes > 0 {
+		out = append(out, m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", "json", bodyBytes))
+	}
+	// Track under a synthetic call_id so a turn-close dangling-finalize closes it
+	// if no web_search_end arrives; the synthetic id never collides with a real
+	// call_id (the "ws#" prefix is not a codex call_id form).
+	synthetic := fmt.Sprintf("ws#%d:%d", turnSeq, opSeq)
+	m.trackOp(synthetic, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, "web_search", "web")
+	m.openWebSearch = &openWebSearchRef{turnID: m.activeTurnID, turnSeq: turnSeq, opSeq: opSeq, syntheticCallID: synthetic}
+	return out
 }
 
 // mapImageGenCall handles response_item.image_generation_call (spec rule #12):
@@ -123,7 +185,7 @@ func (m *fileMapper) emitSingleToolOp(callID, name, namespace string, advance fu
 	if bodyBytes > 0 {
 		out = append(out, m.payloadRef(advance(tsUs), turnSeq, opSeq, "tool_request", "json", bodyBytes))
 	}
-	m.trackOp(callID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name)
+	m.trackOp(callID, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, name, namespace)
 	return out
 }
 
diff --git a/internal/adapters/codex/parser_fuzz_test.go b/internal/adapters/codex/parser_fuzz_test.go
index 5ce56ee..047a2e7 100644
--- a/internal/adapters/codex/parser_fuzz_test.go
+++ b/internal/adapters/codex/parser_fuzz_test.go
@@ -42,8 +42,10 @@ func FuzzParseLine(f *testing.F) {
 		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_1","output":{"content":"ok"}}}`),
 		// response_item, custom_tool_call / output.
 		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"custom_tool_call","call_id":"call_2","name":"apply_patch","input":"*** Begin Patch","status":"completed"}}`),
-		// response_item, web_search_call.
+		// response_item, web_search_call WITH a call_id (older shape, tolerated).
 		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"web_search_call","call_id":"ws_1","status":"completed","action":{"type":"search","query":"q"}}}`),
+		// response_item, web_search_call with NO id/call_id (real shape, F7).
+		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"web_search_call","status":"completed","action":{"type":"search","query":"q"}}}`),
 		// response_item, compaction / context_compaction.
 		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"compaction","encrypted_content":"BBBB"}}`),
 		[]byte(`{"timestamp":"2025-11-20T16:59:11.000Z","type":"response_item","payload":{"type":"context_compaction","encrypted_content":null}}`),
@@ -73,8 +75,13 @@ func FuzzParseLine(f *testing.F) {
 		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"call_1","turn_id":"turn-1","success":true,"status":"completed","changes":{}}}`),
 		// event_msg, context_compacted (unit struct).
 		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"context_compacted"}}`),
-		// event_msg, web_search_end.
+		// event_msg, web_search_end (carries a call_id; pairs positionally, F7).
 		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"web_search_end","call_id":"ws_1","query":"q","action":{"type":"search"}}}`),
+		// event_msg, collab_agent_spawn_end (parent→child via new_thread_id, F3).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"collab_agent_spawn_end","sender_thread_id":"parent-uuid","new_thread_id":"child-uuid","new_agent_nickname":"Dewey","new_agent_role":"explorer","model":"gpt-5.5","reasoning_effort":"high","status":"completed"}}`),
+		// event_msg, collab_close_end / collab_waiting_end (recognized, log-only, F3).
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"collab_close_end","call_id":"x"}}`),
+		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"collab_waiting_end","call_id":"y"}}`),
 		// event_msg, error (Extended).
 		[]byte(`{"timestamp":"2025-11-20T16:59:12.000Z","type":"event_msg","payload":{"type":"error","message":"boom"}}`),
 		// compacted top-level line with replacement_history.
diff --git a/internal/adapters/codex/payloads.go b/internal/adapters/codex/payloads.go
index 60660de..9fc14b9 100644
--- a/internal/adapters/codex/payloads.go
+++ b/internal/adapters/codex/payloads.go
@@ -3,6 +3,8 @@ package codex
 import (
 	"fmt"
 	"path/filepath"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
 )
 
 // payloadLocationURI builds a containment-checked "file://<resolved-abs>"
@@ -56,3 +58,61 @@ func resolveWithinRoot(root, abs string) (string, bool, error) {
 	}
 	return withinResolvedRoot(resolvedRoot, abs)
 }
+
+// payloadURI builds the PayloadRef LocationURI for a body inline in this
+// rollout file at the given 1-based line number (spec rule #6/#7/#8, edge #7).
+// The form is "file://<symlink-resolved-abs>#L<line>" so the presenter reads the
+// exact record on demand without ai-viewer ever copying the body into SQLite.
+//
+// Containment (Chunk D, security.md §6): the absolute path is resolved through
+// symlinks and verified to stay inside the configured sessions root via
+// payloadLocationURI. The "#L<line>" anchor is appended AFTER the file:// path is
+// built so the anchor is identical to Chunk B's contract
+// (TestMapper_PayloadRefLineAnchor). When m.root is empty (mapper-only tests) the
+// containment resolve is skipped and the cleaned absolute path is used; when
+// m.absPath is empty the URI is just the line anchor.
+//
+// The scanner is the authoritative containment gate: readRollout (scanner.go)
+// refuses any file that resolves outside the root BEFORE a single line is
+// streamed, so by the time the mapper builds a ref the owning file is already
+// known to be contained. A resolve failure or apparent escape here (e.g. the
+// file removed between the scanner's open and this build — impossible while the
+// scanner holds the fd, but handled defensively) therefore falls back to the
+// cleaned absolute path rather than dropping the anchor, keeping the ref usable
+// and the op→payload linkage (payload_refs.op_id NOT NULL) intact.
+func (m *fileMapper) payloadURI(lineNo int) string {
+	anchor := ""
+	if lineNo > 0 {
+		anchor = fmt.Sprintf("#L%d", lineNo)
+	}
+	if m.absPath == "" {
+		return anchor
+	}
+	uri, err := payloadLocationURI(m.root, m.absPath)
+	if err != nil {
+		// Containment resolve failed (escape or unresolvable). The scanner
+		// already vetted the file before streaming, so fall back to the cleaned
+		// absolute path rather than emit a lossy ref.
+		uri = "file://" + filepath.ToSlash(filepath.Clean(m.absPath))
+	}
+	return uri + anchor
+}
+
+// payloadRef builds a PayloadRefEvent for a body inline in this rollout at the
+// record currently being mapped (m.lineNo). It is scoped to the owning op
+// (turnSeq/opSeq) so it references an op that EXISTS — payload_refs.op_id is NOT
+// NULL REFERENCES ops(id), so an orphan ref would FK-roll-back the ingest batch
+// (mirrors claude_code's P1.1a discipline). OriginalBytes is the byte length of
+// the verbatim line so the presenter can budget a read; -1 when unknown.
+func (m *fileMapper) payloadRef(base canonical.EventBase, turnSeq, opSeq int, kind, format string, originalBytes int64) canonical.PayloadRefEvent {
+	return canonical.PayloadRefEvent{
+		EventBase:       base,
+		SessionNativeID: m.nativeID,
+		TurnSeq:         turnSeq,
+		OpSeq:           opSeq,
+		PayloadKind:     kind,
+		Format:          format,
+		LocationURI:     m.payloadURI(m.lineNo),
+		OriginalBytes:   originalBytes,
+	}
+}
diff --git a/internal/adapters/codex/scanner.go b/internal/adapters/codex/scanner.go
index 1ed3a21..55679e0 100644
--- a/internal/adapters/codex/scanner.go
+++ b/internal/adapters/codex/scanner.go
@@ -126,7 +126,10 @@ func reportLegacy(cur Cursor, legacy []string, onError func(error)) Cursor {
 func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID string, start FileCursor, out chan<- canonical.Event, onError func(error)) (FileCursor, int, error) {
 	// Containment guard on EVERY rollout open (security.md §6): a *.jsonl symlink
 	// planted in a watched shard dir after Tail starts would otherwise be opened.
-	// Open the RESOLVED path, not the original (no TOCTOU). A refused path
+	// The resolved path is containment-checked before it is opened, and the
+	// RESOLVED path is what gets opened (not the original symlink). The window
+	// between the check and the open is an accepted limitation for this localhost,
+	// read-only tool (no O_NOFOLLOW hardening this round; F9). A refused path
 	// surfaces a SourceError (the caller logs the returned error) and is skipped.
 	resolvedAbs, ok, cerr := withinResolvedRoot(resolvedRoot, r.abs)
 	if cerr != nil {
@@ -211,15 +214,22 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 		cur.LastTsUs = mapper.lastTsUs
 	}
 
-	// EOF-finalize (rule #23): finalize a hanging open turn failed/incomplete
-	// ONLY when the file is fully read AND its mtime is stale ≥ 1 h. The mapper
-	// owns the open-turn decision (finalizeStale is a no-op for a cleanly-ended
-	// session — SOW C#3: no clean-EOF completed finalize). A fresh file leaves
-	// the turn open for the next append. The synthetic end timestamp is the file
-	// mtime in micros (mapper_finalize.go).
+	// EOF-finalize (rule #23, spec edge #3, F1): when the file is FULLY read, ask
+	// the mapper to finalize a hanging open turn. The mapper owns the open-turn
+	// decision and splits on format + staleness (mapper_finalize.go):
+	//   - OLD-format open turn (turn_context-only): closed COMPLETED at EOF
+	//     regardless of staleness (spec edge #3 "close at EOF"); no
+	//     SessionFinalized (SOW C#3).
+	//   - NEW-format open turn: closed failed/incomplete + SessionFinalized ONLY
+	//     when the mtime is stale ≥ 1 h (rule #23); a fresh file leaves it open.
+	//   - clean end / no open turn: nothing (stays running, SOW C#3).
+	// This is called UNCONDITIONALLY at full-read EOF (not only when stale) and
+	// passed the stale bool, so the OLD-format completed-close fires on fresh files
+	// too (F1). The synthetic end timestamp is the file mtime in micros.
 	fullyRead := res.advanced >= size
-	if fullyRead && time.Since(info.ModTime()) >= staleAfter {
-		for _, ev := range mapper.finalizeStale(mtimeUs) {
+	if fullyRead {
+		stale := time.Since(info.ModTime()) >= staleAfter
+		for _, ev := range mapper.finalizeAtEOF(stale, mtimeUs) {
 			select {
 			case <-ctx.Done():
 				return cur, res.emitted, ctx.Err()
diff --git a/internal/adapters/codex/scanner_test.go b/internal/adapters/codex/scanner_test.go
index 5cf7519..e6e46b7 100644
--- a/internal/adapters/codex/scanner_test.go
+++ b/internal/adapters/codex/scanner_test.go
@@ -166,6 +166,38 @@ func TestDiscover_MultiShardSorted(t *testing.T) {
 	}
 }
 
+// TestDiscover_ShardDepthRequired asserts a rollout-*.jsonl that is NOT at the
+// YYYY/MM/DD shard depth (directly under the root, under YYYY or YYYY/MM, or in
+// a non-date subtree) is NOT discovered as a modern rollout (F8). Only the true
+// YYYY/MM/DD layout is ingested, mirroring recorder.rs:1325-1354.
+func TestDiscover_ShardDepthRequired(t *testing.T) {
+	t.Parallel()
+	root := t.TempDir()
+	// A real shard rollout (must be found).
+	good := filepath.Join(root, "2025", "11", "20", "rollout-2025-11-20T10-00-00-"+uuid7(1)+".jsonl")
+	writeFileBytes(t, good, completeSession("sid-good"))
+	// Stray rollout directly under the root (depth 0 — wrong).
+	writeFileBytes(t, filepath.Join(root, "rollout-2025-11-20T10-00-00-strayroot.jsonl"), completeSession("sid-stray0"))
+	// Stray rollout under a single date component (depth 1 — wrong).
+	writeFileBytes(t, filepath.Join(root, "2025", "rollout-2025-11-20T10-00-00-strayyear.jsonl"), completeSession("sid-stray1"))
+	// Stray rollout under YYYY/MM (depth 2 — wrong).
+	writeFileBytes(t, filepath.Join(root, "2025", "11", "rollout-2025-11-20T10-00-00-straymonth.jsonl"), completeSession("sid-stray2"))
+	// Stray rollout under a non-numeric subtree (wrong — not a date shard).
+	writeFileBytes(t, filepath.Join(root, "scratch", "x", "y", "rollout-2025-11-20T10-00-00-straydir.jsonl"), completeSession("sid-straydir"))
+
+	disc, err := discoverRollouts(root, nil)
+	if err != nil {
+		t.Fatalf("discoverRollouts: %v", err)
+	}
+	if len(disc.modern) != 1 {
+		t.Fatalf("modern count = %d, want 1 (only the YYYY/MM/DD shard rollout; F8); got %+v", len(disc.modern), disc.modern)
+	}
+	wantRel := "2025/11/20/rollout-2025-11-20T10-00-00-" + uuid7(1) + ".jsonl"
+	if disc.modern[0].rel != wantRel {
+		t.Errorf("rel = %q, want %q", disc.modern[0].rel, wantRel)
+	}
+}
+
 // TestDiscover_LegacyClassifiedSeparately asserts legacy flat .json files
 // directly under the root are returned in disc.legacy, not disc.modern, and
 // that a legacy-named .json inside a shard dir is NOT treated as legacy.
@@ -242,10 +274,14 @@ func TestScan_ResumeNoDupNoGap(t *testing.T) {
 	t.Parallel()
 	root := t.TempDir()
 	path := shardPath(root, uuid7(1))
-	// First half: session_meta + turn_context (turn opened, not closed).
+	// First half: session_meta + turn_context + task_started (a NEW-format turn
+	// opened, not closed). task_started is included so the turn is unambiguously
+	// new-format — a fresh new-format open turn is left OPEN at EOF (F1), so phase 1
+	// emits no premature finalize and the resume mirrors the one-shot exactly.
 	half := []string{
 		metaLine("sid-r", `"exec"`),
 		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1","started_at":1763664000}}`,
 	}
 	writeFileBytes(t, path, []byte(strings.Join(half, "\n")+"\n"))
 	// Keep mtime fresh so the partial turn is NOT stale-finalized.
diff --git a/internal/adapters/codex/tailer.go b/internal/adapters/codex/tailer.go
index d80a64d..2cdb0c9 100644
--- a/internal/adapters/codex/tailer.go
+++ b/internal/adapters/codex/tailer.go
@@ -346,13 +346,15 @@ func flushDirty(ctx context.Context, resolvedRoot, root, sourceID string, dirty
 // modern rollout is "YYYY/MM/DD/rollout-….jsonl". The abs path is built under
 // the RESOLVED root so the containment open in readRollout resolves cleanly.
 // Returns false when rel is not a recognized modern rollout (a legacy .json, an
-// ignored name, or a path with no rollout basename).
+// ignored name, a path with no rollout basename, OR a rollout-*.jsonl at the
+// wrong shard depth — F8: only the YYYY/MM/DD layout is ingested, so a stray
+// file directly under the root is not tailed even if a Write event fires on it).
 func rolloutForRel(resolvedRoot, rel string) (rollout, bool) {
 	base := rel
 	if i := strings.LastIndex(rel, "/"); i >= 0 {
 		base = rel[i+1:]
 	}
-	if !modernNameRe.MatchString(base) {
+	if !modernNameRe.MatchString(base) || !hasShardDepth(rel) {
 		return rollout{}, false
 	}
 	abs := filepath.Join(resolvedRoot, filepath.FromSlash(rel))
diff --git a/internal/adapters/codex/types.go b/internal/adapters/codex/types.go
index 902a6fa..6c69443 100644
--- a/internal/adapters/codex/types.go
+++ b/internal/adapters/codex/types.go
@@ -242,6 +242,13 @@ var eventMsgTypes = map[string]struct{}{
 	"view_image_tool_call":       {},
 	"dynamic_tool_call_request":  {},
 	"dynamic_tool_call_response": {},
+	// Sub-agent collab lifecycle ends (F3): collab_agent_spawn_end carries the
+	// parent→child spawn link (sender_thread_id→new_thread_id); collab_close_end
+	// and collab_waiting_end are recognized so they never SourceError (they map to
+	// a DBG log, no canonical op — real corpus: 5 spawn / 72 close / 74 waiting).
+	"collab_agent_spawn_end": {},
+	"collab_close_end":       {},
+	"collab_waiting_end":     {},
 }
 
 // eventMsgNoOp are nested event_msg variants the adapter recognizes but
diff --git a/testdata/codex/b_old_turncontext/expected.jsonl b/testdata/codex/b_old_turncontext/expected.jsonl
index d4214c5..e18b883 100644
--- a/testdata/codex/b_old_turncontext/expected.jsonl
+++ b/testdata/codex/b_old_turncontext/expected.jsonl
@@ -14,3 +14,5 @@
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657955000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L7","OriginalBytes":175,"StoredBytes":0,"SHA256":""}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1780138387665997,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1780138387665997,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1780138387665997,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"on-request","effort":"medium","sandbox":"read-only"}}}
diff --git a/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl b/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
index bdd9abf..e1ee3f2 100644
--- a/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
+++ b/testdata/codex/e_compaction/INPUT/codex-home/sessions/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl
@@ -4,6 +4,6 @@
 {"timestamp":"2026-03-03T12:30:04.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"work on a long task"}]}}
 {"timestamp":"2026-03-03T12:30:06.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"working on it"}]}}
 {"timestamp":"2026-03-03T12:30:08.000Z","type":"compacted","payload":{"message":"summary of the conversation so far","replacement_history":[{"type":"message","role":"user","content":[]},{"type":"message","role":"assistant","content":[]}]}}
-{"timestamp":"2026-03-03T12:30:09.000Z","type":"response_item","payload":{"type":"context_compaction","encrypted_content":"AAAABBBBCCCCDDDD"}}
+{"timestamp":"2026-03-03T12:30:08.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
 {"timestamp":"2026-03-03T12:30:10.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"continuing after compaction"}]}}
 {"timestamp":"2026-03-03T12:30:11.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019cb33f-1111-7000-8000-00000000001e","completed_at":1772800211,"duration_ms":8000}}
diff --git a/testdata/codex/e_compaction/expected.jsonl b/testdata/codex/e_compaction/expected.jsonl
index a6a6f6a..95c69a6 100644
--- a/testdata/codex/e_compaction/expected.jsonl
+++ b/testdata/codex/e_compaction/expected.jsonl
@@ -10,12 +10,9 @@
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"compaction","Name":"compaction","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"message_preview":"summary of the conversation so far","replacement_history_size":2,"trigger":"auto"}}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541008000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20483,"Ts":1772541008000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":3,"PayloadKind":"log","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L6","OriginalBytes":-1,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"compaction","Name":"compaction","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"trigger":"auto"}}}
-{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541009000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
-{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1772541009000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":4,"PayloadKind":"log","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L7","OriginalBytes":-1,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
-{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
-{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":5,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L8","OriginalBytes":183,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":5,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1772541010000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/03/rollout-2026-03-03T12-30-03-019cb33f-0000-7000-8000-00000000000e.jsonl#L8","OriginalBytes":183,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1772541010000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
 {"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1772800211000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
 {"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1772800211000000,"SessionNativeID":"019cb33f-0000-7000-8000-00000000000e","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019cb33f-1111-7000-8000-00000000001e","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl b/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
index dfd6f75..965376d 100644
--- a/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
+++ b/testdata/codex/f_exec_truncated/INPUT/codex-home/sessions/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl
@@ -3,7 +3,7 @@
 {"timestamp":"2026-03-31T21:32:12.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d452a-1111-7000-8000-00000000001f","started_at":1774992732,"model_context_window":258400}}
 {"timestamp":"2026-03-31T21:32:13.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"run the long command"}]}}
 {"timestamp":"2026-03-31T21:32:14.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_f1","arguments":"{\"command\":[\"bash\",\"-lc\",\"cat big.txt\"]}"}}
-{"timestamp":"2026-03-31T21:32:18.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_f1","output":"line1\nline2"}}
-{"timestamp":"2026-03-31T21:32:18.100Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_f1","turn_id":"019d452a-1111-7000-8000-00000000001f","command":["bash","-lc","cat big.txt"],"cwd":"<ROOT>/project","stdout":"","stderr":"","formatted_output":"","aggregated_output":"line1\n[.. omitted 4096 bytes ..]\nlineN","exit_code":0,"duration":{"secs":4,"nanos":0},"source":"unified_exec","status":"completed"}}
+{"timestamp":"2026-03-31T21:32:18.000Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_f1","turn_id":"019d452a-1111-7000-8000-00000000001f","command":["bash","-lc","cat big.txt"],"cwd":"<ROOT>/project","stdout":"","stderr":"","formatted_output":"","aggregated_output":"line1\n[.. omitted 4096 bytes ..]\nlineN","exit_code":0,"duration":{"secs":4,"nanos":0},"source":"unified_exec","status":"completed"}}
+{"timestamp":"2026-03-31T21:32:18.100Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_f1","output":"line1\nline2"}}
 {"timestamp":"2026-03-31T21:32:19.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the file is large"}]}}
 {"timestamp":"2026-03-31T21:32:19.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d452a-1111-7000-8000-00000000001f","completed_at":1774992739,"duration_ms":7400}}
diff --git a/testdata/codex/f_exec_truncated/expected.jsonl b/testdata/codex/f_exec_truncated/expected.jsonl
index 8ccfb92..20e2a6d 100644
--- a/testdata/codex/f_exec_truncated/expected.jsonl
+++ b/testdata/codex/f_exec_truncated/expected.jsonl
@@ -6,9 +6,9 @@
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1774992733000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L4","OriginalBytes":170,"StoredBytes":0,"SHA256":""}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_f1"}}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L5","OriginalBytes":196,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1774992738000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992738000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
-{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1774992738000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L6","OriginalBytes":149,"StoredBytes":0,"SHA256":""}}
-{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":0,"Severity":"DBG","Source":"codex","Message":"enrich_exec_command_end","Extras":{"call_id":"call_f1","exec_cwd":"\u003cROOT\u003e/project","exec_exit_code":0,"exec_output_bytes":38,"exec_source":"unified_exec"}}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"exec_cwd":"\u003cROOT\u003e/project","exec_exit_code":0,"exec_output_bytes":38,"exec_source":"unified_exec"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992738100000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L7","OriginalBytes":149,"StoredBytes":0,"SHA256":""}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992739000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L8","OriginalBytes":173,"StoredBytes":0,"SHA256":""}}
diff --git a/testdata/codex/i_collab_spawn/INPUT/codex-home/sessions/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl b/testdata/codex/i_collab_spawn/INPUT/codex-home/sessions/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl
new file mode 100644
index 0000000..358311f
--- /dev/null
+++ b/testdata/codex/i_collab_spawn/INPUT/codex-home/sessions/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl
@@ -0,0 +1,8 @@
+{"timestamp":"2026-05-20T10:15:00.100Z","type":"session_meta","payload":{"id":"019e8a10-0000-7000-8000-000000000010","timestamp":"2026-05-20T10:15:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"6666666666666666666666666666666666666666","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-20T10:15:00.200Z","type":"turn_context","payload":{"turn_id":"019e8a10-1111-7000-8000-000000000020","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"danger-full-access"},"summary":"none"}}
+{"timestamp":"2026-05-20T10:15:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019e8a10-1111-7000-8000-000000000020","started_at":1779358500,"model_context_window":258400}}
+{"timestamp":"2026-05-20T10:15:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"spawn an explorer to map the repo"}]}}
+{"timestamp":"2026-05-20T10:15:03.000Z","type":"event_msg","payload":{"type":"collab_agent_spawn_end","call_id":"spawn_1","sender_thread_id":"019e8a10-0000-7000-8000-000000000010","new_thread_id":"019e8a11-0000-7000-8000-0000000000aa","new_agent_nickname":"Dewey","new_agent_role":"explorer","prompt":"map the repo","model":"gpt-5.5","reasoning_effort":"high","status":"completed"}}
+{"timestamp":"2026-05-20T10:15:05.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the explorer has been spawned"}]}}
+{"timestamp":"2026-05-20T10:15:05.500Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":1200,"cached_input_tokens":0,"output_tokens":80,"reasoning_output_tokens":30,"total_tokens":1280},"last_token_usage":{"input_tokens":1200,"cached_input_tokens":0,"output_tokens":80,"reasoning_output_tokens":30,"total_tokens":1280},"model_context_window":258400}}}
+{"timestamp":"2026-05-20T10:15:06.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019e8a10-1111-7000-8000-000000000020","completed_at":1779358506,"duration_ms":5700,"time_to_first_token_ms":1800}}
diff --git a/testdata/codex/i_collab_spawn/expected.jsonl b/testdata/codex/i_collab_spawn/expected.jsonl
new file mode 100644
index 0000000..d37ba8e
--- /dev/null
+++ b/testdata/codex/i_collab_spawn/expected.jsonl
@@ -0,0 +1,14 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1779272100100000,"NativeID":"019e8a10-0000-7000-8000-000000000010","RootNativeID":"019e8a10-0000-7000-8000-000000000010","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"6666666666666666666666666666666666666666","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1779272100200000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1779272100200000,"NativeID":"019e8a10-0000-7000-8000-000000000010","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1779272101000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1779272101000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779272101000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1779272101000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl#L4","OriginalBytes":183,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1779272103000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"session","Name":"spawn","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"019e8a11-0000-7000-8000-0000000000aa","Extras":{"model":"gpt-5.5","new_agent_nickname":"Dewey","new_agent_role":"explorer","reasoning_effort":"high","relationship":"sub_agent","sender_thread_id":"019e8a10-0000-7000-8000-000000000010"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1779272103000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779272103000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1779272105000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1779272105000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779272105000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20483,"Ts":1779272105000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/20/rollout-2026-05-20T10-15-00-019e8a10-0000-7000-8000-000000000010.jsonl#L6","OriginalBytes":185,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28672,"Ts":1779358506000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779272105000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":1280,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1779358506000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1779358506000000,"TokensIn":1200,"TokensOut":80,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1779358506000000,"SessionNativeID":"019e8a10-0000-7000-8000-000000000010","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019e8a10-1111-7000-8000-000000000020","effort":"high","sandbox":"danger-full-access","ttft_ms":1800}}}
diff --git a/testdata/codex/j_replaced_turn/INPUT/codex-home/sessions/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl b/testdata/codex/j_replaced_turn/INPUT/codex-home/sessions/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl
new file mode 100644
index 0000000..987f49c
--- /dev/null
+++ b/testdata/codex/j_replaced_turn/INPUT/codex-home/sessions/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl
@@ -0,0 +1,10 @@
+{"timestamp":"2026-05-21T14:20:00.100Z","type":"session_meta","payload":{"id":"019e9b20-0000-7000-8000-000000000011","timestamp":"2026-05-21T14:20:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"7777777777777777777777777777777777777777","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-21T14:20:00.200Z","type":"turn_context","payload":{"turn_id":"019e9b20-1111-7000-8000-000000000021","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-05-21T14:20:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019e9b20-1111-7000-8000-000000000021","started_at":1779452400,"model_context_window":258400}}
+{"timestamp":"2026-05-21T14:20:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"start a slow analysis"}]}}
+{"timestamp":"2026-05-21T14:20:02.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_j1","arguments":"{\"command\":[\"bash\",\"-lc\",\"sleep 60\"]}"}}
+{"timestamp":"2026-05-21T14:20:05.000Z","type":"turn_context","payload":{"turn_id":"019e9b20-2222-7000-8000-000000000022","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-05-21T14:20:05.100Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019e9b20-2222-7000-8000-000000000022","started_at":1779452405,"model_context_window":258400}}
+{"timestamp":"2026-05-21T14:20:06.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"actually, just list the files instead"}]}}
+{"timestamp":"2026-05-21T14:20:07.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"here are the files"}]}}
+{"timestamp":"2026-05-21T14:20:07.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019e9b20-2222-7000-8000-000000000022","completed_at":1779452407,"duration_ms":2400,"time_to_first_token_ms":900}}
diff --git a/testdata/codex/j_replaced_turn/expected.jsonl b/testdata/codex/j_replaced_turn/expected.jsonl
new file mode 100644
index 0000000..d1a4893
--- /dev/null
+++ b/testdata/codex/j_replaced_turn/expected.jsonl
@@ -0,0 +1,21 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1779373200100000,"NativeID":"019e9b20-0000-7000-8000-000000000011","RootNativeID":"019e9b20-0000-7000-8000-000000000011","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"7777777777777777777777777777777777777777","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1779373200200000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1779373200200000,"NativeID":"019e9b20-0000-7000-8000-000000000011","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1779373201000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1779373201000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779373201000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1779373201000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl#L4","OriginalBytes":171,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1779373202000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_j1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1779373202000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl#L5","OriginalBytes":193,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1779373205000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"Seq":2,"Status":"cancelled","ErrorClass":"","ErrorMessage":"","EndTs":1779373205000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1779373205000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","Seq":1,"Status":"failed","ErrorClass":"replaced","EndTs":1779373205000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1779373205000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019e9b20-1111-7000-8000-000000000021","effort":"high","sandbox":"workspace-write"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20483,"Ts":1779373205000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","Seq":2}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1779373206000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1779373206000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779373206000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1779373206000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl#L8","OriginalBytes":187,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1779373207000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"Seq":2,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1779373207000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779373207000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32771,"Ts":1779373207000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"OpSeq":2,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/21/rollout-2026-05-21T14-20-00-019e9b20-0000-7000-8000-000000000011.jsonl#L9","OriginalBytes":174,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36864,"Ts":1779452407000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779373207000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36865,"Ts":1779452407000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","Seq":2,"Status":"completed","ErrorClass":"","EndTs":1779452407000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36866,"Ts":1779452407000000,"SessionNativeID":"019e9b20-0000-7000-8000-000000000011","TurnSeq":2,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019e9b20-2222-7000-8000-000000000022","effort":"high","sandbox":"workspace-write","ttft_ms":900}}}
diff --git a/testdata/codex/k_web_search/INPUT/codex-home/sessions/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl b/testdata/codex/k_web_search/INPUT/codex-home/sessions/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl
new file mode 100644
index 0000000..f047eee
--- /dev/null
+++ b/testdata/codex/k_web_search/INPUT/codex-home/sessions/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl
@@ -0,0 +1,9 @@
+{"timestamp":"2026-05-22T09:05:00.100Z","type":"session_meta","payload":{"id":"019eac30-0000-7000-8000-000000000012","timestamp":"2026-05-22T09:05:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"8888888888888888888888888888888888888888","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-05-22T09:05:00.200Z","type":"turn_context","payload":{"turn_id":"019eac30-1111-7000-8000-000000000022","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-05-22T09:05:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019eac30-1111-7000-8000-000000000022","started_at":1779526500,"model_context_window":258400}}
+{"timestamp":"2026-05-22T09:05:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"look up the latest release notes online"}]}}
+{"timestamp":"2026-05-22T09:05:02.000Z","type":"response_item","payload":{"type":"web_search_call","status":"completed","action":{"type":"search","query":"latest release notes"}}}
+{"timestamp":"2026-05-22T09:05:04.000Z","type":"event_msg","payload":{"type":"web_search_end","call_id":"ws_remote_1","query":"latest release notes","action":{"type":"search","query":"latest release notes"}}}
+{"timestamp":"2026-05-22T09:05:05.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the latest release adds new features"}]}}
+{"timestamp":"2026-05-22T09:05:05.500Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":1500,"cached_input_tokens":0,"output_tokens":90,"reasoning_output_tokens":40,"total_tokens":1590},"last_token_usage":{"input_tokens":1500,"cached_input_tokens":0,"output_tokens":90,"reasoning_output_tokens":40,"total_tokens":1590},"model_context_window":258400}}}
+{"timestamp":"2026-05-22T09:05:06.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019eac30-1111-7000-8000-000000000022","completed_at":1779526506,"duration_ms":5700,"time_to_first_token_ms":1500}}
diff --git a/testdata/codex/k_web_search/expected.jsonl b/testdata/codex/k_web_search/expected.jsonl
new file mode 100644
index 0000000..ef01360
--- /dev/null
+++ b/testdata/codex/k_web_search/expected.jsonl
@@ -0,0 +1,16 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1779440700100000,"NativeID":"019eac30-0000-7000-8000-000000000012","RootNativeID":"019eac30-0000-7000-8000-000000000012","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"8888888888888888888888888888888888888888","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1779440700200000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1779440700200000,"NativeID":"019eac30-0000-7000-8000-000000000012","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1779440701000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1779440701000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440701000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1779440701000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl#L4","OriginalBytes":189,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1779440702000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1779440702000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl#L5","OriginalBytes":179,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1779440704000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"query":"latest release notes"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1779440704000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440704000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1779440705000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1779440705000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440705000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1779440705000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl#L7","OriginalBytes":192,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1779526506000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440705000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":1590,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1779526506000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1779526506000000,"TokensIn":1500,"TokensOut":90,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1779526506000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019eac30-1111-7000-8000-000000000022","effort":"high","sandbox":"workspace-write","ttft_ms":1500}}}

From a1c685651ec81f7a019f7b855fdfafeea208ca12 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 15:49:21 +0300
Subject: [PATCH 08/13] codex adapter: exec/patch status authority, web_search
 FIFO, NativeID, EndTs

Round-3 fixes completing the partially-fixed exec/patch enrichment and
web_search pairing plus two correctness gaps, all verified against the
real ~/.codex wire shapes.

- exec_command_end exit_code is now authoritative for op status in BOTH
  orders: exec-first applies at finalize; output-first emits a correcting
  OpFinalized(failed,command_failed) via the finalizedOps lookup (a
  non-zero exit no longer leaves a failed command marked completed).
- patch_apply_end is now order-independent (finalizedOps path) and merges
  patch_success/patch_status into op Extras; success=false -> failed.
- exec_duration_ms is now emitted (real duration is {secs,nanos} ->
  secs*1000 + nanos/1e6).
- web_search pairing uses a per-turn FIFO queue of open searches (oldest
  pairs with each web_search_end) so interleaved searches don't mis-pair;
  the end event's action object is now decoded and emitted alongside query.
- NativeID is taken from the authoritative session_meta.payload.id (the
  UUID parent_thread_id/forked_from_id reference); the filename UUID is
  only a fallback.
- old-format turn_context-only sessions now finalize their EOF turn at the
  turn's last-activity timestamp (deterministic), not the file mtime, so
  the golden is stable across runs; the new-format stale crash finalize
  still uses the stale mtime.

New fixtures l_exec_failed / n_patch_apply / m_multi_web_search /
o_payload_id_filename + regenerated f_exec_truncated / b_old_turncontext /
k_web_search, each line-checked against the spec and byte-identical across
repeated -update-golden runs. The spec now pins order-independence and the
{secs,nanos} and {patch_success,patch_status} shapes (rules #14/#16/#23).
Gates green: golangci(0)/gosec(0)/vet; race 13/13; codex coverage 92.6%;
FuzzParseLine 0 crashes; secret + AI-attribution scans clean.
---
 .../SOW-0004-20260526-codex-adapter.md        |  16 ++
 .agents/sow/specs/adapter-codex.md            |  14 +-
 internal/adapters/codex/helpers_unit_test.go  |  53 ++++-
 internal/adapters/codex/mapper.go             |  16 +-
 .../adapters/codex/mapper_coverage_test.go    | 184 ++++++++++++++++++
 internal/adapters/codex/mapper_finalize.go    |  22 ++-
 internal/adapters/codex/mapper_state.go       |   9 +-
 internal/adapters/codex/mapper_turn.go        |  28 +++
 internal/adapters/codex/ops_enrich.go         | 127 ++++++++----
 internal/adapters/codex/ops_enrich_decode.go  | 119 +++++++++--
 internal/adapters/codex/ops_tools.go          |  15 +-
 .../codex/b_old_turncontext/expected.jsonl    |   4 +-
 .../codex/f_exec_truncated/expected.jsonl     |   2 +-
 testdata/codex/k_web_search/expected.jsonl    |   2 +-
 ...019d8a30-0000-7000-8000-000000000031.jsonl |   9 +
 testdata/codex/l_exec_failed/expected.jsonl   |  18 ++
 ...019d8f40-0000-7000-8000-000000000051.jsonl |  10 +
 .../codex/m_multi_web_search/expected.jsonl   |  20 ++
 ...019d9450-0000-7000-8000-000000000071.jsonl |   9 +
 testdata/codex/n_patch_apply/expected.jsonl   |  18 ++
 ...019d9960-0000-7000-8000-0000000000ff.jsonl |   6 +
 .../o_payload_id_filename/expected.jsonl      |  12 ++
 22 files changed, 630 insertions(+), 83 deletions(-)
 create mode 100644 testdata/codex/l_exec_failed/INPUT/codex-home/sessions/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl
 create mode 100644 testdata/codex/l_exec_failed/expected.jsonl
 create mode 100644 testdata/codex/m_multi_web_search/INPUT/codex-home/sessions/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl
 create mode 100644 testdata/codex/m_multi_web_search/expected.jsonl
 create mode 100644 testdata/codex/n_patch_apply/INPUT/codex-home/sessions/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl
 create mode 100644 testdata/codex/n_patch_apply/expected.jsonl
 create mode 100644 testdata/codex/o_payload_id_filename/INPUT/codex-home/sessions/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl
 create mode 100644 testdata/codex/o_payload_id_filename/expected.jsonl

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
index acdfee3..c32cd0b 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
@@ -203,6 +203,22 @@ Adjudicated on ground truth (spec lines + a read-only investigation of the real
 
 **Decided fix plan (round 2):** code fixes to match the (mostly already-correct) spec + spec corrections where the spec had wrong wire shapes (F3 collab fields, F5/F7 dead variants) + regenerated goldens (the round-1 goldens were partly circular — built by the same understanding as the code) + new real-shape fixtures (collab spawn, replaced turn, old-format-stale, realistic web_search + compaction + exec-first ordering). All code fixes stay within `internal/adapters/codex/` + the additive `sources.go` probe; no canonical/ingest/store change. F9 hardening and `image_generation` real-shape coverage (no real data exists) are documented as accepted limitations.
 
+### Round 2 (2026-05-30) — same scope + fix notes
+
+- **minimax**: SAFE, no new issues, 1 benign P2 (the LLM op's CtxUsed re-finalize is a second OpFinalized — verified idempotent, carries `completed`+valid EndTs, does not clobber).
+- **glm**: found G6 (P2) empirically — ran `b_old_turncontext` twice and the old-format EOF `turn_finalized` EndTs differed between runs (`1780138387665997` vs `1780143627970705`, ~2026-05-30 wall-clock), i.e. the old-format EOF turn-close uses the file's live mtime as EndTs → **non-deterministic golden (CI-flaky) + semantically wrong**. CONFIRMED. Fix (G6): old-format EOF turn-close EndTs = the turn's last-activity timestamp (deterministic, from data), not the file mtime. Regenerate b_old_turncontext.
+- **codex**: NOT SAFE. Confirmed F1/F2/F3/F5/F6/F8/F9 resolved; **F4 and F7 only partially fixed**, plus a NativeID-source gap. All verified against ground truth (codex 100% accurate across both rounds):
+
+| # | Sev | Finding | Verdict |
+|---|---|---|---|
+| G1 | P1 | output-first `exec_command_end(exit≠0)` adds extras via OpStarted but never emits a correcting `OpFinalized(failed)` → failed exec stays `completed` | CONFIRMED. ops_enrich.go:57-58 ("status NOT re-applied"); `enrichStatus` exists but unused on the finalized path. exit_code is authoritative (spec rule #5/#14) in BOTH orders. |
+| G2 | P2 | `patch_apply_end` not order-independent (openOps-only) and doesn't merge `success`/`status` extras (spec :361) | CONFIRMED. ops_enrich.go:234-240. |
+| G3 | P2 | `exec_duration_ms` (spec :354) decoded but never emitted; golden circular | CONFIRMED. ops_enrich_decode.go:18 decodes `Duration`, :27-39 drops it. |
+| G4 | P2 | web_search single-slot state mis-pairs interleaved searches; `action` (spec :345) dropped | CONFIRMED. ops_tools.go:149 single `openWebSearch`; webSearchExtras decodes only `query`. |
+| G5 | P2 | NativeID seeded from filename, not authoritative `session_meta.payload.id` (spec :290) | CONFIRMED. mapper.go:287 seeds from filename; applySessionMeta never assigns `p.ID`. Hidden where filename==payload.id. |
+
+**Round-3 fix plan:** (G1) exec exit_code authoritative for op status in both orders — exec-first applies at finalize, output-first emits a corrected `OpFinalized(failed,command_failed)` via the finalizedOps lookup; (G2) patch_apply_end uses the same finalizedOps path + merges success/status; (G3) emit `exec_duration_ms` (normalize the duration value to ms); (G4) FIFO queue of open web_search ops per turn + decode/emit `action`; (G5) set NativeID from `payload.id` (filename only as fallback). Add fixtures: failed-exec (output-first, corrected status), patch_apply_end, multi-web-search, and regenerate f_exec_truncated with `exec_duration_ms`.
+
 ## Outcome
 
 Pending.
diff --git a/.agents/sow/specs/adapter-codex.md b/.agents/sow/specs/adapter-codex.md
index ba31474..ef0986b 100644
--- a/.agents/sow/specs/adapter-codex.md
+++ b/.agents/sow/specs/adapter-codex.md
@@ -351,14 +351,15 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
     - LEGACY ONLY (does not occur in modern `.jsonl`). When ingesting legacy `.json` files: Kind=`tool`, Name=`shell`, ToolNamespace=`shell`.
 
 14. **`event_msg` payload `exec_command_end`:**
-    - Used for telemetry enrichment — the matching `function_call`/`function_call_output` pair carries the same `call_id` and produces the op. The `exec_command_end` adds: parsed_cmd, exit_code, duration, source. Adapter merges these into the op's Extras: `{exec_exit_code, exec_duration_ms, exec_cwd, exec_source}`. **Do not** emit a second op.
+    - Used for telemetry enrichment — the matching `function_call`/`function_call_output` pair carries the same `call_id` and produces the op. The `exec_command_end` adds: parsed_cmd, exit_code, duration, source. Adapter merges these into the op's Extras: `{exec_exit_code, exec_duration_ms, exec_cwd, exec_source}`. The `duration` is a Rust `Duration` object `{secs, nanos}` (real corpus: always this shape) normalized to integer `exec_duration_ms = secs*1000 + nanos/1e6`. **Do not** emit a second op.
+    - The `exit_code` is AUTHORITATIVE for the op's terminal status, ORDER-INDEPENDENTLY (G1, rule #5): non-zero `exit_code` → op `failed` / ErrorClass `command_failed`; `exit_code` 0 → `completed`. When `exec_command_end` arrives BEFORE the `function_call_output` (~68-85%), the exec status is stashed and WINS over the output-string heuristic at finalize. When it arrives AFTER (output-first, ~15-32%), the adapter emits a CORRECTING `OpFinalized` on the op's `(turn,seq)` so a non-zero exit overrides a provisionally-`completed` op. A blanked `aggregated_output` is NOT an error.
     - Note: `aggregated_output` is truncated to 10 KB at the source; `stdout`/`stderr`/`formatted_output` are blanked (`policy.rs:51-59`). Adapter cannot recover full output.
 
 15. **`event_msg` payload `mcp_tool_call_end`:**
     - For MCP-routed function calls (which appear ALSO as `function_call`/`function_call_output` with `name = "<server>.<tool>"` or via the `namespace` field). The `invocation` field gives canonical (server, tool). Use it to set `tool_namespace = "mcp:" + server` and `name = tool` on the matching op.
 
 16. **`event_msg` payload `patch_apply_end`:**
-    - Telemetry for an `apply_patch` `function_call`. Merge `success`, `status` into the op's Extras. Set Op Status accordingly.
+    - Telemetry for an `apply_patch` `function_call`. Merge `success`, `status` into the op's Extras as `{patch_success, patch_status}`. Set Op Status accordingly (success=false → `failed` / ErrorClass `patch_failed`). ORDER-INDEPENDENT, mirroring exec (G2): an `apply_patch` op still open is finalized here with the extras merged; an already-finalized op (output-first) gets the extras re-emitted plus a correcting `OpFinalized` on its `(turn,seq)`.
 
 17. **`event_msg` payload `token_count`:**
     - Stream of token accounting snapshots. Each carries cumulative `total_token_usage` and the per-call `last_token_usage`, plus optional `model_context_window`.
@@ -389,7 +390,12 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
     - **OLD-format turn (turn_context-only, no `task_started` — cli < ~0.93):**
       close `TurnFinalizedEvent(Status="completed")` at EOF, REGARDLESS of
       staleness (edge #3 "close at EOF"; ~38% of the real corpus is pure
-      old-format ending cleanly with no completion marker). NO
+      old-format ending cleanly with no completion marker). `EndTs` is the turn's
+      **last-activity timestamp** (the max record timestamp in the file — for the
+      most-recent open turn this IS its last activity), NOT the file mtime /
+      wall-clock: a clean old-format turn ended when its last record was written,
+      and using the live mtime makes the emitted stream non-deterministic across
+      runs (CI-flaky golden) and semantically wrong (G6). NO
       `SessionFinalizedEvent` — codex has no per-session terminal signal (C#3);
       the session stays `running`.
     - **NEW-format turn (a `task_started` opened it, no `task_complete`):** the
@@ -429,7 +435,7 @@ Codex rollout files emit fine-grained `RolloutItem` records but do NOT carry pre
 | `compacted` line (+ adjacent `event_msg.context_compacted` companion, suppressed) | one Op Kind=compaction Name=compaction |
 | lone `event_msg.context_compacted` (no preceding `compacted`) | one Op Kind=compaction Name=compaction (defensive) |
 | `response_item.compaction` / `response_item.context_compaction` | forward-compat only (0 real files); converges on one OpCompaction if ever emitted |
-| EOF, OLD-format open turn (turn_context-only, no task_started) | `TurnFinalizedEvent(completed)` at EOF regardless of staleness (edge #3); **no `SessionFinalizedEvent`** (F1) |
+| EOF, OLD-format open turn (turn_context-only, no task_started) | `TurnFinalizedEvent(completed)` at EOF regardless of staleness, `EndTs` = turn's last-activity ts (deterministic, NOT mtime — G6); **no `SessionFinalizedEvent`** (F1) |
 | EOF, NEW-format open turn (saw task_started), file mtime-stale ≥ 1 h | synthetic `TurnFinalizedEvent(failed,incomplete)` + `SessionFinalizedEvent(failed,incomplete)` |
 | EOF, NEW-format open turn, file FRESH (< 1 h) | turn stays open (still in-flight); no finalize (F1) |
 | EOF clean (most recent event is task_complete / no open turn) | **no `SessionFinalizedEvent`** — session stays `running` (codex has no per-session terminal signal; rollouts are resumable and metadata-appendable per `recorder.rs:1610`). UI uses `last_activity_ts` for staleness, identical to claude-code. |
diff --git a/internal/adapters/codex/helpers_unit_test.go b/internal/adapters/codex/helpers_unit_test.go
index f7b57ea..ede7f9b 100644
--- a/internal/adapters/codex/helpers_unit_test.go
+++ b/internal/adapters/codex/helpers_unit_test.go
@@ -230,11 +230,19 @@ func TestMcpInvocationAndExecExtras(t *testing.T) {
 	if s, tool := mcpInvocation([]byte(`{bad`)); s != "" || tool != "" {
 		t.Errorf("mcpInvocation(malformed) = {%q %q}, want empty", s, tool)
 	}
-	// execCommandExtras: all fields
-	ex := execCommandExtras([]byte(`{"payload":{"exit_code":0,"cwd":"<ROOT>","source":"model","aggregated_output":"abc"}}`))
-	if ex["exec_exit_code"] != int64(0) || ex["exec_cwd"] != "<ROOT>" || ex["exec_source"] != "model" || ex["exec_output_bytes"] != 3 {
+	// execCommandExtras: all fields, including duration {secs,nanos}→ms (G3).
+	ex := execCommandExtras([]byte(`{"payload":{"exit_code":0,"cwd":"<ROOT>","source":"model","aggregated_output":"abc","duration":{"secs":1,"nanos":250000000}}}`))
+	if ex["exec_exit_code"] != int64(0) || ex["exec_cwd"] != "<ROOT>" || ex["exec_source"] != "model" || ex["exec_output_bytes"] != 3 || ex["exec_duration_ms"] != int64(1250) {
 		t.Errorf("execCommandExtras = %+v", ex)
 	}
+	// duration {secs:0,nanos:0} is a real sub-ms value → exec_duration_ms=0, present.
+	if ex := execCommandExtras([]byte(`{"payload":{"exit_code":0,"duration":{"secs":0,"nanos":0}}}`)); ex["exec_duration_ms"] != int64(0) {
+		t.Errorf("execCommandExtras zero-duration = %+v, want exec_duration_ms=0", ex)
+	}
+	// absent duration → no exec_duration_ms key (G3 -1 sentinel suppressed).
+	if ex := execCommandExtras([]byte(`{"payload":{"exit_code":0}}`)); ex == nil || ex["exec_duration_ms"] != nil {
+		t.Errorf("execCommandExtras no-duration = %+v, want no exec_duration_ms", ex)
+	}
 	// empty payload → nil
 	if ex := execCommandExtras([]byte(`{"payload":{}}`)); ex != nil {
 		t.Errorf("execCommandExtras(empty) = %+v, want nil", ex)
@@ -243,9 +251,28 @@ func TestMcpInvocationAndExecExtras(t *testing.T) {
 	if ex := execCommandExtras([]byte(`{bad`)); ex != nil {
 		t.Errorf("execCommandExtras(malformed) = %+v, want nil", ex)
 	}
-	// webSearchExtras
-	if w := webSearchExtras([]byte(`{"payload":{"query":"q"}}`)); w["query"] != "q" {
-		t.Errorf("webSearchExtras = %+v", w)
+	// webSearchExtras: query + action object (G4).
+	w := webSearchExtras([]byte(`{"payload":{"query":"q","action":{"type":"search","query":"q","queries":["q","q2"]}}}`))
+	if w["query"] != "q" {
+		t.Errorf("webSearchExtras query = %+v", w)
+	}
+	act, ok := w["action"].(map[string]any)
+	if !ok || act["type"] != "search" || act["query"] != "q" || act["queries"] != nil {
+		t.Errorf("webSearchExtras action = %+v (queries[] must be dropped)", w["action"])
+	}
+	// action open_page → url surfaced (no query/pattern).
+	w2 := webSearchExtras([]byte(`{"payload":{"action":{"type":"open_page","url":"https://e.invalid/p"}}}`))
+	if a, _ := w2["action"].(map[string]any); a["type"] != "open_page" || a["url"] != "https://e.invalid/p" || w2["query"] != nil {
+		t.Errorf("webSearchExtras open_page = %+v", w2)
+	}
+	// action find_in_page → pattern surfaced.
+	w3 := webSearchExtras([]byte(`{"payload":{"action":{"type":"find_in_page","pattern":"needle","url":"https://e.invalid/p"}}}`))
+	if a, _ := w3["action"].(map[string]any); a["pattern"] != "needle" || a["url"] != "https://e.invalid/p" {
+		t.Errorf("webSearchExtras find_in_page = %+v", w3)
+	}
+	// action with no type → dropped; query-only still works.
+	if w := webSearchExtras([]byte(`{"payload":{"query":"q","action":{"url":"x"}}}`)); w["query"] != "q" || w["action"] != nil {
+		t.Errorf("webSearchExtras typeless-action = %+v, want query only", w)
 	}
 	if w := webSearchExtras([]byte(`{"payload":{}}`)); w != nil {
 		t.Errorf("webSearchExtras(empty) = %+v, want nil", w)
@@ -253,6 +280,20 @@ func TestMcpInvocationAndExecExtras(t *testing.T) {
 	if w := webSearchExtras([]byte(`{bad`)); w != nil {
 		t.Errorf("webSearchExtras(malformed) = %+v, want nil", w)
 	}
+	// patchApplyExtras (G2): success + status surfaced.
+	pe := patchApplyExtras([]byte(`{"payload":{"success":false,"status":"failed"}}`))
+	if pe["patch_success"] != false || pe["patch_status"] != "failed" {
+		t.Errorf("patchApplyExtras = %+v", pe)
+	}
+	if pe := patchApplyExtras([]byte(`{"payload":{"success":true}}`)); pe["patch_success"] != true || pe["patch_status"] != nil {
+		t.Errorf("patchApplyExtras success-only = %+v", pe)
+	}
+	if pe := patchApplyExtras([]byte(`{"payload":{}}`)); pe != nil {
+		t.Errorf("patchApplyExtras(empty) = %+v, want nil", pe)
+	}
+	if pe := patchApplyExtras([]byte(`{bad`)); pe != nil {
+		t.Errorf("patchApplyExtras(malformed) = %+v, want nil", pe)
+	}
 }
 
 func TestDecodeTokenCount_Placements(t *testing.T) {
diff --git a/internal/adapters/codex/mapper.go b/internal/adapters/codex/mapper.go
index 4090fba..cc439d9 100644
--- a/internal/adapters/codex/mapper.go
+++ b/internal/adapters/codex/mapper.go
@@ -135,13 +135,15 @@ type fileMapper struct {
 	// orders. Entries persist for the life of the file (small; one per tool op).
 	finalizedOps map[string]finalizedOp
 
-	// openWebSearch is the most-recently-opened, not-yet-paired web_search op in
-	// the active turn (F7). web_search_call carries NEITHER id NOR call_id, so its
-	// companion event_msg.web_search_end (which DOES carry call_id) cannot pair by
-	// key — it pairs POSITIONALLY with the most-recent open web_search op in the
-	// same turn. nil when no web_search awaits an end. Cleared on pairing or at
-	// turn close (the op then finalizes as a dangling op).
-	openWebSearch *openWebSearchRef
+	// openWebSearch is a FIFO QUEUE of not-yet-paired web_search ops, in open order
+	// (F7/G4). web_search_call carries NEITHER id NOR call_id, so its companion
+	// event_msg.web_search_end (which DOES carry call_id) cannot pair by key — it
+	// pairs POSITIONALLY: each web_search_end finalizes the OLDEST open web_search op
+	// (front of the queue), so interleaved searches pair in order and none are left
+	// dangling out of sequence. Entries are appended on web_search_call, removed on
+	// pairing (front) or when their turn closes (finalizeDanglingOps closes the op
+	// and pruneWebSearchQueue drops the stale ref).
+	openWebSearch []*openWebSearchRef
 
 	// seenUserCallIDs dedups user input across response_item.message(role=user)
 	// and event_msg.user_message (spec rule #6, #18). Keyed on a content
diff --git a/internal/adapters/codex/mapper_coverage_test.go b/internal/adapters/codex/mapper_coverage_test.go
index b55e773..cf207f6 100644
--- a/internal/adapters/codex/mapper_coverage_test.go
+++ b/internal/adapters/codex/mapper_coverage_test.go
@@ -675,3 +675,187 @@ func TestMapper_LateSessionMetaNoSecondStart(t *testing.T) {
 		t.Fatalf("SessionStarted count = %d, want 1 (no second start on late meta)", got)
 	}
 }
+
+// TestMapper_NativeIDFromPayloadID covers G5: the mapper is seeded with a
+// FILENAME-derived nativeID, but session_meta.payload.id is AUTHORITATIVE and must
+// override it on the session AND every subsequent event. The scanner seeds the
+// filename id as a fallback; the body id wins (spec adapter-codex.md:290).
+func TestMapper_NativeIDFromPayloadID(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("file-id") // filename-derived fallback
+	lines := []string{
+		metaLine("meta-id", `"exec"`), // payload.id is the authoritative id
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"hi"}]}}`,
+	}
+	events := runLines(t, m, lines)
+	s := firstStarted(t, events)
+	if s.NativeID != "meta-id" || s.RootNativeID != "meta-id" {
+		t.Errorf("session ids = {NativeID:%q RootNativeID:%q}, want both meta-id (payload.id wins, G5)", s.NativeID, s.RootNativeID)
+	}
+	// Every op/turn event must carry the authoritative id, not the filename fallback.
+	for _, st := range opStarts(events) {
+		if st.SessionNativeID != "meta-id" {
+			t.Errorf("op SessionNativeID = %q, want meta-id (G5)", st.SessionNativeID)
+		}
+	}
+	if m.nativeID != "meta-id" {
+		t.Errorf("m.nativeID = %q, want meta-id (G5)", m.nativeID)
+	}
+}
+
+// TestMapper_MultiWebSearchFIFO covers G4: two web_search_call ops followed by two
+// web_search_end events pair in FIFO order (oldest open search first), each
+// carrying its own action Extras. Everything happens inside a single turn; the
+// cross-turn pruning of stale refs (pruneWebSearchQueue) is covered separately by
+// TestMapper_WebSearchPruneAcrossTurns.
+func TestMapper_MultiWebSearchFIFO(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","action":{"type":"search","query":"alpha"}}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","action":{"type":"open_page","url":"https://e.invalid/p"}}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"x1","query":"alpha","action":{"type":"search","query":"alpha"}}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"x2","action":{"type":"open_page","url":"https://e.invalid/p"}}}`,
+	}
+	events := runLines(t, m, lines)
+	// The first-opened web_search op (Seq 1) must carry the search action (paired
+	// with the FIRST end); the second (Seq 2) the open_page (the SECOND end). Seqs
+	// are 1 and 2 because the turn opens no other op before them.
+	var firstAction, secondAction map[string]any
+	for _, st := range opStarts(events) {
+		if st.Name != "web_search" || st.Extras == nil {
+			continue
+		}
+		if a, ok := st.Extras["action"].(map[string]any); ok {
+			switch st.Seq {
+			case 1:
+				firstAction = a
+			case 2:
+				secondAction = a
+			}
+		}
+	}
+	if firstAction == nil || firstAction["type"] != "search" {
+		t.Errorf("Seq1 (oldest search) action = %+v, want type=search (FIFO, G4)", firstAction)
+	}
+	if secondAction == nil || secondAction["type"] != "open_page" {
+		t.Errorf("Seq2 (newer search) action = %+v, want type=open_page (FIFO, G4)", secondAction)
+	}
+	// Neither web_search_end may be logged as an orphan: each must pair with a queued call.
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "web_search_end_no_call" {
+			t.Errorf("FIFO pairing wrongly logged an orphan end (G4)")
+		}
+	}
+}
+
+// TestMapper_WebSearchPruneAcrossTurns covers G4's pruneWebSearchQueue drop
+// path: an unpaired web_search_call in turn 1 is dropped when turn 1 closes, so a
+// web_search_end in turn 2 does NOT pair with the stale turn-1 ref (it is an
+// orphan).
+func TestMapper_WebSearchPruneAcrossTurns(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t1"}}`,
+		// An unpaired web_search_call in turn 1.
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"web_search_call","action":{"type":"search","query":"dangling"}}}`,
+		// turn 1 closes — its open web_search op is dangling-finalized and pruned.
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"task_complete","turn_id":"t1"}}`,
+		// turn 2 opens; a web_search_end with no in-turn call must be an orphan.
+		`{"timestamp":"` + tsDone + `","type":"turn_context","payload":{"turn_id":"t2","model":"m"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"task_started","turn_id":"t2"}}`,
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"web_search_end","call_id":"x9","query":"late"}}`,
+	}
+	events := runLines(t, m, lines)
+	orphan := false
+	for _, ev := range events {
+		if le, ok := ev.(canonical.LogEntryEvent); ok && le.Message == "web_search_end_no_call" {
+			orphan = true
+		}
+	}
+	if !orphan {
+		t.Errorf("turn-2 web_search_end paired with a stale turn-1 ref instead of logging orphan (G4 prune)")
+	}
+}
+
+// TestMapper_PatchApplyOpenOpFailed covers G2's open-op path: a patch_apply_end
+// arriving while the apply_patch op is still OPEN (no function_call_output yet)
+// finalizes it with the success-derived status AND merges {patch_success,
+// patch_status} into Extras.
+func TestMapper_PatchApplyOpenOpFailed(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"apply_patch","arguments":"{}","call_id":"p1"}}`,
+		`{"timestamp":"` + tsEvent + `","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"p1","success":false,"status":"failed"}}`,
+	}
+	events := runLines(t, m, lines)
+	// The apply_patch op is finalized failed/patch_failed with merged extras.
+	gotFailed, gotExtras := false, false
+	for _, f := range opFinals(events) {
+		if f.Status == "failed" && f.ErrorClass == "patch_failed" {
+			gotFailed = true
+		}
+	}
+	for _, st := range opStarts(events) {
+		if st.Name == "apply_patch" && st.Extras != nil && st.Extras["patch_success"] == false && st.Extras["patch_status"] == "failed" {
+			gotExtras = true
+		}
+	}
+	if !gotFailed {
+		t.Errorf("open-op patch_apply_end did not finalize failed/patch_failed (G2)")
+	}
+	if !gotExtras {
+		t.Errorf("open-op patch_apply_end did not merge patch_success/patch_status Extras (G2)")
+	}
+}
+
+// TestMapper_OutputFirstExecFailedCorrects covers G1: an output-first
+// exec_command_end(exit≠0) emits a CORRECTING OpFinalized(failed, command_failed)
+// onto the op that its function_call_output had provisionally finalized completed.
+func TestMapper_OutputFirstExecFailedCorrects(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		// Output-first: provisional completed off a benign-looking output string.
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"some output"}}`,
+		// Late exec_command_end with a non-zero exit_code → authoritative failed.
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":2,"aggregated_output":"some output","duration":{"secs":0,"nanos":500000000}}}`,
+	}
+	events := runLines(t, m, lines)
+	fins := opFinals(events)
+	// The shell op (Seq 1 — it is the turn's first op) must END with a corrected
+	// failed/command_failed finalize (the LAST finalize on its (turn,seq) wins via
+	// the writer upsert).
+	var lastSeq1 *canonical.OpFinalizedEvent
+	for i := range fins {
+		if fins[i].Seq == 1 {
+			f := fins[i]
+			lastSeq1 = &f
+		}
+	}
+	if lastSeq1 == nil || lastSeq1.Status != "failed" || lastSeq1.ErrorClass != "command_failed" {
+		t.Errorf("output-first failed exec: last Seq1 finalize = %+v, want failed/command_failed (G1)", lastSeq1)
+	}
+	// The exec extras (exit_code, duration_ms) reached the op via an OpStarted re-emit.
+	reemit := false
+	for _, st := range opStarts(events) {
+		if st.Name == "shell" && st.Extras != nil && st.Extras["exec_exit_code"] == int64(2) && st.Extras["exec_duration_ms"] == int64(500) {
+			reemit = true
+		}
+	}
+	if !reemit {
+		t.Errorf("output-first failed exec did not re-emit exec_exit_code/exec_duration_ms Extras (G1/G3)")
+	}
+}
diff --git a/internal/adapters/codex/mapper_finalize.go b/internal/adapters/codex/mapper_finalize.go
index c66270a..a96683f 100644
--- a/internal/adapters/codex/mapper_finalize.go
+++ b/internal/adapters/codex/mapper_finalize.go
@@ -43,8 +43,18 @@ func (m *fileMapper) finalizeAtEOF(stale bool, nowUs int64) []canonical.Event {
 	}
 	if !ts.sawTaskStarted {
 		// OLD-format: close COMPLETED at EOF regardless of staleness (spec edge #3).
+		// EndTs MUST be the turn's LAST-ACTIVITY timestamp (m.lastTsUs, the max
+		// record ts in the file — which, for the most-recent open turn, IS that
+		// turn's last activity), NOT the file mtime / wall-clock (G6). A clean
+		// old-format turn ended when its last record was written; using the live
+		// mtime made the golden non-deterministic (CI-flaky) and semantically wrong.
+		// Fall back to nowUs only when no record carried a timestamp (lastTsUs == 0).
+		endUs := m.lastTsUs
+		if endUs == 0 {
+			endUs = nowUs
+		}
 		m.eofFinalized = true
-		return m.closeOpenTurnAtEOF(ts, nowUs, "completed", "", false)
+		return m.closeOpenTurnAtEOF(ts, endUs, "completed", "", false)
 	}
 	// NEW-format: only a stale file's hanging turn is a crash (rule #23). A fresh
 	// file's turn is still running — leave it open (do NOT set eofFinalized, so a
@@ -61,10 +71,12 @@ func (m *fileMapper) finalizeAtEOF(stale bool, nowUs int64) []canonical.Event {
 // old-format turn), the turn is finalized with the supplied status/errClass, its
 // turn-extras log is emitted, and — only when withSessionFinalize is set (the
 // stale new-format crash path) — a SessionFinalizedEvent(failed, incomplete) is
-// appended (the ONLY SessionFinalizedEvent codex emits; SOW C#3). endUs is
-// floored at the turn's start so the synthetic close never predates the open.
-func (m *fileMapper) closeOpenTurnAtEOF(ts *turnState, nowUs int64, status, errClass string, withSessionFinalize bool) []canonical.Event {
-	endUs := nowUs
+// appended (the ONLY SessionFinalizedEvent codex emits; SOW C#3). closeUs is the
+// close timestamp: the turn's last-activity ts for the OLD-format completed close
+// (deterministic, G6) or the file mtime for the stale new-format crash close. It
+// is floored at the turn's start so the synthetic close never predates the open.
+func (m *fileMapper) closeOpenTurnAtEOF(ts *turnState, closeUs int64, status, errClass string, withSessionFinalize bool) []canonical.Event {
+	endUs := closeUs
 	if endUs < ts.startTsUs {
 		endUs = ts.startTsUs
 	}
diff --git a/internal/adapters/codex/mapper_state.go b/internal/adapters/codex/mapper_state.go
index 9e0187f..1f82a6c 100644
--- a/internal/adapters/codex/mapper_state.go
+++ b/internal/adapters/codex/mapper_state.go
@@ -116,11 +116,12 @@ type finalizedOp struct {
 	namespace string
 }
 
-// openWebSearchRef records the most-recent open web_search op in the active turn
-// for POSITIONAL pairing with event_msg.web_search_end (F7). web_search_call
-// carries no correlation key, so the end pairs by position, not call_id.
+// openWebSearchRef records one open web_search op for POSITIONAL pairing with
+// event_msg.web_search_end (F7/G4). web_search_call carries no correlation key, so
+// the end pairs by FIFO position (oldest open search first), not call_id.
 // syntheticCallID is the openOps key the call was tracked under, so the end can
-// finalize the SAME op and remove it from the dangling set.
+// finalize the SAME op and remove it from the dangling set. turnID lets a
+// turn-close prune drop refs belonging to a now-closed turn.
 type openWebSearchRef struct {
 	turnID          string
 	turnSeq         int
diff --git a/internal/adapters/codex/mapper_turn.go b/internal/adapters/codex/mapper_turn.go
index 377470f..fb94284 100644
--- a/internal/adapters/codex/mapper_turn.go
+++ b/internal/adapters/codex/mapper_turn.go
@@ -14,6 +14,16 @@ import (
 // (mapper-only tests feeding session_meta directly) this derives them from the
 // payload so the event is complete either way.
 func applySessionMeta(ev *canonical.SessionStartedEvent, p *sessionMetaPayload, m *fileMapper) {
+	// session_meta.payload.id is the AUTHORITATIVE native id (spec adapter-codex.md
+	// :290 — parent_thread_id / forked_from_id reference this UUID). It overrides
+	// the filename-seeded value (the scanner derives the id from the rollout
+	// filename as a fallback for a file whose body id is absent). Assigning both
+	// ev.NativeID and m.nativeID makes every subsequent turn/op/log event carry the
+	// authoritative id; RootNativeID is re-derived below (G5).
+	if p.ID != "" {
+		ev.NativeID = p.ID
+		m.nativeID = p.ID
+	}
 	kind, parent := p.classifySource()
 	// forked_from_id wins as the parent only when source did not already name a
 	// sub-agent parent (a fork and a sub-agent are mutually exclusive shapes;
@@ -336,9 +346,27 @@ func (m *fileMapper) finalizeDanglingOps(turnID string, base func() canonical.Ev
 		delete(m.openOps, p.callID)
 		m.recordFinalizedOp(p.callID, p.op)
 	}
+	m.pruneWebSearchQueue(turnID)
 	return out
 }
 
+// pruneWebSearchQueue drops any open web_search refs belonging to a now-closed
+// turn from the FIFO queue (G4), so a web_search_end in a LATER turn never pairs
+// with a stale ref whose op was already dangling-finalized. Preserves FIFO order
+// for the surviving refs.
+func (m *fileMapper) pruneWebSearchQueue(turnID string) {
+	if len(m.openWebSearch) == 0 {
+		return
+	}
+	kept := m.openWebSearch[:0]
+	for _, ws := range m.openWebSearch {
+		if ws.turnID != turnID {
+			kept = append(kept, ws)
+		}
+	}
+	m.openWebSearch = kept
+}
+
 // addTokenUsage folds one token_count event into the attributed turn's C#1
 // rollup: TokensIn/Out += this call's last_token_usage; cache split likewise;
 // CtxUsed candidate = cumulative total_token_usage.total_tokens; CtxMax =
diff --git a/internal/adapters/codex/ops_enrich.go b/internal/adapters/codex/ops_enrich.go
index db8cb7c..0c1e402 100644
--- a/internal/adapters/codex/ops_enrich.go
+++ b/internal/adapters/codex/ops_enrich.go
@@ -35,12 +35,17 @@ func (m *fileMapper) enrichOp(rec record, advance func(int64) canonical.EventBas
 	if !ok {
 		// The op may have already been finalized by its *_output before this
 		// end-event (the ~15-32% output-first ordering): re-emit an OpStarted onto
-		// the finalized op so the Extras still land (F4).
-		return m.enrichFinalizedOp(p.CallID, advance, tsUs, p.Type, extras)
+		// the finalized op so the Extras land AND, when the exec carries an explicit
+		// exit_code, a CORRECTING OpFinalized so a non-zero exit overrides the
+		// output-derived status (G1, spec rule #5/#14 — exit_code is authoritative
+		// in BOTH orders).
+		return m.enrichFinalizedOp(p.CallID, advance, tsUs, p.Type, extras, status, errClass)
 	}
 	// Op still open (exec-first): stash extras + the exec-derived status on the op
 	// and leave it open. Its *_output (or the turn-close dangling finalize) emits
-	// the canonical OpFinalized AND re-emits an OpStarted carrying these Extras.
+	// the canonical OpFinalized AND re-emits an OpStarted carrying these Extras. The
+	// exec status WINS over the output-string heuristic when exec carries an
+	// explicit exit_code (spec rule #5/#14).
 	mergeExtras(op, extras)
 	if status != "" {
 		op.enrichStatus = status
@@ -50,13 +55,15 @@ func (m *fileMapper) enrichOp(rec record, advance func(int64) canonical.EventBas
 }
 
 // enrichFinalizedOp handles an end-event whose op was already finalized by its
-// *_output (output-first ordering, ~15-32% of exec files) (F4). It re-emits an
+// *_output (output-first ordering, ~15-32% of exec files) (F4/G1). It re-emits an
 // OpStarted carrying the enrichment Extras to UPDATE the existing op row
-// (idempotent on (turn,seq) — NOT a second op). When the op cannot be located in
-// finalizedOps (start below a resume offset, or orphaned), a DBG log is the only
-// honest surface. The end-event's status is NOT re-applied here: the *_output
-// already produced the canonical finalize, and the enrichment is supplementary.
-func (m *fileMapper) enrichFinalizedOp(callID string, advance func(int64) canonical.EventBase, tsUs int64, evType string, extras map[string]any) []canonical.Event {
+// (idempotent on (turn,seq) — NOT a second op) AND, when the end-event carries an
+// authoritative terminal status (an exec exit_code — spec rule #5/#14), a
+// CORRECTING OpFinalized so a failed exec that was provisionally finalized as
+// completed from its output string is upserted to failed/command_failed (G1). When
+// the op cannot be located in finalizedOps (start below a resume offset, or
+// orphaned), a DBG log is the only honest surface.
+func (m *fileMapper) enrichFinalizedOp(callID string, advance func(int64) canonical.EventBase, tsUs int64, evType string, extras map[string]any, status, errClass string) []canonical.Event {
 	fop, ok := m.finalizedOps[callID]
 	if !ok {
 		log := map[string]any{"call_id": callID}
@@ -65,10 +72,38 @@ func (m *fileMapper) enrichFinalizedOp(callID string, advance func(int64) canoni
 		}
 		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_"+evType, log)}
 	}
-	if len(extras) == 0 {
+	out := make([]canonical.Event, 0, 2)
+	if len(extras) > 0 {
+		out = append(out, m.reemitOpStarted(fop, advance, tsUs, extras))
+	}
+	// An explicit exit_code-derived status corrects the op's terminal status in the
+	// output-first ordering: the *_output already finalized it (often completed off a
+	// benign-looking output), but a non-zero exit_code is authoritative (G1, spec
+	// rule #5/#14). The writer upserts on (turn,seq), so this overwrites the status.
+	if status != "" {
+		out = append(out, m.correctFinalizedOp(fop, advance, tsUs, status, errClass))
+	}
+	if len(out) == 0 {
 		return nil
 	}
-	return []canonical.Event{m.reemitOpStarted(fop, advance, tsUs, extras)}
+	return out
+}
+
+// correctFinalizedOp emits an OpFinalized that re-applies an authoritative
+// terminal status onto an already-finalized op's (turn,seq) row (G1). Used when an
+// exec_command_end exit_code (output-first ordering) must override the status the
+// op's *_output provisionally set. The writer's ON CONFLICT upsert overwrites the
+// status/errClass on the existing row; EndTs is the enrichment event's timestamp.
+func (m *fileMapper) correctFinalizedOp(fop finalizedOp, advance func(int64) canonical.EventBase, tsUs int64, status, errClass string) canonical.Event {
+	return canonical.OpFinalizedEvent{
+		EventBase:       advance(tsUs),
+		SessionNativeID: m.nativeID,
+		TurnSeq:         fop.turnSeq,
+		Seq:             fop.opSeq,
+		Status:          status,
+		ErrorClass:      errClass,
+		EndTs:           tsUs,
+	}
 }
 
 // finalizeWithExtras emits the op's OpFinalized AND, when the op accumulated
@@ -138,18 +173,19 @@ func (m *fileMapper) recordFinalizedOp(callID string, op *openOp) {
 	}
 }
 
-// enrichWebSearch handles event_msg.web_search_end (F7, spec rule #11). It pairs
-// POSITIONALLY with the active turn's most-recent open web_search op
-// (openWebSearch), because web_search_call carries no correlation key. It
-// finalizes that op completed and re-emits an OpStarted carrying the query Extras
-// (OpFinalized has no Extras field). When no web_search is open (the end is
-// orphaned, or its call was below a resume offset), a DBG log keeps it visible.
+// enrichWebSearch handles event_msg.web_search_end (F7/G4, spec rule #11). It
+// pairs POSITIONALLY with the OLDEST open web_search op (the FRONT of the
+// openWebSearch FIFO queue), because web_search_call carries no correlation key;
+// FIFO order means interleaved searches pair in the order they opened. It
+// finalizes that op completed and re-emits an OpStarted carrying the query +
+// action Extras (OpFinalized has no Extras field). When no web_search is open
+// (the end is orphaned, or its call was below a resume offset), a DBG log keeps it
+// visible.
 func (m *fileMapper) enrichWebSearch(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
-	ws := m.openWebSearch
+	ws := m.dequeueWebSearch()
 	if ws == nil {
 		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "web_search_end_no_call", nil)}
 	}
-	m.openWebSearch = nil
 	extras := webSearchExtras(rec.Raw)
 	op, ok := m.openOps[ws.syntheticCallID]
 	if !ok {
@@ -168,6 +204,26 @@ func (m *fileMapper) enrichWebSearch(rec record, advance func(int64) canonical.E
 	return m.finalizeWithExtras(op, advance, tsUs, "completed", "")
 }
 
+// dequeueWebSearch pops the OLDEST open web_search op (front of the FIFO queue)
+// for positional pairing with a web_search_end (G4). Returns nil when no queued
+// ref is still open (an orphaned end). Drops refs whose op already finalized
+// (its synthetic call_id is gone from openOps) so a stale front entry never
+// shadows a still-open later search.
+func (m *fileMapper) dequeueWebSearch() *openWebSearchRef {
+	for len(m.openWebSearch) > 0 {
+		ws := m.openWebSearch[0]
+		m.openWebSearch = m.openWebSearch[1:]
+		if _, stillOpen := m.openOps[ws.syntheticCallID]; stillOpen {
+			return ws
+		}
+		// The op was finalized at a prior turn close; its query/action can no longer
+		// be paired meaningfully — keep scanning for a still-open search. (Pairing a
+		// closed op would just re-stamp Extras; the FIFO contract is to pair the
+		// oldest STILL-OPEN search, so we drop the closed ref and continue.)
+	}
+	return nil
+}
+
 // enrichMcp handles event_msg.mcp_tool_call_end (spec rule #15). It re-stamps
 // the matching op's ToolNamespace to "mcp:<server>" and Name to the invocation
 // tool by emitting an OpStarted update (the ingester upserts on (turn,seq), so a
@@ -226,29 +282,32 @@ func (m *fileMapper) enrichMcp(rec record, advance func(int64) canonical.EventBa
 	return out
 }
 
-// enrichPatchApply handles event_msg.patch_apply_end (spec rule #16). It
-// finalizes the matching apply_patch op with the success/status from the event.
-// When no op matches, it surfaces a DBG log.
+// enrichPatchApply handles event_msg.patch_apply_end (spec rule #16,
+// adapter-codex.md:361). It is ORDER-INDEPENDENT (G2), mirroring the exec fix:
+//   - op still OPEN (the apply_patch function_call_output has not arrived):
+//     finalize the op with the success/status-derived terminal status and merge
+//     {patch_success, patch_status} into its Extras via an OpStarted re-emit.
+//   - op already FINALIZED (output-first): re-emit an OpStarted carrying the
+//     extras AND a correcting OpFinalized so success=false upserts the op to
+//     failed/patch_failed (spec rule #16 "Set Op Status accordingly").
+//   - op NOT locatable: a DBG log is the only honest surface.
 func (m *fileMapper) enrichPatchApply(rec record, advance func(int64) canonical.EventBase, tsUs int64) []canonical.Event {
 	p := rec.EventMsg
-	op, ok := m.openOps[p.CallID]
 	status, errClass := patchApplyStatus(rec.Raw)
+	extras := patchApplyExtras(rec.Raw)
+	op, ok := m.openOps[p.CallID]
 	if !ok {
-		return []canonical.Event{m.logEntry(advance(tsUs), "DBG", "enrich_patch_apply_end", map[string]any{"call_id": p.CallID, "status": status})}
+		// Output-first ordering: the op was already finalized by its
+		// function_call_output — merge the extras and correct the status on its row.
+		return m.enrichFinalizedOp(p.CallID, advance, tsUs, p.Type, extras, status, errClass)
 	}
 	op.finalized = true
-	fin := canonical.OpFinalizedEvent{
-		EventBase:       advance(tsUs),
-		SessionNativeID: m.nativeID,
-		TurnSeq:         op.turnSeq,
-		Seq:             op.opSeq,
-		Status:          status,
-		ErrorClass:      errClass,
-		EndTs:           tsUs,
-	}
+	mergeExtras(op, extras)
 	delete(m.openOps, p.CallID)
 	m.recordFinalizedOp(p.CallID, op)
-	return []canonical.Event{fin}
+	// finalizeWithExtras emits the OpStarted (carrying patch_success/patch_status)
+	// followed by the OpFinalized with the success-derived status.
+	return m.finalizeWithExtras(op, advance, tsUs, status, errClass)
 }
 
 // mergeExtras folds enrichment extras onto a tracked op so its eventual finalize
diff --git a/internal/adapters/codex/ops_enrich_decode.go b/internal/adapters/codex/ops_enrich_decode.go
index 53821eb..5e500e7 100644
--- a/internal/adapters/codex/ops_enrich_decode.go
+++ b/internal/adapters/codex/ops_enrich_decode.go
@@ -7,18 +7,39 @@ import "encoding/json"
 // mcp_tool_call_end, patch_apply_end). They are pure functions with no mapper
 // state, split from ops_enrich.go so the enrichment dispatch stays focused.
 
+// execDuration is the Rust std::time::Duration wire shape codex serializes for
+// exec_command_end.duration: ALWAYS the {secs,nanos} object (real corpus: 20000/
+// 20000 exec_command_end lines, keys always nanos,secs — never a bare number or
+// string). A typed decoder so the emitted extras never carry an untyped `any`
+// (G3).
+type execDuration struct {
+	Secs  int64 `json:"secs"`
+	Nanos int64 `json:"nanos"`
+}
+
+// millis normalizes the {secs,nanos} duration to integer milliseconds (spec rule
+// #14 exec_duration_ms). Returns -1 when the field was absent (secs and nanos both
+// zero is a legitimate sub-millisecond command, so 0 is a real value, not "missing").
+func (d *execDuration) millis(present bool) int64 {
+	if !present {
+		return -1
+	}
+	return d.Secs*1000 + d.Nanos/1_000_000
+}
+
 // execCommandExtras extracts the exec_command_end telemetry merged into the op
-// (spec rule #14): exit_code, duration, cwd, source, and the truncated
-// aggregated_output length (the body itself is blanked at the source in Limited
-// mode — only aggregated_output survives, truncated to 10 KB).
+// (spec rule #14): exit_code, duration (normalized to exec_duration_ms), cwd,
+// source, and the truncated aggregated_output length (the body itself is blanked
+// at the source in Limited mode — only aggregated_output survives, truncated to
+// 10 KB).
 func execCommandExtras(raw []byte) map[string]any {
 	var env struct {
 		Payload struct {
-			ExitCode         *int64 `json:"exit_code"`
-			Duration         any    `json:"duration"`
-			Cwd              string `json:"cwd"`
-			Source           string `json:"source"`
-			AggregatedOutput string `json:"aggregated_output"`
+			ExitCode         *int64        `json:"exit_code"`
+			Duration         *execDuration `json:"duration"`
+			Cwd              string        `json:"cwd"`
+			Source           string        `json:"source"`
+			AggregatedOutput string        `json:"aggregated_output"`
 		} `json:"payload"`
 	}
 	if json.Unmarshal(raw, &env) != nil {
@@ -28,6 +49,9 @@ func execCommandExtras(raw []byte) map[string]any {
 	if env.Payload.ExitCode != nil {
 		extras["exec_exit_code"] = *env.Payload.ExitCode
 	}
+	if ms := env.Payload.Duration.millis(env.Payload.Duration != nil); ms >= 0 {
+		extras["exec_duration_ms"] = ms
+	}
 	if env.Payload.Cwd != "" {
 		extras["exec_cwd"] = env.Payload.Cwd
 	}
@@ -43,20 +67,65 @@ func execCommandExtras(raw []byte) map[string]any {
 	return extras
 }
 
-// webSearchExtras extracts event_msg.web_search_end query/action (spec rule #11).
+// webSearchExtras extracts event_msg.web_search_end query + action (spec rule
+// #11). The end event's `action` is ALWAYS an object discriminated by `type`
+// (real corpus: search | open_page | find_in_page | other); webSearchAction
+// reduces it to a compact map carrying the type plus the variant's url / query /
+// pattern (G4). Returns nil only when neither a query nor an action is present.
 func webSearchExtras(raw []byte) map[string]any {
 	var env struct {
 		Payload struct {
-			Query string `json:"query"`
+			Query  string          `json:"query"`
+			Action json.RawMessage `json:"action"`
 		} `json:"payload"`
 	}
 	if json.Unmarshal(raw, &env) != nil {
 		return nil
 	}
-	if env.Payload.Query == "" {
+	extras := map[string]any{}
+	if env.Payload.Query != "" {
+		extras["query"] = trimPreview(env.Payload.Query, previewMax)
+	}
+	if action := webSearchAction(env.Payload.Action); action != nil {
+		extras["action"] = action
+	}
+	if len(extras) == 0 {
 		return nil
 	}
-	return map[string]any{"query": trimPreview(env.Payload.Query, previewMax)}
+	return extras
+}
+
+// webSearchAction reduces web_search_end.action to a compact Extras map (G4, spec
+// rule #11). The action is an object discriminated by `type`; only the
+// type-relevant scalar fields are surfaced (url for open_page; query for search;
+// pattern + optional url for find_in_page). The queries[] array is dropped (the
+// scalar `query` already carries the primary term) so the Extras stays compact.
+// Returns nil when the action is absent or carries no type.
+func webSearchAction(raw json.RawMessage) map[string]any {
+	body := jsonTrim(raw)
+	if len(body) == 0 {
+		return nil
+	}
+	var a struct {
+		Type    string `json:"type"`
+		URL     string `json:"url"`
+		Query   string `json:"query"`
+		Pattern string `json:"pattern"`
+	}
+	if json.Unmarshal(body, &a) != nil || a.Type == "" {
+		return nil
+	}
+	out := map[string]any{"type": a.Type}
+	if a.URL != "" {
+		out["url"] = trimPreview(a.URL, previewMax)
+	}
+	if a.Query != "" {
+		out["query"] = trimPreview(a.Query, previewMax)
+	}
+	if a.Pattern != "" {
+		out["pattern"] = trimPreview(a.Pattern, previewMax)
+	}
+	return out
 }
 
 // enrichStatus derives a terminal status/ErrorClass from an end-event carrying
@@ -152,3 +221,29 @@ func patchApplyStatus(raw []byte) (status, errClass string) {
 	}
 	return "completed", ""
 }
+
+// patchApplyExtras extracts the patch_apply_end success/status merged onto the
+// apply_patch op (spec rule #16, adapter-codex.md:361 "Merge success, status into
+// the op's Extras") (G2). Returns nil when neither field is present.
+func patchApplyExtras(raw []byte) map[string]any {
+	var env struct {
+		Payload struct {
+			Success *bool  `json:"success"`
+			Status  string `json:"status"`
+		} `json:"payload"`
+	}
+	if json.Unmarshal(raw, &env) != nil {
+		return nil
+	}
+	extras := map[string]any{}
+	if env.Payload.Success != nil {
+		extras["patch_success"] = *env.Payload.Success
+	}
+	if env.Payload.Status != "" {
+		extras["patch_status"] = env.Payload.Status
+	}
+	if len(extras) == 0 {
+		return nil
+	}
+	return extras
+}
diff --git a/internal/adapters/codex/ops_tools.go b/internal/adapters/codex/ops_tools.go
index 8f14332..b7a3bf1 100644
--- a/internal/adapters/codex/ops_tools.go
+++ b/internal/adapters/codex/ops_tools.go
@@ -113,13 +113,14 @@ func (m *fileMapper) mapToolOutput(p *responseItemPayload, advance func(int64) c
 	return out
 }
 
-// mapWebSearchCall handles response_item.web_search_call (spec rule #11, F7). It
-// emits a tool op (Name=web_search, namespace=web). web_search_call carries
+// mapWebSearchCall handles response_item.web_search_call (spec rule #11, F7/G4).
+// It emits a tool op (Name=web_search, namespace=web). web_search_call carries
 // NEITHER id NOR call_id, so the op is NOT tracked by call_id; instead it is
-// recorded as the active turn's most-recent open web_search op (openWebSearch)
-// for POSITIONAL pairing with the companion event_msg.web_search_end (which DOES
-// carry a call_id, but for a DIFFERENT correlation space). If no end arrives the
-// op finalizes at turn close as a dangling op (it is tracked under a synthetic
+// appended to the FIFO queue of open web_search ops (openWebSearch) for POSITIONAL
+// pairing with a later event_msg.web_search_end (which DOES carry a call_id, but
+// for a DIFFERENT correlation space). Each web_search_end finalizes the OLDEST
+// queued op, so interleaved searches pair in order. If no end arrives the op
+// finalizes at turn close as a dangling op (it is tracked under a synthetic
 // per-op call_id so finalizeDanglingOps closes it).
 func (m *fileMapper) mapWebSearchCall(p *responseItemPayload, advance func(int64) canonical.EventBase, tsUs, bodyBytes int64) []canonical.Event {
 	ts := m.ensureTurn(tsUs)
@@ -146,7 +147,7 @@ func (m *fileMapper) mapWebSearchCall(p *responseItemPayload, advance func(int64
 	// call_id (the "ws#" prefix is not a codex call_id form).
 	synthetic := fmt.Sprintf("ws#%d:%d", turnSeq, opSeq)
 	m.trackOp(synthetic, m.activeTurnID, turnSeq, opSeq, canonical.OpTool, "web_search", "web")
-	m.openWebSearch = &openWebSearchRef{turnID: m.activeTurnID, turnSeq: turnSeq, opSeq: opSeq, syntheticCallID: synthetic}
+	m.openWebSearch = append(m.openWebSearch, &openWebSearchRef{turnID: m.activeTurnID, turnSeq: turnSeq, opSeq: opSeq, syntheticCallID: synthetic})
 	return out
 }
 
diff --git a/testdata/codex/b_old_turncontext/expected.jsonl b/testdata/codex/b_old_turncontext/expected.jsonl
index e18b883..d3d90e9 100644
--- a/testdata/codex/b_old_turncontext/expected.jsonl
+++ b/testdata/codex/b_old_turncontext/expected.jsonl
@@ -14,5 +14,5 @@
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.1-codex-max","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1763657955000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24579,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2025/11/20/rollout-2025-11-20T18-59-09-019aa234-0000-7000-8000-00000000000b.jsonl#L7","OriginalBytes":175,"StoredBytes":0,"SHA256":""}}
-{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1780138387665997,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1780138387665997,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
-{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1780138387665997,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"on-request","effort":"medium","sandbox":"read-only"}}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1763657955000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1763657955000000,"SessionNativeID":"019aa234-0000-7000-8000-00000000000b","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"on-request","effort":"medium","sandbox":"read-only"}}}
diff --git a/testdata/codex/f_exec_truncated/expected.jsonl b/testdata/codex/f_exec_truncated/expected.jsonl
index 20e2a6d..b8713c0 100644
--- a/testdata/codex/f_exec_truncated/expected.jsonl
+++ b/testdata/codex/f_exec_truncated/expected.jsonl
@@ -6,7 +6,7 @@
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1774992733000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L4","OriginalBytes":170,"StoredBytes":0,"SHA256":""}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_f1"}}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1774992734000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L5","OriginalBytes":196,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"exec_cwd":"\u003cROOT\u003e/project","exec_exit_code":0,"exec_output_bytes":38,"exec_source":"unified_exec"}}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"exec_cwd":"\u003cROOT\u003e/project","exec_duration_ms":4000,"exec_exit_code":0,"exec_output_bytes":38,"exec_source":"unified_exec"}}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1774992738100000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1774992738100000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/03/31/rollout-2026-03-31T21-32-12-019d452a-0000-7000-8000-00000000000f.jsonl#L7","OriginalBytes":149,"StoredBytes":0,"SHA256":""}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1774992739000000,"SessionNativeID":"019d452a-0000-7000-8000-00000000000f","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
diff --git a/testdata/codex/k_web_search/expected.jsonl b/testdata/codex/k_web_search/expected.jsonl
index ef01360..260b45c 100644
--- a/testdata/codex/k_web_search/expected.jsonl
+++ b/testdata/codex/k_web_search/expected.jsonl
@@ -6,7 +6,7 @@
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1779440701000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl#L4","OriginalBytes":189,"StoredBytes":0,"SHA256":""}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1779440702000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
 {"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1779440702000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/05/22/rollout-2026-05-22T09-05-00-019eac30-0000-7000-8000-000000000012.jsonl#L5","OriginalBytes":179,"StoredBytes":0,"SHA256":""}}
-{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1779440704000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"query":"latest release notes"}}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1779440704000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"action":{"query":"latest release notes","type":"search"},"query":"latest release notes"}}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1779440704000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440704000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
 {"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1779440705000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
 {"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24578,"Ts":1779440705000000,"SessionNativeID":"019eac30-0000-7000-8000-000000000012","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1779440705000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
diff --git a/testdata/codex/l_exec_failed/INPUT/codex-home/sessions/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl b/testdata/codex/l_exec_failed/INPUT/codex-home/sessions/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl
new file mode 100644
index 0000000..bba2f4e
--- /dev/null
+++ b/testdata/codex/l_exec_failed/INPUT/codex-home/sessions/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl
@@ -0,0 +1,9 @@
+{"timestamp":"2026-04-02T10:15:30.100Z","type":"session_meta","payload":{"id":"019d8a30-0000-7000-8000-000000000031","timestamp":"2026-04-02T10:15:30.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.122.0","source":"exec","git":{"commit_hash":"6666666666666666666666666666666666666666","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-04-02T10:15:30.200Z","type":"turn_context","payload":{"turn_id":"019d8a30-1111-7000-8000-000000000041","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-04-02T10:15:30.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d8a30-1111-7000-8000-000000000041","started_at":1775470530,"model_context_window":258400}}
+{"timestamp":"2026-04-02T10:15:31.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"run the failing build"}]}}
+{"timestamp":"2026-04-02T10:15:32.000Z","type":"response_item","payload":{"type":"function_call","name":"shell","call_id":"call_l1","arguments":"{\"command\":[\"bash\",\"-lc\",\"exit 2\"]}"}}
+{"timestamp":"2026-04-02T10:15:33.000Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_l1","output":"build output line one\nbuild output line two"}}
+{"timestamp":"2026-04-02T10:15:33.200Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_l1","turn_id":"019d8a30-1111-7000-8000-000000000041","command":["bash","-lc","exit 2"],"cwd":"<ROOT>/project","stdout":"","stderr":"","formatted_output":"","aggregated_output":"build output line one\nbuild output line two","exit_code":2,"duration":{"secs":1,"nanos":250000000},"source":"unified_exec","status":"failed"}}
+{"timestamp":"2026-04-02T10:15:34.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the build failed with exit code 2"}]}}
+{"timestamp":"2026-04-02T10:15:34.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d8a30-1111-7000-8000-000000000041","completed_at":1775470534,"duration_ms":4400}}
diff --git a/testdata/codex/l_exec_failed/expected.jsonl b/testdata/codex/l_exec_failed/expected.jsonl
new file mode 100644
index 0000000..f8c1d7c
--- /dev/null
+++ b/testdata/codex/l_exec_failed/expected.jsonl
@@ -0,0 +1,18 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775124930100000,"NativeID":"019d8a30-0000-7000-8000-000000000031","RootNativeID":"019d8a30-0000-7000-8000-000000000031","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.122.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"6666666666666666666666666666666666666666","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1775124930200000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1775124930200000,"NativeID":"019d8a30-0000-7000-8000-000000000031","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1775124931000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1775124931000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775124931000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1775124931000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl#L4","OriginalBytes":171,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1775124932000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_l1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1775124932000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl#L5","OriginalBytes":191,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1775124933000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775124933000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1775124933000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl#L6","OriginalBytes":181,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1775124933200000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"shell","ToolNamespace":"shell","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"exec_cwd":"\u003cROOT\u003e/project","exec_duration_ms":1250,"exec_exit_code":2,"exec_output_bytes":43,"exec_source":"unified_exec"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1775124933200000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":2,"Status":"failed","ErrorClass":"command_failed","ErrorMessage":"","EndTs":1775124933200000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1775124934000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1775124934000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775124934000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1775124934000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/02/rollout-2026-04-02T10-15-30-019d8a30-0000-7000-8000-000000000031.jsonl#L8","OriginalBytes":189,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1775470534000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775124934000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1775470534000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1775470534000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1775470534000000,"SessionNativeID":"019d8a30-0000-7000-8000-000000000031","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d8a30-1111-7000-8000-000000000041","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/m_multi_web_search/INPUT/codex-home/sessions/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl b/testdata/codex/m_multi_web_search/INPUT/codex-home/sessions/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl
new file mode 100644
index 0000000..c8a0eed
--- /dev/null
+++ b/testdata/codex/m_multi_web_search/INPUT/codex-home/sessions/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl
@@ -0,0 +1,10 @@
+{"timestamp":"2026-04-03T11:20:00.100Z","type":"session_meta","payload":{"id":"019d8f40-0000-7000-8000-000000000051","timestamp":"2026-04-03T11:20:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"7777777777777777777777777777777777777777","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-04-03T11:20:00.200Z","type":"turn_context","payload":{"turn_id":"019d8f40-1111-7000-8000-000000000061","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-04-03T11:20:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d8f40-1111-7000-8000-000000000061","started_at":1775560800,"model_context_window":258400}}
+{"timestamp":"2026-04-03T11:20:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"research two topics online"}]}}
+{"timestamp":"2026-04-03T11:20:02.000Z","type":"response_item","payload":{"type":"web_search_call","status":"completed","action":{"type":"search","query":"first topic overview"}}}
+{"timestamp":"2026-04-03T11:20:02.500Z","type":"response_item","payload":{"type":"web_search_call","status":"completed","action":{"type":"open_page","url":"https://docs.example.invalid/second-topic"}}}
+{"timestamp":"2026-04-03T11:20:03.000Z","type":"event_msg","payload":{"type":"web_search_end","call_id":"ws_remote_a","query":"first topic overview","action":{"type":"search","query":"first topic overview","queries":["first topic overview","first topic deep dive"]}}}
+{"timestamp":"2026-04-03T11:20:03.500Z","type":"event_msg","payload":{"type":"web_search_end","call_id":"ws_remote_b","action":{"type":"open_page","url":"https://docs.example.invalid/second-topic"}}}
+{"timestamp":"2026-04-03T11:20:04.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"both topics researched"}]}}
+{"timestamp":"2026-04-03T11:20:04.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d8f40-1111-7000-8000-000000000061","completed_at":1775560804,"duration_ms":4400}}
diff --git a/testdata/codex/m_multi_web_search/expected.jsonl b/testdata/codex/m_multi_web_search/expected.jsonl
new file mode 100644
index 0000000..181bac8
--- /dev/null
+++ b/testdata/codex/m_multi_web_search/expected.jsonl
@@ -0,0 +1,20 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775215200100000,"NativeID":"019d8f40-0000-7000-8000-000000000051","RootNativeID":"019d8f40-0000-7000-8000-000000000051","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"7777777777777777777777777777777777777777","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1775215200200000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1775215200200000,"NativeID":"019d8f40-0000-7000-8000-000000000051","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1775215201000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1775215201000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775215201000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1775215201000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl#L4","OriginalBytes":176,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1775215202000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1775215202000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl#L5","OriginalBytes":179,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1775215202500000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1775215202500000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"OpSeq":3,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl#L6","OriginalBytes":201,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1775215203000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"action":{"query":"first topic overview","type":"search"},"query":"first topic overview"}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1775215203000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775215203000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28672,"Ts":1775215203500000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"tool","Name":"web_search","ToolNamespace":"web","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"action":{"type":"open_page","url":"https://docs.example.invalid/second-topic"}}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1775215203500000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775215203500000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1775215204000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":4,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1775215204000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775215204000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32771,"Ts":1775215204000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"OpSeq":4,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/03/rollout-2026-04-03T11-20-00-019d8f40-0000-7000-8000-000000000051.jsonl#L9","OriginalBytes":178,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36864,"Ts":1775560804000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"Seq":4,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775215204000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36865,"Ts":1775560804000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1775560804000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":36866,"Ts":1775560804000000,"SessionNativeID":"019d8f40-0000-7000-8000-000000000051","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d8f40-1111-7000-8000-000000000061","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/n_patch_apply/INPUT/codex-home/sessions/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl b/testdata/codex/n_patch_apply/INPUT/codex-home/sessions/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl
new file mode 100644
index 0000000..baccb73
--- /dev/null
+++ b/testdata/codex/n_patch_apply/INPUT/codex-home/sessions/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl
@@ -0,0 +1,9 @@
+{"timestamp":"2026-04-04T12:30:00.100Z","type":"session_meta","payload":{"id":"019d9450-0000-7000-8000-000000000071","timestamp":"2026-04-04T12:30:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.124.0","source":"exec","git":{"commit_hash":"9999999999999999999999999999999999999999","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-04-04T12:30:00.200Z","type":"turn_context","payload":{"turn_id":"019d9450-1111-7000-8000-000000000081","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-04-04T12:30:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d9450-1111-7000-8000-000000000081","started_at":1775651400,"model_context_window":258400}}
+{"timestamp":"2026-04-04T12:30:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"apply the patch"}]}}
+{"timestamp":"2026-04-04T12:30:02.000Z","type":"response_item","payload":{"type":"function_call","name":"apply_patch","call_id":"call_n1","arguments":"{\"input\":\"*** Begin Patch\\n*** Update File: missing.txt\\n*** End Patch\"}"}}
+{"timestamp":"2026-04-04T12:30:02.500Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_n1","output":"applying patch"}}
+{"timestamp":"2026-04-04T12:30:03.000Z","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"call_n1","turn_id":"019d9450-1111-7000-8000-000000000081","stdout":"","stderr":"file not found: missing.txt","changes":{},"success":false,"status":"failed"}}
+{"timestamp":"2026-04-04T12:30:04.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"the patch failed to apply"}]}}
+{"timestamp":"2026-04-04T12:30:04.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d9450-1111-7000-8000-000000000081","completed_at":1775651404,"duration_ms":4400}}
diff --git a/testdata/codex/n_patch_apply/expected.jsonl b/testdata/codex/n_patch_apply/expected.jsonl
new file mode 100644
index 0000000..1b69668
--- /dev/null
+++ b/testdata/codex/n_patch_apply/expected.jsonl
@@ -0,0 +1,18 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775305800100000,"NativeID":"019d9450-0000-7000-8000-000000000071","RootNativeID":"019d9450-0000-7000-8000-000000000071","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.124.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"9999999999999999999999999999999999999999","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1775305800200000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1775305800200000,"NativeID":"019d9450-0000-7000-8000-000000000071","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1775305801000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1775305801000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775305801000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1775305801000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl#L4","OriginalBytes":165,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1775305802000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"apply_patch","ToolNamespace":"fs","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"call_id":"call_n1"}}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1775305802000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl#L5","OriginalBytes":232,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1775305802500000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775305802500000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1775305802500000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"OpSeq":2,"PayloadKind":"tool_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl#L6","OriginalBytes":151,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24576,"Ts":1775305803000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"tool","Name":"apply_patch","ToolNamespace":"fs","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":{"patch_status":"failed","patch_success":false}}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":24577,"Ts":1775305803000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":2,"Status":"failed","ErrorClass":"patch_failed","ErrorMessage":"","EndTs":1775305803000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28673,"Ts":1775305804000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":3,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28674,"Ts":1775305804000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775305804000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":28675,"Ts":1775305804000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"OpSeq":3,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/04/rollout-2026-04-04T12-30-00-019d9450-0000-7000-8000-000000000071.jsonl#L8","OriginalBytes":181,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32768,"Ts":1775651404000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"Seq":3,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775305804000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32769,"Ts":1775651404000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1775651404000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":32770,"Ts":1775651404000000,"SessionNativeID":"019d9450-0000-7000-8000-000000000071","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d9450-1111-7000-8000-000000000081","effort":"high","sandbox":"workspace-write"}}}
diff --git a/testdata/codex/o_payload_id_filename/INPUT/codex-home/sessions/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl b/testdata/codex/o_payload_id_filename/INPUT/codex-home/sessions/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl
new file mode 100644
index 0000000..6517dbd
--- /dev/null
+++ b/testdata/codex/o_payload_id_filename/INPUT/codex-home/sessions/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl
@@ -0,0 +1,6 @@
+{"timestamp":"2026-04-05T13:40:00.100Z","type":"session_meta","payload":{"id":"019d9960-0000-7000-8000-000000000091","timestamp":"2026-04-05T13:40:00.100Z","cwd":"<ROOT>/project","originator":"codex_exec","cli_version":"0.128.0","source":"exec","git":{"commit_hash":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","branch":"main","repository_url":"git@github.com:example/example.git"}}}
+{"timestamp":"2026-04-05T13:40:00.200Z","type":"turn_context","payload":{"turn_id":"019d9960-1111-7000-8000-0000000000a1","cwd":"<ROOT>/project","model":"gpt-5.5","effort":"high","approval_policy":"never","sandbox_policy":{"type":"workspace-write"},"summary":"none"}}
+{"timestamp":"2026-04-05T13:40:00.300Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019d9960-1111-7000-8000-0000000000a1","started_at":1775742000,"model_context_window":258400}}
+{"timestamp":"2026-04-05T13:40:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"say hello"}]}}
+{"timestamp":"2026-04-05T13:40:02.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hello"}]}}
+{"timestamp":"2026-04-05T13:40:02.500Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"019d9960-1111-7000-8000-0000000000a1","completed_at":1775742002,"duration_ms":2500}}
diff --git a/testdata/codex/o_payload_id_filename/expected.jsonl b/testdata/codex/o_payload_id_filename/expected.jsonl
new file mode 100644
index 0000000..cfe8e07
--- /dev/null
+++ b/testdata/codex/o_payload_id_filename/expected.jsonl
@@ -0,0 +1,12 @@
+{"kind":"session_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":0,"Ts":1775396400100000,"NativeID":"019d9960-0000-7000-8000-000000000091","RootNativeID":"019d9960-0000-7000-8000-000000000091","ParentNativeID":"","ParentOpKey":"","Kind":"root","AgentName":"codex:codex_exec","Model":"","Cwd":"\u003cROOT\u003e/project","CallPath":"","Extras":{"cli_version":"0.128.0","cwd":"\u003cROOT\u003e/project","git":{"branch":"main","commit_hash":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","repository_url":"git@github.com:example/example.git"},"originator":"codex_exec","source":"exec"}}}
+{"kind":"turn_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4096,"Ts":1775396400200000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","Seq":1}}
+{"kind":"session_updated","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":4097,"Ts":1775396400200000,"NativeID":"019d9960-0000-7000-8000-000000000091","AgentName":"","Model":"gpt-5.5","Cwd":"","Status":"","Extras":null}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12289,"Ts":1775396401000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"Seq":1,"ParentOpSeq":-1,"Kind":"internal","Name":"user_input","ToolNamespace":"","Model":"","Provider":"","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12290,"Ts":1775396401000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"Seq":1,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775396401000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":12291,"Ts":1775396401000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"OpSeq":1,"PayloadKind":"tool_request","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl#L4","OriginalBytes":159,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_started","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16385,"Ts":1775396402000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"Seq":2,"ParentOpSeq":-1,"Kind":"llm","Name":"message","ToolNamespace":"","Model":"gpt-5.5","Provider":"openai","ProviderAlias":"","ReasoningKind":"","ChildSessionNativeID":"","Extras":null}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16386,"Ts":1775396402000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775396402000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":0}}
+{"kind":"payload_ref","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":16387,"Ts":1775396402000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"OpSeq":2,"PayloadKind":"llm_response","Format":"json","Compression":"","LocationURI":"file://\u003cROOT\u003e/2026/04/05/rollout-2026-04-05T13-40-00-019d9960-0000-7000-8000-0000000000ff.jsonl#L5","OriginalBytes":161,"StoredBytes":0,"SHA256":""}}
+{"kind":"op_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20480,"Ts":1775742002000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"Seq":2,"Status":"completed","ErrorClass":"","ErrorMessage":"","EndTs":1775396402000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0,"BytesIn":0,"BytesOut":0,"CharsIn":0,"CharsOut":0,"CtxUsed":0,"CtxMax":258400}}
+{"kind":"turn_finalized","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20481,"Ts":1775742002000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","Seq":1,"Status":"completed","ErrorClass":"","EndTs":1775742002000000,"TokensIn":0,"TokensOut":0,"TokensCacheRead":0,"TokensCacheWrite":0,"CostUSD":0}}
+{"kind":"log_entry","payload":{"SourceID":"codex:\u003cROOT\u003e","SourceSeq":20482,"Ts":1775742002000000,"SessionNativeID":"019d9960-0000-7000-8000-000000000091","TurnSeq":1,"OpSeq":0,"Severity":"INF","Source":"codex","Message":"turn_meta","Extras":{"approval_policy":"never","codex_turn_id":"019d9960-1111-7000-8000-0000000000a1","effort":"high","sandbox":"workspace-write"}}}

From 86ed50e934e8bf133555961eec0faba2293a248a Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 16:32:17 +0300
Subject: [PATCH 09/13] ingest: idempotent catalog rollups under op
 re-emission; codex hygiene

The catalog aggregates (catalog_tools/models/providers) added on every op
event, so any adapter that re-emits an op (codex late enrichment + EOF
finalize; claude_code MCP correction) double-counted call counts and
failure/token/duration/cost totals. Make the rollups idempotent per op so
re-emission and post-restart re-finalization are correct for every adapter.

- onOpStarted takes an `inserted` signal (applyOpStarted probes
  requireOpExists before the upsert): call_count increments only on a
  genuine new op, 0 on a re-emit.
- onOpFinalized applies a (now - prior) delta: applyOpFinalized captures
  the op's persisted terminal contribution (status/tokens/cost/duration)
  before its UPDATE, so a first finalize adds the full amount (single-
  emission unchanged), a re-emit adds zero, and a completed->failed
  correction adds +1 failure exactly once. Restart-safe (reads persisted
  prior, not in-memory). Duration delta via the COALESCE'd column so a
  NULL-duration re-finalize yields delta 0.

Codex hygiene:
- finalizedOp stores the prior terminal status; the output-first
  enrichment path emits a correcting OpFinalized ONLY when the status
  actually changes (an exit-0 exec on an already-completed op no longer
  re-finalizes; an exit-2 still corrects to failed/command_failed once).
- a per-file EOFFinalizedSize marker is persisted in the cursor so
  finalizeAtEOF does not re-fire the EOF TurnFinalized/SessionFinalized on
  an unchanged rescan/restart; a real append (size grew) re-opens normally.

Also commits the previously-missed F8 shard-depth test
(cmd/ai-viewer-ingest/sources_test.go) that pins the wrong-depth-rollout
exclusion. New tests: catalog idempotency (re-emit no double-count;
status-correction delta-once), output-first-exit-0 no-spurious-refinalize,
and EOF-not-re-fired-on-rescan. Gates green: golangci(0)/gosec(0)/vet;
race 13/13; codex 92.5% / ingest 88.4% coverage; FuzzParseLine 0 crashes;
goldens byte-identical; secret + AI-attribution scans clean.
---
 .../SOW-0004-20260526-codex-adapter.md        |  19 ++
 cmd/ai-viewer-ingest/sources_test.go          |  18 +-
 .../adapters/codex/adapter_restart_test.go    | 137 ++++++++++++
 internal/adapters/codex/cursor.go             |  11 +
 .../adapters/codex/mapper_coverage_test.go    |  50 +++++
 internal/adapters/codex/mapper_state.go       |  10 +
 internal/adapters/codex/mapper_turn.go        |   2 +-
 internal/adapters/codex/ops_enrich.go         |  24 +-
 internal/adapters/codex/ops_tools.go          |   8 +-
 internal/adapters/codex/scanner.go            |  17 +-
 internal/ingest/catalog.go                    | 138 +++++++++---
 internal/ingest/catalog_idempotency_test.go   | 206 ++++++++++++++++++
 internal/ingest/error_paths_test.go           |  10 +-
 internal/ingest/writer.go                     |  54 ++++-
 14 files changed, 653 insertions(+), 51 deletions(-)
 create mode 100644 internal/ingest/catalog_idempotency_test.go

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
index c32cd0b..835e5cf 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
@@ -219,6 +219,25 @@ Adjudicated on ground truth (spec lines + a read-only investigation of the real
 
 **Round-3 fix plan:** (G1) exec exit_code authoritative for op status in both orders — exec-first applies at finalize, output-first emits a corrected `OpFinalized(failed,command_failed)` via the finalizedOps lookup; (G2) patch_apply_end uses the same finalizedOps path + merges success/status; (G3) emit `exec_duration_ms` (normalize the duration value to ms); (G4) FIFO queue of open web_search ops per turn + decode/emit `action`; (G5) set NativeID from `payload.id` (filename only as fallback). Add fixtures: failed-exec (output-first, corrected status), patch_apply_end, multi-web-search, and regenerate f_exec_truncated with `exec_duration_ms`.
 
+### Round 3 (2026-05-30) — same scope + fix notes
+
+- **glm**: SAFE TO MERGE, P1:0 P2:0 P3:0 — all G1–G6 "correctly and completely resolved".
+- **minimax**: SAFE, all G1–G6 correct, zero regressions, ready to merge.
+- **codex**: NOT SAFE. Confirmed G1–G6 resolved, but the round-2/3 op re-emission collides with a non-idempotent catalog:
+
+| # | Sev | Finding | Ground-truth verdict |
+|---|---|---|---|
+| H1 | P1 | Late-enrichment op re-emission double-counts catalog rollups (re-emit `OpStarted` → `call_count+1` again; correcting `OpFinalized` → failure/tokens/duration added again); also fires even when status is unchanged (exit 0) | CONFIRMED. catalog.go:108/143 ADD unconditionally (not idempotent). |
+| H2 | P2 | `finalizeAtEOF` re-fires the EOF `TurnFinalized` on every unchanged rescan/restart (`eofFinalized` is per-mapper-instance; the cursor has no EOF-finalized marker) | CONFIRMED. scanner.go:230 + mapper.go:153 + cursor.go:43. |
+
+**Root-cause decision (CTO).** The double-count is the **catalog-idempotency-under-re-emission** gap already tracked as **SOW-0020**. It is **pre-existing and structural** (catalog.go ADDs on every event; merged `claude_code` also re-emits Op events at ops.go:75/138/505) and currently **latent** (no shipped API/presenter reads `catalog_*`; only the future stats UI / SOW-0007 will). The codex adapter cannot be simultaneously status-correct (G1) and catalog-non-corrupting (H1) without idempotent catalog aggregation, so SOW-0020 is a genuine prerequisite. Decision: absorb the SOW-0020 catalog-idempotency fix into this convergence (a justified, in-scope-by-necessity ingest change — see Pre-Implementation Gate Addendum below), plus the codex-scoped hygiene fixes.
+
+**Round-4 fix plan:** (H1a, ingest) make the catalog idempotent under op re-emission — `onOpStarted` counts a call once per op (only on a genuine insert), and `onOpFinalized` contributes failure/tokens/duration exactly once, as a (now − prior) delta, so a corrected re-finalize updates rather than double-adds; (H1b, codex) store the prior terminal status in `finalizedOp` and emit the correcting `OpFinalized` ONLY when the status actually changes (no spurious re-finalize for exit 0); (H2, codex) persist an EOF-finalized marker in the cursor so `finalizeAtEOF` does not re-fire on an unchanged rescan/restart. Add ingest tests pinning catalog idempotency under a re-emitted op, and a restart test pinning no duplicate EOF finalize.
+
+### Pre-Implementation Gate Addendum (2026-05-30) — ingest scope expansion
+
+The original gate scoped this SOW to `internal/adapters/codex/` + the additive `sources.go` probe, with "no canonical/ingest/store change". Round-3 review (codex H1) proved the codex adapter cannot be correct without idempotent catalog aggregation under op re-emission (a pre-existing `internal/ingest/catalog.go` gap, SOW-0020, that the codex replay-from-0 + enrichment + EOF-finalize design is the first to heavily exercise). Scope is therefore expanded to include the catalog-idempotency fix in `internal/ingest/catalog.go` (and a minimal `writer.go` insert-vs-update signal if needed). Blast radius: the change makes catalog rollups idempotent for ALL adapters (benefits aiagent_v2/v3 + claude_code, which also re-emit); it is additive-correctness (aggregates become correct under re-emission, unchanged for single-emission). SOW-0020 is superseded by this work and will be closed referencing this SOW.
+
 ## Outcome
 
 Pending.
diff --git a/cmd/ai-viewer-ingest/sources_test.go b/cmd/ai-viewer-ingest/sources_test.go
index 5845384..a99fbfb 100644
--- a/cmd/ai-viewer-ingest/sources_test.go
+++ b/cmd/ai-viewer-ingest/sources_test.go
@@ -44,8 +44,10 @@ func plantCodexLayout(t *testing.T, root string, modern, legacy int) {
 			t.Fatalf("write legacy rollout: %v", err)
 		}
 	}
-	// Decoys: an archived shard rollout (pruned), a non-rollout .jsonl, and a
-	// non-rollout file at the root. None of these must be counted.
+	// Decoys: an archived shard rollout (pruned), a non-rollout .jsonl, a
+	// non-rollout file at the root, AND a rollout-*.jsonl at the WRONG depth
+	// (directly under the sessions root, not in a YYYY/MM/DD shard — F8). None of
+	// these must be counted.
 	arch := filepath.Join(root, "archived_sessions", "2025", "11", "20")
 	if err := os.MkdirAll(arch, 0o755); err != nil {
 		t.Fatalf("mkdir archive: %v", err)
@@ -59,6 +61,11 @@ func plantCodexLayout(t *testing.T, root string, modern, legacy int) {
 	if err := os.WriteFile(filepath.Join(root, "history.jsonl"), []byte("{}"), 0o644); err != nil {
 		t.Fatalf("write decoy root file: %v", err)
 	}
+	// A rollout-*.jsonl placed directly under the sessions root (wrong shard
+	// depth) must NOT be counted as a modern rollout (F8).
+	if err := os.WriteFile(filepath.Join(root, "rollout-2025-11-20T10-00-09-strayroot.jsonl"), []byte(`{"type":"session_meta"}`+"\n"), 0o644); err != nil {
+		t.Fatalf("write stray-root rollout: %v", err)
+	}
 }
 
 // itoa is a tiny single-digit int→string helper so plantCodexLayout stays free
@@ -176,14 +183,15 @@ func TestAutoDiscover_CodexProbeLogsBothCountsSeparately(t *testing.T) {
 }
 
 // TestCountRolloutFiles verifies the modern-rollout counter mirrors discovery.go's
-// match: rollout-*.jsonl under shards, archived_sessions pruned, non-rollout
-// .jsonl and root non-rollout files ignored.
+// match: rollout-*.jsonl under YYYY/MM/DD shards, archived_sessions pruned,
+// non-rollout .jsonl, root non-rollout files, AND a rollout-*.jsonl at the wrong
+// shard depth (directly under the root) all ignored (F8).
 func TestCountRolloutFiles(t *testing.T) {
 	t.Parallel()
 	tmp := t.TempDir()
 	plantCodexLayout(t, tmp, 4, 2)
 	if n := countRolloutFiles(tmp); n != 4 {
-		t.Fatalf("countRolloutFiles = %d, want 4 (archived + decoys excluded)", n)
+		t.Fatalf("countRolloutFiles = %d, want 4 (archived + decoys + wrong-depth stray excluded)", n)
 	}
 	if n := countRolloutFiles(filepath.Join(tmp, "missing")); n != 0 {
 		t.Fatalf("countRolloutFiles(missing) = %d, want 0", n)
diff --git a/internal/adapters/codex/adapter_restart_test.go b/internal/adapters/codex/adapter_restart_test.go
index a1f8c20..5ee913b 100644
--- a/internal/adapters/codex/adapter_restart_test.go
+++ b/internal/adapters/codex/adapter_restart_test.go
@@ -200,6 +200,143 @@ func TestRestart_TruncationReScansWithSourceError(t *testing.T) {
 	}
 }
 
+// TestRestart_EOFFinalizeNotReFiredOnUnchangedRescan pins H2: an EOF-finalize
+// (the OLD-format completed close, or the stale NEW-format failed/incomplete close
+// + SessionFinalized) must fire EXACTLY ONCE for a given file size. The mapper's
+// own eofFinalized guard is per-instance and the scanner replays from offset 0 on
+// every scan (rebuilding a fresh mapper), so without a DURABLE cursor marker an
+// unchanged rescan/restart re-fires the synthetic finalize. The marker
+// (FileCursor.EOFFinalizedSize) is round-tripped through Cursor.String/ParseCursor
+// exactly as the ingester persists it, so the resume sees ZERO duplicate
+// TurnFinalized / SessionFinalized.
+func TestRestart_EOFFinalizeNotReFiredOnUnchangedRescan(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name        string
+		body        func(id string) []byte
+		age         time.Duration
+		wantTurnFin int  // TurnFinalized expected on the FIRST scan
+		wantSessFin int  // SessionFinalized expected on the FIRST scan
+		oldFormat   bool // documentation only (never read below): which EOF path fires
+	}{
+		{
+			// OLD-format (turn_context only, no task_started): closes COMPLETED at
+			// EOF regardless of staleness (spec edge #3) — the 38%-of-corpus case.
+			name:        "old_format_clean_close",
+			body:        oldFormatOpenTurnSession,
+			age:         time.Minute, // fresh: old-format still closes at EOF
+			wantTurnFin: 1,
+			wantSessFin: 0, // codex has no per-session terminal signal (SOW C#3)
+			oldFormat:   true,
+		},
+		{
+			// NEW-format hanging turn aged stale ≥ 1 h: closes failed/incomplete AND
+			// emits SessionFinalized(failed,incomplete) — the only SessionFinalized
+			// codex emits (rule #23). This is the case the P2 explicitly flagged for
+			// a duplicate SessionFinalized on rescan.
+			name:        "new_format_stale_crash",
+			body:        hangingSession,
+			age:         2 * time.Hour, // stale ≥ 1 h
+			wantTurnFin: 1,
+			wantSessFin: 1,
+			oldFormat:   false,
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			id := uuid7(40 + len(tc.name))
+			root := t.TempDir()
+			path := shardPath(root, id)
+			writeFileBytes(t, path, tc.body(id))
+			setMtime(t, path, tc.age)
+
+			a, err := New(root, canonical.AdapterOptions{})
+			if err != nil {
+				t.Fatalf("New: %v", err)
+			}
+
+			// First scan: the EOF-finalize fires exactly once.
+			out1 := make(chan canonical.Event, 512)
+			if err := a.Scan(context.Background(), nil, out1); err != nil {
+				t.Fatalf("Scan #1: %v", err)
+			}
+			first := drainBuffered(out1)
+			if got := countKind(first, canonical.EvTurnFinalized); got != tc.wantTurnFin {
+				t.Fatalf("first scan TurnFinalized = %d, want %d", got, tc.wantTurnFin)
+			}
+			if got := countKind(first, canonical.EvSessionFinalized); got != tc.wantSessFin {
+				t.Fatalf("first scan SessionFinalized = %d, want %d", got, tc.wantSessFin)
+			}
+
+			// Persist + reload the cursor exactly as the ingester does (JSON
+			// round-trip through the public ParseCursor). The EOFFinalizedSize marker
+			// must survive this round-trip.
+			cursorJSON := lastCursor(t, first)
+			parsed, err := a.ParseCursor(cursorJSON)
+			if err != nil {
+				t.Fatalf("ParseCursor: %v", err)
+			}
+
+			// Rescan with NO new bytes (same size, same mtime). The durable marker
+			// must suppress the EOF-finalize entirely.
+			out2 := make(chan canonical.Event, 512)
+			if err := a.Scan(context.Background(), parsed, out2); err != nil {
+				t.Fatalf("Scan #2 (unchanged rescan): %v", err)
+			}
+			second := drainBuffered(out2)
+			if got := countKind(second, canonical.EvTurnFinalized); got != 0 {
+				t.Errorf("unchanged rescan re-fired TurnFinalized %d times, want 0 (H2)", got)
+			}
+			if got := countKind(second, canonical.EvSessionFinalized); got != 0 {
+				t.Errorf("unchanged rescan re-fired SessionFinalized %d times, want 0 (H2)", got)
+			}
+
+			// A genuine append (size grows) re-opens normally: appending a fresh turn
+			// and closing it must produce a new TurnFinalized (the marker no longer
+			// matches the grown size). This guards against the marker over-suppressing.
+			appendFileBytes(t, path, []byte(appendedClosedTurn()))
+			setMtime(t, path, time.Minute)
+			out3 := make(chan canonical.Event, 512)
+			if err := a.Scan(context.Background(), parsed, out3); err != nil {
+				t.Fatalf("Scan #3 (after append): %v", err)
+			}
+			third := drainBuffered(out3)
+			if got := countKind(third, canonical.EvTurnFinalized); got < 1 {
+				t.Errorf("append after EOF-finalize produced %d TurnFinalized, want >=1 (marker must not over-suppress a real append)", got)
+			}
+		})
+	}
+}
+
+// oldFormatOpenTurnSession returns a modern rollout with an OLD-format turn
+// (turn_context only — no task_started, no task_complete) that stays open until
+// EOF. finalizeAtEOF closes it COMPLETED regardless of staleness (spec edge #3).
+func oldFormatOpenTurnSession(id string) []byte {
+	lines := []string{
+		metaLine(id, `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"gpt-5.1-codex-max"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"hi"}]}}`,
+		`{"timestamp":"` + tsDone + `","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hello"}]}}`,
+	}
+	return []byte(strings.Join(lines, "\n") + "\n")
+}
+
+// appendedClosedTurn returns a NEW-format turn (task_started + task_complete) to
+// append after an EOF-finalize, proving a genuine append re-opens the file and
+// produces its own TurnFinalized rather than being suppressed by the EOF marker.
+func appendedClosedTurn() string {
+	lines := []string{
+		`{"timestamp":"2025-11-20T17:10:00.000Z","type":"turn_context","payload":{"turn_id":"t2","model":"gpt-5.5"}}`,
+		`{"timestamp":"2025-11-20T17:10:00.100Z","type":"event_msg","payload":{"type":"task_started","turn_id":"t2","started_at":1763658600}}`,
+		`{"timestamp":"2025-11-20T17:10:05.000Z","type":"event_msg","payload":{"type":"task_complete","turn_id":"t2","completed_at":1763658605,"duration_ms":5000}}`,
+	}
+	return strings.Join(lines, "\n") + "\n"
+}
+
 // scanFullSession runs the public Scan once over a freshly-written rollout and
 // returns the event stream (a one-shot reference run for the resume comparison).
 func scanFullSession(t *testing.T, id string, lines []string) []canonical.Event {
diff --git a/internal/adapters/codex/cursor.go b/internal/adapters/codex/cursor.go
index 5e0d1d7..b130245 100644
--- a/internal/adapters/codex/cursor.go
+++ b/internal/adapters/codex/cursor.go
@@ -53,6 +53,17 @@ type FileCursor struct {
 	// LastTsUs is the timestamp of the last record consumed, in microseconds
 	// since the UNIX epoch. Observability only.
 	LastTsUs int64 `json:"last_ts_us,omitempty"`
+	// EOFFinalizedSize is the file size at which an EOF-finalize already fired
+	// (the mapper made a terminal decision at full-read EOF: an OLD-format turn
+	// closed completed, a stale NEW-format turn closed failed/incomplete, or a
+	// clean session with no open turn). It is the DURABLE marker that prevents
+	// finalizeAtEOF from re-firing the same TurnFinalized / SessionFinalized on
+	// every unchanged rescan or daemon restart (H2): the mapper's own
+	// eofFinalized guard is per-instance and a replay-from-0 rebuilds a fresh
+	// mapper each scan, so the marker must live in the cursor. A genuine append
+	// grows Size past this value, so the equality check fails and the new EOF is
+	// finalized normally. 0 means no EOF-finalize has fired yet for this file.
+	EOFFinalizedSize int64 `json:"eof_finalized_size,omitempty"`
 }
 
 // LegacyFile is the per-legacy-file suppression record. Ingested is a misnomer
diff --git a/internal/adapters/codex/mapper_coverage_test.go b/internal/adapters/codex/mapper_coverage_test.go
index cf207f6..d6f76c7 100644
--- a/internal/adapters/codex/mapper_coverage_test.go
+++ b/internal/adapters/codex/mapper_coverage_test.go
@@ -859,3 +859,53 @@ func TestMapper_OutputFirstExecFailedCorrects(t *testing.T) {
 		t.Errorf("output-first failed exec did not re-emit exec_exit_code/exec_duration_ms Extras (G1/G3)")
 	}
 }
+
+// TestMapper_OutputFirstExecOKNoSpuriousRefinalize covers H1b: an output-first
+// exec_command_end(exit 0) on an op its function_call_output already finalized
+// COMPLETED must re-emit the exec Extras (so they reach ops.extras_json) but must
+// NOT emit a correcting OpFinalized — the status is unchanged, so a re-finalize
+// would be spurious and would double-count the op in the (finalize-contributing)
+// catalog rollups. Exactly ONE OpFinalized on the shell op's (turn,seq), plus the
+// Extras-carrying OpStarted re-emit.
+func TestMapper_OutputFirstExecOKNoSpuriousRefinalize(t *testing.T) {
+	t.Parallel()
+	m := newTestMapper("sid")
+	lines := []string{
+		metaLine("sid", `"exec"`),
+		`{"timestamp":"` + tsCtx + `","type":"turn_context","payload":{"turn_id":"t1","model":"m"}}`,
+		`{"timestamp":"` + tsItem + `","type":"response_item","payload":{"type":"function_call","name":"shell","arguments":"{}","call_id":"c1"}}`,
+		// Output-first: provisional completed off a benign output string.
+		`{"timestamp":"` + tsEvent + `","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"ok"}}`,
+		// Late exec_command_end with exit 0 → SAME terminal status (completed); the
+		// correcting OpFinalized must be SUPPRESSED (H1b).
+		`{"timestamp":"` + tsDone + `","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","exit_code":0,"aggregated_output":"ok","duration":{"secs":0,"nanos":500000000}}}`,
+	}
+	events := runLines(t, m, lines)
+
+	// Exactly one OpFinalized on the shell op (Seq 1) — the original output finalize.
+	// No correcting re-finalize, because exit 0 did not change the status.
+	seq1Finals := 0
+	for _, f := range opFinals(events) {
+		if f.Seq == 1 {
+			seq1Finals++
+			if f.Status != "completed" {
+				t.Errorf("output-first exit-0: Seq1 finalize status = %q, want completed", f.Status)
+			}
+		}
+	}
+	if seq1Finals != 1 {
+		t.Errorf("output-first exit-0: Seq1 OpFinalized count = %d, want exactly 1 (no spurious re-finalize, H1b)", seq1Finals)
+	}
+
+	// The exec Extras still reach the op via an OpStarted re-emit (enrichment is not
+	// lost just because the status correction is suppressed).
+	reemit := false
+	for _, st := range opStarts(events) {
+		if st.Name == "shell" && st.Extras != nil && st.Extras["exec_exit_code"] == int64(0) {
+			reemit = true
+		}
+	}
+	if !reemit {
+		t.Errorf("output-first exit-0: exec Extras did not reach the op via an OpStarted re-emit (F4 must still hold)")
+	}
+}
diff --git a/internal/adapters/codex/mapper_state.go b/internal/adapters/codex/mapper_state.go
index 1f82a6c..b6668a5 100644
--- a/internal/adapters/codex/mapper_state.go
+++ b/internal/adapters/codex/mapper_state.go
@@ -108,12 +108,22 @@ type openOp struct {
 // faithfully (the writer's ON CONFLICT UPDATE keeps the original start_ts via
 // MIN and grafts the resolver stash, so re-emitting with the op's known
 // identity only adds the enrichment Extras).
+//
+// status/errClass record the TERMINAL status the op was finalized with, so an
+// output-first enrichment (exec_command_end / patch_apply_end arriving AFTER the
+// *_output already finalized the op) emits a CORRECTING OpFinalized ONLY when its
+// authoritative status actually DIFFERS from this recorded one (H1b). Without
+// this, an exec exit 0 on an already-`completed` op would re-emit a redundant
+// OpFinalized(completed) that the catalog (which contributes a finalize's
+// failure/token/duration totals) would double-count.
 type finalizedOp struct {
 	turnSeq   int
 	opSeq     int
 	kind      canonical.OpKind
 	name      string
 	namespace string
+	status    string
+	errClass  string
 }
 
 // openWebSearchRef records one open web_search op for POSITIONAL pairing with
diff --git a/internal/adapters/codex/mapper_turn.go b/internal/adapters/codex/mapper_turn.go
index fb94284..6d63557 100644
--- a/internal/adapters/codex/mapper_turn.go
+++ b/internal/adapters/codex/mapper_turn.go
@@ -344,7 +344,7 @@ func (m *fileMapper) finalizeDanglingOps(turnID string, base func() canonical.Ev
 			EndTs:           endUs,
 		})
 		delete(m.openOps, p.callID)
-		m.recordFinalizedOp(p.callID, p.op)
+		m.recordFinalizedOp(p.callID, p.op, opStatus, opErrClass)
 	}
 	m.pruneWebSearchQueue(turnID)
 	return out
diff --git a/internal/adapters/codex/ops_enrich.go b/internal/adapters/codex/ops_enrich.go
index 0c1e402..5304f6e 100644
--- a/internal/adapters/codex/ops_enrich.go
+++ b/internal/adapters/codex/ops_enrich.go
@@ -80,7 +80,14 @@ func (m *fileMapper) enrichFinalizedOp(callID string, advance func(int64) canoni
 	// output-first ordering: the *_output already finalized it (often completed off a
 	// benign-looking output), but a non-zero exit_code is authoritative (G1, spec
 	// rule #5/#14). The writer upserts on (turn,seq), so this overwrites the status.
-	if status != "" {
+	//
+	// Emit the correcting OpFinalized ONLY when the enrichment-derived status differs
+	// from the one the op was already finalized with (H1b). An exec exit 0 on an
+	// already-`completed` op carries no correction, so re-emitting OpFinalized(completed)
+	// would be spurious — and the catalog rollups (which contribute each finalize's
+	// failure/token/duration totals) would double-count it. A genuine change
+	// (completed → failed on a non-zero exit) still corrects exactly once.
+	if status != "" && (status != fop.status || errClass != fop.errClass) {
 		out = append(out, m.correctFinalizedOp(fop, advance, tsUs, status, errClass))
 	}
 	if len(out) == 0 {
@@ -159,8 +166,11 @@ func (m *fileMapper) reemitOpStarted(fop finalizedOp, advance func(int64) canoni
 
 // recordFinalizedOp records a now-finalized op so a LATE enrichment event can
 // merge Extras onto it via reemitOpStarted (F4). The op's kind/name/namespace are
-// preserved so the re-emit restates the op faithfully.
-func (m *fileMapper) recordFinalizedOp(callID string, op *openOp) {
+// preserved so the re-emit restates the op faithfully. status/errClass are the
+// TERMINAL status the op was finalized with, so an output-first enrichment only
+// emits a correcting OpFinalized when its authoritative status actually differs
+// from this one (H1b — no spurious re-finalize on an unchanged status).
+func (m *fileMapper) recordFinalizedOp(callID string, op *openOp, status, errClass string) {
 	if callID == "" {
 		return
 	}
@@ -170,6 +180,8 @@ func (m *fileMapper) recordFinalizedOp(callID string, op *openOp) {
 		kind:      op.kind,
 		name:      op.name,
 		namespace: op.namespace,
+		status:    status,
+		errClass:  errClass,
 	}
 }
 
@@ -200,7 +212,7 @@ func (m *fileMapper) enrichWebSearch(rec record, advance func(int64) canonical.E
 	op.finalized = true
 	mergeExtras(op, extras)
 	delete(m.openOps, ws.syntheticCallID)
-	m.recordFinalizedOp(ws.syntheticCallID, op)
+	m.recordFinalizedOp(ws.syntheticCallID, op, "completed", "")
 	return m.finalizeWithExtras(op, advance, tsUs, "completed", "")
 }
 
@@ -278,7 +290,7 @@ func (m *fileMapper) enrichMcp(rec record, advance func(int64) canonical.EventBa
 		},
 	}
 	delete(m.openOps, p.CallID)
-	m.recordFinalizedOp(p.CallID, op)
+	m.recordFinalizedOp(p.CallID, op, status, errClass)
 	return out
 }
 
@@ -304,7 +316,7 @@ func (m *fileMapper) enrichPatchApply(rec record, advance func(int64) canonical.
 	op.finalized = true
 	mergeExtras(op, extras)
 	delete(m.openOps, p.CallID)
-	m.recordFinalizedOp(p.CallID, op)
+	m.recordFinalizedOp(p.CallID, op, status, errClass)
 	// finalizeWithExtras emits the OpStarted (carrying patch_success/patch_status)
 	// followed by the OpFinalized with the success-derived status.
 	return m.finalizeWithExtras(op, advance, tsUs, status, errClass)
diff --git a/internal/adapters/codex/ops_tools.go b/internal/adapters/codex/ops_tools.go
index b7a3bf1..3b8c4ee 100644
--- a/internal/adapters/codex/ops_tools.go
+++ b/internal/adapters/codex/ops_tools.go
@@ -107,9 +107,11 @@ func (m *fileMapper) mapToolOutput(p *responseItemPayload, advance func(int64) c
 		out = append(out, m.payloadRef(advance(tsUs), op.turnSeq, op.opSeq, "tool_response", "json", bodyBytes))
 	}
 	delete(m.openOps, p.CallID)
-	// Record the finalized op so a LATER exec_command_end (output-first ~15-32%
-	// ordering) can still merge its Extras via an OpStarted re-emit (F4).
-	m.recordFinalizedOp(p.CallID, op)
+	// Record the finalized op (with the status it was finalized with) so a LATER
+	// exec_command_end (output-first ~15-32% ordering) can merge its Extras via an
+	// OpStarted re-emit (F4) AND only emit a correcting OpFinalized when its
+	// authoritative status differs from this one (H1b — no spurious re-finalize).
+	m.recordFinalizedOp(p.CallID, op, status, errClass)
 	return out
 }
 
diff --git a/internal/adapters/codex/scanner.go b/internal/adapters/codex/scanner.go
index 55679e0..ca0c6cd 100644
--- a/internal/adapters/codex/scanner.go
+++ b/internal/adapters/codex/scanner.go
@@ -226,8 +226,16 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 	// This is called UNCONDITIONALLY at full-read EOF (not only when stale) and
 	// passed the stale bool, so the OLD-format completed-close fires on fresh files
 	// too (F1). The synthetic end timestamp is the file mtime in micros.
+	//
+	// H2: the EOF-finalize is suppressed when this exact size was already finalized
+	// on a prior pass (cur.EOFFinalizedSize == size). The mapper's eofFinalized
+	// guard is per-instance and the replay-from-0 rebuilds a fresh mapper each scan,
+	// so without a DURABLE cursor marker an unchanged rescan/restart would re-fire
+	// the EOF TurnFinalized (and the stale SessionFinalized) every time. A genuine
+	// append grows size past the marker, so the new EOF is finalized normally.
 	fullyRead := res.advanced >= size
-	if fullyRead {
+	alreadyFinalized := cur.EOFFinalizedSize > 0 && cur.EOFFinalizedSize == size
+	if fullyRead && !alreadyFinalized {
 		stale := time.Since(info.ModTime()) >= staleAfter
 		for _, ev := range mapper.finalizeAtEOF(stale, mtimeUs) {
 			select {
@@ -237,6 +245,13 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 				res.emitted++
 			}
 		}
+		// Persist the marker only when the mapper made a terminal decision (it set
+		// its eofFinalized guard). A FRESH new-format file with a still-running turn
+		// emits nothing and leaves the guard false, so a later stale sweep can still
+		// finalize it; the marker stays unset until then.
+		if mapper.eofFinalized {
+			cur.EOFFinalizedSize = size
+		}
 	}
 	return cur, res.emitted, nil
 }
diff --git a/internal/ingest/catalog.go b/internal/ingest/catalog.go
index b7d3c4f..7b153cf 100644
--- a/internal/ingest/catalog.go
+++ b/internal/ingest/catalog.go
@@ -61,11 +61,32 @@ ON CONFLICT (source_format, cwd) DO UPDATE SET
 
 // onOpStarted populates catalog_providers, catalog_models, and
 // catalog_tools depending on op kind.
-func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent) error {
+//
+// inserted reports whether the op's ops-table row was a GENUINE NEW INSERT in
+// this batch (determined by applyOpStarted's existence probe before its upsert).
+// call_count is bumped ONLY on a genuine insert: a re-emitted OpStarted (late
+// enrichment carrying corrected status/extras for an op that already exists —
+// the codex/claude_code replay-from-0 + enrichment design re-emits OpStarted on
+// the same (turn,seq)) is an UPDATE, and double-counting it would inflate the
+// per-(provider,model)/(namespace,name) call totals. The first_seen/last_seen
+// floor/ceiling and the ctx_max seed stay idempotent (MIN/MAX/COALESCE) and run
+// on every call so a re-emit still refreshes them. (SOW-0020 / SOW-0004 H1a.)
+func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, inserted bool) error {
+	// callInc is what the ON CONFLICT branch adds to call_count when the catalog
+	// row already exists: 1 for a genuinely new op (one more call against a known
+	// key), 0 for a re-emitted OpStarted, which eliminates the double-count. When
+	// the catalog row is absent, the INSERT's VALUES(...,1) seeds the count and
+	// callInc is unused. We still run the upsert (not a bare UPDATE) so a missing
+	// catalog row is created. inserted is authoritative because applyOpStarted's
+	// existence probe and this write share one tx.
+	callInc := 0
+	if inserted {
+		callInc = 1
+	}
 	switch ev.Kind {
 	case canonical.OpLLM:
 		if ev.Provider != "" {
-			if err := upsertProvider(ctx, tx, ev.Provider, ev.ProviderAlias, ev.Ts); err != nil {
+			if err := upsertProvider(ctx, tx, ev.Provider, ev.ProviderAlias, ev.Ts, callInc); err != nil {
 				return err
 			}
 		}
@@ -88,8 +109,8 @@ ON CONFLICT (provider, name) DO UPDATE SET
     first_seen = MIN(catalog_models.first_seen, excluded.first_seen),
     last_seen  = MAX(catalog_models.last_seen, excluded.last_seen),
     ctx_max    = COALESCE(catalog_models.ctx_max, excluded.ctx_max),
-    call_count = catalog_models.call_count + 1
-`, ev.Provider, ev.Model, ctxMaxSeed, ev.Ts, ev.Ts); err != nil {
+    call_count = catalog_models.call_count + ?
+`, ev.Provider, ev.Model, ctxMaxSeed, ev.Ts, ev.Ts, callInc); err != nil {
 				return fmt.Errorf("catalog_models upsert: %w", err)
 			}
 		}
@@ -105,8 +126,8 @@ VALUES (?, ?, ?, ?, 1)
 ON CONFLICT (namespace, name) DO UPDATE SET
     first_seen = MIN(catalog_tools.first_seen, excluded.first_seen),
     last_seen  = MAX(catalog_tools.last_seen, excluded.last_seen),
-    call_count = catalog_tools.call_count + 1
-`, ns, ev.Name, ev.Ts, ev.Ts); err != nil {
+    call_count = catalog_tools.call_count + ?
+`, ns, ev.Name, ev.Ts, ev.Ts, callInc); err != nil {
 				return fmt.Errorf("catalog_tools upsert: %w", err)
 			}
 		}
@@ -114,12 +135,54 @@ ON CONFLICT (namespace, name) DO UPDATE SET
 	return nil
 }
 
-// onOpFinalized updates running totals on the catalog row matching the
-// op's kind. The op kind is looked up from the ops table (it was
-// recorded by the matching OpStarted) since OpFinalizedEvent does not
-// carry the kind itself.
-func (c *catalogWriter) onOpFinalized(ctx context.Context, tx *sql.Tx, opID string, ev canonical.OpFinalizedEvent) error {
-	// Pull the kind + identity from the row we just upserted.
+// opPriorTotals captures an op's persisted terminal contribution as it stood
+// BEFORE the current OpFinalized's row UPDATE. onOpFinalized applies the DELTA
+// between the now-persisted totals and these prior ones, so a re-emitted /
+// corrected OpFinalized on the same (turn,seq) updates the catalog aggregate
+// EXACTLY ONCE rather than adding its full contribution again (SOW-0020 /
+// SOW-0004 H1a). found=false means the op row was absent before this finalize
+// (first finalize, or OpStarted not yet landed) — then the prior contribution is
+// zero and the delta equals the full new contribution, identical to the
+// pre-fix single-emission behaviour.
+//
+// The prior status drives the failure delta (a completed→failed correction adds
+// +1 failure once; a re-emit of the same status adds 0). The token/cost/duration
+// deltas mirror the ops row exactly: the writer persists tokens/cost directly
+// from the event and duration via COALESCE, so reading the persisted prior + new
+// values keeps catalog_*.total_* a faithful running sum even when a re-finalize
+// carries NULL duration (COALESCE keeps the old value ⇒ delta 0).
+type opPriorTotals struct {
+	found            bool
+	status           string
+	tokensIn         int64
+	tokensOut        int64
+	tokensCacheRead  int64
+	tokensCacheWrite int64
+	costUSD          float64
+	durationUS       int64
+}
+
+// failureInc maps a terminal status to its failure-count contribution.
+func failureInc(status string) int64 {
+	if status == string(canonical.StatusFailed) {
+		return 1
+	}
+	return 0
+}
+
+// onOpFinalized updates running totals on the catalog row matching the op's
+// kind. The op kind/identity AND the now-persisted terminal totals are read from
+// the ops row (it was upserted by applyOpFinalized just before this call, and
+// recorded by the matching OpStarted) since OpFinalizedEvent carries neither the
+// kind nor the resolved cost. prior is the op's contribution before this
+// finalize's UPDATE, captured by applyOpFinalized; the aggregate moves by the
+// (now − prior) delta so a re-emit is idempotent (SOW-0004 H1a).
+func (c *catalogWriter) onOpFinalized(ctx context.Context, tx *sql.Tx, opID string, ev canonical.OpFinalizedEvent, prior opPriorTotals) error {
+	// Pull the kind + identity AND the now-persisted terminal totals from the row
+	// we just upserted. Reading the persisted values (not ev.*) makes the catalog
+	// a faithful mirror of the ops row: duration in particular is persisted via
+	// COALESCE, so a NULL-duration re-finalize keeps the old value and the delta
+	// is zero (using ev.EndTs-ev.Ts here would wrongly subtract on a re-finalize).
 	var (
 		kind          string
 		name          string
@@ -127,11 +190,21 @@ func (c *catalogWriter) onOpFinalized(ctx context.Context, tx *sql.Tx, opID stri
 		model         sql.NullString
 		provider      sql.NullString
 		providerAlias sql.NullString
+		nowStatus     string
+		nowTokensIn   int64
+		nowTokensOut  int64
+		nowCacheRead  int64
+		nowCacheWrite int64
+		nowCostUSD    float64
+		nowDurationUS sql.NullInt64
 	)
 	row := tx.QueryRowContext(ctx,
-		`SELECT kind, name, tool_namespace, model, provider, provider_alias FROM ops WHERE id = ?`,
+		`SELECT kind, name, tool_namespace, model, provider, provider_alias,
+		        status, tokens_in, tokens_out, tokens_cache_read, tokens_cache_write, cost_usd, duration_us
+		   FROM ops WHERE id = ?`,
 		opID)
-	if err := row.Scan(&kind, &name, &toolNamespace, &model, &provider, &providerAlias); err != nil {
+	if err := row.Scan(&kind, &name, &toolNamespace, &model, &provider, &providerAlias,
+		&nowStatus, &nowTokensIn, &nowTokensOut, &nowCacheRead, &nowCacheWrite, &nowCostUSD, &nowDurationUS); err != nil {
 		if err == sql.ErrNoRows {
 			// OpStarted never landed (event ordering bug). Skip; the
 			// per-session aggregates still pick up the row once it
@@ -140,14 +213,16 @@ func (c *catalogWriter) onOpFinalized(ctx context.Context, tx *sql.Tx, opID stri
 		}
 		return fmt.Errorf("catalog onOpFinalized lookup: %w", err)
 	}
-	failureInc := 0
-	if ev.Status == "failed" {
-		failureInc = 1
-	}
-	durUS := int64(0)
-	if ev.EndTs > 0 && ev.Ts > 0 && ev.EndTs >= ev.Ts {
-		durUS = ev.EndTs - ev.Ts
-	}
+	// Deltas vs the prior persisted contribution (zero when the op row was absent
+	// before this finalize). A re-emit with unchanged totals yields all-zero
+	// deltas — a true no-op against the aggregate.
+	failureDelta := failureInc(nowStatus) - failureInc(prior.status)
+	tokensInDelta := nowTokensIn - prior.tokensIn
+	tokensOutDelta := nowTokensOut - prior.tokensOut
+	cacheReadDelta := nowCacheRead - prior.tokensCacheRead
+	cacheWriteDelta := nowCacheWrite - prior.tokensCacheWrite
+	costDelta := nowCostUSD - prior.costUSD
+	durDelta := nowDurationUS.Int64 - prior.durationUS
 	switch kind {
 	case string(canonical.OpLLM):
 		if provider.Valid && provider.String != "" {
@@ -165,7 +240,7 @@ UPDATE catalog_providers SET
     total_cost_usd           = total_cost_usd + ?,
     last_seen                = MAX(last_seen, ?)
 WHERE name = ? AND alias = ?
-`, failureInc, ev.TokensIn, ev.TokensOut, ev.TokensCacheRead, ev.TokensCacheWrite, ev.CostUSD, ev.EndTs,
+`, failureDelta, tokensInDelta, tokensOutDelta, cacheReadDelta, cacheWriteDelta, costDelta, ev.EndTs,
 				provider.String, alias); err != nil {
 				return fmt.Errorf("catalog_providers totals: %w", err)
 			}
@@ -179,6 +254,8 @@ WHERE name = ? AND alias = ?
 			// the column forever. The CASE WHEN gate keeps the
 			// pre-iter-9 behaviour for ops that record no CtxMax
 			// (the column declares NULLIF(?, 0) in writer.go:472).
+			// ctx_max stays MAX-based (idempotent under re-emit by
+			// construction), so it uses ev.CtxMax directly, not a delta.
 			if _, err := tx.ExecContext(ctx, `
 UPDATE catalog_models SET
     failure_count            = failure_count + ?,
@@ -191,7 +268,7 @@ UPDATE catalog_models SET
     ctx_max                  = CASE WHEN ? > 0 THEN MAX(COALESCE(ctx_max, 0), ?) ELSE ctx_max END,
     last_seen                = MAX(last_seen, ?)
 WHERE provider = ? AND name = ?
-`, failureInc, ev.TokensIn, ev.TokensOut, ev.TokensCacheRead, ev.TokensCacheWrite, ev.CostUSD, durUS,
+`, failureDelta, tokensInDelta, tokensOutDelta, cacheReadDelta, cacheWriteDelta, costDelta, durDelta,
 				ev.CtxMax, ev.CtxMax, ev.EndTs,
 				provider.String, model.String); err != nil {
 				return fmt.Errorf("catalog_models totals: %w", err)
@@ -211,22 +288,27 @@ UPDATE catalog_tools SET
     total_duration_us = total_duration_us + ?,
     last_seen         = MAX(last_seen, ?)
 WHERE namespace = ? AND name = ?
-`, failureInc, ev.TokensIn, ev.TokensOut, ev.CostUSD, durUS, ev.EndTs, ns, name); err != nil {
+`, failureDelta, tokensInDelta, tokensOutDelta, costDelta, durDelta, ev.EndTs, ns, name); err != nil {
 			return fmt.Errorf("catalog_tools totals: %w", err)
 		}
 	}
 	return nil
 }
 
-func upsertProvider(ctx context.Context, tx *sql.Tx, name, alias string, ts int64) error {
+// upsertProvider seeds/refreshes a catalog_providers row. callInc is added to
+// call_count on the ON CONFLICT (existing-row) branch only — 1 on a genuine new
+// op insert, 0 on a re-emitted OpStarted — so a late-enrichment re-emit does not
+// double-count the provider's call total (SOW-0020 / SOW-0004 H1a). On a genuine
+// INSERT the VALUES(...,1) sets the count and the conflict branch is not taken.
+func upsertProvider(ctx context.Context, tx *sql.Tx, name, alias string, ts int64, callInc int) error {
 	if _, err := tx.ExecContext(ctx, `
 INSERT INTO catalog_providers (name, alias, first_seen, last_seen, call_count)
 VALUES (?, ?, ?, ?, 1)
 ON CONFLICT (name, alias) DO UPDATE SET
     first_seen = MIN(catalog_providers.first_seen, excluded.first_seen),
     last_seen  = MAX(catalog_providers.last_seen, excluded.last_seen),
-    call_count = catalog_providers.call_count + 1
-`, name, alias, ts, ts); err != nil {
+    call_count = catalog_providers.call_count + ?
+`, name, alias, ts, ts, callInc); err != nil {
 		return fmt.Errorf("catalog_providers upsert: %w", err)
 	}
 	return nil
diff --git a/internal/ingest/catalog_idempotency_test.go b/internal/ingest/catalog_idempotency_test.go
new file mode 100644
index 0000000..c200a8a
--- /dev/null
+++ b/internal/ingest/catalog_idempotency_test.go
@@ -0,0 +1,206 @@
+package ingest
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// This file pins the catalog-idempotency-under-op-re-emission invariant
+// (SOW-0004 H1a / superseded SOW-0020). The codex and claude_code adapters
+// RE-EMIT OpStarted/OpFinalized for the same (turn,seq): a replay-from-0 on
+// resume, plus late enrichment that carries corrected status/extras onto an
+// already-finalized op. The catalog rollups (catalog_providers / catalog_models
+// / catalog_tools) ACCUMULATE call_count, failure_count, total_tokens_*,
+// total_cost_usd and total_duration_us, so a naive re-emit would double-count.
+// These tests prove a re-emit leaves every aggregate exactly where a single
+// emission left it, and that a status correction moves the failure total by
+// exactly one.
+
+// catalogTotals snapshots every accumulating column the op-rollups touch for one
+// (provider, model)+(provider) LLM op and one tool op, so a re-emit can be
+// asserted against a single-emission baseline.
+type catalogTotals struct {
+	providerCalls, providerFailures            int64
+	providerTokensIn, providerTokensOut        int64
+	providerCacheRead, providerCacheWrite      int64
+	modelCalls, modelFailures, modelDurationUS int64
+	modelTokensIn, modelTokensOut              int64
+	toolCalls, toolFailures, toolTokensIn      int64
+	toolDurationUS                             int64
+}
+
+func readCatalogTotals(t *testing.T, db *sql.DB, provider, model, toolNS, toolName string) catalogTotals {
+	t.Helper()
+	var c catalogTotals
+	c.providerCalls = scanInt(t, db, `SELECT call_count FROM catalog_providers WHERE name=?`, provider)
+	c.providerFailures = scanInt(t, db, `SELECT failure_count FROM catalog_providers WHERE name=?`, provider)
+	c.providerTokensIn = scanInt(t, db, `SELECT total_tokens_in FROM catalog_providers WHERE name=?`, provider)
+	c.providerTokensOut = scanInt(t, db, `SELECT total_tokens_out FROM catalog_providers WHERE name=?`, provider)
+	c.providerCacheRead = scanInt(t, db, `SELECT total_tokens_cache_read FROM catalog_providers WHERE name=?`, provider)
+	c.providerCacheWrite = scanInt(t, db, `SELECT total_tokens_cache_write FROM catalog_providers WHERE name=?`, provider)
+	c.modelCalls = scanInt(t, db, `SELECT call_count FROM catalog_models WHERE provider=? AND name=?`, provider, model)
+	c.modelFailures = scanInt(t, db, `SELECT failure_count FROM catalog_models WHERE provider=? AND name=?`, provider, model)
+	c.modelDurationUS = scanInt(t, db, `SELECT total_duration_us FROM catalog_models WHERE provider=? AND name=?`, provider, model)
+	c.modelTokensIn = scanInt(t, db, `SELECT total_tokens_in FROM catalog_models WHERE provider=? AND name=?`, provider, model)
+	c.modelTokensOut = scanInt(t, db, `SELECT total_tokens_out FROM catalog_models WHERE provider=? AND name=?`, provider, model)
+	c.toolCalls = scanInt(t, db, `SELECT call_count FROM catalog_tools WHERE namespace=? AND name=?`, toolNS, toolName)
+	c.toolFailures = scanInt(t, db, `SELECT failure_count FROM catalog_tools WHERE namespace=? AND name=?`, toolNS, toolName)
+	c.toolTokensIn = scanInt(t, db, `SELECT total_tokens_in FROM catalog_tools WHERE namespace=? AND name=?`, toolNS, toolName)
+	c.toolDurationUS = scanInt(t, db, `SELECT total_duration_us FROM catalog_tools WHERE namespace=? AND name=?`, toolNS, toolName)
+	return c
+}
+
+// applyCatalogOps applies a session + turn + one LLM op (provider/model, tokens,
+// cost via NopPricer is zero so tokens drive the assertion) + one tool op, all
+// finalized, inside a single committed batch. Called once for the baseline and
+// again (same identities, same seqs) for the re-emit assertion.
+func applyCatalogOps(t *testing.T, ctx context.Context, db *sql.DB, w *writer, src string, llmStatus, toolStatus string) {
+	t.Helper()
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	apply := func(ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+	apply(canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(canonical.TurnStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1000},
+		SessionNativeID: "s", Seq: 1,
+	})
+	// LLM op (provider+model) → catalog_providers + catalog_models.
+	apply(canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5",
+	})
+	apply(canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: llmStatus, EndTs: 1300,
+		TokensIn: 100, TokensOut: 20, TokensCacheRead: 5, TokensCacheWrite: 3,
+	})
+	// Tool op → catalog_tools.
+	apply(canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1400},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 2, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "shell", ToolNamespace: "shell",
+	})
+	apply(canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 6, Ts: 1400},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 2, Status: toolStatus, EndTs: 1500,
+		TokensIn: 7,
+	})
+	if cErr := tx.Commit(); cErr != nil {
+		t.Fatalf("Commit: %v", cErr)
+	}
+}
+
+// TestCatalog_ReEmittedOpNoDoubleCount pins H1a (1): re-emitting an IDENTICAL
+// OpStarted+OpFinalized for the same (turn,seq) — exactly what the codex /
+// claude_code replay-from-0 + late-enrichment design does — must leave every
+// catalog aggregate where the single emission left it. call_count counts the op
+// once (only on the genuine insert); failure/token/cost/duration totals
+// contribute the op's terminal values once.
+func TestCatalog_ReEmittedOpNoDoubleCount(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+
+	// Baseline: a single clean emission.
+	applyCatalogOps(t, ctx, db, w, src, "completed", "completed")
+	baseline := readCatalogTotals(t, db, "openai", "gpt-5.5", "shell", "shell")
+	if baseline.providerCalls != 1 || baseline.modelCalls != 1 || baseline.toolCalls != 1 {
+		t.Fatalf("baseline call counts unexpected: %+v (test premise broken)", baseline)
+	}
+	if baseline.modelTokensIn != 100 || baseline.providerTokensIn != 100 || baseline.toolTokensIn != 7 {
+		t.Fatalf("baseline token totals unexpected: %+v (test premise broken)", baseline)
+	}
+
+	// Re-emit the SAME events (same seqs/identities). Each op row already exists, so
+	// every OpStarted is an UPDATE and every OpFinalized carries unchanged totals.
+	applyCatalogOps(t, ctx, db, w, src, "completed", "completed")
+	after := readCatalogTotals(t, db, "openai", "gpt-5.5", "shell", "shell")
+
+	if after != baseline {
+		t.Fatalf("catalog aggregates moved under an identical op re-emit (must be idempotent, H1a):\n baseline=%+v\n after   =%+v", baseline, after)
+	}
+}
+
+// TestCatalog_ReFinalizeStatusCorrectionDeltaOnce pins H1a (2): finalizing an op
+// completed and then RE-finalizing it failed (the codex output-first exec
+// correction: exit≠0 overrides a provisional completed) must move the catalog
+// failure_count by EXACTLY ONE — not zero (the correction must register) and not
+// two (the original completed contributed zero failures, the correction +1). The
+// token totals, which did not change, must not move on the re-finalize.
+func TestCatalog_ReFinalizeStatusCorrectionDeltaOnce(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+
+	apply := func(tx *sql.Tx, ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+
+	// Batch 1: session + a tool op (its turn is synthesized by the writer) finalized COMPLETED (failure 0).
+	tx1, _ := db.BeginTx(ctx, nil)
+	apply(tx1, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(tx1, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "shell", ToolNamespace: "shell",
+	})
+	apply(tx1, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "completed", EndTs: 1200, TokensIn: 9,
+	})
+	if err := tx1.Commit(); err != nil {
+		t.Fatalf("Commit batch 1: %v", err)
+	}
+	if got := scanInt(t, db, `SELECT failure_count FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 0 {
+		t.Fatalf("after completed finalize, failure_count = %d, want 0", got)
+	}
+
+	// Batch 2: the correcting re-finalize → failed (the output-first exec exit≠0
+	// path). Tokens unchanged. The failure total must become exactly 1.
+	tx2, _ := db.BeginTx(ctx, nil)
+	apply(tx2, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "command_failed", EndTs: 1200, TokensIn: 9,
+	})
+	if err := tx2.Commit(); err != nil {
+		t.Fatalf("Commit batch 2: %v", err)
+	}
+
+	if got := scanInt(t, db, `SELECT failure_count FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 1 {
+		t.Errorf("after completed→failed re-finalize, failure_count = %d, want exactly 1 (delta once, H1a)", got)
+	}
+	if got := scanInt(t, db, `SELECT call_count FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 1 {
+		t.Errorf("call_count = %d, want 1 (no OpStarted re-emit here; finalize must not bump call_count)", got)
+	}
+	if got := scanInt(t, db, `SELECT total_tokens_in FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 9 {
+		t.Errorf("total_tokens_in = %d, want 9 (unchanged tokens must not re-add on re-finalize)", got)
+	}
+}
diff --git a/internal/ingest/error_paths_test.go b/internal/ingest/error_paths_test.go
index 4ceea19..fd8f9af 100644
--- a/internal/ingest/error_paths_test.go
+++ b/internal/ingest/error_paths_test.go
@@ -57,7 +57,7 @@ func TestUpsertProvider_EmptyAliasOK(t *testing.T) {
 	ctx := context.Background()
 	_ = ensureSourceRowDirect(ctx, db, "src", "fmt", "/loc")
 	tx, _ := db.BeginTx(ctx, nil)
-	if err := upsertProvider(ctx, tx, "anthropic", "", 1000); err != nil {
+	if err := upsertProvider(ctx, tx, "anthropic", "", 1000, 1); err != nil {
 		t.Fatalf("upsertProvider: %v", err)
 	}
 	_ = tx.Commit()
@@ -253,7 +253,7 @@ func TestUpsertProvider_FailsOnDeadTx(t *testing.T) {
 	t.Parallel()
 	_, db := openTestStore(t)
 	ctx, tx := rolledTx(t, db)
-	if err := upsertProvider(ctx, tx, "p", "", 1); err == nil {
+	if err := upsertProvider(ctx, tx, "p", "", 1, 1); err == nil {
 		t.Fatal("expected error on rolled-back tx")
 	}
 }
@@ -265,7 +265,7 @@ func TestCatalog_OnOpFinalized_RolledTx(t *testing.T) {
 	c := newCatalogWriter(NopPricer{})
 	err := c.onOpFinalized(ctx, tx, "any-op-id", canonical.OpFinalizedEvent{
 		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
-	})
+	}, opPriorTotals{})
 	if err == nil {
 		t.Fatal("expected error on rolled-back tx")
 	}
@@ -313,7 +313,7 @@ func TestCatalog_OnOpStarted_RolledTx(t *testing.T) {
 	err := c.onOpStarted(ctx, tx, canonical.OpStartedEvent{
 		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
 		Kind:      canonical.OpLLM, Provider: "anthropic", Model: "m",
-	})
+	}, true)
 	if err == nil {
 		t.Fatal("expected error on rolled-back tx (llm branch)")
 	}
@@ -322,7 +322,7 @@ func TestCatalog_OnOpStarted_RolledTx(t *testing.T) {
 	err = c.onOpStarted(ctx2, tx2, canonical.OpStartedEvent{
 		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
 		Kind:      canonical.OpTool, Name: "read",
-	})
+	}, true)
 	if err == nil {
 		t.Fatal("expected error on rolled-back tx (tool branch)")
 	}
diff --git a/internal/ingest/writer.go b/internal/ingest/writer.go
index c441c34..7040d6a 100644
--- a/internal/ingest/writer.go
+++ b/internal/ingest/writer.go
@@ -516,6 +516,20 @@ ON CONFLICT (session_id, seq) DO NOTHING
 		return fmt.Errorf("writer: synthesize turn for op: %w", err)
 	}
 	opID := canonicalOpID(turnID, ev.Seq)
+	// Probe whether this op already has a row BEFORE the upsert so the catalog can
+	// count the call ONCE per distinct op (SOW-0004 H1a). A re-emitted OpStarted —
+	// late enrichment carrying corrected status/extras on the same (turn,seq), as
+	// the codex/claude_code replay-from-0 + enrichment design emits — is an UPDATE,
+	// not a new call. ON CONFLICT DO UPDATE returns RowsAffected=1 for both insert
+	// and update under modernc/sqlite, so an explicit existence check is the
+	// authoritative insert-vs-update signal. sql.ErrNoRows ⇒ genuine new insert.
+	opInserted := false
+	switch existsErr := w.requireOpExists(ctx, tx, opID); {
+	case errors.Is(existsErr, sql.ErrNoRows):
+		opInserted = true
+	case existsErr != nil:
+		return existsErr
+	}
 	var parentOpID sql.NullString
 	if ev.ParentOpSeq >= 0 {
 		parentOpID = sql.NullString{String: canonicalOpID(turnID, ev.ParentOpSeq), Valid: true}
@@ -587,7 +601,7 @@ ON CONFLICT (turn_id, seq) DO UPDATE SET
 	}
 	w.markDirtyTurn(turnID)
 	w.markDirtySession(sessionID)
-	if err := w.catalog.onOpStarted(ctx, tx, ev); err != nil {
+	if err := w.catalog.onOpStarted(ctx, tx, ev, opInserted); err != nil {
 		return err
 	}
 	return nil
@@ -600,6 +614,15 @@ func (w *writer) applyOpFinalized(ctx context.Context, tx *sql.Tx, ev canonical.
 	}
 	turnID := canonicalTurnID(sessionID, ev.TurnSeq)
 	opID := canonicalOpID(turnID, ev.Seq)
+	// Capture the op's persisted terminal contribution BEFORE the UPDATE below
+	// overwrites it, so the catalog can move its rollups by the (new − prior)
+	// delta and stay idempotent under a re-emitted / corrected OpFinalized on the
+	// same (turn,seq) (SOW-0004 H1a). Absent row ⇒ first finalize ⇒ zero prior ⇒
+	// delta equals the full new contribution (unchanged single-emission path).
+	prior, err := w.opPriorTotals(ctx, tx, opID)
+	if err != nil {
+		return err
+	}
 	cost := ev.CostUSD
 	if cost == 0 && w.pricer != nil {
 		// Resolve provider/model/kind and start_ts from the row we know
@@ -682,12 +705,39 @@ WHERE id = ?
 	// every op whose cost was computed.
 	evForCatalog := ev
 	evForCatalog.CostUSD = cost
-	if err := w.catalog.onOpFinalized(ctx, tx, opID, evForCatalog); err != nil {
+	if err := w.catalog.onOpFinalized(ctx, tx, opID, evForCatalog, prior); err != nil {
 		return err
 	}
 	return nil
 }
 
+// opPriorTotals reads an op's persisted terminal contribution (status + the
+// token/cost/duration columns the catalog rollups sum) as it stands BEFORE the
+// current OpFinalized UPDATE. It is the durable prior state the catalog subtracts
+// to stay idempotent under a re-emitted / corrected finalize (SOW-0004 H1a):
+// reading the persisted row (not the event) means a re-finalize across a daemon
+// restart — where any in-memory per-op tracking would be gone — still computes a
+// correct delta. sql.ErrNoRows ⇒ no row yet (first finalize, or OpStarted not yet
+// landed): found=false and every prior contribution is zero, so the delta equals
+// the full new contribution and the single-emission path is unchanged.
+func (w *writer) opPriorTotals(ctx context.Context, tx *sql.Tx, opID string) (opPriorTotals, error) {
+	var p opPriorTotals
+	var dur sql.NullInt64
+	err := tx.QueryRowContext(ctx, `
+SELECT status, tokens_in, tokens_out, tokens_cache_read, tokens_cache_write, cost_usd, duration_us
+  FROM ops WHERE id = ?`, opID).
+		Scan(&p.status, &p.tokensIn, &p.tokensOut, &p.tokensCacheRead, &p.tokensCacheWrite, &p.costUSD, &dur)
+	if err != nil {
+		if errors.Is(err, sql.ErrNoRows) {
+			return opPriorTotals{}, nil
+		}
+		return opPriorTotals{}, fmt.Errorf("writer: read op prior totals %s: %w", opID, err)
+	}
+	p.found = true
+	p.durationUS = dur.Int64
+	return p, nil
+}
+
 // isPriceableOp reports whether the op identified by (kind, provider,
 // model) should be passed to the pricer. Non-LLM ops (tool, system,
 // session) carry no token counts and have empty provider/model; pricing

From e4243de5c4cfd42158c4a5b59612d901ba064a9e Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 17:06:55 +0300
Subject: [PATCH 10/13] ingest: migrate catalog contribution on op identity
 change; codex EOF + spec

Round-5 fixes for the last review edges (catalog aggregation + EOF), all
latent (no shipped surface reads catalog_*) but fixed for correctness.

- Catalog identity migration (ingest): when an OpStarted re-emit changes an
  op's catalog identity (codex MCP enrichment corrects custom->mcp:server
  for the same turn/seq), applyOpStarted captures the op's prior persisted
  catalog identity + booked totals before the upsert; onOpStarted then MOVES
  the contribution (call_count + failure/tokens/cost/duration) from the old
  key to the new instead of inserting a second count. A physical op now
  contributes to exactly one catalog row (its final identity). Same-identity
  idempotency (round-4) is unchanged. New file catalog_migrate.go keeps
  catalog.go within budget.
- Codex EOF (metadata-only append): the old-format EOF close-ts now uses a
  turn last-CONTENT timestamp (a no-op session_meta no longer re-dates it),
  and readRollout suppresses a second EOF finalize when a size-growing append
  carried no new turn content (advancing the EOFFinalizedSize marker). A real
  new-turn append still re-opens and re-finalizes.
- Spec: ingester.md catalog section rewritten to the idempotent semantics
  (OpStarted counts on insert + migrates on identity change; OpFinalized
  applies a now-minus-prior delta); adapter-codex.md cursor JSON documents
  eof_finalized_size + the metadata-append suppression.

New tests: catalog identity-change migration (total call_count=1 across both
keys; totals only under the final key; provider/model/kind-change branches)
and a metadata-append restart test (append-only-session_meta rescan emits
zero new TurnFinalized, stable end-ts; verified non-vacuous). Gates green:
golangci(0)/gosec(0)/vet; race all pass; codex 92.6% / ingest 88.4%
(+1.6pt); FuzzParseLine 0 crashes; goldens byte-identical; scans clean.
---
 .../SOW-0004-20260526-codex-adapter.md        |  14 +
 .agents/sow/specs/adapter-codex.md            |  11 +-
 .agents/sow/specs/ingester.md                 |  10 +-
 .../adapters/codex/adapter_restart_test.go    |  89 +++++
 internal/adapters/codex/mapper.go             |  24 +-
 internal/adapters/codex/mapper_finalize.go    |  15 +-
 internal/adapters/codex/scanner.go            |  28 +-
 internal/ingest/catalog.go                    |  58 +++-
 internal/ingest/catalog_idempotency_test.go   | 308 ++++++++++++++++++
 internal/ingest/catalog_migrate.go            | 218 +++++++++++++
 internal/ingest/error_paths_test.go           |  34 +-
 internal/ingest/writer.go                     |  72 +++-
 12 files changed, 840 insertions(+), 41 deletions(-)
 create mode 100644 internal/ingest/catalog_migrate.go

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
index 835e5cf..bc7fab7 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
@@ -238,6 +238,20 @@ Adjudicated on ground truth (spec lines + a read-only investigation of the real
 
 The original gate scoped this SOW to `internal/adapters/codex/` + the additive `sources.go` probe, with "no canonical/ingest/store change". Round-3 review (codex H1) proved the codex adapter cannot be correct without idempotent catalog aggregation under op re-emission (a pre-existing `internal/ingest/catalog.go` gap, SOW-0020, that the codex replay-from-0 + enrichment + EOF-finalize design is the first to heavily exercise). Scope is therefore expanded to include the catalog-idempotency fix in `internal/ingest/catalog.go` (and a minimal `writer.go` insert-vs-update signal if needed). Blast radius: the change makes catalog rollups idempotent for ALL adapters (benefits aiagent_v2/v3 + claude_code, which also re-emit); it is additive-correctness (aggregates become correct under re-emission, unchanged for single-emission). SOW-0020 is superseded by this work and will be closed referencing this SOW.
 
+### Round 4 (2026-05-30) — same scope + H1/H2 fix notes
+
+- **minimax**: SAFE — all H-fixes correct, zero regressions.
+- **glm**: SAFE — H1/H1b/H2 architecturally sound; 2 P3 (spec drift: `ingester.md` catalog semantics + cursor JSON missing `eof_finalized_size`; a theoretical same-tx SELECT "race" that is not a bug).
+- **codex**: NOT SAFE — confirmed same-identity catalog idempotency, status-correction delta-once, exit-0 no-spurious-refinalize, unchanged-rescan suppression, and truncation-clears-marker are all CORRECT; found 2 narrower edges + 1 spec drift:
+
+| # | Sev | Finding | Verdict |
+|---|---|---|---|
+| I1 | P1 | Catalog call_count still double-counts when an `OpStarted` re-emit CHANGES the catalog identity (MCP enrichment corrects `custom`→`mcp:server` for the same op → a new catalog_tools row inserts at count 1 while the placeholder key already counted it); finalize deltas also strand on the old key | CONFIRMED. catalog.go:123 inserts a fresh count for the corrected key; my H1a only handled SAME-identity re-emit. |
+| I2 | P2 | EOF marker keys on file SIZE, so a metadata-only `session_meta` append (real: `openai/codex recorder.rs:1615`) grows size → `finalizeAtEOF` re-fires an old-format turn's `TurnFinalized` and moves its end-ts to the metadata append (lastTsUs updates on every record) | CONFIRMED. scanner.go:236 + mapper.go:225/251 + mapper_finalize.go:44. |
+| I3 | P3 | `ingester.md` (+ adapter-codex.md cursor JSON) still document the pre-H1a catalog semantics + omit `eof_finalized_size` | CONFIRMED spec drift (also glm). |
+
+**Round-5 fix plan:** (I1, ingest) on an `OpStarted` re-emit whose catalog identity changed, MOVE the contribution (call_count + any finalize totals) from the old key to the new key instead of inserting a second count — capture the op's prior persisted (kind,name,tool_namespace,model,provider,alias) before the upsert and, when it differs, decrement the old catalog row and increment the new; test `function_call`→MCP-enrichment keeps total call_count = 1. (I2, codex) track turn-local last-activity separately from file `lastTsUs` for the old-format EOF close-ts, and suppress a second EOF close when a size-growing append carried no new TURN content (e.g. mark the turn EOF-finalized, not just the file size); test: EOF-close an old-format turn, append only `session_meta`, rescan → no new `TurnFinalized`, stable end-ts. (I3, spec) update `ingester.md` catalog semantics (OpStarted counts on insert; OpFinalized applies a delta) + add `eof_finalized_size` to the adapter-codex.md cursor JSON.
+
 ## Outcome
 
 Pending.
diff --git a/.agents/sow/specs/adapter-codex.md b/.agents/sow/specs/adapter-codex.md
index ef0986b..cbb794d 100644
--- a/.agents/sow/specs/adapter-codex.md
+++ b/.agents/sow/specs/adapter-codex.md
@@ -260,7 +260,8 @@ Per-file byte offset plus discovery hints. JSON shape:
       "offset": 2917322,
       "size": 2917322,
       "mtime_us": 1763664584000000,
-      "last_ts_us": 1763664584000000
+      "last_ts_us": 1763664584000000,
+      "eof_finalized_size": 2917322
     }
   },
   "legacy_json": {
@@ -273,9 +274,15 @@ Per-file byte offset plus discovery hints. JSON shape:
 
 Path keys are RELATIVE to the configured `--codex-home` (default `~/.codex`) — the cursor survives a home-directory move.
 
+Per-file fields:
+- `offset` — byte offset of the next unread byte; always at a line start (trailing partial lines held back).
+- `size` — file size when `offset` was recorded; drives truncation detection.
+- `mtime_us` / `last_ts_us` — observability + the staleness heuristic (rule #23).
+- `eof_finalized_size` — DURABLE marker of the file size at which an EOF-finalize already fired (the synthetic close of a hanging turn at full-read EOF: an OLD-format turn closed `completed`, or a stale NEW-format turn closed `failed/incomplete` + its `SessionFinalized`). The mapper's own in-memory guard is rebuilt on every replay-from-0, so the marker must persist in the cursor: without it, an unchanged rescan/restart would re-fire the synthetic `TurnFinalized`/`SessionFinalized`. A metadata-only `session_meta` append (recorder.rs:1615) GROWS `size` past this marker but carries no new turn content, so the scanner ALSO suppresses the re-fire when a grown rescan emitted no new content, then advances the marker to the new size. A genuine new-turn append (always emitting at least a `TurnStarted`) re-opens and closes normally. `0`/absent ⇒ no EOF-finalize has fired yet.
+
 Restart logic:
 - For each tracked file: if `current_size >= cursor.offset`, resume from `cursor.offset`.
-- If `current_size < cursor.offset`: file was truncated (codex never truncates, so this means manual operator deletion + recreation) — emit `SourceError`, reset to 0, full re-scan.
+- If `current_size < cursor.offset`: file was truncated (codex never truncates, so this means manual operator deletion + recreation) — emit `SourceError`, reset to 0, full re-scan (also clears `eof_finalized_size`).
 - For new files (not in cursor): start at 0, full scan.
 - For files no longer present on disk: leave cursor entry; never re-emit. Optional GC after N days.
 
diff --git a/.agents/sow/specs/ingester.md b/.agents/sow/specs/ingester.md
index a8a4e88..9705e6a 100644
--- a/.agents/sow/specs/ingester.md
+++ b/.agents/sow/specs/ingester.md
@@ -285,9 +285,15 @@ Inline upserts run per event in Chunk 7:
 - `OpStartedEvent{Kind=llm}` → `catalog_providers` (provider, alias), `catalog_models` (provider, model).
 - `OpStartedEvent{Kind=tool}` → `catalog_tools` (namespace, name).
 - `SessionStartedEvent` → `catalog_agents` (source_format, agent_name) when agent_name is set; `catalog_cwds` (source_format, cwd) when cwd is set.
-- `OpFinalizedEvent` → increments call_count / failure_count / totals on the catalog row matching the parent OpStartedEvent.
+- `OpFinalizedEvent` → applies a `(now − prior)` DELTA to the accumulating totals other than `call_count` — failure_count / total_tokens_* / total_cost_usd / total_duration_us — on the catalog row matching the parent OpStartedEvent (see below).
 
-The catalog rows use SQLite's `ON CONFLICT (...) DO UPDATE SET first_seen=MIN(first_seen, excluded.first_seen), last_seen=MAX(last_seen, excluded.last_seen), call_count=call_count+1, ...` so the rollups are eventually consistent with the ops table.
+`first_seen` / `last_seen` floors/ceilings and the `ctx_max` seed are always idempotent (`MIN`/`MAX`/`COALESCE`) and run on every event. The accumulating counters are made idempotent under **op re-emission** — adapters that replay from offset 0 on resume, or carry late enrichment that corrects an op's status/identity, re-emit `OpStarted`/`OpFinalized` for the same `(turn, seq)` (SOW-0004 H1a/I1, which supersedes SOW-0020):
+
+- **`call_count` increments from `OpStarted` ONLY on a genuine new op** — the writer probes whether the op's `ops` row already exists BEFORE the upsert (`requireOpExists` / `opPriorIdentity`); a same-identity re-emit is an UPDATE and adds 0, so a replay/enrichment re-emit never double-counts. `call_count = call_count + ?` where the bind is 1 on insert, 0 on a same-identity re-emit.
+- **`call_count` is MIGRATED, not duplicated, when an op's catalog identity CHANGES.** A re-emitted `OpStarted` may correct the op's identity on the same `(turn, seq)` — the codex case is MCP enrichment re-stamping a heuristic `tool_namespace="custom"`→`"mcp:<server>"` (and the tool name). The writer captures the op's prior persisted identity + already-booked totals before the upsert; when the identity differs, it DECREMENTS the old catalog row's `call_count` by 1 and subtracts the op's booked failure/tokens/cost/duration totals, then re-books them under the new key (+1 call, + the migrated totals). One physical op therefore contributes to exactly ONE catalog row (its FINAL identity), `call_count = 1`.
+- **`OpFinalizedEvent` applies a `(now − prior)` delta.** The writer reads the op's persisted terminal contribution (status → failure, tokens, cost, duration) BEFORE the finalize UPDATE overwrites it, then the catalog moves each total by `(new − prior)`. A first finalize sees a zero prior (delta = full contribution, identical to single-emission). A corrected re-finalize (e.g. codex output-first `exec` exit≠0 flipping a provisional `completed`→`failed`) moves `failure_count` by exactly ±1 and leaves unchanged totals at delta 0. `ctx_max` stays `MAX`-based (idempotent by construction), not delta-based.
+
+So the rollups are eventually consistent with the ops table AND idempotent under any number of re-emissions of the same op.
 
 Time-bucketed rollups for hour-/day-grained analytics (per `data-model.md` §Aggregation) are NOT in Chunk 7 — they land in SOW-0007.
 
diff --git a/internal/adapters/codex/adapter_restart_test.go b/internal/adapters/codex/adapter_restart_test.go
index 5ee913b..71b12eb 100644
--- a/internal/adapters/codex/adapter_restart_test.go
+++ b/internal/adapters/codex/adapter_restart_test.go
@@ -312,6 +312,95 @@ func TestRestart_EOFFinalizeNotReFiredOnUnchangedRescan(t *testing.T) {
 	}
 }
 
+// TestRestart_MetadataAppendDoesNotReFinalizeOldFormatEOF pins SOW-0004 I2: after
+// an OLD-format turn is closed COMPLETED at EOF (the 38%-of-corpus case), codex may
+// append a bare metadata-only session_meta record (real: recorder.rs:1615). That
+// append GROWS the file past the EOF-finalize marker but carries NO new turn
+// content. The adapter must NOT re-fire the turn's TurnFinalized, and the original
+// close-ts (the turn's last CONTENT-activity ts, not the metadata append's later
+// ts) must stay unchanged. A genuine new turn would re-open and close normally; a
+// metadata append must be inert.
+func TestRestart_MetadataAppendDoesNotReFinalizeOldFormatEOF(t *testing.T) {
+	t.Parallel()
+
+	id := uuid7(7)
+	root := t.TempDir()
+	path := shardPath(root, id)
+	writeFileBytes(t, path, oldFormatOpenTurnSession(id))
+	setMtime(t, path, time.Minute) // fresh: old-format still closes COMPLETED at EOF
+
+	a, err := New(root, canonical.AdapterOptions{})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	// First scan: the OLD-format turn closes COMPLETED exactly once at EOF.
+	out1 := make(chan canonical.Event, 512)
+	if err := a.Scan(context.Background(), nil, out1); err != nil {
+		t.Fatalf("Scan #1: %v", err)
+	}
+	first := drainBuffered(out1)
+	tf1 := turnFinals(first)
+	if len(tf1) != 1 {
+		t.Fatalf("first scan TurnFinalized count = %d, want 1", len(tf1))
+	}
+	// The close-ts is the turn's last CONTENT-activity ts (tsDone, the assistant
+	// message), NOT the file mtime / wall-clock (G6) and — after the append below —
+	// NOT the later metadata ts (I2).
+	wantEndTs, perr := parseTsToMicros(tsDone)
+	if perr != nil {
+		t.Fatalf("parse tsDone: %v", perr)
+	}
+	if tf1[0].EndTs != wantEndTs {
+		t.Fatalf("first scan TurnFinalized EndTs = %d, want %d (last content ts, tsDone)", tf1[0].EndTs, wantEndTs)
+	}
+
+	// Persist + reload the cursor through the exact ingester round-trip. The
+	// EOFFinalizedSize marker must survive.
+	cursorJSON := lastCursor(t, first)
+	parsed, err := a.ParseCursor(cursorJSON)
+	if err != nil {
+		t.Fatalf("ParseCursor: %v", err)
+	}
+
+	// Append ONLY a metadata-only session_meta with a LATER timestamp. This grows the
+	// file past the marker but carries no turn content; a naive size-only suppression
+	// would re-fire the close and re-date it to this later ts.
+	metaAppend := `{"timestamp":"2025-11-20T18:30:00.000Z","type":"session_meta","payload":{"id":"` + id + `","cwd":"<ROOT>","originator":"codex_exec","cli_version":"0.125.0","source":"exec"}}`
+	appendFileBytes(t, path, []byte(metaAppend+"\n"))
+	setMtime(t, path, time.Minute)
+
+	out2 := make(chan canonical.Event, 512)
+	if err := a.Scan(context.Background(), parsed, out2); err != nil {
+		t.Fatalf("Scan #2 (metadata append): %v", err)
+	}
+	second := drainBuffered(out2)
+
+	if got := countKind(second, canonical.EvTurnFinalized); got != 0 {
+		t.Errorf("metadata-only append re-fired TurnFinalized %d times, want 0 (I2)", got)
+	}
+	if got := countKind(second, canonical.EvSessionFinalized); got != 0 {
+		t.Errorf("metadata-only append emitted SessionFinalized %d times, want 0 (I2)", got)
+	}
+
+	// Belt-and-braces: a THIRD unchanged rescan after the suppressed append (no new
+	// bytes) must also be inert — the marker advanced to the post-append size, so the
+	// file no longer looks "grown".
+	cursorJSON2 := lastCursor(t, second)
+	parsed2, err := a.ParseCursor(cursorJSON2)
+	if err != nil {
+		t.Fatalf("ParseCursor #2: %v", err)
+	}
+	out3 := make(chan canonical.Event, 512)
+	if err := a.Scan(context.Background(), parsed2, out3); err != nil {
+		t.Fatalf("Scan #3 (unchanged after append): %v", err)
+	}
+	third := drainBuffered(out3)
+	if got := countKind(third, canonical.EvTurnFinalized); got != 0 {
+		t.Errorf("unchanged rescan after metadata append re-fired TurnFinalized %d times, want 0 (I2 marker advance)", got)
+	}
+}
+
 // oldFormatOpenTurnSession returns a modern rollout with an OLD-format turn
 // (turn_context only — no task_started, no task_complete) that stays open until
 // EOF. finalizeAtEOF closes it COMPLETED regardless of staleness (spec edge #3).
diff --git a/internal/adapters/codex/mapper.go b/internal/adapters/codex/mapper.go
index cc439d9..8f8aca9 100644
--- a/internal/adapters/codex/mapper.go
+++ b/internal/adapters/codex/mapper.go
@@ -169,10 +169,21 @@ type fileMapper struct {
 	compactedSeen      bool
 	compactedRecordIdx uint64
 
-	// lastTsUs is the timestamp (micros) of the most recent record carrying one.
+	// lastTsUs is the timestamp (micros) of the most recent record carrying one,
+	// across ALL record types (including a metadata-only session_meta append).
 	// Observability only (cursor LastTsUs). Stays 0 for a file whose records all
 	// lack timestamps.
 	lastTsUs int64
+
+	// lastContentTsUs is the MAX timestamp (micros) seen on any CONTENT-bearing
+	// record — every record the mapper acts on EXCEPT a no-op session_meta (the
+	// once-per-file bootstrap line, and any later metadata-only session_meta append
+	// codex writes per recorder.rs:1615). It is the deterministic close-ts for an
+	// OLD-format turn finalized at EOF (I2/G6): a metadata append grows the file and
+	// advances lastTsUs, but the OLD-format turn ended when its LAST real record was
+	// written — using lastContentTsUs keeps that close-ts stable across a
+	// metadata-only append. Stays 0 until the first content record carrying a ts.
+	lastContentTsUs int64
 }
 
 // The per-file inference STATE TYPES (turnState, openOp, finalizedOp,
@@ -248,6 +259,17 @@ func (m *fileMapper) mapRecord(rec record) ([]canonical.Event, error) {
 		m.sessionStarted = true
 	}
 
+	// A session_meta is the once-per-file bootstrap line, or a later metadata-only
+	// append (recorder.rs:1615); it carries no turn content, so it must NOT advance
+	// the OLD-format EOF close-ts (lastContentTsUs). Every other record type is
+	// turn content and advances it (I2). recordTs returns 0 for a record with no
+	// timestamp, which never moves the max.
+	if rec.Type() != recSessionMeta {
+		if ts := m.recordTs(rec); ts > m.lastContentTsUs {
+			m.lastContentTsUs = ts
+		}
+	}
+
 	switch rec.Type() {
 	case recSessionMeta:
 		// SessionStarted already emitted by the bootstrap above; a session_meta
diff --git a/internal/adapters/codex/mapper_finalize.go b/internal/adapters/codex/mapper_finalize.go
index a96683f..d90dd6b 100644
--- a/internal/adapters/codex/mapper_finalize.go
+++ b/internal/adapters/codex/mapper_finalize.go
@@ -43,13 +43,14 @@ func (m *fileMapper) finalizeAtEOF(stale bool, nowUs int64) []canonical.Event {
 	}
 	if !ts.sawTaskStarted {
 		// OLD-format: close COMPLETED at EOF regardless of staleness (spec edge #3).
-		// EndTs MUST be the turn's LAST-ACTIVITY timestamp (m.lastTsUs, the max
-		// record ts in the file — which, for the most-recent open turn, IS that
-		// turn's last activity), NOT the file mtime / wall-clock (G6). A clean
-		// old-format turn ended when its last record was written; using the live
-		// mtime made the golden non-deterministic (CI-flaky) and semantically wrong.
-		// Fall back to nowUs only when no record carried a timestamp (lastTsUs == 0).
-		endUs := m.lastTsUs
+		// EndTs MUST be the turn's LAST-CONTENT-ACTIVITY timestamp (m.lastContentTsUs,
+		// the max ts over content records — which, for the most-recent open turn, IS
+		// that turn's last activity), NOT the file mtime / wall-clock (G6) and NOT
+		// m.lastTsUs (which a metadata-only session_meta append would advance,
+		// re-dating the close — I2). A clean old-format turn ended when its last real
+		// record was written. Fall back to nowUs only when no content record carried a
+		// timestamp (lastContentTsUs == 0).
+		endUs := m.lastContentTsUs
 		if endUs == 0 {
 			endUs = nowUs
 		}
diff --git a/internal/adapters/codex/scanner.go b/internal/adapters/codex/scanner.go
index ca0c6cd..f09734b 100644
--- a/internal/adapters/codex/scanner.go
+++ b/internal/adapters/codex/scanner.go
@@ -213,6 +213,10 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 	if mapper.lastTsUs > 0 {
 		cur.LastTsUs = mapper.lastTsUs
 	}
+	// emittedContent records whether THIS pass surfaced any new canonical event from
+	// records at/after the resume offset, captured before the EOF-finalize block adds
+	// its own events to res.emitted (I2 metadata-append suppression below).
+	emittedContent := res.emitted > 0
 
 	// EOF-finalize (rule #23, spec edge #3, F1): when the file is FULLY read, ask
 	// the mapper to finalize a hanging open turn. The mapper owns the open-turn
@@ -231,11 +235,29 @@ func readRollout(ctx context.Context, resolvedRoot string, r rollout, sourceID s
 	// on a prior pass (cur.EOFFinalizedSize == size). The mapper's eofFinalized
 	// guard is per-instance and the replay-from-0 rebuilds a fresh mapper each scan,
 	// so without a DURABLE cursor marker an unchanged rescan/restart would re-fire
-	// the EOF TurnFinalized (and the stale SessionFinalized) every time. A genuine
-	// append grows size past the marker, so the new EOF is finalized normally.
+	// the EOF TurnFinalized (and the stale SessionFinalized) every time.
+	//
+	// I2: a metadata-only append (codex appends a bare session_meta per
+	// recorder.rs:1615) GROWS the file past the marker yet carries NO new turn
+	// content, so the size-equality check alone (H2) would let finalizeAtEOF re-fire
+	// the OLD-format turn's TurnFinalized (and re-date its close — see lastContentTsUs).
+	// We therefore ALSO suppress when the file grew past the marker but this pass
+	// emitted no new content (emittedContent == false). A genuine append (a new
+	// turn_context / task_started — always emitting at least a TurnStarted) sets
+	// emittedContent and re-finalizes normally.
 	fullyRead := res.advanced >= size
+	grewWithoutContent := cur.EOFFinalizedSize > 0 && size > cur.EOFFinalizedSize && !emittedContent
 	alreadyFinalized := cur.EOFFinalizedSize > 0 && cur.EOFFinalizedSize == size
-	if fullyRead && !alreadyFinalized {
+	switch {
+	case fullyRead && (alreadyFinalized || grewWithoutContent):
+		// Already EOF-finalized at this-or-a-prior size. Advance the marker to the
+		// current size so a metadata-only growth that we just suppressed does not look
+		// "grown" again on the next rescan (the marker tracks the latest fully-read,
+		// content-free size). A same-size rescan leaves the marker unchanged.
+		if grewWithoutContent {
+			cur.EOFFinalizedSize = size
+		}
+	case fullyRead:
 		stale := time.Since(info.ModTime()) >= staleAfter
 		for _, ev := range mapper.finalizeAtEOF(stale, mtimeUs) {
 			select {
diff --git a/internal/ingest/catalog.go b/internal/ingest/catalog.go
index 7b153cf..0cef00b 100644
--- a/internal/ingest/catalog.go
+++ b/internal/ingest/catalog.go
@@ -59,6 +59,11 @@ ON CONFLICT (source_format, cwd) DO UPDATE SET
 	return nil
 }
 
+// The priorOpIdentity type + the catalog identity-MIGRATION helpers
+// (catalogIdentityChanged, normalizeToolNamespace, removeOpContribution,
+// addMigratedTotals) live in catalog_migrate.go (SOW-0004 I1), keeping this file
+// focused on the straight-line per-event upserts.
+
 // onOpStarted populates catalog_providers, catalog_models, and
 // catalog_tools depending on op kind.
 //
@@ -71,16 +76,39 @@ ON CONFLICT (source_format, cwd) DO UPDATE SET
 // per-(provider,model)/(namespace,name) call totals. The first_seen/last_seen
 // floor/ceiling and the ctx_max seed stay idempotent (MIN/MAX/COALESCE) and run
 // on every call so a re-emit still refreshes them. (SOW-0020 / SOW-0004 H1a.)
-func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, inserted bool) error {
-	// callInc is added to call_count only on the ON CONFLICT (existing-row)
-	// branch. On a genuine INSERT the VALUES(...,1) sets the count and the branch
-	// is not taken, so callInc is irrelevant there; on a re-emit (existing row)
-	// callInc=0 keeps the count, eliminating the double-count. We still run the
-	// upsert (not a bare UPDATE) so the row is created when applyOpStarted's probe
-	// raced an absent row — the existence probe and this write share one tx, so
-	// inserted is authoritative and callInc=1 only ever lands via VALUES.
+//
+// prior carries the op's persisted catalog identity + terminal totals as they
+// stood before this OpStarted's row upsert (empty when inserted=true). When the
+// op already existed AND its catalog identity CHANGED (codex MCP enrichment
+// re-stamping tool_namespace/name on the same (turn,seq) — SOW-0004 I1), the op's
+// whole contribution (call_count + any already-booked failure/tokens/cost/
+// duration totals) is MOVED off the old key before it is added to the new one, so
+// the physical op is counted under exactly ONE catalog row.
+func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, inserted bool, prior priorOpIdentity) error {
+	// identityChanged is true only for a re-emit of an EXISTING op whose catalog
+	// identity was corrected (codex MCP enrichment — SOW-0004 I1). Computed once and
+	// reused below for the migrate-out, the call_count bump, and the migrate-in.
+	identityChanged := !inserted && prior.found && c.catalogIdentityChanged(ev, prior)
+	// Identity migration (I1): an existing op whose catalog identity changed has
+	// already contributed (call_count, and any finalize totals) to its OLD key. Move
+	// that whole contribution off the old key here, BEFORE the add below re-books it
+	// under the new key. A re-emit with the SAME identity skips this (nothing to
+	// move) and falls through to the idempotent callInc=0 path (H1a).
+	if identityChanged {
+		if err := c.removeOpContribution(ctx, tx, prior); err != nil {
+			return err
+		}
+	}
+	// callInc is added to call_count on the ON CONFLICT (existing-row) branch. On a
+	// genuine INSERT the VALUES(...,1) sets the count and the branch is not taken; on
+	// a same-identity re-emit (existing row) callInc=0 keeps the count, eliminating
+	// the double-count (H1a). On an identity-CHANGED re-emit we just removed the old
+	// key's count, so this op must register +1 under its new key even though the ops
+	// row already existed — hence callInc=1 here too (I1). We still run the upsert
+	// (not a bare UPDATE) so the row is created when applyOpStarted's probe raced an
+	// absent row.
 	callInc := 0
-	if inserted {
+	if inserted || identityChanged {
 		callInc = 1
 	}
 	switch ev.Kind {
@@ -132,6 +160,18 @@ ON CONFLICT (namespace, name) DO UPDATE SET
 			}
 		}
 	}
+	// I1: on an identity-changed re-emit, the upsert above re-booked call_count under
+	// the NEW key but not the op's already-finalized totals (those live on the OLD key
+	// we just decremented). Move those totals onto the new key now. A subsequent
+	// OpFinalized re-emit applies a (now − prior) delta on top of this migrated base,
+	// so the new key converges to the op's current finalize contribution exactly once
+	// (migrated prior + (now − prior) = now); when no re-finalize follows, the new key
+	// simply holds the op's last-known contribution.
+	if identityChanged {
+		if err := c.addMigratedTotals(ctx, tx, ev, prior.totals); err != nil {
+			return err
+		}
+	}
 	return nil
 }
 
diff --git a/internal/ingest/catalog_idempotency_test.go b/internal/ingest/catalog_idempotency_test.go
index c200a8a..5e2b6d7 100644
--- a/internal/ingest/catalog_idempotency_test.go
+++ b/internal/ingest/catalog_idempotency_test.go
@@ -204,3 +204,311 @@ func TestCatalog_ReFinalizeStatusCorrectionDeltaOnce(t *testing.T) {
 		t.Errorf("total_tokens_in = %d, want 9 (unchanged tokens must not re-add on re-finalize)", got)
 	}
 }
+
+// TestCatalog_IdentityChangeMigratesContribution guards SOW-0004 I1 for tool ops.
+// Codex MCP enrichment re-emits OpStarted on an unchanged (turn,seq) but with a
+// CORRECTED catalog identity: the heuristic tool_namespace="custom" placeholder
+// becomes "mcp:<server>" and the tool name is fixed up as well. By then the op has
+// already been counted once under the placeholder key, and its finalize totals
+// were booked there too. Treating the re-emit as a fresh call would INSERT a new
+// catalog_tools row at call_count=1 under the corrected key — one op counted
+// TWICE — while the finalize totals stay stranded on the old key. The writer must
+// instead MOVE the whole contribution (call_count plus failure/tokens/cost/
+// duration totals) to the new key. The events below replay the adapter's sequence
+// (function_call → output → mcp_tool_call_end re-emit); the checks require a
+// combined call_count of exactly 1 and failure/token totals ONLY on the final key.
+func TestCatalog_IdentityChangeMigratesContribution(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	ctx := context.Background()
+	_, db := openTestStore(t)
+	if seedErr := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); seedErr != nil {
+		t.Fatalf("ensure source: %v", seedErr)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+
+	emit := func(tx *sql.Tx, ev canonical.Event) {
+		if applyErr := w.apply(ctx, tx, ev); applyErr != nil {
+			t.Fatalf("apply %T: %v", ev, applyErr)
+		}
+	}
+
+	tx, beginErr := db.BeginTx(ctx, nil)
+	if beginErr != nil {
+		t.Fatalf("BeginTx: %v", beginErr)
+	}
+	emit(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	emit(tx, canonical.TurnStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1000},
+		SessionNativeID: "s", Seq: 1,
+	})
+	// Step 1 — function_call: the op starts under the heuristic "custom" namespace.
+	emit(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "search", ToolNamespace: "custom",
+	})
+	// Step 2 — function_call_output: finalize under the placeholder, carrying token
+	// counts so that any total left behind on the old key shows up in the checks.
+	emit(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "tool_error",
+		EndTs: 1200, TokensIn: 11, TokensOut: 4,
+	})
+	// Step 3 — mcp_tool_call_end: OpStarted again on the same (turn,seq), now with
+	// the CORRECTED identity (namespace "mcp:files", name "files.read") — the I1 case.
+	emit(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "files.read", ToolNamespace: "mcp:files",
+	})
+	// Step 4 — the enrichment's correcting OpFinalized; terminal status is unchanged.
+	emit(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 6, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "tool_error",
+		EndTs: 1200, TokensIn: 11, TokensOut: 4,
+	})
+	if commitErr := tx.Commit(); commitErr != nil {
+		t.Fatalf("Commit: %v", commitErr)
+	}
+
+	// One physical op must add up to exactly one call across the placeholder key and
+	// the corrected key taken together.
+	placeholderCalls := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_tools WHERE namespace='custom' AND name='search'`)
+	correctedCalls := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_tools WHERE namespace='mcp:files' AND name='files.read'`)
+	if total := placeholderCalls + correctedCalls; total != 1 {
+		t.Fatalf("total call_count across both keys = %d (old=%d new=%d), want 1 (one op counted once, I1)", total, placeholderCalls, correctedCalls)
+	}
+	// And that single call belongs to the corrected key alone.
+	if correctedCalls != 1 {
+		t.Errorf("corrected key call_count = %d, want 1 (contribution migrated to final identity)", correctedCalls)
+	}
+	if n := scanInt(t, db, `SELECT failure_count FROM catalog_tools WHERE namespace='mcp:files' AND name='files.read'`); n != 1 {
+		t.Errorf("corrected key failure_count = %d, want 1 (failed op's failure migrated to final key)", n)
+	}
+	if n := scanInt(t, db, `SELECT total_tokens_in FROM catalog_tools WHERE namespace='mcp:files' AND name='files.read'`); n != 11 {
+		t.Errorf("corrected key total_tokens_in = %d, want 11 (tokens migrated to final key)", n)
+	}
+	if n := scanInt(t, db, `SELECT total_tokens_out FROM catalog_tools WHERE namespace='mcp:files' AND name='files.read'`); n != 4 {
+		t.Errorf("corrected key total_tokens_out = %d, want 4 (tokens migrated to final key)", n)
+	}
+	// Nothing may remain on the placeholder key: neither a count nor any totals.
+	if n := scanInt(t, db, `SELECT COALESCE(failure_count,0) FROM catalog_tools WHERE namespace='custom' AND name='search'`); n != 0 {
+		t.Errorf("placeholder key failure_count = %d, want 0 (must be migrated off the old key, I1)", n)
+	}
+	if n := scanInt(t, db, `SELECT COALESCE(total_tokens_in,0) FROM catalog_tools WHERE namespace='custom' AND name='search'`); n != 0 {
+		t.Errorf("placeholder key total_tokens_in = %d, want 0 (no stranded tokens on the old key, I1)", n)
+	}
+}
+
+// TestCatalog_LLMIdentityChangeMigratesContribution guards SOW-0004 I1 on the LLM
+// catalog tables (catalog_providers + catalog_models). When a re-emitted OpStarted
+// corrects an LLM op's (provider, model) on the same (turn,seq), everything the op
+// contributed — call_count plus failure/tokens/cost/duration totals — has to leave
+// the old provider/model rows and land on the corrected ones, so that each physical
+// op is counted once. This drives the provider/model migrate-out and migrate-in
+// branches that the tool-identity test does not reach. (LLM corrections are rarer
+// than the codex MCP tool case, but the migration must be symmetric across kinds.)
+func TestCatalog_LLMIdentityChangeMigratesContribution(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	ctx := context.Background()
+	_, db := openTestStore(t)
+	if seedErr := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); seedErr != nil {
+		t.Fatalf("ensure source: %v", seedErr)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+
+	emit := func(tx *sql.Tx, ev canonical.Event) {
+		if applyErr := w.apply(ctx, tx, ev); applyErr != nil {
+			t.Fatalf("apply %T: %v", ev, applyErr)
+		}
+	}
+	tx, beginErr := db.BeginTx(ctx, nil)
+	if beginErr != nil {
+		t.Fatalf("BeginTx: %v", beginErr)
+	}
+	emit(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	emit(tx, canonical.TurnStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1000},
+		SessionNativeID: "s", Seq: 1,
+	})
+	// Step 1 — an LLM op under (openai, gpt-5.5), then a failed finalize carrying
+	// tokens and a non-zero duration so anything stranded on the old key is visible.
+	emit(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5",
+	})
+	emit(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "model_error",
+		EndTs: 1500, TokensIn: 30, TokensOut: 8, TokensCacheRead: 2, TokensCacheWrite: 1,
+	})
+	// Step 2 — OpStarted again with a CORRECTED model, same provider: gpt-5.5 → gpt-5.5-codex.
+	emit(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5-codex",
+	})
+	emit(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 6, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "model_error",
+		EndTs: 1500, TokensIn: 30, TokensOut: 8, TokensCacheRead: 2, TokensCacheWrite: 1,
+	})
+	if commitErr := tx.Commit(); commitErr != nil {
+		t.Fatalf("Commit: %v", commitErr)
+	}
+
+	// catalog_models: the old model row must be drained to 0 and the corrected row
+	// must carry the one op's entire contribution.
+	staleModelCalls := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`)
+	fixedModelCalls := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_models WHERE provider='openai' AND name='gpt-5.5-codex'`)
+	if staleModelCalls != 0 {
+		t.Errorf("old model call_count = %d, want 0 (migrated off, I1)", staleModelCalls)
+	}
+	if fixedModelCalls != 1 {
+		t.Errorf("corrected model call_count = %d, want 1 (migrated on, I1)", fixedModelCalls)
+	}
+	if n := scanInt(t, db, `SELECT failure_count FROM catalog_models WHERE provider='openai' AND name='gpt-5.5-codex'`); n != 1 {
+		t.Errorf("corrected model failure_count = %d, want 1", n)
+	}
+	if n := scanInt(t, db, `SELECT total_tokens_in FROM catalog_models WHERE provider='openai' AND name='gpt-5.5-codex'`); n != 30 {
+		t.Errorf("corrected model total_tokens_in = %d, want 30", n)
+	}
+	if n := scanInt(t, db, `SELECT total_duration_us FROM catalog_models WHERE provider='openai' AND name='gpt-5.5-codex'`); n != 400 {
+		t.Errorf("corrected model total_duration_us = %d, want 400 (1500-1100)", n)
+	}
+	if n := scanInt(t, db, `SELECT COALESCE(total_tokens_in,0) FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); n != 0 {
+		t.Errorf("old model total_tokens_in = %d, want 0 (no stranded tokens, I1)", n)
+	}
+	// catalog_providers: the provider is the same before and after, so the −1 of the
+	// migrate-out and the +1 of the migrate-in cancel on that one row and call_count
+	// stays at exactly 1 — an unchanged provider may neither double-count nor drop.
+	if n := scanInt(t, db, `SELECT call_count FROM catalog_providers WHERE name='openai'`); n != 1 {
+		t.Errorf("provider call_count = %d, want 1 (unchanged provider nets to 1 across an identity-changed re-emit)", n)
+	}
+	if n := scanInt(t, db, `SELECT failure_count FROM catalog_providers WHERE name='openai'`); n != 1 {
+		t.Errorf("provider failure_count = %d, want 1", n)
+	}
+	if n := scanInt(t, db, `SELECT total_tokens_in FROM catalog_providers WHERE name='openai'`); n != 30 {
+		t.Errorf("provider total_tokens_in = %d, want 30", n)
+	}
+}
+
+// TestCatalog_IdentityChangeBeforeFinalize pins the SOW-0004 I1 migration when a
+// re-emitted OpStarted corrects the op's identity BEFORE any OpFinalized, i.e. with
+// no totals booked yet. Only call_count moves: the old key drains to 0, the new key
+// counts 1, and addMigratedTotals has nothing but zero totals to re-book.
+func TestCatalog_IdentityChangeBeforeFinalize(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+	apply := func(tx *sql.Tx, ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	apply(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "search", ToolNamespace: "custom",
+	})
+	apply(tx, canonical.OpStartedEvent{ // identity correction BEFORE any finalize: only call_count exists to move
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "files.read", ToolNamespace: "mcp:files",
+	})
+	if cErr := tx.Commit(); cErr != nil {
+		t.Fatalf("Commit: %v", cErr)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_tools WHERE namespace='custom' AND name='search'`); got != 0 {
+		t.Errorf("old key call_count = %d, want 0 (migrated before finalize)", got)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_tools WHERE namespace='mcp:files' AND name='files.read'`); got != 1 {
+		t.Errorf("new key call_count = %d, want 1 (migrated before finalize)", got)
+	}
+}
+
+// TestCatalog_KindChangeMigratesAcrossTables pins SOW-0004 I1 for a re-emitted OpStarted
+// that changes the op KIND (tool → llm) on the same (turn,seq): the contribution moves OFF
+// the tool row ONTO the provider/model rows (catalogIdentityChanged's kind-changed branch).
+func TestCatalog_KindChangeMigratesAcrossTables(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+	apply := func(tx *sql.Tx, ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	apply(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "shell", ToolNamespace: "shell",
+	})
+	apply(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "completed", EndTs: 1200, TokensIn: 5,
+	})
+	apply(tx, canonical.OpStartedEvent{ // re-emit as an LLM op (kind change) on the same (turn,seq)
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5",
+	})
+	apply(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "completed", EndTs: 1200, TokensIn: 5,
+	})
+	if cErr := tx.Commit(); cErr != nil {
+		t.Fatalf("Commit: %v", cErr)
+	}
+	// The tool row drained; the LLM model+provider rows hold the single contribution.
+	if got := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 0 {
+		t.Errorf("tool key call_count = %d, want 0 (migrated to llm tables, I1 kind change)", got)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); got != 1 {
+		t.Errorf("model call_count = %d, want 1 (migrated from tool, I1 kind change)", got)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(call_count,0) FROM catalog_providers WHERE name='openai'`); got != 1 {
+		t.Errorf("provider call_count = %d, want 1 (migrated from tool, I1 kind change)", got)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(total_tokens_in,0) FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 0 {
+		t.Errorf("tool total_tokens_in = %d, want 0 (no stranded tokens after kind change)", got)
+	}
+	if got := scanInt(t, db, `SELECT COALESCE(total_tokens_in,0) FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); got != 5 {
+		t.Errorf("model total_tokens_in = %d, want 5 (migrated from tool)", got)
+	}
+}
diff --git a/internal/ingest/catalog_migrate.go b/internal/ingest/catalog_migrate.go
new file mode 100644
index 0000000..6dd9da4
--- /dev/null
+++ b/internal/ingest/catalog_migrate.go
@@ -0,0 +1,218 @@
+package ingest
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+
+	"github.com/netdata/ai-viewer/internal/canonical"
+)
+
+// This file holds the catalog identity-MIGRATION logic (SOW-0004 I1): when a
+// re-emitted OpStarted CORRECTS an op's catalog identity on the same (turn, seq)
+// — the codex MCP-enrichment case where a `function_call` first counted under a
+// heuristic tool_namespace="custom" key is re-stamped to "mcp:<server>" (and the
+// tool name) — the op's whole contribution (call_count + any already-booked
+// failure/tokens/cost/duration totals) is MOVED off the old catalog row and onto
+// the new one, so one physical op is counted under exactly ONE catalog row. The
+// straight-line per-event upserts live in catalog.go; onOpStarted there calls
+// these helpers.
+
+// priorOpIdentity captures an op's persisted catalog identity AND its terminal
+// rollup contribution as they stood BEFORE the current OpStarted's row upsert.
+// applyOpStarted reads this from the ops row when the op already exists
+// (inserted=false) so onOpStarted can MIGRATE the op's catalog contribution from
+// its old key to its new key when a re-emitted OpStarted CHANGES the catalog
+// identity (SOW-0004 I1).
+//
+// The OpStarted upsert touches only the identity columns (kind/name/namespace/
+// model/provider/alias) + start_ts/extras; it does NOT touch status, tokens_*,
+// cost_usd, or duration_us. So these totals, read just before the upsert, are
+// exactly the contribution onOpFinalized already booked under the OLD identity
+// (zero when the op was OpStarted but never finalized). Migrating call_count AND
+// these totals from the old key to the new key keeps one physical op contributing
+// to exactly ONE catalog row (its final identity), call_count = 1.
+//
+// found=false means the op row was absent before this OpStarted (a genuine new
+// insert) — there is nothing to migrate and the normal +1 insert path runs.
+type priorOpIdentity struct {
+	found         bool          // true when the ops row already existed before this OpStarted
+	kind          string        // ops.kind as persisted; compared against string(ev.Kind)
+	name          string        // ops.name as persisted; the tool key's name part
+	toolNamespace string        // ops.tool_namespace; "" when the column is NULL
+	model         string        // ops.model; "" when the column is NULL
+	provider      string        // ops.provider; "" when the column is NULL
+	providerAlias string        // ops.provider_alias as persisted
+	totals        opPriorTotals // terminal status plus token/cost/duration totals from the same row
+}
+
+// catalogIdentityChanged answers whether a re-emitted OpStarted would be booked on
+// a catalog row OTHER than the one the op's persisted identity contributed to; if
+// so, onOpStarted migrates the contribution rather than counting the op a second
+// time (SOW-0004 I1). The checks follow the catalog keys: a different KIND is
+// always a change; LLM ops are compared on (provider, alias, model); tool ops on
+// (namespace with ""→"builtin" applied, name). ev carries the new identity, prior
+// the columns the op was previously persisted with.
+func (c *catalogWriter) catalogIdentityChanged(ev canonical.OpStartedEvent, prior priorOpIdentity) bool {
+	switch {
+	case string(ev.Kind) != prior.kind:
+		return true
+	case ev.Kind == canonical.OpLLM:
+		sameRow := ev.Provider == prior.provider &&
+			ev.ProviderAlias == prior.providerAlias &&
+			ev.Model == prior.model
+		return !sameRow
+	case ev.Kind == canonical.OpTool:
+		sameNS := normalizeToolNamespace(ev.ToolNamespace) == normalizeToolNamespace(prior.toolNamespace)
+		return !sameNS || ev.Name != prior.name
+	default:
+		// Unchanged session/system/reasoning/compaction kinds have no catalog rollup
+		// row, hence no contribution that could need moving.
+		return false
+	}
+}
+
+// normalizeToolNamespace applies the same ""→"builtin" fold that onOpStarted and
+// onOpFinalized use, so the migration addresses the very key the rollup wrote.
+func normalizeToolNamespace(ns string) string {
+	if ns != "" {
+		return ns
+	}
+	return "builtin"
+}
+
+// removeOpContribution is the "migrate-out" half of the SOW-0004 I1 identity
+// migration: before onOpStarted re-books an op under its new catalog key, this
+// takes the op's entire rollup contribution off the OLD key, addressed by the
+// prior PERSISTED identity. call_count drops by 1 and the failure/tokens/cost/
+// duration totals already booked for the op are subtracted (all zero for an op
+// that was started but never finalized). Each UPDATE lists the same per-kind
+// columns onOpFinalized accumulates — providers have no duration, tools have no
+// cache split — making it an exact inverse. ctx_max is a MAX, not a sum, so it is
+// deliberately left alone: a stale value on an emptied row is harmless (no op
+// references it) and is re-derived on the next observation.
+func (c *catalogWriter) removeOpContribution(ctx context.Context, tx *sql.Tx, prior priorOpIdentity) error {
+	booked := prior.totals
+	failDec := failureInc(booked.status)
+	switch prior.kind {
+	case string(canonical.OpLLM):
+		if prior.provider != "" {
+			if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_providers SET
+    call_count               = call_count - 1,
+    failure_count            = failure_count - ?,
+    total_tokens_in          = total_tokens_in - ?,
+    total_tokens_out         = total_tokens_out - ?,
+    total_tokens_cache_read  = total_tokens_cache_read - ?,
+    total_tokens_cache_write = total_tokens_cache_write - ?,
+    total_cost_usd           = total_cost_usd - ?
+WHERE name = ? AND alias = ?
+`, failDec, booked.tokensIn, booked.tokensOut, booked.tokensCacheRead, booked.tokensCacheWrite,
+				booked.costUSD, prior.provider, prior.providerAlias); err != nil {
+				return fmt.Errorf("catalog_providers migrate-out: %w", err)
+			}
+			if prior.model != "" {
+				if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_models SET
+    call_count               = call_count - 1,
+    failure_count            = failure_count - ?,
+    total_tokens_in          = total_tokens_in - ?,
+    total_tokens_out         = total_tokens_out - ?,
+    total_tokens_cache_read  = total_tokens_cache_read - ?,
+    total_tokens_cache_write = total_tokens_cache_write - ?,
+    total_cost_usd           = total_cost_usd - ?,
+    total_duration_us        = total_duration_us - ?
+WHERE provider = ? AND name = ?
+`, failDec, booked.tokensIn, booked.tokensOut, booked.tokensCacheRead, booked.tokensCacheWrite,
+					booked.costUSD, booked.durationUS, prior.provider, prior.model); err != nil {
+					return fmt.Errorf("catalog_models migrate-out: %w", err)
+				}
+			}
+		}
+	case string(canonical.OpTool):
+		if prior.name != "" {
+			ns := normalizeToolNamespace(prior.toolNamespace)
+			if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_tools SET
+    call_count        = call_count - 1,
+    failure_count     = failure_count - ?,
+    total_tokens_in   = total_tokens_in - ?,
+    total_tokens_out  = total_tokens_out - ?,
+    total_cost_usd    = total_cost_usd - ?,
+    total_duration_us = total_duration_us - ?
+WHERE namespace = ? AND name = ?
+`, failDec, booked.tokensIn, booked.tokensOut, booked.costUSD, booked.durationUS, ns, prior.name); err != nil {
+				return fmt.Errorf("catalog_tools migrate-out: %w", err)
+			}
+		}
+	}
+	return nil
+}
+
+// addMigratedTotals is the "migrate-in" half of the SOW-0004 I1 identity
+// migration: it books the op's already-finalized totals (failure/tokens/cost/
+// duration) on the NEW catalog key. call_count is not touched here — the upsert
+// in onOpStarted covers it with callInc=1 — so only the totals that
+// removeOpContribution subtracted from the old key are added back. The new key
+// then starts from the op's prior contribution and a later OpFinalized re-emit
+// applies its (now − prior) delta on top. Column sets match onOpFinalized;
+// last_seen is left to the OpStarted upsert or a later OpFinalized.
+func (c *catalogWriter) addMigratedTotals(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, t opPriorTotals) error {
+	// t is the contribution already persisted on the ops row; the caller gets here
+	// only for an op whose row existed (prior.found), so t.found is always true. For
+	// an op that was started but never finalized the status is "running"
+	// (failureInc 0) and tokens/cost/duration are zero, so the adds below change
+	// nothing — only the call_count the OpStarted upsert re-booked carries over.
+	failInc := failureInc(t.status)
+	switch ev.Kind {
+	case canonical.OpLLM:
+		if ev.Provider != "" {
+			if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_providers SET
+    failure_count            = failure_count + ?,
+    total_tokens_in          = total_tokens_in + ?,
+    total_tokens_out         = total_tokens_out + ?,
+    total_tokens_cache_read  = total_tokens_cache_read + ?,
+    total_tokens_cache_write = total_tokens_cache_write + ?,
+    total_cost_usd           = total_cost_usd + ?
+WHERE name = ? AND alias = ?
+`, failInc, t.tokensIn, t.tokensOut, t.tokensCacheRead, t.tokensCacheWrite,
+				t.costUSD, ev.Provider, ev.ProviderAlias); err != nil {
+				return fmt.Errorf("catalog_providers migrate-in: %w", err)
+			}
+			// catalog_models is addressed by (provider, name), so a model needs a provider.
+			if ev.Model != "" {
+				if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_models SET
+    failure_count            = failure_count + ?,
+    total_tokens_in          = total_tokens_in + ?,
+    total_tokens_out         = total_tokens_out + ?,
+    total_tokens_cache_read  = total_tokens_cache_read + ?,
+    total_tokens_cache_write = total_tokens_cache_write + ?,
+    total_cost_usd           = total_cost_usd + ?,
+    total_duration_us        = total_duration_us + ?
+WHERE provider = ? AND name = ?
+`, failInc, t.tokensIn, t.tokensOut, t.tokensCacheRead, t.tokensCacheWrite,
+					t.costUSD, t.durationUS, ev.Provider, ev.Model); err != nil {
+					return fmt.Errorf("catalog_models migrate-in: %w", err)
+				}
+			}
+		}
+	case canonical.OpTool:
+		if ev.Name != "" {
+			ns := normalizeToolNamespace(ev.ToolNamespace)
+			if _, err := tx.ExecContext(ctx, `
+UPDATE catalog_tools SET
+    failure_count     = failure_count + ?,
+    total_tokens_in   = total_tokens_in + ?,
+    total_tokens_out  = total_tokens_out + ?,
+    total_cost_usd    = total_cost_usd + ?,
+    total_duration_us = total_duration_us + ?
+WHERE namespace = ? AND name = ?
+`, failInc, t.tokensIn, t.tokensOut, t.costUSD, t.durationUS, ns, ev.Name); err != nil {
+				return fmt.Errorf("catalog_tools migrate-in: %w", err)
+			}
+		}
+	}
+	return nil
+}
diff --git a/internal/ingest/error_paths_test.go b/internal/ingest/error_paths_test.go
index fd8f9af..706bef7 100644
--- a/internal/ingest/error_paths_test.go
+++ b/internal/ingest/error_paths_test.go
@@ -309,11 +309,12 @@ func TestCatalog_OnOpStarted_RolledTx(t *testing.T) {
 	_, db := openTestStore(t)
 	ctx, tx := rolledTx(t, db)
 	c := newCatalogWriter(NopPricer{})
-	// LLM branch with provider+model.
+	// LLM branch with provider+model. A genuine insert (inserted=true) has no prior
+	// identity to migrate, so pass an empty priorOpIdentity (SOW-0004 I1 signature).
 	err := c.onOpStarted(ctx, tx, canonical.OpStartedEvent{
 		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
 		Kind:      canonical.OpLLM, Provider: "anthropic", Model: "m",
-	}, true)
+	}, true, priorOpIdentity{})
 	if err == nil {
 		t.Fatal("expected error on rolled-back tx (llm branch)")
 	}
@@ -322,10 +323,37 @@ func TestCatalog_OnOpStarted_RolledTx(t *testing.T) {
 	err = c.onOpStarted(ctx2, tx2, canonical.OpStartedEvent{
 		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
 		Kind:      canonical.OpTool, Name: "read",
-	}, true)
+	}, true, priorOpIdentity{})
 	if err == nil {
 		t.Fatal("expected error on rolled-back tx (tool branch)")
 	}
+	// Identity-migration error paths (SOW-0004 I1): an existing op (inserted=false)
+	// whose catalog identity CHANGED triggers removeOpContribution before the upsert;
+	// on a rolled-back tx that UPDATE fails, exercising the migrate-out error return.
+	// Tool kind (namespace change) and LLM kind (provider/model change) cover both
+	// catalog-table branches.
+	ctx3, tx3 := rolledTx(t, db)
+	err = c.onOpStarted(ctx3, tx3, canonical.OpStartedEvent{
+		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
+		Kind:      canonical.OpTool, Name: "read", ToolNamespace: "mcp:srv",
+	}, false, priorOpIdentity{
+		found: true, kind: string(canonical.OpTool), name: "read", toolNamespace: "custom",
+		totals: opPriorTotals{found: true, status: "completed"},
+	})
+	if err == nil {
+		t.Fatal("expected error on rolled-back tx (tool migrate-out)")
+	}
+	ctx4, tx4 := rolledTx(t, db)
+	err = c.onOpStarted(ctx4, tx4, canonical.OpStartedEvent{
+		EventBase: canonical.EventBase{SourceID: "x", SourceSeq: 1, Ts: 1},
+		Kind:      canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5",
+	}, false, priorOpIdentity{
+		found: true, kind: string(canonical.OpLLM), name: "message", provider: "openai", model: "old-model",
+		totals: opPriorTotals{found: true, status: "completed"},
+	})
+	if err == nil {
+		t.Fatal("expected error on rolled-back tx (llm migrate-out)")
+	}
 }
 
 func TestWriter_AllEventTypes_RolledTx(t *testing.T) {
diff --git a/internal/ingest/writer.go b/internal/ingest/writer.go
index 7040d6a..6580d21 100644
--- a/internal/ingest/writer.go
+++ b/internal/ingest/writer.go
@@ -516,20 +516,24 @@ ON CONFLICT (session_id, seq) DO NOTHING
 		return fmt.Errorf("writer: synthesize turn for op: %w", err)
 	}
 	opID := canonicalOpID(turnID, ev.Seq)
-	// Probe whether this op already has a row BEFORE the upsert so the catalog can
-	// count the call ONCE per distinct op (SOW-0004 H1a). A re-emitted OpStarted —
-	// late enrichment carrying corrected status/extras on the same (turn,seq), as
-	// the codex/claude_code replay-from-0 + enrichment design emits — is an UPDATE,
-	// not a new call. ON CONFLICT DO UPDATE returns RowsAffected=1 for both insert
-	// and update under modernc/sqlite, so an explicit existence check is the
-	// authoritative insert-vs-update signal. sql.ErrNoRows ⇒ genuine new insert.
-	opInserted := false
-	switch existsErr := w.requireOpExists(ctx, tx, opID); {
-	case errors.Is(existsErr, sql.ErrNoRows):
-		opInserted = true
-	case existsErr != nil:
-		return existsErr
+	// Read the op's PRIOR persisted catalog identity + terminal totals BEFORE the
+	// upsert so the catalog can (a) count the call ONCE per distinct op (SOW-0004
+	// H1a) and (b) MIGRATE the op's contribution off its old key when this re-emit
+	// CHANGES the catalog identity (codex MCP enrichment re-stamping
+	// tool_namespace/name on the same (turn,seq) — SOW-0004 I1). A re-emitted
+	// OpStarted (late enrichment on the same (turn,seq), as the codex/claude_code
+	// replay-from-0 + enrichment design emits) is an UPDATE, not a new call. ON
+	// CONFLICT DO UPDATE returns RowsAffected=1 for both insert and update under
+	// modernc/sqlite, so reading the row first is the authoritative
+	// insert-vs-update signal: found=false (sql.ErrNoRows) ⇒ genuine new insert.
+	// The OpStarted upsert below touches only identity columns + start_ts/extras —
+	// never the status/tokens/cost/duration columns — so the totals read here are
+	// exactly the contribution onOpFinalized already booked under the old identity.
+	prior, err := w.opPriorIdentity(ctx, tx, opID)
+	if err != nil {
+		return err
 	}
+	opInserted := !prior.found
 	var parentOpID sql.NullString
 	if ev.ParentOpSeq >= 0 {
 		parentOpID = sql.NullString{String: canonicalOpID(turnID, ev.ParentOpSeq), Valid: true}
@@ -601,12 +605,52 @@ ON CONFLICT (turn_id, seq) DO UPDATE SET
 	}
 	w.markDirtyTurn(turnID)
 	w.markDirtySession(sessionID)
-	if err := w.catalog.onOpStarted(ctx, tx, ev, opInserted); err != nil {
+	if err := w.catalog.onOpStarted(ctx, tx, ev, opInserted, prior); err != nil {
 		return err
 	}
 	return nil
 }
 
+// opPriorIdentity reads an op's persisted catalog identity (kind/name/namespace/
+// model/provider/alias) AND its terminal rollup totals as they stand BEFORE the
+// current OpStarted UPSERT, so the catalog can migrate the op's contribution when
+// a re-emit changes its catalog identity (SOW-0004 I1). found=false (sql.ErrNoRows)
+// means the op has no row yet — a genuine new insert with nothing to migrate. The
+// totals share opPriorTotals' semantics (the OpStarted upsert leaves the
+// status/tokens/cost/duration columns untouched, so this is the contribution
+// onOpFinalized already booked under the old identity).
+func (w *writer) opPriorIdentity(ctx context.Context, tx *sql.Tx, opID string) (priorOpIdentity, error) {
+	var (
+		p             priorOpIdentity
+		toolNamespace sql.NullString
+		model         sql.NullString
+		provider      sql.NullString
+		providerAlias sql.NullString
+		dur           sql.NullInt64
+	)
+	err := tx.QueryRowContext(ctx, `
+SELECT kind, name, tool_namespace, model, provider, provider_alias,
+       status, tokens_in, tokens_out, tokens_cache_read, tokens_cache_write, cost_usd, duration_us
+  FROM ops WHERE id = ?`, opID).
+		Scan(&p.kind, &p.name, &toolNamespace, &model, &provider, &providerAlias,
+			&p.totals.status, &p.totals.tokensIn, &p.totals.tokensOut,
+			&p.totals.tokensCacheRead, &p.totals.tokensCacheWrite, &p.totals.costUSD, &dur)
+	if err != nil {
+		if errors.Is(err, sql.ErrNoRows) {
+			return priorOpIdentity{}, nil
+		}
+		return priorOpIdentity{}, fmt.Errorf("writer: read op prior identity %s: %w", opID, err)
+	}
+	p.found = true
+	p.toolNamespace = toolNamespace.String
+	p.model = model.String
+	p.provider = provider.String
+	p.providerAlias = providerAlias.String
+	p.totals.found = true
+	p.totals.durationUS = dur.Int64
+	return p, nil
+}
+
 func (w *writer) applyOpFinalized(ctx context.Context, tx *sql.Tx, ev canonical.OpFinalizedEvent) error {
 	sessionID, err := w.requireSessionID(ctx, tx, ev.SessionNativeID, ev.Ts)
 	if err != nil {

From 2b5349c918eae9df649da8dc090552a1bdef1895 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 17:29:09 +0300
Subject: [PATCH 11/13] ingest: catalog migration compares effective
 post-upsert op identity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Final review edge: the catalog identity-change migration compared the raw
OpStarted event identity, but the ops UPSERT preserves omitted optional
fields via COALESCE(NULLIF(excluded.x,''), ops.x). A partial re-emit of an
existing op with empty provider/model/tool_namespace therefore looked like
an identity change, draining the old catalog row while the re-book was
either skipped (LLM: empty provider) or misdirected (tool: empty namespace
normalized to "builtin") — a drained/mismatched aggregate.

effectiveOpIdentity(ev, prior) applies the same empty->prior rule the SQL
upsert uses (kind/name from the event, the COALESCE'd fields fall back to
the prior persisted value). onOpStarted computes it once and uses it for
the identity-change comparison, the call-count booking key, and the
migrated-totals destination, so a partial/empty-omitted re-emit resolves to
the prior identity and is correctly seen as unchanged (no drain, no
migration). A genuine identity change (non-empty corrected fields) is
unaffected.

Tests: TestCatalog_LLMReEmitEmptyProviderModelNoDrain and
TestCatalog_ToolReEmitEmptyNamespaceNoMigrate (both fail before this fix,
pass after); all prior migration + idempotency tests still pass. Ingest-only.
Gates green: golangci(0)/gosec(0)/vet; race all pass; ingest coverage 88.5%;
FuzzParseLine 0 crashes; codex goldens byte-identical.
---
 .../SOW-0004-20260526-codex-adapter.md        |  12 ++
 internal/ingest/catalog.go                    |  40 +++--
 internal/ingest/catalog_idempotency_test.go   | 159 ++++++++++++++++++
 internal/ingest/catalog_migrate.go            |  95 ++++++++---
 4 files changed, 266 insertions(+), 40 deletions(-)

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
index bc7fab7..65b4dfa 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
@@ -252,6 +252,18 @@ The original gate scoped this SOW to `internal/adapters/codex/` + the additive `
 
 **Round-5 fix plan:** (I1, ingest) on an `OpStarted` re-emit whose catalog identity changed, MOVE the contribution (call_count + any finalize totals) from the old key to the new key instead of inserting a second count — capture the op's prior persisted (kind,name,tool_namespace,model,provider,alias) before the upsert and, when it differs, decrement the old catalog row and increment the new; test `function_call`→MCP-enrichment keeps total call_count = 1. (I2, codex) track turn-local last-activity separately from file `lastTsUs` for the old-format EOF close-ts, and suppress a second EOF close when a size-growing append carried no new TURN content (e.g. mark the turn EOF-finalized, not just the file size); test: EOF-close an old-format turn, append only `session_meta`, rescan → no new `TurnFinalized`, stable end-ts. (I3, spec) update `ingester.md` catalog semantics (OpStarted counts on insert; OpFinalized applies a delta) + add `eof_finalized_size` to the adapter-codex.md cursor JSON.
 
+### Round 5 (2026-05-30) — same scope + I1/I2/I3 fix notes
+
+- **glm**: SAFE TO MERGE — ran a deep catalog-migration audit (negative call_count, SQL injection, sibling regressions); found nothing.
+- **minimax**: SAFE — I1/I2/I3 correct + complete, no regressions.
+- **codex**: NOT SAFE — confirmed I2 (EOF suppression), I3 (spec), and I1's full-identity/finalized-prior/pre-finalize/same-identity/kind-change paths all CORRECT; found ONE P2 (no P1, no other issues):
+
+| # | Sev | Finding | Verdict |
+|---|---|---|---|
+| J1 | P2 | Catalog migration compares the RAW event identity, but the ops UPSERT preserves omitted fields via `COALESCE(NULLIF(excluded.x,''), ops.x)` (writer.go:588-591). A PARTIAL re-emit (empty provider/model/namespace) → migration wrongly sees "changed" → drains the old key + skips/mis-books the new → drained aggregate | CONFIRMED. catalog_migrate.go compared `ev.*` not the effective post-upsert identity. Latent (catalog unread). |
+
+**Round 6 (2026-05-30) — J1 fix.** `effectiveOpIdentity(ev, prior)` (catalog_migrate.go) applies the same empty→prior rule as the SQL upsert; `onOpStarted` computes it ONCE and threads it through the identity-change compare, the call-count booking key, and the migrated-totals destination. A partial/empty-omitted re-emit now resolves to the prior identity → seen as UNCHANGED → no drain. Regression tests `TestCatalog_LLMReEmitEmptyProviderModelNoDrain` + `TestCatalog_ToolReEmitEmptyNamespaceNoMigrate` (both fail pre-fix, pass post-fix); all prior migration + idempotency tests still pass. Ingest-only; gates green (golangci 0, gosec 0, race all pass, ingest coverage 88.5%, goldens byte-identical).
+
 ## Outcome
 
 Pending.
diff --git a/internal/ingest/catalog.go b/internal/ingest/catalog.go
index 0cef00b..9a572c1 100644
--- a/internal/ingest/catalog.go
+++ b/internal/ingest/catalog.go
@@ -85,10 +85,17 @@ ON CONFLICT (source_format, cwd) DO UPDATE SET
 // duration totals) is MOVED off the old key before it is added to the new one, so
 // the physical op is counted under exactly ONE catalog row.
 func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, inserted bool, prior priorOpIdentity) error {
+	// eff is the catalog identity the ops row will ACTUALLY carry after applyOpStarted's
+	// upsert: the upsert COALESCEs omitted tool_namespace/model/provider/provider_alias
+	// back to the prior persisted value (writer.go), so booking + migration must use
+	// this effective identity, not the raw event — otherwise an empty-but-unchanged
+	// re-emit drains its real catalog key without re-booking (SOW-0004 I1 shared-ingest
+	// edge). On a genuine insert (prior absent) eff equals the event identity.
+	eff := effectiveOpIdentity(ev, prior)
 	// identityChanged is true only for a re-emit of an EXISTING op whose catalog
 	// identity was corrected (codex MCP enrichment — SOW-0004 I1). Computed once and
 	// reused below for the migrate-out, the call_count bump, and the migrate-in.
-	identityChanged := !inserted && prior.found && c.catalogIdentityChanged(ev, prior)
+	identityChanged := !inserted && prior.found && c.catalogIdentityChanged(eff, prior)
 	// Identity migration (I1): an existing op whose catalog identity changed has
 	// already contributed (call_count, and any finalize totals) to its OLD key. Move
 	// that whole contribution off the old key here, BEFORE the add below re-books it
@@ -111,14 +118,18 @@ func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonica
 	if inserted || identityChanged {
 		callInc = 1
 	}
-	switch ev.Kind {
-	case canonical.OpLLM:
-		if ev.Provider != "" {
-			if err := upsertProvider(ctx, tx, ev.Provider, ev.ProviderAlias, ev.Ts, callInc); err != nil {
+	// Booking keys use the EFFECTIVE post-upsert identity (eff), not the raw event:
+	// they must match the ops row the upsert produces and the key the migrate-in/out
+	// touch, so an empty-field re-emit books under the prior-preserved key rather than
+	// an empty one (SOW-0004 I1 shared-ingest edge).
+	switch eff.kind {
+	case string(canonical.OpLLM):
+		if eff.provider != "" {
+			if err := upsertProvider(ctx, tx, eff.provider, eff.providerAlias, ev.Ts, callInc); err != nil {
 				return err
 			}
 		}
-		if ev.Provider != "" && ev.Model != "" {
+		if eff.provider != "" && eff.model != "" {
 			// Iter-8 fix iter8-4: seed ctx_max from the pricing table
 			// when the pricer carries metadata. The COALESCE on
 			// ON CONFLICT keeps an existing non-null ctx_max (set by a
@@ -126,7 +137,7 @@ func (c *catalogWriter) onOpStarted(ctx context.Context, tx *sql.Tx, ev canonica
 			// untouched — the table seeds, the op refines.
 			ctxMaxSeed := sql.NullInt64{}
 			if mp, ok := c.pricer.(MetadataPricer); ok && mp != nil {
-				if cm, hit := mp.CtxMax(ev.Provider, ev.Model); hit && cm > 0 {
+				if cm, hit := mp.CtxMax(eff.provider, eff.model); hit && cm > 0 {
 					ctxMaxSeed = sql.NullInt64{Int64: cm, Valid: true}
 				}
 			}
@@ -138,16 +149,13 @@ ON CONFLICT (provider, name) DO UPDATE SET
     last_seen  = MAX(catalog_models.last_seen, excluded.last_seen),
     ctx_max    = COALESCE(catalog_models.ctx_max, excluded.ctx_max),
     call_count = catalog_models.call_count + ?
-`, ev.Provider, ev.Model, ctxMaxSeed, ev.Ts, ev.Ts, callInc); err != nil {
+`, eff.provider, eff.model, ctxMaxSeed, ev.Ts, ev.Ts, callInc); err != nil {
 				return fmt.Errorf("catalog_models upsert: %w", err)
 			}
 		}
-	case canonical.OpTool:
-		if ev.Name != "" {
-			ns := ev.ToolNamespace
-			if ns == "" {
-				ns = "builtin"
-			}
+	case string(canonical.OpTool):
+		if eff.name != "" {
+			ns := normalizeToolNamespace(eff.toolNamespace)
 			if _, err := tx.ExecContext(ctx, `
 INSERT INTO catalog_tools (namespace, name, first_seen, last_seen, call_count)
 VALUES (?, ?, ?, ?, 1)
@@ -155,7 +163,7 @@ ON CONFLICT (namespace, name) DO UPDATE SET
     first_seen = MIN(catalog_tools.first_seen, excluded.first_seen),
     last_seen  = MAX(catalog_tools.last_seen, excluded.last_seen),
     call_count = catalog_tools.call_count + ?
-`, ns, ev.Name, ev.Ts, ev.Ts, callInc); err != nil {
+`, ns, eff.name, ev.Ts, ev.Ts, callInc); err != nil {
 				return fmt.Errorf("catalog_tools upsert: %w", err)
 			}
 		}
@@ -168,7 +176,7 @@ ON CONFLICT (namespace, name) DO UPDATE SET
 	// (migrated prior + (now − prior) = now); when no re-finalize follows, the new key
 	// simply holds the op's last-known contribution.
 	if identityChanged {
-		if err := c.addMigratedTotals(ctx, tx, ev, prior.totals); err != nil {
+		if err := c.addMigratedTotals(ctx, tx, eff, prior.totals); err != nil {
 			return err
 		}
 	}
diff --git a/internal/ingest/catalog_idempotency_test.go b/internal/ingest/catalog_idempotency_test.go
index 5e2b6d7..4f23abf 100644
--- a/internal/ingest/catalog_idempotency_test.go
+++ b/internal/ingest/catalog_idempotency_test.go
@@ -449,6 +449,165 @@ func TestCatalog_IdentityChangeBeforeFinalize(t *testing.T) {
 	}
 }
 
+// TestCatalog_LLMReEmitEmptyProviderModelNoDrain pins SOW-0004 I1's shared-ingest
+// edge for LLM ops: the ops-table upsert PRESERVES omitted identity fields via a
+// COALESCE/NULLIF empty-as-prior rule (writer.go), so a re-emitted OpStarted
+// carrying EMPTY provider/model keeps the prior persisted (openai, gpt-5.5) on the
+// ops row. The catalog identity-change check must therefore compare the EFFECTIVE
+// post-upsert identity (empty→prior), NOT the raw event — otherwise it sees
+// ""≠openai, "drains" the catalog_providers/catalog_models rows, and never re-books
+// (the raw event's provider is empty, so the booking guard skips it) → a permanently drained
+// aggregate. This drives that exact re-emit and asserts the rows are UNCHANGED and
+// no empty-key row was created.
+func TestCatalog_LLMReEmitEmptyProviderModelNoDrain(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+	apply := func(tx *sql.Tx, ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	apply(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(tx, canonical.TurnStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1000},
+		SessionNativeID: "s", Seq: 1,
+	})
+	// 1) Finalized LLM op under (openai, gpt-5.5) with a failure + tokens/duration so
+	//    a stranded/drained total would be visible.
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "openai", Model: "gpt-5.5",
+	})
+	apply(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "failed", ErrorClass: "model_error",
+		EndTs: 1500, TokensIn: 30, TokensOut: 8, TokensCacheRead: 2, TokensCacheWrite: 1,
+	})
+	// 2) Re-emit OpStarted for the SAME (turn,seq) with EMPTY provider/model — the ops
+	//    upsert COALESCEs these back to (openai, gpt-5.5). The catalog must see this as
+	//    UNCHANGED (effective identity == prior), NOT drain-without-rebook.
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpLLM, Name: "message", Provider: "", Model: "",
+	})
+	if cErr := tx.Commit(); cErr != nil {
+		t.Fatalf("Commit: %v", cErr)
+	}
+
+	// The (openai, gpt-5.5) rows must be intact: call_count 1, totals unchanged.
+	if got := scanInt(t, db, `SELECT call_count FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); got != 1 {
+		t.Errorf("model call_count = %d, want 1 (empty-field re-emit must not drain, I1 shared-ingest edge)", got)
+	}
+	if got := scanInt(t, db, `SELECT total_tokens_in FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); got != 30 {
+		t.Errorf("model total_tokens_in = %d, want 30 (must not drain on empty-field re-emit)", got)
+	}
+	if got := scanInt(t, db, `SELECT failure_count FROM catalog_models WHERE provider='openai' AND name='gpt-5.5'`); got != 1 {
+		t.Errorf("model failure_count = %d, want 1 (must not drain on empty-field re-emit)", got)
+	}
+	if got := scanInt(t, db, `SELECT call_count FROM catalog_providers WHERE name='openai'`); got != 1 {
+		t.Errorf("provider call_count = %d, want 1 (must not drain on empty-field re-emit)", got)
+	}
+	if got := scanInt(t, db, `SELECT total_tokens_in FROM catalog_providers WHERE name='openai'`); got != 30 {
+		t.Errorf("provider total_tokens_in = %d, want 30 (must not drain on empty-field re-emit)", got)
+	}
+	// No empty-key catalog rows must have been created by the raw-event view.
+	if got := scanInt(t, db, `SELECT COUNT(*) FROM catalog_models WHERE provider='' OR name=''`); got != 0 {
+		t.Errorf("empty-key catalog_models rows = %d, want 0", got)
+	}
+	if got := scanInt(t, db, `SELECT COUNT(*) FROM catalog_providers WHERE name=''`); got != 0 {
+		t.Errorf("empty-key catalog_providers rows = %d, want 0", got)
+	}
+}
+
+// TestCatalog_ToolReEmitEmptyNamespaceNoMigrate pins SOW-0004 I1's shared-ingest
+// edge for tool ops: a tool op counted under namespace "shell", then a re-emitted
+// OpStarted OMITS ToolNamespace. The ops upsert COALESCEs the namespace back to
+// "shell", but normalizeToolNamespace("")="builtin" — so a raw-event identity check
+// would "migrate" the contribution from "shell" to "builtin" while the ops row still
+// says "shell", siphoning the count onto a phantom (builtin, name) row and draining
+// the real "shell" row. The effective-identity check (empty→prior) must detect this
+// as UNCHANGED.
+func TestCatalog_ToolReEmitEmptyNamespaceNoMigrate(t *testing.T) {
+	t.Parallel()
+	const src = "codex:/tmp"
+	_, db := openTestStore(t)
+	ctx := context.Background()
+	if err := ensureSourceRowDirect(ctx, db, src, "codex", "/tmp"); err != nil {
+		t.Fatalf("ensure source: %v", err)
+	}
+	w := newWriter(src, "codex", "/tmp", NopPricer{})
+	apply := func(tx *sql.Tx, ev canonical.Event) {
+		if aErr := w.apply(ctx, tx, ev); aErr != nil {
+			t.Fatalf("apply %T: %v", ev, aErr)
+		}
+	}
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	apply(tx, canonical.SessionStartedEvent{
+		EventBase: canonical.EventBase{SourceID: src, SourceSeq: 1, Ts: 1000},
+		NativeID:  "s", RootNativeID: "s", Kind: canonical.KindRoot,
+	})
+	apply(tx, canonical.TurnStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 2, Ts: 1000},
+		SessionNativeID: "s", Seq: 1,
+	})
+	// 1) Tool op under namespace "shell", finalized with tokens so a drained total
+	//    would be visible.
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 3, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "shell", ToolNamespace: "shell",
+	})
+	apply(tx, canonical.OpFinalizedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 4, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, Status: "completed", EndTs: 1200, TokensIn: 7,
+	})
+	// 2) Re-emit OpStarted OMITTING ToolNamespace (empty). The ops upsert COALESCEs it
+	//    back to "shell"; the catalog must NOT migrate "shell"→"builtin".
+	apply(tx, canonical.OpStartedEvent{
+		EventBase:       canonical.EventBase{SourceID: src, SourceSeq: 5, Ts: 1100},
+		SessionNativeID: "s", TurnSeq: 1, Seq: 1, ParentOpSeq: -1,
+		Kind: canonical.OpTool, Name: "shell", ToolNamespace: "",
+	})
+	if cErr := tx.Commit(); cErr != nil {
+		t.Fatalf("Commit: %v", cErr)
+	}
+
+	// The (shell, shell) row stays at call_count 1 with its tokens; no (builtin, shell)
+	// row siphoned it.
+	if got := scanInt(t, db, `SELECT call_count FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 1 {
+		t.Errorf("shell tool call_count = %d, want 1 (omitted-namespace re-emit must not migrate, I1 shared-ingest edge)", got)
+	}
+	if got := scanInt(t, db, `SELECT total_tokens_in FROM catalog_tools WHERE namespace='shell' AND name='shell'`); got != 7 {
+		t.Errorf("shell tool total_tokens_in = %d, want 7 (must not drain on omitted-namespace re-emit)", got)
+	}
+	// COALESCE(SUM(...),0) always returns one row (0 when no phantom row exists), so
+	// it both proves "no siphoned count" and tolerates the absent-row case.
+	if got := scanInt(t, db, `SELECT COALESCE(SUM(call_count),0) FROM catalog_tools WHERE namespace='builtin' AND name='shell'`); got != 0 {
+		t.Errorf("builtin tool call_count = %d, want 0 (no phantom builtin row may siphon the shell contribution)", got)
+	}
+	if got := scanInt(t, db, `SELECT COUNT(*) FROM catalog_tools WHERE namespace='builtin' AND name='shell'`); got != 0 {
+		t.Errorf("phantom (builtin, shell) row count = %d, want 0", got)
+	}
+}
+
 // TestCatalog_KindChangeMigratesAcrossTables pins the SOW-0004 I1 migration when a
 // re-emitted OpStarted changes the op KIND (tool → llm) on the same (turn,seq) — a
 // defensive edge: the op's contribution must move OFF the tool catalog row and ONTO
diff --git a/internal/ingest/catalog_migrate.go b/internal/ingest/catalog_migrate.go
index 6dd9da4..cf3da9e 100644
--- a/internal/ingest/catalog_migrate.go
+++ b/internal/ingest/catalog_migrate.go
@@ -46,25 +46,71 @@ type priorOpIdentity struct {
 	totals        opPriorTotals
 }
 
+// effectiveCatalogIdentity is the catalog identity an OpStarted's row will ACTUALLY
+// carry AFTER applyOpStarted's upsert — which is NOT the raw event for the optional
+// identity columns. The upsert (writer.go) preserves omitted fields via a
+// COALESCE/NULLIF empty-as-prior rule, so a re-emit that carries an EMPTY
+// tool_namespace/model/provider/provider_alias keeps the PRIOR persisted value;
+// kind and name are overwritten with the event's values directly. Migration MUST be
+// driven off this effective identity, otherwise an empty-but-unchanged re-emit looks
+// "changed" against the raw event and the contribution is drained off its real key
+// without being re-booked (the raw-event key is empty) — a permanent drain
+// (SOW-0004 I1 shared-ingest edge).
+type effectiveCatalogIdentity struct {
+	kind          string
+	name          string
+	toolNamespace string
+	model         string
+	provider      string
+	providerAlias string
+}
+
+// effectiveOpIdentity computes the post-upsert catalog identity from the event and
+// the op's prior persisted identity, mirroring applyOpStarted's upsert rules: kind
+// and name come from the event (overwritten directly); tool_namespace/model/
+// provider/provider_alias use the event value when non-empty, else fall back to the
+// prior persisted value (the COALESCE/NULLIF empty-as-prior rule). When the op is a
+// genuine new insert (prior absent) the prior fields are empty, so the fallback is a
+// no-op and the effective identity equals the event identity.
+func effectiveOpIdentity(ev canonical.OpStartedEvent, prior priorOpIdentity) effectiveCatalogIdentity {
+	coalesce := func(evVal, priorVal string) string {
+		if evVal != "" {
+			return evVal
+		}
+		return priorVal
+	}
+	return effectiveCatalogIdentity{
+		kind:          string(ev.Kind),
+		name:          ev.Name,
+		toolNamespace: coalesce(ev.ToolNamespace, prior.toolNamespace),
+		model:         coalesce(ev.Model, prior.model),
+		provider:      coalesce(ev.Provider, prior.provider),
+		providerAlias: coalesce(ev.ProviderAlias, prior.providerAlias),
+	}
+}
+
 // catalogIdentityChanged reports whether a re-emitted OpStarted lands on a
 // DIFFERENT catalog row than the op's prior persisted identity, so onOpStarted
 // migrates the op's contribution instead of double-counting it (SOW-0004 I1). The
 // comparison mirrors the catalog keying exactly: LLM ops key on
 // (provider, alias, model); tool ops key on (namespace-normalized-to-builtin,
-// name); a changed KIND always counts as changed. The event's identity is
-// compared against the persisted columns the prior op contributed under.
-func (c *catalogWriter) catalogIdentityChanged(ev canonical.OpStartedEvent, prior priorOpIdentity) bool {
-	if string(ev.Kind) != prior.kind {
+// name); a changed KIND always counts as changed. The EFFECTIVE post-upsert
+// identity (empty event fields fall back to the prior persisted value) is compared
+// against the columns the prior op contributed under — so an empty-but-unchanged
+// re-emit, which the ops upsert COALESCEs back to the prior value, is correctly
+// detected as UNCHANGED and triggers no migration (SOW-0004 I1 shared-ingest edge).
+func (c *catalogWriter) catalogIdentityChanged(eff effectiveCatalogIdentity, prior priorOpIdentity) bool {
+	if eff.kind != prior.kind {
 		return true
 	}
-	switch ev.Kind {
-	case canonical.OpLLM:
-		return ev.Provider != prior.provider ||
-			ev.ProviderAlias != prior.providerAlias ||
-			ev.Model != prior.model
-	case canonical.OpTool:
-		return normalizeToolNamespace(ev.ToolNamespace) != normalizeToolNamespace(prior.toolNamespace) ||
-			ev.Name != prior.name
+	switch eff.kind {
+	case string(canonical.OpLLM):
+		return eff.provider != prior.provider ||
+			eff.providerAlias != prior.providerAlias ||
+			eff.model != prior.model
+	case string(canonical.OpTool):
+		return normalizeToolNamespace(eff.toolNamespace) != normalizeToolNamespace(prior.toolNamespace) ||
+			eff.name != prior.name
 	default:
 		// session/system/reasoning/compaction ops touch no catalog rollup row, so
 		// there is never a contribution to migrate.
@@ -156,18 +202,19 @@ WHERE namespace = ? AND name = ?
 // key starts from the op's prior contribution and any subsequent OpFinalized
 // re-emit then applies its (now − prior) delta on top. The column sets mirror
 // onOpFinalized exactly. last_seen is left to the OpStarted upsert / a later
-// OpFinalized (this UPDATE only moves accumulating totals).
-func (c *catalogWriter) addMigratedTotals(ctx context.Context, tx *sql.Tx, ev canonical.OpStartedEvent, t opPriorTotals) error {
+// OpFinalized (this UPDATE only moves accumulating totals). The destination key is
+// the EFFECTIVE post-upsert identity so it always matches the ops row and the key
+// the call_count upsert booked under (SOW-0004 I1 shared-ingest edge).
+func (c *catalogWriter) addMigratedTotals(ctx context.Context, tx *sql.Tx, eff effectiveCatalogIdentity, t opPriorTotals) error {
 	// t is the prior PERSISTED contribution; the caller only reaches here when the
 	// op row already existed (prior.found), so t.found is always true. An op started
 	// but never finalized has status="running" (failureInc 0) and zero tokens/cost/
 	// duration, so the adds below move nothing meaningful — only the call_count the
 	// OpStarted upsert already re-booked under the new key matters in that case.
 	failure := failureInc(t.status)
-	switch ev.Kind {
-	case canonical.OpLLM:
-		if ev.Provider != "" {
-			alias := ev.ProviderAlias
+	switch eff.kind {
+	case string(canonical.OpLLM):
+		if eff.provider != "" {
 			if _, err := tx.ExecContext(ctx, `
 UPDATE catalog_providers SET
     failure_count            = failure_count + ?,
@@ -178,11 +225,11 @@ UPDATE catalog_providers SET
     total_cost_usd           = total_cost_usd + ?
 WHERE name = ? AND alias = ?
 `, failure, t.tokensIn, t.tokensOut, t.tokensCacheRead, t.tokensCacheWrite, t.costUSD,
-				ev.Provider, alias); err != nil {
+				eff.provider, eff.providerAlias); err != nil {
 				return fmt.Errorf("catalog_providers migrate-in: %w", err)
 			}
 		}
-		if ev.Provider != "" && ev.Model != "" {
+		if eff.provider != "" && eff.model != "" {
 			if _, err := tx.ExecContext(ctx, `
 UPDATE catalog_models SET
     failure_count            = failure_count + ?,
@@ -194,12 +241,12 @@ UPDATE catalog_models SET
     total_duration_us        = total_duration_us + ?
 WHERE provider = ? AND name = ?
 `, failure, t.tokensIn, t.tokensOut, t.tokensCacheRead, t.tokensCacheWrite, t.costUSD, t.durationUS,
-				ev.Provider, ev.Model); err != nil {
+				eff.provider, eff.model); err != nil {
 				return fmt.Errorf("catalog_models migrate-in: %w", err)
 			}
 		}
-	case canonical.OpTool:
-		if ev.Name != "" {
+	case string(canonical.OpTool):
+		if eff.name != "" {
 			if _, err := tx.ExecContext(ctx, `
 UPDATE catalog_tools SET
     failure_count     = failure_count + ?,
@@ -209,7 +256,7 @@ UPDATE catalog_tools SET
     total_duration_us = total_duration_us + ?
 WHERE namespace = ? AND name = ?
 `, failure, t.tokensIn, t.tokensOut, t.costUSD, t.durationUS,
-				normalizeToolNamespace(ev.ToolNamespace), ev.Name); err != nil {
+				normalizeToolNamespace(eff.toolNamespace), eff.name); err != nil {
 				return fmt.Errorf("catalog_tools migrate-in: %w", err)
 			}
 		}

From f80b07ab915ad4fe377b6c4939adbebad3703c26 Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 17:37:18 +0300
Subject: [PATCH 12/13] codex spec: document the v1 same-id session-collapse
 limitation (SOW-0022)

adapter-codex.md edge #14 promised that two rollout files sharing a
session_meta.payload.id become separate sessions keyed on
native_id+basename. The v1 adapter uses payload.id as the authoritative
NativeID and the ingester upserts sessions on (source_id, native_id), so
same-id files collapse into one session. This is an unobserved edge (0 of
2,566 modern files) and disambiguation needs cross-file collision
detection the per-file adapter lacks. Align the spec to the v1 reality and
file SOW-0022 to implement the basename-disambiguation.
---
 ...022-20260530-codex-duplicate-rollout-id.md | 75 +++++++++++++++++++
 .agents/sow/specs/adapter-codex.md            |  2 +-
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 .agents/sow/pending/SOW-0022-20260530-codex-duplicate-rollout-id.md

diff --git a/.agents/sow/pending/SOW-0022-20260530-codex-duplicate-rollout-id.md b/.agents/sow/pending/SOW-0022-20260530-codex-duplicate-rollout-id.md
new file mode 100644
index 0000000..30a812c
--- /dev/null
+++ b/.agents/sow/pending/SOW-0022-20260530-codex-duplicate-rollout-id.md
@@ -0,0 +1,75 @@
+# SOW-0022 - codex duplicate-rollout-id disambiguation
+
+## Status
+
+Status: open
+
+Sub-state: proposed follow-up, awaiting operator prioritization. Discovered during SOW-0004 (codex adapter) round-6 review (codex P3). Not blocking — unobserved edge, accepted for v1.
+
+## Requirements
+
+### Purpose
+
+Honor `adapter-codex.md` edge case #14: when two codex rollout files carry the same `session_meta.payload.id`, they must become SEPARATE canonical sessions keyed on `(source_id, native_id + ":" + file_basename)` with a `LogEntry` warning — instead of collapsing into one session as they do in SOW-0004's v1.
+
+### User Request
+
+Implied by `adapter-codex.md` edge #14 (the spec documents the disambiguation behavior). Surfaced by codex's round-6 review of SOW-0004 as the one remaining (P3, non-blocking) gap.
+
+### Assistant Understanding
+
+Facts:
+
+- SOW-0004's codex adapter sets `SessionStartedEvent.NativeID = session_meta.payload.id` (authoritative; `internal/adapters/codex/mapper_turn.go`), and the ingester upserts sessions on `(source_id, native_id)` (`internal/ingest/writer.go`). So two rollout files with the same `payload.id` upsert into ONE `sessions` row — they collapse.
+- The spec (`adapter-codex.md` edge #14) intends them kept separate via `native_id + ":" + file_basename` + a warning.
+- This is **unobserved**: 0 of the 2,566 modern files on the reference workstation have duplicate ids. It would require codex to resume into a forked thread writing the same id to two files.
+- The per-file adapter cannot detect the collision alone — it has no cross-file view. Detection needs either the ingester (which sees the `(source_id, native_id)` conflict) to disambiguate, or the adapter to track seen ids across files within a Scan/Tail run.
+
+Inferences:
+
+- The cleanest home is likely the ingester: on a sessions upsert where the incoming `start_ts`/file differs from the existing row in a way that indicates a DIFFERENT physical file with the same native_id, disambiguate by appending the basename. But that needs the file basename threaded into the SessionStarted extras (the adapter has it).
+- Alternatively, the adapter carries the basename in extras and the ingester composes the disambiguated native_id when it detects a collision. Choosing between these two approaches is a design decision for the Pre-Implementation Gate.
+
+Unknowns:
+
+- Whether to disambiguate in the adapter (cross-file id set per run) or the ingester (collision-on-upsert). Resolve in the Pre-Implementation Gate.
+- Whether the disambiguated `native_id` breaks parent/child linkage (parent_thread_id/forked_from_id reference the bare id) — the resolver may need to match the bare id prefix.
+
+### Acceptance Criteria
+
+1. Two codex rollout files with the same `session_meta.payload.id` produce TWO distinct canonical sessions, each keyed on a basename-disambiguated native id, with a `LogEntry` warning per the spec. **Verification**: a golden/integration test with two same-id fixtures asserts two sessions.
+2. Parent/child + fork linkage still resolves when a parent/child id collides (the resolver matches the bare id). **Verification**: a linkage test across the disambiguated ids.
+3. The single-file common case (unique ids — 100% of observed data) is byte-for-byte unchanged. **Verification**: existing codex goldens unchanged.
+
+## Analysis
+
+Sources checked: `adapter-codex.md` edge #14 (:504), `internal/adapters/codex/mapper_turn.go` (NativeID assignment), `internal/ingest/writer.go` (sessions upsert + resolver), SOW-0004 round-6 review (codex P3).
+
+Risks:
+
+- **R1 — cross-file state.** Disambiguation needs a view the per-file adapter lacks; the ingester is the natural place but it must not regress the common single-file path. Mitigation: gate the disambiguation strictly on a detected `(source_id, native_id)` collision from a DIFFERENT file.
+- **R2 — linkage.** Disambiguated native ids must not break parent_thread_id/forked_from_id resolution. Mitigation: the resolver matches the bare id.
+
+## Pre-Implementation Gate
+
+(To be filled by the assistant picking this SOW up. Required before moving to `current/`.)
+
+## Implementation
+
+(Empty placeholder.)
+
+## Validation
+
+(Empty placeholder.)
+
+## Reviews
+
+(Empty placeholder.)
+
+## Outcome
+
+Pending.
+
+## Lessons / Follow-Ups
+
+Pending.
diff --git a/.agents/sow/specs/adapter-codex.md b/.agents/sow/specs/adapter-codex.md
index cbb794d..42a3fee 100644
--- a/.agents/sow/specs/adapter-codex.md
+++ b/.agents/sow/specs/adapter-codex.md
@@ -501,7 +501,7 @@ Real observation: 8 distinct sub-agent sessions in the sampled set, all `depth=1
 
 13. **File renamed/moved**: codex does not rename files. If an operator manually renames or moves a rollout file, the adapter sees a Delete event on the old path and Create on the new path; cursor entry for the old path is left stale. Optional cleanup after N days.
 
-14. **Two rollouts with the same `id`**: not observed but theoretically possible (codex could resume into a forked thread). Treat as separate canonical sessions keyed on `(source_id, native_id+":"+file_basename)` to avoid collision; emit a LogEntry warning.
+14. **Two rollouts with the same `id`**: not observed (0 of the 2,566 modern files on the reference workstation) but theoretically possible (codex could resume into a forked thread). The intended behavior is to treat them as separate canonical sessions keyed on `(source_id, native_id+":"+file_basename)` with a LogEntry warning. **v1 limitation (SOW-0004):** the adapter uses the authoritative `session_meta.payload.id` as `NativeID`, and the ingester upserts sessions on `(source_id, native_id)`, so two same-`id` rollout files would COLLAPSE into one canonical session rather than disambiguate. The basename-disambiguation is deferred to **SOW-0022** (requires cross-file id-collision detection the per-file adapter does not have today). Unobserved edge; no data loss within a single session.
 
 15. **`originator` variants**: observed `codex_cli_rs`, `codex_exec`, `codex-tui`. Treat as identifying string; surface in Extras.
 

From 3d28204527e30329333c3bf2e8237cbf549db1aa Mon Sep 17 00:00:00 2001
From: Costa Tsaousis <costa@netdata.cloud>
Date: Sat, 30 May 2026 17:42:51 +0300
Subject: [PATCH 13/13] =?UTF-8?q?SOW-0004:=20close=20(completed)=20?=
 =?UTF-8?q?=E2=80=94=20codex=20adapter=20delivered=20+=20merged;=20SOW-002?=
 =?UTF-8?q?0=20superseded?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move SOW-0004 to done/ with Status: completed, Outcome, and Lessons after 6
external-review rounds converged (codex+glm+minimax all SAFE) and CI green.
Close SOW-0020 (catalog idempotency) as superseded — its fix was absorbed
into SOW-0004 as a prerequisite and now benefits every adapter. Follow-ups
SOW-0021 (turn-extras carrier) and SOW-0022 (codex duplicate-rollout-id)
remain in pending/.
---
 .../SOW-0004-20260526-codex-adapter.md        | 22 +++++++++++++++----
 .../SOW-0020-20260530-catalog-idempotency.md  |  5 ++---
 2 files changed, 20 insertions(+), 7 deletions(-)
 rename .agents/sow/{current => done}/SOW-0004-20260526-codex-adapter.md (91%)
 rename .agents/sow/{pending => done}/SOW-0020-20260530-catalog-idempotency.md (83%)

diff --git a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md b/.agents/sow/done/SOW-0004-20260526-codex-adapter.md
similarity index 91%
rename from .agents/sow/current/SOW-0004-20260526-codex-adapter.md
rename to .agents/sow/done/SOW-0004-20260526-codex-adapter.md
index 65b4dfa..af9620c 100644
--- a/.agents/sow/current/SOW-0004-20260526-codex-adapter.md
+++ b/.agents/sow/done/SOW-0004-20260526-codex-adapter.md
@@ -2,9 +2,9 @@
 
 ## Status
 
-Status: in-progress
+Status: completed
 
-Sub-state: active in `current/`. Approved under the operator's blanket Phase-2 backlog sign-off ("deliver them all, any order"). Prerequisite met: SOW-0001 Phase 1 Foundation is in `done/` (canonical event types + ingest pipeline + store + adapter registry + pricing + CI gates) — this SOW reuses that infrastructure unchanged. Pre-Implementation Gate filled 2026-05-30 (see below).
+Sub-state: delivered + merged (PR #29) after 6 external-review rounds; moved to `done/`. Originally: active in `current/`. Approved under the operator's blanket Phase-2 backlog sign-off ("deliver them all, any order"). Prerequisite met: SOW-0001 Phase 1 Foundation is in `done/` (canonical event types + ingest pipeline + store + adapter registry + pricing + CI gates) — this SOW reuses that infrastructure unchanged. Pre-Implementation Gate filled 2026-05-30 (see below).
 
 ## Requirements
 
@@ -266,8 +266,22 @@ The original gate scoped this SOW to `internal/adapters/codex/` + the additive `
 
 ## Outcome
 
-Pending.
+Delivered the `codex` adapter end-to-end (PR #29, merged after 6 external-review rounds + green CI). 5 implementation chunks (A parser+cursor, B mapper/state-machine, C scanner/tailer, D payloads+wiring+auto-discovery, E golden fixtures+integration) + 6 review/fix rounds. Acceptance #1–8 met: registered as `"codex"`; ingests every persisted RolloutItem/payload variant with per-variant SourceError tolerance; turn-boundary dual-format (cli 0.61 turn_context-only vs ≥0.93 task_started/complete); reasoning split; byte-offset cursor with zero-dup/zero-gap resume + truncation defense; FuzzParseLine gate; auto-discovery probe (`$CODEX_HOME/sessions`) with modern/legacy counts. Final gates: golangci 0, gosec 0, `go test -race ./...` 13/13, codex coverage 92.6%, ingest 88.5%, goldens byte-identical, secret + AI-attribution scans clean.
+
+CTO decisions recorded in the gate + reviews: sum-of-`last_token_usage` token rollup (C#1); claude-code-model session finalize, no clean-EOF completed (C#3); exec exit_code authoritative for op status order-independently; web_search positional pairing (no call-side id); NativeID from `payload.id`. The catalog-idempotency-under-re-emission fix (SOW-0020) was absorbed here as a prerequisite and benefits all adapters.
 
 ## Lessons / Follow-Ups
 
-Pending.
+Lessons:
+
+- **External review is load-bearing, and codex is the decisive reviewer.** Per-round findings: codex 6→5→2→2→1→0; glm/minimax rubber-stamped most rounds (minimax once *falsely* claimed a log was emitted — caught only by reading the code). Adjudicate every finding on ground truth (spec lines + the real `~/.codex` corpus), never on reviewer convergence. The real-data investigation repeatedly *corrected* codex's wire-shape guesses (web_search has no id; collab uses new_thread_id not agent_ref; compaction companion is event_msg.context_compacted).
+- **Golden fixtures built by the same understanding as the code are circular.** Round-1 goldens encoded the code's (wrong) assumptions; codex caught the old-format-stale mislabel (38% of real files) precisely because the fixture used a fresh mtime. Line-check every expected.jsonl against the spec, and seed fixtures from real wire shapes.
+- **Op re-emission breaks incremental aggregates.** The codex adapter (replay-from-0 + enrichment correction + EOF finalize) is the first heavy re-emitter; it exposed that the catalog ADDs unconditionally. Idempotency needs insert-signal counting + persisted-prior delta + effective-post-upsert identity (empty→prior). A *derived* catalog (recompute from the ops table) would be more robust — candidate for the quality-cluster SOWs.
+- **git add hygiene:** a specific-path `git add` once listed `sources.go` but omitted its `sources_test.go`, shipping code without its test; always verify `git status` covers source+tests before commit.
+
+Follow-ups (filed):
+
+- **SOW-0021** — turn-extras carrier: `turns.extras_json` is unreachable (no Extras on canonical turn events); codex surfaces codex_turn_id/sandbox/ttft_ms via an interim `turn_meta` LogEntry.
+- **SOW-0022** — codex duplicate-rollout-id disambiguation (same `payload.id` files collapse in v1; unobserved).
+- **Legacy `.json`** (R1) — Phase-2.5: ingest the 19 legacy flat files (v1 emits one SourceError/file).
+- **SOW-0020** — superseded by this SOW's catalog-idempotency work; closed.
diff --git a/.agents/sow/pending/SOW-0020-20260530-catalog-idempotency.md b/.agents/sow/done/SOW-0020-20260530-catalog-idempotency.md
similarity index 83%
rename from .agents/sow/pending/SOW-0020-20260530-catalog-idempotency.md
rename to .agents/sow/done/SOW-0020-20260530-catalog-idempotency.md
index 6a24f05..99f23ae 100644
--- a/.agents/sow/pending/SOW-0020-20260530-catalog-idempotency.md
+++ b/.agents/sow/done/SOW-0020-20260530-catalog-idempotency.md
@@ -2,10 +2,9 @@
 
 ## Status
 
-Status: pending
+Status: closed
 
-Sub-state: proposed (2026-05-30) during SOW-0003 Round-6 review. Awaiting operator sign-off.
-Independent of the adapters; an ingester-layer correctness fix.
+Sub-state: SUPERSEDED 2026-05-30 by SOW-0004 (codex adapter), which absorbed this catalog-idempotency-under-re-emission fix as a prerequisite: `onOpStarted` counts a call once per op (insert-signal) and migrates the contribution on an identity change keyed on the effective post-upsert identity (empty→prior); `onOpFinalized` applies a now-minus-prior delta. The fix is adapter-agnostic and benefits aiagent_v2/v3 + claude_code + codex. See SOW-0004 `## Reviews` rounds 4–6 and `internal/ingest/{catalog.go,catalog_migrate.go,writer.go}` + `catalog_idempotency_test.go`. Moved to `done/` as a closed (superseded) record. Originally: proposed during SOW-0003 Round-6 review as an independent ingester-layer correctness fix.
 
 ## Requirements