23 changes: 23 additions & 0 deletions CLAUDE.md
@@ -13,8 +13,15 @@ scans/ input PDFs (gitignored; SCANS_ROOT)
data/ outputs (gitignored; DATA_ROOT)
pages/<rel-pdf>/page-NN.png rendered images
results/<rel-pdf>/page-NN.json extraction results (one PageResult per page)
verifier/<stem>.bundle.json pre-processor output: result + per-row bboxes
verifier/<stem>.verified.json verifier UI export: hand-corrected PageResult
jobs.db SQLite job table

verifier/ static SPA for manual row-by-row verification.
Loads a bundle, renders each row's cropped
image strip next to an editable text field,
exports a corrected verified.json.

core/
schema.py Pydantic models. GeminiPageResult is what
the model returns (used as response_schema);
@@ -41,12 +48,28 @@ core/
PageLayout (header_bottom_y, body_mid_y,
column_mid_x). Used by the per-quadrant
cropper in scripts/calibrate_models.py.
`partition_row_lines_by_quadrant(image,
layout)` is the public hook the verifier
pre-processor uses to compute per-row bboxes.
continuations.py Read-time merge of `notes="continuation"`
rows into the prior entry's raw_text.
Pure function; on-disk shape unchanged.

cli.py Typer entrypoint: `flowsheets <subcommand>`.
Builds dependencies from env, calls into core.

scripts/
make_verifier_bundle.py PageResult JSON + page PNG -> verifier
bundle.json with per-quadrant + per-row
bboxes for the SPA to canvas-crop. Hard-codes
SCHEMA_VERSION = 1; bump on incompatible
schema changes.
derive_truth.py <stem>.verified.json -> <stem>.truth.json
by extracting short uppercased substrings
(page date tokens, jock prefix, artist
portion of raw_text). Single source of
truth for those rules — the UI doesn't
derive truth itself.
```
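
The continuations merge described above is small enough to sketch. A minimal illustration of the read-time behavior, assuming `PageResult` exposes an `entries` list whose items carry `raw_text` and `notes` (the exact signature in `core/continuations.py` may differ):

```python
from core.schema import PageResult  # module per the tree above


def merge_continuations(result: PageResult) -> PageResult:
    """Fold notes="continuation" rows into the prior entry's raw_text.

    Pure read-time transform: the on-disk PageResult is never rewritten.
    Sketch only; field names are assumed from this layout description.
    """
    merged = []
    for entry in result.entries:
        if entry.notes == "continuation" and merged:
            prev = merged[-1]
            merged[-1] = prev.model_copy(
                update={"raw_text": f"{prev.raw_text} {entry.raw_text}"}
            )
        else:
            merged.append(entry)
    return result.model_copy(update={"entries": merged})
```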

## Why these choices
24 changes: 24 additions & 0 deletions README.md
@@ -89,6 +89,30 @@ Tests are split into:

The default test run **excludes** the `external_api` and `slow` markers; CI runs the same default. The golden-page external-API runner is a follow-up.

## Manual verifier

After the pipeline produces `data/results/<rel>/page-NN.json`, you can hand-verify and correct entries via the static SPA in `verifier/`. Each row's cropped image strip sits next to its detected text in an editable field. Exporting writes a `<stem>.verified.json` (shaped like a `PageResult`, so it plugs straight back into the pipeline as ground truth), and `scripts/derive_truth.py` then produces a matching `tests/golden/<stem>.truth.json`.

```bash
# Generate a bundle
python -m scripts.make_verifier_bundle \
data/results/<rel>/page-NN.json \
data/pages/<rel>/page-NN.png \
--out data/verifier/<stem>.bundle.json

# Open the verifier
python -m http.server 8765
# then visit:
# http://localhost:8765/verifier/?bundle=/data/verifier/<stem>.bundle.json

# Derive a truth file from the exported verified.json
python -m scripts.derive_truth \
data/verifier/<stem>.verified.json \
--out tests/golden/<stem>.truth.json
```

See `verifier/README.md` for the bundle schema, expected file layout, and the substring-derivation rules.
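
As a rough illustration of those rules (everything here is hypothetical: the function name, field names, and slicing are stand-ins, not the real `scripts/derive_truth.py`):

```python
# Hypothetical sketch of the substring derivation; the authoritative
# rules live in scripts/derive_truth.py and verifier/README.md.
def derive_row_truth(raw_text: str, jock: str, page_date: str) -> dict:
    artist = raw_text.split(" - ", 1)[0]  # artist portion (assumed delimiter)
    return {
        "date_tokens": page_date.upper().split(),  # page date tokens
        "jock_prefix": jock.upper().split()[0],    # jock prefix (assumed rule)
        "artist": artist.strip().upper(),          # short uppercased substring
    }
```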

## Cost calibration

Gemini 3.1 Pro charges per input token; one 300-DPI flowsheet page at `media_resolution=high` is ~1120 image tokens plus ~600 prompt tokens. Across the full corpus (~16K pages) input cost lands in the low tens of dollars; output adds modestly. Run the pipeline against a 10–20 page sample first and inspect both quality and `usage_metadata` before scheduling a full run.
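
For a back-of-envelope check on that estimate (the per-million-token price below is an illustrative assumption, not published pricing; verify before budgeting):

```python
# Rough input-cost estimate for the full corpus.
PAGES = 16_000
TOKENS_PER_PAGE = 1_120 + 600   # image + prompt tokens per page (from above)
USD_PER_M_INPUT = 1.25          # ASSUMED illustrative rate, not real pricing

input_tokens = PAGES * TOKENS_PER_PAGE                  # ~27.5M tokens
print(f"~${input_tokens / 1e6 * USD_PER_M_INPUT:.0f}")  # ~$34: low tens of dollars
```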
105 changes: 95 additions & 10 deletions core/jobs.py
@@ -50,11 +50,15 @@ class Job:
image_path: str | None
result_path: str | None
model_version: str | None
verified_at: str | None
verified_path: str | None
corrections_path: str | None
created_at: str
updated_at: str

@classmethod
def from_row(cls, row: aiosqlite.Row) -> Self:
keys = set(row.keys())
return cls(
pdf_path=row["pdf_path"],
page_number=row["page_number"],
@@ -64,29 +68,54 @@ def from_row(cls, row: aiosqlite.Row) -> Self:
image_path=row["image_path"],
result_path=row["result_path"],
model_version=row["model_version"],
# Late-added columns are nullable; tolerate their absence on a
# very old jobs.db that hasn't been re-init()ed yet.
verified_at=row["verified_at"] if "verified_at" in keys else None,
verified_path=row["verified_path"] if "verified_path" in keys else None,
corrections_path=(row["corrections_path"] if "corrections_path" in keys else None),
created_at=row["created_at"],
updated_at=row["updated_at"],
)


_SCHEMA = """
CREATE TABLE IF NOT EXISTS jobs (
pdf_path TEXT NOT NULL,
page_number INTEGER NOT NULL,
status TEXT NOT NULL,
attempts INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
image_path TEXT,
result_path TEXT,
model_version TEXT,
verified_at TEXT,
verified_path TEXT,
corrections_path TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
PRIMARY KEY (pdf_path, page_number)
);

CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status);
"""

# Columns added after the initial schema. `init()` runs `ALTER TABLE` for
# each of these against existing databases so older jobs.db files pick up
# the new columns without losing data.
_LATE_COLUMNS: tuple[tuple[str, str], ...] = (
("verified_at", "TEXT"),
("verified_path", "TEXT"),
("corrections_path", "TEXT"),
)

# Indexes that depend on late-added columns and therefore must be created
# AFTER the ALTER TABLE migrations run. Keeping them out of `_SCHEMA`
# avoids "no such column" errors when initializing a legacy database.
_POST_MIGRATION_INDEXES: tuple[str, ...] = (
"CREATE INDEX IF NOT EXISTS idx_jobs_verified_at "
"ON jobs(verified_at) WHERE verified_at IS NOT NULL",
)


def _now() -> str:
return datetime.now(UTC).isoformat()
@@ -112,6 +141,19 @@ async def init(self) -> None:
# rollback journal. The pragma is persistent across connections.
await db.execute("PRAGMA journal_mode=WAL")
await db.executescript(_SCHEMA)
# ALTER TABLE migrations for late-added columns. CREATE TABLE
# above is idempotent (IF NOT EXISTS), so on a fresh DB this
# is a no-op; on an existing DB it adds the columns.
db.row_factory = aiosqlite.Row
cursor = await db.execute("PRAGMA table_info(jobs)")
existing = {row["name"] for row in await cursor.fetchall()}
for name, col_type in _LATE_COLUMNS:
if name not in existing:
await db.execute(f"ALTER TABLE jobs ADD COLUMN {name} {col_type}")
# Indexes that reference late columns run after the ALTER
# TABLE pass, otherwise SQLite errors on the missing column.
for index_sql in _POST_MIGRATION_INDEXES:
await db.execute(index_sql)
await db.commit()

@asynccontextmanager
@@ -234,6 +276,49 @@ async def mark_low_confidence(
clear_error=True,
)

async def mark_verified(
self,
pdf_path: str,
page_number: int,
*,
verified_path: Path,
corrections_path: Path,
) -> bool:
"""Record that a page has been hand-verified via the verifier UI.

Doesn't change `status` — verification is orthogonal to the
extraction state machine (a `completed` page can be verified;
re-extracting a verified page resets the result but should NOT
clear the verification record by default — that's a separate
decision a human makes via `retry`).

Returns True if a job row matched, False otherwise. Callers
(e.g. the verifier server) may want to write files even when no
job row exists for the page (test fixtures), so a False return
is not an error.
"""
async with self._connect() as db:
cursor = await db.execute(
"""
UPDATE jobs
SET verified_at = ?,
verified_path = ?,
corrections_path = ?,
updated_at = ?
WHERE pdf_path = ? AND page_number = ?
""",
(
_now(),
str(verified_path),
str(corrections_path),
_now(),
pdf_path,
page_number,
),
)
await db.commit()
return cursor.rowcount > 0

async def mark_failed(self, pdf_path: str, page_number: int, error: str) -> None:
async with self._connect() as db:
cursor = await db.execute(
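
A usage sketch for `mark_verified` as its docstring describes it. The store class name and constructor are assumed from context, since this diff doesn't show them:

```python
import asyncio
from pathlib import Path

from core.jobs import JobStore  # class name assumed; not visible in this diff


async def main() -> None:
    store = JobStore(Path("data/jobs.db"))  # constructor shape assumed
    matched = await store.mark_verified(
        "scans/example.pdf",
        4,
        verified_path=Path("data/verifier/example-p04.verified.json"),
        corrections_path=Path("data/verifier/example-p04.corrections.json"),
    )
    if not matched:
        # Not an error: verifier output can exist without a job row
        # (e.g. test fixtures); see the docstring above.
        print("no job row matched; files were still written")


asyncio.run(main())
```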
87 changes: 87 additions & 0 deletions core/page_layout.py
@@ -31,6 +31,8 @@

import numpy as np

from core.schema import QUADRANT_ORDER, QuadrantPosition

if TYPE_CHECKING:
from PIL.Image import Image as PILImage

@@ -89,6 +91,14 @@
# Comments line and excludes the last body row.
_BODY_BOTTOM_SEARCH_BAND = (0.95, 0.99)

# When the top quadrant's last spacing exceeds this multiple of the
# global median row spacing, the trailing line is reattributed to the
# corresponding bottom quadrant. The anomaly signals that body_mid_y
# landed BELOW the bottom block's hour-jock-cell baseline, leaving that
# line in the top partition by mistake. See
# `partition_row_lines_by_quadrant`'s correction-pass comment.
_BOTTOM_BASELINE_REATTRIBUTION_RATIO = 1.3


@dataclass(frozen=True)
class PageLayout:
Expand Down Expand Up @@ -296,3 +306,80 @@ def _detect_body_bottom_y(row_lines: list[int], h: int) -> int:
if not in_band:
return int(h * FALLBACK_BODY_BOTTOM_FRACTION)
return in_band[-1]


def partition_row_lines_by_quadrant(
image: PILImage, layout: PageLayout
) -> dict[QuadrantPosition, list[int]]:
"""Detected row-line y-coords, partitioned by quadrant of the body grid.

Reuses `_detect_row_lines` for the y-coordinates, then classifies each
line by which page-column it spans (left, right, or both, based on ink
density at that y) and which body band it sits in (top vs bottom, by
`layout.body_mid_y`).

A line spanning both columns is added to BOTH side quadrants — most
printed flowsheet grid lines run full-width and bracket both hour-blocks
of a row.

Lines outside `[layout.header_bottom_y, layout.body_bottom_y)` are
dropped (header or footer artifacts, not body rows).

Returns a dict with all four `QUADRANT_ORDER` keys; empty list when
no lines hit a quadrant (blank image, un-printed margin).
"""
w, _h = image.size
grayscale = np.asarray(image.convert("L"))
col_mid = layout.column_mid_x

all_lines = _detect_row_lines(grayscale, w, col_mid)

ink = (255 - grayscale).astype(np.float64) / 255.0
left_w = float(col_mid)
right_w = float(w - col_mid)
threshold = _ROW_LINE_THRESHOLDS[-1]

out: dict[QuadrantPosition, list[int]] = {q: [] for q in QUADRANT_ORDER}
for y in all_lines:
if not (layout.header_bottom_y <= y < layout.body_bottom_y):
continue
left_ink = float(ink[y, :col_mid].sum())
right_ink = float(ink[y, col_mid:].sum())
on_left = left_ink > threshold * left_w
on_right = right_ink > threshold * right_w
if y < layout.body_mid_y:
if on_left:
out["top_left"].append(int(y))
if on_right:
out["top_right"].append(int(y))
else:
if on_left:
out["bottom_left"].append(int(y))
if on_right:
out["bottom_right"].append(int(y))

# Correction pass: on some pages `_detect_body_mid_y` lands BELOW the
# bottom-block hour-jock-cell baseline (the anchor at 0.55h prefers the
# gap below the cell over the true inter-block gap above it). The
# baseline line then gets misattributed to the top quadrant, and the
# bottom quadrant's first detected line is row 0's BOTTOM rather than
# its top — shifting every row crop up by one.
#
# Signal: the top quadrant's last spacing is significantly larger than
# the median row spacing across all detected lines (a normal sequence
# has consistent spacing; an anomalous jump at the end means the last
# line belongs to a different sequence — the bottom block).
if len(all_lines) >= 2:
median_spacing = float(np.median(np.diff(np.asarray(all_lines))))
if median_spacing > 0:
for top_pos, bottom_pos in (
("top_left", "bottom_left"),
("top_right", "bottom_right"),
):
top_lines = out[top_pos] # type: ignore[index]
if len(top_lines) >= 2:
last_spacing = top_lines[-1] - top_lines[-2]
if last_spacing > _BOTTOM_BASELINE_REATTRIBUTION_RATIO * median_spacing:
moved = top_lines.pop()
out[bottom_pos].insert(0, moved) # type: ignore[index]
return out
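
And how the verifier pre-processor might call the hook (`PageLayout` field values here are made up, and the layout-detection entry point isn't shown in this diff):

```python
from PIL import Image

from core.page_layout import PageLayout, partition_row_lines_by_quadrant

# Illustrative layout values; in scripts/make_verifier_bundle.py they come
# from the real layout detector, which this diff doesn't show.
image = Image.open("data/pages/example/page-01.png")
layout = PageLayout(  # keyword args; the dataclass field order is assumed
    header_bottom_y=180,
    body_mid_y=1650,
    body_bottom_y=3150,
    column_mid_x=1275,
)
for quadrant, ys in partition_row_lines_by_quadrant(image, layout).items():
    print(quadrant, f"{len(ys)} row lines")
```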
8 changes: 8 additions & 0 deletions pyproject.toml
@@ -18,6 +18,14 @@ dependencies = [
"rich>=13.0.0",
"pillow>=10.0",
"numpy>=2.0",
# Verifier UI server (verifier/serve.py). The static SPA depends on the
# POST /api/lookup proxy (request-o-matic doesn't emit CORS) and
# POST /api/save (writes verified.json + corrections.json, updates
# jobs.db). httpx is also load-bearing for tests/unit/test_verifier_serve.py
# via httpx.ASGITransport.
"fastapi>=0.115",
"uvicorn>=0.30",
"httpx>=0.27",
]
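
A minimal sketch of the API surface that comment describes. The route paths come from the comment itself; request/response shapes, file naming, and the upstream URL are assumptions, not `verifier/serve.py`:

```python
# Sketch only; shapes and naming are assumed, not the real serve.py.
from pathlib import Path

import httpx
from fastapi import FastAPI

app = FastAPI()
VERIFIER_DIR = Path("data/verifier")


@app.post("/api/lookup")
async def lookup(payload: dict) -> dict:
    # Server-side proxy: the upstream lookup service doesn't emit CORS
    # headers, so the browser can't call it directly. Placeholder URL.
    async with httpx.AsyncClient() as client:
        resp = await client.post("https://lookup.example/api", json=payload)
    return resp.json()


@app.post("/api/save")
async def save(payload: dict) -> dict:
    # Writes verified.json + corrections.json; the jobs.db update
    # (mark_verified in core/jobs.py) is omitted from this sketch.
    stem = payload["stem"]  # assumed field
    (VERIFIER_DIR / f"{stem}.verified.json").write_text(payload["verified"])
    (VERIFIER_DIR / f"{stem}.corrections.json").write_text(payload["corrections"])
    return {"ok": True}
```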

[project.optional-dependencies]