From ed9eaf7e7a00d21bb9805bc14d3babf47905b1e5 Mon Sep 17 00:00:00 2001 From: Jake Bromberg Date: Tue, 12 May 2026 09:43:36 -0700 Subject: [PATCH 1/2] feat(verifier): bundle index, prev/next nav, status tracking, shortcuts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a workflow on top of the per-bundle editor that surfaces what's been done and steps through what hasn't. INDEX page at /verifier/ (no ?bundle=). Lists every bundle in data/verifier/ with its three-state status badge (incomplete / partial / complete), page_date_raw, and last-saved timestamp. Click a row to open. Header has an "Open next page that needs work" button that jumps to the first non-complete bundle. Three-state tracking via a new top-level "status" field on corrections.json. /api/save accepts an optional "status" body parameter: complete from the client wins, but an existing on-disk complete is preserved across plain Saves (refining details on a done page doesn't downgrade it). Legacy corrections.json without status is treated as partial — they were saved, just not done. New GET /api/bundles enumerates bundles + their state for the index page and the Prev/Next nav in the editor. Malformed bundles still appear (with null metadata) so the user can spot the breakage. Editor header gets two buttons (Save, Mark complete), a status pill reflecting the current state, position indicator (3 / 5), Prev/Next buttons, and a "← All pages" back link. Save and Mark complete share the same POST path; they only differ in the status body field. Keyboard shortcuts: ⌘S save, ⌘⇧S mark complete, j/k row nav (also ⌘↓/⌘↑), ⌘D toggle delete on focused row, n/p prev/next bundle, ? toggles the overlay listing them all. Single-letter shortcuts are ignored when an input has focus so typing works normally. The bundle-input file picker is dropped — opening a bundle is now done via the index list or a direct ?bundle= URL. 
10 new tests cover the status preservation rule, /api/bundles state classification (incomplete/partial/complete, legacy corrections.json, malformed bundle, verified_at surfacing, empty dir). Total 494, ruff/mypy clean. --- tests/unit/test_verifier_serve.py | 174 +++++++++++++++++ verifier/README.md | 50 +++-- verifier/app.js | 307 ++++++++++++++++++++++++++++-- verifier/index.html | 65 ++++++- verifier/serve.py | 153 ++++++++++++++- verifier/styles.css | 134 +++++++++++++ 6 files changed, 842 insertions(+), 41 deletions(-) diff --git a/tests/unit/test_verifier_serve.py b/tests/unit/test_verifier_serve.py index 3aa54b0..8da6299 100644 --- a/tests/unit/test_verifier_serve.py +++ b/tests/unit/test_verifier_serve.py @@ -319,6 +319,180 @@ async def test_save_writes_are_atomic_no_tmp_left_behind(serve_app, tmp_path: Pa assert tmp_files == [], f"unexpected tmp files left: {tmp_files}" +async def test_save_writes_status_draft_by_default(serve_app, tmp_path: Path) -> None: + """A Save with no `status` field writes corrections.json with + `status: "draft"` — the default for a partial / in-progress page.""" + async with await _client(serve_app.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "draft-default", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + assert r.json()["status"] == "draft" + on_disk = json.loads( + (tmp_path / "data" / "verifier" / "draft-default.corrections.json").read_text() + ) + assert on_disk["status"] == "draft" + + +async def test_save_writes_status_complete_when_requested(serve_app, tmp_path: Path) -> None: + """An explicit `status: "complete"` from the UI's Mark complete button + persists as `"complete"`.""" + async with await _client(serve_app.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "mark-done", + "status": "complete", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + assert 
r.json()["status"] == "complete" + + +async def test_save_preserves_complete_on_subsequent_draft_save(serve_app, tmp_path: Path) -> None: + """Once a page is `complete`, a subsequent plain Save (default + `draft` or omitted status) does NOT downgrade it. Refining details + on a completed page is a tweak-in-place, not a status change.""" + body_draft = { + "stem": "preserve", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + } + body_complete = {**body_draft, "status": "complete"} + async with await _client(serve_app.app) as c: + await c.post("/api/save", json=body_complete) + # Now save again with no status — should stay complete. + r = await c.post("/api/save", json=body_draft) + assert r.status_code == 200 + assert r.json()["status"] == "complete" + on_disk = json.loads((tmp_path / "data" / "verifier" / "preserve.corrections.json").read_text()) + assert on_disk["status"] == "complete" + + +async def test_save_rejects_invalid_status(serve_app, tmp_path: Path) -> None: + """Unknown status values are rejected — no silent fallback.""" + async with await _client(serve_app.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "bad-status", + "status": "in-progress", # not a valid value + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 400 + assert "status" in r.json()["detail"] + + +# -- /api/bundles ---------------------------------------------------------- + + +def _write_bundle(verifier_dir: Path, stem: str, page_date_raw: str | None) -> None: + """Drop a minimal bundle.json under the verifier directory for the + /api/bundles enumeration tests.""" + verifier_dir.mkdir(parents=True, exist_ok=True) + (verifier_dir / f"{stem}.bundle.json").write_text( + json.dumps( + { + "schema_version": 2, + "stem": stem, + "image_path": f"../tests/golden/{stem}.png", + "pdf_path": None, + "page_number": None, + "model_version": "test", + "extracted_at": "2026-05-12T00:00:00Z", + 
"page_date_raw": page_date_raw, + "comments_raw": None, + "oddities": [], + "quadrants": [], + } + ) + ) + + +async def test_list_bundles_empty_when_no_dir(serve_app, tmp_path: Path) -> None: + """No data/verifier/ directory → empty bundle list, not a 500.""" + async with await _client(serve_app.app) as c: + r = await c.get("/api/bundles") + assert r.status_code == 200 + assert r.json() == {"bundles": []} + + +async def test_list_bundles_classifies_three_states(serve_app, tmp_path: Path) -> None: + """Three bundles → three states: incomplete (no corrections file), + partial (corrections with status=draft), complete (corrections with + status=complete). Sorted alphabetically by stem.""" + verifier_dir = tmp_path / "data" / "verifier" + _write_bundle(verifier_dir, "a-untouched", "A") + _write_bundle(verifier_dir, "b-draft", "B") + _write_bundle(verifier_dir, "c-complete", "C") + (verifier_dir / "b-draft.corrections.json").write_text(json.dumps({"status": "draft"})) + (verifier_dir / "c-complete.corrections.json").write_text(json.dumps({"status": "complete"})) + + async with await _client(serve_app.app) as c: + r = await c.get("/api/bundles") + bundles = r.json()["bundles"] + assert [b["stem"] for b in bundles] == ["a-untouched", "b-draft", "c-complete"] + assert [b["status"] for b in bundles] == ["incomplete", "partial", "complete"] + assert [b["page_date_raw"] for b in bundles] == ["A", "B", "C"] + assert bundles[0]["url"] == "/verifier/?bundle=/data/verifier/a-untouched.bundle.json" + + +async def test_list_bundles_legacy_corrections_without_status_is_partial( + serve_app, tmp_path: Path +) -> None: + """A corrections.json from before status tracking landed (no `status` + field) is classified as `partial` — they were saved, just not done.""" + verifier_dir = tmp_path / "data" / "verifier" + _write_bundle(verifier_dir, "legacy", None) + (verifier_dir / "legacy.corrections.json").write_text(json.dumps({"row_corrections": []})) + async with await 
_client(serve_app.app) as c: + r = await c.get("/api/bundles") + assert r.json()["bundles"][0]["status"] == "partial" + + +async def test_list_bundles_surfaces_verified_at_timestamp(serve_app, tmp_path: Path) -> None: + """`verified_at` reflects when the last Save / Mark-complete fired. + Sourced from the verified.json mtime so the same /api/save flow keeps + it accurate.""" + verifier_dir = tmp_path / "data" / "verifier" + _write_bundle(verifier_dir, "stamped", None) + (verifier_dir / "stamped.corrections.json").write_text(json.dumps({"status": "draft"})) + (verifier_dir / "stamped.verified.json").write_text("{}") + + async with await _client(serve_app.app) as c: + r = await c.get("/api/bundles") + bundle = r.json()["bundles"][0] + assert bundle["verified_at"] is not None + # ISO format with timezone. + assert "T" in bundle["verified_at"] + + +async def test_list_bundles_malformed_bundle_doesnt_break_index(serve_app, tmp_path: Path) -> None: + """If one bundle.json is corrupted, the index still lists it (so the + user can spot the problem) but with null metadata.""" + verifier_dir = tmp_path / "data" / "verifier" + verifier_dir.mkdir(parents=True) + (verifier_dir / "broken.bundle.json").write_text("not json {{ \\") + _write_bundle(verifier_dir, "good", "ok") + async with await _client(serve_app.app) as c: + r = await c.get("/api/bundles") + bundles = r.json()["bundles"] + by_stem = {b["stem"]: b for b in bundles} + assert "broken" in by_stem + assert by_stem["broken"]["page_date_raw"] is None + assert by_stem["broken"]["status"] == "incomplete" + assert by_stem["good"]["page_date_raw"] == "ok" + + async def test_save_skips_db_when_no_jobs_db_file(serve_app, tmp_path: Path) -> None: """If `data/jobs.db` doesn't exist (no pipeline has run), Save still succeeds — no DB integration is attempted.""" diff --git a/verifier/README.md b/verifier/README.md index ef96db5..39d31e3 100644 --- a/verifier/README.md +++ b/verifier/README.md @@ -14,15 +14,15 @@ The verifier ships 
with a tiny FastAPI server that does two things: .venv/bin/python verifier/serve.py # default port is 8765; override with VERIFIER_PORT=9000 .venv/bin/python verifier/serve.py -# then open in a browser: -open "http://localhost:8765/verifier/?bundle=/data/verifier/.bundle.json" +# then open the index: +open "http://localhost:8765/verifier/" ``` -If you want only the static side and don't need the artist-lookup button, `python -m http.server 8765` from the repo root still works — the Check-artists button will return 404s but everything else functions. +The index page lists every bundle in `data/verifier/` with its verification state and an **Open next page that needs work** button. Click a row to open it. The status badge on each row mirrors the same state machine as the in-edit pill: `incomplete` (no save yet), `partial` (saved as draft), `complete` (marked complete). -The `?bundle=...` URL param is the recommended path: the UI fetches the bundle, then resolves the bundle's `image_path` (relative path inside the JSON) and fetches the image too. +The `?bundle=` URL is still the way to deep-link a specific page (e.g., bookmarks, share links). Edit-mode navigation also exposes Prev / Next buttons and the keyboard shortcuts (`?` to see all). -You can also load a bundle via the **Load bundle** file picker, in which case a second **Load image** picker appears. This path works without a server but you must pick both files manually. +You can also supply the page image manually via the **Load image** file picker if the page is served statically and the relative image path can't be fetched. ## File layout @@ -104,15 +104,25 @@ tests/golden/.truth.json # derive_truth output (optional destinat ## Saving -Clicking **Save** POSTs the current edit state to the server's `/api/save` endpoint, which: +Two buttons share the right side of the header: **Save** and **Mark complete**. Both POST to `/api/save`. 
The only difference is the `status` field in the body — `Save` omits it (treated as `draft`), `Mark complete` sends `"complete"`. -1. Writes `data/verifier/<stem>.verified.json` — `PageResult`-shaped JSON validating against `core.schema.PageResult`. Bundle-only fields (`schema_version`, `stem`, `image_path`, `pdf_path`, `page_number`, per-entry `row_bbox`) are stripped before validation. Rows marked ✗ are excluded. Rows added via **+ add row** are included. -2. Writes `data/verifier/<stem>.corrections.json` — the delta between the loaded bundle and the verified state (shape below). -3. If the bundle has a non-null `pdf_path` + `page_number` (production-pipeline pages do; test fixtures don't), updates the matching `jobs.db` row via `JobStore.mark_verified` — setting `verified_at`, `verified_path`, and `corrections_path`. +Status semantics: -The status bar reports the destination files and whether `jobs.db` was updated: +- **Incomplete** — no `<stem>.corrections.json` on disk. The bundle has never been saved. +- **Partial** — `corrections.json` exists with `"status": "draft"`. The user is in progress. +- **Complete** — `corrections.json` has `"status": "complete"`. The user explicitly marked the page done. -> Saved data/verifier/X.verified.json + data/verifier/X.corrections.json · 4 field correction(s), 0 added, 0 deleted · jobs.db updated. +The server runs a small preservation rule so a plain Save on an already-complete page **does not** downgrade it. The user can refine details on a complete page without re-marking it. (If we ever need a "Revert to draft" affordance, that's a separate ticket.) + +Save's three side effects: + +1. Writes `data/verifier/<stem>.verified.json` — `PageResult`-shaped JSON validating against `core.schema.PageResult`. Bundle-only fields are stripped before validation. Rows marked ✗ are excluded; rows added via **+ add row** are included. +2. 
Writes `data/verifier/<stem>.corrections.json` — the delta between the loaded bundle and the verified state, plus a top-level `"status"` field. +3. If the bundle has a non-null `pdf_path` + `page_number`, updates the matching `jobs.db` row via `JobStore.mark_verified`. + +The status bar reports the destination files, status, and whether `jobs.db` was updated: + +> Saved as complete · data/verifier/X.verified.json + data/verifier/X.corrections.json · 4 field correction(s), 0 added, 0 deleted · jobs.db updated. If you'd rather have a downloadable file, open the saved JSON from `data/verifier/` directly. @@ -166,6 +176,24 @@ Badge states: The lookup goes through request-o-matic's LLM-driven request parser (artist normalization, fuzzy matching) before hitting the LML library search. The badge reflects request-o-matic's `library_results` and `artwork` fields — not LML's `/api/v1/lookup` directly, since the LLM correction layer is the load-bearing piece. +## Keyboard shortcuts + +Press `?` anywhere in the editor to see the overlay. The current set: + +| Key | Action | +|---|---| +| ⌘S / Ctrl+S | Save (draft) | +| ⌘⇧S / Ctrl+Shift+S | Mark complete | +| j / ⌘↓ | Focus next row's `raw_text` | +| k / ⌘↑ | Focus previous row | +| ⌘D / Ctrl+D | Toggle ✗ (delete) on focused row | +| n | Next bundle | +| p | Previous bundle | +| ? | Toggle shortcut overlay | +| Esc | Close overlay | + +The single-letter keys (`j`, `k`, `n`, `p`, `?`) are ignored when the keyboard focus is in an ``, `