diff --git a/core/prompts.py b/core/prompts.py
index 5164242..8389b8a 100644
--- a/core/prompts.py
+++ b/core/prompts.py
@@ -10,7 +10,7 @@
   * never invent content; mark unreadable rows confidence=low,
   * tag special-case rows in `notes` and skip parsing them.
 
-Three top-level prompts:
+Four top-level prompts:
 
   * `PAGE_EXTRACTION_PROMPT` — Gemini and the page-level qwen-vl adapter. The
     model sees the whole page; the schema demands all four quadrants.
@@ -24,6 +24,10 @@
     call. Pulls only `page_date_raw` and page-level oddities from the
     top band of the page.
 
+  * `FOOTER_EXTRACTION_PROMPT` — the per-quadrant adapter's footer-strip
+    call. Pulls only `comments_raw` (the verbatim contents of the
+    printed "Comments:" band) from the bottom band of the page.
+
 The per-row guidance (raw_text / artist_guess / confidence / notes tags /
 etc.) is duplicated across the page and quadrant prompts. They must stay
 in sync. The shared row-level content is enforced by parallel
@@ -251,3 +255,31 @@
 
 Return only the structured JSON described by the response schema.
 """
+
+
+FOOTER_EXTRACTION_PROMPT = """\
+You are reading the bottom footer strip of a 1990s WXYC handwritten
+radio flowsheet page. The image is a horizontal slice from the very
+bottom of the page — below the four hour-blocks of the broadcast grid.
+It contains the printed "Comments:" label and a free-text band where
+the DJ writes short notes about the broadcast (dedications, jokes,
+themed-show titles — e.g. "declared today anti-Valentines Day").
+
+Capture:
+  - comments_raw: the verbatim contents of the Comments band, as
+    written. Do not fix spelling, do not expand abbreviations, do not
+    normalize punctuation. Join multi-line entries with a single
+    newline. Use JSON null (not the string "null", not an empty
+    string) when the Comments band is blank, unreadable, or absent.
+
+The crop may include a few pixels of the bottom row of the broadcast
+grid just above the printed "Comments:" line. Do NOT transcribe row
+content from above the Comments line — those entries are captured by a
+separate call against the bottom quadrants. Only transcribe what the
+DJ wrote in the Comments band itself.
+
+Never invent content. If the band is unreadable, return null rather
+than guessing.
+
+Return only the structured JSON described by the response schema.
+"""
diff --git a/scripts/calibrate_models.py b/scripts/calibrate_models.py
index d7675a0..c000fe9 100755
--- a/scripts/calibrate_models.py
+++ b/scripts/calibrate_models.py
@@ -35,11 +35,12 @@
 
   modal-qwen-vl-quad
                         Per-quadrant Qwen-VL on Modal: crops the page into
-                        4 sub-images plus a header strip, calls the model 5x
-                        per page, assembles a PageResult locally. Eliminates
-                        cross-quadrant content placement errors that the
-                        single-shot `modal-qwen-vl` adapter still suffers.
-                        ~5x cost (~$0.05-0.10/page); full corpus ~$1000-1500.
+                        4 sub-images plus a header strip and a footer strip,
+                        calls the model 6x per page, assembles a PageResult
+                        locally. Eliminates cross-quadrant content placement
+                        errors that the single-shot `modal-qwen-vl` adapter
+                        still suffers. ~6x cost (~$0.06-0.12/page); full
+                        corpus ~$1200-1800.
 
   local-quadrant-smoke
                         Local-only crop-quality smoke check: runs Churro
@@ -98,6 +99,7 @@
 from core.golden import GoldenTruth, RowCountDiscrepancy, compare_row_counts  # noqa: E402
 from core.page_layout import PageLayout, detect_page_layout  # noqa: E402
 from core.prompts import (  # noqa: E402
+    FOOTER_EXTRACTION_PROMPT,
     HEADER_EXTRACTION_PROMPT,
     PAGE_EXTRACTION_PROMPT,
     QUADRANT_EXTRACTION_PROMPT_TEMPLATE,
@@ -408,6 +410,22 @@ def transcribe(image_path: Path) -> PageResult:
 """
 
 
+FOOTER_WIRE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "comments_raw": {"type": ["string", "null"]},
+    },
+    "required": ["comments_raw"],
+    "additionalProperties": False,
+}
+"""JSON Schema for the footer-strip call in `modal-qwen-vl-quad`.
+
+Mirrors `HEADER_WIRE_SCHEMA`'s shape: one-off, inline, no Pydantic
+indirection. Pulls only `comments_raw` (the verbatim contents of the
+printed "Comments:" band at the bottom of the page).
+"""
+
+
 def make_modal_qwen_vl_quad_adapter(
     model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct",
 ) -> TranscribeFn:
@@ -416,24 +434,29 @@ def make_modal_qwen_vl_quad_adapter(
     Eliminates layout misplacement by construction: instead of asking
     the model to spatially attribute rows from one full-page image to
     the right quadrant slot in the JSON wrapper, we crop the page
-    locally and call the model 5 times — once per quadrant + once on
-    the header strip — and assemble the page server-side.
+    locally and call the model 6 times — once per quadrant, once on
+    the header strip, and once on the footer strip — and assemble the
+    page locally.
 
     Each call is grammar-constrained (xgrammar). The four quadrant
     schemas pin `position` to a singleton enum so the model literally
     cannot mislabel the cell. The header schema is small and ad-hoc,
     capturing only `page_date_raw` and page-level oddities (DJ-handoff
-    notes, weather notes, marginal annotations above the grid).
+    notes, weather notes, marginal annotations above the grid). The
+    footer schema captures only `comments_raw` — the verbatim contents
+    of the printed "Comments:" band at the bottom of the page.
 
-    All 5 calls run inside one `with app.run():` block. Modal reuses
+    All 6 calls run inside one `with app.run():` block. Modal reuses
     the warm container across them — the first call pays whatever
-    cold-start applies; calls 2-5 are warm. Per-page wall time is
-    roughly `cold + 4 * warm`, not `5 * warm`.
+    cold-start applies; calls 2-6 are warm. Per-page wall time is
+    roughly `cold + 5 * warm`, not `6 * warm`.
 
     On per-quadrant JSON failure, the affected quadrant is replaced by
     a `_quadrant_fallback` carrying the raw text in one entry tagged
     `notes="parse_failed"`. Other quadrants still validate; the page
-    is never lost wholesale.
+    is never lost wholesale. Header and footer parse failures leave
+    their respective fields at their defaults (`page_date_raw=None`,
+    `oddities=[]`, `comments_raw=None`) without failing the page.
     """
     from PIL import Image
 
@@ -450,10 +473,12 @@ def transcribe(image_path: Path) -> PageResult:
         image = Image.open(image_path).convert("RGB")
         layout = detect_page_layout(image)
         header_image = _crop_header_strip(image, layout)
+        footer_image = _crop_footer_strip(image, layout)
         crops = _crop_quadrants(image, layout)
 
         page_date_raw: str | None = None
         page_oddities: list[str] = []
+        comments_raw: str | None = None
         quadrants: list[Quadrant] = []
 
         with app.run():
@@ -494,9 +519,30 @@ def transcribe(image_path: Path) -> PageResult:
                 except Exception:
                     quadrants.append(_quadrant_fallback(text, position))
 
+            # Footer call — surfaces the bottom Comments band, which the
+            # quadrant crops deliberately exclude (they stop at
+            # body_bottom_y). Same fault-tolerance shape as the header
+            # call: a parse failure leaves comments_raw at None.
+            try:
+                footer_text: str = transcribe_qwen_vl.remote(
+                    _png_bytes(footer_image),
+                    FOOTER_EXTRACTION_PROMPT,
+                    model_id,
+                    json_schema=FOOTER_WIRE_SCHEMA,
+                )
+                footer_data = json.loads(footer_text)
+                footer_value = footer_data.get("comments_raw")
+                # Enforce the prompt contract locally: only a non-blank
+                # string is kept; "", whitespace, or a non-string stays None.
+                if isinstance(footer_value, str) and footer_value.strip():
+                    comments_raw = footer_value
+            except Exception:
+                pass  # leave default; not worth failing the page
+
         return PageResult(
             page_date_raw=page_date_raw,
             quadrants=quadrants,
+            comments_raw=comments_raw,
             model_version=f"modal-qwen-vl-quad:{model_id}",
             extracted_at=datetime.now(UTC),
             oddities=page_oddities,
@@ -602,6 +648,12 @@ def _crop_header_strip(image: PILImage, layout: PageLayout) -> PILImage:
     return image.crop((0, 0, w, layout.header_bottom_y))
 
 
+def _crop_footer_strip(image: PILImage, layout: PageLayout) -> PILImage:
+    """The footer strip — printed "Comments:" line + free-text DJ commentary — below the body grid."""
+    w, h = image.size
+    return image.crop((0, layout.body_bottom_y, w, h))
+
+
 def _crop_quadrants(image: PILImage, layout: PageLayout) -> dict[QuadrantPosition, PILImage]:
     """Split the page body into 4 quadrants on the detected grid lines.
 
diff --git a/tests/unit/test_calibrate_models.py b/tests/unit/test_calibrate_models.py
index eb1c118..9d73471 100644
--- a/tests/unit/test_calibrate_models.py
+++ b/tests/unit/test_calibrate_models.py
@@ -324,8 +324,11 @@ def _painted_page(width: int, height: int, layout: PageLayout) -> object:
     draw.rectangle(
         (layout.column_mid_x, layout.body_mid_y, width, layout.body_bottom_y), fill=(255, 255, 0)
     )  # BR
-    # Footer band below body_bottom_y is left white so any leakage into
-    # the bottom quadrants would be visible.
+    # Paint the footer band below body_bottom_y so a footer crop is
+    # identifiable by sampling a pixel; leakage into the bottom quadrants
+    # would still be visible since the color is distinct from the bottom-
+    # quadrant fills.
+    draw.rectangle((0, layout.body_bottom_y, width, height), fill=(128, 128, 128))  # footer
     return image
 
 
@@ -338,6 +341,36 @@ def test_crop_header_strip_uses_layout_header_bottom_y() -> None:
     assert strip.getpixel((400, 60)) == (10, 10, 10)
 
 
+def test_crop_footer_strip_uses_layout_body_bottom_y() -> None:
+    """The footer crop must start at body_bottom_y and run to the bottom
+    of the image. It is the band that contains the printed Comments: line
+    and any handwritten free-text below it — content the quadrant crops
+    deliberately exclude."""
+    layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
+    image = _painted_page(800, 1000, layout)
+    strip = cm._crop_footer_strip(image, layout)
+    assert strip.size == (800, 1000 - 970)
+    # The painted footer is solid (128,128,128); sample its center.
+    assert strip.getpixel((400, 15)) == (128, 128, 128)
+
+
+def test_crop_footer_strip_excludes_body_grid() -> None:
+    """The footer crop must NOT pull pixels from the bottom quadrants —
+    if it did, the model would helpfully transcribe the last row of those
+    quadrants into comments_raw."""
+    layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
+    image = _painted_page(800, 1000, layout)
+    strip = cm._crop_footer_strip(image, layout)
+    # Bottom-left was painted (0,0,255); bottom-right was painted (255,255,0).
+    # Sweep the whole footer strip and make sure neither color appears.
+    w, h = strip.size
+    for y in range(h):
+        for x in range(w):
+            pixel = strip.getpixel((x, y))
+            assert pixel != (0, 0, 255), f"bottom-left bled into footer at ({x},{y})"
+            assert pixel != (255, 255, 0), f"bottom-right bled into footer at ({x},{y})"
+
+
 def test_crop_quadrants_returns_canonical_keys() -> None:
     layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
     image = _painted_page(800, 1000, layout)
@@ -642,8 +675,9 @@ def fake_run() -> object:
 def test_modal_qwen_vl_quad_adapter_happy_path(
     tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
-    """5 RPC calls in canonical order, each with the right schema and prompt;
-    the assembled PageResult round-trips the header date and four quadrants."""
+    """6 RPC calls in canonical order, each with the right schema and prompt;
+    the assembled PageResult round-trips the header date, four quadrants,
+    and the comments band."""
     image = tmp_path / "1990-04apr0106-page05.png"
     _save_fixture_page(image)
     fake_remote = _patch_modal_for_quadrant(
@@ -654,19 +688,20 @@ def test_modal_qwen_vl_quad_adapter_happy_path(
             _quadrant_json("top_right", "7AM", "DJ B"),
             _quadrant_json("bottom_left", "8AM", "DJ C"),
             _quadrant_json("bottom_right", "9AM", "DJ D"),
+            '{"comments_raw": "declared today anti-Valentines Day"}',
         ],
     )
 
     transcribe = cm.make_modal_qwen_vl_quad_adapter("test-model")
     result = transcribe(image)
 
-    # 5 calls: 1 header + 4 quadrants in canonical order.
-    assert fake_remote.call_count == 5
+    # 6 calls: 1 header + 4 quadrants + 1 footer, in canonical order.
+    assert fake_remote.call_count == 6
     calls = fake_remote.call_args_list
 
     # Call 0: header.
     header_args = calls[0]
-    from core.prompts import HEADER_EXTRACTION_PROMPT
+    from core.prompts import FOOTER_EXTRACTION_PROMPT, HEADER_EXTRACTION_PROMPT
 
     assert header_args.args[1] == HEADER_EXTRACTION_PROMPT
     assert header_args.kwargs["json_schema"] == cm.HEADER_WIRE_SCHEMA
@@ -678,12 +713,18 @@ def test_modal_qwen_vl_quad_adapter_happy_path(
         schema = calls[i].kwargs["json_schema"]
         assert schema["properties"]["position"] == {"enum": [position]}
 
+    # Call 5: footer.
+    footer_args = calls[5]
+    assert footer_args.args[1] == FOOTER_EXTRACTION_PROMPT
+    assert footer_args.kwargs["json_schema"] == cm.FOOTER_WIRE_SCHEMA
+
     # Assembled PageResult.
     assert result.page_date_raw == "Mon 1 Jan 90"
     assert result.oddities == ["weather: snowy"]
     assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
     assert result.quadrants[0].hour_raw == "6AM"
     assert result.quadrants[3].jock_raw == "DJ D"
+    assert result.comments_raw == "declared today anti-Valentines Day"
     assert result.model_version == "modal-qwen-vl-quad:test-model"
 
 
@@ -702,6 +743,7 @@ def test_modal_qwen_vl_quad_adapter_quadrant_fallback_on_malformed_json(
             "not json {{",  # second quadrant returns garbage
             _quadrant_json("bottom_left", "8AM", "C"),
             _quadrant_json("bottom_right", "9AM", "D"),
+            '{"comments_raw": null}',
         ],
     )
 
@@ -741,6 +783,7 @@ def test_modal_qwen_vl_quad_adapter_header_failure_does_not_fail_page(
             _quadrant_json("top_right", "7AM", "B"),
             _quadrant_json("bottom_left", "8AM", "C"),
             _quadrant_json("bottom_right", "9AM", "D"),
+            '{"comments_raw": "valid footer"}',
         ],
     )
 
@@ -752,6 +795,63 @@ def test_modal_qwen_vl_quad_adapter_header_failure_does_not_fail_page(
     assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
     # Quadrant data still flows through.
     assert result.quadrants[0].hour_raw == "6AM"
+    # Footer call is independent of the header call — its content survives.
+    assert result.comments_raw == "valid footer"
+
+
+def test_modal_qwen_vl_quad_adapter_footer_failure_does_not_fail_page(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A malformed footer response leaves comments_raw at None; the page
+    still validates with all four quadrants and the header date intact."""
+    image = tmp_path / "1990-04apr0106-page25.png"
+    _save_fixture_page(image)
+    _patch_modal_for_quadrant(
+        monkeypatch,
+        side_effect=[
+            '{"page_date_raw": "Mon 1 Jan 90", "oddities": []}',
+            _quadrant_json("top_left", "6AM", "A"),
+            _quadrant_json("top_right", "7AM", "B"),
+            _quadrant_json("bottom_left", "8AM", "C"),
+            _quadrant_json("bottom_right", "9AM", "D"),
+            "garbage response from footer call",
+        ],
+    )
+
+    transcribe = cm.make_modal_qwen_vl_quad_adapter("test-model")
+    result = transcribe(image)
+
+    assert result.comments_raw is None
+    # The rest of the page is untouched.
+    assert result.page_date_raw == "Mon 1 Jan 90"
+    assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
+    assert result.quadrants[0].hour_raw == "6AM"
+
+
+def test_modal_qwen_vl_quad_adapter_blank_footer_normalizes_to_none(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A whitespace-only comments_raw is treated as a blank band, so the
+    page carries None rather than an empty-looking string."""
+    image = tmp_path / "1990-04apr0106-page26.png"
+    _save_fixture_page(image)
+    _patch_modal_for_quadrant(
+        monkeypatch,
+        side_effect=[
+            '{"page_date_raw": "Mon 1 Jan 90", "oddities": []}',
+            _quadrant_json("top_left", "6AM", "A"),
+            _quadrant_json("top_right", "7AM", "B"),
+            _quadrant_json("bottom_left", "8AM", "C"),
+            _quadrant_json("bottom_right", "9AM", "D"),
+            '{"comments_raw": "   "}',
+        ],
+    )
+
+    transcribe = cm.make_modal_qwen_vl_quad_adapter("test-model")
+    result = transcribe(image)
+
+    assert result.comments_raw is None
+    assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
 
 
 # -- _run_row_count_check / _format_discrepancy --------------------------------
diff --git a/tests/unit/test_prompts.py b/tests/unit/test_prompts.py
index 01796bb..ebc37a5 100644
--- a/tests/unit/test_prompts.py
+++ b/tests/unit/test_prompts.py
@@ -10,6 +10,7 @@
 import pytest
 
 from core.prompts import (
+    FOOTER_EXTRACTION_PROMPT,
     HEADER_EXTRACTION_PROMPT,
     PAGE_EXTRACTION_PROMPT,
     QUADRANT_EXTRACTION_PROMPT_TEMPLATE,
@@ -268,3 +269,38 @@ def test_header_prompt_scopes_oddities_to_page_level() -> None:
 
 def test_header_prompt_forbids_invented_content() -> None:
     assert "Never invent content" in HEADER_EXTRACTION_PROMPT
+
+
+# -- FOOTER_EXTRACTION_PROMPT ----------------------------------------------
+
+
+def test_footer_prompt_captures_comments_raw() -> None:
+    assert "comments_raw" in FOOTER_EXTRACTION_PROMPT
+
+
+def test_footer_prompt_demands_verbatim_transcription() -> None:
+    """The Comments band is free-text DJ commentary — the model must not
+    clean it up like an editor."""
+    assert "verbatim" in FOOTER_EXTRACTION_PROMPT.lower()
+
+
+def test_footer_prompt_specifies_json_null_for_blank() -> None:
+    """Blank comments band must round-trip as null, not "" — same convention
+    as page_date_raw / hour_raw / jock_raw."""
+    assert "JSON null" in FOOTER_EXTRACTION_PROMPT
+
+
+def test_footer_prompt_scopes_to_footer_band() -> None:
+    """The footer crop may include a few pixels of the bottom-quadrant
+    baseline; the prompt must tell the model to ignore content above the
+    Comments line — otherwise the model will helpfully transcribe the last
+    row of the bottom quadrants into comments_raw."""
+    text = FOOTER_EXTRACTION_PROMPT.lower()
+    assert "comments" in text
+    # Negate transcribing content from above the Comments line.
+    assert "do not" in text
+    assert "above" in text
+
+
+def test_footer_prompt_forbids_invented_content() -> None:
+    assert "Never invent content" in FOOTER_EXTRACTION_PROMPT