WXYC · jakebromberg · May 11, 2026 · May 10, 2026
diff --git a/core/prompts.py b/core/prompts.py
@@ -10,7 +10,7 @@
   * never invent content; mark unreadable rows confidence=low,
   * tag special-case rows in `notes` and skip parsing them.
 
-Three top-level prompts:
+Four top-level prompts:
 
   * `PAGE_EXTRACTION_PROMPT` — Gemini and the page-level qwen-vl adapter.
     The model sees the whole page; the schema demands all four quadrants.
@@ -24,6 +24,10 @@
     call. Pulls only `page_date_raw` and page-level oddities from the
     top band of the page.
 
+  * `FOOTER_EXTRACTION_PROMPT` — the per-quadrant adapter's footer-strip
+    call. Pulls only `comments_raw` (the verbatim contents of the
+    printed "Comments:" band) from the bottom band of the page.
+
 The per-row guidance (raw_text / artist_guess / confidence / notes
 tags / etc.) is duplicated across the page and quadrant prompts. They
 must stay in sync. The shared row-level content is enforced by parallel
@@ -251,3 +255,31 @@
 
 Return only the structured JSON described by the response schema.
 """
+
+
+FOOTER_EXTRACTION_PROMPT = """\
+You are reading the bottom footer strip of a 1990s WXYC handwritten
+radio flowsheet page. The image is a horizontal slice from the very
+bottom of the page — below the four hour-blocks of the broadcast grid.
+It contains the printed "Comments:" label and a free-text band where
+the DJ writes short notes about the broadcast (dedications, jokes,
+themed-show titles — e.g. "declared today anti-Valentines Day").
+
+Capture:
+  - comments_raw: the verbatim contents of the Comments band, as
+    written. Do not fix spelling, do not expand abbreviations, do not
+    normalize punctuation. Join multi-line entries with a single
+    newline. Use JSON null (not the string "null", not an empty
+    string) when the Comments band is blank, unreadable, or absent.
+
+The crop may include a few pixels of the bottom row of the broadcast
+grid just above the printed "Comments:" line. Do NOT transcribe row
+content from above the Comments line — those entries are captured by a
+separate call against the bottom quadrants. Only transcribe what the
+DJ wrote in the Comments band itself.
+
+Never invent content. If the band is unreadable, return null rather
+than guessing.
+
+Return only the structured JSON described by the response schema.
+"""
diff --git a/scripts/calibrate_models.py b/scripts/calibrate_models.py
@@ -35,11 +35,12 @@
 
   modal-qwen-vl-quad
                    Per-quadrant Qwen-VL on Modal: crops the page into
-                   4 sub-images plus a header strip, calls the model 5x
-                   per page, assembles a PageResult locally. Eliminates
-                   cross-quadrant content placement errors that the
-                   single-shot `modal-qwen-vl` adapter still suffers.
-                   ~5x cost (~$0.05-0.10/page); full corpus ~$1000-1500.
+                   4 sub-images plus a header strip and a footer strip,
+                   calls the model 6x per page, assembles a PageResult
+                   locally. Eliminates cross-quadrant content placement
+                   errors that the single-shot `modal-qwen-vl` adapter
+                   still suffers. ~6x cost (~$0.06-0.12/page); full
+                   corpus ~$1200-1800.
 
   local-quadrant-smoke
                    Local-only crop-quality smoke check: runs Churro
@@ -98,6 +99,7 @@
 from core.golden import GoldenTruth, RowCountDiscrepancy, compare_row_counts  # noqa: E402
 from core.page_layout import PageLayout, detect_page_layout  # noqa: E402
 from core.prompts import (  # noqa: E402
+    FOOTER_EXTRACTION_PROMPT,
     HEADER_EXTRACTION_PROMPT,
     PAGE_EXTRACTION_PROMPT,
     QUADRANT_EXTRACTION_PROMPT_TEMPLATE,
@@ -408,6 +410,22 @@ def transcribe(image_path: Path) -> PageResult:
 """
 
 
+FOOTER_WIRE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "comments_raw": {"type": ["string", "null"]},
+    },
+    "required": ["comments_raw"],
+    "additionalProperties": False,
+}
+"""JSON Schema for the footer-strip call in `modal-qwen-vl-quad`.
+
+Mirrors `HEADER_WIRE_SCHEMA`'s shape: one-off, inline, no Pydantic
+indirection. Pulls only `comments_raw` (the verbatim contents of the
+printed "Comments:" band at the bottom of the page).
+"""
+
+
 def make_modal_qwen_vl_quad_adapter(
     model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct",
 ) -> TranscribeFn:
@@ -416,24 +434,29 @@ def make_modal_qwen_vl_quad_adapter(
     Eliminates layout misplacement by construction: instead of asking
     the model to spatially attribute rows from one full-page image to
     the right quadrant slot in the JSON wrapper, we crop the page
-    locally and call the model 5 times — once per quadrant + once on
-    the header strip — and assemble the page server-side.
+    locally and call the model 6 times — once per quadrant, once on
+    the header strip, and once on the footer strip — and assemble the
+    page client-side, in this process.
 
     Each call is grammar-constrained (xgrammar). The four quadrant
     schemas pin `position` to a singleton enum so the model literally
     cannot mislabel the cell. The header schema is small and ad-hoc,
     capturing only `page_date_raw` and page-level oddities (DJ-handoff
-    notes, weather notes, marginal annotations above the grid).
+    notes, weather notes, marginal annotations above the grid). The
+    footer schema captures only `comments_raw` — the verbatim contents
+    of the printed "Comments:" band at the bottom of the page.
 
-    All 5 calls run inside one `with app.run():` block. Modal reuses
+    All 6 calls run inside one `with app.run():` block. Modal reuses
     the warm container across them — the first call pays whatever
-    cold-start applies; calls 2-5 are warm. Per-page wall time is
-    roughly `cold + 4 * warm`, not `5 * warm`.
+    cold-start applies; calls 2-6 are warm. Per-page wall time is
+    roughly `cold + 5 * warm`, not `6 * warm`.
 
     On per-quadrant JSON failure, the affected quadrant is replaced by
     a `_quadrant_fallback` carrying the raw text in one entry tagged
     `notes="parse_failed"`. Other quadrants still validate; the page
-    is never lost wholesale.
+    is never lost wholesale. Header and footer parse failures leave
+    their respective fields at their defaults (`page_date_raw=None`,
+    `oddities=[]`, `comments_raw=None`) without failing the page.
     """
     from PIL import Image
 
@@ -450,10 +473,12 @@ def transcribe(image_path: Path) -> PageResult:
         image = Image.open(image_path).convert("RGB")
         layout = detect_page_layout(image)
         header_image = _crop_header_strip(image, layout)
+        footer_image = _crop_footer_strip(image, layout)
         crops = _crop_quadrants(image, layout)
 
         page_date_raw: str | None = None
         page_oddities: list[str] = []
+        comments_raw: str | None = None
         quadrants: list[Quadrant] = []
 
         with app.run():
@@ -494,9 +519,26 @@ def transcribe(image_path: Path) -> PageResult:
                 except Exception:
                     quadrants.append(_quadrant_fallback(text, position))
 
+            # Footer call — surfaces the bottom Comments band, which the
+            # quadrant crops deliberately exclude (they stop at
+            # body_bottom_y). Same fault-tolerance shape as the header
+            # call: any failure (RPC or parse) leaves comments_raw at None.
+            try:
+                footer_text: str = transcribe_qwen_vl.remote(
+                    _png_bytes(footer_image),
+                    FOOTER_EXTRACTION_PROMPT,
+                    model_id,
+                    json_schema=FOOTER_WIRE_SCHEMA,
+                )
+                footer_data = json.loads(footer_text)
+                comments_raw = footer_data.get("comments_raw")
+            except Exception:
+                pass  # leave default; not worth failing the page
+
         return PageResult(
             page_date_raw=page_date_raw,
             quadrants=quadrants,
+            comments_raw=comments_raw,
             model_version=f"modal-qwen-vl-quad:{model_id}",
             extracted_at=datetime.now(UTC),
             oddities=page_oddities,
@@ -602,6 +644,12 @@ def _crop_header_strip(image: PILImage, layout: PageLayout) -> PILImage:
     return image.crop((0, 0, w, layout.header_bottom_y))
 
 
+def _crop_footer_strip(image: PILImage, layout: PageLayout) -> PILImage:
+    """The footer strip — printed "Comments:" line + free-text DJ commentary — below the body grid."""
+    w, h = image.size
+    return image.crop((0, layout.body_bottom_y, w, h))
+
+
 def _crop_quadrants(image: PILImage, layout: PageLayout) -> dict[QuadrantPosition, PILImage]:
     """Split the page body into 4 quadrants on the detected grid lines.
 

diff --git a/tests/unit/test_calibrate_models.py b/tests/unit/test_calibrate_models.py
@@ -324,8 +324,11 @@ def _painted_page(width: int, height: int, layout: PageLayout) -> object:
     draw.rectangle(
         (layout.column_mid_x, layout.body_mid_y, width, layout.body_bottom_y), fill=(255, 255, 0)
     )  # BR
-    # Footer band below body_bottom_y is left white so any leakage into
-    # the bottom quadrants would be visible.
+    # Paint the footer band below body_bottom_y so a footer crop is
+    # identifiable by sampling a pixel; leakage into the bottom quadrants
+    # would still be visible since the gray is distinct from both
+    # bottom-quadrant fills.
+    draw.rectangle((0, layout.body_bottom_y, width, height), fill=(128, 128, 128))  # footer
     return image
 
 
@@ -338,6 +341,36 @@ def test_crop_header_strip_uses_layout_header_bottom_y() -> None:
     assert strip.getpixel((400, 60)) == (10, 10, 10)
 
 
+def test_crop_footer_strip_uses_layout_body_bottom_y() -> None:
+    """The footer crop must start at body_bottom_y and run to the bottom
+    of the image. It is the band that contains the printed Comments: line
+    and any handwritten free-text below it — content the quadrant crops
+    deliberately exclude."""
+    layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
+    image = _painted_page(800, 1000, layout)
+    strip = cm._crop_footer_strip(image, layout)
+    assert strip.size == (800, 1000 - 970)
+    # The painted footer is solid (128,128,128); sample its center.
+    assert strip.getpixel((400, 15)) == (128, 128, 128)
+
+
+def test_crop_footer_strip_excludes_body_grid() -> None:
+    """The footer crop must NOT pull pixels from the bottom quadrants —
+    if it did, the model would helpfully transcribe the last row of those
+    quadrants into comments_raw."""
+    layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
+    image = _painted_page(800, 1000, layout)
+    strip = cm._crop_footer_strip(image, layout)
+    # Bottom-left was painted (0,0,255); bottom-right was painted (255,255,0).
+    # Sweep the whole footer strip and make sure neither color appears.
+    w, h = strip.size
+    for y in range(h):
+        for x in range(w):
+            pixel = strip.getpixel((x, y))
+            assert pixel != (0, 0, 255), f"bottom-left bled into footer at ({x},{y})"
+            assert pixel != (255, 255, 0), f"bottom-right bled into footer at ({x},{y})"
+
+
 def test_crop_quadrants_returns_canonical_keys() -> None:
     layout = PageLayout(header_bottom_y=120, body_mid_y=550, body_bottom_y=970, column_mid_x=400)
     image = _painted_page(800, 1000, layout)
@@ -642,8 +675,9 @@ def fake_run() -> object:
 def test_modal_qwen_vl_quad_adapter_happy_path(
     tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
-    """5 RPC calls in canonical order, each with the right schema and prompt;
-    the assembled PageResult round-trips the header date and four quadrants."""
+    """6 RPC calls in canonical order, each with the right schema and prompt;
+    the assembled PageResult round-trips the header date, four quadrants,
+    and the comments band."""
     image = tmp_path / "1990-04apr0106-page05.png"
     _save_fixture_page(image)
     fake_remote = _patch_modal_for_quadrant(
@@ -654,19 +688,20 @@ def test_modal_qwen_vl_quad_adapter_happy_path(
             _quadrant_json("top_right", "7AM", "DJ B"),
             _quadrant_json("bottom_left", "8AM", "DJ C"),
             _quadrant_json("bottom_right", "9AM", "DJ D"),
+            '{"comments_raw": "declared today anti-Valentines Day"}',
         ],
     )
 
     transcribe = cm.make_modal_qwen_vl_quad_adapter("test-model")
     result = transcribe(image)
 
-    # 5 calls: 1 header + 4 quadrants in canonical order.
-    assert fake_remote.call_count == 5
+    # 6 calls: 1 header + 4 quadrants + 1 footer, in canonical order.
+    assert fake_remote.call_count == 6
     calls = fake_remote.call_args_list
 
     # Call 0: header.
     header_args = calls[0]
-    from core.prompts import HEADER_EXTRACTION_PROMPT
+    from core.prompts import FOOTER_EXTRACTION_PROMPT, HEADER_EXTRACTION_PROMPT
 
     assert header_args.args[1] == HEADER_EXTRACTION_PROMPT
     assert header_args.kwargs["json_schema"] == cm.HEADER_WIRE_SCHEMA
@@ -678,12 +713,18 @@ def test_modal_qwen_vl_quad_adapter_happy_path(
         schema = calls[i].kwargs["json_schema"]
         assert schema["properties"]["position"] == {"enum": [position]}
 
+    # Call 5: footer.
+    footer_args = calls[5]
+    assert footer_args.args[1] == FOOTER_EXTRACTION_PROMPT
+    assert footer_args.kwargs["json_schema"] == cm.FOOTER_WIRE_SCHEMA
+
     # Assembled PageResult.
     assert result.page_date_raw == "Mon 1 Jan 90"
     assert result.oddities == ["weather: snowy"]
     assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
     assert result.quadrants[0].hour_raw == "6AM"
     assert result.quadrants[3].jock_raw == "DJ D"
+    assert result.comments_raw == "declared today anti-Valentines Day"
     assert result.model_version == "modal-qwen-vl-quad:test-model"
 
 
@@ -702,6 +743,7 @@ def test_modal_qwen_vl_quad_adapter_quadrant_fallback_on_malformed_json(
             "not json {{",  # second quadrant returns garbage
             _quadrant_json("bottom_left", "8AM", "C"),
             _quadrant_json("bottom_right", "9AM", "D"),
+            '{"comments_raw": null}',
         ],
     )
 
@@ -741,6 +783,7 @@ def test_modal_qwen_vl_quad_adapter_header_failure_does_not_fail_page(
             _quadrant_json("top_right", "7AM", "B"),
             _quadrant_json("bottom_left", "8AM", "C"),
             _quadrant_json("bottom_right", "9AM", "D"),
+            '{"comments_raw": "valid footer"}',
         ],
     )
 
@@ -752,6 +795,37 @@ def test_modal_qwen_vl_quad_adapter_header_failure_does_not_fail_page(
     assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
     # Quadrant data still flows through.
     assert result.quadrants[0].hour_raw == "6AM"
+    # Footer call is independent of the header call — its content survives.
+    assert result.comments_raw == "valid footer"
+
+
+def test_modal_qwen_vl_quad_adapter_footer_failure_does_not_fail_page(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A malformed footer response leaves comments_raw at None; the page
+    still validates with all four quadrants and the header date intact."""
+    image = tmp_path / "1990-04apr0106-page25.png"
+    _save_fixture_page(image)
+    _patch_modal_for_quadrant(
+        monkeypatch,
+        side_effect=[
+            '{"page_date_raw": "Mon 1 Jan 90", "oddities": []}',
+            _quadrant_json("top_left", "6AM", "A"),
+            _quadrant_json("top_right", "7AM", "B"),
+            _quadrant_json("bottom_left", "8AM", "C"),
+            _quadrant_json("bottom_right", "9AM", "D"),
+            "garbage response from footer call",
+        ],
+    )
+
+    transcribe = cm.make_modal_qwen_vl_quad_adapter("test-model")
+    result = transcribe(image)
+
+    assert result.comments_raw is None
+    # The rest of the page is untouched.
+    assert result.page_date_raw == "Mon 1 Jan 90"
+    assert [q.position for q in result.quadrants] == list(QUADRANT_ORDER)
+    assert result.quadrants[0].hour_raw == "6AM"
 
 
 # -- _run_row_count_check / _format_discrepancy --------------------------------

diff --git a/tests/unit/test_prompts.py b/tests/unit/test_prompts.py
@@ -10,6 +10,7 @@
 import pytest
 
 from core.prompts import (
+    FOOTER_EXTRACTION_PROMPT,
     HEADER_EXTRACTION_PROMPT,
     PAGE_EXTRACTION_PROMPT,
     QUADRANT_EXTRACTION_PROMPT_TEMPLATE,
@@ -268,3 +269,38 @@ def test_header_prompt_scopes_oddities_to_page_level() -> None:
 
 def test_header_prompt_forbids_invented_content() -> None:
     assert "Never invent content" in HEADER_EXTRACTION_PROMPT
+
+
+# -- FOOTER_EXTRACTION_PROMPT ----------------------------------------------
+
+
+def test_footer_prompt_captures_comments_raw() -> None:
+    assert "comments_raw" in FOOTER_EXTRACTION_PROMPT
+
+
+def test_footer_prompt_demands_verbatim_transcription() -> None:
+    """The Comments band is free-text DJ commentary — the model must not
+    clean it up like an editor."""
+    assert "verbatim" in FOOTER_EXTRACTION_PROMPT.lower()
+
+
+def test_footer_prompt_specifies_json_null_for_blank() -> None:
+    """Blank comments band must round-trip as null, not "" — same convention
+    as page_date_raw / hour_raw / jock_raw."""
+    assert "JSON null" in FOOTER_EXTRACTION_PROMPT
+
+
+def test_footer_prompt_scopes_to_footer_band() -> None:
+    """The footer crop may catch a few pixels of the bottom-quadrant rows
+    on real scans; the prompt must tell the model to ignore content above
+    the Comments line — otherwise the model will helpfully transcribe the
+    last row of the bottom quadrants into comments_raw."""
+    text = FOOTER_EXTRACTION_PROMPT.lower()
+    assert "comments" in text
+    # Negate transcribing content from above the Comments line.
+    assert "do not" in text
+    assert "above" in text
+
+
+def test_footer_prompt_forbids_invented_content() -> None:
+    assert "Never invent content" in FOOTER_EXTRACTION_PROMPT