diff --git a/core/continuations.py b/core/continuations.py
index 1b8d2d0..a422cde 100644
--- a/core/continuations.py
+++ b/core/continuations.py
@@ -39,13 +39,15 @@ def merge_continuations(entries: list[Entry]) -> list[Entry]:
         continuation still reports "high". Substring matching doesn't
         care; if a future re-OCR queue filters by confidence and wants
         to flag merged rows, take the min over the contributing rows.
-      * `artist_guess` / `track_guess` — kept as-is. After a merge
-        these may not reflect the joined `raw_text`; downstream
-        consumers that care should re-parse from `raw_text`.
       * `row_index` — the predecessor's index is preserved. The
         continuation row's index is dropped from the merged view; the
         on-disk JSON still has both grid positions if needed.
 
+    Callers that want an artist/track split from the merged `raw_text`
+    should apply `core.parse.parse_artist_track` after merging — the
+    split is deterministic at read time and is intentionally not stored
+    on `Entry`.
+
     The merge is **lossy with respect to internal whitespace** at the
     wrap boundary — multiple spaces / tabs collapse to a single space.
     Verbatim whitespace round-tripping requires reading the on-disk
diff --git a/core/parse.py b/core/parse.py
new file mode 100644
index 0000000..9bf015f
--- /dev/null
+++ b/core/parse.py
@@ -0,0 +1,60 @@
+"""Read-time artist/track split for `Entry.raw_text`.
+
+The Phase-1 schema asked Gemini to fill `artist_guess` and `track_guess`
+alongside the verbatim `raw_text`. The split is deterministic (separate
+on the first whitespace-bracketed dash) and producing it on the model
+side spends output tokens for no analytic benefit. The fields were
+dropped from the response schema; this module is what downstream
+consumers call at read time to recover them.
+
+Sibling pattern: `core.continuations.merge_continuations`,
+`core.comments.normalize_comments`. Pure function, no I/O. The on-disk
+shape stays verbatim; the split happens in memory on read.
+
+Separator convention: a single ASCII hyphen, en-dash, or em-dash with
+whitespace on both sides. Compound band names ("X-Ray Spex") have no
+surrounding whitespace and are NOT split. Multiple separators in one
+line split on the first; the track side keeps the rest.
+"""
+
+from __future__ import annotations
+
+import re
+
+# ASCII hyphen, en-dash (U+2013), em-dash (U+2014). Whitespace-bracketed
+# so we don't split compound band names like "X-Ray Spex" on their
+# internal hyphen.
+_SEPARATOR = re.compile(r"\s+[-–—]\s+")
+
+
+def parse_artist_track(raw_text: str | None) -> tuple[str | None, str | None]:
+    """Split a row's verbatim text into best-effort (artist, track) parts.
+
+    Returns:
+      * `(None, None)` when the input is None, empty, or whitespace-only.
+      * `(artist, None)` when no separator is present — the line names
+        an artist with no track (common for early-90s flowsheets, e.g.
+        "STEREOLAB" alone).
+      * `(None, track)` when the separator opens the line with nothing
+        before it (e.g. " - LA PARADOJA").
+      * `(artist, None)` when the separator closes the line with nothing
+        after it (e.g. "JUANA MOLINA - ").
+      * `(artist, track)` for the dominant case ("ARTIST - TRACK"). When
+        multiple separators appear, we split on the first only and the
+        rest stays in the track.
+
+    Each side is stripped of leading/trailing whitespace. Empty sides
+    become None so callers have one sentinel for "nothing useful here".
+    """
+    if raw_text is None:
+        return (None, None)
+    if not raw_text.strip():
+        return (None, None)
+    # Split the unstripped text so a leading or trailing " - " still has
+    # the whitespace on both sides of the dash that the regex needs.
+    split = _SEPARATOR.split(raw_text, maxsplit=1)
+    if len(split) == 1:
+        return (raw_text.strip(), None)
+    artist = split[0].strip() or None
+    track = split[1].strip() or None
+    return (artist, track)
diff --git a/core/prompts.py b/core/prompts.py
index 8389b8a..27e0c13 100644
--- a/core/prompts.py
+++ b/core/prompts.py
@@ -28,7 +28,7 @@
     call. Pulls only `comments_raw` (the verbatim contents of the
     printed "Comments:" band) from the bottom band of the page.
 
-The per-row guidance (raw_text / artist_guess / confidence / notes
+The per-row guidance (raw_text / type_raw / confidence / notes
 tags / etc.) is duplicated across the page and quadrant prompts. They
 must stay in sync. The shared row-level content is enforced by parallel
 contract tests in `tests/unit/test_prompts.py`; if you change one,
@@ -64,8 +64,6 @@
     drawn shape that is NOT a letter; do not invent a description from
     these examples or copy them onto rows where you cannot read the mark.
     When in doubt, return null.
-  - artist_guess: best-effort parse of the part left of the dash, or null
-  - track_guess: best-effort parse of the part right of the dash, or null
   - confidence: "high" if the row is clearly legible, "medium" if you had to
     guess one or two characters, "low" if mostly illegible
   - notes: null in the common case. Use one of these tags only when relevant:
@@ -172,8 +170,6 @@
     drawn shape that is NOT a letter; do not invent a description from
     these examples or copy them onto rows where you cannot read the mark.
     When in doubt, return null.
-  - artist_guess: best-effort parse of the part left of the dash, or null
-  - track_guess: best-effort parse of the part right of the dash, or null
   - confidence: "high" if the row is clearly legible, "medium" if you had to
     guess one or two characters, "low" if mostly illegible
   - notes: null in the common case. Use one of these tags only when relevant:
diff --git a/core/schema.py b/core/schema.py
index 5c03974..7ae3f6d 100644
--- a/core/schema.py
+++ b/core/schema.py
@@ -60,14 +60,6 @@ class Entry(BaseModel):
             "normal entry. Null if the circle is blank."
         ),
     )
-    artist_guess: str | None = Field(
-        default=None,
-        description="Best-effort parse of the artist portion (left of the dash).",
-    )
-    track_guess: str | None = Field(
-        default=None,
-        description="Best-effort parse of the track portion (right of the dash).",
-    )
     confidence: Confidence = Field(
         description="high if the row is clearly legible; low if mostly illegible.",
     )
diff --git a/core/spot_check.py b/core/spot_check.py
index 76ef605..dce35f0 100644
--- a/core/spot_check.py
+++ b/core/spot_check.py
@@ -12,6 +12,8 @@
 from dataclasses import dataclass, field
 from pathlib import Path
 
+from core.parse import parse_artist_track
+
 
 @dataclass(frozen=True)
 class EntryRef:
@@ -28,7 +30,7 @@ class EntryRef:
 class PageReport:
     """Hit/miss accounting for a single result JSON.
 
-    `total` counts entries that had an `artist_guess`. `with_track` is
+    `total` counts entries we could derive an artist for. `with_track` is
     the joint-mode denominator: entries with both an artist and a track.
     The two miss lists hold the rows whose lookup returned False, so a
     caller can drill into specific transcriptions.
@@ -46,19 +48,37 @@ class PageReport:
 def collect_entries(results_root: Path) -> list[EntryRef]:
     """Walk every result JSON under `results_root` and return queryable rows.
 
-    Skips entries with no `artist_guess` (continuation rows have no
-    artist by design). An empty `track_guess` becomes `None` so callers
-    can distinguish "no track to check" from "track is the empty string".
+    Two on-disk shapes coexist:
+
+      * Pre-audit (34 legacy corpus JSONs): `artist_guess` / `track_guess`
+        are present on every entry. A null `artist_guess` is the explicit
+        "continuation row" sentinel — skip those.
+      * Post-audit (everything new): no `artist_guess` / `track_guess`
+        keys. Artist and track are derived from `raw_text` via
+        `core.parse.parse_artist_track`.
+
+    The branch is on KEY PRESENCE, not value truthiness, so the legacy
+    "explicit null means skip" contract is preserved on legacy rows
+    while new rows get the deterministic parse. An empty track becomes
+    `None` so callers can distinguish "no track to check" from "track
+    is the empty string".
     """
     rows: list[EntryRef] = []
     for path in sorted(results_root.rglob("*.json")):
         data = json.loads(path.read_text())
         for q in data.get("quadrants", []):
             for e in q.get("entries", []):
-                artist = (e.get("artist_guess") or "").strip()
-                if not artist:
-                    continue
-                track = (e.get("track_guess") or "").strip() or None
+                if "artist_guess" in e:
+                    legacy = (e.get("artist_guess") or "").strip()
+                    if not legacy:
+                        continue
+                    artist = legacy
+                    track = (e.get("track_guess") or "").strip() or None
+                else:
+                    parsed_artist, track = parse_artist_track(e.get("raw_text"))
+                    if parsed_artist is None:
+                        continue
+                    artist = parsed_artist
                 rows.append(
                     EntryRef(
                         page_path=path,
diff --git a/scripts/spot_check_discogs.py b/scripts/spot_check_discogs.py
index d0a68d7..90fa9db 100755
--- a/scripts/spot_check_discogs.py
+++ b/scripts/spot_check_discogs.py
@@ -2,8 +2,10 @@
 """spot_check_discogs.py — sanity-check Gemini-extracted entries against the
 local Discogs PostgreSQL cache.
 
-Walks every `data/results/**/*.json` and for each entry with an
-`artist_guess` checks two questions against the cache:
+Walks every `data/results/**/*.json` and for each entry with a
+derivable artist (from `artist_guess` on the 34 legacy JSONs, or from
+`parse_artist_track(raw_text)` on post-schema-trim extractions) checks
+two questions against the cache:
 
   * artist  — is this artist name in the WXYC library? (broad, cheap)
   * joint   — does any release for this WXYC-owned artist contain a
@@ -156,7 +158,7 @@ def print_report(
     print()
     print("=" * 72)
     print(f"Pages:                          {len(pages)}")
-    print(f"Entries with artist_guess:      {total}")
+    print(f"Entries with derivable artist:  {total}")
     print(f"  artist hits:                  {artist_hit_total}/{total}")
     print(f"Entries with artist + track:    {with_track}")
     print(f"  joint hits (artist+track):    {joint_hit_total}/{with_track}")
@@ -216,7 +218,7 @@ def main(argv: list[str]) -> int:
 
     rows = collect_entries(results_root)
     if not rows:
-        print(f"No entries with artist_guess under {results_root}.")
+        print(f"No entries with a derivable artist under {results_root}.")
         return 0
 
     unique_artists = sorted({r.artist for r in rows})
diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py
index d8eb566..46be44a 100644
--- a/tests/integration/test_pipeline.py
+++ b/tests/integration/test_pipeline.py
@@ -41,8 +41,6 @@ def _build_page_result(date: str = "Monday 1 Jan '90") -> PageResult:
                     Entry(
                         row_index=0,
                         raw_text="LED ZEP - TRAMPLED",
-                        artist_guess="LED ZEP",
-                        track_guess="TRAMPLED",
                         confidence="high",
                     )
                 ],
diff --git a/tests/unit/test_parse.py b/tests/unit/test_parse.py
new file mode 100644
index 0000000..dcc8a4c
--- /dev/null
+++ b/tests/unit/test_parse.py
@@ -0,0 +1,114 @@
+"""Tests for `core.parse.parse_artist_track`.
+
+`parse_artist_track` is the read-time replacement for the
+`Entry.artist_guess` / `Entry.track_guess` fields that used to live on
+the Gemini response schema. The model now returns only `raw_text`; the
+split happens here, deterministically, at read time.
+
+Sibling pattern: `core.continuations.merge_continuations`,
+`core.comments.normalize_comments`. Pure function, no I/O.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from core.parse import parse_artist_track
+
+
+class TestParseArtistTrack:
+    @pytest.mark.parametrize(
+        ("raw", "expected"),
+        [
+            # The dominant case: ASCII hyphen with single spaces.
+            ("JUANA MOLINA - LA PARADOJA", ("JUANA MOLINA", "LA PARADOJA")),
+            # Album in parens belongs to the track side, not split off.
+            (
+                "JUANA MOLINA - LA PARADOJA (DOGA)",
+                ("JUANA MOLINA", "LA PARADOJA (DOGA)"),
+            ),
+        ],
+    )
+    def test_simple_ascii_hyphen(self, raw: str, expected: tuple[str | None, str | None]) -> None:
+        assert parse_artist_track(raw) == expected
+
+    @pytest.mark.parametrize(
+        ("dash", "label"),
+        [
+            ("–", "en-dash"),
+            ("—", "em-dash"),
+        ],
+    )
+    def test_unicode_dashes_split_the_same(self, dash: str, label: str) -> None:
+        """DJs sometimes write en/em-dashes; the model occasionally normalizes
+        a hand-drawn dash to unicode. Treat them like ASCII hyphens so both
+        on-disk shapes split consistently."""
+        raw = f"JESSICA PRATT {dash} BACK, BABY"
+        assert parse_artist_track(raw) == ("JESSICA PRATT", "BACK, BABY"), label
+
+    def test_no_dash_returns_artist_only(self) -> None:
+        """An entry with no separator is documented as artist-only. The
+        Stereolab fixture case (artist-only row) appears in the WXYC
+        canonical-artists test data; the dominant historical reading is
+        that the line names the artist and omits the track."""
+        assert parse_artist_track("STEREOLAB") == ("STEREOLAB", None)
+
+    def test_multiple_dashes_split_on_first(self) -> None:
+        """Hand-written track titles can themselves contain hyphens
+        ("Duke Ellington & John Coltrane - In a Sentimental Mood"
+        wouldn't trigger this — the artist '&' isn't a dash — but a
+        title like 'X-Ray' on the track side will). Split on the first
+        separator so the artist segment stays intact."""
+        assert parse_artist_track("ARTIST - TRACK - PART TWO") == (
+            "ARTIST",
+            "TRACK - PART TWO",
+        )
+
+    def test_strips_whitespace_per_side(self) -> None:
+        """Leading/trailing whitespace on either side gets stripped from
+        both sides of the result. Internal whitespace inside artist or
+        track is preserved."""
+        assert parse_artist_track("  THE BAND   -   THE SONG  ") == ("THE BAND", "THE SONG")
+
+    def test_none_input_returns_none_pair(self) -> None:
+        assert parse_artist_track(None) == (None, None)
+
+    def test_empty_string_returns_none_pair(self) -> None:
+        assert parse_artist_track("") == (None, None)
+
+    def test_whitespace_only_returns_none_pair(self) -> None:
+        assert parse_artist_track("   ") == (None, None)
+
+    def test_empty_artist_side_returns_none(self) -> None:
+        """A line that opens with the separator (' - TRACK') has no artist;
+        the function shouldn't promote an empty string into a real value."""
+        assert parse_artist_track(" - LA PARADOJA") == (None, "LA PARADOJA")
+
+    def test_empty_track_side_returns_none(self) -> None:
+        """A trailing separator with nothing after it has no track."""
+        assert parse_artist_track("JUANA MOLINA - ") == ("JUANA MOLINA", None)
+
+    def test_hyphen_with_no_spaces_does_not_split(self) -> None:
+        """Compound words like 'X-Ray Spex' must NOT be split on the
+        internal hyphen — the convention is that the separator is a dash
+        SURROUNDED by whitespace. This is the load-bearing constraint
+        that lets the function work on a corpus that mixes hyphenated
+        band names with separator dashes."""
+        assert parse_artist_track("X-RAY SPEX") == ("X-RAY SPEX", None)
+
+    def test_separator_matches_across_newlines(self) -> None:
+        """The regex's `\\s+` class includes `\\n`, so a separator that
+        straddles a newline still splits. Rare in practice — Gemini's
+        `raw_text` is usually a single line — but pinning the behavior
+        here means a future tightening (e.g. `[^\\S\\n]+` to exclude
+        newlines) is a deliberate decision rather than a regression.
+        """
+        assert parse_artist_track("ARTIST\n - TRACK") == ("ARTIST", "TRACK")
+
+    def test_idempotent_when_artist_only_recomputed(self) -> None:
+        """Round-tripping the result through join+split must be a no-op
+        for the artist-only case: a downstream consumer that re-stores
+        the artist as `raw_text` and re-parses shouldn't flip the result."""
+        artist, track = parse_artist_track("STEREOLAB")
+        assert (artist, track) == ("STEREOLAB", None)
+        assert parse_artist_track(artist) == ("STEREOLAB", None)
diff --git a/tests/unit/test_prompts.py b/tests/unit/test_prompts.py
index ebc37a5..30067a9 100644
--- a/tests/unit/test_prompts.py
+++ b/tests/unit/test_prompts.py
@@ -28,8 +28,6 @@ def test_prompt_names_each_quadrant_position(position: str) -> None:
         "row_index",
         "raw_text",
         "type_raw",
-        "artist_guess",
-        "track_guess",
         "confidence",
         "notes",
     ],
@@ -180,8 +178,6 @@ def test_quadrant_template_substitutes_position(position: str) -> None:
         "row_index",
         "raw_text",
         "type_raw",
-        "artist_guess",
-        "track_guess",
         "confidence",
         "notes",
     ],
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 16609aa..c082267 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -22,22 +22,37 @@ class TestEntry:
     def test_minimal_entry(self) -> None:
         e = Entry(row_index=0, raw_text="LED ZEP - TRAMPLED", confidence="high")
         assert e.row_index == 0
-        assert e.artist_guess is None
-        assert e.track_guess is None
         assert e.notes is None
 
     def test_full_entry(self) -> None:
         e = Entry(
             row_index=2,
             raw_text="LED ZEP - TRAMPLED",
-            artist_guess="Led Zeppelin",
-            track_guess="Trampled Under Foot",
             confidence="medium",
             notes="continuation",
         )
-        assert e.artist_guess == "Led Zeppelin"
+        assert e.raw_text == "LED ZEP - TRAMPLED"
         assert e.confidence == "medium"
 
+    def test_artist_guess_track_guess_keys_are_ignored(self) -> None:
+        """The 34 pre-audit corpus JSONs carry `artist_guess` and `track_guess`.
+        The new schema must accept the old shape (extra keys silently ignored)
+        and round-trip the entry without those keys reappearing on dump. That's
+        the load-bearing backward-compat contract this PR rests on."""
+        e = Entry.model_validate(
+            {
+                "row_index": 0,
+                "raw_text": "JUANA MOLINA - LA PARADOJA",
+                "artist_guess": "JUANA MOLINA",
+                "track_guess": "LA PARADOJA",
+                "confidence": "high",
+            }
+        )
+        assert e.raw_text == "JUANA MOLINA - LA PARADOJA"
+        dumped = e.model_dump()
+        assert "artist_guess" not in dumped
+        assert "track_guess" not in dumped
+
     def test_invalid_confidence_rejected(self) -> None:
         with pytest.raises(ValidationError):
             Entry(row_index=0, raw_text="x", confidence="very-high")  # type: ignore[arg-type]
diff --git a/tests/unit/test_spot_check.py b/tests/unit/test_spot_check.py
index 69a98fa..2eb8992 100644
--- a/tests/unit/test_spot_check.py
+++ b/tests/unit/test_spot_check.py
@@ -84,6 +84,9 @@ def _entry(
     track: str | None = None,
     raw: str = "x",
 ) -> dict:
+    """Legacy-shape entry: the 34 pre-audit corpus JSONs carry
+    `artist_guess` / `track_guess` keys; `collect_entries` still honors
+    them when present."""
     return {
         "row_index": row_index,
         "raw_text": raw,
@@ -95,6 +98,18 @@ def _entry(
     }
 
 
+def _entry_v2(row_index: int, *, raw: str) -> dict:
+    """Post-audit entry: no `artist_guess` / `track_guess`. The artist
+    and track come from `parse_artist_track(raw_text)` at read time."""
+    return {
+        "row_index": row_index,
+        "raw_text": raw,
+        "confidence": "high",
+        "notes": None,
+        "oddities": [],
+    }
+
+
 # -- collect_entries -------------------------------------------------------
 
 
@@ -150,6 +165,53 @@ def test_walks_subdirectories_in_sorted_order(self, tmp_path: Path) -> None:
     def test_returns_empty_list_for_empty_root(self, tmp_path: Path) -> None:
         assert collect_entries(tmp_path) == []
 
+    def test_derives_artist_track_from_raw_text_for_post_audit_entries(
+        self, tmp_path: Path
+    ) -> None:
+        """The post-#41 on-disk shape has no `artist_guess`/`track_guess`
+        keys — both fall back to `parse_artist_track(raw_text)`. Without
+        this fallback, `collect_entries` would silently return zero rows
+        on every new corpus extraction."""
+        page = _make_page(
+            [
+                _entry_v2(0, raw="JUANA MOLINA - LA PARADOJA"),
+                _entry_v2(1, raw="STEREOLAB"),  # artist-only, no track
+                _entry_v2(2, raw="   "),  # blank row, skipped
+            ],
+        )
+        (tmp_path / "p.json").write_text(json.dumps(page))
+
+        rows = collect_entries(tmp_path)
+
+        assert {(r.artist, r.track) for r in rows} == {
+            ("JUANA MOLINA", "LA PARADOJA"),
+            ("STEREOLAB", None),
+        }
+
+    def test_legacy_artist_guess_takes_precedence_over_parsing(self, tmp_path: Path) -> None:
+        """On the 34 pre-audit JSONs `artist_guess` / `track_guess` exist
+        and may have been hand-corrected by the original model in ways
+        the deterministic parser can't reproduce. Honor the stored value
+        on those rows so a re-run of the spot-check doesn't drift."""
+        page = _make_page(
+            [
+                {
+                    "row_index": 0,
+                    "raw_text": "JUANA MOLINA - LA PARADOJA",
+                    "artist_guess": "Juana Molina",
+                    "track_guess": "la paradoja",
+                    "confidence": "high",
+                    "notes": None,
+                    "oddities": [],
+                }
+            ],
+        )
+        (tmp_path / "p.json").write_text(json.dumps(page))
+
+        rows = collect_entries(tmp_path)
+
+        assert (rows[0].artist, rows[0].track) == ("Juana Molina", "la paradoja")
+
     def test_carries_quadrant_position_and_row_index(self, tmp_path: Path) -> None:
         page = _make_page(
             [_entry(7, artist="Pavement", track="Cut Your Hair")],