diff --git a/.agent-plan.md b/.agent-plan.md index 4f4c290..47c1e6c 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -64,7 +64,7 @@ Goal: ship a best-in-class educational synthetic CRM lead-scoring dataset family ### Phase 7 — LLM critique + publish (3 PRs) - [x] PR 7.1: LLM critique module + prompt + driver landed. `leadforge/validation/llm_critique.py` (new) — single-provider Anthropic critique core via an `LLMCritiqueClient` protocol (no preemptive OpenAI/Gemini stubs); `_AnthropicCritiqueClient` lazy-imports the SDK so the module imports cleanly even on machines without `anthropic` installed (the skip-cleanly path needs to work without the SDK). `has_anthropic_credentials` / `api_key_or_skip` treat unset and empty-after-strip identically as "absent", explicitly to handle the `env -i` / stale `.envrc` case where the shell sets `ANTHROPIC_API_KEY=""` and the SDK would otherwise 401 instead of cleanly skipping. Default model `claude-opus-4-7` with `thinking={"type": "adaptive", "display": "summarized"}` (only mode supported on Opus 4.7 — manual `budget_tokens` 400s) and `output_config={"effort": "high"}` (recommended minimum for intelligence-sensitive work per the `claude-api` skill); two prompt-cache breakpoints (rubric + input bundle) per the design doc's caching strategy so the common adjudication-loop workflow hits cache on both layers; streamed via `messages.stream(...).get_final_message()` to dodge the 10-min idle-connection timeout on long adaptive-thinking responses. 
`build_input_bundle` is pure (same `release_dir` → byte-identical bytes → identical `sha256`) and assembles eleven blocks: `release/README.md`, per-tier `dataset_card.md`, `docs/release/generation_method.md`, `manifest.json`, `feature_dictionary.csv`, `validation_report.{md,json}`, the first 100 test-split rows rendered as deterministic CSV, the public/instructor diff summary (live-derived from the `BANNED_LEAD_COLUMNS` / `BANNED_OPP_COLUMNS` / `BANNED_TABLES` / `SNAPSHOT_FILTERED_TABLES` constants in `leakage_probes.py` — single source of truth, auto-stays-in-sync, sync-tested), the public-safe mechanism summary (motif family **names** + difficulty knob **names**, never values — same redaction posture as `student_public`), and the break-me guide verbatim ("avoid re-deriving" the existing nine patterns). `parse_critique_response` schema-validator pins eleven malformations (missing required field, wrong severity, wrong category, wrong rubric dimension, finding-id collision, findings non-list, top-level non-object, non-JSON, score out of range, defensive code-fence stripping, empty findings list valid) and returns every problem in one error rather than the first one. Output schema is a frozen dataclass (no pydantic dependency) with the nine-value `category` vocabulary lifted **verbatim** from `break_me_guide.md` so findings route to existing issue-template labels without translation; `rubric_dimension: str` is required on every finding (D1-D14) so reviewers can audit clustering. Provenance triple (`model` / `effort` / `thinking_mode`) plus per-source-file `bundle_hashes` and the assembled `input_bundle_sha256` are carried on every result for audit-artifact-sync — re-runs on the same RC produce the same bundle hashes. 
`docs/release/llm_critique_prompt.md` (new) — the rubric document the driver feeds to Claude, parseable via `` / `` section markers with surrounding prose ignored; fourteen rubric dimensions (D1 documentation truthfulness · D2 leakage discipline · D3 realism vs disclosure · D4 difficulty signal · D5 calibration / value-aware ranking · D6 cohort/time-window discipline · D7 notebook integrity · D8 platform packaging hygiene · D9 adversarial-framing completeness · D10 pedagogy of the documented `total_touches_all` trap · D11 effective semantic diversity per recommendation #12 v1 scope · D12 Datasheets-for-Datasets composition · D13 manifest/provenance integrity · D14 out-of-scope guard). Severity calibration explicitly written to discourage padding the report with low-severity nits and to surface "no high-severity findings" as a positive signal vs "the critique didn't surface any". `scripts/run_llm_critique.py` (new) — driver mirroring `validate_release_candidate.py`'s posture (free-function `parse_args`, frozen `DriverConfig`, `run_critique(config) -> DriverResult`, `main(argv)` returning an exit code). Skip-cleanly path triggers BEFORE any I/O — no rubric read, no bundle build, no out-dir creation; tested explicitly with `not (tmp_path / "out").exists()` after the skip. Three modes alongside the live path: `--dry-run` writes the rendered input bundle to `/llm_critique_input_.md` for human inspection (different filename from the real raw JSON, can't be confused); `--no-execute` calls `api_key_or_skip` + `build_anthropic_client()` to prove the SDK is installed and creds are present without burning an API call (CI smoke); `--out-tag` suffixes the raw filename so adjudication re-runs don't shadow the canonical run. Outputs: timestamped `llm_critique_raw_.json` (accumulates per run, no clobber) + canonical `llm_critique_summary.md` (overwritten in place so dataset-card links don't rot). 
Exit codes mirror `validate_release_candidate.py`: 0 pass (skip-cleanly counts as pass), 1 high-severity surfaced and unresolved, 2 pre-flight error or schema-validation failure (every problem rendered to stderr, not just the first). Adjudication is **maintainer-driven** post-exit — resolve in code OR log to `v2_decision_log.md`, then re-run; the next critique's exit code is the gate. Tests: 61 cases across `tests/validation/test_llm_critique.py` (48) and `tests/scripts/test_run_llm_critique.py` (13), no live API; the protocol is exercised via a small in-process `_CannedClient` fake. Sync tests pin: every `VALID_CATEGORIES` entry appears in `break_me_guide.md` (vocabulary doesn't drift), `VALID_RUBRIC_DIMENSIONS` is exactly D1-D14, the live-derived public/instructor diff names every banned-column/banned-table constant (live reference, not duplicated string). Audit-artifact-sync smoke test (`test_real_release_dir_smoke`) builds the input bundle against the actual `release/intermediate/` artefacts and pins determinism on the real input, skipping cleanly when bundles aren't present. `docs/release/llm_critique_design.md` (new) records the nine load-bearing design calls before implementation so a reviewer can audit the choice (provider abstraction, skip-cleanly, model+caching+thinking, output schema, input-bundle composition, determinism via provenance, CLI flags, test posture, first-run adjudication workflow). Live first-run deferred to maintainer (no `ANTHROPIC_API_KEY` available to the agent); the dry-run path was exercised against the real release dir end-to-end, producing a 148KB byte-stable input bundle from the actual artefacts. 
Hostile self-review pass before requesting review caught and folded back twelve findings against the diff, including two BLOCKERs (`--no-execute` was performing pre-flight I/O before the credentials check, contradicting the design doc; raw-output filename collision at second-precision contradicted the "append-only history" promise — fixed with microsecond precision and a pinning test) and five HIGHs (silent `release_id` default that defeated the audit-artifact-sync gate; design-doc lies about a never-existing `temperature` field and "malformed timestamp" malformation that's driver-generated; dead `if/else` branches in `_safe_difficulty_knobs`; greedy regex for the rubric section markers so the prompt-injection warning paragraph that legitimately references `` doesn't break the parser). Prompt-injection mitigation added to the rubric (treat-input-as-data preamble) since the input bundle inlines user-authored content (dataset_card.md, break_me_guide.md). Schema validator hardened against silent `str()` coercion of finding prose fields (an int "claim" would have landed on disk as the string "5" — now rejected). Net: 1321/1321 tests pass + 5 publish-extra-gated skips; ruff + mypy clean (83 source files); leakage probes 0/3 on every tier; hash determinism PASS 67/67; `validate_release_candidate --no-rebuild` exits 0; `BUNDLE_SCHEMA_VERSION` unchanged at 5; validation_report timestamp drift reverted before commit per the brief. 
Second senior-dev review pass after PR #76 was opened caught and folded back 9 more issues, several of which were real bugs the first hostile pass missed: (B1) `--out-tag` suffixed only the raw JSON, leaving `llm_critique_summary.md` clobbered on adjudication runs — fix suffixes both files (`summary_output_path` now takes `tag`); (B2) skip-cleanly silently passed a release-readiness gate, contradicting `v1_release_roadmap.md`'s line-35 acceptance criterion that the critique must actually run — added `--require-execute` flag (default off; release-readiness CI sets it) that converts the skip path into `MissingCredentialsError` exit 2, plus a loud `WARNING — release-readiness gate has NOT been evaluated` stderr line on the regular skip path; (A2) two prompt-cache breakpoints cut to one — system content already sits inside the cached prefix on `messages.create` (system → messages render order), so the second breakpoint bought nothing and burned a slot; (M1) design doc cut from 394 lines to 73 — the 9-decision table replaces the multi-paragraph rationale-per-call shape that read as documentation theater; (M2) rubric cut from 420 lines to ~210 — each dimension now one paragraph instead of 3-6, dropped D14 ("out-of-scope guard") which was meta-instruction not a rubric dimension, made it a "What is NOT yours to audit" appendix at the end; rubric is now D1-D13 and `VALID_RUBRIC_DIMENSIONS` updated in lockstep; (M3) test-split sample replaced 100 raw rows of CSV with `df.describe(include="all")` per-column statistics + a 20-row head — distributional conclusions need statistics not raw rows, and the rendered input bundle dropped from 148KB to 128KB; (M5) streaming-via-`messages.stream` replaced with `messages.create(timeout=600.0)` — no stream events were processed anyway, the contract is just "don't time out on long adaptive-thinking responses" and an explicit timeout is the right way to spell that; (M6) `render_input_bundle_text` free function moved to 
`InputBundle.render()` method — leaky abstraction; the audit-artifact-sync framing was misleading (no committed-artefact diff) and was renamed to "smoke test against the real release dir" / "staleness check vs committed result" throughout the module and design doc. Net after the second pass: 1323/1323 tests pass + 5 publish-extra-gated skips; ruff + mypy clean; leakage probes 0/3 on every tier; hash determinism PASS 67/67; `validate_release_candidate --no-rebuild` exits 0; `BUNDLE_SCHEMA_VERSION` unchanged at 5; validation_report timestamp drift reverted again before this commit. First live critique run executed by the maintainer with a dedicated Anthropic project key (`leadforge-llm-critique-v1-prod`): score 7/10, six findings (1 high, 4 medium, 1 low), exit code 1 as designed for unresolved high-severity findings. Adjudication: F001 high-severity (93 % `account_id` overlap between train/test documented only in break_me_guide §5, missing from README/dataset_card) — **resolved in code** by adding a "Group-leakage warning" paragraph to `release/README.md` "Splits" subsection citing the 518/557 figure and a `GroupKFold(account_id)` recipe; the parallel disclosure on the auto-rendered `dataset_card.md` is logged as `accepted-for-v2` because the renderer change is out of scope for PR 7.1's no-bundle-regen rule. F004 medium (break_me_guide pattern 5 covered `account_id` but not `contact_id`, despite contacts being shared across the lead-keyed split at the same magnitude) — **resolved in code** by extending §5 to enumerate both keys and any reusable foreign-key column as group-leakage axes. F006 low (README "Conversion rate (recipe band)" column header didn't make clear it was a recipe-acceptance window not an observed range) — **resolved in code** by renaming to "(acceptance band, gate G7.\*)" and adding a one-sentence note that observed five-seed spreads sit comfortably inside the band. 
F002 medium (Gaussian noise produces non-physical values: negative ACV, negative day-deltas, day-deltas > snapshot_day=30, undisclosed in dataset card) — `accepted-for-v2`; requires `leadforge/narrative/dataset_card.py` change. F003 medium (`](../foo)` relative links would 404 on Kaggle/HF) — `wont-fix`: already treated by `scripts/_release_common.py::rewrite_release_links()` which both platform packagers (PR 5.1, 5.2) call at packaging time; the LLM didn't have visibility into the platform packagers and made a wrong inference. F005 medium (advanced-tier `calibration_max_bin_error = 0.5234` driven by an n=2 high-probability bin, no minimum-bin-count footnote) — `accepted-for-v2`; not a 1-line change, touches `release_quality.py` metric definition and would require regenerating `validation_report.{json,md}` which PR 7.1's brief explicitly forbids. Three missing-section callouts (Datasheets §Biases, §Privacy, per-bundle group-split warning) and three maintainer questions (noise/windowing interaction, `top_decile_rate` naming, Kaggle/HF docs subtree) all logged to `docs/release/v2_decision_log.md`. README edits cascaded into the platform packager artefacts; `release/kaggle/dataset-metadata.json` and `release/huggingface/README.md` regenerated cleanly via the existing packagers (`scripts/package_{kaggle,hf}_release.py`). Critique run output committed to `release/validation/llm_critique_raw_20260508T204359.124834Z.json` + `release/validation/llm_critique_summary.md`. Final net: 1325/1325 tests pass + 5 publish-extra-gated skips; ruff + mypy clean (83 source files); leakage probes 0/3 on every tier; hash determinism PASS 67/67; `validate_release_candidate --no-rebuild` exits 0; `BUNDLE_SCHEMA_VERSION` unchanged at 5. Phase 7 PR 7.1 closed; PR 7.2 (local Kaggle/HF mock-page preview) is next. 
-- [ ] **PR 7.2** — local Kaggle + HuggingFace mock-page preview tooling (must land before PR 7.3): `scripts/preview_kaggle_page.py` and `scripts/preview_hf_page.py` render offline HTML mocks of the public Kaggle and HF dataset pages from the *exact* upload artefacts (metadata JSON, README, cover image), serve over `localhost`, and let the maintainer click through both pages in a browser before any platform upload — catches styling / link / YAML-rendering issues before they hit cached previews on the live page. Tests cover required-field presence, link resolution, schema column listing, configs-block round-trip. +- [x] PR 7.2: local Kaggle + HuggingFace mock-page preview tooling landed. `scripts/preview_kaggle_page.py` (new) — reads the *exact* artefacts the publish PR will upload (`release/kaggle/dataset-metadata.json` + the inlined README body + the cover image, prefer `release/kaggle/dataset-cover-image.png` then fall back to the gitignore-resilient `release/dataset-cover-image.png` master copy) and renders an offline HTML page mocking the public Kaggle dataset view: header (title / subtitle / id pill / licence / update-frequency / visibility), cover image, rendered description (the inlined README body), file tree of declared resources grouped by tier with per-tier counts, schema/columns table for every tabular resource (`resources[].schema.fields[].name/type/description`) with per-table column counts in the heading, user-specified-sources block (rendered only when present), keywords + licence footer. Serves on `http://localhost:8765` via stdlib `http.server.ThreadingHTTPServer` (the threading variant inherits `allow_reuse_address=True` from `HTTPServer`, so Ctrl-C → re-run within ~60s does not raise `OSError [Errno 48] Address already in use` while the socket sits in TIME_WAIT — caught and folded back in self-review pass 1, the initial draft used `socketserver.ThreadingTCPServer` which defaults to `False`). 
`--no-serve` builds the HTML and exits (CI / inspection mode); `--open-browser` pops a tab on startup; `--port` / `--release-dir` / `--out-dir` round out the surface. `scripts/preview_hf_page.py` (new) — reads `release/huggingface/README.md` (or `release/huggingface-instructor/README.md` per `--variant=public|instructor`) and parses YAML frontmatter + Markdown body via a single anchored regex (`r"\A---\n(?P<frontmatter>.*?)\n---\n(?P<body>.*)\Z"` with `re.DOTALL`); renders the analogous HF view: header pills (pretty_name + license + task_categories + size_categories + language), tag chips, configs dropdown (one details-block per `configs[]` entry with the default config flagged via a single `badge--default` instance, data_files split→path table per config), file tree of declared YAML paths bucketed by config, README body, footer carrying the variant for human visual confirmation. `--variant` defaults `--out-dir` to `release/_preview/huggingface/` (public) or `release/_preview/huggingface-instructor/` (instructor); the instructor path also reads its README from a different location (`huggingface-instructor/README.md`) and looks for the cover under the variant directory first. Both scripts share the validation discipline from the Phase 5 packagers: build → validate → write; pre-flight failures (missing metadata, malformed JSON / YAML, unknown variant, missing cover) raise and the CLI converts to rc=2 without touching disk; runtime success exits 0. Markdown rendering via `markdown-it-py` in `gfm-like` preset (tables / fenced code / strikethrough on; `linkify` explicitly disabled so the optional `linkify-it-py` transitive dep is not required); the dep is added to the `[publish]` extra alongside `datasets` / `kaggle` (mirrors the PR 5.1 / 5.2 gating posture for publish-pipeline tooling), and absent imports raise a clean `ImportError` pointing at `pip install -e ".[publish]"` instead of a cryptic stdlib `ModuleNotFoundError`. 
Both renderers are pure: same `(metadata|doc, cover_filename|variant)` → byte-identical HTML (no `now()`, no random, no clock). Output landing at `release/_preview/<platform>/index.html` is gitignored (`.gitignore` adds `release/_preview/`); the audit-artefact-sync gate lives at `release/_preview_committed/{kaggle,huggingface_public,huggingface_instructor}.html` (committed alongside the scripts, mirrors the PR 4.1 / 5.1 / 5.2 / 7.1 audit-sync pattern). HTML is wrapped in a single self-contained file (CSS inlined, no external stylesheet) so each committed sample is human-inspectable directly from `git show` or a browser without a server. XSS-safety: every user-controlled string passes through a hand-rolled `_escape` (`&`, `<`, `>`, `"`, `'`); kept hand-rolled rather than `html.escape` so the committed samples' `&#39;` (decimal) escapes don't churn against `html.escape`'s `&#x27;` (hex) entity. Tests: 48 cases across `tests/scripts/test_preview_kaggle_page.py` (20) and `tests/scripts/test_preview_hf_page.py` (28); no live HTTP, no network, no socket open. The four roadmap-mandated checks per script: required field labels appear in rendered HTML (Kaggle: title / subtitle / id / license / file count / schema column count; HF: pretty_name / license / configs / tags); any Markdown link in the source that resolves to a non-allowlisted URL pattern fails the test (allow-list: `https://github.com/leadforge-dev/leadforge`, `https://huggingface.co/datasets/leadforge`, sibling-relative `LICENSE`, in-document `#` anchors — anything else is a 404 risk on the live page); the Kaggle schema table lists every column declared in `resources[].schema.fields` (iterates the committed metadata, asserts each `{name}` appears); every `configs[]` block in the HF YAML round-trips into the rendered dropdown. 
Determinism is double-tested: `test_render_is_byte_deterministic` runs two passes against the real release artefact and pins equality; `test_committed_*_sample_matches_fresh_regeneration` pins the committed HTML against fresh regeneration byte-for-byte (the audit-sync gate). Pre-flight error paths exercised end-to-end: missing artefact (`FileNotFoundError`), malformed JSON / YAML (`ValueError`), unknown variant, missing cover image — all return rc=2 via `main()` with informative stderr. HTML escape coverage: `test_render_escapes_html_in_field_values` asserts a `", "license": "mit"}, + body="body\n", + ) + html = preview.render_hf_html(doc, variant="public") + assert "" not in html + assert "<script>x</script>" in html + + +# --------------------------------------------------------------------------- +# Markdown link resolution (the leakage / link-rewrite regression guard) +# --------------------------------------------------------------------------- + +_HREF_RE = re.compile(r'href="([^"]+)"') + + +@pytest.mark.skipif(not _PUBLIC_PRESENT, reason="public README not present") +def test_public_readme_has_no_unrewritten_relative_links() -> None: + """Same source-side regression guard as the Kaggle preview.""" + + body = _PUBLIC_README.read_text(encoding="utf-8") + assert "](../" not in body, "unrewritten parent-relative link in public README" + assert "](validation/" not in body, "unrewritten validation-relative link in public README" + + +@pytest.mark.skipif(not _PUBLIC_PRESENT, reason="public README not present") +def test_public_rendered_links_point_at_known_targets() -> None: + """Every rendered href in the public preview points at one of the + allow-listed prefixes — anything else would 404 on the live HF + page.""" + + doc = preview.parse_hf_readme(_PUBLIC_README.read_text(encoding="utf-8")) + html = preview.render_hf_html(doc, variant="public") + bad: list[str] = [] + for href in _HREF_RE.findall(html): + if any(href.startswith(prefix) for prefix in 
_LINK_OK_PREFIXES): + continue + bad.append(href) + assert not bad, f"non-allowlisted hrefs would 404 on HF: {bad[:5]}" + + +@pytest.mark.skipif(not _INSTRUCTOR_PRESENT, reason="instructor README not present") +def test_instructor_rendered_links_point_at_known_targets() -> None: + doc = preview.parse_hf_readme(_INSTRUCTOR_README.read_text(encoding="utf-8")) + html = preview.render_hf_html(doc, variant="instructor") + bad: list[str] = [] + for href in _HREF_RE.findall(html): + if any(href.startswith(prefix) for prefix in _LINK_OK_PREFIXES): + continue + bad.append(href) + assert not bad, f"non-allowlisted hrefs would 404 on HF: {bad[:5]}" + + +@pytest.mark.skipif(not _PUBLIC_PRESENT, reason="public README not present") +def test_public_yaml_configs_round_trip_into_html() -> None: + """Every ``configs[].config_name`` declared in the YAML appears in + the rendered HTML — the round-trip the roadmap mandates.""" + + doc = preview.parse_hf_readme(_PUBLIC_README.read_text(encoding="utf-8")) + html = preview.render_hf_html(doc, variant="public") + for config in doc.frontmatter["configs"]: + name = config["config_name"] + assert f"{name}" in html, ( + f"config {name!r} declared in YAML but missing from rendered HTML" + ) + + +# --------------------------------------------------------------------------- +# Determinism + audit-artefact-sync (against committed samples) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif(not _PUBLIC_PRESENT, reason="public README not present") +def test_render_is_byte_deterministic() -> None: + doc = preview.parse_hf_readme(_PUBLIC_README.read_text(encoding="utf-8")) + a = preview.render_hf_html(doc, variant="public") + b = preview.render_hf_html(doc, variant="public") + assert a == b + + +@pytest.mark.skipif( + not (_PUBLIC_PRESENT and _PUBLIC_SAMPLE.exists()), + reason="public README or committed sample missing", +) +def test_committed_public_sample_matches_fresh_regeneration() -> None: + 
"""Audit-sync gate for the public variant. + + Regenerate via:: + + python scripts/preview_hf_page.py --no-serve + cp release/_preview/huggingface/index.html \\ + release/_preview_committed/huggingface_public.html + """ + + doc = preview.parse_hf_readme(_PUBLIC_README.read_text(encoding="utf-8")) + fresh = preview.render_hf_html(doc, variant="public") + committed = _PUBLIC_SAMPLE.read_text(encoding="utf-8") + assert fresh == committed + + +@pytest.mark.skipif( + not (_INSTRUCTOR_PRESENT and _INSTRUCTOR_SAMPLE.exists()), + reason="instructor README or committed sample missing", +) +def test_committed_instructor_sample_matches_fresh_regeneration() -> None: + """Audit-sync gate for the instructor variant.""" + + doc = preview.parse_hf_readme(_INSTRUCTOR_README.read_text(encoding="utf-8")) + fresh = preview.render_hf_html(doc, variant="instructor") + committed = _INSTRUCTOR_SAMPLE.read_text(encoding="utf-8") + assert fresh == committed + + +# --------------------------------------------------------------------------- +# Driver — pre-flight error paths (no server start) +# --------------------------------------------------------------------------- + + +def _make_config(release_dir: Path, out_dir: Path, *, variant: str = "public") -> object: + return preview.PreviewConfig( + release_dir=release_dir, + out_dir=out_dir, + port=8766, + variant=variant, + open_browser=False, + serve=False, + ) + + +def test_run_preview_raises_on_unknown_variant(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + fake_release.mkdir() + config = _make_config(fake_release, tmp_path / "preview", variant="bogus") + with pytest.raises(ValueError, match="unknown --variant"): + preview.run_preview(config) # type: ignore[arg-type] + + +def test_run_preview_raises_on_missing_readme(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + fake_release.mkdir() + config = _make_config(fake_release, tmp_path / "preview") + with pytest.raises(FileNotFoundError, match="HF README not 
found"): + preview.run_preview(config) # type: ignore[arg-type] + + +def test_run_preview_raises_on_malformed_readme(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + (fake_release / "huggingface").mkdir(parents=True) + (fake_release / "huggingface" / "README.md").write_text("# No frontmatter\n", encoding="utf-8") + config = _make_config(fake_release, tmp_path / "preview") + with pytest.raises(ValueError, match="missing a YAML frontmatter"): + preview.run_preview(config) # type: ignore[arg-type] + + +def test_run_preview_raises_on_missing_required_frontmatter_keys(tmp_path: Path) -> None: + """Pre-flight required-key check (Copilot finding COPILOT-1, + applied symmetrically to the HF script). Missing pretty_name / + license would otherwise render a half-blank header.""" + + fake_release = tmp_path / "release" + (fake_release / "huggingface").mkdir(parents=True) + (fake_release / "huggingface" / "README.md").write_text( + "---\nlanguage:\n - en\n---\nbody\n", encoding="utf-8" + ) + config = _make_config(fake_release, tmp_path / "preview") + with pytest.raises(ValueError, match="missing required key") as exc_info: + preview.run_preview(config) # type: ignore[arg-type] + msg = str(exc_info.value) + assert "pretty_name" in msg + assert "license" in msg + + +def test_validate_required_frontmatter_treats_empty_string_as_missing(tmp_path: Path) -> None: + """Whitespace-only or empty values count as missing — a blank + pretty_name renders an empty

, which is what the validator + is supposed to prevent.""" + + with pytest.raises(ValueError, match="missing required key"): + preview._validate_required_frontmatter( + {"pretty_name": " ", "license": ""}, tmp_path / "any.md" + ) + + +def test_run_preview_raises_on_missing_cover(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + (fake_release / "huggingface").mkdir(parents=True) + (fake_release / "huggingface" / "README.md").write_text( + "---\npretty_name: T\nlicense: mit\n---\nbody\n", encoding="utf-8" + ) + config = _make_config(fake_release, tmp_path / "preview") + with pytest.raises(FileNotFoundError, match="cover image"): + preview.run_preview(config) # type: ignore[arg-type] + + +def test_run_preview_writes_html_and_copies_cover(tmp_path: Path) -> None: + """End-to-end no-serve: HTML lands at out_dir/index.html and the + cover image is copied as a real file.""" + + fake_release = tmp_path / "release" + (fake_release / "huggingface").mkdir(parents=True) + (fake_release / "huggingface" / "README.md").write_text( + "---\npretty_name: T\nlicense: mit\n---\nbody\n", encoding="utf-8" + ) + cover = fake_release / "huggingface" / "dataset-cover-image.png" + cover.write_bytes(b"\x89PNG\r\n\x1a\nfake") + out_dir = tmp_path / "preview" + outcome = preview.run_preview(_make_config(fake_release, out_dir)) # type: ignore[arg-type] + assert outcome.html_path == out_dir / "index.html" + assert outcome.html_path.is_file() + assert outcome.cover_path.is_file() + assert not outcome.cover_path.is_symlink() + + +def test_run_preview_instructor_variant_uses_companion_paths(tmp_path: Path) -> None: + """``--variant=instructor`` reads the companion README and writes + to the companion-flavoured out_dir.""" + + fake_release = tmp_path / "release" + (fake_release / "huggingface-instructor").mkdir(parents=True) + (fake_release / "huggingface-instructor" / "README.md").write_text( + "---\npretty_name: I\nlicense: mit\n---\nbody\n", encoding="utf-8" + ) + cover = 
fake_release / "huggingface-instructor" / "dataset-cover-image.png" + cover.write_bytes(b"\x89PNG\r\n\x1a\nfake") + out_dir = tmp_path / "preview-instructor" + outcome = preview.run_preview( + _make_config(fake_release, out_dir, variant="instructor") # type: ignore[arg-type] + ) + assert outcome.html_path.is_file() + assert "Variant: instructor" in outcome.html_path.read_text(encoding="utf-8") + + +def test_main_returns_2_on_missing_release( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + rc = preview.main( + [ + "--release-dir", + str(tmp_path / "missing"), + "--out-dir", + str(tmp_path / "preview"), + "--no-serve", + ] + ) + assert rc == 2 + captured = capsys.readouterr() + assert "HF README not found" in captured.err + + +def test_main_default_out_dir_depends_on_variant(tmp_path: Path) -> None: + """``--out-dir`` defaults to the variant-flavoured location.""" + + args_public = preview.parse_args(["--no-serve"]) + args_instructor = preview.parse_args(["--no-serve", "--variant=instructor"]) + assert args_public.out_dir is None # resolved in main() + assert args_instructor.out_dir is None + # Sanity: ``main`` resolves the default per variant. 
+ rc = preview.main( + [ + "--release-dir", + str(tmp_path / "missing"), + "--variant=instructor", + "--no-serve", + ] + ) + assert rc == 2 # missing README; we just want to confirm CLI parsing didn't crash + + +def test_parse_args_defaults() -> None: + args = preview.parse_args(["--no-serve"]) + assert args.release_dir == preview.DEFAULT_RELEASE_DIR + assert args.out_dir is None # variant-resolved in main() + assert args.port == preview.DEFAULT_PORT + assert args.variant == "public" + assert args.open_browser is False + assert args.no_serve is True + + +def test_parse_args_rejects_unknown_variant() -> None: + with pytest.raises(SystemExit): + preview.parse_args(["--variant=bogus"]) diff --git a/tests/scripts/test_preview_kaggle_page.py b/tests/scripts/test_preview_kaggle_page.py new file mode 100644 index 0000000..e056a7b --- /dev/null +++ b/tests/scripts/test_preview_kaggle_page.py @@ -0,0 +1,545 @@ +"""Tests for ``scripts/preview_kaggle_page.py`` (PR 7.2). + +Locks the local Kaggle preview-page contract: + +* required field labels appear in the rendered HTML (title, subtitle, + licence, file count, schema column count) — the four roadmap-mandated + Kaggle checks; +* every Markdown link in the inlined description resolves to a + non-404 URL pattern (no ``](../`` survives the rewrite, no + ``](validation/...)`` lives at a relative path on the upload tree); +* the Kaggle schema table lists every CSV / parquet column declared + in ``dataset-metadata.json::resources[].schema.fields``; +* the renderer is byte-deterministic and the committed sample at + ``release/_preview_committed/kaggle.html`` matches a fresh + regeneration (audit-artefact-sync gate, mirrors PR 5.1 / 5.2 / 7.1); +* the driver exits with rc=2 on missing artefacts (no live HTTP). + +No external network; the only HTTP is the loopback ``make_server`` smoke test. +Everything else goes through ``render_kaggle_html()`` or in-process ``run_preview()``. 
+""" + +from __future__ import annotations + +import importlib.util +import json +import re +import sys +from pathlib import Path + +import pytest + +_REPO_ROOT = Path(__file__).resolve().parents[2] +_SCRIPT_PATH = _REPO_ROOT / "scripts" / "preview_kaggle_page.py" +_spec = importlib.util.spec_from_file_location("preview_kaggle_page", _SCRIPT_PATH) +assert _spec is not None +assert _spec.loader is not None +preview = importlib.util.module_from_spec(_spec) +sys.modules["preview_kaggle_page"] = preview +_spec.loader.exec_module(preview) + + +_RELEASE_DIR = _REPO_ROOT / "release" +_COMMITTED_METADATA = _RELEASE_DIR / "kaggle" / "dataset-metadata.json" +_COMMITTED_COVER = _RELEASE_DIR / "dataset-cover-image.png" +_COMMITTED_SAMPLE = _REPO_ROOT / "release" / "_preview_committed" / "kaggle.html" +_RELEASE_PRESENT = _COMMITTED_METADATA.exists() + +# Allow-listed link prefixes the rendered-links test accepts. Anything else +# in the rendered description is a regression — either the source +# README leaked a relative ``../`` link or the GitHub blob rewrite +# stopped firing. The allow-list is intentionally narrow. +_LINK_OK_PREFIXES = ( + "https://github.com/leadforge-dev/leadforge", + "https://huggingface.co/datasets/leadforge", + "https://example.com", # used by unit tests only + "LICENSE", # sibling-relative, resolves under the upload tree + "#", # in-document anchor (footnotes, etc.) 
+) + + +# --------------------------------------------------------------------------- +# Pure-renderer fixtures +# --------------------------------------------------------------------------- + + +def _minimal_metadata() -> dict[str, object]: + """A minimum-viable metadata payload exercising every renderer + branch (header pills, file tree, schema table, sources, footer).""" + + return { + "title": "TestSet: Lead Scoring Mock", + "id": "testorg/testset-lead-scoring", + "subtitle": "A mock metadata payload exercising the renderer.", + "description": ( + "# Mock dataset\n\n" + "This is a [test link](https://github.com/leadforge-dev/leadforge).\n\n" + "| Col | Notes |\n|---|---|\n| a | b |\n" + ), + "isPrivate": True, + "licenses": [{"name": "MIT"}], + "keywords": ["b2b", "tabular"], + "collaborators": [], + "expectedUpdateFrequency": "never", + "userSpecifiedSources": [ + {"title": "source repo", "url": "https://github.com/leadforge-dev/leadforge"}, + ], + "image": "dataset-cover-image.png", + "resources": [ + { + "path": "intro/lead_scoring.csv", + "description": "Intro flat CSV.", + "schema": { + "fields": [ + {"name": "lead_id", "type": "string", "description": "Opaque id."}, + {"name": "label", "type": "boolean", "description": "Outcome."}, + ] + }, + }, + { + "path": "intro/manifest.json", + "description": "Provenance manifest (no schema).", + }, + ], + } + + +# --------------------------------------------------------------------------- +# Required field labels (one of the four roadmap-mandated Kaggle checks) +# --------------------------------------------------------------------------- + + +def test_render_includes_title_subtitle_id_and_license() -> None: + html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + assert "TestSet: Lead Scoring Mock" in html + assert "A mock metadata payload exercising the renderer." 
in html + assert "testorg/testset-lead-scoring" in html + assert "License: MIT" in html + assert "Updates: never" in html + + +def test_render_does_not_include_visibility_pill() -> None: + """Kaggle's public page does NOT display ``isPrivate``; rendering + a ``Visibility:`` pill in the preview would misrepresent what + public viewers see (folded back in self-review pass 3).""" + + private_html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + public_html = preview.render_kaggle_html( + {**_minimal_metadata(), "isPrivate": False}, + "dataset-cover-image.png", + ) + for html in (private_html, public_html): + assert "Visibility:" not in html + assert "pill--visibility" not in html + + +def test_render_file_tree_lists_every_resource_path() -> None: + """File tree shows every resource path declared in metadata.""" + + html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + assert "intro/lead_scoring.csv" in html + assert "intro/manifest.json" in html + assert "(2 total)" in html # file count appears in the heading + + +def test_render_schema_table_lists_every_column() -> None: + """The schema table lists every column from every tabular resource.""" + + html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + assert "lead_id" in html + assert "label" in html + assert "Opaque id." in html + assert "(2 columns)" in html # per-table column count + # Resources without a schema (manifest.json) do not appear in the table. + # Note singular "tabular file" — the plural() helper kicks in only when + # n != 1 (Copilot finding COPILOT-3). 
+ assert "(2 columns across 1 tabular file)" in html + + +def test_render_keywords_appear_as_chips_in_footer() -> None: + html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + assert 'b2b' in html + assert 'tabular' in html + + +def test_render_sources_block_renders_when_present() -> None: + html = preview.render_kaggle_html(_minimal_metadata(), "dataset-cover-image.png") + assert "source repo" in html + assert 'href="https://github.com/leadforge-dev/leadforge"' in html + + +def test_render_sources_block_omitted_when_empty() -> None: + metadata = {**_minimal_metadata(), "userSpecifiedSources": []} + html = preview.render_kaggle_html(metadata, "dataset-cover-image.png") + assert '

Sources

' not in html + + +def test_render_escapes_html_in_field_values() -> None: + """User-controlled strings are HTML-escaped — guards against XSS + if a recipe ever surfaces ``"} + html = preview.render_kaggle_html(metadata, "dataset-cover-image.png") + assert "" not in html + assert "<script>" in html + + +# --------------------------------------------------------------------------- +# Schema-fields exhaustiveness (audit-style, against committed metadata) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif(not _RELEASE_PRESENT, reason="release bundles not present") +def test_committed_metadata_schema_is_fully_listed() -> None: + """The roadmap-mandated check: the Kaggle schema table lists every + CSV / parquet column declared in dataset-metadata.json.""" + + metadata = json.loads(_COMMITTED_METADATA.read_text(encoding="utf-8")) + html = preview.render_kaggle_html(metadata, metadata["image"]) + for resource in metadata["resources"]: + schema = resource.get("schema") + if not schema: + continue + for field in schema["fields"]: + name = field["name"] + # Every column name appears as a ```` cell in the table. + assert f"{name}" in html, ( + f"schema column {name!r} from {resource['path']!r} not rendered" + ) + + +# --------------------------------------------------------------------------- +# Markdown link resolution (the leakage / link-rewrite regression guard) +# --------------------------------------------------------------------------- + +#: Match ``href="X"`` in the rendered HTML — markdown-it-py emits +#: double-quoted hrefs. Inline ``](X)`` would slip past this and stay +#: as escaped text rather than a real link, so we also assert against +#: those separately. +_HREF_RE = re.compile(r'href="([^"]+)"') + + +@pytest.mark.skipif(not _RELEASE_PRESENT, reason="release bundles not present") +def test_committed_metadata_description_has_no_unrewritten_relative_links() -> None: + """Source-side regression guard. 
+ + The Kaggle packager runs ``rewrite_release_links()`` on the + inlined README; if a future README adds a ``](../foo)`` link or a + ``](validation/...)`` link AND someone updates the rewriter to + miss it, the rendered description would carry a 404-bound href. + Catch it here, before the publish runbook. + """ + + metadata = json.loads(_COMMITTED_METADATA.read_text(encoding="utf-8")) + description = metadata["description"] + # Source-form check: no parent-relative or validation-relative + # markdown links remain in the inlined description. + assert "](../" not in description, ( + "unrewritten parent-relative markdown link in inlined description" + ) + assert "](validation/" not in description, ( + "unrewritten validation-relative markdown link in inlined description" + ) + + +@pytest.mark.skipif(not _RELEASE_PRESENT, reason="release bundles not present") +def test_committed_metadata_rendered_links_point_at_known_targets() -> None: + """Every rendered href in the description body points at one of: + + * a GitHub blob URL (the rewriter's output); + * a known external service (huggingface.co/datasets/leadforge); + * a sibling-relative path that resolves under the upload tree + (LICENSE), or an in-document anchor (#footnote-1 etc.). + + Anything else is a 404 risk on the live page. 
+ """ + + metadata = json.loads(_COMMITTED_METADATA.read_text(encoding="utf-8")) + html = preview.render_kaggle_html(metadata, metadata["image"]) + bad: list[str] = [] + for href in _HREF_RE.findall(html): + if any(href.startswith(prefix) for prefix in _LINK_OK_PREFIXES): + continue + bad.append(href) + assert not bad, ( + f"rendered HTML carries non-allowlisted hrefs that would 404 on Kaggle: {bad[:5]}" + ) + + +# --------------------------------------------------------------------------- +# Determinism + audit-artefact-sync (against committed sample) +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif(not _RELEASE_PRESENT, reason="release bundles not present") +def test_render_is_byte_deterministic() -> None: + """Two back-to-back renders against the same metadata produce + byte-identical HTML — the determinism contract this script relies + on for the sync test below.""" + + metadata = json.loads(_COMMITTED_METADATA.read_text(encoding="utf-8")) + a = preview.render_kaggle_html(metadata, metadata["image"]) + b = preview.render_kaggle_html(metadata, metadata["image"]) + assert a == b + + +@pytest.mark.skipif( + not (_RELEASE_PRESENT and _COMMITTED_SAMPLE.exists()), + reason="release bundles or committed preview sample missing", +) +def test_committed_sample_matches_fresh_regeneration() -> None: + """The audit-artefact-sync gate. + + A fresh render of the committed Kaggle metadata must equal + ``release/_preview_committed/kaggle.html`` byte-for-byte. If + this fails, either the renderer changed or the upstream metadata + drifted without re-running the preview script. 
Regenerate via:: + + python scripts/preview_kaggle_page.py --no-serve + cp release/_preview/kaggle/index.html release/_preview_committed/kaggle.html + """ + + metadata = json.loads(_COMMITTED_METADATA.read_text(encoding="utf-8")) + fresh = preview.render_kaggle_html(metadata, metadata["image"]) + committed = _COMMITTED_SAMPLE.read_text(encoding="utf-8") + assert fresh == committed + + +# --------------------------------------------------------------------------- +# Driver — pre-flight error paths (no server start) +# --------------------------------------------------------------------------- + + +def test_run_preview_raises_on_missing_metadata(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + fake_release.mkdir() + config = preview.PreviewConfig( + release_dir=fake_release, + out_dir=tmp_path / "preview", + port=8765, + open_browser=False, + serve=False, + ) + with pytest.raises(FileNotFoundError, match="dataset metadata not found"): + preview.run_preview(config) + + +def test_run_preview_raises_on_malformed_metadata(tmp_path: Path) -> None: + fake_release = tmp_path / "release" + (fake_release / "kaggle").mkdir(parents=True) + (fake_release / "kaggle" / "dataset-metadata.json").write_text( + '"not-an-object"', encoding="utf-8" + ) + config = preview.PreviewConfig( + release_dir=fake_release, + out_dir=tmp_path / "preview", + port=8765, + open_browser=False, + serve=False, + ) + with pytest.raises(ValueError, match="not a JSON object"): + preview.run_preview(config) + + +def test_run_preview_raises_on_missing_required_metadata_keys(tmp_path: Path) -> None: + """Pre-flight required-key check (Copilot finding COPILOT-1). + + The renderer's _render_header / _render_footer / _render_cover + index ``title`` / ``subtitle`` / ``id`` / ``image`` / + ``licenses[0].name`` / ``expectedUpdateFrequency`` directly; a + malformed metadata file would otherwise raise ``KeyError`` + mid-render and bypass main()'s rc=2 translation. 
The validator + surfaces every missing key in one message, not just the first. + """ + + fake_release = tmp_path / "release" + (fake_release / "kaggle").mkdir(parents=True) + # Drop several required keys at once. + (fake_release / "kaggle" / "dataset-metadata.json").write_text( + json.dumps( + { + "subtitle": "only the subtitle survives", + "licenses": [{"NOT_NAME": "MIT"}], # malformed: no 'name' inside [0] + "image": "dataset-cover-image.png", + } + ), + encoding="utf-8", + ) + config = preview.PreviewConfig( + release_dir=fake_release, + out_dir=tmp_path / "preview", + port=8765, + open_browser=False, + serve=False, + ) + with pytest.raises(ValueError, match="missing required key") as exc_info: + preview.run_preview(config) + msg = str(exc_info.value) + # All four missing keys reported in one error, alphabetised. + assert "expectedUpdateFrequency" in msg + assert "id" in msg + assert "title" in msg + assert "licenses[0].name" in msg + + +def test_validate_required_metadata_accepts_well_formed_payload(tmp_path: Path) -> None: + """Sanity gate the validator does not over-fire on the canonical fixture.""" + + preview._validate_required_metadata(_minimal_metadata(), tmp_path / "any.json") + + +def test_run_preview_raises_on_missing_cover_image(tmp_path: Path) -> None: + """A well-formed metadata payload that points at a missing cover + image surfaces FileNotFoundError, not a required-key ValueError. + + The required-key validator (Copilot finding COPILOT-1) runs + BEFORE the cover-existence check, so the fixture must include + every required key for this assertion to test the cover-path + rather than the validator. 
+ """ + + fake_release = tmp_path / "release" + (fake_release / "kaggle").mkdir(parents=True) + well_formed = { + **_minimal_metadata(), + "image": "missing.png", # the file does not exist on disk + } + (fake_release / "kaggle" / "dataset-metadata.json").write_text( + json.dumps(well_formed), encoding="utf-8" + ) + config = preview.PreviewConfig( + release_dir=fake_release, + out_dir=tmp_path / "preview", + port=8765, + open_browser=False, + serve=False, + ) + with pytest.raises(FileNotFoundError, match="cover image"): + preview.run_preview(config) + + +def test_run_preview_writes_html_and_copies_cover(tmp_path: Path) -> None: + """End-to-end no-serve path: HTML lands at ``out_dir/index.html``; + cover image is copied as a real file (not a symlink).""" + + fake_release = tmp_path / "release" + (fake_release / "kaggle").mkdir(parents=True) + cover_src = fake_release / "kaggle" / "dataset-cover-image.png" + cover_src.write_bytes(b"\x89PNG\r\n\x1a\nfake") + (fake_release / "kaggle" / "dataset-metadata.json").write_text( + json.dumps(_minimal_metadata()), encoding="utf-8" + ) + out_dir = tmp_path / "preview" + outcome = preview.run_preview( + preview.PreviewConfig( + release_dir=fake_release, + out_dir=out_dir, + port=8765, + open_browser=False, + serve=False, + ) + ) + assert outcome.html_path == out_dir / "index.html" + assert outcome.html_path.is_file() + assert outcome.cover_path.is_file() + assert not outcome.cover_path.is_symlink() + # The HTML references the cover image by sibling-relative name. 
+ assert 'src="dataset-cover-image.png"' in outcome.html_path.read_text(encoding="utf-8") + + +def test_main_returns_2_on_missing_release( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + rc = preview.main( + [ + "--release-dir", + str(tmp_path / "missing"), + "--out-dir", + str(tmp_path / "preview"), + "--no-serve", + ] + ) + assert rc == 2 + captured = capsys.readouterr() + assert "dataset metadata not found" in captured.err + + +def test_parse_args_defaults() -> None: + """``parse_args`` is a free function so tests can exercise the + flag wiring without invoking the full driver.""" + + args = preview.parse_args(["--no-serve"]) + assert args.release_dir == preview.DEFAULT_RELEASE_DIR + assert args.out_dir == preview.DEFAULT_OUT_DIR + assert args.port == preview.DEFAULT_PORT + assert args.open_browser is False + assert args.no_serve is True + + +def test_tier_of_extracts_leading_path_segment() -> None: + """``_tier_of`` is the load-bearing helper that buckets resources + by tier in the file tree — pin its contract.""" + + assert preview._tier_of("intro/lead_scoring.csv") == "intro" + assert preview._tier_of("intermediate/tasks/converted/train.parquet") == "intermediate" + assert preview._tier_of("toplevel.json") == "" + + +# --------------------------------------------------------------------------- +# _preview_common helpers — plural() unit test + make_server / serve smoke test +# (smoke test folded back from self-review pass 3 — _serve was previously untested) +# --------------------------------------------------------------------------- + + +def test_plural_helper_handles_singular_zero_and_n() -> None: + """``_preview_common.plural`` is the one helper behind every count + heading in both preview scripts. 
Pin n=1 → singular, n=0/2/N → + plural (Copilot finding COPILOT-3 — instructor sample previously + rendered "(1 configs)" because the plural was always ``+ 's'``).""" + + import _preview_common # noqa: PLC0415 — local import for the helper test + + assert _preview_common.plural(1, "config") == "1 config" + assert _preview_common.plural(2, "config") == "2 configs" + assert _preview_common.plural(0, "config") == "0 configs" # zero is plural in English + assert _preview_common.plural(1, "tabular file") == "1 tabular file" + assert _preview_common.plural(5, "tabular file") == "5 tabular files" + # Irregular plural form is supported via explicit override (none today). + assert _preview_common.plural(1, "child", "children") == "1 child" + assert _preview_common.plural(3, "child", "children") == "3 children" + + +def test_make_server_binds_and_serves_index(tmp_path: Path) -> None: + """Stand the server up on port 0 (kernel-picked), GET ``/``, + assert 200 + body shape, shut down cleanly. + + Covers every path inside ``_preview_common.make_server`` and + ``_make_handler_factory`` (handler subclass with ``directory=``, + ``ThreadingHTTPServer`` instantiation, address-reuse posture, + static-file serving). ``serve`` itself is the blocking caller + that wraps this and is exercised manually. + """ + + import threading + import urllib.request + + import _preview_common # noqa: PLC0415 — local import for the smoke test + + (tmp_path / "index.html").write_text( + "

preview-smoke-token

", encoding="utf-8" + ) + httpd = _preview_common.make_server(tmp_path, port=0) + bound_port = httpd.server_address[1] + assert bound_port > 0 + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + try: + with urllib.request.urlopen(f"http://localhost:{bound_port}/", timeout=5) as resp: # noqa: S310 — localhost smoke + assert resp.status == 200 + body = resp.read().decode("utf-8") + assert "preview-smoke-token" in body + finally: + httpd.shutdown() + httpd.server_close() + thread.join(timeout=5) + assert not thread.is_alive()