From 65c937403ff54eee82a7cea1029534e301ad74c5 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 5 May 2026 09:57:13 +0300 Subject: [PATCH 1/2] M12: CLI polish Catch the user-facing CLI up to v4 bundle schema and clear two M12 deferrals from the existing roadmap: - `leadforge inspect` now prints `primary_task`, `label_window_days`, `snapshot_day` (with a "(full horizon, no windowing)" annotation when null/equal to horizon), and a count + list of redacted columns (full list for <=4 columns, truncated for more). - `leadforge inspect --json` / `-j` dumps the parsed manifest as pipe-friendly JSON. - `leadforge generate` exposes `--snapshot-day`, `--primary-task`, `--label-window-days` flags. They thread to existing `Generator.from_recipe()` kwargs; recipe defaults still apply when omitted. - Help strings on `--n-accounts`, `--n-contacts`, `--n-leads`, and `--horizon-days` rewritten from "Number of leads." style to "Override recipe default ...". Closes deferrals "M12: CLI --json flag" and "M12: CLI help text polish" in .agent-plan.md. `validate --json` and `--strict` remain scoped out as separate follow-up PRs (--strict still needs a per-check vs global gating design call). 
Co-Authored-By: Claude Opus 4.7 --- .agent-plan.md | 7 +- leadforge/cli/commands/generate.py | 32 +++++- leadforge/cli/commands/inspect.py | 45 ++++++++ tests/test_cli.py | 175 +++++++++++++++++++++++++++++ 4 files changed, 252 insertions(+), 7 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index ed2a914..b810a04 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -312,9 +312,10 @@ Documentation + CI: | Item | Status | Rationale | |---|---|---| -| M12: CLI `--json` flag | Deferred | No consumer needs it yet; add post-v4 | -| M12: CLI `--strict` flag | Deferred | Per-check control is better than global flag | -| M12: CLI help text polish | Deferred | Low priority vs dataset | +| M12: CLI `--json` flag | **Done** | `leadforge inspect --json`; `validate --json` deferred separately | +| M12: CLI `--strict` flag | Deferred | Per-check control is better than global flag; design call needed | +| M12: CLI `validate --json` | Deferred | Separate follow-up to inspect's --json | +| M12: CLI help text polish | **Done** | inspect surfaces v4 manifest fields; generate exposes `--snapshot-day`, `--primary-task`, `--label-window-days`; help strings tightened | | M14: Sample bundle commit | Absorbed into v4-M2 | v4 dataset IS the sample | | M14: Notebook 1 (inspecting world) | **Done** | `leadforge/examples/notebooks/01_inspect_world.ipynb` | | M14: Notebook 2 (lead scoring baseline) | Deferred | v4 validation script covers this | diff --git a/leadforge/cli/commands/generate.py b/leadforge/cli/commands/generate.py index cd5eeb1..b6c575e 100644 --- a/leadforge/cli/commands/generate.py +++ b/leadforge/cli/commands/generate.py @@ -23,11 +23,32 @@ def generate( "--difficulty", help="Difficulty profile: intro, intermediate, or advanced.", ), - n_accounts: int | None = typer.Option(None, "--n-accounts", help="Number of accounts."), - n_contacts: int | None = typer.Option(None, "--n-contacts", help="Number of contacts."), - n_leads: int | None = typer.Option(None, "--n-leads", 
help="Number of leads."), + n_accounts: int | None = typer.Option( + None, "--n-accounts", help="Override recipe default account count." + ), + n_contacts: int | None = typer.Option( + None, "--n-contacts", help="Override recipe default contact count." + ), + n_leads: int | None = typer.Option( + None, "--n-leads", help="Override recipe default lead count." + ), horizon_days: int | None = typer.Option( - None, "--horizon-days", help="Simulation horizon in days." + None, "--horizon-days", help="Override recipe default simulation horizon in days." + ), + primary_task: str | None = typer.Option( + None, + "--primary-task", + help="Override recipe default task identifier (e.g. converted_within_60_days).", + ), + label_window_days: int | None = typer.Option( + None, + "--label-window-days", + help="Override recipe default label observation window in days.", + ), + snapshot_day: int | None = typer.Option( + None, + "--snapshot-day", + help="Override recipe default snapshot day for windowed feature aggregation.", ), override: str | None = typer.Option( None, "--override", help="Path to a YAML config override file." 
@@ -66,6 +87,9 @@ def generate( n_contacts=n_contacts, n_leads=n_leads, horizon_days=horizon_days, + primary_task=primary_task, + label_window_days=label_window_days, + snapshot_day=snapshot_day, override=override_dict, ) except (LeadforgeError, ValueError) as exc: diff --git a/leadforge/cli/commands/inspect.py b/leadforge/cli/commands/inspect.py index 9ad5a5c..b6cf8b6 100644 --- a/leadforge/cli/commands/inspect.py +++ b/leadforge/cli/commands/inspect.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path from typing import Any @@ -13,6 +14,12 @@ def inspect( bundle_path: str = typer.Argument(..., help="Path to a generated bundle directory."), + json_output: bool = typer.Option( # noqa: FBT001 + False, + "--json", + "-j", + help="Emit the parsed manifest as JSON to stdout (pipe-friendly).", + ), ) -> None: """Inspect a generated dataset bundle and print a summary.""" root = Path(bundle_path) @@ -39,6 +46,10 @@ def inspect( typer.echo("Error: manifest.json is not a JSON object", err=True) raise typer.Exit(1) + if json_output: + typer.echo(json.dumps(manifest, indent=2)) + return + typer.echo(f"Bundle: {root}") typer.echo(f" Recipe: {manifest.get('recipe_id', '?')}") typer.echo(f" Seed: {manifest.get('seed', '?')}") @@ -48,6 +59,10 @@ def inspect( typer.echo(f" Generated at: {manifest.get('generation_timestamp', '?')}") typer.echo(f" Package: leadforge {manifest.get('package_version', '?')}") typer.echo(f" Schema ver: {manifest.get('bundle_schema_version', '?')}") + typer.echo(f" Primary task: {manifest.get('primary_task', '?')}") + typer.echo(f" Label window: {manifest.get('label_window_days', '?')} days") + typer.echo(f" Snapshot day: {_format_snapshot_day(manifest)}") + typer.echo(f" Redactions: {_format_redactions(manifest)}") typer.echo(f" Motif family: {manifest.get('motif_family', '?')}") typer.echo("") @@ -74,6 +89,36 @@ def inspect( typer.echo(f"Metadata dir: {'present' if has_metadata else 'absent'}") +def 
_format_snapshot_day(manifest: dict[str, Any]) -> str: + """Format the ``snapshot_day`` field, annotating the full-horizon case.""" + if "snapshot_day" not in manifest: + return "?" + snapshot_day = manifest.get("snapshot_day") + horizon_days = manifest.get("horizon_days") + if snapshot_day is None or ( + isinstance(snapshot_day, int) + and isinstance(horizon_days, int) + and snapshot_day == horizon_days + ): + return "(full horizon, no windowing)" + return f"{snapshot_day} days" + + +def _format_redactions(manifest: dict[str, Any]) -> str: + """Format the ``redacted_columns`` field as count + list (full or truncated).""" + if "redacted_columns" not in manifest: + return "?" + cols = manifest.get("redacted_columns") or [] + if not isinstance(cols, list): + return "?" + if not cols: + return "0 column(s) []" + if len(cols) <= 4: + return f"{len(cols)} column(s) [{', '.join(cols)}]" + head = ", ".join(cols[:3]) + return f"{len(cols)} column(s) [{head}, ...] ({len(cols)} total)" + + def _safe_get(obj: Any, key: str, default: str = "?") -> Any: """Get a key from *obj* if it's a dict, else return *default*.""" if isinstance(obj, dict): diff --git a/tests/test_cli.py b/tests/test_cli.py index b8438cc..af045e7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -241,6 +241,49 @@ def test_difficulty_flag(self, tmp_path: Path) -> None: manifest = json.loads((out / "manifest.json").read_text()) assert manifest["difficulty"] == "intro" + def test_snapshot_day_flag(self, tmp_path: Path) -> None: + out = tmp_path / "snap_out" + result = runner.invoke( + app, + [ + *_GENERATE_ARGS, + "--snapshot-day", + "45", + "--out", + str(out), + ], + ) + assert result.exit_code == 0, f"generate failed:\n{result.output}" + manifest = json.loads((out / "manifest.json").read_text()) + assert manifest["snapshot_day"] == 45 + + def test_primary_task_and_label_window_flags(self, tmp_path: Path) -> None: + out = tmp_path / "task_out" + result = runner.invoke( + app, + [ + *_GENERATE_ARGS, + 
"--primary-task", + "converted_within_60_days", + "--label-window-days", + "60", + "--out", + str(out), + ], + ) + assert result.exit_code == 0, f"generate failed:\n{result.output}" + manifest = json.loads((out / "manifest.json").read_text()) + assert manifest["primary_task"] == "converted_within_60_days" + assert manifest["label_window_days"] == 60 + + def test_no_window_flags_uses_recipe_defaults(self, bundle_dir: Path) -> None: + """Regression guard: omitting new flags should still use recipe defaults.""" + manifest = json.loads((bundle_dir / "manifest.json").read_text()) + # Recipe defaults from b2b_saas_procurement_v1 + assert manifest["primary_task"] == "converted_within_90_days" + assert manifest["label_window_days"] == 90 + assert manifest["snapshot_day"] == 30 + # --------------------------------------------------------------------------- # inspect command @@ -270,6 +313,138 @@ def test_file_instead_of_dir_fails(self, bundle_dir: Path) -> None: assert result.exit_code != 0 assert "not a directory" in result.output + def test_inspect_surfaces_v4_fields(self, bundle_dir: Path) -> None: + """inspect prints primary_task, label_window_days, snapshot_day, redactions.""" + manifest = json.loads((bundle_dir / "manifest.json").read_text()) + result = runner.invoke(app, ["inspect", str(bundle_dir)]) + assert result.exit_code == 0 + output = result.output + assert "Primary task:" in output + assert manifest["primary_task"] in output + assert "Label window:" in output + assert str(manifest["label_window_days"]) in output + assert "Snapshot day:" in output + assert str(manifest["snapshot_day"]) in output + assert "Redactions:" in output + + def test_inspect_snapshot_day_none_annotation(self, tmp_path: Path) -> None: + """A manifest with snapshot_day=None prints the full-horizon annotation.""" + bundle = tmp_path / "manual" + bundle.mkdir() + manifest = { + "bundle_schema_version": "4", + "package_version": "1.0.0", + "recipe_id": "x", + "seed": 1, + "exposure_mode": 
"student_public", + "difficulty": "intro", + "horizon_days": 90, + "primary_task": "converted_within_90_days", + "label_window_days": 90, + "snapshot_day": None, + "motif_family": "fit_dominant", + "redacted_columns": [], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "(full horizon, no windowing)" in result.output + + def test_inspect_redactions_empty(self, tmp_path: Path) -> None: + bundle = tmp_path / "redact_empty" + bundle.mkdir() + manifest = {"redacted_columns": [], "tables": {}, "tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "Redactions:" in result.output + assert "0 column(s)" in result.output + + def test_inspect_redactions_short_list(self, tmp_path: Path) -> None: + bundle = tmp_path / "redact_short" + bundle.mkdir() + manifest = { + "redacted_columns": ["col_a", "col_b"], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "2 column(s)" in result.output + assert "col_a" in result.output + assert "col_b" in result.output + assert "..." not in result.output.split("Redactions:")[1].splitlines()[0] + + def test_inspect_redactions_long_list_truncates(self, tmp_path: Path) -> None: + bundle = tmp_path / "redact_long" + bundle.mkdir() + cols = ["c1", "c2", "c3", "c4", "c5", "c6"] + manifest = { + "redacted_columns": cols, + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = [line for line in result.output.splitlines() if "Redactions:" in line][0] + assert "6 column(s)" in line + assert "c1" in line + assert "..." 
in line + assert "(6 total)" in line + # Tail elements should not all appear + assert "c5" not in line + assert "c6" not in line + + def test_inspect_json_emits_valid_json(self, bundle_dir: Path) -> None: + """--json emits valid JSON containing all the human-readable keys.""" + result = runner.invoke(app, ["inspect", str(bundle_dir), "--json"]) + assert result.exit_code == 0 + parsed = json.loads(result.output) + assert isinstance(parsed, dict) + for key in ( + "recipe_id", + "seed", + "exposure_mode", + "difficulty", + "horizon_days", + "package_version", + "bundle_schema_version", + "primary_task", + "label_window_days", + "snapshot_day", + "redacted_columns", + "motif_family", + "tables", + "tasks", + ): + assert key in parsed, f"missing key: {key}" + + def test_inspect_json_short_flag(self, bundle_dir: Path) -> None: + """-j short flag works the same as --json.""" + result = runner.invoke(app, ["inspect", str(bundle_dir), "-j"]) + assert result.exit_code == 0 + parsed = json.loads(result.output) + assert parsed["recipe_id"] == "b2b_saas_procurement_v1" + + def test_inspect_json_and_plain_no_cross_contamination(self, bundle_dir: Path) -> None: + """JSON mode and plain mode return distinct output formats.""" + plain = runner.invoke(app, ["inspect", str(bundle_dir)]) + json_out = runner.invoke(app, ["inspect", str(bundle_dir), "--json"]) + assert plain.exit_code == 0 + assert json_out.exit_code == 0 + # Plain mode has the "Bundle:" header + assert "Bundle:" in plain.output + # JSON mode is parseable + parsed = json.loads(json_out.output) + assert isinstance(parsed, dict) + # Plain output should NOT be parseable as JSON + with pytest.raises(json.JSONDecodeError): + json.loads(plain.output) + # --------------------------------------------------------------------------- # validate command From 68404b87063ca9073ab6eb533d35b31d411f2b78 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 5 May 2026 10:04:33 +0300 Subject: [PATCH 2/2] M12: address self-review of 
inspect formatting + add CHANGELOG/README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acts on the brutal self-review of #60. Summary: - Drop the snapshot_day == horizon_days "(full horizon)" branch. The manifest is the source of truth; if a user pinned snapshot_day=90 on a 90-day horizon, inspect now prints "90 days" verbatim instead of silently relabelling it. Only manifest snapshot_day=null prints the full-horizon annotation. - Skip v3+ rows (Primary task / Label window / Snapshot day / Redactions) entirely when the manifest doesn't carry those keys — v2 bundles render cleanly with no "?" placeholders. - Inline the two _format_* helpers; drop the redundant runtime isinstance() guards (manifest dict-shape was already validated up top). - Redaction line cleanup: pluralize correctly ("1 column" / "N columns"), drop the "(N total)" redundancy, omit the line entirely on empty redacted_columns instead of printing "0 column(s) []". - Pin the truncation boundary explicitly in tests: 4 cols → full list, 5 cols → first-3 + ellipsis (assertions fail if c4/c5 leak into the truncated head). - Add header-order regression test pinning the 8 pre-existing rows. - Add the missing --json contract test: stdout is JSON-equivalent to on-disk manifest.json. - Add CHANGELOG entry under Unreleased and a `--json | jq` example to the README CLI section. All 954 tests pass; ruff + mypy clean. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 16 +++ README.md | 3 + leadforge/cli/commands/inspect.py | 57 ++++------ tests/test_cli.py | 174 ++++++++++++++++++++++-------- 4 files changed, 170 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ddc4cf..5af52b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ Format inspired by [Keep a Changelog](https://keepachangelog.com/). 
## Unreleased +### CLI surfaces v4 fields + +- `leadforge inspect` now prints `Primary task`, `Label window`, + `Snapshot day`, and `Redactions` for v3+ bundles, immediately after + `Schema ver`. Lines are omitted entirely on older v2 bundles — + no `?` placeholders. Snapshot day prints `(full horizon, no + windowing)` only when the manifest stores `null`; numeric values + (including `snapshot_day == horizon_days`) are printed verbatim. +- `leadforge inspect --json` / `-j` emits the parsed `manifest.json` + to stdout — the output is semantically equivalent to the on-disk + manifest (re-serialized with 2-space indent), suitable for `jq` pipelines. +- `leadforge generate` adds `--snapshot-day`, `--primary-task`, and + `--label-window-days` flags, threading directly to existing + `Generator.from_recipe()` kwargs. Recipe defaults still apply when + the flags are omitted. + ### Bundle schema v4 `bundle_schema_version` bumped from `"3"` to `"4"`. Closes the final diff --git a/README.md b/README.md index 00e4fe0..560a2e8 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ leadforge generate \ # Inspect bundle metadata leadforge inspect ./out/demo_bundle +# Or pipe the manifest into jq +leadforge inspect ./out/demo_bundle --json | jq .snapshot_day + # Validate bundle integrity leadforge validate ./out/demo_bundle ``` diff --git a/leadforge/cli/commands/inspect.py b/leadforge/cli/commands/inspect.py index b6cf8b6..764e1c2 100644 --- a/leadforge/cli/commands/inspect.py +++ b/leadforge/cli/commands/inspect.py @@ -59,10 +59,29 @@ def inspect( typer.echo(f" Generated at: {manifest.get('generation_timestamp', '?')}") typer.echo(f" Package: leadforge {manifest.get('package_version', '?')}") typer.echo(f" Schema ver: {manifest.get('bundle_schema_version', '?')}") - typer.echo(f" Primary task: {manifest.get('primary_task', '?')}") - typer.echo(f" Label window: {manifest.get('label_window_days', '?')} days") - typer.echo(f" Snapshot day: {_format_snapshot_day(manifest)}") - typer.echo(f" Redactions: 
{_format_redactions(manifest)}") + + # v3+ fields — only print rows for keys actually present in the manifest, + # so older (v2) bundles render cleanly without "?" placeholders. + if "primary_task" in manifest: + typer.echo(f" Primary task: {manifest['primary_task']}") + if "label_window_days" in manifest: + typer.echo(f" Label window: {manifest['label_window_days']} days") + if "snapshot_day" in manifest: + snapshot_day = manifest["snapshot_day"] + if snapshot_day is None: + typer.echo(" Snapshot day: (full horizon, no windowing)") + else: + typer.echo(f" Snapshot day: {snapshot_day} days") + if "redacted_columns" in manifest: + cols = manifest["redacted_columns"] or [] + if cols: + noun = "column" if len(cols) == 1 else "columns" + if len(cols) <= 4: + names = ", ".join(cols) + else: + names = ", ".join(cols[:3]) + ", ..." + typer.echo(f" Redactions: {len(cols)} {noun} [{names}]") + typer.echo(f" Motif family: {manifest.get('motif_family', '?')}") typer.echo("") @@ -89,36 +108,6 @@ def inspect( typer.echo(f"Metadata dir: {'present' if has_metadata else 'absent'}") -def _format_snapshot_day(manifest: dict[str, Any]) -> str: - """Format the ``snapshot_day`` field, annotating the full-horizon case.""" - if "snapshot_day" not in manifest: - return "?" - snapshot_day = manifest.get("snapshot_day") - horizon_days = manifest.get("horizon_days") - if snapshot_day is None or ( - isinstance(snapshot_day, int) - and isinstance(horizon_days, int) - and snapshot_day == horizon_days - ): - return "(full horizon, no windowing)" - return f"{snapshot_day} days" - - -def _format_redactions(manifest: dict[str, Any]) -> str: - """Format the ``redacted_columns`` field as count + list (full or truncated).""" - if "redacted_columns" not in manifest: - return "?" - cols = manifest.get("redacted_columns") or [] - if not isinstance(cols, list): - return "?" 
- if not cols: - return "0 column(s) []" - if len(cols) <= 4: - return f"{len(cols)} column(s) [{', '.join(cols)}]" - head = ", ".join(cols[:3]) - return f"{len(cols)} column(s) [{head}, ...] ({len(cols)} total)" - - def _safe_get(obj: Any, key: str, default: str = "?") -> Any: """Get a key from *obj* if it's a dict, else return *default*.""" if isinstance(obj, dict): diff --git a/tests/test_cli.py b/tests/test_cli.py index af045e7..c6a1418 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -327,22 +327,61 @@ def test_inspect_surfaces_v4_fields(self, bundle_dir: Path) -> None: assert str(manifest["snapshot_day"]) in output assert "Redactions:" in output - def test_inspect_snapshot_day_none_annotation(self, tmp_path: Path) -> None: - """A manifest with snapshot_day=None prints the full-horizon annotation.""" - bundle = tmp_path / "manual" + def test_inspect_pre_existing_header_order_unchanged(self, bundle_dir: Path) -> None: + """Regression guard: the 8 pre-v4 header rows stay in the same order.""" + result = runner.invoke(app, ["inspect", str(bundle_dir)]) + assert result.exit_code == 0 + labels = [ + "Recipe:", + "Seed:", + "Mode:", + "Difficulty:", + "Horizon days:", + "Generated at:", + "Package:", + "Schema ver:", + ] + positions = [result.output.index(label) for label in labels] + ordered = list(zip(labels, positions, strict=True)) + assert positions == sorted(positions), f"header rows out of order: {ordered}" + + def test_inspect_v2_bundle_omits_v3_lines(self, tmp_path: Path) -> None: + """v2-era manifests (no v3+ keys) should not print '?'-padded lines.""" + bundle = tmp_path / "v2" bundle.mkdir() manifest = { - "bundle_schema_version": "4", - "package_version": "1.0.0", + "bundle_schema_version": "2", + "package_version": "0.4.0", "recipe_id": "x", "seed": 1, "exposure_mode": "student_public", "difficulty": "intro", "horizon_days": 90, + "motif_family": "fit_dominant", + "tables": {}, + "tasks": {}, + } + (bundle / 
"manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + # None of the v3+ rows should appear at all. + assert "Primary task:" not in result.output + assert "Label window:" not in result.output + assert "Snapshot day:" not in result.output + assert "Redactions:" not in result.output + # And no stray "?" placeholder lines from those fields. + assert "? days" not in result.output + + def test_inspect_snapshot_day_none_annotation(self, tmp_path: Path) -> None: + """A manifest with snapshot_day=None prints the full-horizon annotation.""" + bundle = tmp_path / "manual" + bundle.mkdir() + manifest = { + "bundle_schema_version": "4", + "horizon_days": 90, "primary_task": "converted_within_90_days", "label_window_days": 90, "snapshot_day": None, - "motif_family": "fit_dominant", "redacted_columns": [], "tables": {}, "tasks": {}, @@ -352,17 +391,48 @@ def test_inspect_snapshot_day_none_annotation(self, tmp_path: Path) -> None: assert result.exit_code == 0 assert "(full horizon, no windowing)" in result.output - def test_inspect_redactions_empty(self, tmp_path: Path) -> None: + def test_inspect_snapshot_day_equal_to_horizon_prints_value(self, tmp_path: Path) -> None: + """snapshot_day == horizon_days is NOT silently relabelled — manifest wins.""" + bundle = tmp_path / "equal" + bundle.mkdir() + manifest = { + "horizon_days": 90, + "snapshot_day": 90, + "redacted_columns": [], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "Snapshot day: 90 days" in result.output + assert "(full horizon" not in result.output + + def test_inspect_redactions_empty_omits_line(self, tmp_path: Path) -> None: + """Empty redacted_columns prints no line at all (no '0 columns []' noise).""" bundle = tmp_path / "redact_empty" bundle.mkdir() manifest = {"redacted_columns": [], 
"tables": {}, "tasks": {}} (bundle / "manifest.json").write_text(json.dumps(manifest)) result = runner.invoke(app, ["inspect", str(bundle)]) assert result.exit_code == 0 - assert "Redactions:" in result.output - assert "0 column(s)" in result.output + assert "Redactions:" not in result.output + + def test_inspect_redactions_singular_pluralization(self, tmp_path: Path) -> None: + """One column → 'column' (singular); not 'column(s)' or 'columns'.""" + bundle = tmp_path / "one" + bundle.mkdir() + manifest = {"redacted_columns": ["only_one"], "tables": {}, "tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "1 column [only_one]" in line + assert "columns" not in line + assert "column(s)" not in line - def test_inspect_redactions_short_list(self, tmp_path: Path) -> None: + def test_inspect_redactions_short_list_full(self, tmp_path: Path) -> None: + """2 columns: full list, plural noun, no ellipsis.""" bundle = tmp_path / "redact_short" bundle.mkdir() manifest = { @@ -373,55 +443,67 @@ def test_inspect_redactions_short_list(self, tmp_path: Path) -> None: (bundle / "manifest.json").write_text(json.dumps(manifest)) result = runner.invoke(app, ["inspect", str(bundle)]) assert result.exit_code == 0 - assert "2 column(s)" in result.output - assert "col_a" in result.output - assert "col_b" in result.output - assert "..." not in result.output.split("Redactions:")[1].splitlines()[0] + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "2 columns [col_a, col_b]" in line + assert "..." 
not in line - def test_inspect_redactions_long_list_truncates(self, tmp_path: Path) -> None: - bundle = tmp_path / "redact_long" + def test_inspect_redactions_boundary_4_cols_full(self, tmp_path: Path) -> None: + """Exactly 4 columns: still full list, no ellipsis (≤4 → full).""" + bundle = tmp_path / "redact_4" + bundle.mkdir() + manifest = { + "redacted_columns": ["c1", "c2", "c3", "c4"], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "4 columns [c1, c2, c3, c4]" in line + assert "..." not in line + + def test_inspect_redactions_boundary_5_cols_truncates(self, tmp_path: Path) -> None: + """Exactly 5 columns: triggers truncation; first 3 + ellipsis only.""" + bundle = tmp_path / "redact_5" bundle.mkdir() - cols = ["c1", "c2", "c3", "c4", "c5", "c6"] manifest = { - "redacted_columns": cols, + "redacted_columns": ["c1", "c2", "c3", "c4", "c5"], "tables": {}, "tasks": {}, } (bundle / "manifest.json").write_text(json.dumps(manifest)) result = runner.invoke(app, ["inspect", str(bundle)]) assert result.exit_code == 0 - line = [line for line in result.output.splitlines() if "Redactions:" in line][0] - assert "6 column(s)" in line - assert "c1" in line - assert "..." in line - assert "(6 total)" in line - # Tail elements should not all appear + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "5 columns [c1, c2, c3, ...]" in line + # c4 and c5 must not leak into the truncated head — pin the boundary. 
+ assert "c4" not in line assert "c5" not in line - assert "c6" not in line - def test_inspect_json_emits_valid_json(self, bundle_dir: Path) -> None: - """--json emits valid JSON containing all the human-readable keys.""" + def test_inspect_redactions_long_list(self, tmp_path: Path) -> None: + """6 columns: still 'first 3 + ellipsis'.""" + bundle = tmp_path / "redact_long" + bundle.mkdir() + cols = ["c1", "c2", "c3", "c4", "c5", "c6"] + manifest = {"redacted_columns": cols, "tables": {}, "tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "6 columns [c1, c2, c3, ...]" in line + # No redundant "(N total)" — count is already at the front. + assert "(6 total)" not in line + for tail in ("c4", "c5", "c6"): + assert tail not in line + + def test_inspect_json_equals_manifest_file(self, bundle_dir: Path) -> None: + """The contract: --json output is byte-equivalent JSON to manifest.json.""" result = runner.invoke(app, ["inspect", str(bundle_dir), "--json"]) assert result.exit_code == 0 - parsed = json.loads(result.output) - assert isinstance(parsed, dict) - for key in ( - "recipe_id", - "seed", - "exposure_mode", - "difficulty", - "horizon_days", - "package_version", - "bundle_schema_version", - "primary_task", - "label_window_days", - "snapshot_day", - "redacted_columns", - "motif_family", - "tables", - "tasks", - ): - assert key in parsed, f"missing key: {key}" + on_disk = json.loads((bundle_dir / "manifest.json").read_text()) + from_cli = json.loads(result.output) + assert from_cli == on_disk def test_inspect_json_short_flag(self, bundle_dir: Path) -> None: """-j short flag works the same as --json."""