diff --git a/.agent-plan.md b/.agent-plan.md index ed2a914..b810a04 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -312,9 +312,10 @@ Documentation + CI: | Item | Status | Rationale | |---|---|---| -| M12: CLI `--json` flag | Deferred | No consumer needs it yet; add post-v4 | -| M12: CLI `--strict` flag | Deferred | Per-check control is better than global flag | -| M12: CLI help text polish | Deferred | Low priority vs dataset | +| M12: CLI `--json` flag | **Done** | `leadforge inspect --json`; `validate --json` deferred separately | +| M12: CLI `--strict` flag | Deferred | Per-check control is better than global flag; design call needed | +| M12: CLI `validate --json` | Deferred | Separate follow-up to inspect's --json | +| M12: CLI help text polish | **Done** | inspect surfaces v4 manifest fields; generate exposes `--snapshot-day`, `--primary-task`, `--label-window-days`; help strings tightened | | M14: Sample bundle commit | Absorbed into v4-M2 | v4 dataset IS the sample | | M14: Notebook 1 (inspecting world) | **Done** | `leadforge/examples/notebooks/01_inspect_world.ipynb` | | M14: Notebook 2 (lead scoring baseline) | Deferred | v4 validation script covers this | diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ddc4cf..5af52b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ Format inspired by [Keep a Changelog](https://keepachangelog.com/). ## Unreleased +### CLI surfaces v4 fields + +- `leadforge inspect` now prints `Primary task`, `Label window`, + `Snapshot day`, and `Redactions` for v3+ bundles, immediately after + `Schema ver`. Lines are omitted entirely on older v2 bundles — + no `?` placeholders. Snapshot day prints `(full horizon, no + windowing)` only when the manifest stores `null`; numeric values + (including `snapshot_day == horizon_days`) are printed verbatim. 
+- `leadforge inspect --json` / `-j` emits the parsed `manifest.json` + to stdout, re-serialized with 2-space indentation — it parses to the + same JSON value as the on-disk manifest, suitable for `jq` pipelines. +- `leadforge generate` adds `--snapshot-day`, `--primary-task`, and + `--label-window-days` flags, threading directly to existing + `Generator.from_recipe()` kwargs. Recipe defaults still apply when + the flags are omitted. + ### Bundle schema v4 `bundle_schema_version` bumped from `"3"` to `"4"`. Closes the final diff --git a/README.md b/README.md index 00e4fe0..560a2e8 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ leadforge generate \ # Inspect bundle metadata leadforge inspect ./out/demo_bundle +# Or pipe the manifest into jq +leadforge inspect ./out/demo_bundle --json | jq .snapshot_day + # Validate bundle integrity leadforge validate ./out/demo_bundle ``` diff --git a/leadforge/cli/commands/generate.py b/leadforge/cli/commands/generate.py index cd5eeb1..b6c575e 100644 --- a/leadforge/cli/commands/generate.py +++ b/leadforge/cli/commands/generate.py @@ -23,11 +23,32 @@ def generate( "--difficulty", help="Difficulty profile: intro, intermediate, or advanced.", ), - n_accounts: int | None = typer.Option(None, "--n-accounts", help="Number of accounts."), - n_contacts: int | None = typer.Option(None, "--n-contacts", help="Number of contacts."), - n_leads: int | None = typer.Option(None, "--n-leads", help="Number of leads."), + n_accounts: int | None = typer.Option( + None, "--n-accounts", help="Override recipe default account count." + ), + n_contacts: int | None = typer.Option( + None, "--n-contacts", help="Override recipe default contact count." + ), + n_leads: int | None = typer.Option( + None, "--n-leads", help="Override recipe default lead count." + ), horizon_days: int | None = typer.Option( - None, "--horizon-days", help="Simulation horizon in days." + None, "--horizon-days", help="Override recipe default simulation horizon in days." 
+ ), + primary_task: str | None = typer.Option( + None, + "--primary-task", + help="Override recipe default task identifier (e.g. converted_within_60_days).", + ), + label_window_days: int | None = typer.Option( + None, + "--label-window-days", + help="Override recipe default label observation window in days.", + ), + snapshot_day: int | None = typer.Option( + None, + "--snapshot-day", + help="Override recipe default snapshot day for windowed feature aggregation.", ), override: str | None = typer.Option( None, "--override", help="Path to a YAML config override file." @@ -66,6 +87,9 @@ def generate( n_contacts=n_contacts, n_leads=n_leads, horizon_days=horizon_days, + primary_task=primary_task, + label_window_days=label_window_days, + snapshot_day=snapshot_day, override=override_dict, ) except (LeadforgeError, ValueError) as exc: diff --git a/leadforge/cli/commands/inspect.py b/leadforge/cli/commands/inspect.py index 9ad5a5c..764e1c2 100644 --- a/leadforge/cli/commands/inspect.py +++ b/leadforge/cli/commands/inspect.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path from typing import Any @@ -13,6 +14,12 @@ def inspect( bundle_path: str = typer.Argument(..., help="Path to a generated bundle directory."), + json_output: bool = typer.Option( # noqa: FBT001 + False, + "--json", + "-j", + help="Emit the parsed manifest as JSON to stdout (pipe-friendly).", + ), ) -> None: """Inspect a generated dataset bundle and print a summary.""" root = Path(bundle_path) @@ -39,6 +46,10 @@ def inspect( typer.echo("Error: manifest.json is not a JSON object", err=True) raise typer.Exit(1) + if json_output: + typer.echo(json.dumps(manifest, indent=2)) + return + typer.echo(f"Bundle: {root}") typer.echo(f" Recipe: {manifest.get('recipe_id', '?')}") typer.echo(f" Seed: {manifest.get('seed', '?')}") @@ -48,6 +59,29 @@ def inspect( typer.echo(f" Generated at: {manifest.get('generation_timestamp', '?')}") typer.echo(f" Package: leadforge 
{manifest.get('package_version', '?')}") typer.echo(f" Schema ver: {manifest.get('bundle_schema_version', '?')}") + + # v3+ fields — only print rows for keys actually present in the manifest, + # so older (v2) bundles render cleanly without "?" placeholders. + if "primary_task" in manifest: + typer.echo(f" Primary task: {manifest['primary_task']}") + if "label_window_days" in manifest: + typer.echo(f" Label window: {manifest['label_window_days']} days") + if "snapshot_day" in manifest: + snapshot_day = manifest["snapshot_day"] + if snapshot_day is None: + typer.echo(" Snapshot day: (full horizon, no windowing)") + else: + typer.echo(f" Snapshot day: {snapshot_day} days") + if "redacted_columns" in manifest: + cols = manifest["redacted_columns"] or [] + if cols: + noun = "column" if len(cols) == 1 else "columns" + if len(cols) <= 4: + names = ", ".join(cols) + else: + names = ", ".join(cols[:3]) + ", ..." + typer.echo(f" Redactions: {len(cols)} {noun} [{names}]") + typer.echo(f" Motif family: {manifest.get('motif_family', '?')}") typer.echo("") diff --git a/tests/test_cli.py b/tests/test_cli.py index b8438cc..c6a1418 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -241,6 +241,49 @@ def test_difficulty_flag(self, tmp_path: Path) -> None: manifest = json.loads((out / "manifest.json").read_text()) assert manifest["difficulty"] == "intro" + def test_snapshot_day_flag(self, tmp_path: Path) -> None: + out = tmp_path / "snap_out" + result = runner.invoke( + app, + [ + *_GENERATE_ARGS, + "--snapshot-day", + "45", + "--out", + str(out), + ], + ) + assert result.exit_code == 0, f"generate failed:\n{result.output}" + manifest = json.loads((out / "manifest.json").read_text()) + assert manifest["snapshot_day"] == 45 + + def test_primary_task_and_label_window_flags(self, tmp_path: Path) -> None: + out = tmp_path / "task_out" + result = runner.invoke( + app, + [ + *_GENERATE_ARGS, + "--primary-task", + "converted_within_60_days", + "--label-window-days", + "60", + 
"--out", + str(out), + ], + ) + assert result.exit_code == 0, f"generate failed:\n{result.output}" + manifest = json.loads((out / "manifest.json").read_text()) + assert manifest["primary_task"] == "converted_within_60_days" + assert manifest["label_window_days"] == 60 + + def test_no_window_flags_uses_recipe_defaults(self, bundle_dir: Path) -> None: + """Regression guard: omitting new flags should still use recipe defaults.""" + manifest = json.loads((bundle_dir / "manifest.json").read_text()) + # Recipe defaults from b2b_saas_procurement_v1 + assert manifest["primary_task"] == "converted_within_90_days" + assert manifest["label_window_days"] == 90 + assert manifest["snapshot_day"] == 30 + # --------------------------------------------------------------------------- # inspect command @@ -270,6 +313,220 @@ def test_file_instead_of_dir_fails(self, bundle_dir: Path) -> None: assert result.exit_code != 0 assert "not a directory" in result.output + def test_inspect_surfaces_v4_fields(self, bundle_dir: Path) -> None: + """inspect prints primary_task, label_window_days, snapshot_day, redactions.""" + manifest = json.loads((bundle_dir / "manifest.json").read_text()) + result = runner.invoke(app, ["inspect", str(bundle_dir)]) + assert result.exit_code == 0 + output = result.output + assert "Primary task:" in output + assert manifest["primary_task"] in output + assert "Label window:" in output + assert str(manifest["label_window_days"]) in output + assert "Snapshot day:" in output + assert str(manifest["snapshot_day"]) in output + assert "Redactions:" in output + + def test_inspect_pre_existing_header_order_unchanged(self, bundle_dir: Path) -> None: + """Regression guard: the 8 pre-v4 header rows stay in the same order.""" + result = runner.invoke(app, ["inspect", str(bundle_dir)]) + assert result.exit_code == 0 + labels = [ + "Recipe:", + "Seed:", + "Mode:", + "Difficulty:", + "Horizon days:", + "Generated at:", + "Package:", + "Schema ver:", + ] + positions = 
[result.output.index(label) for label in labels] + ordered = list(zip(labels, positions, strict=True)) + assert positions == sorted(positions), f"header rows out of order: {ordered}" + + def test_inspect_v2_bundle_omits_v3_lines(self, tmp_path: Path) -> None: + """v2-era manifests (no v3+ keys) should not print '?'-padded lines.""" + bundle = tmp_path / "v2" + bundle.mkdir() + manifest = { + "bundle_schema_version": "2", + "package_version": "0.4.0", + "recipe_id": "x", + "seed": 1, + "exposure_mode": "student_public", + "difficulty": "intro", + "horizon_days": 90, + "motif_family": "fit_dominant", + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + # None of the v3+ rows should appear at all. + assert "Primary task:" not in result.output + assert "Label window:" not in result.output + assert "Snapshot day:" not in result.output + assert "Redactions:" not in result.output + # And no stray "?" placeholder lines from those fields. + assert "? 
days" not in result.output + + def test_inspect_snapshot_day_none_annotation(self, tmp_path: Path) -> None: + """A manifest with snapshot_day=None prints the full-horizon annotation.""" + bundle = tmp_path / "manual" + bundle.mkdir() + manifest = { + "bundle_schema_version": "4", + "horizon_days": 90, + "primary_task": "converted_within_90_days", + "label_window_days": 90, + "snapshot_day": None, + "redacted_columns": [], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "(full horizon, no windowing)" in result.output + + def test_inspect_snapshot_day_equal_to_horizon_prints_value(self, tmp_path: Path) -> None: + """snapshot_day == horizon_days is NOT silently relabelled — manifest wins.""" + bundle = tmp_path / "equal" + bundle.mkdir() + manifest = { + "horizon_days": 90, + "snapshot_day": 90, + "redacted_columns": [], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "Snapshot day: 90 days" in result.output + assert "(full horizon" not in result.output + + def test_inspect_redactions_empty_omits_line(self, tmp_path: Path) -> None: + """Empty redacted_columns prints no line at all (no '0 columns []' noise).""" + bundle = tmp_path / "redact_empty" + bundle.mkdir() + manifest = {"redacted_columns": [], "tables": {}, "tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + assert "Redactions:" not in result.output + + def test_inspect_redactions_singular_pluralization(self, tmp_path: Path) -> None: + """One column → 'column' (singular); not 'column(s)' or 'columns'.""" + bundle = tmp_path / "one" + bundle.mkdir() + manifest = {"redacted_columns": ["only_one"], "tables": {}, 
"tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "1 column [only_one]" in line + assert "columns" not in line + assert "column(s)" not in line + + def test_inspect_redactions_short_list_full(self, tmp_path: Path) -> None: + """2 columns: full list, plural noun, no ellipsis.""" + bundle = tmp_path / "redact_short" + bundle.mkdir() + manifest = { + "redacted_columns": ["col_a", "col_b"], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "2 columns [col_a, col_b]" in line + assert "..." not in line + + def test_inspect_redactions_boundary_4_cols_full(self, tmp_path: Path) -> None: + """Exactly 4 columns: still full list, no ellipsis (≤4 → full).""" + bundle = tmp_path / "redact_4" + bundle.mkdir() + manifest = { + "redacted_columns": ["c1", "c2", "c3", "c4"], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "4 columns [c1, c2, c3, c4]" in line + assert "..." 
not in line + + def test_inspect_redactions_boundary_5_cols_truncates(self, tmp_path: Path) -> None: + """Exactly 5 columns: triggers truncation; first 3 + ellipsis only.""" + bundle = tmp_path / "redact_5" + bundle.mkdir() + manifest = { + "redacted_columns": ["c1", "c2", "c3", "c4", "c5"], + "tables": {}, + "tasks": {}, + } + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "5 columns [c1, c2, c3, ...]" in line + # c4 and c5 must not leak into the truncated head — pin the boundary. + assert "c4" not in line + assert "c5" not in line + + def test_inspect_redactions_long_list(self, tmp_path: Path) -> None: + """6 columns: still 'first 3 + ellipsis'.""" + bundle = tmp_path / "redact_long" + bundle.mkdir() + cols = ["c1", "c2", "c3", "c4", "c5", "c6"] + manifest = {"redacted_columns": cols, "tables": {}, "tasks": {}} + (bundle / "manifest.json").write_text(json.dumps(manifest)) + result = runner.invoke(app, ["inspect", str(bundle)]) + assert result.exit_code == 0 + line = next(line for line in result.output.splitlines() if "Redactions:" in line) + assert "6 columns [c1, c2, c3, ...]" in line + # No redundant "(N total)" — count is already at the front. 
+ assert "(6 total)" not in line + for tail in ("c4", "c5", "c6"): + assert tail not in line + + def test_inspect_json_equals_manifest_file(self, bundle_dir: Path) -> None: + """The contract: --json output is byte-equivalent JSON to manifest.json.""" + result = runner.invoke(app, ["inspect", str(bundle_dir), "--json"]) + assert result.exit_code == 0 + on_disk = json.loads((bundle_dir / "manifest.json").read_text()) + from_cli = json.loads(result.output) + assert from_cli == on_disk + + def test_inspect_json_short_flag(self, bundle_dir: Path) -> None: + """-j short flag works the same as --json.""" + result = runner.invoke(app, ["inspect", str(bundle_dir), "-j"]) + assert result.exit_code == 0 + parsed = json.loads(result.output) + assert parsed["recipe_id"] == "b2b_saas_procurement_v1" + + def test_inspect_json_and_plain_no_cross_contamination(self, bundle_dir: Path) -> None: + """JSON mode and plain mode return distinct output formats.""" + plain = runner.invoke(app, ["inspect", str(bundle_dir)]) + json_out = runner.invoke(app, ["inspect", str(bundle_dir), "--json"]) + assert plain.exit_code == 0 + assert json_out.exit_code == 0 + # Plain mode has the "Bundle:" header + assert "Bundle:" in plain.output + # JSON mode is parseable + parsed = json.loads(json_out.output) + assert isinstance(parsed, dict) + # Plain output should NOT be parseable as JSON + with pytest.raises(json.JSONDecodeError): + json.loads(plain.output) + # --------------------------------------------------------------------------- # validate command