Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions .agent-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@

## Current System State

**v0.4.0 in progress — Milestones 7–8 complete (PRs open).** Full simulation engine + render/bundle
layer implemented. 521 tests passing.
**v0.4.0 in progress — Milestones 7–9 complete (PR open).** Full simulation engine + render/bundle
layer + exposure filtering implemented. 545 tests passing.

---

## Next Up — Milestone 9: Exposure Filtering (v0.4.0)
## Next Up — Milestone 10: CLI `generate` command + `inspect` / `validate` stubs (v0.4.0)

Goal: Apply `student_public` / `research_instructor` exposure-mode filtering during bundle write.
Goal: Wire `leadforge generate` CLI command end-to-end; implement `inspect` and `validate` output.

- [ ] `exposure/modes.py` — `ExposureMode`-aware filter dispatch
- [ ] `exposure/filters.py` — column/table redaction rules per mode
- [ ] `exposure/redaction.py` — latent-column scrubbing for `student_public`
- [ ] Wire into `api/bundle.py` write pipeline
- [ ] `cli/commands/generate.py` — parse flags, call `Generator.from_recipe().generate()`, call `.save()`
- [ ] `cli/commands/inspect.py` — print manifest summary for a written bundle
- [ ] `cli/commands/validate.py` — basic schema / FK / leakage checks on a written bundle
- [ ] Tests for each command

---

Expand All @@ -32,6 +32,13 @@ Goal: Apply `student_public` / `research_instructor` exposure-mode filtering dur

## Completed Phases

### Milestone 9 — Exposure Filtering ✓ (v0.4.0 in PR)
- `exposure/filters.py`: `BundleFilter` frozen dataclass; `FILTERS` dict keyed by `ExposureMode`; `get_filter()`
- `exposure/redaction.py`: `write_metadata_dir()` — writes `metadata/` with `graph.json`, `graph.graphml`, `world_spec.json`, `latent_registry.json`, `mechanism_summary.json`
- `exposure/modes.py`: `apply_exposure(bundle, root, mode)` — dispatch; skips `metadata/` for `student_public`
- Wired into `api/bundle.py` between dataset card and manifest steps
- 24 new tests; total 545 passing

### Milestone 8 — Render / Bundle Layer ✓ (v0.4.0 in PR)
- `render/relational.py`: `to_dataframes()` — 9-table dict of typed DataFrames from SimulationResult + PopulationResult
- `render/snapshots.py`: `build_snapshot()` — 30-column leakage-free lead snapshot with touch/session/activity aggregates, account/contact field joins
Expand Down
12 changes: 10 additions & 2 deletions leadforge/api/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
1. Write relational Parquet tables (``tables/``).
2. Build the lead snapshot and write task splits (``tasks/``).
3. Write ``dataset_card.md`` and ``feature_dictionary.csv``.
4. Build and write ``manifest.json``.
4. Apply exposure filtering — write ``metadata/`` for ``research_instructor``
mode; skip it for ``student_public``.
5. Build and write ``manifest.json``.
"""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from leadforge.exposure.modes import apply_exposure
from leadforge.narrative.dataset_card import render_dataset_card
from leadforge.render.manifests import build_manifest, write_manifest
from leadforge.render.relational import to_dataframes
Expand Down Expand Up @@ -74,7 +77,12 @@ def write_bundle(bundle: WorldBundle, path: str) -> None:
write_feature_dictionary(root / "feature_dictionary.csv")

# ------------------------------------------------------------------
# 4. Manifest
# 4. Exposure metadata (research_instructor only)
# ------------------------------------------------------------------
apply_exposure(bundle, root, config.exposure_mode)

# ------------------------------------------------------------------
# 5. Manifest
# ------------------------------------------------------------------
manifest = build_manifest(
config=config,
Expand Down
45 changes: 45 additions & 0 deletions leadforge/exposure/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Per-mode bundle filter rules.

:data:`FILTERS` maps every :class:`~leadforge.core.enums.ExposureMode` to a
:class:`BundleFilter` that governs which artefacts are written when
:func:`~leadforge.api.bundle.write_bundle` produces an output bundle.

To add a new mode, register a ``BundleFilter`` for it in ``FILTERS``;
:func:`get_filter` raises ``KeyError`` for any mode that has no entry.
"""

from __future__ import annotations

from dataclasses import dataclass

from leadforge.core.enums import ExposureMode


@dataclass(frozen=True)
class BundleFilter:
    """Immutable publication rules attached to a single :class:`ExposureMode`.

    Instances are frozen, so a rule set cannot be altered after it has been
    registered.

    Attributes:
        write_metadata: ``True`` when the bundle should contain a
            ``metadata/`` directory holding the hidden-truth files
            (``graph.json``, ``graph.graphml``, ``world_spec.json``,
            ``latent_registry.json``, ``mechanism_summary.json``);
            ``False`` when that directory must not be created.
    """

    write_metadata: bool


#: Canonical filter rules for every supported exposure mode.
#: ``student_public`` is registered with ``write_metadata=False`` and
#: ``research_instructor`` with ``write_metadata=True``.  Every
#: ``ExposureMode`` member needs an entry here, otherwise the lookup in
#: ``get_filter`` fails with ``KeyError``.
FILTERS: dict[ExposureMode, BundleFilter] = {
    ExposureMode.student_public: BundleFilter(write_metadata=False),
    ExposureMode.research_instructor: BundleFilter(write_metadata=True),
}


def get_filter(mode: ExposureMode) -> BundleFilter:
    """Return the :class:`BundleFilter` registered for *mode*.

    Args:
        mode: Exposure mode whose publication rules are requested.

    Returns:
        The ``FILTERS`` entry for *mode*.

    Raises:
        KeyError: if *mode* has no registered filter (should never happen
            with well-typed callers, but guards against future enum additions
            that forget to update ``FILTERS``).  The message names the
            offending mode and where to register it.
    """
    try:
        return FILTERS[mode]
    except KeyError:
        # Re-raise the same exception type with an actionable message; the
        # bare lookup error would only carry the repr of the missing key.
        raise KeyError(
            f"No BundleFilter registered for exposure mode {mode!r}; "
            "add an entry to FILTERS in leadforge/exposure/filters.py"
        ) from None
36 changes: 36 additions & 0 deletions leadforge/exposure/modes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Exposure-mode dispatch for bundle publication.

:func:`apply_exposure` is the single entry point called by
:func:`~leadforge.api.bundle.write_bundle`. It reads the resolved
:class:`~leadforge.exposure.filters.BundleFilter` for the requested mode
and performs the corresponding writes (or skips them).
"""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from leadforge.core.enums import ExposureMode
from leadforge.exposure.filters import get_filter
from leadforge.exposure.redaction import write_metadata_dir

if TYPE_CHECKING:
from leadforge.core.models import WorldBundle


def apply_exposure(bundle: WorldBundle, bundle_root: Path, mode: ExposureMode) -> None:
    """Publish (or withhold) mode-dependent artefacts for the bundle at *bundle_root*.

    The rules for *mode* are looked up with ``get_filter``.  When they ask for
    metadata, ``write_metadata_dir`` is called to produce the ``metadata/``
    directory; otherwise nothing is written, so no hidden truth is published.

    Args:
        bundle: Fully populated :class:`~leadforge.core.models.WorldBundle`.
        bundle_root: Root directory of the written bundle (must already exist).
        mode: Exposure mode that controls which artefacts are published.
    """
    # Guard clause: modes that do not publish metadata need no work at all.
    if not get_filter(mode).write_metadata:
        return
    write_metadata_dir(bundle, bundle_root)
Comment thread
shaypal5 marked this conversation as resolved.
79 changes: 79 additions & 0 deletions leadforge/exposure/redaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Write hidden-truth metadata files for ``research_instructor`` mode.

:func:`write_metadata_dir` creates ``bundle_root/metadata/`` and populates
it with five files that expose the full hidden world:

- ``graph.json`` — world graph as JSON (nodes, edges, motif family)
- ``graph.graphml`` — world graph as GraphML for graph tools
- ``world_spec.json`` — generation config + narrative spec
- ``latent_registry.json`` — per-entity latent trait values
- ``mechanism_summary.json`` — mechanism assignment summary
"""

from __future__ import annotations

import dataclasses
import json
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from leadforge.core.models import WorldBundle


def write_metadata_dir(bundle: WorldBundle, bundle_root: Path) -> None:
    """Populate ``bundle_root/metadata/`` with all hidden-truth files.

    Writes, in order: ``graph.json``, ``graph.graphml``,
    ``latent_registry.json``, ``world_spec.json`` and
    ``mechanism_summary.json``.  Every file is encoded as UTF-8 rather than
    with the platform's locale encoding.

    Args:
        bundle: Fully populated :class:`~leadforge.core.models.WorldBundle`;
            its ``world_graph`` and ``population`` must not be ``None``.
        bundle_root: Root directory of the written bundle.  Only the
            ``metadata/`` child is created here, so the root itself must
            already exist.
    """
    # NOTE(review): these imports are function-scoped in the original —
    # presumably to avoid an import cycle; confirm before hoisting them.
    from leadforge.core.rng import RNGRoot
    from leadforge.mechanisms.policies import assign_mechanisms

    # Callers must only invoke this after full bundle assembly; world_graph
    # and population are guaranteed non-None at that point.
    assert bundle.world_graph is not None  # noqa: S101
    assert bundle.population is not None  # noqa: S101

    meta_dir = bundle_root / "metadata"
    meta_dir.mkdir(exist_ok=True)

    def _write(filename: str, text: str) -> None:
        # Explicit UTF-8: the default encoding of ``write_text`` depends on
        # the locale and can fail on, or mis-encode, non-ASCII content.
        (meta_dir / filename).write_text(text, encoding="utf-8")

    # ------------------------------------------------------------------
    # graph.json + graph.graphml
    # ------------------------------------------------------------------
    _write("graph.json", bundle.world_graph.to_json())
    _write("graph.graphml", bundle.world_graph.to_graphml())

    # ------------------------------------------------------------------
    # latent_registry.json
    # ------------------------------------------------------------------
    ls = bundle.population.latent_state
    latent_registry: dict[str, object] = {
        "account_latents": ls.account_latents,
        "contact_latents": ls.contact_latents,
        "lead_latents": ls.lead_latents,
    }
    _write("latent_registry.json", json.dumps(latent_registry, indent=2))

    # ------------------------------------------------------------------
    # world_spec.json — config + narrative (if present)
    # ------------------------------------------------------------------
    config_dict = dataclasses.asdict(bundle.spec.config)
    narrative_dict = (
        dataclasses.asdict(bundle.spec.narrative) if bundle.spec.narrative is not None else None
    )
    world_spec_dict = {"config": config_dict, "narrative": narrative_dict}
    _write("world_spec.json", json.dumps(world_spec_dict, indent=2))

    # ------------------------------------------------------------------
    # mechanism_summary.json
    # ------------------------------------------------------------------
    # Reconstruct the mechanism assignment with the same RNG substream that
    # was used during simulation — produces the identical parameter values.
    motif_family = bundle.world_graph.motif_family
    mech_rng = RNGRoot(bundle.spec.config.seed).child("mechanisms")
    assignment = assign_mechanisms(motif_family, mech_rng)
    _write("mechanism_summary.json", json.dumps(assignment.summary().to_dict(), indent=2))
Empty file added tests/exposure/__init__.py
Empty file.
Loading
Loading