From 8087a7063a831dc5ef6db8012bcfd1fdb39476f2 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 29 Apr 2026 08:43:57 +0300 Subject: [PATCH] =?UTF-8?q?fix:=20address=20PR=20#14=20review=20feedback?= =?UTF-8?q?=20=E2=80=94=20rename=20redaction=E2=86=92metadata,=20widen=20g?= =?UTF-8?q?et=5Ffilter,=20clean=20stale=20metadata/?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename `exposure/redaction.py` → `exposure/metadata.py` (name now matches behavior) - `apply_exposure()` removes pre-existing `metadata/` dir in `student_public` mode to prevent hidden-truth leakage on path reuse - `get_filter()` now accepts `str | ExposureMode` with automatic coercion - Add regression test for path-reuse leakage and string-mode acceptance test Co-Authored-By: Claude Opus 4.6 --- leadforge/exposure/filters.py | 9 +++++- .../exposure/{redaction.py => metadata.py} | 8 ------ leadforge/exposure/modes.py | 11 ++++++-- tests/exposure/test_exposure.py | 28 +++++++++++++++++-- 4 files changed, 41 insertions(+), 15 deletions(-) rename leadforge/exposure/{redaction.py => metadata.py} (83%) diff --git a/leadforge/exposure/filters.py b/leadforge/exposure/filters.py index cbcab5c..a885ab1 100644 --- a/leadforge/exposure/filters.py +++ b/leadforge/exposure/filters.py @@ -34,12 +34,19 @@ class BundleFilter: } -def get_filter(mode: ExposureMode) -> BundleFilter: +def get_filter(mode: str | ExposureMode) -> BundleFilter: """Return the :class:`BundleFilter` for *mode*. + Args: + mode: An :class:`ExposureMode` or its string value. + Raises: + ValueError: if *mode* is a string that is not a valid + :class:`ExposureMode` value. KeyError: if *mode* has no registered filter (should never happen with well-typed callers, but guards against future enum additions that forget to update ``FILTERS``). """ + if isinstance(mode, str): + mode = ExposureMode(mode) return FILTERS[mode] diff --git a/leadforge/exposure/redaction.py b/leadforge/exposure/metadata.py similarity index 83% rename from leadforge/exposure/redaction.py rename to leadforge/exposure/metadata.py index 13bde34..69b43be 100644 --- a/leadforge/exposure/redaction.py +++ b/leadforge/exposure/metadata.py @@ -39,15 +39,11 @@ def write_metadata_dir(bundle: WorldBundle, bundle_root: Path) -> None: meta_dir = bundle_root / "metadata" meta_dir.mkdir(exist_ok=True) - # ------------------------------------------------------------------ # graph.json + graph.graphml - # ------------------------------------------------------------------ (meta_dir / "graph.json").write_text(bundle.world_graph.to_json()) (meta_dir / "graph.graphml").write_text(bundle.world_graph.to_graphml()) - # ------------------------------------------------------------------ # latent_registry.json - # ------------------------------------------------------------------ ls = bundle.population.latent_state latent_registry: dict[str, object] = { "account_latents": ls.account_latents, @@ -56,9 +52,7 @@ def write_metadata_dir(bundle: WorldBundle, bundle_root: Path) -> None: } (meta_dir / "latent_registry.json").write_text(json.dumps(latent_registry, indent=2)) - # ------------------------------------------------------------------ # world_spec.json — config + narrative (if present) - # ------------------------------------------------------------------ config_dict = dataclasses.asdict(bundle.spec.config) narrative_dict = ( dataclasses.asdict(bundle.spec.narrative) if bundle.spec.narrative is not None else None @@ -66,9 +60,7 @@ def write_metadata_dir(bundle: WorldBundle, bundle_root: Path) -> None: world_spec_dict = {"config": config_dict, "narrative": narrative_dict} (meta_dir / "world_spec.json").write_text(json.dumps(world_spec_dict, indent=2)) - # ------------------------------------------------------------------ # mechanism_summary.json - # ------------------------------------------------------------------ # Reconstruct the mechanism assignment with the same RNG substream that # was used during simulation — produces the identical parameter values. motif_family = bundle.world_graph.motif_family diff --git a/leadforge/exposure/modes.py b/leadforge/exposure/modes.py index 97d5ce6..19901dd 100644 --- a/leadforge/exposure/modes.py +++ b/leadforge/exposure/modes.py @@ -8,12 +8,13 @@ from __future__ import annotations +import shutil from pathlib import Path from typing import TYPE_CHECKING from leadforge.core.enums import ExposureMode from leadforge.exposure.filters import get_filter -from leadforge.exposure.redaction import write_metadata_dir +from leadforge.exposure.metadata import write_metadata_dir if TYPE_CHECKING: from leadforge.core.models import WorldBundle @@ -23,8 +24,9 @@ def apply_exposure(bundle: WorldBundle, bundle_root: Path, mode: ExposureMode) - """Apply exposure filtering for *mode* to the bundle at *bundle_root*. For ``research_instructor`` mode this writes the ``metadata/`` - directory with all hidden-truth files. For ``student_public`` mode the - directory is not created and no hidden truth is published. + directory with all hidden-truth files. For ``student_public`` mode any + pre-existing ``metadata/`` directory is removed so that hidden truth + is never accidentally published when reusing an output path. Args: bundle: Fully populated :class:`~leadforge.core.models.WorldBundle`. @@ -32,5 +34,8 @@ def apply_exposure(bundle: WorldBundle, bundle_root: Path, mode: ExposureMode) - mode: Exposure mode that controls which artefacts are published. """ filt = get_filter(mode) + meta_dir = bundle_root / "metadata" if filt.write_metadata: write_metadata_dir(bundle, bundle_root) + elif meta_dir.exists(): + shutil.rmtree(meta_dir) diff --git a/tests/exposure/test_exposure.py b/tests/exposure/test_exposure.py index 0c55bb2..713ce1b 100644 --- a/tests/exposure/test_exposure.py +++ b/tests/exposure/test_exposure.py @@ -42,10 +42,15 @@ def test_research_instructor_writes_metadata(self) -> None: f = get_filter(ExposureMode.research_instructor) assert f.write_metadata is True + def test_get_filter_accepts_string(self) -> None: + f = get_filter("student_public") + assert isinstance(f, BundleFilter) + assert f.write_metadata is False + def test_unknown_mode_raises(self) -> None: - """get_filter must raise KeyError for an unregistered mode string.""" - with pytest.raises(KeyError): - get_filter("totally_fake_mode") # type: ignore[arg-type] + """get_filter must raise ValueError for an invalid mode string.""" + with pytest.raises(ValueError, match="totally_fake_mode"): + get_filter("totally_fake_mode") # --------------------------------------------------------------------------- @@ -68,6 +73,23 @@ def test_core_files_present(self, tmp_path: Path) -> None: assert (tmp_path / "tables").is_dir() assert (tmp_path / "tasks").is_dir() + def test_reused_output_dir_removes_metadata_when_switching_to_student_public( + self, tmp_path: Path + ) -> None: + instructor_bundle = _make_bundle("research_instructor") + instructor_bundle.save(str(tmp_path)) + assert (tmp_path / "metadata").is_dir() + + public_bundle = _make_bundle("student_public") + public_bundle.save(str(tmp_path)) + + assert not (tmp_path / "metadata").exists() + assert (tmp_path / "manifest.json").exists() + assert (tmp_path / "dataset_card.md").exists() + assert (tmp_path / "feature_dictionary.csv").exists() + assert (tmp_path / "tables").is_dir() + assert (tmp_path / "tasks").is_dir() + class TestResearchInstructorMode: def test_metadata_dir_created(self, tmp_path: Path) -> None: