|
2 | 2 |
|
3 | 3 | The second peer scheme alongside ``lead_scoring``. Its entity rows and FK |
4 | 4 | constraints live here (``entities`` / ``relationships``); the snapshot, feature, |
5 | | -and task definitions live in sibling modules. :meth:`LifecycleScheme.build_world` |
6 | | -is implemented (LTV-Pn.4a); :meth:`write_bundle` / :meth:`write_metadata` are |
7 | | -built out in LTV-Pn.4b–c and currently raise :class:`NotImplementedError`. |
| 5 | +and task definitions live in sibling modules. ``build_world`` (LTV-Pn.4a) and |
| 6 | +the instructor-mode ``write_bundle`` / ``write_metadata`` (LTV-Pn.4b) are |
| 7 | +implemented; the ``student_public`` snapshot-safe export lands in LTV-Pn.4c. |
8 | 8 | """ |
9 | 9 |
|
10 | 10 | from __future__ import annotations |
|
20 | 20 | from leadforge.core.models import GenerationConfig, WorldBundle |
21 | 21 | from leadforge.narrative.spec import NarrativeSpec |
22 | 22 |
|
23 | | -_NOT_IMPLEMENTED = ( |
24 | | - "the lifecycle (b2b_saas_ltv_v1) write path is not implemented yet; " |
25 | | - "it is built across LTV-Pn.4b–c" |
26 | | -) |
27 | | - |
28 | 23 |
|
29 | 24 | def _sample_motif_family(rng: random.Random) -> str: |
30 | 25 | """Deterministically pick a retention motif family for this world. |
@@ -74,11 +69,26 @@ def build_world( |
74 | 69 | ``narrative.yaml`` will not drive them until ``LTV-Po`` decides |
75 | 70 | whether the lifecycle scheme should consume the narrative spec. |
76 | 71 | """ |
| 72 | + from leadforge.core.exceptions import InvalidConfigError |
77 | 73 | from leadforge.core.models import WorldBundle, WorldSpec |
78 | 74 | from leadforge.core.rng import RNGRoot |
79 | 75 | from leadforge.schemes.lifecycle.artifacts import LifecycleArtifacts |
80 | 76 | from leadforge.schemes.lifecycle.engine import simulate_lifecycle |
81 | 77 | from leadforge.schemes.lifecycle.population import build_customer_population |
| 78 | + from leadforge.schemes.lifecycle.snapshots import FORWARD_WINDOWS_DAYS |
| 79 | + |
| 80 | + # config.forward_windows_days is not yet threaded into the snapshot |
| 81 | + # builder, which exports the fixed FORWARD_WINDOWS_DAYS targets. Reject |
| 82 | + # an override now (clear, early) rather than emit a bundle whose manifest |
| 83 | + # disagrees with its task dirs, or under-simulate and fail opaquely later. |
| 84 | + # Threading config-driven windows through is tracked for a later step. |
| 85 | + if tuple(config.forward_windows_days) != tuple(FORWARD_WINDOWS_DAYS): |
| 86 | + raise InvalidConfigError( |
| 87 | + f"config.forward_windows_days={tuple(config.forward_windows_days)} differs " |
| 88 | + f"from the lifecycle scheme's exported windows {tuple(FORWARD_WINDOWS_DAYS)}; " |
| 89 | + "config-driven forward windows are not yet supported (the snapshot builder " |
| 90 | + "exports the fixed set). Use the default until that wiring lands." |
| 91 | + ) |
82 | 92 |
|
83 | 93 | motif_rng = RNGRoot(config.seed).child("lifecycle_motif") |
84 | 94 | motif_family = _sample_motif_family(motif_rng) |
@@ -112,10 +122,163 @@ def write_bundle( |
112 | 122 | path: str, |
113 | 123 | generation_timestamp: str | None = None, |
114 | 124 | ) -> None: |
115 | | - raise NotImplementedError(_NOT_IMPLEMENTED) |
| 125 | + """Serialise a lifecycle *bundle* to *path* (instructor mode). |
| 126 | +
|
| 127 | + Writes the six relational tables, both observation regimes' snapshots |
| 128 | + split into 8 task directories (3 pLTV regression + 1 churn |
| 129 | + classification per regime, the early regime prefixed ``early_``), a |
| 130 | + dataset card, the feature dictionary, the hidden-truth ``metadata/`` |
| 131 | + (via :meth:`write_metadata`), and the manifest (recording |
| 132 | + ``generation_scheme`` + ``observation_date`` + the forward windows). |
| 133 | +
|
| 134 | + ``config.difficulty_params`` is threaded into both snapshot builders — |
| 135 | + when set (LTV-Po resolves it from the recipe profile), it drives the |
| 136 | + snapshot distortions. |
| 137 | +
|
| 138 | + Only ``research_instructor`` mode is supported here. The |
| 139 | + ``student_public`` snapshot-safety projection (event-table cutoff |
| 140 | + filtering, terminal-column drops, per-task target projection) lands in |
| 141 | + LTV-Pn.4c; until then this refuses to write a public bundle rather than |
| 142 | + emit one that is not snapshot-safe. |
| 143 | + """ |
| 144 | + from pathlib import Path |
| 145 | + |
| 146 | + from leadforge.core.enums import ExposureMode |
| 147 | + from leadforge.exposure.modes import apply_exposure |
| 148 | + from leadforge.render.manifests import build_manifest, write_manifest |
| 149 | + from leadforge.render.relational_io import write_relational_tables |
| 150 | + from leadforge.render.tasks import write_task_splits |
| 151 | + from leadforge.schema.dictionaries import write_feature_dictionary |
| 152 | + from leadforge.schemes.lifecycle.artifacts import LifecycleArtifacts |
| 153 | + from leadforge.schemes.lifecycle.features import CUSTOMER_SNAPSHOT_FEATURES |
| 154 | + from leadforge.schemes.lifecycle.render.dataset_card import render_lifecycle_dataset_card |
| 155 | + from leadforge.schemes.lifecycle.render.relational import to_dataframes |
| 156 | + from leadforge.schemes.lifecycle.snapshots import ( |
| 157 | + FORWARD_WINDOWS_DAYS, |
| 158 | + build_customer_snapshot, |
| 159 | + build_early_pltv_snapshot, |
| 160 | + ) |
| 161 | + from leadforge.schemes.lifecycle.tasks import ( |
| 162 | + CALENDAR_REGIME, |
| 163 | + EARLY_REGIME, |
| 164 | + lifecycle_task_manifests, |
| 165 | + ) |
| 166 | + |
| 167 | + artifacts = bundle.artifacts |
| 168 | + if not isinstance(artifacts, LifecycleArtifacts): |
| 169 | + raise RuntimeError( |
| 170 | + "WorldBundle is not populated with lifecycle artifacts. " |
| 171 | + "Call Generator.generate() / build_world() first." |
| 172 | + ) |
| 173 | + config = bundle.spec.config |
| 174 | + if config.exposure_mode is not ExposureMode.research_instructor: |
| 175 | + raise NotImplementedError( |
| 176 | + f"lifecycle write_bundle currently supports only " |
| 177 | + f"research_instructor; {config.exposure_mode.value!r} (snapshot-safe " |
| 178 | + "public export) lands in LTV-Pn.4c" |
| 179 | + ) |
| 180 | + |
| 181 | + population = artifacts.population |
| 182 | + sim = artifacts.simulation_result |
| 183 | + root = Path(path) |
| 184 | + root.mkdir(parents=True, exist_ok=True) |
| 185 | + |
| 186 | + # 1. Relational tables → tables/ |
| 187 | + dfs = to_dataframes(sim, population) |
| 188 | + table_row_counts = write_relational_tables(dfs, root / "tables") |
| 189 | + |
| 190 | + # 2. Both regime snapshots → 8 task directories. |
| 191 | + # difficulty_params (None until LTV-Po resolves it) drives distortions. |
| 192 | + snapshots = { |
| 193 | + CALENDAR_REGIME: build_customer_snapshot( |
| 194 | + population, sim, difficulty_params=config.difficulty_params, seed=config.seed |
| 195 | + ), |
| 196 | + EARLY_REGIME: build_early_pltv_snapshot( |
| 197 | + population, |
| 198 | + sim, |
| 199 | + early_tenure_weeks=config.early_tenure_weeks, |
| 200 | + difficulty_params=config.difficulty_params, |
| 201 | + seed=config.seed, |
| 202 | + ), |
| 203 | + } |
| 204 | + # Each task is a standalone single-target split: drop every OTHER |
| 205 | + # target column so a task's parquet cannot leak the answer's siblings |
| 206 | + # (e.g. ltv_revenue_730d ⊇ ltv_revenue_90d). The deliberate |
| 207 | + # mrr_change_full_period trap (leakage_risk but not a target) is kept. |
| 208 | + all_target_cols = {f.name for f in CUSTOMER_SNAPSHOT_FEATURES if f.is_target} |
| 209 | + task_row_counts: dict[str, dict[str, int]] = {} |
| 210 | + all_tasks = [] |
| 211 | + for regime, snapshot in snapshots.items(): |
| 212 | + for task in lifecycle_task_manifests(regime): |
| 213 | + other_targets = [ |
| 214 | + c for c in all_target_cols - {task.label_column} if c in snapshot.columns |
| 215 | + ] |
| 216 | + task_df = snapshot.drop(columns=other_targets) |
| 217 | + counts = write_task_splits(task_df, root / "tasks", seed=config.seed, task=task) |
| 218 | + task_row_counts[task.task_id] = counts |
| 219 | + all_tasks.append(task) |
| 220 | + |
| 221 | + # 3. Dataset card + feature dictionary |
| 222 | + (root / "dataset_card.md").write_text( |
| 223 | + render_lifecycle_dataset_card( |
| 224 | + bundle.spec, |
| 225 | + table_counts=table_row_counts, |
| 226 | + tasks=tuple(all_tasks), |
| 227 | + observation_date=population.observation_date, |
| 228 | + ) |
| 229 | + ) |
| 230 | + write_feature_dictionary( |
| 231 | + root / "feature_dictionary.csv", features=tuple(CUSTOMER_SNAPSHOT_FEATURES) |
| 232 | + ) |
| 233 | + |
| 234 | + # 4. Exposure metadata (delegates hidden truth to write_metadata) |
| 235 | + apply_exposure(bundle, root, config.exposure_mode) |
| 236 | + |
| 237 | + # 5. Manifest |
| 238 | + manifest = build_manifest( |
| 239 | + config=config, |
| 240 | + generation_scheme=self.name, |
| 241 | + motif_family=artifacts.motif_family, |
| 242 | + table_row_counts=table_row_counts, |
| 243 | + task_row_counts=task_row_counts, |
| 244 | + bundle_root=root, |
| 245 | + generation_timestamp=generation_timestamp, |
| 246 | + extra_fields={ |
| 247 | + "observation_date": population.observation_date, |
| 248 | + # The actual exported target windows (source of truth), not |
| 249 | + # config.forward_windows_days — build_world rejects any mismatch. |
| 250 | + "forward_windows_days": list(FORWARD_WINDOWS_DAYS), |
| 251 | + "early_tenure_weeks": config.early_tenure_weeks, |
| 252 | + }, |
| 253 | + ) |
| 254 | + write_manifest(manifest, root) |
116 | 255 |
|
117 | 256 | def write_metadata(self, bundle: WorldBundle, meta_dir: Path) -> None: |
118 | | - raise NotImplementedError(_NOT_IMPLEMENTED) |
| 257 | + """Write the lifecycle hidden-truth files into *meta_dir*. |
| 258 | +
|
| 259 | + Called by :func:`leadforge.exposure.modes.apply_exposure` after the |
| 260 | + shared ``world_spec.json`` is written. The lifecycle scheme has no hidden graph; |
| 261 | + its latent truth is the per-entity latent registry (``latent_registry.json``) |
| 262 | + and the motif-derived mechanism parameters (``mechanism_summary.json``). |
| 263 | + """ |
| 264 | + import json |
| 265 | + |
| 266 | + from leadforge.schemes.lifecycle.artifacts import LifecycleArtifacts |
| 267 | + from leadforge.schemes.lifecycle.render.metadata import ( |
| 268 | + latent_registry_dict, |
| 269 | + mechanism_summary_dict, |
| 270 | + ) |
| 271 | + |
| 272 | + artifacts = bundle.artifacts |
| 273 | + if not isinstance(artifacts, LifecycleArtifacts): |
| 274 | + raise RuntimeError("WorldBundle is not populated with lifecycle artifacts.") |
| 275 | + |
| 276 | + (meta_dir / "latent_registry.json").write_text( |
| 277 | + json.dumps(latent_registry_dict(artifacts.population.latent_state), indent=2) |
| 278 | + ) |
| 279 | + (meta_dir / "mechanism_summary.json").write_text( |
| 280 | + json.dumps(mechanism_summary_dict(artifacts.motif_family), indent=2) |
| 281 | + ) |
119 | 282 |
|
120 | 283 |
|
121 | 284 | LIFECYCLE_SCHEME = LifecycleScheme() |
|
0 commit comments