Reject replicate designs in unsupported estimators

igerber · claude · igerber · commit 10bfc0366127 · 2026-03-28T13:02:35.000-04:00
Add NotImplementedError for replicate-weight survey designs in
DifferenceInDifferences, MultiPeriodDiD, StackedDiD, ImputationDiD,
TwoStageDiD, SyntheticDiD, and TROP. These estimators use TSL
(compute_survey_vcov) or custom variance computations that do not
dispatch to replicate-based variance.

Document full replicate-weight support matrix in REGISTRY.md.

Also add a df<=0 early return in safe_inference/safe_inference_batch,
fix the LinearRegression.get_inference() generic fallback for replicate
designs, re-apply the EfficientDiD replicate guard after the unit design
rebuild, and add a guard at the first df_survey read in CallawaySantAnna.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
@@ -240,6 +240,15 @@ def fit(
         resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, self.inference)
         )
+        # Reject replicate-weight designs — base DiD uses compute_survey_vcov
+        # (TSL) directly, not LinearRegression's replicate dispatch.
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "DifferenceInDifferences does not yet support replicate-weight "
+                "survey designs. Use CallawaySantAnna, EfficientDiD, "
+                "ContinuousDiD, or TripleDifference for replicate-weight "
+                "inference, or use a TSL-based survey design (strata/psu/fpc)."
+            )
 
         # Handle absorbed fixed effects (within-transformation)
         working_data = data.copy()
@@ -1008,6 +1017,15 @@ def fit(  # type: ignore[override]
         resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, effective_inference)
         )
+        # Reject replicate-weight designs — MultiPeriodDiD uses
+        # compute_survey_vcov (TSL) directly without replicate dispatch.
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "MultiPeriodDiD does not yet support replicate-weight survey "
+                "designs. Use CallawaySantAnna for staggered adoption with "
+                "replicate weights, or use a TSL-based survey design "
+                "(strata/psu/fpc)."
+            )
 
         # Handle absorbed fixed effects (within-transformation)
         working_data = data.copy()
diff --git a/diff_diff/imputation.py b/diff_diff/imputation.py
@@ -246,6 +246,11 @@ def fit(
 
         # Validate within-unit constancy for panel survey designs
         if resolved_survey is not None:
+            if resolved_survey.uses_replicate_variance:
+                raise NotImplementedError(
+                    "ImputationDiD does not yet support replicate-weight survey "
+                    "designs. Use a TSL-based survey design (strata/psu/fpc)."
+                )
             _validate_unit_constant_survey(data, unit, survey_design)
             if resolved_survey.weight_type != "pweight":
                 raise ValueError(
diff --git a/diff_diff/stacked_did.py b/diff_diff/stacked_did.py
@@ -242,6 +242,15 @@ def fit(
         resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, "analytical")
         )
+        # Reject replicate-weight designs — StackedDiD uses
+        # compute_survey_vcov (TSL) directly without replicate dispatch.
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "StackedDiD does not yet support replicate-weight survey "
+                "designs. Use CallawaySantAnna for staggered adoption with "
+                "replicate weights, or use a TSL-based survey design "
+                "(strata/psu/fpc)."
+            )
 
         # Reject fweight and aweight — Q-weight composition is ratio-valued
         # and breaks both frequency-weight (integer) and analytic-weight
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
@@ -256,6 +256,12 @@ def fit(  # type: ignore[override]
         resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, "analytical")
         )
+        # Reject replicate-weight designs — SyntheticDiD uses bootstrap variance
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "SyntheticDiD does not yet support replicate-weight survey "
+                "designs. Use a TSL-based survey design (strata/psu/fpc)."
+            )
         # Validate pweight only (strata/PSU/FPC are allowed for Rao-Wu bootstrap)
         if resolved_survey is not None and resolved_survey.weight_type != "pweight":
             raise ValueError(
diff --git a/diff_diff/trop.py b/diff_diff/trop.py
@@ -461,6 +461,12 @@ def fit(
         resolved_survey, _survey_weights, _survey_wt, survey_metadata = _resolve_survey_for_fit(
             survey_design, data, "analytical"
         )
+        # Reject replicate-weight designs — TROP uses Rao-Wu bootstrap
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "TROP does not yet support replicate-weight survey designs. "
+                "Use a TSL-based survey design (strata/psu/fpc)."
+            )
         # Validate weight_type is pweight (keep restriction), but allow
         # strata/PSU/FPC — those are handled via Rao-Wu rescaled bootstrap.
         if resolved_survey is not None and resolved_survey.weight_type != "pweight":
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
@@ -242,6 +242,11 @@ def fit(
 
         # Validate within-unit constancy for panel survey designs
         if resolved_survey is not None:
+            if resolved_survey.uses_replicate_variance:
+                raise NotImplementedError(
+                    "TwoStageDiD does not yet support replicate-weight survey "
+                    "designs. Use a TSL-based survey design (strata/psu/fpc)."
+                )
             _validate_unit_constant_survey(data, unit, survey_design)
             if resolved_survey.weight_type != "pweight":
                 raise ValueError(
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -2045,12 +2045,17 @@ variance from the distribution of replicate estimates.
   design structure is fixed and dropped replicates contribute zero to the
   sum without changing the scale. Survey df uses `n_valid - 1` for
   t-based inference.
-- **Note:** SunAbraham rejects replicate-weight designs with
-  `NotImplementedError` because the weighted within-transformation must be
-  recomputed per replicate (not yet implemented).
-- **Note:** CallawaySantAnna, ContinuousDiD, and EfficientDiD reject
-  replicate weights with `n_bootstrap > 0`. Replicate weights provide
-  analytical variance; bootstrap is a separate inference mechanism.
+- **Note:** Replicate-weight support matrix:
+  - **Supported**: CallawaySantAnna (reg, no bootstrap), ContinuousDiD
+    (no bootstrap), EfficientDiD (no bootstrap), TripleDifference (all
+    methods), LinearRegression (OLS path)
+  - **Rejected with NotImplementedError**: SunAbraham (within-transformation
+    must be recomputed per replicate), DifferenceInDifferences,
+    MultiPeriodDiD, StackedDiD (use compute_survey_vcov directly),
+    ImputationDiD, TwoStageDiD (custom variance), SyntheticDiD, TROP
+    (bootstrap-based variance)
+  - CS/ContinuousDiD/EfficientDiD reject replicate + `n_bootstrap > 0`
+    (replicate weights provide analytical variance)
 - **Note:** When invalid replicates are dropped in `compute_replicate_vcov`
   (OLS path), `n_valid` is returned and used for `df_survey = n_valid - 1`
   in `LinearRegression.fit()`. For IF-based replicate paths, replicates
diff --git a/tests/test_survey_phase6.py b/tests/test_survey_phase6.py
@@ -498,10 +498,9 @@ def test_replicate_metadata(self, replicate_data):
         assert sm.n_replicates == len(rep_cols)
         assert sm.df_survey == len(rep_cols) - 1
 
-    def test_replicate_with_did(self, replicate_data):
-        """Replicate weights work end-to-end with DifferenceInDifferences."""
+    def test_replicate_rejected_by_base_did(self, replicate_data):
+        """DifferenceInDifferences rejects replicate-weight designs."""
         data, rep_cols = replicate_data
-        # Add DiD structure
         n = len(data)
         data["treated"] = (np.arange(n) < n // 2).astype(int)
         data["post"] = (np.arange(n) % 4 >= 2).astype(int)
@@ -511,15 +510,11 @@ def test_replicate_with_did(self, replicate_data):
             weights="weight", replicate_weights=rep_cols,
             replicate_method="JK1",
         )
-        est = DifferenceInDifferences()
-        result = est.fit(
-            data, outcome="outcome", treatment="treated", time="post",
-            survey_design=sd,
-        )
-        assert np.isfinite(result.att)
-        assert np.isfinite(result.se)
-        assert result.survey_metadata is not None
-        assert result.survey_metadata.replicate_method == "JK1"
+        with pytest.raises(NotImplementedError, match="DifferenceInDifferences"):
+            DifferenceInDifferences().fit(
+                data, outcome="outcome", treatment="treated", time="post",
+                survey_design=sd,
+            )
 
     def test_replicate_if_variance(self, replicate_data):
         """IF-based replicate variance produces finite results."""