Complete replicate guard coverage and fix remaining review items

igerber · claude · igerber · commit 8b8dcb30e11d · 2026-03-28T13:54:45.000-04:00
- Update REGISTRY CS support: reg/ipw/dr without covariates (not just reg)
- Fix DoseResponseCurve df_survey: pass None (not 0 sentinel) for display
- Fix fweight error string: "non-negative integers" matching REGISTRY
- Add BaconDecomposition replicate rejection guard
- Add rejection tests: MultiPeriodDiD, ImputationDiD, TwoStageDiD,
  BaconDecomposition (plus existing TWFE, StackedDiD coverage)
- Update REGISTRY support matrix to include BaconDecomposition

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/bacon.py b/diff_diff/bacon.py
@@ -462,6 +462,13 @@ def fit(
         resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, "analytical")
         )
+        # Reject replicate-weight designs — Bacon decomposition is a
+        # diagnostic that does not compute replicate-based variance
+        if resolved_survey is not None and resolved_survey.uses_replicate_variance:
+            raise NotImplementedError(
+                "BaconDecomposition does not support replicate-weight survey "
+                "designs. Use a TSL-based survey design (strata/psu/fpc)."
+            )
 
         # Validate within-unit constancy for exact survey weights only.
         # The exact-weight path collapses to per-unit weights via groupby().first(),
diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py
@@ -684,7 +684,7 @@ def fit(
             target="att",
             p_value=att_d_p,
             n_bootstrap=self.n_bootstrap,
-            df_survey=_survey_df,
+            df_survey=_survey_df if _survey_df != 0 else None,
         )
         dose_response_acrt = DoseResponseCurve(
             dose_grid=dvals,
@@ -695,7 +695,7 @@ def fit(
             target="acrt",
             p_value=acrt_d_p,
             n_bootstrap=self.n_bootstrap,
-            df_survey=_survey_df,
+            df_survey=_survey_df if _survey_df != 0 else None,
         )
 
         # Strip bootstrap internals from gt_results
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -411,7 +411,7 @@ def _validate_weights(weights, weight_type, n):
             fractional = weights - np.round(weights)
             if np.any(np.abs(fractional) > 1e-10):
                 raise ValueError(
-                    "Frequency weights (fweight) must be positive integers. "
+                    "Frequency weights (fweight) must be non-negative integers. "
                     "Fractional values detected. Use pweight for non-integer weights."
                 )
     return weights
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -2046,14 +2046,16 @@ variance from the distribution of replicate estimates.
   sum without changing the scale. Survey df uses `n_valid - 1` for
   t-based inference.
 - **Note:** Replicate-weight support matrix:
-  - **Supported**: CallawaySantAnna (reg, no bootstrap), ContinuousDiD
+  - **Supported**: CallawaySantAnna (reg/ipw/dr without covariates, no
+    bootstrap), ContinuousDiD
     (no bootstrap), EfficientDiD (no bootstrap), TripleDifference (all
     methods), LinearRegression (OLS path)
   - **Rejected with NotImplementedError**: SunAbraham, TwoWayFixedEffects
     (within-transformation must be recomputed per replicate),
     DifferenceInDifferences, MultiPeriodDiD, StackedDiD (use
     compute_survey_vcov directly), ImputationDiD, TwoStageDiD (custom
-    variance), SyntheticDiD, TROP (bootstrap-based variance)
+    variance), SyntheticDiD, TROP (bootstrap-based variance),
+    BaconDecomposition (diagnostic only)
   - CS/ContinuousDiD/EfficientDiD reject replicate + `n_bootstrap > 0`
     (replicate weights provide analytical variance)
 - **Note:** When invalid replicates are dropped in `compute_replicate_vcov`
diff --git a/tests/test_survey.py b/tests/test_survey.py
@@ -2367,7 +2367,7 @@ def test_fractional_fweight_rejected_solve_ols(self):
         y = np.random.randn(n)
         w = np.array([1.5, 2.3, 1.0, 2.0, 1.7, 3.0, 1.0, 2.0, 1.0, 1.0])
 
-        with pytest.raises(ValueError, match="positive integers"):
+        with pytest.raises(ValueError, match="non-negative integers"):
             solve_ols(X, y, weights=w, weight_type="fweight")
 
     def test_fractional_fweight_rejected_compute_robust_vcov(self):
@@ -2378,7 +2378,7 @@ def test_fractional_fweight_rejected_compute_robust_vcov(self):
         resid = np.random.randn(n)
         w = np.array([1.5, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0])
 
-        with pytest.raises(ValueError, match="positive integers"):
+        with pytest.raises(ValueError, match="non-negative integers"):
             compute_robust_vcov(X, resid, weights=w, weight_type="fweight")
 
     def test_integer_fweight_accepted(self):
diff --git a/tests/test_survey_phase6.py b/tests/test_survey_phase6.py
@@ -1408,6 +1408,56 @@ def test_invalid_replicate_rscales_rejected(self):
                 replicate_method="JK1", replicate_rscales=[-1.0, 1.0],
             )
 
+    def _replicate_sd_and_data(self):
+        data, rep_cols = TestEstimatorReplicateWeights._make_staggered_replicate_data()
+        sd = SurveyDesign(
+            weights="weight", replicate_weights=rep_cols,
+            replicate_method="JK1",
+        )
+        return data, sd
+
+    def test_multi_period_did_replicate_rejected(self):
+        """MultiPeriodDiD rejects replicate-weight designs."""
+        from diff_diff.estimators import MultiPeriodDiD
+        data, sd = self._replicate_sd_and_data()
+        data["treated"] = (data["first_treat"] > 0).astype(int)
+        data["post"] = (data["time"] >= 3).astype(int)
+        with pytest.raises(NotImplementedError):
+            MultiPeriodDiD().fit(
+                data, outcome="outcome", treatment="treated",
+                time="post", survey_design=sd,
+            )
+
+    def test_imputation_did_replicate_rejected(self):
+        """ImputationDiD rejects replicate-weight designs."""
+        from diff_diff.imputation import ImputationDiD
+        data, sd = self._replicate_sd_and_data()
+        with pytest.raises(NotImplementedError):
+            ImputationDiD().fit(
+                data, outcome="outcome", unit="unit", time="time",
+                first_treat="first_treat", survey_design=sd,
+            )
+
+    def test_two_stage_did_replicate_rejected(self):
+        """TwoStageDiD rejects replicate-weight designs."""
+        from diff_diff.two_stage import TwoStageDiD
+        data, sd = self._replicate_sd_and_data()
+        with pytest.raises(NotImplementedError):
+            TwoStageDiD().fit(
+                data, outcome="outcome", unit="unit", time="time",
+                first_treat="first_treat", survey_design=sd,
+            )
+
+    def test_bacon_replicate_rejected(self):
+        """BaconDecomposition rejects replicate-weight designs."""
+        from diff_diff.bacon import BaconDecomposition
+        data, sd = self._replicate_sd_and_data()
+        with pytest.raises(NotImplementedError):
+            BaconDecomposition().fit(
+                data, outcome="outcome", unit="unit", time="time",
+                first_treat="first_treat", survey_design=sd,
+            )
+
 
 # =============================================================================
 # Effective-sample and d.f. consistency tests

Original file line number	Diff line number	Diff line change
`@@ -411,7 +411,7 @@ def _validate_weights(weights, weight_type, n):`
`411`	`411`	`fractional = weights - np.round(weights)`
`412`	`412`	`if np.any(np.abs(fractional) > 1e-10):`
`413`	`413`	`raise ValueError(`
`414`		`- "Frequency weights (fweight) must be positive integers. "`
	`414`	`+ "Frequency weights (fweight) must be non-negative integers. "`
`415`	`415`	`"Fractional values detected. Use pweight for non-integer weights."`
`416`	`416`	`)`
`417`	`417`	`return weights`