Skip to content

Commit 10bfc03

Browse files
igerberclaude
andcommitted
Reject replicate designs in unsupported estimators
Add NotImplementedError for replicate-weight survey designs in DifferenceInDifferences, MultiPeriodDiD, StackedDiD, ImputationDiD, TwoStageDiD, SyntheticDiD, and TROP. These estimators use TSL (compute_survey_vcov) or custom variance computations that do not dispatch to replicate-based variance. Document full replicate-weight support matrix in REGISTRY.md. Also adds df<=0 early-return in safe_inference/safe_inference_batch, fixes LinearRegression.get_inference() generic fallback for replicate designs, re-applies EfficientDiD replicate guard after unit design rebuild, and adds guard at CS first df_survey read. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dd8f51b commit 10bfc03

8 files changed

Lines changed: 67 additions & 18 deletions

File tree

diff_diff/estimators.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,15 @@ def fit(
240240
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
241241
_resolve_survey_for_fit(survey_design, data, self.inference)
242242
)
243+
# Reject replicate-weight designs — base DiD uses compute_survey_vcov
244+
# (TSL) directly, not LinearRegression's replicate dispatch.
245+
if resolved_survey is not None and resolved_survey.uses_replicate_variance:
246+
raise NotImplementedError(
247+
"DifferenceInDifferences does not yet support replicate-weight "
248+
"survey designs. Use CallawaySantAnna, EfficientDiD, "
249+
"ContinuousDiD, or TripleDifference for replicate-weight "
250+
"inference, or use a TSL-based survey design (strata/psu/fpc)."
251+
)
243252

244253
# Handle absorbed fixed effects (within-transformation)
245254
working_data = data.copy()
@@ -1008,6 +1017,15 @@ def fit( # type: ignore[override]
10081017
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
10091018
_resolve_survey_for_fit(survey_design, data, effective_inference)
10101019
)
1020+
# Reject replicate-weight designs — MultiPeriodDiD uses
1021+
# compute_survey_vcov (TSL) directly without replicate dispatch.
1022+
if resolved_survey is not None and resolved_survey.uses_replicate_variance:
1023+
raise NotImplementedError(
1024+
"MultiPeriodDiD does not yet support replicate-weight survey "
1025+
"designs. Use CallawaySantAnna for staggered adoption with "
1026+
"replicate weights, or use a TSL-based survey design "
1027+
"(strata/psu/fpc)."
1028+
)
10111029

10121030
# Handle absorbed fixed effects (within-transformation)
10131031
working_data = data.copy()

diff_diff/imputation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,11 @@ def fit(
246246

247247
# Validate within-unit constancy for panel survey designs
248248
if resolved_survey is not None:
249+
if resolved_survey.uses_replicate_variance:
250+
raise NotImplementedError(
251+
"ImputationDiD does not yet support replicate-weight survey "
252+
"designs. Use a TSL-based survey design (strata/psu/fpc)."
253+
)
249254
_validate_unit_constant_survey(data, unit, survey_design)
250255
if resolved_survey.weight_type != "pweight":
251256
raise ValueError(

diff_diff/stacked_did.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,15 @@ def fit(
242242
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
243243
_resolve_survey_for_fit(survey_design, data, "analytical")
244244
)
245+
# Reject replicate-weight designs — StackedDiD uses
246+
# compute_survey_vcov (TSL) directly without replicate dispatch.
247+
if resolved_survey is not None and resolved_survey.uses_replicate_variance:
248+
raise NotImplementedError(
249+
"StackedDiD does not yet support replicate-weight survey "
250+
"designs. Use CallawaySantAnna for staggered adoption with "
251+
"replicate weights, or use a TSL-based survey design "
252+
"(strata/psu/fpc)."
253+
)
245254

246255
# Reject fweight and aweight — Q-weight composition is ratio-valued
247256
# and breaks both frequency-weight (integer) and analytic-weight

diff_diff/synthetic_did.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,12 @@ def fit( # type: ignore[override]
256256
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
257257
_resolve_survey_for_fit(survey_design, data, "analytical")
258258
)
259+
# Reject replicate-weight designs — SyntheticDiD uses bootstrap variance
260+
if resolved_survey is not None and resolved_survey.uses_replicate_variance:
261+
raise NotImplementedError(
262+
"SyntheticDiD does not yet support replicate-weight survey "
263+
"designs. Use a TSL-based survey design (strata/psu/fpc)."
264+
)
259265
# Validate pweight only (strata/PSU/FPC are allowed for Rao-Wu bootstrap)
260266
if resolved_survey is not None and resolved_survey.weight_type != "pweight":
261267
raise ValueError(

diff_diff/trop.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ def fit(
461461
resolved_survey, _survey_weights, _survey_wt, survey_metadata = _resolve_survey_for_fit(
462462
survey_design, data, "analytical"
463463
)
464+
# Reject replicate-weight designs — TROP uses Rao-Wu bootstrap
465+
if resolved_survey is not None and resolved_survey.uses_replicate_variance:
466+
raise NotImplementedError(
467+
"TROP does not yet support replicate-weight survey designs. "
468+
"Use a TSL-based survey design (strata/psu/fpc)."
469+
)
464470
# Validate weight_type is pweight (keep restriction), but allow
465471
# strata/PSU/FPC — those are handled via Rao-Wu rescaled bootstrap.
466472
if resolved_survey is not None and resolved_survey.weight_type != "pweight":

diff_diff/two_stage.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,11 @@ def fit(
242242

243243
# Validate within-unit constancy for panel survey designs
244244
if resolved_survey is not None:
245+
if resolved_survey.uses_replicate_variance:
246+
raise NotImplementedError(
247+
"TwoStageDiD does not yet support replicate-weight survey "
248+
"designs. Use a TSL-based survey design (strata/psu/fpc)."
249+
)
245250
_validate_unit_constant_survey(data, unit, survey_design)
246251
if resolved_survey.weight_type != "pweight":
247252
raise ValueError(

docs/methodology/REGISTRY.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,12 +2045,17 @@ variance from the distribution of replicate estimates.
20452045
design structure is fixed and dropped replicates contribute zero to the
20462046
sum without changing the scale. Survey df uses `n_valid - 1` for
20472047
t-based inference.
2048-
- **Note:** SunAbraham rejects replicate-weight designs with
2049-
`NotImplementedError` because the weighted within-transformation must be
2050-
recomputed per replicate (not yet implemented).
2051-
- **Note:** CallawaySantAnna, ContinuousDiD, and EfficientDiD reject
2052-
replicate weights with `n_bootstrap > 0`. Replicate weights provide
2053-
analytical variance; bootstrap is a separate inference mechanism.
2048+
- **Note:** Replicate-weight support matrix:
2049+
- **Supported**: CallawaySantAnna (reg, no bootstrap), ContinuousDiD
2050+
(no bootstrap), EfficientDiD (no bootstrap), TripleDifference (all
2051+
methods), LinearRegression (OLS path)
2052+
- **Rejected with NotImplementedError**: SunAbraham (within-transformation
2053+
must be recomputed per replicate), DifferenceInDifferences,
2054+
MultiPeriodDiD, StackedDiD (use compute_survey_vcov directly),
2055+
ImputationDiD, TwoStageDiD (custom variance), SyntheticDiD, TROP
2056+
(bootstrap-based variance)
2057+
- CS/ContinuousDiD/EfficientDiD reject replicate + `n_bootstrap > 0`
2058+
(replicate weights provide analytical variance)
20542059
- **Note:** When invalid replicates are dropped in `compute_replicate_vcov`
20552060
(OLS path), `n_valid` is returned and used for `df_survey = n_valid - 1`
20562061
in `LinearRegression.fit()`. For IF-based replicate paths, replicates

tests/test_survey_phase6.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -498,10 +498,9 @@ def test_replicate_metadata(self, replicate_data):
498498
assert sm.n_replicates == len(rep_cols)
499499
assert sm.df_survey == len(rep_cols) - 1
500500

501-
def test_replicate_with_did(self, replicate_data):
502-
"""Replicate weights work end-to-end with DifferenceInDifferences."""
501+
def test_replicate_rejected_by_base_did(self, replicate_data):
502+
"""DifferenceInDifferences rejects replicate-weight designs."""
503503
data, rep_cols = replicate_data
504-
# Add DiD structure
505504
n = len(data)
506505
data["treated"] = (np.arange(n) < n // 2).astype(int)
507506
data["post"] = (np.arange(n) % 4 >= 2).astype(int)
@@ -511,15 +510,11 @@ def test_replicate_with_did(self, replicate_data):
511510
weights="weight", replicate_weights=rep_cols,
512511
replicate_method="JK1",
513512
)
514-
est = DifferenceInDifferences()
515-
result = est.fit(
516-
data, outcome="outcome", treatment="treated", time="post",
517-
survey_design=sd,
518-
)
519-
assert np.isfinite(result.att)
520-
assert np.isfinite(result.se)
521-
assert result.survey_metadata is not None
522-
assert result.survey_metadata.replicate_method == "JK1"
513+
with pytest.raises(NotImplementedError, match="DifferenceInDifferences"):
514+
DifferenceInDifferences().fit(
515+
data, outcome="outcome", treatment="treated", time="post",
516+
survey_design=sd,
517+
)
523518

524519
def test_replicate_if_variance(self, replicate_data):
525520
"""IF-based replicate variance produces finite results."""

0 commit comments

Comments
 (0)