Skip to content

Commit b8ff7ce

Browse files
authored
Merge pull request #236 from igerber/survey-next-phase
Add survey data support for SyntheticDiD and TROP (Phase 5)
2 parents aec5671 + e4b3f10 commit b8ff7ce

13 files changed

Lines changed: 1562 additions & 280 deletions

File tree

TODO.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Deferred items from PR reviews that were not addressed before merge.
5454
| Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
5555
| CallawaySantAnna survey: strata/PSU/FPC rejected at runtime. Full design-based SEs require routing the combined IF/WIF through `compute_survey_vcov()`. Currently weights-only. | `staggered.py` | #233 | Medium |
5656
| CallawaySantAnna survey + covariates + IPW/DR: DRDID panel nuisance-estimation IF corrections not implemented. Currently gated with NotImplementedError. Regression method with covariates works (has WLS nuisance IF correction). | `staggered.py` | #233 | Medium |
57+
| SyntheticDiD/TROP survey: strata/PSU/FPC deferred. Full design-based bootstrap (Rao-Wu rescaled weights) needed for survey-aware resampling. Currently pweight-only. | `synthetic_did.py`, `trop.py` | #236 | Medium |
5758
| EfficientDiD hausman_pretest() clustered covariance uses stale `n_cl` after filtering non-finite EIF rows — should recompute effective cluster count and remap indices after `row_finite` filtering | `efficient_did.py` | #230 | Medium |
5859
| EfficientDiD `control_group="last_cohort"` trims at `last_g - anticipation` but REGISTRY says `t >= last_g`. With `anticipation=0` (default) these are identical. With `anticipation>0`, code is arguably more conservative (excludes anticipation-contaminated periods). Either align REGISTRY with code or change code to `t < last_g` — needs design decision. | `efficient_did.py` | #230 | Low |
5960
| TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |

diff_diff/results.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,8 @@ class SyntheticDiDResults:
680680
pre_treatment_fit: Optional[float] = field(default=None)
681681
placebo_effects: Optional[np.ndarray] = field(default=None)
682682
n_bootstrap: Optional[int] = field(default=None)
683+
# Survey design metadata (SurveyMetadata instance from diff_diff.survey)
684+
survey_metadata: Optional[Any] = field(default=None)
683685

684686
def __repr__(self) -> str:
685687
"""Concise string representation."""
@@ -735,6 +737,28 @@ def summary(self, alpha: Optional[float] = None) -> str:
735737
if self.variance_method == "bootstrap" and self.n_bootstrap is not None:
736738
lines.append(f"{'Bootstrap replications:':<25} {self.n_bootstrap:>10}")
737739

740+
# Add survey design info
741+
if self.survey_metadata is not None:
742+
sm = self.survey_metadata
743+
lines.extend(
744+
[
745+
"",
746+
"-" * 75,
747+
"Survey Design".center(75),
748+
"-" * 75,
749+
f"{'Weight type:':<25} {sm.weight_type:>10}",
750+
]
751+
)
752+
if sm.n_strata is not None:
753+
lines.append(f"{'Strata:':<25} {sm.n_strata:>10}")
754+
if sm.n_psu is not None:
755+
lines.append(f"{'PSU/Cluster:':<25} {sm.n_psu:>10}")
756+
lines.append(f"{'Effective sample size:':<25} {sm.effective_n:>10.1f}")
757+
lines.append(f"{'Design effect (DEFF):':<25} {sm.design_effect:>10.2f}")
758+
if sm.df_survey is not None:
759+
lines.append(f"{'Survey d.f.:':<25} {sm.df_survey:>10}")
760+
lines.append("-" * 75)
761+
738762
lines.extend(
739763
[
740764
"",
@@ -812,6 +836,15 @@ def to_dict(self) -> Dict[str, Any]:
812836
}
813837
if self.n_bootstrap is not None:
814838
result["n_bootstrap"] = self.n_bootstrap
839+
if self.survey_metadata is not None:
840+
sm = self.survey_metadata
841+
result["weight_type"] = sm.weight_type
842+
result["effective_n"] = sm.effective_n
843+
result["design_effect"] = sm.design_effect
844+
result["sum_weights"] = sm.sum_weights
845+
result["n_strata"] = sm.n_strata
846+
result["n_psu"] = sm.n_psu
847+
result["df_survey"] = sm.df_survey
815848
return result
816849

817850
def to_dataframe(self) -> pd.DataFrame:

diff_diff/survey.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,66 @@ def _validate_unit_constant_survey(data, unit_col, survey_design):
430430
)
431431

432432

433+
def _resolve_pweight_only(resolved_survey, estimator_name):
434+
"""Guard: reject non-pweight and strata/PSU/FPC for pweight-only estimators.
435+
436+
Parameters
437+
----------
438+
resolved_survey : ResolvedSurveyDesign or None
439+
Resolved survey design. If None, returns immediately.
440+
estimator_name : str
441+
Estimator name for error messages.
442+
443+
Raises
444+
------
445+
ValueError
446+
If weight_type is not 'pweight'.
447+
NotImplementedError
448+
If strata, PSU, or FPC are present.
449+
"""
450+
if resolved_survey is None:
451+
return
452+
if resolved_survey.weight_type != "pweight":
453+
raise ValueError(
454+
f"{estimator_name} survey support requires weight_type='pweight'. "
455+
f"Got '{resolved_survey.weight_type}'."
456+
)
457+
if (
458+
resolved_survey.strata is not None
459+
or resolved_survey.psu is not None
460+
or resolved_survey.fpc is not None
461+
):
462+
raise NotImplementedError(
463+
f"{estimator_name} does not yet support strata/PSU/FPC in "
464+
"SurveyDesign. Use SurveyDesign(weights=...) only. Full "
465+
"design-based bootstrap is planned for the Bootstrap + "
466+
"Survey Interaction phase."
467+
)
468+
469+
470+
def _extract_unit_survey_weights(data, unit_col, survey_design, unit_order):
471+
"""Extract unit-level survey weights aligned to a given unit ordering.
472+
473+
Parameters
474+
----------
475+
data : pd.DataFrame
476+
Panel data with survey weight column.
477+
unit_col : str
478+
Unit identifier column name.
479+
survey_design : SurveyDesign
480+
Survey design (uses ``weights`` column name).
481+
unit_order : array-like
482+
Ordered sequence of unit identifiers to align weights to.
483+
484+
Returns
485+
-------
486+
np.ndarray
487+
Float64 array of unit-level weights, one per unit in ``unit_order``.
488+
"""
489+
unit_w = data.groupby(unit_col)[survey_design.weights].first()
490+
return np.array([unit_w[u] for u in unit_order], dtype=np.float64)
491+
492+
433493
def _resolve_survey_for_fit(survey_design, data, inference_mode="analytical"):
434494
"""
435495
Shared helper: validate and resolve a SurveyDesign for an estimator fit() call.

0 commit comments

Comments
 (0)