|
17 | 17 | import numpy as np |
18 | 18 | import pandas as pd |
19 | 19 |
|
| 20 | +from diff_diff.results import _format_survey_block |
20 | 21 | from diff_diff.utils import within_transform as _within_transform_util |
21 | 22 |
|
22 | 23 |
|
@@ -144,23 +145,7 @@ def summary(self) -> str: |
144 | 145 | # Add survey design info |
145 | 146 | if self.survey_metadata is not None: |
146 | 147 | sm = self.survey_metadata |
147 | | - lines.extend( |
148 | | - [ |
149 | | - "-" * 85, |
150 | | - "Survey Design".center(85), |
151 | | - "-" * 85, |
152 | | - f"{'Weight type:':<35} {sm.weight_type:>10}", |
153 | | - ] |
154 | | - ) |
155 | | - if sm.n_strata is not None: |
156 | | - lines.append(f"{'Strata:':<35} {sm.n_strata:>10}") |
157 | | - if sm.n_psu is not None: |
158 | | - lines.append(f"{'PSU/Cluster:':<35} {sm.n_psu:>10}") |
159 | | - lines.append(f"{'Effective sample size:':<35} {sm.effective_n:>10.1f}") |
160 | | - lines.append(f"{'Design effect (DEFF):':<35} {sm.design_effect:>10.2f}") |
161 | | - if sm.df_survey is not None: |
162 | | - lines.append(f"{'Survey d.f.:':<35} {sm.df_survey:>10}") |
163 | | - lines.extend(["-" * 85, ""]) |
| 148 | + lines.extend(_format_survey_block(sm, 85)) |
164 | 149 |
|
165 | 150 | lines.extend( |
166 | 151 | [ |
@@ -477,6 +462,13 @@ def fit( |
477 | 462 | resolved_survey, survey_weights, survey_weight_type, survey_metadata = ( |
478 | 463 | _resolve_survey_for_fit(survey_design, data, "analytical") |
479 | 464 | ) |
| 465 | + # Reject replicate-weight designs — Bacon decomposition is a |
| 466 | + # diagnostic that does not compute replicate-based variance |
| 467 | + if resolved_survey is not None and resolved_survey.uses_replicate_variance: |
| 468 | + raise NotImplementedError( |
| 469 | + "BaconDecomposition does not support replicate-weight survey " |
| 470 | + "designs. Use a TSL-based survey design (strata/psu/fpc)." |
| 471 | + ) |
480 | 472 |
|
481 | 473 | # Validate within-unit constancy for exact survey weights only. |
482 | 474 | # The exact-weight path collapses to per-unit weights via groupby().first(), |
@@ -593,6 +585,13 @@ def fit( |
593 | 585 | weights=survey_weights, |
594 | 586 | ) |
595 | 587 |
|
| 588 | + if not comparisons: |
| 589 | + raise ValueError( |
| 590 | + "No valid 2x2 comparisons remain after filtering. " |
| 591 | + "All cells have zero effective weight or insufficient data. " |
| 592 | + "Check subpopulation/domain definition." |
| 593 | + ) |
| 594 | + |
596 | 595 | # Normalize weights to sum to 1 |
597 | 596 | total_weight = sum(c.weight for c in comparisons) |
598 | 597 | if total_weight > 0: |
@@ -849,13 +848,21 @@ def _compute_treated_vs_never( |
849 | 848 | never_post_mask = never_mask & df[time].isin(post_periods) |
850 | 849 |
|
851 | 850 | # Guard against empty cells (unbalanced/filtered panels) |
| 851 | + # Also check positive weight mass for survey/subpopulation designs |
852 | 852 | if not ( |
853 | 853 | np.any(treated_pre_mask) |
854 | 854 | and np.any(treated_post_mask) |
855 | 855 | and np.any(never_pre_mask) |
856 | 856 | and np.any(never_post_mask) |
857 | 857 | ): |
858 | 858 | return None |
| 859 | + if ( |
| 860 | + np.sum(w[treated_pre_mask]) <= 0 |
| 861 | + or np.sum(w[treated_post_mask]) <= 0 |
| 862 | + or np.sum(w[never_pre_mask]) <= 0 |
| 863 | + or np.sum(w[never_post_mask]) <= 0 |
| 864 | + ): |
| 865 | + return None |
859 | 866 |
|
860 | 867 | treated_pre = np.average(y[treated_pre_mask], weights=w[treated_pre_mask]) |
861 | 868 | treated_post = np.average(y[treated_post_mask], weights=w[treated_post_mask]) |
@@ -966,14 +973,21 @@ def _compute_timing_comparison( |
966 | 973 | control_pre_mask = control_mask & df[time].isin(pre_periods) |
967 | 974 | control_post_mask = control_mask & df[time].isin(post_periods) |
968 | 975 |
|
969 | | - # Skip if any cell is empty |
| 976 | + # Skip if any cell is empty or has zero effective weight |
970 | 977 | if ( |
971 | 978 | treated_pre_mask.sum() == 0 |
972 | 979 | or treated_post_mask.sum() == 0 |
973 | 980 | or control_pre_mask.sum() == 0 |
974 | 981 | or control_post_mask.sum() == 0 |
975 | 982 | ): |
976 | 983 | return None |
| 984 | + if ( |
| 985 | + np.sum(w[treated_pre_mask]) <= 0 |
| 986 | + or np.sum(w[treated_post_mask]) <= 0 |
| 987 | + or np.sum(w[control_pre_mask]) <= 0 |
| 988 | + or np.sum(w[control_post_mask]) <= 0 |
| 989 | + ): |
| 990 | + return None |
977 | 991 |
|
978 | 992 | treated_pre = np.average(y[treated_pre_mask], weights=w[treated_pre_mask]) |
979 | 993 | treated_post = np.average(y[treated_post_mask], weights=w[treated_post_mask]) |
|
0 commit comments