Address AI review P1/P2 findings for survey Phase 5

igerber · claude · igerber · commit 1be7d1e3b99a · 2026-03-24T07:40:48.000-04:00
- Return composed ω_eff (not raw ω) in SyntheticDiDResults.unit_weights
  so the returned weights match those the estimator actually used under
  survey weighting
- Add finiteness guard in TROP local _fit_with_fixed_lambda() and Rust
  bootstrap to skip non-finite (NaN/inf) treated outcomes (match main fit
  contract)
- Add finite guard on bootstrap ATT accumulator
- Add regression tests for effective weight semantics and NaN bootstrap

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
@@ -508,8 +508,10 @@ def fit(  # type: ignore[override]
         else:
             p_value = p_value_analytical
 
-        # Create weight dictionaries (store original ω, not composed)
-        unit_weights_dict = {unit_id: w for unit_id, w in zip(control_units, unit_weights)}
+        # Create weight dictionaries.  When survey weights are active, store
+        # the effective (composed) weights that were actually used for the ATT
+        # so that results.unit_weights matches the estimator.
+        unit_weights_dict = {unit_id: w for unit_id, w in zip(control_units, omega_eff)}
         time_weights_dict = {period: w for period, w in zip(pre_periods, time_weights)}
 
         # Store results
diff --git a/diff_diff/trop_local.py b/diff_diff/trop_local.py
@@ -949,7 +949,8 @@ def _bootstrap_variance(
                     optimal_lambda,
                     survey_design=survey_design,
                 )
-                bootstrap_estimates_list.append(att)
+                if np.isfinite(att):
+                    bootstrap_estimates_list.append(att)
             except (ValueError, np.linalg.LinAlgError, KeyError):
                 continue
 
@@ -1032,6 +1033,10 @@ def _fit_with_fixed_lambda(
         tau_values = []
         tau_weights = []
         for t, i in treated_observations:
+            # Skip non-finite outcomes (match main fit NaN contract)
+            if not np.isfinite(Y[t, i]):
+                continue
+
             # Compute observation-specific weights for this (i, t)
             weight_matrix = self._compute_observation_weights(
                 Y, D, i, t, lambda_time, lambda_unit, control_unit_idx, n_units, n_periods
@@ -1048,6 +1053,8 @@ def _fit_with_fixed_lambda(
             if local_weight_arr is not None:
                 tau_weights.append(local_weight_arr[i])
 
-        if local_weight_arr is not None and tau_values:
+        if not tau_values:
+            return float("nan")
+        if local_weight_arr is not None:
             return float(np.average(tau_values, weights=tau_weights))
         return float(np.mean(tau_values))
diff --git a/rust/src/trop.rs b/rust/src/trop.rs
@@ -1035,6 +1035,11 @@ pub fn bootstrap_trop_variance<'py>(
             let mut tau_count = 0usize;
 
             for (t, i) in boot_treated {
+                // Skip non-finite outcomes (match main fit NaN contract)
+                if !y_boot[[t, i]].is_finite() {
+                    continue;
+                }
+
                 let weight_matrix = compute_weight_matrix(
                     &y_boot.view(),
                     &d_boot.view(),
diff --git a/tests/test_survey_phase5.py b/tests/test_survey_phase5.py
@@ -350,6 +350,39 @@ def test_covariates_with_survey(self, sdid_survey_data, survey_design_weights):
         assert np.isfinite(result.att)
         assert result.survey_metadata is not None
 
+    def test_effective_weights_returned(self, sdid_survey_data, survey_design_weights):
+        """unit_weights returns composed ω_eff (not raw ω) under survey weighting."""
+        est = SyntheticDiD(variance_method="placebo", n_bootstrap=50, seed=42)
+        result = est.fit(
+            sdid_survey_data,
+            outcome="outcome",
+            treatment="treated",
+            unit="unit",
+            time="time",
+            post_periods=[6, 7, 8, 9],
+            survey_design=survey_design_weights,
+        )
+        weights = result.unit_weights
+        # Effective weights should sum to 1 (renormalized)
+        assert sum(weights.values()) == pytest.approx(1.0, abs=1e-10)
+        # With non-uniform survey weights, effective weights should differ
+        # from what uniform survey weights would produce
+        sdid_survey_data_u = sdid_survey_data.copy()
+        sdid_survey_data_u["uniform_w"] = 1.0
+        result_u = est.fit(
+            sdid_survey_data_u,
+            outcome="outcome",
+            treatment="treated",
+            unit="unit",
+            time="time",
+            post_periods=[6, 7, 8, 9],
+            survey_design=SurveyDesign(weights="uniform_w"),
+        )
+        # Non-uniform weights should change the returned weight distribution
+        eff_vals = sorted(weights.values(), reverse=True)
+        uni_vals = sorted(result_u.unit_weights.values(), reverse=True)
+        assert eff_vals != pytest.approx(uni_vals, abs=1e-6)
+
 
 # =============================================================================
 # TROP Survey Tests
@@ -577,3 +610,41 @@ def test_to_dict_includes_survey(self, trop_survey_data, survey_design_weights):
         d = result.to_dict()
         assert "weight_type" in d
         assert d["weight_type"] == "pweight"
+
+    def test_local_bootstrap_nan_treated_outcomes(self, trop_survey_data):
+        """Bootstrap handles NaN treated outcomes without poisoning SE."""
+        trop_survey_data = trop_survey_data.copy()
+        # Set some treated post-treatment outcomes to NaN
+        mask = (trop_survey_data["D"] == 1) & (trop_survey_data["time"] == 7)
+        trop_survey_data.loc[mask, "outcome"] = np.nan
+
+        est = TROP(method="local", n_bootstrap=10, seed=42, max_iter=5)
+        result = est.fit(
+            trop_survey_data,
+            outcome="outcome",
+            treatment="D",
+            unit="unit",
+            time="time",
+        )
+        # Point estimate should use finite cells only
+        assert np.isfinite(result.att)
+        # SE should remain finite (not poisoned by NaN)
+        assert np.isfinite(result.se)
+
+    def test_local_bootstrap_nan_with_survey(self, trop_survey_data, survey_design_weights):
+        """Bootstrap + survey handles NaN treated outcomes correctly."""
+        trop_survey_data = trop_survey_data.copy()
+        mask = (trop_survey_data["D"] == 1) & (trop_survey_data["time"] == 8)
+        trop_survey_data.loc[mask, "outcome"] = np.nan
+
+        est = TROP(method="local", n_bootstrap=10, seed=42, max_iter=5)
+        result = est.fit(
+            trop_survey_data,
+            outcome="outcome",
+            treatment="D",
+            unit="unit",
+            time="time",
+            survey_design=survey_design_weights,
+        )
+        assert np.isfinite(result.att)
+        assert np.isfinite(result.se)