Match R's H/n, asy_rep/n, colMeans convention for panel PS corrections; fix VCV index subsetting

igerber · claude · igerber · commit eac680ee5302 · 2026-03-29T10:27:58.000-04:00
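Panel IPW/DR PS corrections: restructure to match the convention used by R's
std_ipw_did_panel / drdid_panel: H = X'WX/n, asy_lin_rep = score @ solve(H) / n,
M2 = colMeans(). This is intended to be algebraically equivalent to the previous
form (the 1/n in H cancels the 1/n in asy_lin_rep) while mirroring the R source
literally.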

HonestDiD VCV subsetting: store event_study_vcov_index (the exact event-time
ordering matching VCV columns) so subsetting works correctly even when
universal base period injects a reference row into event_study_effects.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py
@@ -665,15 +665,21 @@ def _extract_event_study_params(
                 # otherwise fall back to diagonal from SEs
                 if hasattr(results, "event_study_vcov") and results.event_study_vcov is not None:
                     vcov = results.event_study_vcov
-                    # VCV is indexed by ALL event times from aggregation;
-                    # rel_times may be a filtered subset (NaN-SE times dropped).
-                    # Subset VCV to match the surviving rel_times.
-                    all_event_times = sorted(results.event_study_effects.keys())
-                    if vcov.shape[0] == len(all_event_times) and len(rel_times) < len(all_event_times):
-                        idx = [all_event_times.index(t) for t in rel_times]
-                        sigma = vcov[np.ix_(idx, idx)]
-                    else:
+                    # VCV is indexed by the aggregated event times (stored in
+                    # event_study_vcov_index), NOT by event_study_effects keys
+                    # (which may include an injected reference period).
+                    # Subset to match the surviving rel_times.
+                    vcov_index = getattr(results, "event_study_vcov_index", None)
+                    if vcov_index is not None and len(rel_times) < len(vcov_index):
+                        idx = [vcov_index.index(t) for t in rel_times if t in vcov_index]
+                        if len(idx) == len(rel_times):
+                            sigma = vcov[np.ix_(idx, idx)]
+                        else:
+                            sigma = np.diag(np.array(ses) ** 2)
+                    elif vcov.shape[0] == len(rel_times):
                         sigma = vcov
+                    else:
+                        sigma = np.diag(np.array(ses) ** 2)
                 else:
                     sigma = np.diag(np.array(ses) ** 2)
 
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1775,8 +1775,10 @@ def fit(
         # Clear it when bootstrap overwrites event-study SEs to prevent
         # HonestDiD from mixing analytical VCV with bootstrap SEs.
         event_study_vcov = getattr(self, "_event_study_vcov", None)
+        event_study_vcov_index = getattr(self, "_event_study_vcov_index", None)
         if bootstrap_results is not None and event_study_vcov is not None:
             event_study_vcov = None
+            event_study_vcov_index = None
 
         self.results_ = CallawaySantAnnaResults(
             group_time_effects=group_time_effects,
@@ -1800,6 +1802,7 @@ def fit(
             pscore_trim=self.pscore_trim,
             survey_metadata=survey_metadata,
             event_study_vcov=event_study_vcov,
+            event_study_vcov_index=event_study_vcov_index,
             panel=self.panel,
         )
 
@@ -2032,35 +2035,29 @@ def _ipw_estimation(
                 X_all_int = np.column_stack([np.ones(n_t + n_c), X_all])
                 pscore_all = np.concatenate([pscore_treated, pscore_control])
 
-                # Survey-weighted PS Hessian: sum(w_i * mu_i * (1-mu_i) * x_i * x_i')
+                # PS IF correction — matches R's std_ipw_did_panel convention:
+                # H = X'WX / n, asy_lin_rep = score @ solve(H) / n, M2 = colMeans
+                n_all_panel = n_t + n_c
                 W_ps = pscore_all * (1 - pscore_all)
                 if sw_all is not None:
                     W_ps = W_ps * sw_all
-                H = X_all_int.T @ (W_ps[:, None] * X_all_int)
-                try:
-                    H_inv = np.linalg.solve(H, np.eye(H.shape[0]))
-                except np.linalg.LinAlgError:
-                    H_inv = np.linalg.lstsq(H, np.eye(H.shape[0]), rcond=None)[0]
+                H = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
+                H_inv = _safe_inv(H)
 
-                # PS score: w_i * (D_i - pi_i) * X_i
                 D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
                 score_ps = (D_all - pscore_all)[:, None] * X_all_int
                 if sw_all is not None:
                     score_ps = score_ps * sw_all[:, None]
-                asy_lin_rep_ps = score_ps @ H_inv  # shape (n_t + n_c, p)
+                asy_lin_rep_ps = score_ps @ H_inv / n_all_panel
 
-                # M2: gradient of ATT w.r.t. PS parameters
-                # R convention: colMeans over ALL n obs (zero for treated rows)
                 att_control_weighted = np.sum(weights_control_norm * control_change)
-                M2 = np.sum(
+                M2 = np.mean(
                     (weights_control_norm * (control_change - att_control_weighted))[:, None]
                     * X_all_int[n_t:],
                     axis=0,
-                ) / (n_t + n_c)
+                )
 
-                # PS correction to influence function
-                inf_ps_correction = asy_lin_rep_ps @ M2
-                inf_func = inf_func + inf_ps_correction
+                inf_func = inf_func + asy_lin_rep_ps @ M2
 
                 # SE from influence function variance
                 var_psi = np.sum(inf_func**2)
@@ -2295,29 +2292,26 @@ def _doubly_robust(
                     )
                     pscore_all = np.concatenate([pscore_treated_clipped, pscore_control])
 
-                    # Survey-weighted PS Hessian
+                    # PS IF correction — R convention: H/n, asy_rep/n, colMeans
+                    n_all_panel = n_t + n_c
                     W_ps = pscore_all * (1 - pscore_all)
                     if sw_all is not None:
                         W_ps = W_ps * sw_all
-                    H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int)
+                    H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
                     H_ps_inv = _safe_inv(H_ps)
 
-                    # PS score
                     D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
                     score_ps = (D_all - pscore_all)[:, None] * X_all_int
                     if sw_all is not None:
                         score_ps = score_ps * sw_all[:, None]
-                    asy_lin_rep_ps = score_ps @ H_ps_inv  # (n_t+n_c, p+1)
+                    asy_lin_rep_ps = score_ps @ H_ps_inv / n_all_panel
 
-                    # M2_dr: dATT/dgamma — gradient of DR ATT w.r.t. PS parameters
-                    # Only the control augmentation term depends on PS via w_ipw
-                    # R convention: colMeans over ALL n obs (zero for treated rows)
                     dr_resid_control = m_control - control_change
-                    M2_dr = np.sum(
+                    M2_dr = np.mean(
                         ((weights_control / sw_t_sum) * dr_resid_control)[:, None]
                         * X_all_int[n_t:],
                         axis=0,
-                    ) / (n_t + n_c)
+                    )
                     inf_func = inf_func + asy_lin_rep_ps @ M2_dr
 
                     # --- OR IF correction ---
@@ -2358,27 +2352,27 @@ def _doubly_robust(
                 inf_func = np.concatenate([psi_treated, psi_control])
 
                 if X_treated is not None and X_control is not None and X_treated.shape[1] > 0:
-                    # --- PS IF correction ---
-                    X_all_int = np.column_stack([np.ones(n_t + n_c), X_all])
+                    # --- PS IF correction — R convention: H/n, asy_rep/n, colMeans ---
+                    n_all_panel = n_t + n_c
+                    X_all_int = np.column_stack([np.ones(n_all_panel), X_all])
                     pscore_treated_clipped = np.clip(
                         pscore[:n_t], self.pscore_trim, 1 - self.pscore_trim
                     )
                     pscore_all = np.concatenate([pscore_treated_clipped, pscore_control])
 
                     W_ps = pscore_all * (1 - pscore_all)
-                    H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int)
+                    H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
                     H_ps_inv = _safe_inv(H_ps)
 
                     D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
                     score_ps = (D_all - pscore_all)[:, None] * X_all_int
-                    asy_lin_rep_ps = score_ps @ H_ps_inv
+                    asy_lin_rep_ps = score_ps @ H_ps_inv / n_all_panel
 
-                    # R convention: colMeans over ALL n obs (zero for treated rows)
                     dr_resid_control = m_control - control_change
-                    M2_dr = np.sum(
+                    M2_dr = np.mean(
                         ((weights_control / n_t) * dr_resid_control)[:, None] * X_all_int[n_t:],
                         axis=0,
-                    ) / (n_t + n_c)
+                    )
                     inf_func = inf_func + asy_lin_rep_ps @ M2_dr
 
                     # --- OR IF correction ---
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -751,6 +751,12 @@ def _aggregate_event_study(
             except (ValueError, np.linalg.LinAlgError):
                 pass  # Fall back to diagonal (None)
 
+        # Store the event-time index that matches VCV columns (for subsetting
+        # in HonestDiD when some event times are filtered out)
+        self._event_study_vcov_index = (
+            [e for e, _ in sorted_periods] if event_study_vcov is not None else None
+        )
+
         # Attach VCV to self for CallawaySantAnna to pick up
         self._event_study_vcov = event_study_vcov
 
diff --git a/diff_diff/staggered_results.py b/diff_diff/staggered_results.py
@@ -115,8 +115,9 @@ class CallawaySantAnnaResults:
     event_study_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None)
     group_effects: Optional[Dict[Any, Dict[str, Any]]] = field(default=None)
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
-    # Full event-study VCV matrix (Phase 7d): indexed by sorted relative times
+    # Full event-study VCV matrix (Phase 7d): indexed by event_study_vcov_index
     event_study_vcov: Optional["np.ndarray"] = field(default=None, repr=False)
+    event_study_vcov_index: Optional[list] = field(default=None, repr=False)
     bootstrap_results: Optional["CSBootstrapResults"] = field(default=None, repr=False)
     cband_crit_value: Optional[float] = None
     pscore_trim: float = 0.01