Guard all replicate-df paths for NaN inference when rank <= 1

igerber · claude · igerber · commit dd8f51bdd7d6 · 2026-03-28T12:38:40.000-04:00
Fix four remaining gaps where undefined replicate df still produced
finite inference:

1. safe_inference/safe_inference_batch: early-return all-NaN when df<=0
2. LinearRegression.get_inference: skip generic "df<=0 → normal" fallback
   for replicate designs so df=0 sentinel flows through to safe_inference
3. EfficientDiD: re-apply replicate guard after unit-level design rebuild
   overwrites self._survey_df
4. CallawaySantAnna: add guard at first df_survey read (covers general
   survey+covariate g,t path)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
@@ -527,6 +527,10 @@ def fit(
             )
             # Use unit-level df (not panel-level) for t-distribution
             self._survey_df = self._unit_resolved_survey.df_survey
+            # Re-apply replicate guard: undefined df → NaN inference
+            if (self._survey_df is None
+                    and self._unit_resolved_survey.uses_replicate_variance):
+                self._survey_df = 0
         else:
             self._unit_resolved_survey = None
 
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -2043,7 +2043,11 @@ def get_inference(
             effective_df = self.df_
 
         # Warn if df is non-positive and fall back to normal distribution
-        if effective_df is not None and effective_df <= 0:
+        # (skip for replicate designs — df=0 is intentional for NaN inference)
+        _is_replicate = (hasattr(self, 'survey_design') and self.survey_design is not None
+                         and hasattr(self.survey_design, 'uses_replicate_variance')
+                         and self.survey_design.uses_replicate_variance)
+        if effective_df is not None and effective_df <= 0 and not _is_replicate:
             import warnings
 
             warnings.warn(
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1410,6 +1410,11 @@ def fit(
         # Survey df for safe_inference calls — use the unit-level resolved
         # survey df computed in _precompute_structures for consistency.
         df_survey = precomputed.get("df_survey")
+        # Guard: replicate design with undefined df (rank <= 1) → NaN inference
+        if (df_survey is None and resolved_survey is not None
+                and hasattr(resolved_survey, 'uses_replicate_variance')
+                and resolved_survey.uses_replicate_variance):
+            df_survey = 0
 
         # Compute ATT(g,t) for each group-time combination
         min_period = min(time_periods)
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
@@ -177,6 +177,9 @@ def safe_inference(effect, se, alpha=0.05, df=None):
     """
     if not (np.isfinite(se) and se > 0):
         return np.nan, np.nan, (np.nan, np.nan)
+    if df is not None and df <= 0:
+        # Undefined degrees of freedom (e.g., rank-deficient replicate design)
+        return np.nan, np.nan, (np.nan, np.nan)
     t_stat = effect / se
     p_value = compute_p_value(t_stat, df=df)
     conf_int = compute_confidence_interval(effect, se, alpha, df=df)
@@ -213,6 +216,10 @@ def safe_inference_batch(effects, ses, alpha=0.05, df=None):
     ci_lowers = np.full(n, np.nan)
     ci_uppers = np.full(n, np.nan)
 
+    # Undefined df (e.g., rank-deficient replicate design) → all NaN
+    if df is not None and df <= 0:
+        return t_stats, p_values, ci_lowers, ci_uppers
+
     valid = np.isfinite(ses) & (ses > 0)
     if not np.any(valid):
         return t_stats, p_values, ci_lowers, ci_uppers