Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cell-eval"
version = "0.6.6"
version = "0.6.7"
description = "Evaluation metrics for single-cell perturbation predictions"
readme = "README.md"
authors = [
Expand Down
30 changes: 30 additions & 0 deletions src/cell_eval/_cli/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def parse_args_run(parser: ap.ArgumentParser):
type=str,
help="Metrics to skip (comma-separated for multiple) (see docs for more details)",
)
parser.add_argument(
"--fdr-threshold",
type=float,
default=0.05,
help="FDR threshold for DE significance [default: %(default)s]",
)
parser.add_argument(
"--version",
action="version",
Expand Down Expand Up @@ -142,6 +148,30 @@ def run_evaluation(args: ap.Namespace):
else {}
)

# Add fdr_threshold to all DE metrics that accept it.
# NOTE(review): this list duplicates _build_de_metric_configs in _evaluator.py;
# once MetricsEvaluator accepts fdr_threshold in its constructor, prefer passing
# fdr_threshold=args.fdr_threshold there and deleting this block.
de_metrics_with_fdr = [
    "de_spearman_sig",
    "de_direction_match",
    "de_spearman_lfc_sig",
    "de_sig_genes_recall",
    "de_nsig_counts",
    "pr_auc",
    "roc_auc",
    # overlap/precision metrics
    "overlap_at_N",
    "overlap_at_50",
    "overlap_at_100",
    "overlap_at_200",
    "overlap_at_500",
    "precision_at_N",
    "precision_at_50",
    "precision_at_100",
    "precision_at_200",
    "precision_at_500",
]
for metric_name in de_metrics_with_fdr:
    # Nested setdefault: do not clobber an fdr_threshold the user already
    # supplied for a specific metric via metric kwargs — the CLI-wide
    # --fdr-threshold only fills in where no per-metric value exists.
    metric_kwargs.setdefault(metric_name, {}).setdefault(
        "fdr_threshold", args.fdr_threshold
    )
Comment on lines +151 to +173
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block duplicates logic for setting the fdr_threshold that is now handled within the MetricsEvaluator class (in _build_de_metric_configs). To avoid code duplication and improve maintainability, this block should be removed. The fdr_threshold from the command-line arguments should be passed directly to the MetricsEvaluator constructor instead.

After removing this, you'll need to update the MetricsEvaluator instantiations in this file to include fdr_threshold=args.fdr_threshold. I cannot suggest this change directly as it is outside the diff.


skip_metrics = args.skip_metrics.split(",") if args.skip_metrics else None

if args.celltype_col is not None:
Expand Down
35 changes: 34 additions & 1 deletion src/cell_eval/_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class MetricsEvaluator:
pdex_kwargs: dict[str, Any] | None = None
Keyword arguments for parallel_differential_expression.
These will overwrite arguments passed to MetricsEvaluator.__init__ if they conflict.
fdr_threshold: float = 0.05
FDR threshold for DE significance used in DE metrics.
"""

def __init__(
Expand All @@ -71,6 +73,7 @@ def __init__(
prefix: str | None = None,
pdex_kwargs: dict[str, Any] | None = None,
skip_de: bool = False,
fdr_threshold: float = 0.05,
):
# Enable a global string cache for categorical columns
pl.enable_string_cache()
Expand Down Expand Up @@ -107,6 +110,7 @@ def __init__(

self.outdir = outdir
self.prefix = prefix
self.fdr_threshold = fdr_threshold

def compute(
self,
Expand All @@ -117,9 +121,13 @@ def compute(
write_csv: bool = True,
break_on_error: bool = False,
) -> tuple[pl.DataFrame, pl.DataFrame]:
# Inject fdr_threshold into DE metric configs
de_metric_configs = _build_de_metric_configs(self.fdr_threshold)
merged_configs = {**de_metric_configs, **(metric_configs or {})}

pipeline = MetricPipeline(
profile=profile,
metric_configs=metric_configs,
metric_configs=merged_configs,
break_on_error=break_on_error,
)
if skip_metrics is not None:
Expand Down Expand Up @@ -156,6 +164,31 @@ def compute(
return results, agg_results


def _build_de_metric_configs(fdr_threshold: float) -> dict[str, dict[str, Any]]:
"""Build metric configs with fdr_threshold for all DE metrics that accept it."""
de_metrics_with_fdr = [
"de_spearman_sig",
"de_direction_match",
"de_spearman_lfc_sig",
"de_sig_genes_recall",
"de_nsig_counts",
"pr_auc",
"roc_auc",
# overlap/precision metrics
"overlap_at_N",
"overlap_at_50",
"overlap_at_100",
"overlap_at_200",
"overlap_at_500",
"precision_at_N",
"precision_at_50",
"precision_at_100",
"precision_at_200",
"precision_at_500",
]
return {metric: {"fdr_threshold": fdr_threshold} for metric in de_metrics_with_fdr}
Comment on lines +167 to +189
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instead of hardcoding the list of metrics that accept fdr_threshold, you can generate it dynamically by inspecting the signatures of the registered DE metrics. This would make the code more robust and easier to maintain, as you wouldn't need to update this list manually when adding or modifying metrics.

Here's a suggested implementation that uses the inspect module. You'll also need to add the following imports at the top of the file:

import inspect
from .metrics import metrics_registry
from ._types import MetricType
def _build_de_metric_configs(fdr_threshold: float) -> dict[str, dict[str, Any]]:
    """Build metric configs with fdr_threshold for all DE metrics that accept it."""
    de_metrics_with_fdr = []
    for metric_name in metrics_registry.list_metrics(MetricType.DE):
        metric_info = metrics_registry.get_metric(metric_name)
        func_to_inspect = metric_info.func
        if metric_info.is_class:
            func_to_inspect = func_to_inspect.__init__

        sig = inspect.signature(func_to_inspect)
        if "fdr_threshold" in sig.parameters:
            de_metrics_with_fdr.append(metric_name)

    return {metric: {"fdr_threshold": fdr_threshold} for metric in de_metrics_with_fdr}



def _build_anndata_pair(
real: ad.AnnData | str,
pred: ad.AnnData | str,
Expand Down
15 changes: 10 additions & 5 deletions src/cell_eval/metrics/_de.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,24 @@ def __call__(self, data: DEComparison) -> dict[str, dict[str, int]]:
return counts


def compute_pr_auc(
    data: DEComparison, fdr_threshold: float = 0.05
) -> dict[str, float]:
    """Precision-recall AUC per perturbation for recovery of significant genes.

    Thin wrapper that delegates to ``compute_generic_auc`` with the PR method;
    ``fdr_threshold`` sets the significance cutoff used to label real genes.
    """
    return compute_generic_auc(data=data, method="pr", fdr_threshold=fdr_threshold)


def compute_roc_auc(
    data: DEComparison, fdr_threshold: float = 0.05
) -> dict[str, float]:
    """ROC AUC per perturbation for recovery of significant genes.

    Thin wrapper that delegates to ``compute_generic_auc`` with the ROC method;
    ``fdr_threshold`` sets the significance cutoff used to label real genes.
    """
    return compute_generic_auc(data=data, method="roc", fdr_threshold=fdr_threshold)


def compute_generic_auc(
data: DEComparison,
method: Literal["pr", "roc"] = "pr",
fdr_threshold: float = 0.05,
) -> dict[str, float]:
"""Compute AUC values for significant recovery per perturbation."""

Expand All @@ -221,7 +226,7 @@ def compute_generic_auc(
pred_fdr_col = data.pred.fdr_col

labeled_real = data.real.data.with_columns(
(pl.col(real_fdr_col) < 0.05).cast(pl.Float32).alias("label")
(pl.col(real_fdr_col) < fdr_threshold).cast(pl.Float32).alias("label")
).select([target_col, feature_col, "label"])

merged = (
Expand Down
Loading