diff --git a/docs/_static/docstring_previews/milo_de_nhoods.png b/docs/_static/docstring_previews/milo_de_nhoods.png
new file mode 100644
index 00000000..f7e12207
Binary files /dev/null and b/docs/_static/docstring_previews/milo_de_nhoods.png differ
diff --git a/docs/_static/docstring_previews/milo_de_nhoods_nhood_graph.png b/docs/_static/docstring_previews/milo_de_nhoods_nhood_graph.png
new file mode 100644
index 00000000..e6b90307
Binary files /dev/null and b/docs/_static/docstring_previews/milo_de_nhoods_nhood_graph.png differ
diff --git a/docs/_static/docstring_previews/milo_de_nhoods_volcano.png b/docs/_static/docstring_previews/milo_de_nhoods_volcano.png
new file mode 100644
index 00000000..0ebe5218
Binary files /dev/null and b/docs/_static/docstring_previews/milo_de_nhoods_volcano.png differ
diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks
index 7ae61d53..d2ab4bfb 160000
--- a/docs/tutorials/notebooks
+++ b/docs/tutorials/notebooks
@@ -1 +1 @@
-Subproject commit 7ae61d535361c00e845c39217d513c049f340241
+Subproject commit d2ab4bfbd1bcc18eaf977766117ba5fff62804f8
diff --git a/pertpy/tools/_milo.py b/pertpy/tools/_milo.py
index cb8a3ab3..ee50d55b 100644
--- a/pertpy/tools/_milo.py
+++ b/pertpy/tools/_milo.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import contextlib
+import io
 import random
 import re
 from importlib.util import find_spec
@@ -28,6 +30,32 @@
 from sklearn.metrics.pairwise import euclidean_distances
 
 
+def _weighted_bh(pvalues: np.ndarray, weights: np.ndarray) -> np.ndarray:
+    """Density-weighted Benjamini-Hochberg adjustment (Cydar/Milo style).
+
+    NaN p-values are passed through; infinite weights are treated as zero.
+    """
+    pvalues = np.asarray(pvalues, dtype=float)
+    weights = np.asarray(weights, dtype=float)
+    weights = np.where(np.isinf(weights), 0.0, weights)
+    out = np.full_like(pvalues, np.nan)
+    keep = ~np.isnan(pvalues)
+    if not keep.any():
+        return out
+    p = pvalues[keep]
+    w = weights[keep]
+    o = np.argsort(p)
+    p_sorted = p[o]
+    w_sorted = w[o]
+    adj_rev = (w.sum() * p_sorted / np.cumsum(w_sorted))[::-1]
+    adj = np.minimum.accumulate(adj_rev)[::-1]
+    adj = np.minimum(adj, 1.0)
+    final = np.empty_like(p)
+    final[o] = adj
+    out[keep] = final
+    return out
+
+
 class Milo:
     """Python implementation of Milo."""
 
@@ -511,6 +539,232 @@ def da_nhoods(
 
         self._graph_spatial_fdr(sample_adata)
 
+    def de_nhoods(
+        self,
+        mdata: MuData,
+        design: str,
+        *,
+        column: str,
+        baseline: str,
+        group_to_compare: str,
+        solver: Literal["pydeseq2", "statsmodels"] = "pydeseq2",
+        layer: str | None = None,
+        sample_col: str | None = None,
+        feature_key: str | None = "rna",
+        min_n_cells_per_sample: int = 3,
+        min_count: int = 3,
+        subset_nhoods: list[int] | None = None,
+        fit_kwargs: dict | None = None,
+    ) -> pd.DataFrame:
+        """Per-neighbourhood differential expression testing (miloDE).
+
+        For each neighbourhood, cells are pseudobulked by sample and a per-gene linear model is fit on the pseudobulk counts via the existing pertpy DE method (`PyDESeq2` or `Statsmodels`).
+        P-values are corrected twice: across genes within each nhood (BH, exposed as `adj_p_value` to match pertpy DE conventions) and across nhoods per gene (density-weighted BH, the same correction `da_nhoods` uses).
+        Neighbourhoods that fail validity checks (too few samples per condition, rank-deficient design) are skipped and marked `test_performed=False`.
+
+        Args:
+            mdata: MuData with `make_nhoods` and `count_nhoods` already run.
+            design: Right-hand-side formula referencing columns of `mdata[feature_key].obs`, e.g. `"~condition"` or `"~replicate+condition"`.
+            column: Column from the design that defines the contrast factor.
+            baseline: Level of `column` used as the reference (denominator).
+            group_to_compare: Level of `column` compared against `baseline` (numerator).
+            solver: `"pydeseq2"` for the NB-GLM Wald test, `"statsmodels"` for OLS by default (pass `fit_kwargs={"regression_model": sm.GLM, "family": ...}` for a GLM).
+            layer: Layer in `mdata[feature_key]` to use as raw counts; defaults to `.X`.
+            sample_col: Column in `mdata[feature_key].obs` identifying samples; defaults to the value stored by `count_nhoods` in `mdata['milo'].uns['sample_col']`.
+            feature_key: Cell-level modality key in `mdata`.
+            min_n_cells_per_sample: Drop samples with fewer than this many cells in the nhood before pseudobulking.
+            min_count: Drop genes whose total pseudobulk count across the surviving samples is below this threshold.
+            subset_nhoods: Optional integer indices (into `mdata['milo'].var_names`) restricting which nhoods are tested.
+            fit_kwargs: Extra keyword arguments forwarded to the per-nhood model's `fit`.
+
+        Returns:
+            Long-form DataFrame with one row per (nhood, gene) pair, columns `nhood`, `variable`, `log_fc`, `p_value`, `adj_p_value`, `pval_corrected_across_nhoods`, `test_performed`.
+            Skipped nhoods contribute rows with NaN test statistics and `test_performed=False`.
+
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.bhattacherjee()
+            >>> milo = pt.tl.Milo()
+            >>> mdata = milo.load(adata)
+            >>> sc.pp.neighbors(mdata["rna"])
+            >>> milo.make_nhoods(mdata["rna"])
+            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
+            >>> de = milo.de_nhoods(
+            ...     mdata, design="~label", column="label", baseline="control", group_to_compare="treated"
+            ... )
+        """
+        from scipy.sparse import issparse
+
+        try:
+            sample_adata = mdata["milo"]
+        except KeyError:
+            raise RuntimeError("mdata['milo'] is missing -- run Milo.count_nhoods() first.") from None
+        adata = mdata[feature_key]
+
+        if "nhoods" not in adata.obsm:
+            raise KeyError(f"mdata[{feature_key!r}].obsm['nhoods'] is missing -- run make_nhoods() first.")
+
+        if sample_col is None:
+            sample_col = sample_adata.uns.get("sample_col")
+            if sample_col is None:
+                raise KeyError(
+                    "sample_col not found in mdata['milo'].uns -- run count_nhoods() first or pass `sample_col`."
+                )
+
+        covariates = [c.strip() for c in re.split(r"\+|\*|:", design.lstrip("~ "))]
+        covariates = [c for c in covariates if c and c not in {"0", "1"}]
+        missing = [c for c in covariates + [sample_col] if c not in adata.obs.columns]
+        if missing:
+            raise KeyError(f"Columns {missing!r} not found in mdata[{feature_key!r}].obs.")
+
+        sample_obs_map = adata.obs[[sample_col, *covariates]].drop_duplicates().set_index(sample_col)
+        if not sample_obs_map.index.is_unique:
+            raise AssertionError(
+                f"Each sample must map to a single covariate value; got duplicates for samples "
+                f"{sample_obs_map.index[sample_obs_map.index.duplicated()].unique().tolist()}."
+            )
+
+        nhoods = adata.obsm["nhoods"]
+        if not issparse(nhoods):
+            nhoods = csr_matrix(nhoods)
+        nhoods_csc = nhoods.tocsc()
+        n_nhoods_total = nhoods.shape[1]
+
+        nhood_ix = np.arange(n_nhoods_total) if subset_nhoods is None else np.asarray(subset_nhoods, dtype=int)
+
+        nhood_names = sample_adata.var_names.to_numpy()
+        var_names = adata.var_names.to_numpy()
+        n_genes = adata.n_vars
+
+        logfc = np.full((n_nhoods_total, n_genes), np.nan, dtype=np.float32)
+        # p-values are stored in float64 because Wald tests on strong DE signals
+        # routinely produce values < 1e-45, which would underflow to zero in float32.
+        pvals = np.full((n_nhoods_total, n_genes), np.nan, dtype=np.float64)
+        padj_genes = np.full((n_nhoods_total, n_genes), np.nan, dtype=np.float64)
+        test_performed = np.zeros(n_nhoods_total, dtype=bool)
+
+        # Lazy-import the DE class
+        sm = None
+        if solver == "pydeseq2":
+            from pertpy.tools._differential_gene_expression._pydeseq2 import PyDESeq2
+
+            model_cls: type = PyDESeq2
+        elif solver == "statsmodels":
+            import statsmodels.api as sm  # type: ignore[no-redef]
+
+            from pertpy.tools._differential_gene_expression._statsmodels import Statsmodels
+
+            model_cls = Statsmodels
+        else:
+            raise ValueError(f"Unknown solver {solver!r}. Use 'pydeseq2' or 'statsmodels'.")
+        user_fit_kwargs = dict(fit_kwargs or {})
+
+        # For statsmodels on raw counts the regression model should be an NB-GLM with a library-size offset.
+        sm_glm_default = solver == "statsmodels" and "regression_model" not in user_fit_kwargs
+
+        failures: dict[str, list[int]] = {}
+
+        for j in nhood_ix:
+            col = nhoods_csc.getcol(j)
+            cell_idx = col.indices
+            if cell_idx.size == 0:
+                continue
+            sub = adata[cell_idx]
+            sample_counts = sub.obs[sample_col].value_counts()
+            kept_samples = sample_counts[sample_counts >= min_n_cells_per_sample].index.to_list()
+            cond_levels = sample_obs_map.loc[kept_samples, column].dropna().unique()
+            if {baseline, group_to_compare} - set(cond_levels):
+                continue
+            sub = sub[sub.obs[sample_col].isin(kept_samples)].copy()
+
+            try:
+                pdata = sc.get.aggregate(sub, by=sample_col, func="sum", layer=layer)
+            except Exception as e:  # noqa: BLE001
+                failures.setdefault(f"pseudobulk failed ({type(e).__name__})", []).append(int(j))
+                continue
+            pdata.X = pdata.layers["sum"]
+            if issparse(pdata.X):
+                pdata.X = pdata.X.toarray()
+
+            for cov in covariates:
+                pdata.obs[cov] = pdata.obs[sample_col].map(sample_obs_map[cov]).astype("category")
+
+            gene_mask = np.asarray(pdata.X.sum(axis=0)).ravel() >= min_count
+            if gene_mask.sum() < 2:
+                continue
+            pdata = pdata[:, gene_mask].copy()
+
+            try:
+                # Solvers (pydeseq2 in particular) print progress per call; silenced
+                # because running once per nhood would otherwise flood stdout.
+                with contextlib.redirect_stdout(io.StringIO()):
+                    model = model_cls(pdata, design=design)
+                    this_fit_kwargs = dict(user_fit_kwargs)
+                    if sm_glm_default and sm is not None:
+                        lib_size = np.asarray(pdata.X.sum(axis=1)).ravel().astype(float)
+                        lib_size[lib_size <= 0] = 1.0
+                        this_fit_kwargs.setdefault("regression_model", sm.GLM)
+                        this_fit_kwargs.setdefault("family", sm.families.NegativeBinomial())
+                        this_fit_kwargs.setdefault("offset", np.log(lib_size))
+                    model.fit(**this_fit_kwargs)
+                    contrast_vec = model.contrast(column=column, baseline=baseline, group_to_compare=group_to_compare)
+                    res = model.test_contrasts(contrast_vec)
+            except Exception as e:  # noqa: BLE001
+                # Keep the message stable so similar small-sample failures collapse into one bucket.
+                key = str(e).split("\n", 1)[0][:120]
+                failures.setdefault(key, []).append(int(j))
+                continue
+
+            gene_pos = pd.Series(np.arange(n_genes), index=var_names)
+            gi = gene_pos.reindex(res["variable"]).to_numpy()
+            ok = ~np.isnan(gi)
+            gi = gi[ok].astype(int)
+            logfc[j, gi] = res.loc[ok, "log_fc"].to_numpy(dtype=np.float32)
+            pvals[j, gi] = res.loc[ok, "p_value"].to_numpy(dtype=np.float64)
+            if "adj_p_value" in res.columns:
+                padj_genes[j, gi] = res.loc[ok, "adj_p_value"].to_numpy(dtype=np.float64)
+            test_performed[j] = True
+
+        if failures:
+            total = sum(len(v) for v in failures.values())
+            logger.warning(
+                f"de_nhoods: {total} of {len(nhood_ix)} nhoods skipped "
+                f"(see `obs['test_performed'] == False`); commonest reason: {max(failures, key=lambda k: len(failures[k]))!r}."
+            )
+
+        # Backfill BH across genes for solvers that did not provide it
+        from statsmodels.stats.multitest import multipletests
+
+        for j in range(n_nhoods_total):
+            if not test_performed[j]:
+                continue
+            row = pvals[j]
+            valid = ~np.isnan(row)
+            if not valid.any() or not np.isnan(padj_genes[j, valid]).any():
+                continue
+            padj_genes[j, valid] = multipletests(row[valid], method="fdr_bh")[1]
+
+        # Density-weighted BH across nhoods per gene
+        weights = 1.0 / np.asarray(sample_adata.var["kth_distance"], dtype=float)
+        padj_nhoods = np.full_like(pvals, np.nan)
+        for g in range(n_genes):
+            padj_nhoods[:, g] = _weighted_bh(pvals[:, g], weights)
+
+        nhood_rep = np.repeat(nhood_names, n_genes)
+        gene_rep = np.tile(var_names, n_nhoods_total)
+        return pd.DataFrame(
+            {
+                "nhood": nhood_rep,
+                "variable": gene_rep,
+                "log_fc": logfc.ravel(),
+                "p_value": pvals.ravel(),
+                "adj_p_value": padj_genes.ravel(),
+                "pval_corrected_across_nhoods": padj_nhoods.ravel(),
+                "test_performed": np.repeat(test_performed, n_genes),
+            }
+        )
+
     def annotate_nhoods(
         self,
         mdata: MuData,
@@ -821,24 +1075,9 @@ def _graph_spatial_fdr(
         Args:
             sample_adata: Sample-level AnnData.
         """
-        # use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
-        w = 1 / sample_adata.var["kth_distance"]
-        w[np.isinf(w)] = 0
-
-        # Computing a density-weighted q-value.
-        pvalues = sample_adata.var["PValue"]
-        keep_nhoods = ~pvalues.isna()  # Filtering in case of test on subset of nhoods
-        o = pvalues[keep_nhoods].argsort()
-        pvalues = pvalues.loc[keep_nhoods].iloc[o]
-        w = w.loc[keep_nhoods].iloc[o]
-
-        adjp = np.zeros(shape=len(o))
-        adjp[o] = (sum(w) * pvalues / np.cumsum(w))[::-1].cummin()[::-1]
-        adjp = np.array([x if x < 1 else 1 for x in adjp])
-
-        sample_adata.var["SpatialFDR"] = np.nan
-        sample_adata.var.loc[keep_nhoods, "SpatialFDR"] = adjp
-
+        weights = 1.0 / np.asarray(sample_adata.var["kth_distance"], dtype=float)
+        adjp = _weighted_bh(sample_adata.var["PValue"].to_numpy(dtype=float), weights)
+        sample_adata.var["SpatialFDR"] = adjp
         # Fill missing values with 1 to avoid downstream NaN complications
         # e.g. https://github.com/scverse/pertpy/issues/912
         sample_adata.var["SpatialFDR"] = sample_adata.var["SpatialFDR"].fillna(1)
@@ -891,16 +1130,130 @@ def plot_nhood_graph(  # pragma: no cover # noqa: D417
             .. image:: /_static/docstring_previews/milo_nhood_graph.png
         """
         nhood_adata = mdata["milo"].T.copy()
+        return self._render_nhood_graph(
+            nhood_adata,
+            logfc=nhood_adata.obs["logFC"],
+            spatial_fdr=nhood_adata.obs["SpatialFDR"],
+            alpha=alpha,
+            min_logFC=min_logFC,
+            min_size=min_size,
+            plot_edges=plot_edges,
+            title=title,
+            color_map=color_map,
+            palette=palette,
+            ax=ax,
+            return_fig=return_fig,
+            **kwargs,
+        )
+
+    @_doc_params(common_plot_args=doc_common_plot_args)
+    def plot_de_nhood_graph(  # pragma: no cover # noqa: D417
+        self,
+        mdata: MuData,
+        de_results: pd.DataFrame,
+        gene: str,
+        *,
+        alpha: float = 0.1,
+        min_logFC: float = 0,
+        min_size: int = 10,
+        plot_edges: bool = False,
+        title: str | None = None,
+        color_map: Colormap | str | None = None,
+        palette: str | Sequence[str] | None = None,
+        ax: Axes | None = None,
+        return_fig: bool = False,
+        **kwargs,
+    ) -> Figure | None:
+        """Visualize per-neighbourhood DE logFC of a single gene from `de_nhoods` results.
 
+        Pairs with `de_nhoods` the same way `plot_nhood_graph` pairs with `da_nhoods`.
+        Uses the same embedding (`build_nhood_graph` must have been run) and colors nhoods by `log_fc`, masking those with `pval_corrected_across_nhoods > alpha`.
+
+        Args:
+            mdata: MuData with `build_nhood_graph` already run.
+            de_results: Long DataFrame returned by `de_nhoods`.
+            gene: Gene to plot; must appear in `de_results["variable"]`.
+            alpha: Significance threshold on `pval_corrected_across_nhoods`.
+            min_logFC: Minimum absolute log fold-change to color a nhood.
+            min_size: Multiplier on `Nhood_size` for the node radius.
+            plot_edges: Whether to draw nhood overlap edges.
+            title: Plot title; defaults to `miloDE logFC for <gene>`.
+            {common_plot_args}
+            **kwargs: Forwarded to `scanpy.pl.embedding`.
+
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.bhattacherjee()
+            >>> milo = pt.tl.Milo()
+            >>> mdata = milo.load(adata)
+            >>> sc.pp.neighbors(mdata["rna"])
+            >>> sc.tl.umap(mdata["rna"])
+            >>> milo.make_nhoods(mdata["rna"])
+            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
+            >>> milo.build_nhood_graph(mdata)
+            >>> de = milo.de_nhoods(
+            ...     mdata,
+            ...     design="~label",
+            ...     column="label",
+            ...     baseline="control",
+            ...     group_to_compare="treated",
+            ...     layer="counts",
+            ... )
+            >>> milo.plot_de_nhood_graph(mdata, de, gene="CD4")
+
+        Preview:
+            .. image:: /_static/docstring_previews/milo_de_nhoods_nhood_graph.png
+        """
+        g = de_results.loc[de_results["variable"] == gene]
+        if g.empty:
+            raise KeyError(f"Gene {gene!r} not found in de_results['variable'].")
+        g = g.set_index("nhood")
+        nhood_adata = mdata["milo"].T.copy()
+        logfc = g["log_fc"].reindex(nhood_adata.obs_names).to_numpy(dtype=float)
+        spatial_fdr = g["pval_corrected_across_nhoods"].reindex(nhood_adata.obs_names).to_numpy(dtype=float)
+        return self._render_nhood_graph(
+            nhood_adata,
+            logfc=pd.Series(logfc, index=nhood_adata.obs_names),
+            spatial_fdr=pd.Series(spatial_fdr, index=nhood_adata.obs_names),
+            alpha=alpha,
+            min_logFC=min_logFC,
+            min_size=min_size,
+            plot_edges=plot_edges,
+            title=title if title is not None else f"miloDE logFC for {gene}",
+            color_map=color_map,
+            palette=palette,
+            ax=ax,
+            return_fig=return_fig,
+            **kwargs,
+        )
+
+    def _render_nhood_graph(
+        self,
+        nhood_adata: AnnData,
+        *,
+        logfc: pd.Series,
+        spatial_fdr: pd.Series,
+        alpha: float,
+        min_logFC: float,
+        min_size: int,
+        plot_edges: bool,
+        title: str,
+        color_map,
+        palette,
+        ax,
+        return_fig: bool,
+        **kwargs,
+    ) -> Figure | None:
         if "Nhood_size" not in nhood_adata.obs.columns:
             raise KeyError(
-                'Cannot find "Nhood_size" column in adata.uns["nhood_adata"].obs -- \
-                    please run milopy.utils.build_nhood_graph(adata)'
+                'Cannot find "Nhood_size" column in adata.uns["nhood_adata"].obs -- '
+                "please run milo.build_nhood_graph(mdata) first"
             )
 
-        nhood_adata.obs["graph_color"] = nhood_adata.obs["logFC"]
-        nhood_adata.obs.loc[nhood_adata.obs["SpatialFDR"] > alpha, "graph_color"] = np.nan
-        nhood_adata.obs["abs_logFC"] = abs(nhood_adata.obs["logFC"])
+        nhood_adata.obs["graph_color"] = logfc
+        nhood_adata.obs.loc[spatial_fdr > alpha, "graph_color"] = np.nan
+        nhood_adata.obs["abs_logFC"] = logfc.abs()
         nhood_adata.obs.loc[nhood_adata.obs["abs_logFC"] < min_logFC, "graph_color"] = np.nan
 
         # Plotting order - extreme logFC on top
@@ -930,7 +1283,6 @@ def plot_nhood_graph(  # pragma: no cover # noqa: D417
             show=False,
             **kwargs,
         )
-
         if return_fig:
             return fig
         plt.show()
diff --git a/tests/tools/test_milo.py b/tests/tools/test_milo.py
index 4060712e..d8f0df99 100644
--- a/tests/tools/test_milo.py
+++ b/tests/tools/test_milo.py
@@ -233,6 +233,137 @@ def annotate_nhoods_mdata(adata, milo):
     return milo_mdata
 
 
+@pytest.fixture
+def de_nhoods_mdata(da_nhoods_mdata):
+    # da_nhoods_mdata is built from pbmc68k_reduced whose X is log-normalized.
+    # de_nhoods needs an integer-count layer (for PyDESeq2) so we simulate one
+    # by drawing NB counts whose mean tracks the normalized expression — plus a
+    # planted DE effect on a chosen gene to make the test directional.
+    mdata = da_nhoods_mdata.copy()
+    rna = mdata["rna"]
+    rng = np.random.default_rng(0)
+    base = np.asarray(rna.X.todense() if hasattr(rna.X, "todense") else rna.X)
+    mu = np.clip(np.expm1(np.clip(base, 0, 8)), 0.0, 200.0) + 0.5
+    # planted DE: gene 0 up in ConditionB
+    is_b = (rna.obs["condition"] == "ConditionB").to_numpy()
+    mu[is_b, 0] *= 5.0
+    counts = rng.negative_binomial(n=4, p=4 / (4 + mu)).astype(np.int32)
+    rna.layers["counts"] = counts
+    rna.uns["de_gene"] = rna.var_names[0]
+    return mdata
+
+
+def test_de_nhoods_shapes(de_nhoods_mdata, milo):
+    if find_spec("pydeseq2") is None:
+        pytest.skip("pydeseq2 not available")
+    mdata = de_nhoods_mdata.copy()
+    de = milo.de_nhoods(
+        mdata,
+        design="~condition",
+        column="condition",
+        baseline="ConditionA",
+        group_to_compare="ConditionB",
+        layer="counts",
+        min_n_cells_per_sample=2,
+        min_count=1,
+    )
+    expected = mdata["milo"].n_vars * mdata["rna"].n_vars
+    assert len(de) == expected
+    for c in [
+        "nhood",
+        "variable",
+        "log_fc",
+        "p_value",
+        "adj_p_value",
+        "pval_corrected_across_nhoods",
+        "test_performed",
+    ]:
+        assert c in de.columns
+    assert de["test_performed"].dtype == bool
+
+
+def test_de_nhoods_fdr_bounds(de_nhoods_mdata, milo):
+    if find_spec("pydeseq2") is None:
+        pytest.skip("pydeseq2 not available")
+    mdata = de_nhoods_mdata.copy()
+    de = milo.de_nhoods(
+        mdata,
+        design="~condition",
+        column="condition",
+        baseline="ConditionA",
+        group_to_compare="ConditionB",
+        layer="counts",
+        min_n_cells_per_sample=2,
+        min_count=1,
+    )
+    valid = de.dropna(subset=["p_value"])
+    assert ((valid["p_value"] >= 0) & (valid["p_value"] <= 1)).all()
+    both_g = valid.dropna(subset=["adj_p_value"])
+    assert (both_g["p_value"] <= both_g["adj_p_value"] + 1e-12).all()
+    both_n = valid.dropna(subset=["pval_corrected_across_nhoods"])
+    assert (both_n["p_value"] <= both_n["pval_corrected_across_nhoods"] + 1e-12).all()
+
+
+def test_de_nhoods_planted_signal(de_nhoods_mdata, milo):
+    if find_spec("pydeseq2") is None:
+        pytest.skip("pydeseq2 not available")
+    mdata = de_nhoods_mdata.copy()
+    de = milo.de_nhoods(
+        mdata,
+        design="~condition",
+        column="condition",
+        baseline="ConditionA",
+        group_to_compare="ConditionB",
+        layer="counts",
+        min_n_cells_per_sample=2,
+        min_count=1,
+    )
+    g = mdata["rna"].uns["de_gene"]
+    lfc = de.loc[de["variable"] == g, "log_fc"].dropna()
+    assert np.median(lfc) > 0
+
+
+def test_plot_de_nhood_graph(de_nhoods_mdata, milo):
+    if find_spec("pydeseq2") is None:
+        pytest.skip("pydeseq2 not available")
+    import matplotlib
+
+    matplotlib.use("Agg")
+    mdata = de_nhoods_mdata.copy()
+    milo.build_nhood_graph(mdata)
+    de = milo.de_nhoods(
+        mdata,
+        design="~condition",
+        column="condition",
+        baseline="ConditionA",
+        group_to_compare="ConditionB",
+        layer="counts",
+        min_n_cells_per_sample=2,
+        min_count=1,
+    )
+    g = mdata["rna"].uns["de_gene"]
+    fig = milo.plot_de_nhood_graph(mdata, de, gene=g, return_fig=True)
+    assert fig is not None
+    with pytest.raises(KeyError):
+        milo.plot_de_nhood_graph(mdata, de, gene="not_a_real_gene")
+
+
+def test_de_nhoods_statsmodels_runs(de_nhoods_mdata, milo):
+    mdata = de_nhoods_mdata.copy()
+    de = milo.de_nhoods(
+        mdata,
+        design="~condition",
+        column="condition",
+        baseline="ConditionA",
+        group_to_compare="ConditionB",
+        solver="statsmodels",
+        layer="counts",
+        min_n_cells_per_sample=2,
+        min_count=1,
+    )
+    assert de["test_performed"].any()
+
+
 def test_annotate_nhoods_missing_samples(annotate_nhoods_mdata, milo):
     mdata = annotate_nhoods_mdata.copy()
     del mdata.mod["milo"]