diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks
index 7ae61d53..ba197060 160000
--- a/docs/tutorials/notebooks
+++ b/docs/tutorials/notebooks
@@ -1 +1 @@
-Subproject commit 7ae61d535361c00e845c39217d513c049f340241
+Subproject commit ba197060e7e195e8efe1d02e2f2e6e7c02abe0e9
diff --git a/pertpy/data/__init__.py b/pertpy/data/__init__.py
index d8ccfec5..9a23f0a7 100644
--- a/pertpy/data/__init__.py
+++ b/pertpy/data/__init__.py
@@ -25,6 +25,7 @@
     gehring_2019,
     haber_2017_regions,
     hagai_2018,
+    human_cytokine_dict,
     kang_2018,
     mcfarland_2020,
     norman_2019,
@@ -84,6 +85,7 @@
     "gehring_2019",
     "haber_2017_regions",
     "hagai_2018",
+    "human_cytokine_dict",
     "kang_2018",
     "mcfarland_2020",
     "norman_2019",
diff --git a/pertpy/data/_datasets.py b/pertpy/data/_datasets.py
index 49bd41bf..a4e94ac0 100644
--- a/pertpy/data/_datasets.py
+++ b/pertpy/data/_datasets.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+import pandas as pd
 import scanpy as sc
 from anndata import AnnData
 from mudata import MuData
@@ -1598,3 +1599,45 @@ def hagai_2018() -> AnnData:  # pragma: no cover
     adata = sc.read_h5ad(output_file_path)
 
     return adata
+
+
+def human_cytokine_dict(exclude_well_biased_genes: bool = True) -> pd.DataFrame:  # pragma: no cover
+    """Human Cytokine Dictionary curated from PBMC allows you to infer differential cytokine activity.
+
+    The Human Cytokine Dictionary was created from single-cell RNA-seq of 9,697,974 human peripheral blood mononuclear cells (PBMC)
+    from 12 donors stimulated in vitro with 87 different cytokines.
+    Genes with a mean-to-stddev-ratio above 1 across all 6 wells for >10 cytokines in a given cell type and for >5 cell types are "well-biased".
+    Entries for the cytokines TGF-beta1, IL-18 and C3a are always removed from the returned dataframe.
+
+    Args:
+        exclude_well_biased_genes: Whether to exclude well-biased genes from the returned dataframe.
+
+    References:
+        Oesinghaus, L., Becker, S., Vornholz, L., Papalexi, E. et al.
+        A single-cell cytokine dictionary of human peripheral blood.
+        bioRxiv (2025). https://doi.org/10.64898/2025.12.12.693897
+
+    Returns:
+        :class:`~pandas.DataFrame` object of differentially expressed genes after cytokine perturbation.
+
+    """
+    output_file_name = "human_cytokine_dict.csv"
+    output_file_path = settings.datasetdir / output_file_name
+    if not Path(output_file_path).exists():
+        _download(
+            url="https://cdn.parsebiosciences.com/gigalab/10m/DEGs.csv",
+            output_file_name=output_file_name,
+            output_path=settings.datasetdir,
+            is_zip=False,
+        )
+
+    cytokine_dict = pd.read_csv(output_file_path, index_col=0)
+    # NOTE(review): the name suggests these cytokines were added during manuscript revision - confirm why they are dropped.
+    revision_cytokines = ["TGF-beta1", "IL-18", "C3a"]
+    cytokine_dict = cytokine_dict[~cytokine_dict["cytokine"].isin(revision_cytokines)]
+
+    if exclude_well_biased_genes:
+        cytokine_dict = cytokine_dict.loc[~cytokine_dict["well_biased"]]
+
+    # Renumber only after all row filters so the returned index is contiguous in both modes.
+    return cytokine_dict.reset_index(drop=True)