Skip to content

Commit 5918c28

Browse files
committed
Add CLR normalization for ADT data
1 parent 17ef5c6 commit 5918c28

3 files changed

Lines changed: 35 additions & 11 deletions

File tree

src/methods/cellmapper_scvi/config.vsh.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ arguments:
5858
choices: ["clr", "log_cp10k"]
5959
default: "clr"
6060
description: Normalization method for ADT data, clr = centered log ratio.
61+
- name: "--plot_umap"
62+
type: boolean
63+
default: false
64+
description: Whether to plot the UMAP embedding of the latent space (for diagnostic purposes)
6165
resources:
6266
- type: python_script
6367
path: script.py

src/methods/cellmapper_scvi/script.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
88
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
99
par = {
10-
'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/train_mod1.h5ad',
11-
'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/train_mod2.h5ad',
12-
'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/test_mod1.h5ad',
10+
'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod1.h5ad',
11+
'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod2.h5ad',
12+
'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/test_mod1.h5ad',
1313
'output': 'output.h5ad',
1414
'n_neighbors': 30,
1515
'kernel_method': 'hnoca',
16-
'use_hvg': True,
17-
'adt_normalization': 'clr',
16+
'use_hvg': False,
17+
'adt_normalization': 'clr', # Normalization method for ADT data
18+
'plot_umap': True,
1819

1920
}
2021
meta = {
@@ -43,7 +44,9 @@
4344

4445
# Compute a latent representation using an appropriate model based on the modality
4546
print("Get latent representation", flush=True)
46-
adata = get_representation(adata=adata, modality=mod1, use_hvg=par['use_hvg'], adt_normalization=par['adt_normalization'])
47+
adata = get_representation(
48+
adata=adata, modality=mod1, use_hvg=par['use_hvg'], adt_normalization=par['adt_normalization'], plot_umap=par['plot_umap']
49+
)
4750

4851
# Place the representation back into individual objects
4952
input_train_mod1.obsm["X_scvi"] = adata[adata.obs["split"] == "train"].obsm["X_scvi"].copy()

src/methods/cellmapper_scvi/utils.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
import scvi
44
from scipy.sparse import issparse, csr_matrix, csc_matrix
55
import muon
6+
import scanpy as sc
67

78

89
def get_representation(
9-
adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"], use_hvg: bool = True, adt_normalization: Literal["clr", "log_cp10k"] = "clr") -> ad.AnnData:
10+
adata: ad.AnnData,
11+
modality: Literal["GEX", "ADT", "ATAC"],
12+
use_hvg: bool = True,
13+
adt_normalization: Literal["clr", "log_cp10k"] = "clr",
14+
plot_umap: bool = False,
15+
) -> ad.AnnData:
1016
"""
1117
Get a joint latent space representation of the data based on the modality.
1218
@@ -29,6 +35,9 @@ def get_representation(
2935
Normalization method for ADT data. Options are:
3036
- "clr" (centered log-ratio transformation)
3137
- "log_cp10k" (normalization to 10k counts per cell and logarithm transformation)
38+
plot_umap
39+
If True, compute a UMAP of the shared latent space and save a plot colored by batch and split.
40+
This is purely diagnostic: use it to check whether the data integration looks reasonable.
3241
3342
Returns
3443
-------
@@ -46,8 +55,9 @@ def get_representation(
4655
# Setup the AnnData object for scVI
4756
if modality == "GEX":
4857
layer = "counts"
49-
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer=layer)
50-
model = scvi.model.SCVI(adata, gene_likelihood="nb", n_layers=2, n_latent=30)
58+
scvi.model.SCVI.setup_anndata(adata, layer=layer, categorical_covariate_keys=["split", "batch"])
59+
model = scvi.model.SCVI(adata)
60+
5161
elif modality == "ADT":
5262
print(f"Normalizing the ADT data using method '{adt_normalization}'")
5363
if adt_normalization == "clr":
@@ -60,11 +70,11 @@ def get_representation(
6070
raise ValueError(f"Unknown ADT normalization method: {adt_normalization}")
6171

6272
layer = "adt_normalized"
63-
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer=layer)
73+
scvi.model.SCVI.setup_anndata(adata, layer=layer, categorical_covariate_keys=["split", "batch"])
6474
model = scvi.model.SCVI(adata, gene_likelihood="normal", n_layers=1, n_latent=10)
6575
elif modality == "ATAC":
6676
layer = "counts"
67-
scvi.model.PEAKVI.setup_anndata(adata, batch_key="batch", layer=layer)
77+
scvi.model.PEAKVI.setup_anndata(adata, layer=layer, categorical_covariate_keys=["split", "batch"])
6878
model = scvi.model.PEAKVI(adata)
6979
else:
7080
raise ValueError(f"Unknown modality: {modality}")
@@ -80,4 +90,11 @@ def get_representation(
8090
# Get the latent representation
8191
adata.obsm["X_scvi"] = model.get_latent_representation()
8292

93+
if plot_umap:
94+
sc.pp.neighbors(adata, use_rep="X_scvi")
95+
sc.tl.umap(adata)
96+
97+
plot_name = f"_{modality}_{adt_normalization}_use_hvg_{use_hvg}.png"
98+
sc.pl.embedding(adata, basis="umap", color=["batch", "split"], show=False, save=plot_name)
99+
83100
return adata

0 commit comments

Comments
 (0)