Skip to content

Commit 17ef5c6

Browse files
committed
Add clr normalization for adt counts
1 parent 0d503dc commit 17ef5c6

3 files changed

Lines changed: 38 additions & 7 deletions

File tree

src/methods/cellmapper_scvi/config.vsh.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,23 @@ info:
2121
cellmapper_hnoca_hvg:
2222
kernel_method: hnoca
2323
use_hvg: true
24+
adt_normalization: clr
2425
cellmapper_hnoca_all_genes:
2526
kernel_method: hnoca
2627
use_hvg: false
28+
adt_normalization: clr
2729
cellmapper_gauss_hvg:
2830
kernel_method: gauss
2931
use_hvg: true
32+
adt_normalization: clr
33+
cellmapper_gauss_hvg_log_cp10k:
34+
kernel_method: gauss
35+
use_hvg: true
36+
adt_normalization: log_cp10k
3037
cellmapper_gauss_all_genes:
3138
kernel_method: gauss
3239
use_hvg: false
40+
adt_normalization: clr
3341

3442
arguments:
3543
- name: "--kernel_method"
@@ -45,6 +53,11 @@ arguments:
4553
type: boolean
4654
default: true
4755
description: Whether to use highly variable genes (HVG) for the mapping (Generic analysis parameter).
56+
- name: "--adt_normalization"
57+
type: "string"
58+
choices: ["clr", "log_cp10k"]
59+
default: "clr"
60+
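description: Normalization method for ADT data. clr = centered log ratio computed from the raw counts; log_cp10k = counts normalized to 10k per cell and log-transformed (uses the existing normalized layer).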
4861
resources:
4962
- type: python_script
5063
path: script.py
@@ -56,6 +69,7 @@ engines:
5669
packages:
5770
- cellmapper>=0.2.2
5871
- scvi-tools>=1.3.0
72+
- muon>=0.1.6
5973

6074
runners:
6175
- type: executable

src/methods/cellmapper_scvi/script.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
88
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
99
par = {
10-
'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/normal/train_mod1.h5ad',
11-
'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/normal/train_mod2.h5ad',
12-
'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/normal/test_mod1.h5ad',
10+
'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/train_mod1.h5ad',
11+
'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/train_mod2.h5ad',
12+
'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/swap/test_mod1.h5ad',
1313
'output': 'output.h5ad',
1414
'n_neighbors': 30,
1515
'kernel_method': 'hnoca',
1616
'use_hvg': True,
17+
'adt_normalization': 'clr',
1718

1819
}
1920
meta = {
@@ -42,7 +43,7 @@
4243

4344
# Compute a latent representation using an appropriate model based on the modality
4445
print("Get latent representation", flush=True)
45-
adata = get_representation(adata=adata, modality=mod1, use_hvg=par['use_hvg'])
46+
adata = get_representation(adata=adata, modality=mod1, use_hvg=par['use_hvg'], adt_normalization=par['adt_normalization'])
4647

4748
# Place the representation back into individual objects
4849
input_train_mod1.obsm["X_scvi"] = adata[adata.obs["split"] == "train"].obsm["X_scvi"].copy()

src/methods/cellmapper_scvi/utils.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from typing import Literal
22
import anndata as ad
33
import scvi
4-
from scipy.sparse import issparse
4+
from scipy.sparse import issparse, csr_matrix, csc_matrix
5+
import muon
56

67

7-
def get_representation(adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"], use_hvg: bool = True) -> ad.AnnData:
8+
def get_representation(
9+
adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"], use_hvg: bool = True, adt_normalization: Literal["clr", "log_cp10k"] = "clr") -> ad.AnnData:
810
"""
911
Get a joint latent space representation of the data based on the modality.
1012
@@ -23,6 +25,10 @@ def get_representation(adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"
2325
(e.g. UMI counts for GEX and peak counts for ATAC), and the normalized data in the `normalized` layer.
2426
use_hvg
2527
Whether to subset the data to highly variable genes (HVGs) before training the model
28+
adt_normalization
29+
Normalization method for ADT data. Options are:
30+
- "clr" (centered log-ratio transformation)
31+
- "log_cp10k" (normalization to 10k counts per cell and logarithm transformation)
2632
2733
Returns
2834
-------
@@ -43,7 +49,17 @@ def get_representation(adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"
4349
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer=layer)
4450
model = scvi.model.SCVI(adata, gene_likelihood="nb", n_layers=2, n_latent=30)
4551
elif modality == "ADT":
46-
layer = "normalized"
52+
print(f"Normalizing the ADT data using method '{adt_normalization}'")
53+
if adt_normalization == "clr":
54+
adata.X = csc_matrix(adata.layers["counts"]) # Use raw counts for ADT
55+
muon.prot.pp.clr(adata)
56+
adata.layers["adt_normalized"] = csr_matrix(adata.X)
57+
elif adt_normalization == "log_cp10k":
58+
adata.layers["adt_normalized"] = adata.layers["normalized"]
59+
else:
60+
raise ValueError(f"Unknown ADT normalization method: {adt_normalization}")
61+
62+
layer = "adt_normalized"
4763
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer=layer)
4864
model = scvi.model.SCVI(adata, gene_likelihood="normal", n_layers=1, n_latent=10)
4965
elif modality == "ATAC":

0 commit comments

Comments
 (0)