11from typing import Literal
22import anndata as ad
33import scvi
4- from scipy .sparse import issparse
4+ from scipy .sparse import issparse , csr_matrix , csc_matrix
5+ import muon
56
67
7- def get_representation (adata : ad .AnnData , modality : Literal ["GEX" , "ADT" , "ATAC" ], use_hvg : bool = True ) -> ad .AnnData :
8+ def get_representation (
9+ adata : ad .AnnData , modality : Literal ["GEX" , "ADT" , "ATAC" ], use_hvg : bool = True , adt_normalization : Literal ["clr" , "log_cp10k" ] = "clr" ) -> ad .AnnData :
810 """
911 Get a joint latent space representation of the data based on the modality.
1012
@@ -23,6 +25,10 @@ def get_representation(adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"
2325 (e.g. UMI counts for GEX and peak counts for ATAC), and the normalized data in the `normalized` layer.
2426 use_hvg
2527 Whether to subset the data to highly variable genes (HVGs) before training the model
28+ adt_normalization
29+ Normalization method for ADT data. Options are:
30+ - "clr" (centered log-ratio transformation)
31+ - "log_cp10k" (normalization to 10k counts per cell and logarithm transformation)
2632
2733 Returns
2834 -------
@@ -43,7 +49,17 @@ def get_representation(adata: ad.AnnData, modality: Literal["GEX", "ADT", "ATAC"
4349 scvi .model .SCVI .setup_anndata (adata , batch_key = "batch" , layer = layer )
4450 model = scvi .model .SCVI (adata , gene_likelihood = "nb" , n_layers = 2 , n_latent = 30 )
4551 elif modality == "ADT" :
46- layer = "normalized"
52+ print (f"Normalizing the ADT data using method '{ adt_normalization } '" )
53+ if adt_normalization == "clr" :
54+ adata .X = csc_matrix (adata .layers ["counts" ]) # Use raw counts for ADT
55+ muon .prot .pp .clr (adata )
56+ adata .layers ["adt_normalized" ] = csr_matrix (adata .X )
57+ elif adt_normalization == "log_cp10k" :
58+ adata .layers ["adt_normalized" ] = adata .layers ["normalized" ]
59+ else :
60+ raise ValueError (f"Unknown ADT normalization method: { adt_normalization } " )
61+
62+ layer = "adt_normalized"
4763 scvi .model .SCVI .setup_anndata (adata , batch_key = "batch" , layer = layer )
4864 model = scvi .model .SCVI (adata , gene_likelihood = "normal" , n_layers = 1 , n_latent = 10 )
4965 elif modality == "ATAC" :
0 commit comments