Add tangram (#117)

LouisK92 · web-flow · commit 34b79083689d · 2025-12-11T23:19:45.000+01:00
diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
@@ -57,6 +57,8 @@ celltype_annotation_methods:
   - ssam
   # - tacco
   # - moscot
+  # - mapmycells
+  # - tangram
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -50,6 +50,7 @@ celltype_annotation_methods:
   - tacco
   - moscot
   - mapmycells
+  - tangram
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
@@ -53,6 +53,7 @@ celltype_annotation_methods:
   # - tacco
   # - moscot
   # - mapmycells
+  # - tangram
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -48,6 +48,8 @@ celltype_annotation_methods:
   - ssam
   - tacco
   - moscot
+  - mapmycells
+  - tangram
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/src/methods_cell_type_annotation/tangram/config.vsh.yaml b/src/methods_cell_type_annotation/tangram/config.vsh.yaml
@@ -0,0 +1,43 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: tangram
+label: "Tangram"
+summary: "Annotate cell types using Tangram"
+description: "Annotate cell types using Tangram"
+links:
+  documentation: "https://tangram-sc.readthedocs.io"
+  repository: "https://github.com/broadinstitute/Tangram"
+references:
+  doi: "10.1038/s41592-021-01264-7"
+
+arguments:
+  - name: --mode
+    required: false
+    direction: input
+    type: string
+    default: "cells"
+  - name: --num_epochs
+    required: false
+    direction: input
+    type: integer
+    default: 1000
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    #image: openproblems/base_pytorch_nvidia:1 #NOTE: leads to dependency issues.
+    # TODO: could try some other base image with pytorch and cuda installed.
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi: [tangram-sc]
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem, gpu ]
diff --git a/src/methods_cell_type_annotation/tangram/script.py b/src/methods_cell_type_annotation/tangram/script.py
@@ -0,0 +1,82 @@
+import anndata as ad
+import tangram as tg
+import torch
+
+## VIASH START
+par = {
+    'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+    'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+    'output': 'spatial_with_celltypes.h5ad',
+    'celltype_key': 'cell_type',
+    'mode': 'cells',
+    'num_epochs': 1000,
+}
+meta = {
+    'name': 'tangram',
+}
+## VIASH END
+
+# GPU check
+if torch.cuda.is_available():
+    device = "cuda:0"
+else:
+    device = "cpu"
+
+# Optional-parameter check: this annotation method requires both par['input_spatial_normalized_counts'] and par['input_scrnaseq_reference'].
+assert par['input_spatial_normalized_counts'] is not None, 'Spatial input is required for this annotation method.'
+assert par['input_scrnaseq_reference'] is not None, 'Single cell input is required for this annotation method.'
+
+# Read input
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+
+# Use the log1p-normalized values stored in the 'normalized' layer
+adata_sc.X = adata_sc.layers['normalized']
+adata_sp.X = adata_sp.layers['normalized']
+    
+adata_sp_orig = adata_sp.copy()
+
+# use all the genes from adata_sp as markers for tangram
+markers = adata_sp.var_names.tolist()
+    
+# Removes genes whose entries are all zero, finds the intersection between adata_sc, adata_sp and the given marker gene list,
+# and saves the intersected markers in both AnnData objects. Also calculates density priors and saves them with adata_sp.
+tg.pp_adatas(adata_sc=adata_sc, adata_sp=adata_sp, genes=markers)
+    
+# Map the single-cell data (`adata_sc`) onto the spatial data (`adata_sp`).
+# density_prior (str, ndarray or None): spatial density of spots. As a string, the value can be 'rna_count_based' or
+# 'uniform'; as an ndarray, its shape must be (number_spots,).
+# Use 'uniform' if the spatial voxels are at single-cell resolution (e.g. MERFISH); 'rna_count_based' assumes that
+# cell density is proportional to the number of RNA molecules.
+adata_map = tg.map_cells_to_space(
+    adata_sc=adata_sc,
+    adata_sp=adata_sp,
+    device=device,
+    mode=par['mode'],
+    num_epochs=par['num_epochs'],
+    density_prior='uniform'
+)
+    
+# The spatial prediction dataframe is saved in `obsm['tangram_ct_pred']` of the spatial AnnData
+tg.project_cell_annotations(
+    adata_map = adata_map,
+    adata_sp = adata_sp, 
+    annotation=par['celltype_key']
+)
+
+# Keep a copy of the predictions, then switch back to the original spatial AnnData so the output does not carry the extra data added by Tangram
+df = adata_sp.obsm['tangram_ct_pred'].copy()
+adata_sp = adata_sp_orig.copy()
+
+# Set the cell type annotation
+adata_sp.obs[par['celltype_key']] = df.idxmax(axis=1)
+
+
+# # Normalize by row before setting the score
+# normalized_df = df.div(df.sum(axis=1), axis=0)
+# max_values = normalized_df.max(axis=1)
+# adata_sp.obs['tangram_score'] = max_values
+# adata_sp.obsm['ct_tangram_scores'] = normalized_df
+
+# Write output
+adata_sp.write_h5ad(par['output'])
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,7 +98,7 @@ argument_groups:
           A list of cell type annotation methods to run.
         type: string
         multiple: true
-        default: "ssam:tacco:moscot:mapmycells"
+        default: "ssam:tacco:moscot:mapmycells:tangram"
       - name: "--expression_correction_methods"
         description: |
           A list of expression correction methods to run.
@@ -169,6 +169,7 @@ dependencies:
   - name: methods_cell_type_annotation/tacco
   - name: methods_cell_type_annotation/moscot
   - name: methods_cell_type_annotation/mapmycells
+  - name: methods_cell_type_annotation/tangram
   - name: methods_expression_correction/no_correction
   - name: methods_expression_correction/gene_efficiency_correction
   - name: methods_expression_correction/resolvi_correction
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
@@ -375,7 +375,8 @@ workflow run_wf {
     ssam,
     tacco,
     moscot,
-    mapmycells
+    mapmycells,
+    tangram
   ]
   
   cta_ch = normalization_ch

Original file line number	Diff line number	Diff line change
`@@ -375,7 +375,8 @@ workflow run_wf {`
`375`	`375`	`ssam,`
`376`	`376`	`tacco,`
`377`	`377`	`moscot,`
`378`		`- mapmycells`
	`378`	`+ mapmycells,`
	`379`	`+ tangram`
`379`	`380`	`]`
`380`	`381`
`381`	`382`	`cta_ch = normalization_ch`