openproblems-bio
diff --git a/‎scripts/run_benchmark/run_full_local.sh‎
Lines changed: 3 additions & 0 deletions b/‎scripts/run_benchmark/run_full_local.sh‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/run_benchmark/run_full_seqeracloud.sh‎
Lines changed: 3 additions & 0 deletions b/‎scripts/run_benchmark/run_full_seqeracloud.sh‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/run_benchmark/run_test_local.sh‎
Lines changed: 3 additions & 0 deletions b/‎scripts/run_benchmark/run_test_local.sh‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/run_benchmark/run_test_seqeracloud.sh‎
Lines changed: 3 additions & 0 deletions b/‎scripts/run_benchmark/run_test_seqeracloud.sh‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/data_processors/process_dataset/script.py‎
Lines changed: 4 additions & 0 deletions b/‎src/data_processors/process_dataset/script.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/methods_cell_type_annotation/mapmycells/config.vsh.yaml‎
Lines changed: 35 additions & 0 deletions b/‎src/methods_cell_type_annotation/mapmycells/config.vsh.yaml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎src/methods_cell_type_annotation/mapmycells/script.py‎
Lines changed: 106 additions & 0 deletions b/‎src/methods_cell_type_annotation/mapmycells/script.py‎
Lines changed: 106 additions & 0 deletions
diff --git a/‎src/methods_cell_type_annotation/singler/config.vsh.yaml‎
Lines changed: 38 additions & 0 deletions b/‎src/methods_cell_type_annotation/singler/config.vsh.yaml‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎src/methods_cell_type_annotation/singler/script.py‎
Lines changed: 53 additions & 0 deletions b/‎src/methods_cell_type_annotation/singler/script.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎src/methods_cell_type_annotation/ssam/config.vsh.yaml‎
Lines changed: 1 addition & 1 deletion b/‎src/methods_cell_type_annotation/ssam/config.vsh.yaml‎
Lines changed: 1 addition & 1 deletion
@@ -57,6 +57,9 @@ celltype_annotation_methods:
   - ssam
   # - tacco
   # - moscot
+  # - mapmycells
+  # - tangram
+  # - singler
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
 
@@ -49,6 +49,9 @@ celltype_annotation_methods:
   - ssam
   - tacco
   - moscot
+  - mapmycells
+  - tangram
+  - singler
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
 
@@ -52,6 +52,9 @@ celltype_annotation_methods:
   - ssam
   # - tacco
   # - moscot
+  # - mapmycells
+  # - tangram
+  # - singler
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
 
@@ -48,6 +48,9 @@ celltype_annotation_methods:
   - ssam
   - tacco
   - moscot
+  - mapmycells
+  - tangram
+  - singler
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
 
@@ -180,6 +180,10 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 if adata.n_obs > N_MAX_SC:
     adata = adata[subsample_adata_group_balanced(adata, "cell_type", N_MAX_SC, seed=0)]
 
+# Make the single-cell data gene names unique
+adata.var_names = adata.var_names.astype(str)
+adata.var_names_make_unique()
+
 # Subset single-cell and spatial data to shared genes
 sp_genes = sdata['transcripts']['feature_name'].unique().compute().tolist()
 sc_genes = adata.var["feature_name"].unique().tolist()
 
@@ -0,0 +1,35 @@
+name: mapmycells
+label: "mapmycells"
+summary: "Mapping of annotations from single-cell to spatial using MapMyCells"
+description: "Mapping of cell type annotations from a single-cell reference to spatial data using MapMyCells (AllenInstitute cell_type_mapper)"
+links:
+  documentation: 'https://github.com/AllenInstitute/cell_type_mapper'
+  repository: 'https://github.com/AllenInstitute/cell_type_mapper'
+references:
+  doi: "10.1038/s41586-023-06812-z"
+
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+   
+  
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__: 
+      - /src/base/setup_spatialdata_partial.yaml
+      - /src/base/setup_txsim_partial.yaml
+    setup:
+      - type: python
+        pypi: 
+          - numpy
+          - git+https://github.com/AllenInstitute/cell_type_mapper.git
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ hightime, midcpu, highmem]
@@ -0,0 +1,118 @@
+import anndata as ad
+import os
+import subprocess
+import sys
+import json
+import pandas as pd
+from pathlib import Path
+## VIASH START
+par = {
+    'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+    'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+    'celltype_key': 'cell_type',
+    "output": 'spatial_with_celltypes.h5ad'
+}
+meta = { "temp_dir": './tmp/'}
+
+## VIASH END
+
+# Annotate spatial cells by mapping them onto the scRNA-seq reference with the
+# AllenInstitute cell_type_mapper CLI, which is driven through temporary files.
+
+# Every scratch path is derived from TMP_DIR so that the "/tmp/" fallback also
+# applies when meta["temp_dir"] is unset.
+TMP_DIR = Path(meta["temp_dir"] or "/tmp/")
+TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+sc_path = TMP_DIR / "sc_adata_processed.h5ad"
+sp_path = TMP_DIR / "sp_processed.h5ad"
+precomputed_path = TMP_DIR / "precomputed_stats.h5ad"
+genes_file_path = TMP_DIR / "genes.json"
+csv_result_path = TMP_DIR / "results.csv"
+extended_result_path = TMP_DIR / "extended_results.json"
+
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+
+# Use the "counts" layer of the reference as X when it is available.
+if "counts" in adata_sc.layers:
+    adata_sc.X = adata_sc.layers["counts"]
+
+adata_sp.var_names = adata_sp.var_names.astype(str)
+adata_sc.var_names = adata_sc.var_names.astype(str)
+adata_sp.var_names_make_unique()
+adata_sc.var_names_make_unique()
+
+# Sorted so the gene order written to the temporary files is reproducible.
+common_genes = sorted(set(adata_sp.var.index).intersection(adata_sc.var.index))
+if not common_genes:
+    raise ValueError(
+        "No genes shared between the spatial data and the scRNA-seq reference."
+    )
+
+try:
+    adata_sc = adata_sc[:, common_genes]
+    adata_sc.write_h5ad(sc_path)
+    adata_sp[:, common_genes].write_h5ad(sp_path)
+
+    # Step 1: precompute the reference statistics (precompute_stats_scrattch).
+    command = [
+        sys.executable,
+        "-m",
+        "cell_type_mapper.cli.precompute_stats_scrattch",
+        "--h5ad_path",
+        str(sc_path),
+        "--hierarchy",
+        "['cell_type']",
+        "--output_path",
+        str(precomputed_path),
+    ]
+    # check=True: stop here on failure instead of at a later missing-file read.
+    subprocess.run(command, check=True)
+
+    # Step 2: marker lookup file; all shared genes go under the "None" key.
+    with open(genes_file_path, "w") as json_file:
+        json.dump({"None": common_genes}, json_file, indent=2)
+
+    # Step 3: map the spatial cells onto the reference (from_specified_markers).
+    command = [
+        sys.executable,
+        "-m",
+        "cell_type_mapper.cli.from_specified_markers",
+        "--query_path",
+        str(sp_path),
+        "--type_assignment.normalization",
+        "log2CPM",
+        "--precomputed_stats.path",
+        str(precomputed_path),
+        "--query_markers.serialized_lookup",
+        str(genes_file_path),
+        "--csv_result_path",
+        str(csv_result_path),
+        "--extended_result_path",
+        str(extended_result_path),
+        "--flatten",
+        "True",
+        "--type_assignment.bootstrap_iteration",
+        "1",
+        "--type_assignment.bootstrap_factor",
+        "1.0",
+    ]
+    subprocess.run(command, check=True)
+
+    annotation_df = pd.read_csv(csv_result_path, skiprows=3)
+    adata_sp.obs[par['celltype_key']] = list(annotation_df['cell_type_label'])
+finally:
+    # Delete all temporary files, also when one of the steps above failed.
+    for file_path in (
+        sc_path,
+        sp_path,
+        precomputed_path,
+        genes_file_path,
+        csv_result_path,
+        extended_result_path,
+    ):
+        if os.path.isfile(file_path):
+            os.remove(file_path)
+
+adata_sp.write_h5ad(par['output'])
@@ -0,0 +1,38 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: singler
+label: "singler"
+summary: "Cell type annotations using single-cell reference with SingleR"
+description: "Cell type annotations using single-cell reference with SingleR"
+
+links:
+  documentation: "https://github.com/SingleR-inc/singler-py"
+  repository: "https://github.com/SingleR-inc/singler-py"
+references:
+  doi: "10.1038/s41590-018-0276-y"
+
+arguments:
+  - name: --labels_key
+    type: string
+    description: The key of the cell labels in the input data.
+    default: cell_labels
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+    - type: python
+      pypi: [singler]
+    __merge__: 
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem ]
@@ -0,0 +1,53 @@
+import anndata as ad
+import os
+import shutil
+
+import singlecellexperiment as sce
+import singler
+
+## VIASH START
+# The following code has been auto-generated by Viash.
+par = {
+  'input_spatial_normalized_counts': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+  'input_transcript_assignments': r'resources_test/task_ist_preprocessing/mouse_brain_combined/transcript_assignments.zarr',
+  'input_scrnaseq_reference': r'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+  'celltype_key': r'cell_type',
+  'output': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_cell_types.h5ad',
+  'labels_key': r'cell_labels'
+}
+meta = {
+  'name': r'singleR',
+  'functionality_name': r'singleR'
+}
+dep = {
+  
+}
+
+## VIASH END
+# Query: "counts" assay of the spatial data (sorted_indices() as SingleR needs).
+query_sce = sce.read_h5ad(par['input_spatial_normalized_counts'])
+query_genes = [str(gene) for gene in query_sce.row_data.row_names]
+query_matrix = query_sce.assay("counts").sorted_indices()
+# The same file as AnnData; it receives the labels and is written out.
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+
+# Reference: "normalized" assay, cell-type labels and gene names.
+reference_sce = sce.read_h5ad(par['input_scrnaseq_reference'])
+reference_matrix = reference_sce.assay("normalized").sorted_indices()
+reference_labels = reference_sce.get_column_data().column("cell_type")
+reference_genes = reference_sce.get_row_names()
+
+# Train SingleR on the reference, passing the query genes as test_features.
+trained_reference = singler.train_single(
+    ref_data=reference_matrix,
+    ref_labels=reference_labels,
+    ref_features=reference_genes,
+    test_features=query_genes,
+)
+
+# Classify each spatial cell and store the label reported as 'best'.
+predictions = singler.classify_single(query_matrix, ref_prebuilt=trained_reference)
+adata_sp.obs["cell_type"] = predictions['best']
+
+print('Writing output', flush=True)
+adata_sp.write(par['output'])
@@ -36,4 +36,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [ hightime, midcpu, midmem ]
+      label: [ veryhightime, midcpu, midmem ]