Skip to content

Commit e68f525

Browse files
committed
Merge remote-tracking branch 'origin/main' into add_rctd
2 parents ed321e9 + 142e0f5 commit e68f525

21 files changed

Lines changed: 417 additions & 6 deletions

File tree

scripts/run_benchmark/run_full_local.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ celltype_annotation_methods:
5757
- ssam
5858
# - tacco
5959
# - moscot
60+
# - mapmycells
61+
# - tangram
62+
# - singler
6063
expression_correction_methods:
6164
- no_correction
6265
# - gene_efficiency_correction

scripts/run_benchmark/run_full_seqeracloud.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ celltype_annotation_methods:
4949
- ssam
5050
- tacco
5151
- moscot
52+
- mapmycells
53+
- tangram
54+
- singler
5255
expression_correction_methods:
5356
- no_correction
5457
- gene_efficiency_correction

scripts/run_benchmark/run_test_local.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ celltype_annotation_methods:
5252
- ssam
5353
# - tacco
5454
# - moscot
55+
# - mapmycells
56+
# - tangram
57+
# - singler
5558
expression_correction_methods:
5659
- no_correction
5760
# - gene_efficiency_correction

scripts/run_benchmark/run_test_seqeracloud.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ celltype_annotation_methods:
4848
- ssam
4949
- tacco
5050
- moscot
51+
- mapmycells
52+
- tangram
53+
- singler
5154
expression_correction_methods:
5255
- no_correction
5356
- gene_efficiency_correction

src/data_processors/process_dataset/script.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
180180
if adata.n_obs > N_MAX_SC:
181181
adata = adata[subsample_adata_group_balanced(adata, "cell_type", N_MAX_SC, seed=0)]
182182

183+
# Make the single-cell data gene names unique
184+
adata.var_names = adata.var_names.astype(str)
185+
adata.var_names_make_unique()
186+
183187
# Subset single-cell and spatial data to shared genes
184188
sp_genes = sdata['transcripts']['feature_name'].unique().compute().tolist()
185189
sc_genes = adata.var["feature_name"].unique().tolist()
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: mapmycells
2+
label: "mapmycells"
3+
summary: "Mapping of annotations from single-cell to spatial using MapMyCells (cell_type_mapper)"
4+
description: "Mapping of annotations from single-cell to spatial using MapMyCells (cell_type_mapper)"
5+
links:
6+
documentation: 'https://github.com/AllenInstitute/cell_type_mapper'
7+
repository: 'https://github.com/AllenInstitute/cell_type_mapper'
8+
references:
9+
doi: "10.1038/s41586-023-06812-z"
10+
11+
__merge__: /src/api/comp_method_cell_type_annotation.yaml
12+
13+
14+
resources:
15+
- type: python_script
16+
path: script.py
17+
18+
engines:
19+
- type: docker
20+
image: openproblems/base_python:1
21+
__merge__:
22+
- /src/base/setup_spatialdata_partial.yaml
23+
- /src/base/setup_txsim_partial.yaml
24+
setup:
25+
- type: python
26+
pypi:
27+
- numpy
28+
- git+https://github.com/AllenInstitute/cell_type_mapper.git
29+
- type: native
30+
31+
runners:
32+
- type: executable
33+
- type: nextflow
34+
directives:
35+
label: [ hightime, midcpu, highmem]
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import anndata as ad
import os
import subprocess
import json
import pandas as pd
from pathlib import Path

## VIASH START
par = {
    'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
    'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
    'celltype_key': 'cell_type',
    "output": 'spatial_with_celltypes.h5ad'
}
meta = { "temp_dir": './tmp/'}

## VIASH END

# Annotate spatial cells with cell types from a single-cell reference by
# running the cell_type_mapper command-line tools on intermediate files.

# Scratch directory for the intermediate files; falls back to /tmp/ when no
# temp dir is configured. All paths below are derived from TMP_DIR so the
# fallback actually takes effect.
TMP_DIR = Path(meta["temp_dir"] or "/tmp/")
TMP_DIR.mkdir(parents=True, exist_ok=True)

adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])

# Use the "counts" layer of the reference as X when it is available.
if "counts" in adata_sc.layers:
    adata_sc.X = adata_sc.layers["counts"]

adata_sp.var_names = adata_sp.var_names.astype(str)
adata_sc.var_names = adata_sc.var_names.astype(str)
adata_sp.var_names_make_unique()
adata_sc.var_names_make_unique()

# Sorted so the gene order (and hence the intermediate files) is reproducible
# between runs instead of depending on set iteration order.
common_genes = sorted(set(adata_sp.var.index).intersection(adata_sc.var.index))
if not common_genes:
    raise ValueError(
        "No shared genes between the spatial data and the single-cell reference."
    )

adata_sc = adata_sc[:, common_genes]

sc_path = os.path.join(TMP_DIR, "sc_adata_processed.h5ad")
sp_path = os.path.join(TMP_DIR, "sp_processed.h5ad")
precomputed_path = os.path.join(TMP_DIR, "precomputed_stats.h5ad")
genes_file_path = os.path.join(TMP_DIR, "genes.json")
csv_result_path = os.path.join(TMP_DIR, "results.csv")
extended_result_path = os.path.join(TMP_DIR, "extended_results.json")

temporary_files = [
    sc_path,
    sp_path,
    precomputed_path,
    genes_file_path,
    csv_result_path,
    extended_result_path,
]

try:
    adata_sc.write_h5ad(sc_path)
    adata_sp[:, common_genes].write_h5ad(sp_path)

    # Step 1: precompute reference statistics from the single-cell data.
    precompute_command = [
        "python",
        "-m",
        "cell_type_mapper.cli.precompute_stats_scrattch",
        "--h5ad_path",
        sc_path,
        "--hierarchy",
        "['cell_type']",
        "--output_path",
        precomputed_path,
    ]
    # check=True: abort here with the real exit status instead of failing
    # later on a missing output file.
    subprocess.run(precompute_command, check=True)

    # All shared genes are passed as the marker lookup under the "None" key.
    marker_lookup = {"None": common_genes}
    with open(genes_file_path, "w") as json_file:
        json.dump(marker_lookup, json_file, indent=2)

    # Step 2: map the spatial cells onto the reference.
    mapping_command = [
        "python",
        "-m",
        "cell_type_mapper.cli.from_specified_markers",
        "--query_path",
        sp_path,
        "--type_assignment.normalization",
        "log2CPM",
        "--precomputed_stats.path",
        precomputed_path,
        "--query_markers.serialized_lookup",
        genes_file_path,
        "--csv_result_path",
        csv_result_path,
        "--extended_result_path",
        extended_result_path,
        "--flatten",
        "True",
        "--type_assignment.bootstrap_iteration",
        "1",
        "--type_assignment.bootstrap_factor",
        "1.0",
    ]
    subprocess.run(mapping_command, check=True)

    # NOTE(review): skiprows=3 presumably skips header/metadata lines written
    # by cell_type_mapper before the column header - confirm against its output.
    annotation_df = pd.read_csv(csv_result_path, skiprows=3)
finally:
    # Delete all temporary files, also when one of the steps above failed.
    for file_path in temporary_files:
        if os.path.isfile(file_path):
            os.remove(file_path)

# NOTE(review): assumes the result rows are in the same order as adata_sp.obs;
# a length mismatch raises on assignment, a reordering would not - confirm.
adata_sp.obs[par['celltype_key']] = list(annotation_df['cell_type_label'])

adata_sp.write_h5ad(par['output'])
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
__merge__: /src/api/comp_method_cell_type_annotation.yaml
2+
3+
name: singler
4+
label: "singler"
5+
summary: "Cell type annotations using single-cell reference with SingleR"
6+
description: "Cell type annotations using single-cell reference with SingleR"
7+
8+
links:
9+
documentation: "https://github.com/SingleR-inc/singler-py"
10+
repository: "https://github.com/SingleR-inc/singler-py"
11+
references:
12+
doi: "10.1038/s41590-018-0276-y"
13+
14+
arguments:
15+
- name: --labels_key
16+
type: string
17+
description: The key of the cell labels in the input data.
18+
default: cell_labels
19+
20+
resources:
21+
- type: python_script
22+
path: script.py
23+
24+
engines:
25+
- type: docker
26+
image: openproblems/base_python:1
27+
setup:
28+
- type: python
29+
pypi: [singler]
30+
__merge__:
31+
- /src/base/setup_spatialdata_partial.yaml
32+
- type: native
33+
34+
runners:
35+
- type: executable
36+
- type: nextflow
37+
directives:
38+
label: [ midtime, midcpu, midmem ]
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import anndata as ad
import os
import shutil

import singlecellexperiment as sce
import singler

## VIASH START
# The following code has been auto-generated by Viash.
par = {
    'input_spatial_normalized_counts': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
    'input_transcript_assignments': r'resources_test/task_ist_preprocessing/mouse_brain_combined/transcript_assignments.zarr',
    'input_scrnaseq_reference': r'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
    'celltype_key': r'cell_type',
    'output': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_cell_types.h5ad',
    'labels_key': r'cell_labels'
}
meta = {
    'name': r'singleR',
    'functionality_name': r'singleR'
}
dep = {

}

## VIASH END

# Annotate spatial cells with SingleR, using the single-cell reference as the
# training set, and store the predicted label per cell in the output AnnData.

# The spatial data is read twice: once as a SingleCellExperiment (input to
# SingleR) and once as AnnData (the object that is annotated and written out).
sce_h5ad = sce.read_h5ad(par['input_spatial_normalized_counts'])
adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])

sce_ref = sce.read_h5ad(par['input_scrnaseq_reference'])

features = [str(x) for x in sce_h5ad.row_data.row_names]

# NOTE(review): the query uses the "counts" assay while the reference uses
# "normalized"; the original author was unsure about this choice - confirm.
mat = sce_h5ad.assay("counts")
# Sort the indices so the matrix is in the format SingleR expects.
mat = mat.sorted_indices()

mat_ref = sce_ref.assay("normalized")
mat_ref = mat_ref.sorted_indices()

# Build the classifier from the single-cell reference.
# NOTE(review): the reference label column is fixed to "cell_type";
# par['labels_key'] is declared in the config but not used here - confirm intent.
built = singler.train_single(
    ref_data=mat_ref,
    ref_labels=sce_ref.get_column_data().column("cell_type"),
    ref_features=sce_ref.get_row_names(),
    test_features=features,
)

# Annotate the spatial dataset.
output = singler.classify_single(mat, ref_prebuilt=built)

# Store the best label under the configured output key rather than a
# hard-coded column name, so the celltype_key argument is honoured.
adata_sp.obs[par['celltype_key']] = output['best']

# Write output
print('Writing output', flush=True)
adata_sp.write(par['output'])

src/methods_cell_type_annotation/ssam/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,4 @@ runners:
3636
- type: executable
3737
- type: nextflow
3838
directives:
39-
label: [ hightime, midcpu, midmem ]
39+
label: [ veryhightime, midcpu, midmem ]

0 commit comments

Comments
 (0)