forked from openproblems-bio/task_predict_modality
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
80 lines (68 loc) · 3 KB
/
script.py
File metadata and controls
80 lines (68 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys
import anndata as ad
import cellmapper as cm
from scipy.sparse import csc_matrix
## VIASH START
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
#
# The values below are placeholders for running the script directly; the block
# between the VIASH START / VIASH END markers is the part the note above refers to.

# `par` holds the component parameters read by the rest of the script.
par = {
# Paths of the three input .h5ad files loaded with `ad.read_h5ad`.
'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod1.h5ad',
'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod2.h5ad',
'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/test_mod1.h5ad',
# Path the resulting AnnData is written to (gzip-compressed h5ad).
'output': 'output.h5ad',
# Forwarded to CellMapper's `compute_neighbors(n_neighbors=...)`.
'n_neighbors': 30,
# Forwarded to CellMapper's `compute_mapping_matrix(kernel_method=...)`.
'kernel_method': 'hnoca',
# The next three options are forwarded unchanged to `utils.get_representation`.
'use_hvg': False,
'adt_normalization': 'clr', # Normalization method for ADT data
'plot_umap': True,
}

# `meta` holds component metadata.
meta = {
# Stored as `method_id` in the `uns` of the output file.
'name': 'cellmapper_scvi',
# Appended to `sys.path` so that the local `utils` module can be imported.
'resources_dir': 'target/executable/methods/cellmapper_scvi',
}
## VIASH END
# The `utils` helper module is looked up in the component's resources
# directory, so that directory must be on the import path before importing it.
sys.path.append(meta['resources_dir'])
from utils import get_representation

# ---------------------------------------------------------------------------
# 1. Load the inputs
# ---------------------------------------------------------------------------
print('Reading input files', flush=True)
input_train_mod1 = ad.read_h5ad(par['input_train_mod1'])
input_train_mod2 = ad.read_h5ad(par['input_train_mod2'])
input_test_mod1 = ad.read_h5ad(par['input_test_mod1'])

# Each training file records its modality name under `uns['modality']`.
mod1 = input_train_mod1.uns['modality']
mod2 = input_train_mod2.uns['modality']
print(f"Modality 1: {mod1}, n_features: {input_train_mod1.n_vars}", flush=True)
print(f"Modality 2: {mod2}, n_features: {input_train_mod2.n_vars}", flush=True)

# ---------------------------------------------------------------------------
# 2. Compute one shared representation for train + test cells of modality 1
# ---------------------------------------------------------------------------
print("Concatenating train and test data", flush=True)
combined = ad.concat(
    [input_train_mod1, input_test_mod1],
    merge="same",
    label="split",  # adds an obs column "split" holding "train" / "test"
    keys=["train", "test"],
)

# The helper picks how to embed the data based on the modality; the script
# reads the result back from `obsm["X_scvi"]`.
print("Get latent representation", flush=True)
combined = get_representation(
    adata=combined,
    modality=mod1,
    use_hvg=par['use_hvg'],
    adt_normalization=par['adt_normalization'],
    plot_umap=par['plot_umap'],
)

# Hand each original object the rows of the representation that belong to it.
for split_name, target in (("train", input_train_mod1), ("test", input_test_mod1)):
    in_split = combined.obs["split"] == split_name
    target.obsm["X_scvi"] = combined[in_split].obsm["X_scvi"].copy()

# Attach the normalized modality-2 training values to the reference object so
# they can be transferred to the query.
# NOTE(review): relies on train_mod1 and train_mod2 sharing the same cell
# order -- confirm against the dataset specification.
input_train_mod1.obsm["mod2"] = input_train_mod2.layers["normalized"]

# ---------------------------------------------------------------------------
# 3. Map test cells (query) onto training cells (reference)
# ---------------------------------------------------------------------------
print('Setup and prepare Cellmapper', flush=True)
mapper = cm.CellMapper(query=input_test_mod1, reference=input_train_mod1)
mapper.compute_neighbors(use_rep="X_scvi", n_neighbors=par['n_neighbors'])
mapper.compute_mapping_matrix(kernel_method=par['kernel_method'])

print("Predict on test data", flush=True)
# Key "mod2" combined with postfix "pred": the prediction is read back from
# the query's `obsm["mod2_pred"]` and stored as a CSC sparse matrix.
mapper.map_obsm(key="mod2", prediction_postfix="pred")
predicted = csc_matrix(mapper.query.obsm["mod2_pred"])

# ---------------------------------------------------------------------------
# 4. Assemble and write the output
# ---------------------------------------------------------------------------
print("Write output AnnData to file", flush=True)
result = ad.AnnData(
    layers={"normalized": predicted},
    obs=input_test_mod1.obs,
    var=input_train_mod2.var,
    uns={
        'dataset_id': input_train_mod1.uns['dataset_id'],
        'method_id': meta["name"],
    },
)
result.write_h5ad(par['output'], compression='gzip')