-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscript.py
More file actions
64 lines (56 loc) · 2.44 KB
/
script.py
File metadata and controls
64 lines (56 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import anndata as ad
import cellmapper as cm
from scipy.sparse import csc_matrix
## VIASH START
# NOTE: viash regenerates everything between the VIASH START / VIASH END
# markers at runtime, so manual edits here are only useful for local testing.
# To change these values, edit config.vsh.yaml and then run
# `viash config inject config.vsh.yaml`.
par = {
    # Paths of the input files and of the output file written by this script.
    'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/normal/train_mod1.h5ad',
    'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/normal/train_mod2.h5ad',
    'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_cite/normal/test_mod1.h5ad',
    'output': 'output.h5ad',
    # Forwarded to CellMapper.compute_neighbors as `fallback_representation`;
    # may also be set to None.
    'fallback_representation': 'joint_pca',
    # Forwarded to CellMapper.compute_neighbors as `n_neighbors`.
    'n_neighbors': 30,
    # Forwarded to CellMapper.compute_mapping_matrix as `kernel_method`.
    'kernel_method': 'gauss',
    # Forwarded as `mask_var` inside `fallback_kwargs`: the variable mask used
    # when computing the fallback representation.
    'mask_var': 'hvg',
}
meta = {
    # Written to the output file as uns['method_id'].
    'name': 'cellmapper_linear',
}
## VIASH END
# --- Load the three input AnnData files (train mod1, train mod2, test mod1) ---
print('Reading input files', flush=True)
train_mod1, train_mod2, test_mod1 = [
    ad.read_h5ad(par[path_key])
    for path_key in ('input_train_mod1', 'input_train_mod2', 'input_test_mod1')
]

# --- Prepare the data ---
print('Prepare the data', flush=True)
# Overwrite .X of both mod1 objects with a copy of their "normalized" layer,
# so that .X holds normalized values while the layer itself stays untouched.
for mod1_adata in (train_mod1, test_mod1):
    mod1_adata.X = mod1_adata.layers["normalized"].copy()
# Store the normalized mod2 training values in the reference's obsm under
# "mod2"; this is the key handed to map_obsm further down.
train_mod1.obsm["mod2"] = train_mod2.layers["normalized"]

# Pick the kNN backend from the combined number of query and reference cells:
# sklearn below 60000 cells, pynndescent otherwise.
total_cells = test_mod1.n_obs + train_mod1.n_obs
if total_cells < 60000:
    knn_backend = "sklearn"
else:
    knn_backend = "pynndescent"

# --- Build the query -> reference mapping ---
print("Set up and prepare Cellmapper", flush=True)
cmap = cm.CellMapper(query=test_mod1, reference=train_mod1)
neighbor_options = {
    "use_rep": None,
    "knn_method": knn_backend,
    "fallback_representation": par['fallback_representation'],
    "n_neighbors": par['n_neighbors'],
    "fallback_kwargs": {"mask_var": par['mask_var']},
}
cmap.compute_neighbors(**neighbor_options)
cmap.compute_mapping_matrix(kernel_method=par['kernel_method'])

# --- Predict mod2 for the test cells ---
print("Predict on test data", flush=True)
# map_obsm is called with key "mod2" and postfix "pred"; the prediction is
# then read back from the query's obsm under "mod2_pred".
cmap.map_obsm(key="mod2", prediction_postfix="pred")
predicted_dense = cmap.query.obsm["mod2_pred"]
mod2_pred = csc_matrix(predicted_dense)  # stored as a CSC sparse matrix

# --- Assemble and write the output ---
print("Write output AnnData to file", flush=True)
output_uns = {
    'dataset_id': train_mod1.uns['dataset_id'],
    'method_id': meta["name"],
}
# The prediction goes into the "normalized" layer; obs comes from the test
# cells and var from the mod2 training data.
output = ad.AnnData(
    layers={"normalized": mod2_pred},
    obs=test_mod1.obs,
    var=train_mod2.var,
    uns=output_uns,
)
output.write_h5ad(par['output'], compression='gzip')