|
4 | 4 | from collections import defaultdict |
5 | 5 | import anndata as ad |
6 | 6 | from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache |
| 7 | +import re |
7 | 8 |
|
8 | 9 | ## VIASH START |
9 | 10 | par = { |
10 | 11 | "abca_version": "20230630", |
11 | | - "regions": ["MB", "TF"], |
| 12 | + "regions": ["MB", "TH"], |
12 | 13 | "sample_n_obs": 5000, |
13 | 14 | "sample_obs_weight": "subclass", |
14 | 15 | "sample_transform": "sqrt", |
|
76 | 77 | # TODO: potentially also load other chemistries (currently only 10Xv2) |
77 | 78 |
|
78 | 79 | print("Downloading and reading expression matrices", flush=True) |
# Collect the raw expression files to download for the requested regions.
# Every file listed for the data subdirectory is tested against every region
# code; the pattern allows an optional run of "-" and digits after the region
# code (presumably for regions that are split over several numbered files --
# confirm against the ABC atlas manifest).
abca_data_subdir = "WMB-10Xv2"
abca_region_files = [
    # NOTE: the (region, file) order must match the tuple unpacking in the
    # download loop (`for region, abca_data_file in abca_region_files`).
    # Emitting (file, region) here would pass the region code as `file_name`.
    (region, data_file)
    for data_file in abc_cache.list_data_files(abca_data_subdir)
    for region in REGIONS
    # Escape the interpolated parts so they are matched literally.
    if re.match(
        rf"{re.escape(abca_data_subdir)}-{re.escape(region)}[\-0-9]*/raw",
        data_file,
    )
]
| 87 | + |
79 | 88 | adatas = [] |
80 | | -for region in REGIONS: |
| 89 | +for region, abca_data_file in abca_region_files: |
81 | 90 | try: |
82 | | - print(f"Downloading h5ad file for region {region}", flush=True) |
83 | | - adata_path = abc_cache.get_data_path(directory="WMB-10Xv2", file_name=f"WMB-10Xv2-{region}/raw") |
| 91 | + print(f"Downloading file {abca_data_file} for region {region}", flush=True) |
| 92 | + adata_path = abc_cache.get_data_path( |
| 93 | + directory=abca_data_subdir, |
| 94 | + file_name=abca_data_file |
| 95 | + ) |
84 | 96 |
|
85 | | - print(f"Reading h5ad for region {region}", flush=True) |
| 97 | + print(f"Reading file {adata_path}", flush=True) |
86 | 98 | adata = ad.read_h5ad(str(adata_path)) |
87 | 99 |
|
88 | 100 | if not par["keep_files"]: |
|
91 | 103 | # filter cells |
92 | 104 | adata = adata[adata.obs_names.isin(obs.index)].copy() |
93 | 105 |
|
94 | | - # add region to obs |
95 | | - adata.obs["region"] = region |
96 | | - |
97 | 106 | # move counts to layer |
98 | 107 | adata.layers["counts"] = adata.X |
99 | 108 | del adata.X |
|
0 commit comments