|
85 | 85 | ], |
86 | 86 | "source": [ |
87 | 87 | "import warnings\n", |
88 | | - "from typing import Any\n", |
89 | 88 | "\n", |
90 | 89 | "import cellxgene_census\n", |
91 | 90 | "import numpy as np\n", |
92 | | - "import pandas as pd\n", |
93 | 91 | "import scanpy as sc\n", |
94 | 92 | "import scvi\n", |
95 | 93 | "import tiledbsoma as soma\n", |
96 | | - "import tiledbsoma_ml\n", |
97 | | - "import torch\n", |
98 | 94 | "from cellxgene_census.experimental.pp import highly_variable_genes\n", |
99 | | - "#from lightning import LightningDataModule\n", |
100 | | - "#from sklearn.preprocessing import LabelEncoder\n", |
101 | | - "#from torch.utils.data import DataLoader\n", |
| 95 | + "\n", |
| 96 | + "# from lightning import LightningDataModule\n", |
| 97 | + "# from sklearn.preprocessing import LabelEncoder\n", |
| 98 | + "# from torch.utils.data import DataLoader\n", |
102 | 99 | "from scvi.dataloaders import TileDBDataModule\n", |
103 | 100 | "\n", |
104 | 101 | "warnings.filterwarnings(\"ignore\")" |
|
142 | 139 | "outputs": [], |
143 | 140 | "source": [ |
144 | 141 | "experiment_name = \"mus_musculus\"\n", |
145 | | - "#obs_val_filt = 'is_primary_data == True and tissue_general in [\"spleen\", \"kidney\"] and nnz >= 500'\n", |
| 142 | + "# obs_val_filt = 'is_primary_data == True and tissue_general in [\"spleen\", \"kidney\"] and nnz >= 500'\n", |
146 | 143 | "obs_val_filt = 'is_primary_data == True and tissue_general in [\"liver\"] and nnz >= 500'\n", |
147 | | - "#obs_val_filt = 'is_primary_data == True and tissue_general in [\"liver\", \"heart\"] and nnz >= 500'\n", |
| 144 | + "# obs_val_filt = 'is_primary_data == True and tissue_general in [\"liver\", \"heart\"] and nnz >= 500'\n", |
148 | 145 | "top_n_hvg = 500\n", |
149 | | - "hvg_batch = [\"dataset_id\",\"donor_id\"]" |
| 146 | + "hvg_batch = [\"dataset_id\", \"donor_id\"]" |
150 | 147 | ] |
151 | 148 | }, |
152 | 149 | { |
|
171 | 168 | ")\n", |
172 | 169 | "hv = hvgs_df.highly_variable\n", |
173 | 170 | "hv_idx = hv[hv].index\n", |
174 | | - "#hv_idx = np.arange(10)" |
| 171 | + "# hv_idx = np.arange(10)" |
175 | 172 | ] |
176 | 173 | }, |
177 | 174 | { |
|
207 | 204 | "metadata": {}, |
208 | 205 | "outputs": [], |
209 | 206 | "source": [ |
210 | | - "batch_keys = [\"dataset_id\",\"donor_id\"]" |
| 207 | + "batch_keys = [\"dataset_id\", \"donor_id\"]" |
211 | 208 | ] |
212 | 209 | }, |
213 | 210 | { |
|
242 | 239 | " batch_size=1024,\n", |
243 | 240 | " shuffle=True,\n", |
244 | 241 | " seed=42,\n", |
245 | | - " batch_column_names = batch_keys,\n", |
| 242 | + " batch_column_names=batch_keys,\n", |
246 | 243 | " dataloader_kwargs={\"num_workers\": 64, \"persistent_workers\": False},\n", |
247 | 244 | " accelerator=\"gpu\",\n", |
248 | 245 | " device=2,\n", |
249 | | - " return_sparse_X=False\n", |
| 246 | + " return_sparse_X=False,\n", |
250 | 247 | ")\n", |
251 | 248 | "print(datamodule.n_obs, datamodule.n_vars, datamodule.n_batch)" |
252 | 249 | ] |
|
281 | 278 | "n_latent = 10\n", |
282 | 279 | "datamodule.setup()\n", |
283 | 280 | "model = scvi.model.SCVI(\n", |
284 | | - " adata=None, registry=datamodule.registry ,n_layers=n_layers, n_latent=n_latent, gene_likelihood=\"nb\", encode_covariates=False\n", |
| 281 | + " adata=None,\n", |
| 282 | + " registry=datamodule.registry,\n", |
| 283 | + " n_layers=n_layers,\n", |
| 284 | + " n_latent=n_latent,\n", |
| 285 | + " gene_likelihood=\"nb\",\n", |
| 286 | + " encode_covariates=False,\n", |
285 | 287 | ")" |
286 | 288 | ] |
287 | 289 | }, |
|
301 | 303 | "source": [ |
302 | 304 | "# creating the dataloader for trainset\n", |
303 | 305 | "training_dataloader = (\n", |
304 | | - " datamodule.on_before_batch_transfer(batch, None)\n", |
305 | | - " for batch in datamodule.train_dataloader()\n", |
| 306 | + " datamodule.on_before_batch_transfer(batch, None) for batch in datamodule.train_dataloader()\n", |
306 | 307 | ")" |
307 | 308 | ] |
308 | 309 | }, |
|
361 | 362 | ], |
362 | 363 | "source": [ |
363 | 364 | "import time\n", |
| 365 | + "\n", |
364 | 366 | "start = time.time()\n", |
365 | 367 | "model.train(\n", |
366 | 368 | " datamodule=training_dataloader,\n", |
367 | | - " #datamodule=datamodule,\n", |
| 369 | + " # datamodule=datamodule,\n", |
368 | 370 | " max_epochs=10,\n", |
369 | 371 | " batch_size=1024,\n", |
370 | | - " #accelerator=\"gpu\",\n", |
371 | | - " #devices=-1,\n", |
372 | | - " #strategy=\"ddp_notebook_find_unused_parameters_true\",\n", |
| 372 | + " # accelerator=\"gpu\",\n", |
| 373 | + " # devices=-1,\n", |
| 374 | + " # strategy=\"ddp_notebook_find_unused_parameters_true\",\n", |
373 | 375 | ")\n", |
374 | 376 | "end = time.time()\n", |
375 | 377 | "print(f\"Elapsed time: {end - start:.2f} seconds\")" |
|
610 | 612 | "source": [ |
611 | 613 | "sc.pp.neighbors(adata, use_rep=\"scvi\", key_added=\"scvi\")\n", |
612 | 614 | "sc.tl.umap(adata, neighbors_key=\"scvi\")\n", |
613 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], title=\"SCVI\")" |
| 615 | + "sc.pl.umap(adata, color=[\"dataset_id\", \"donor_id\"], title=\"SCVI\")" |
614 | 616 | ] |
615 | 617 | }, |
616 | 618 | { |
|
641 | 643 | "metadata": {}, |
642 | 644 | "outputs": [], |
643 | 645 | "source": [ |
644 | | - "#sc.pl.umap(adata, color=\"tissue_general\", title=\"SCVI\")" |
| 646 | + "# sc.pl.umap(adata, color=\"tissue_general\", title=\"SCVI\")" |
645 | 647 | ] |
646 | 648 | }, |
647 | 649 | { |
|
666 | 668 | "metadata": {}, |
667 | 669 | "outputs": [], |
668 | 670 | "source": [ |
669 | | - "scvi.model.SCVI.setup_anndata(adata, \n", |
670 | | - " batch_key=\"batch\")" |
| 671 | + "scvi.model.SCVI.setup_anndata(adata, batch_key=\"batch\")" |
671 | 672 | ] |
672 | 673 | }, |
673 | 674 | { |
|
676 | 677 | "metadata": {}, |
677 | 678 | "outputs": [], |
678 | 679 | "source": [ |
679 | | - "#model_census3 = scvi.model.SCVI.load(\"census_model\", adata=adata)\n", |
| 680 | + "# model_census3 = scvi.model.SCVI.load(\"census_model\", adata=adata)\n", |
680 | 681 | "model_census3 = scvi.model.SCVI(adata)" |
681 | 682 | ] |
682 | 683 | }, |
|
769 | 770 | "source": [ |
770 | 771 | "sc.pp.neighbors(adata, use_rep=\"scvi_non_dataloder\", key_added=\"scvi_non_dataloder\")\n", |
771 | 772 | "sc.tl.umap(adata, neighbors_key=\"scvi_non_dataloder\")\n", |
772 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], title=\"SCVI_non_dataloder\")" |
| 773 | + "sc.pl.umap(adata, color=[\"dataset_id\", \"donor_id\"], title=\"SCVI_non_dataloder\")" |
773 | 774 | ] |
774 | 775 | }, |
775 | 776 | { |
|
798 | 799 | "metadata": {}, |
799 | 800 | "outputs": [], |
800 | 801 | "source": [ |
801 | | - "#sc.pl.umap(adata, color=\"tissue_general\", title=\"SCVI_non_dataloder\")" |
| 802 | + "# sc.pl.umap(adata, color=\"tissue_general\", title=\"SCVI_non_dataloder\")" |
802 | 803 | ] |
803 | 804 | }, |
804 | 805 | { |
|
945 | 946 | "metadata": {}, |
946 | 947 | "outputs": [], |
947 | 948 | "source": [ |
948 | | - "#model_scanvi.save(\"census_model_scanvi\", save_anndata=False, overwrite=True, datamodule=datamodule_scanvi)" |
| 949 | + "# model_scanvi.save(\"census_model_scanvi\", save_anndata=False, overwrite=True, datamodule=datamodule_scanvi)" |
949 | 950 | ] |
950 | 951 | }, |
951 | 952 | { |
|
1037 | 1038 | "source": [ |
1038 | 1039 | "sc.pp.neighbors(adata, use_rep=\"scanvi\", key_added=\"scanvi\")\n", |
1039 | 1040 | "sc.tl.umap(adata, neighbors_key=\"scanvi\")\n", |
1040 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], title=\"SCANVI\")" |
| 1041 | + "sc.pl.umap(adata, color=[\"dataset_id\", \"donor_id\"], title=\"SCANVI\")" |
1041 | 1042 | ] |
1042 | 1043 | }, |
1043 | 1044 | { |
|
1066 | 1067 | "metadata": {}, |
1067 | 1068 | "outputs": [], |
1068 | 1069 | "source": [ |
1069 | | - "#sc.pl.umap(adata, color=\"tissue_general\", title=\"SCANVI\")" |
| 1070 | + "# sc.pl.umap(adata, color=\"tissue_general\", title=\"SCANVI\")" |
1070 | 1071 | ] |
1071 | 1072 | }, |
1072 | 1073 | { |
|
0 commit comments