|
140 | 140 | "source": [ |
141 | 141 | "min_nnz\n", |
142 | 142 | "experiment_name = \"mus_musculus\"\n", |
143 | | - "#obs_val_filt = ('is_primary_data == True and tissue_general in [\"spleen\", \"kidney\"] '\n", |
| 143 | + "# obs_val_filt = ('is_primary_data == True and tissue_general in [\"spleen\", \"kidney\"] '\n", |
144 | 144 | "# 'and nnz >= 500')\n", |
145 | | - "obs_val_filt = ('is_primary_data == True and tissue_general in [\"kidney\"] '\n", |
146 | | - " 'and nnz >= 500 and donor_id not in [\"pooled\"]')\n", |
147 | | - "#obs_val_filt = ('is_primary_data == True and tissue_general in [\"liver\", \"heart\"] '\n", |
| 145 | + "obs_val_filt = (\n", |
| 146 | + " 'is_primary_data == True and tissue_general in [\"kidney\"] '\n", |
| 147 | + " 'and nnz >= 500 and donor_id not in [\"pooled\"]'\n", |
| 148 | + ")\n", |
| 149 | + "# obs_val_filt = ('is_primary_data == True and tissue_general in [\"liver\", \"heart\"] '\n", |
148 | 150 | "# 'and nnz >= 500')\n", |
149 | | - "obs_val_filt = ('is_primary_data == True and cell_type in [\"B cell\",\"T cell\", \"macrophage\",\"fibroblast\",\"endothelial\",\"myeloid\",\"lymphocyte\",\"natural killer cell\",\"CD4-positive, alpha-beta T cell\",\"CD8-positive, alpha-beta T cell\"] '\n", |
150 | | - " 'and donor_id not in [\"pooled\"] and dataset_id in [\"98e5ea9f-16d6-47ec-a529-686e76515e39\",\"58b01044-c5e5-4b0f-8a2d-6ebf951e01ff\",\"48b37086-25f7-4ecd-be66-f5bb378e3aea\"] and tissue_general not in [\"vasculature\"] and nnz >= 300')" |
| 151 | + "obs_val_filt = (\n", |
| 152 | + " 'is_primary_data == True and cell_type in [\"B cell\",\"T cell\", \"macrophage\",\"fibroblast\",\"endothelial\",\"myeloid\",\"lymphocyte\",\"natural killer cell\",\"CD4-positive, alpha-beta T cell\",\"CD8-positive, alpha-beta T cell\"] '\n", |
| 153 | + " 'and donor_id not in [\"pooled\"] and dataset_id in [\"98e5ea9f-16d6-47ec-a529-686e76515e39\",\"58b01044-c5e5-4b0f-8a2d-6ebf951e01ff\",\"48b37086-25f7-4ecd-be66-f5bb378e3aea\"] and tissue_general not in [\"vasculature\"] and nnz >= 300'\n", |
| 154 | + ")" |
151 | 155 | ] |
152 | 156 | }, |
153 | 157 | { |
|
163 | 167 | "metadata": {}, |
164 | 168 | "outputs": [], |
165 | 169 | "source": [ |
166 | | - "batch_keys = [\"dataset_id\",\"donor_id\",\"assay\",\"tissue_general\"]" |
| 170 | + "batch_keys = [\"dataset_id\", \"donor_id\", \"assay\", \"tissue_general\"]" |
167 | 171 | ] |
168 | 172 | }, |
169 | 173 | { |
|
248 | 252 | " dataloader_kwargs={\"num_workers\": 64, \"persistent_workers\": False},\n", |
249 | 253 | " accelerator=\"gpu\",\n", |
250 | 254 | " device=1,\n", |
251 | | - " return_sparse_X=False\n", |
| 255 | + " return_sparse_X=False,\n", |
252 | 256 | ")\n", |
253 | 257 | "print(datamodule.n_obs, datamodule.n_vars, datamodule.n_batch)" |
254 | 258 | ] |
|
300 | 304 | "source": [ |
301 | 305 | "# creating the dataloader for trainset\n", |
302 | 306 | "training_dataloader = (\n", |
303 | | - " datamodule.on_before_batch_transfer(batch, None)\n", |
304 | | - " for batch in datamodule.train_dataloader()\n", |
| 307 | + " datamodule.on_before_batch_transfer(batch, None) for batch in datamodule.train_dataloader()\n", |
305 | 308 | ")" |
306 | 309 | ] |
307 | 310 | }, |
|
358 | 361 | ], |
359 | 362 | "source": [ |
360 | 363 | "import time\n", |
| 364 | + "\n", |
361 | 365 | "start = time.time()\n", |
362 | 366 | "model.train(\n", |
363 | 367 | " datamodule=training_dataloader,\n", |
364 | | - " #datamodule=datamodule,\n", |
| 368 | + " # datamodule=datamodule,\n", |
365 | 369 | " max_epochs=100,\n", |
366 | 370 | " batch_size=1024,\n", |
367 | 371 | " # accelerator=\"gpu\",\n", |
|
572 | 576 | "metadata": {}, |
573 | 577 | "outputs": [], |
574 | 578 | "source": [ |
575 | | - "#adata = adata[~adata.obs[\"cell_type\"].str.contains(\"kidney\", na=False)]\n", |
576 | | - "#adata.obs.loc[adata.obs.cell_type.isin([\"CD8-positive, alpha-beta T cell\",\"CD4-positive, alpha-beta T cell\"],\"cell_type\"] = \"T cell\"" |
| 579 | + "# adata = adata[~adata.obs[\"cell_type\"].str.contains(\"kidney\", na=False)]\n", |
| 580 | + "# adata.obs.loc[adata.obs.cell_type.isin([\"CD8-positive, alpha-beta T cell\",\"CD4-positive, alpha-beta T cell\"]),\"cell_type\"] = \"T cell\"" |
577 | 581 | ] |
578 | 582 | }, |
579 | 583 | { |
|
603 | 607 | } |
604 | 608 | ], |
605 | 609 | "source": [ |
606 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], \n", |
607 | | - " title=[\"SCVI_\"+x for x in [\"dataset_id\",\"donor_id\"]])" |
| 610 | + "sc.pl.umap(\n", |
| 611 | + " adata,\n", |
| 612 | + " color=[\"dataset_id\", \"donor_id\"],\n", |
| 613 | + " title=[\"SCVI_\" + x for x in [\"dataset_id\", \"donor_id\"]],\n", |
| 614 | + ")" |
608 | 615 | ] |
609 | 616 | }, |
610 | 617 | { |
|
624 | 631 | } |
625 | 632 | ], |
626 | 633 | "source": [ |
627 | | - "sc.pl.umap(adata, color=[\"assay\",\"tissue_general\"], \n", |
628 | | - " title=[\"SCVI_\"+x for x in [\"assay\",\"tissue_general\"]])" |
| 634 | + "sc.pl.umap(\n", |
| 635 | + " adata,\n", |
| 636 | + " color=[\"assay\", \"tissue_general\"],\n", |
| 637 | + " title=[\"SCVI_\" + x for x in [\"assay\", \"tissue_general\"]],\n", |
| 638 | + ")" |
629 | 639 | ] |
630 | 640 | }, |
631 | 641 | { |
|
754 | 764 | "metadata": {}, |
755 | 765 | "outputs": [], |
756 | 766 | "source": [ |
757 | | - "scvi.model.SCVI.setup_anndata(adata, \n", |
758 | | - " layer=\"counts\",\n", |
759 | | - " categorical_covariate_keys=batch_keys)" |
| 767 | + "scvi.model.SCVI.setup_anndata(adata, layer=\"counts\", categorical_covariate_keys=batch_keys)" |
760 | 768 | ] |
761 | 769 | }, |
762 | 770 | { |
|
858 | 866 | } |
859 | 867 | ], |
860 | 868 | "source": [ |
861 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], \n", |
862 | | - " title=[\"SCVI_adata_\"+x for x in [\"dataset_id\",\"donor_id\"]])" |
| 869 | + "sc.pl.umap(\n", |
| 870 | + " adata,\n", |
| 871 | + " color=[\"dataset_id\", \"donor_id\"],\n", |
| 872 | + " title=[\"SCVI_adata_\" + x for x in [\"dataset_id\", \"donor_id\"]],\n", |
| 873 | + ")" |
863 | 874 | ] |
864 | 875 | }, |
865 | 876 | { |
|
879 | 890 | } |
880 | 891 | ], |
881 | 892 | "source": [ |
882 | | - "sc.pl.umap(adata, color=[\"assay\",\"tissue_general\"], \n", |
883 | | - " title=[\"SCVI_adata_\"+x for x in [\"assay\",\"tissue_general\"]])" |
| 893 | + "sc.pl.umap(\n", |
| 894 | + " adata,\n", |
| 895 | + " color=[\"assay\", \"tissue_general\"],\n", |
| 896 | + " title=[\"SCVI_adata_\" + x for x in [\"assay\", \"tissue_general\"]],\n", |
| 897 | + ")" |
884 | 898 | ] |
885 | 899 | }, |
886 | 900 | { |
|
1039 | 1053 | "metadata": {}, |
1040 | 1054 | "outputs": [], |
1041 | 1055 | "source": [ |
1042 | | - "#model_scanvi.save(\n", |
| 1056 | + "# model_scanvi.save(\n", |
1043 | 1057 | "# \"census_model_scanvi\", save_anndata=False, overwrite=True, datamodule=datamodule_scanvi\n", |
1044 | | - "#)" |
| 1058 | + "# )" |
1045 | 1059 | ] |
1046 | 1060 | }, |
1047 | 1061 | { |
|
1126 | 1140 | } |
1127 | 1141 | ], |
1128 | 1142 | "source": [ |
1129 | | - "sc.pl.umap(adata, color=[\"dataset_id\",\"donor_id\"], \n", |
1130 | | - " title=[\"SCANVI_\"+x for x in [\"dataset_id\",\"donor_id\"]])" |
| 1143 | + "sc.pl.umap(\n", |
| 1144 | + " adata,\n", |
| 1145 | + " color=[\"dataset_id\", \"donor_id\"],\n", |
| 1146 | + " title=[\"SCANVI_\" + x for x in [\"dataset_id\", \"donor_id\"]],\n", |
| 1147 | + ")" |
1131 | 1148 | ] |
1132 | 1149 | }, |
1133 | 1150 | { |
|
1147 | 1164 | } |
1148 | 1165 | ], |
1149 | 1166 | "source": [ |
1150 | | - "sc.pl.umap(adata, color=[\"assay\",\"tissue_general\"], \n", |
1151 | | - " title=[\"SCANVI_\"+x for x in [\"assay\",\"tissue_general\"]])" |
| 1167 | + "sc.pl.umap(\n", |
| 1168 | + " adata,\n", |
| 1169 | + " color=[\"assay\", \"tissue_general\"],\n", |
| 1170 | + " title=[\"SCANVI_\" + x for x in [\"assay\", \"tissue_general\"]],\n", |
| 1171 | + ")" |
1152 | 1172 | ] |
1153 | 1173 | }, |
1154 | 1174 | { |
|
1247 | 1267 | "df = adata.obs.groupby([\"cell_type\", \"predictions_scanvi\"]).size().unstack(fill_value=0)\n", |
1248 | 1268 | "norm_df = df / df.sum(axis=0)\n", |
1249 | 1269 | "import matplotlib.pyplot as plt\n", |
| 1270 | + "\n", |
1250 | 1271 | "plt.figure(figsize=(8, 8))\n", |
1251 | 1272 | "_ = plt.pcolor(norm_df)\n", |
1252 | 1273 | "_ = plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation=90)\n", |
|
1349 | 1370 | ], |
1350 | 1371 | "source": [ |
1351 | 1372 | "from scib_metrics.benchmark import Benchmarker\n", |
| 1373 | + "\n", |
1352 | 1374 | "bm = Benchmarker(\n", |
1353 | 1375 | " adata,\n", |
1354 | 1376 | " batch_key=\"batch\",\n", |
|
0 commit comments