Skip to content

Commit d5bdf80

Browse files
authored
Update Jupyter Notebook (#254)
1 parent ff9a09f commit d5bdf80

2 files changed

Lines changed: 57 additions & 57 deletions

File tree

main/COMO.ipynb

Lines changed: 56 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@
256256
" raise FileNotFoundError(\"Root directory could not be determined; unable to find 'COMO.ipynb'\")\n",
257257
" current_dir = current_dir.parent"
258258
],
259-
"id": "2656ec5201a33b9f"
259+
"id": "f58450da8bd97732"
260260
},
261261
{
262262
"metadata": {},
@@ -299,7 +299,7 @@
299299
"mrna_metadata_filepath = Path(notebook_dir / \"data/config_sheets/mrna_config.xlsx\")\n",
300300
"proteomics_metadata_filepath = Path(notebook_dir / \"data/config_sheets/proteomics_config.xlsx\")\n"
301301
],
302-
"id": "3aa04b2bf1798c20"
302+
"id": "c1d957a21a4b5393"
303303
},
304304
{
305305
"metadata": {},
@@ -311,7 +311,7 @@
311311
"- `taxon_id`: The [NCBI Taxon ID](https://www.ncbi.nlm.nih.gov/taxonomy) to use\n",
312312
"- `preprocess_mode`: This should be set to `\"create-matrix\"` if you are **not** providing a matrix, otherwise set it to `\"provide-matrix\"`"
313313
],
314-
"id": "11852d88cdc2ee32"
314+
"id": "7d813235940a2e89"
315315
},
316316
{
317317
"metadata": {},
@@ -322,7 +322,7 @@
322322
"for context in context_names:\n",
323323
" if context not in {*trna_matrix_filepath, *mrna_matrix_filepath}:\n",
324324
" continue\n",
325-
" await rnaseq_preprocess(\n",
325+
" rnaseq_preprocess(\n",
326326
" context_name=context,\n",
327327
" taxon=taxon_id,\n",
328328
" como_context_dir=como_context_dir[context],\n",
@@ -336,7 +336,7 @@
336336
" log_level=\"INFO\",\n",
337337
" )"
338338
],
339-
"id": "49a9206c10732797"
339+
"id": "b2cb28bba57ed02f"
340340
},
341341
{
342342
"metadata": {},
@@ -369,7 +369,7 @@
369369
"\n",
370370
"This method is not recommended, as zFPKM is much more robust for a similar level of \"hands-off\" model building\n"
371371
],
372-
"id": "addc4f6dd55a33f3"
372+
"id": "b9227a4f747356f2"
373373
},
374374
{
375375
"metadata": {},
@@ -390,7 +390,7 @@
390390
"#### Single Cell RNA Sequencing\n",
391391
"While the Snakemake pipeline does not yet support single-cell alignment, and COMO does not yet support automated configuration file and counts matrix file creation for single-cell alignment output from STAR, it is possible to use single-cell data to create a model with COMO. Because normalization strategies can be applied to single-cell data in the same way they are applied to bulk RNA sequencing, `como/rnaseq_gen.py` can be used with a provided counts matrix and configuration file, from [Step 1](Step-1:-Initialize-and-Preprocess-RNA-seq-data), above. Just like `\"total\"` and `\"mRNA\"`, `como/rnaseq_gen.py` can be executed with `\"SC\"` as the \"`--library-prep`\" argument to help COMO differentiate it from any bulk RNA sequencing data if multiple strategies are being used."
392392
],
393-
"id": "ed35195f4278ae5c"
393+
"id": "4c960cd5ddcdd542"
394394
},
395395
{
396396
"metadata": {},
@@ -409,7 +409,7 @@
409409
"- `min_zfpkm`: The cutoff for Counts-Per-Million filtering\n",
410410
"- `prep_method`: The library method used for preparation. Options are: `\"total\"`, `\"mRNA\"`, or `\"SC\"`,\n"
411411
],
412-
"id": "6dd118e06e9e4da0"
412+
"id": "cbacf586116ce040"
413413
},
414414
{
415415
"metadata": {},
@@ -429,15 +429,17 @@
429429
"for context in context_names:\n",
430430
" if context not in trna_matrix_filepath:\n",
431431
" continue\n",
432-
" output_zscore_norm_filepath = Path(get_notebook_dir() / f\"data/results/{context}/z_score_normalization.csv\")\n",
433-
" await rnaseq_gen(\n",
432+
" output_zscore_norm_filepath = Path(\n",
433+
" get_notebook_dir() / \"data\" / \"results\" / context / \"trna_z_score_normalization.csv\"\n",
434+
" )\n",
435+
" rnaseq_gen(\n",
434436
" context_name=context,\n",
437+
" input_metadata_filepath_or_df=trna_metadata_filepath,\n",
435438
" input_rnaseq_filepath=trna_matrix_filepath[context],\n",
436439
" input_gene_info_filepath=gene_info_filepath[context],\n",
437440
" output_boolean_activity_filepath=trna_matrix_filepath[context],\n",
438441
" prep=RNAType.TRNA,\n",
439442
" taxon_id=taxon_id,\n",
440-
" input_metadata_filepath_or_df=trna_metadata_filepath,\n",
441443
" replicate_ratio=replicate_ratio,\n",
442444
" high_replicate_ratio=high_confidence_replicate_ratio,\n",
443445
" batch_ratio=batch_ratio,\n",
@@ -447,7 +449,7 @@
447449
" output_zscore_normalization_filepath=output_zscore_norm_filepath,\n",
448450
" )"
449451
],
450-
"id": "bd15ec97dd0a38a8"
452+
"id": "6f7e1634d7a912ba"
451453
},
452454
{
453455
"metadata": {},
@@ -467,7 +469,7 @@
467469
"- `min_zfpkm`: The cutoff for Counts-Per-Million filtering\n",
468470
"- `prep_method`: The library method used for preparation. Options are: `\"total\"`, `\"mRNA\"`, or `\"SC\"`,\n"
469471
],
470-
"id": "71fd3eab25176aad"
472+
"id": "ca2918f7d6e23e5c"
471473
},
472474
{
473475
"metadata": {},
@@ -487,26 +489,27 @@
487489
"for context in context_names:\n",
488490
" if context not in mrna_matrix_filepath:\n",
489491
" continue\n",
490-
" await rnaseq_gen(\n",
492+
" rnaseq_gen(\n",
491493
" context_name=context,\n",
492494
" input_metadata_filepath_or_df=mrna_metadata_filepath,\n",
493495
" input_rnaseq_filepath=mrna_matrix_filepath[context],\n",
494496
" input_gene_info_filepath=gene_info_filepath[context],\n",
495497
" output_boolean_activity_filepath=mrna_matrix_filepath[context],\n",
496498
" prep=RNAType.MRNA,\n",
497499
" taxon_id=taxon_id,\n",
498-
" input_metadata_filepath=mrna_metadata_filepath,\n",
499500
" replicate_ratio=replicate_ratio,\n",
500501
" high_replicate_ratio=high_confidence_replicate_ratio,\n",
501502
" batch_ratio=batch_ratio,\n",
502503
" high_batch_ratio=high_confidence_batch_ratio,\n",
503504
" technique=technique,\n",
504505
" cutoff=cutoff,\n",
505-
" output_zscore_normalization_filepath=Path(get_notebook_dir(),\n",
506-
" f\"data/results/{context}/z_score_normalization.csv\"),\n",
506+
" output_zscore_normalization_filepath=Path(\n",
507+
" get_notebook_dir(),\n",
508+
" f\"data/results/{context}/z_score_normalization.csv\"\n",
509+
" ),\n",
507510
" )"
508511
],
509-
"id": "925f939b1f318673"
512+
"id": "af4293b08391ad2f"
510513
},
511514
{
512515
"metadata": {},
@@ -526,7 +529,7 @@
526529
"- `min_zfpkm`: The cutoff for Counts-Per-Million filtering\n",
527530
"- `prep_method`: The library method used for preparation. Options are: `\"total\"`, `\"mRNA\"`, or `\"scrna\"`,\n"
528531
],
529-
"id": "ddc6b8d634feaacc"
532+
"id": "24d6510e09b88018"
530533
},
531534
{
532535
"metadata": {},
@@ -546,24 +549,27 @@
546549
"for context in context_names:\n",
547550
" if context not in scrna_matrix_filepath:\n",
548551
" continue\n",
549-
" await rnaseq_gen(\n",
552+
" output_zscore_norm_filepath = Path(\n",
553+
" get_notebook_dir() / \"data\" / \"results\" / context / \"mrna_zscore_normalization.csv\"\n",
554+
" )\n",
555+
" rnaseq_gen(\n",
550556
" context_name=context,\n",
557+
" input_metadata_filepath_or_df=mrna_metadata_filepath,\n",
551558
" input_rnaseq_filepath=scrna_matrix_filepath[context],\n",
552559
" input_gene_info_filepath=gene_info_filepath[context],\n",
553560
" output_boolean_activity_filepath=scrna_matrix_filepath[context],\n",
554561
" prep=RNAType.SCRNA,\n",
555562
" taxon_id=taxon_id,\n",
556-
" input_metadata_filepath=Path(\"./data/config_sheets/scrna_config.xlsx\"),\n",
557563
" replicate_ratio=replicate_ratio,\n",
558564
" high_replicate_ratio=high_confidence_replicate_ratio,\n",
559565
" batch_ratio=batch_ratio,\n",
560566
" high_batch_ratio=high_confidence_batch_ratio,\n",
561567
" technique=technique,\n",
562568
" cutoff=cutoff,\n",
563-
" output_zscore_normalization_filepath=None,\n",
569+
" output_zscore_normalization_filepath=output_zscore_norm_filepath\n",
564570
" )"
565571
],
566-
"id": "ff137d18eed6995b"
572+
"id": "ef57ea1b08c1b121"
567573
},
568574
{
569575
"metadata": {},
@@ -580,7 +586,7 @@
580586
"- `high_batch_ratio`: The ratio required before a gene is considered \"high-confidence\" in the study\n",
581587
"- `quantile`: The cutoff Transcripts-Per-Million quantile for filtering"
582588
],
583-
"id": "8ca2a08af58c517d"
589+
"id": "c2c5cc7eb9d2e44f"
584590
},
585591
{
586592
"metadata": {},
@@ -592,7 +598,7 @@
592598
"\n",
593599
"for context in context_names:\n",
594600
" await proteomics_gen(\n",
595-
" context_name=context_names,\n",
601+
" context_name=context,\n",
596602
" config_filepath=proteomics_metadata_filepath,\n",
597603
" matrix_filepath=proteomics_matrix_filepath[context],\n",
598604
" output_boolean_filepath=Path(f\"data/results/{context}/proteomics/{context}_proteomics_boolean_matrix.csv\"),\n",
@@ -607,7 +613,7 @@
607613
" quantile=25,\n",
608614
" )"
609615
],
610-
"id": "2124f206489b1002"
616+
"id": "ca39fe2e0744a401"
611617
},
612618
{
613619
"metadata": {},
@@ -633,7 +639,7 @@
633639
"- `n_neighbors_context`: N nearest neighbors for context clustering. The default is `\"default\"`, which is the total number of contexts\n",
634640
"- `seed`: The random seed for clustering algorithm initialization. If not specified, `np.random.randint(0, 100000)` is used"
635641
],
636-
"id": "50c625b676e3d643"
642+
"id": "6a7f40910eefa1cc"
637643
},
638644
{
639645
"metadata": {},
@@ -677,7 +683,7 @@
677683
"\n",
678684
"!{cmd}"
679685
],
680-
"id": "9f1a7a85673fc17"
686+
"id": "1728cd710f834c2f"
681687
},
682688
{
683689
"metadata": {},
@@ -717,7 +723,7 @@
717723
"\n",
718724
"Each of the \"weights\" (`total_rna_weight`, `mrna_weight`, etc.) is used to place a significance on each method. Because there are many steps in the Dogma from transcription to translation, the gene expression as seen by total RNA or mRNA sequencing may not be representative of the gene's protein expression, and thus its metabolic impact. Because of this, you are able to weight each source more (or less) than another."
719725
],
720-
"id": "5bb309f8d441ddcf"
726+
"id": "db5681f5e8c4111e"
721727
},
722728
{
723729
"metadata": {},
@@ -746,16 +752,13 @@
746752
"trna_batches = {\"naiveB\": [\"naiveB_S3R1\", \"naiveB_S3R2\", \"naiveB_S3R3\"]}\n",
747753
"\n",
748754
"for context in context_names:\n",
749-
" await merge_xomics(\n",
755+
" merge_xomics(\n",
750756
" context_name=context,\n",
757+
" taxon_id=taxon_id,\n",
751758
" trna_matrix_or_filepath=trna_matrix_filepath[context],\n",
752759
" mrna_matrix_or_filepath=mrna_matrix_filepath[context],\n",
753760
" scrna_matrix_or_filepath=None, # scrna_matrix_filepath[context],\n",
754761
" proteomic_matrix_or_filepath=None, # proteomics_matrix_filepath[context],\n",
755-
" trna_batches=trna_batches,\n",
756-
" mrna_batches=mrna_batches,\n",
757-
" scrna_batches=None,\n",
758-
" proteomic_batches=None,\n",
759762
" trna_weight=total_rna_weight,\n",
760763
" mrna_weight=mrna_weight,\n",
761764
" scrna_weight=single_cell_weight,\n",
@@ -765,10 +768,7 @@
765768
" adjust_method=requirement_adjustment_method,\n",
766769
" force_activate_high_confidence=force_activate_high_confidence,\n",
767770
" adjust_for_na=adjust_for_na_sources,\n",
768-
" merge_zfpkm_distribution=merge_zfpkm_distrubution,\n",
769-
" keep_transcriptomics_score=keep_transcriptomics_score,\n",
770771
" output_merge_activity_filepath=Path(f\"data/results/{context}/ActiveGenes_{context}_Merged.csv\"),\n",
771-
" output_transcriptomic_details_filepath=Path(f\"data/results/{context}/TranscriptomicDetails_{context}.csv\"),\n",
772772
" output_trna_activity_filepath=Path(f\"data/results/{context}/total-rna/trna_activity_{context}.csv\"),\n",
773773
" output_mrna_activity_filepath=Path(f\"data/results/{context}/mrna/mrna_activity_{context}.csv\"),\n",
774774
" output_scrna_activity_filepath=Path(f\"data/results/{context}/scrna/scrna_activity_{context}.csv\"),\n",
@@ -777,7 +777,7 @@
777777
" output_figure_dirpath=Path(f\"data/results/{context}/figures\")\n",
778778
" )"
779779
],
780-
"id": "5cf72339439acf79"
780+
"id": "c48d8b8d6ab11e16"
781781
},
782782
{
783783
"metadata": {},
@@ -844,7 +844,7 @@
844844
"- `force_reactions_filename`: The filename of the force reactions to be used. Force reactions will (as the name implies) force the optimizer to use these reactions, **no matter their expression**\n",
845845
"- `exclude_reactions_filename`: The filename of reactions to exclude from the model, no matter their expression"
846846
],
847-
"id": "691335a66e36ee7c"
847+
"id": "e80c7864a129ea83"
848848
},
849849
{
850850
"metadata": {},
@@ -870,31 +870,31 @@
870870
"recon_algorithms = [\"IMAT\"]\n",
871871
"solver = \"GUROBI\"\n",
872872
"\n",
873-
"config = Config()\n",
874-
"\n",
875873
"for recon_alg in recon_algorithms:\n",
876874
" for context in context_names:\n",
877-
" await create_context_specific_model(\n",
875+
" create_context_specific_model(\n",
878876
" context_name=context,\n",
879-
" reference_model=Path(\n",
880-
" \"/Users/satominakamura/Desktop/Dr.Helikar Lab/COMO/main/data/GeneralModelUpdatedV2.mat\"),\n",
877+
" taxon_id=taxon_id,\n",
878+
" reference_model_filepath=Path(\n",
879+
" get_notebook_dir() / \"data\" / \"reference_models\" / \"GeneralModelUpdatedV3.json\"\n",
880+
" ),\n",
881881
" active_genes_filepath=Path(f\"{notebook_dir}/data/results/{context}/ActiveGenes_{context}_Merged.csv\"),\n",
882882
" output_infeasible_reactions_filepath=Path(\n",
883-
" f\"{notebook_dir}/data/results/{context}/infeasible_reactions_{context}.csv\"),\n",
883+
" f\"{notebook_dir}/data/results/{context}/infeasible_reactions_{context}.csv\"\n",
884+
" ),\n",
884885
" output_flux_result_filepath=Path(f\"{notebook_dir}/data/results/{context}/FluxResults_{context}.csv\"),\n",
885886
" output_model_filepaths=Path(\n",
886-
" f\"{notebook_dir}/data/results/{context}/{context}_{recon_alg}_model.json\"),\n",
887+
" f\"{notebook_dir}/data/results/{context}/{context}_{recon_alg}_model.json\"\n",
888+
" ),\n",
887889
" objective=\"biomass_maintenance\",\n",
888890
" boundary_rxns_filepath=Path(f\"{notebook_dir}/data/boundary_rxns/{context}_boundary_rxns.csv\"),\n",
889891
" exclude_rxns_filepath=Path(f\"{notebook_dir}/data/exclude_rxns/{context}_exclude_rxns.csv\"),\n",
890892
" force_rxns_filepath=Path(f\"{notebook_dir}/data/force_rxns/{context}_force_rxns.csv\"),\n",
891893
" algorithm=Algorithm.IMAT,\n",
892894
" solver=Solver.GUROBI,\n",
893-
" )\n",
894-
" # fmt: on\n",
895-
" !{cmd}"
895+
" )"
896896
],
897-
"id": "961737f5e356c2b6"
897+
"id": "4c9747c67bc80e88"
898898
},
899899
{
900900
"metadata": {},
@@ -919,7 +919,7 @@
919919
"- `exampleTissue`: This is the name of the tissue context\n",
920920
"- `ALGORITHM`: This is the algorithm (`recon_algorithm`) used in the above model creation step\n"
921921
],
922-
"id": "d5ce2da2d3b27868"
922+
"id": "6e60a22a03b54803"
923923
},
924924
{
925925
"metadata": {},
@@ -997,7 +997,7 @@
997997
"\n",
998998
" !{cmd}"
999999
],
1000-
"id": "a8ed4f4f6f10fec8"
1000+
"id": "14d4ca54309049b0"
10011001
},
10021002
{
10031003
"metadata": {},
@@ -1016,7 +1016,7 @@
10161016
"- `data_source`: The datasource you are using for disease analysis. This should be `\"rnaseq\"`\n",
10171017
"- `taxon_id`: The [NCBI Taxon ID](https://www.ncbi.nlm.nih.gov/taxonomy) to use for disease analysis"
10181018
],
1019-
"id": "84dd6096f89ad000"
1019+
"id": "51cd2ec29bbbd4a0"
10201020
},
10211021
{
10221022
"metadata": {},
@@ -1047,7 +1047,7 @@
10471047
"\n",
10481048
" !{cmd}"
10491049
],
1050-
"id": "6daddfb1edac03ab"
1050+
"id": "ead8911867f0e352"
10511051
},
10521052
{
10531053
"metadata": {},
@@ -1082,7 +1082,7 @@
10821082
"\n",
10831083
"- `solver`: The solver you would like to use. Available options are `\"gurobi\"` or `\"glpk\"`\n"
10841084
],
1085-
"id": "645d829a6b3266cd"
1085+
"id": "a1ba8eca6e7c6991"
10861086
},
10871087
{
10881088
"metadata": {},
@@ -1153,7 +1153,7 @@
11531153
" cmd = \" \".join(cmd)\n",
11541154
" !{cmd}"
11551155
],
1156-
"id": "9c1731579d3711b2"
1156+
"id": "fcdf3a87b72665a6"
11571157
}
11581158
],
11591159
"metadata": {},

main/como/proteomics_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def load_empty_dict():
188188
)
189189
return load_empty_dict()
190190

191-
191+
# TODO: Convert to synchronous function
192192
async def proteomics_gen(
193193
context_name: str,
194194
config_filepath: Path,

0 commit comments

Comments
 (0)