2525from como .migrations import gene_info_migrations
2626from como .pipelines .identifier import contains_identical_gene_types , determine_gene_type
2727from como .project import Config
28- from como .utils import read_file , set_up_logging
28+ from como .utils import asyncable , read_file , set_up_logging
2929
3030
3131class _FilteringOptions (NamedTuple ):
@@ -185,6 +185,9 @@ def _build_matrix_results(
185185 elif isinstance (matrix , sc .AnnData ):
186186 if not isinstance (matrix .var , pd .DataFrame ):
187187 raise TypeError (f"Expected matrix.var object to be 'pd.DataFrame', got '{ type (matrix .var )} '" )
188+
189+ if matrix .raw is not None :
190+ matrix = matrix .raw .to_adata ()
188191
189192 gene_info = gene_info .sort_values (["entrez_gene_id" , "size" ], ascending = [True , True ]).drop_duplicates (
190193 subset = ["entrez_gene_id" ], keep = "first"
@@ -538,8 +541,6 @@ def umi_filter(
538541 adata : sc .AnnData = metric .count_matrix .copy ()
539542
540543 if perform_normalization :
541- if adata .raw is not None :
542- adata .X = adata .raw .X .copy ()
543544 sc .pp .filter_cells (adata , min_genes = 10 )
544545 sc .pp .filter_genes (adata , min_cells = 1 )
545546 sc .pp .normalize_total (adata , target_sum = target_sum )
@@ -549,8 +550,8 @@ def umi_filter(
549550
550551 adata_x = adata .X
551552 n_cells , n_genes = adata .shape
552-
553- min_samples : float = round (min_sample_expression * n_cells )
553+
554+ min_samples = round (min_sample_expression * n_cells )
554555 min_func = k_over_a (min_samples , cut_off )
555556 min_genes_mask = np .zeros (n_genes , dtype = bool )
556557 for j in range (n_genes ):
@@ -709,7 +710,7 @@ def _process(
709710
710711 merged_zscores = merged_zscores .reindex (columns = sorted (merged_zscores .columns ))
711712 merged_zscores = merged_zscores .groupby (by = merged_zscores .index .name ).mean ()
712- merged_zscores .to_csv (output_zscore_normalization_filepath , index = True )
713+ merged_zscores .to_csv (output_zscore_normalization_filepath . with_suffix ( ".csv" ) , index = True )
713714 elif isinstance (rnaseq_matrix , sc .AnnData ):
714715 merged_zscores = ad .concat ([m .z_score_matrix for m in metrics .values ()], axis = "obs" )
715716 merged_zscores .var .index .name = "entrez_gene_id"
@@ -905,55 +906,4 @@ def rnaseq_gen( # noqa: C901
905906 )
906907
907908
908- if __name__ == "__main__" :
909- import matplotlib .pyplot as plt
910-
911- data = pd .read_csv ("/Users/joshl/Downloads/fpkm_example_data/CD8.genes.results.txt" , sep = "\t " )
912- data ["gene_id" ] = data ["gene_id" ].str .partition ("." )[0 ]
913- counts = (
914- data [["gene_id" , "expected_count" ]]
915- .copy ()
916- .set_index ("gene_id" )
917- .sort_index ()
918- .rename (columns = {"expected_count" : "actual" })
919- )
920- eff_len = (
921- data [["gene_id" , "effective_length" ]]
922- .copy ()
923- .set_index ("gene_id" )
924- .sort_index ()
925- .rename (columns = {"effective_length" : "actual" })
926- )
927- expected_fpkm = (
928- data [["gene_id" , "FPKM" ]].copy ().set_index ("gene_id" ).sort_index ().rename (columns = {"FPKM" : "expected" })
929- )
930-
931- metrics = {
932- "S1" : _StudyMetrics (
933- study = "S1" ,
934- num_samples = 1 ,
935- count_matrix = counts ,
936- eff_length = eff_len ,
937- sample_names = ["" ],
938- layout = [LayoutMethod .paired_end ],
939- entrez_gene_ids = np .ndarray ([0 ]),
940- gene_sizes = np .ndarray ([0 ]),
941- )
942- }
943- calculated_fpkm = _calculate_fpkm (metrics )["S1" ].normalization_matrix
944- calculated_fpkm = calculated_fpkm .round (2 )
945-
946- joined = calculated_fpkm .join (expected_fpkm , how = "inner" )
947- joined ["actual" ] = joined ["actual" ].replace ([np .nan , np .inf ], 0 )
948-
949- zfpkm_df , _ = zFPKM (joined , remove_na = True )
950- zfpkm_df = zfpkm_df .replace (- np .inf , np .nan )
951-
952- fig , axes = cast (tuple [plt .Figure , list [plt .Axes ]], plt .subplots (nrows = 2 , ncols = 1 ))
953- axes [0 ].hist (zfpkm_df ["actual" ].to_numpy ())
954- axes [0 ].set_title ("Expected zFPKM" )
955-
956- axes [1 ].hist (zfpkm_df ["expected" ].to_numpy ())
957- axes [1 ].set_title ("Actual zFPKM" )
958- fig .tight_layout ()
959- fig .show ()
909+ async_rnaseq_gen = asyncable (rnaseq_gen )
0 commit comments