From 84ffa3e825ea0ea0948d889ab7879bb77daab2fa Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:34:03 -0500 Subject: [PATCH 1/7] chore: bump zFPKM>=1.1.1 Signed-off-by: Josh Loecker --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7a277114..799612be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "statsmodels>=0.13.0; python_version < '3.12'", "statsmodels>=0.14.0; python_version >= '3.12'", "troppo@git+https://github.com/JoshLoecker/troppo@master", - "zfpkm>=1.1.0", + "zfpkm>=1.1.1", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 09d79c73..e5a0d6f4 100644 --- a/uv.lock +++ b/uv.lock @@ -526,7 +526,7 @@ requires-dist = [ { name = "statsmodels", marker = "python_full_version < '3.12'", specifier = ">=0.13.0" }, { name = "statsmodels", marker = "python_full_version >= '3.12'", specifier = ">=0.14.0" }, { name = "troppo", git = "https://github.com/JoshLoecker/troppo?rev=master" }, - { name = "zfpkm", specifier = ">=1.1.0" }, + { name = "zfpkm", specifier = ">=1.1.1" }, ] provides-extras = ["gurobi", "interactive"] @@ -3697,7 +3697,7 @@ wheels = [ [[package]] name = "zfpkm" -version = "1.1.0" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "loguru" }, @@ -3705,9 +3705,9 @@ dependencies = [ { name = "numpy" }, { name = "pandas" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0d/bf/43471c26e47fc38b5f748723e3bfa104f77ee0352f5b6a242f8b01786ae4/zfpkm-1.1.0.tar.gz", hash = "sha256:c159f78703d9d853c5f8cbbc590fb9381f097329487947d4eaf95c72ab309870", size = 15623, upload-time = "2026-03-06T16:22:29.929Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/e1/18c81f4c903a049319ccb15e94998d574d96fd67be4f6f521fe7939fae88/zfpkm-1.1.1.tar.gz", hash = "sha256:d451fcce4b52f127212d515517954b64a83b7336121d6a088920123fffce45c4", size = 15858, 
upload-time = "2026-03-16T18:31:50.807Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/a1/10cf3c5268131d37a4a968a1f5f935a07e81cb78dbb83e8bedc4a835c048/zfpkm-1.1.0-py3-none-any.whl", hash = "sha256:765d1785a22729adeb89732da01ba47abcbc9597c17c587e42176398a758b7a3", size = 18103, upload-time = "2026-03-06T16:22:29.104Z" }, + { url = "https://files.pythonhosted.org/packages/14/3c/f50dd379887cf4dfcd3f00741ea74316e92c582f8e1827ad2395e97545c7/zfpkm-1.1.1-py3-none-any.whl", hash = "sha256:4bf49b461f2671586480cad26574fa3975b42ac90b914d52bf25a1bb224e74f0", size = 18386, upload-time = "2026-03-16T18:31:49.776Z" }, ] [[package]] From 99945745cb76ce6ddc28d9d9f8e3138cad984da2 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:56:32 -0500 Subject: [PATCH 2/7] feat: convert proteomics_gen to synchronous function with optional async capabilities Signed-off-by: Josh Loecker --- main/como/proteomics_gen.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index bbd3708c..9d38fffb 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -3,7 +3,7 @@ import itertools import sys from pathlib import Path -from typing import TextIO, cast +from typing import TextIO import numpy as np import pandas as pd @@ -13,7 +13,7 @@ from como.data_types import LogLevel from como.project import Config from como.proteomics_preprocessing import protein_transform_main -from como.utils import return_placeholder_data, set_up_logging +from como.utils import asyncable, return_placeholder_data, set_up_logging # Load Proteomics @@ -43,7 +43,7 @@ def process_proteomics_data(path: Path) -> pd.DataFrame: # read map to convert to entrez -async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None): +def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None): """Load a mapping from gene symbols to Entrez IDs. 
Args: @@ -188,8 +188,9 @@ def load_empty_dict(): ) return load_empty_dict() + # TODO: Convert to synchronous function -async def proteomics_gen( +def proteomics_gen( context_name: str, config_filepath: Path, matrix_filepath: Path, @@ -230,9 +231,9 @@ async def proteomics_gen( for group in groups: indices = np.where([g == group for g in config_df["group"]]) sample_columns = [*np.take(config_df["sample_name"].to_numpy(), indices).ravel().tolist(), "gene_symbol"] - matrix = cast(pd.DataFrame, matrix.loc[:, sample_columns]) - - symbols_to_gene_ids = await load_gene_symbol_map( + matrix = matrix.loc[:, sample_columns] + + symbols_to_gene_ids = load_gene_symbol_map( gene_symbols=matrix["gene_symbol"].tolist(), entrez_map=input_entrez_map, ) @@ -264,3 +265,6 @@ async def proteomics_gen( hi_group_ratio=high_confidence_batch_ratio, group_names=groups, ) + + +async_proteomics_gen = asyncable(proteomics_gen) From a1162fb66e29e778b927507764a3883b556e6ddd Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:57:05 -0500 Subject: [PATCH 3/7] feat: make merge_xomics asyncable Signed-off-by: Josh Loecker --- main/como/merge_xomics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main/como/merge_xomics.py b/main/como/merge_xomics.py index 6221e455..b6a0cf13 100644 --- a/main/como/merge_xomics.py +++ b/main/como/merge_xomics.py @@ -22,7 +22,7 @@ _SourceWeights, ) from como.project import Config -from como.utils import get_missing_gene_data, read_file, return_placeholder_data, set_up_logging +from como.utils import asyncable, get_missing_gene_data, read_file, return_placeholder_data, set_up_logging class _MergedHeaderNames: @@ -616,3 +616,6 @@ def merge_xomics( # noqa: C901 output_final_model_scores_filepath=output_final_model_scores_filepath, output_figure_dirpath=output_figure_dirpath, ) + + +async_merge_xomics = asyncable(merge_xomics) From dbf5d2561b57cf0eb309b459745ca84c6d31124b Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 
2026 13:57:26 -0500 Subject: [PATCH 4/7] feat: make rnaseq_gen asyncable Signed-off-by: Josh Loecker --- main/como/rnaseq_gen.py | 66 +++++------------------------------------ 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 1d2b258a..2cfc1a1e 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -25,7 +25,7 @@ from como.migrations import gene_info_migrations from como.pipelines.identifier import contains_identical_gene_types, determine_gene_type from como.project import Config -from como.utils import read_file, set_up_logging +from como.utils import asyncable, read_file, set_up_logging class _FilteringOptions(NamedTuple): @@ -185,6 +185,9 @@ def _build_matrix_results( elif isinstance(matrix, sc.AnnData): if not isinstance(matrix.var, pd.DataFrame): raise TypeError(f"Expected matrix.var object to be 'pd.DataFrame', got '{type(matrix.var)}'") + + if matrix.raw is not None: + matrix = matrix.raw.to_adata() gene_info = gene_info.sort_values(["entrez_gene_id", "size"], ascending=[True, True]).drop_duplicates( subset=["entrez_gene_id"], keep="first" @@ -538,8 +541,6 @@ def umi_filter( adata: sc.AnnData = metric.count_matrix.copy() if perform_normalization: - if adata.raw is not None: - adata.X = adata.raw.X.copy() sc.pp.filter_cells(adata, min_genes=10) sc.pp.filter_genes(adata, min_cells=1) sc.pp.normalize_total(adata, target_sum=target_sum) @@ -549,8 +550,8 @@ def umi_filter( adata_x = adata.X n_cells, n_genes = adata.shape - - min_samples: float = round(min_sample_expression * n_cells) + + min_samples = round(min_sample_expression * n_cells) min_func = k_over_a(min_samples, cut_off) min_genes_mask = np.zeros(n_genes, dtype=bool) for j in range(n_genes): @@ -709,7 +710,7 @@ def _process( merged_zscores = merged_zscores.reindex(columns=sorted(merged_zscores.columns)) merged_zscores = merged_zscores.groupby(by=merged_zscores.index.name).mean() - 
merged_zscores.to_csv(output_zscore_normalization_filepath, index=True) + merged_zscores.to_csv(output_zscore_normalization_filepath.with_suffix(".csv"), index=True) elif isinstance(rnaseq_matrix, sc.AnnData): merged_zscores = ad.concat([m.z_score_matrix for m in metrics.values()], axis="obs") merged_zscores.var.index.name = "entrez_gene_id" @@ -905,55 +906,4 @@ def rnaseq_gen( # noqa: C901 ) -if __name__ == "__main__": - import matplotlib.pyplot as plt - - data = pd.read_csv("/Users/joshl/Downloads/fpkm_example_data/CD8.genes.results.txt", sep="\t") - data["gene_id"] = data["gene_id"].str.partition(".")[0] - counts = ( - data[["gene_id", "expected_count"]] - .copy() - .set_index("gene_id") - .sort_index() - .rename(columns={"expected_count": "actual"}) - ) - eff_len = ( - data[["gene_id", "effective_length"]] - .copy() - .set_index("gene_id") - .sort_index() - .rename(columns={"effective_length": "actual"}) - ) - expected_fpkm = ( - data[["gene_id", "FPKM"]].copy().set_index("gene_id").sort_index().rename(columns={"FPKM": "expected"}) - ) - - metrics = { - "S1": _StudyMetrics( - study="S1", - num_samples=1, - count_matrix=counts, - eff_length=eff_len, - sample_names=[""], - layout=[LayoutMethod.paired_end], - entrez_gene_ids=np.ndarray([0]), - gene_sizes=np.ndarray([0]), - ) - } - calculated_fpkm = _calculate_fpkm(metrics)["S1"].normalization_matrix - calculated_fpkm = calculated_fpkm.round(2) - - joined = calculated_fpkm.join(expected_fpkm, how="inner") - joined["actual"] = joined["actual"].replace([np.nan, np.inf], 0) - - zfpkm_df, _ = zFPKM(joined, remove_na=True) - zfpkm_df = zfpkm_df.replace(-np.inf, np.nan) - - fig, axes = cast(tuple[plt.Figure, list[plt.Axes]], plt.subplots(nrows=2, ncols=1)) - axes[0].hist(zfpkm_df["actual"].to_numpy()) - axes[0].set_title("Expected zFPKM") - - axes[1].hist(zfpkm_df["expected"].to_numpy()) - axes[1].set_title("Actual zFPKM") - fig.tight_layout() - fig.show() +async_rnaseq_gen = asyncable(rnaseq_gen) From 
fc0f1e83927f8231a15ef99c17cf28ed7bfb6b94 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:57:38 -0500 Subject: [PATCH 5/7] feat: make rnaseq_preprocess asyncable Signed-off-by: Josh Loecker --- main/como/rnaseq_preprocess.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index edd1c88d..cf7d0842 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -15,7 +15,7 @@ from como.data_types import LogLevel, RNAType from como.pipelines.identifier import build_gene_info, get_remaining_identifiers -from como.utils import read_file, set_up_logging +from como.utils import asyncable, read_file, set_up_logging @dataclass @@ -739,3 +739,6 @@ def rnaseq_preprocess( # noqa: C901 cache=cache, create_gene_info_only=create_gene_info_only, ) + + +async_rnaseq_preprocess = asyncable(rnaseq_preprocess) From 0125047c2f6142450b930dfe285e079371f3684f Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:57:58 -0500 Subject: [PATCH 6/7] feat: commit `asyncable` function to make any function async compatible Signed-off-by: Josh Loecker --- main/como/utils.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/main/como/utils.py b/main/como/utils.py index 11a34f16..15162942 100644 --- a/main/como/utils.py +++ b/main/como/utils.py @@ -1,11 +1,13 @@ from __future__ import annotations +import asyncio import contextlib +import functools import io import sys -from collections.abc import Iterator, Sequence +from collections.abc import Awaitable, Callable, Iterator, Sequence from pathlib import Path -from typing import Any, Literal, NoReturn, TextIO, TypeVar, overload +from typing import Any, Literal, ParamSpec, TextIO, TypeVar, overload import numpy.typing as npt import pandas as pd @@ -15,8 +17,11 @@ from como.data_types import LOG_FORMAT, Algorithm, LogLevel from como.pipelines.identifier import 
get_remaining_identifiers +P = ParamSpec("P") T = TypeVar("T") + __all__ = [ + "asyncable", "get_missing_gene_data", "num_columns", "num_rows", @@ -309,3 +314,23 @@ def set_up_logging( with contextlib.suppress(ValueError): logger.remove(0) logger.add(sink=location, level=level.value, format=formatting) + + +def asyncable(func: Callable[P, T]) -> Callable[P, Awaitable[T]]: + """Converts a synchronous function to asynchronous. + + This wrapper works by running the synchronous function in a separate thread using asyncio's run_in_executor. + This allows the synchronous function to be called in an asynchronous context without blocking the event loop. + + :param func: The synchronous function to convert. + :return: An asynchronous version of the input function that runs in a separate thread. + """ + + @functools.wraps(func) + async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: + loop = asyncio.get_running_loop() + call = functools.partial(func, *args, **kwargs) + return await loop.run_in_executor(None, call) + # return await loop.run_in_executor(None, lambda: func(*args, **kwargs)) + + return wrapper From 57bd227dc55a6a78387d8267599f14f466aab936 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 16 Mar 2026 13:58:47 -0500 Subject: [PATCH 7/7] feat: make create_context_specific_model asyncable Signed-off-by: Josh Loecker --- main/como/create_context_specific_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py index b0dff92a..d479bd03 100644 --- a/main/como/create_context_specific_model.py +++ b/main/como/create_context_specific_model.py @@ -32,7 +32,7 @@ Solver, _BoundaryReactions, ) -from como.utils import set_up_logging, split_gene_expression_data +from como.utils import asyncable, set_up_logging, split_gene_expression_data def _reaction_indices_to_ids( @@ -1014,3 +1014,6 @@ def create_context_specific_model( # noqa: C901
f"metabolites={len(context_model.metabolites)}" ) return context_model + + +async_create_context_specific_model = asyncable(create_context_specific_model)