Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion main/como/create_context_specific_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
Solver,
_BoundaryReactions,
)
from como.utils import set_up_logging, split_gene_expression_data
from como.utils import asyncable, set_up_logging, split_gene_expression_data


def _reaction_indices_to_ids(
Expand Down Expand Up @@ -1014,3 +1014,6 @@ def create_context_specific_model( # noqa: C901
f"metabolites={len(context_model.metabolites)}"
)
return context_model


async_create_context_specific_model = asyncable(create_context_specific_model)
5 changes: 4 additions & 1 deletion main/como/merge_xomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
_SourceWeights,
)
from como.project import Config
from como.utils import get_missing_gene_data, read_file, return_placeholder_data, set_up_logging
from como.utils import asyncable, get_missing_gene_data, read_file, return_placeholder_data, set_up_logging


class _MergedHeaderNames:
Expand Down Expand Up @@ -616,3 +616,6 @@ def merge_xomics( # noqa: C901
output_final_model_scores_filepath=output_final_model_scores_filepath,
output_figure_dirpath=output_figure_dirpath,
)


async_merge_xomics = asyncable(merge_xomics)
18 changes: 11 additions & 7 deletions main/como/proteomics_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import itertools
import sys
from pathlib import Path
from typing import TextIO, cast
from typing import TextIO

import numpy as np
import pandas as pd
Expand All @@ -13,7 +13,7 @@
from como.data_types import LogLevel
from como.project import Config
from como.proteomics_preprocessing import protein_transform_main
from como.utils import return_placeholder_data, set_up_logging
from como.utils import asyncable, return_placeholder_data, set_up_logging


# Load Proteomics
Expand Down Expand Up @@ -43,7 +43,7 @@ def process_proteomics_data(path: Path) -> pd.DataFrame:


# read map to convert to entrez
async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None):
def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None):
"""Load a mapping from gene symbols to Entrez IDs.

Args:
Expand Down Expand Up @@ -188,8 +188,9 @@ def load_empty_dict():
)
return load_empty_dict()


# TODO: Convert to synchronous function
async def proteomics_gen(
def proteomics_gen(
context_name: str,
config_filepath: Path,
matrix_filepath: Path,
Expand Down Expand Up @@ -230,9 +231,9 @@ async def proteomics_gen(
for group in groups:
indices = np.where([g == group for g in config_df["group"]])
sample_columns = [*np.take(config_df["sample_name"].to_numpy(), indices).ravel().tolist(), "gene_symbol"]
matrix = cast(pd.DataFrame, matrix.loc[:, sample_columns])

symbols_to_gene_ids = await load_gene_symbol_map(
matrix = matrix.loc[:, sample_columns]
symbols_to_gene_ids = load_gene_symbol_map(
gene_symbols=matrix["gene_symbol"].tolist(),
entrez_map=input_entrez_map,
)
Expand Down Expand Up @@ -264,3 +265,6 @@ async def proteomics_gen(
hi_group_ratio=high_confidence_batch_ratio,
group_names=groups,
)


async_proteomics_gen = asyncable(proteomics_gen)
66 changes: 8 additions & 58 deletions main/como/rnaseq_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from como.migrations import gene_info_migrations
from como.pipelines.identifier import contains_identical_gene_types, determine_gene_type
from como.project import Config
from como.utils import read_file, set_up_logging
from como.utils import asyncable, read_file, set_up_logging


class _FilteringOptions(NamedTuple):
Expand Down Expand Up @@ -185,6 +185,9 @@ def _build_matrix_results(
elif isinstance(matrix, sc.AnnData):
if not isinstance(matrix.var, pd.DataFrame):
raise TypeError(f"Expected matrix.var object to be 'pd.DataFrame', got '{type(matrix.var)}'")

if matrix.raw is not None:
matrix = matrix.raw.to_adata()

gene_info = gene_info.sort_values(["entrez_gene_id", "size"], ascending=[True, True]).drop_duplicates(
subset=["entrez_gene_id"], keep="first"
Expand Down Expand Up @@ -538,8 +541,6 @@ def umi_filter(
adata: sc.AnnData = metric.count_matrix.copy()

if perform_normalization:
if adata.raw is not None:
adata.X = adata.raw.X.copy()
sc.pp.filter_cells(adata, min_genes=10)
sc.pp.filter_genes(adata, min_cells=1)
sc.pp.normalize_total(adata, target_sum=target_sum)
Expand All @@ -549,8 +550,8 @@ def umi_filter(

adata_x = adata.X
n_cells, n_genes = adata.shape

min_samples: float = round(min_sample_expression * n_cells)
min_samples = round(min_sample_expression * n_cells)
min_func = k_over_a(min_samples, cut_off)
min_genes_mask = np.zeros(n_genes, dtype=bool)
for j in range(n_genes):
Expand Down Expand Up @@ -709,7 +710,7 @@ def _process(

merged_zscores = merged_zscores.reindex(columns=sorted(merged_zscores.columns))
merged_zscores = merged_zscores.groupby(by=merged_zscores.index.name).mean()
merged_zscores.to_csv(output_zscore_normalization_filepath, index=True)
merged_zscores.to_csv(output_zscore_normalization_filepath.with_suffix(".csv"), index=True)
elif isinstance(rnaseq_matrix, sc.AnnData):
merged_zscores = ad.concat([m.z_score_matrix for m in metrics.values()], axis="obs")
merged_zscores.var.index.name = "entrez_gene_id"
Expand Down Expand Up @@ -905,55 +906,4 @@ def rnaseq_gen( # noqa: C901
)


if __name__ == "__main__":
    # Ad-hoc developer smoke test (NOTE(review): hard-coded local path below —
    # this only runs on the original author's machine; consider moving to a
    # real test with a fixture).
    import matplotlib.pyplot as plt

    # Load an RSEM-style ".genes.results" table and strip version suffixes
    # from Ensembl-style gene IDs (e.g. "ENSG000001.12" -> "ENSG000001").
    data = pd.read_csv("/Users/joshl/Downloads/fpkm_example_data/CD8.genes.results.txt", sep="\t")
    data["gene_id"] = data["gene_id"].str.partition(".")[0]
    # Expected counts per gene, indexed by gene_id; column renamed to "actual"
    # to line up with the join against the reference FPKM values below.
    counts = (
        data[["gene_id", "expected_count"]]
        .copy()
        .set_index("gene_id")
        .sort_index()
        .rename(columns={"expected_count": "actual"})
    )
    # Effective transcript lengths, same index/column convention as `counts`.
    eff_len = (
        data[["gene_id", "effective_length"]]
        .copy()
        .set_index("gene_id")
        .sort_index()
        .rename(columns={"effective_length": "actual"})
    )
    # Reference FPKM values reported by the upstream tool ("expected" column).
    expected_fpkm = (
        data[["gene_id", "FPKM"]].copy().set_index("gene_id").sort_index().rename(columns={"FPKM": "expected"})
    )

    # Build a single-study metrics mapping so _calculate_fpkm can be exercised.
    # NOTE(review): np.ndarray([0]) constructs an uninitialized 0-d-shaped array,
    # not an array containing 0 — presumably np.array([0]) was intended; confirm.
    metrics = {
        "S1": _StudyMetrics(
            study="S1",
            num_samples=1,
            count_matrix=counts,
            eff_length=eff_len,
            sample_names=[""],
            layout=[LayoutMethod.paired_end],
            entrez_gene_ids=np.ndarray([0]),
            gene_sizes=np.ndarray([0]),
        )
    }
    # Recompute FPKM with this module's implementation, rounded for comparison.
    calculated_fpkm = _calculate_fpkm(metrics)["S1"].normalization_matrix
    calculated_fpkm = calculated_fpkm.round(2)

    # Inner-join calculated ("actual") vs reference ("expected") FPKM; replace
    # NaN/inf counts so zFPKM does not propagate them.
    joined = calculated_fpkm.join(expected_fpkm, how="inner")
    joined["actual"] = joined["actual"].replace([np.nan, np.inf], 0)

    # zFPKM-transform both columns; -inf (log of zero expression) becomes NaN.
    zfpkm_df, _ = zFPKM(joined, remove_na=True)
    zfpkm_df = zfpkm_df.replace(-np.inf, np.nan)

    # Side-by-side histograms of the two zFPKM distributions.
    # NOTE(review): the titles appear swapped relative to the column plotted
    # ("actual" is titled "Expected zFPKM" and vice versa) — verify intent.
    fig, axes = cast(tuple[plt.Figure, list[plt.Axes]], plt.subplots(nrows=2, ncols=1))
    axes[0].hist(zfpkm_df["actual"].to_numpy())
    axes[0].set_title("Expected zFPKM")

    axes[1].hist(zfpkm_df["expected"].to_numpy())
    axes[1].set_title("Actual zFPKM")
    fig.tight_layout()
    fig.show()
async_rnaseq_gen = asyncable(rnaseq_gen)
5 changes: 4 additions & 1 deletion main/como/rnaseq_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from como.data_types import LogLevel, RNAType
from como.pipelines.identifier import build_gene_info, get_remaining_identifiers
from como.utils import read_file, set_up_logging
from como.utils import asyncable, read_file, set_up_logging


@dataclass
Expand Down Expand Up @@ -739,3 +739,6 @@ def rnaseq_preprocess( # noqa: C901
cache=cache,
create_gene_info_only=create_gene_info_only,
)


async_rnaseq_preprocess = asyncable(rnaseq_preprocess)
29 changes: 27 additions & 2 deletions main/como/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

import asyncio
import contextlib
import functools
import io
import sys
from collections.abc import Iterator, Sequence
from collections.abc import Awaitable, Callable, Iterator, Sequence
from pathlib import Path
from typing import Any, Literal, NoReturn, TextIO, TypeVar, overload
from typing import Any, Literal, ParamSpec, TextIO, TypeVar, overload

import numpy.typing as npt
import pandas as pd
Expand All @@ -15,8 +17,11 @@
from como.data_types import LOG_FORMAT, Algorithm, LogLevel
from como.pipelines.identifier import get_remaining_identifiers

P = ParamSpec("P")
T = TypeVar("T")

__all__ = [
"asyncable",
"get_missing_gene_data",
"num_columns",
"num_rows",
Expand Down Expand Up @@ -309,3 +314,23 @@ def set_up_logging(
with contextlib.suppress(ValueError):
logger.remove(0)
logger.add(sink=location, level=level.value, format=formatting)


def asyncable(func: Callable[P, T]) -> Callable[P, Awaitable[T]]:
"""Converts a synchronous function to asynchronous.

This wrapper functions by running the synchronous function in a separate thread using asyncio's run_in_executor
This allows the synchronous function to be called in an asynchronous context without blocking the event loop.

:param func: The synchronous function to convert.
:return: An asynchronous version of the input function that runs in a separate thread.
"""

@functools.wraps(func)
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
loop = asyncio.get_running_loop()
call = functools.partial(func, *args, **kwargs)
return await loop.run_in_executor(None, call)
# return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

return wrapper
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
"statsmodels>=0.13.0; python_version < '3.12'",
"statsmodels>=0.14.0; python_version >= '3.12'",
"troppo@git+https://github.com/JoshLoecker/troppo@master",
"zfpkm>=1.1.0",
"zfpkm>=1.1.1",
]

[project.optional-dependencies]
Expand Down
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading