From c97e6e796ba27f3bfc71b150f984cedef711f077 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 15:52:53 -0500
Subject: [PATCH 1/8] fix: allow `None` type for figure dirpath

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/combine_distributions.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/main/como/combine_distributions.py b/main/como/combine_distributions.py
index f08430fd..88ae6a69 100644
--- a/main/como/combine_distributions.py
+++ b/main/como/combine_distributions.py
@@ -28,7 +28,7 @@ def _combine_z_distribution_for_batch(
     matrix: pd.DataFrame,
     source: SourceTypes,
     output_combined_matrix_filepath: Path,
-    output_figure_dirpath: Path,
+    output_figure_dirpath: Path | None,
 ) -> pd.DataFrame:
     """Combine z-score distributions across samples for a single batch.
 
@@ -42,7 +42,9 @@ def _combine_z_distribution_for_batch(
     :returns: A pandas dataframe of the weighted z-distributions
     """
     output_combined_matrix_filepath.parent.mkdir(parents=True, exist_ok=True)
-    output_figure_dirpath.mkdir(parents=True, exist_ok=True)
+    
+    if output_figure_dirpath is not None:
+        output_figure_dirpath.mkdir(parents=True, exist_ok=True)
 
     logger.trace(
         f"Combining z-score distributions: batch #{batch.batch_num}, "
@@ -107,7 +109,7 @@ def _combine_z_distribution_for_source(
     context_name: str,
     replicates_per_batch: Sequence[int],
     output_combined_matrix_filepath: Path,
-    output_figure_filepath: Path,
+    output_figure_filepath: Path | None,
     weighted_z_floor: int = -6,
     weighted_z_ceiling: int = 6,
 ) -> pd.DataFrame:
@@ -135,7 +137,9 @@ def _combine_z_distribution_for_source(
         return out_df
 
     output_combined_matrix_filepath.parent.mkdir(parents=True, exist_ok=True)
-    output_figure_filepath.parent.mkdir(parents=True, exist_ok=True)
+    
+    if output_figure_filepath is not None:
+        output_figure_filepath.parent.mkdir(parents=True, exist_ok=True)
 
     # alidate alignment between columns and replicate vector
     batch_column_names: list[str] = list(merged_source_data.columns)
@@ -190,7 +194,7 @@ def _combine_z_distribution_for_source(
 def _combine_z_distribution_for_context(
     context: str,
     zscore_results: list[_CombineOmicsInput],
-    output_graph_filepath: Path,
+    output_graph_filepath: Path | None,
 ) -> pd.DataFrame:
     if not zscore_results:
         logger.warning("No zscore results exist, returning empty dataframe")
@@ -275,14 +279,12 @@ def _begin_combining_distributions(
     batch_names: _BatchNames,
     source_weights: _SourceWeights,
     output_filepaths: _OutputCombinedSourceFilepath,
-    output_figure_dirpath: Path,
+    output_figure_dirpath: Path | None,
     output_final_model_scores: Path,
     weighted_z_floor: int = -6,
     weighted_z_ceiling: int = 6,
 ):
     logger.info(f"Starting to combine z-scores for context '{context_name}'")
-    output_figure_dirpath.mkdir(parents=True, exist_ok=True)
-
     z_score_results: list[_CombineOmicsInput] = []
 
     matrix: pd.DataFrame | None
@@ -372,7 +374,9 @@ def _begin_combining_distributions(
                 / f"{context_name}_{source.value}_combined_zscore_distribution.csv"
             ),
             output_figure_filepath=(
-                output_figure_dirpath / f"{context_name}_{source.value}_combined_zscore_distribution.pdf"
+                (output_figure_dirpath / f"{context_name}_{source.value}_combined_zscore_distribution.pdf")
+                if output_figure_dirpath is not None
+                else None
             ),
             weighted_z_floor=weighted_z_floor,
             weighted_z_ceiling=weighted_z_ceiling,
@@ -394,7 +398,11 @@ def _begin_combining_distributions(
     merged_context_results = _combine_z_distribution_for_context(
         context=context_name,
         zscore_results=z_score_results,
-        output_graph_filepath=output_figure_dirpath / f"{context_name}_combined_omics_distribution.pdf",
+        output_graph_filepath=(
+            (output_figure_dirpath / f"{context_name}_combined_omics_distribution.pdf")
+            if output_figure_dirpath is not None
+            else None
+        ),
     )
     merged_context_results.to_csv(output_final_model_scores, index=len(merged_context_results.columns) <= 1)
     logger.success(f"Wrote combined z-scores for context '{context_name}' to {output_final_model_scores}")

From 06251eb61f8fb8dc2c823991545268e49c9d6491 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 15:53:04 -0500
Subject: [PATCH 2/8] fix: allow `None` for output matrix filepaths

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/rnaseq_preprocess.py | 91 ++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py
index cf7d0842..93141b6a 100644
--- a/main/como/rnaseq_preprocess.py
+++ b/main/como/rnaseq_preprocess.py
@@ -307,9 +307,9 @@ def _write_matrices(
     fragment_lengths: pd.DataFrame,
     tpm_df: pd.DataFrame,
     como_context_dir: Path,
-    out_counts_filepath: Path,
-    out_frag_length_filepath: Path,
-    out_tpm_filepath: Path,
+    out_counts_filepath: Path | None,
+    out_frag_length_filepath: Path | None,
+    out_tpm_filepath: Path | None,
     rna: RNAType,
 ) -> pd.DataFrame:
     """Create a counts matrix file by reading gene counts table(s).
@@ -326,25 +326,30 @@ def _write_matrices(
     :return: A pandas DataFrame representing the final counts matrix.
     """
     study_metrics = _organize_gene_counts_files(data_dir=como_context_dir)
-    counts: list[pd.DataFrame] = [_create_sample_counts_matrix(metric) for metric in study_metrics]
     rna_specific_sample_names = sorted(
         set(config_df.loc[config_df["library_prep"].str.lower() == rna.value.lower(), "sample_name"].tolist())
     )
-
-    final_matrix: pd.DataFrame = functools.reduce(
-        lambda left, right: pd.merge(left, right, on="ensembl_gene_id", how="outer"), counts
-    )
-    final_matrix.fillna(value=0, inplace=True)
-    final_matrix = final_matrix[["ensembl_gene_id", *rna_specific_sample_names]]
-    final_matrix = final_matrix.reindex(columns=["ensembl_gene_id", *rna_specific_sample_names])
-    fragment_lengths = fragment_lengths.reindex(columns=rna_specific_sample_names).sort_index()
-    tpm_df = tpm_df.reindex(columns=rna_specific_sample_names).sort_index()
-
-    out_counts_filepath.parent.mkdir(parents=True, exist_ok=True)
-    out_frag_length_filepath.parent.mkdir(parents=True, exist_ok=True)
-    final_matrix.to_csv(out_counts_filepath, index=False, float_format="%.15g")
-    fragment_lengths[rna_specific_sample_names].to_csv(out_frag_length_filepath, index=True, float_format="%.15g")
-    tpm_df[rna_specific_sample_names].to_csv(out_tpm_filepath, index=True, float_format="%.15g")
+    
+    if out_counts_filepath:
+        out_counts_filepath.parent.mkdir(parents=True, exist_ok=True)
+        counts: list[pd.DataFrame] = [_create_sample_counts_matrix(metric) for metric in study_metrics]
+        final_matrix: pd.DataFrame = functools.reduce(
+            lambda left, right: pd.merge(left, right, on="ensembl_gene_id", how="outer"), counts
+        )
+        final_matrix.fillna(value=0, inplace=True)
+        final_matrix = final_matrix[["ensembl_gene_id", *rna_specific_sample_names]]
+        final_matrix = final_matrix.reindex(columns=["ensembl_gene_id", *rna_specific_sample_names])
+        final_matrix.to_csv(out_counts_filepath, index=False, float_format="%.15g")
+    
+    if out_frag_length_filepath:
+        out_frag_length_filepath.parent.mkdir(parents=True, exist_ok=True)
+        fragment_lengths = fragment_lengths.reindex(columns=rna_specific_sample_names).sort_index()
+        fragment_lengths[rna_specific_sample_names].to_csv(out_frag_length_filepath, index=True, float_format="%.15g")
+    
+    if out_tpm_filepath:
+        out_tpm_filepath.parent.mkdir(parents=True, exist_ok=True)
+        tpm_df = tpm_df.reindex(columns=rna_specific_sample_names).sort_index()
+        tpm_df[rna_specific_sample_names].to_csv(out_tpm_filepath, index=True, float_format="%.15g")
 
     logger.success(f"Wrote gene count matrix for '{rna.value}' RNA at '{out_counts_filepath}'")
 
@@ -499,33 +504,33 @@ def _process_como_input(
     out_trna_tpm: Path | None,
 ) -> None:
     config_df, fragment_lengths, tpm_df = _create_config_df(context_name, como_context_dir=como_context_dir)
-
-    if out_trna_config and out_trna_counts and out_trna_fragments and out_trna_tpm:
-        _write_matrices(
-            config_df=config_df,
-            fragment_lengths=fragment_lengths,
-            tpm_df=tpm_df,
-            como_context_dir=como_context_dir,
-            out_counts_filepath=out_trna_counts,
-            out_frag_length_filepath=out_trna_fragments,
-            out_tpm_filepath=out_trna_tpm,
-            rna=RNAType.TRNA,
-        )
+    
+    _write_matrices(
+        config_df=config_df,
+        fragment_lengths=fragment_lengths,
+        tpm_df=tpm_df,
+        como_context_dir=como_context_dir,
+        out_counts_filepath=out_trna_counts,
+        out_frag_length_filepath=out_trna_fragments,
+        out_tpm_filepath=out_trna_tpm,
+        rna=RNAType.TRNA,
+    )
+    if out_trna_config:
         with pd.ExcelWriter(out_trna_config) as writer:
             subset_config = config_df[config_df["library_prep"].str.lower() == RNAType.TRNA.value.lower()]
             subset_config.to_excel(writer, sheet_name=context_name, header=True, index=False)
-
-    if out_mrna_config and out_mrna_counts and out_mrna_fragments and out_mrna_tpm:
-        _write_matrices(
-            config_df=config_df,
-            fragment_lengths=fragment_lengths,
-            tpm_df=tpm_df,
-            como_context_dir=como_context_dir,
-            out_counts_filepath=out_mrna_counts,
-            out_frag_length_filepath=out_mrna_fragments,
-            out_tpm_filepath=out_mrna_tpm,
-            rna=RNAType.MRNA,
-        )
+    
+    _write_matrices(
+        config_df=config_df,
+        fragment_lengths=fragment_lengths,
+        tpm_df=tpm_df,
+        como_context_dir=como_context_dir,
+        out_counts_filepath=out_mrna_counts,
+        out_frag_length_filepath=out_mrna_fragments,
+        out_tpm_filepath=out_mrna_tpm,
+        rna=RNAType.MRNA,
+    )
+    if out_mrna_config:
         with pd.ExcelWriter(out_mrna_config) as writer:
             subset_config = config_df[config_df["library_prep"].str.lower() == RNAType.MRNA.value.lower()]
             subset_config.to_excel(writer, sheet_name=context_name, header=True, index=False)

From fdba79cb18342f5330087a80b0d75f986175ac6c Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:11:36 -0500
Subject: [PATCH 3/8] fix: only use gurobi-specific solver settings if gurobi
 is being used

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index d479bd03..1beba460 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -292,10 +292,12 @@ def _build_with_imat(
     cfg.verbosity = build_settings.solver_verbosity
     cfg.timeout = build_settings.solver_timeout
     cfg.tolerances.feasibility = build_settings.solver_feasibility
-    cfg.tolerances.optimality = build_settings.solver_optimality
     cfg.tolerances.integrality = build_settings.solver_integrality
+    if solver.upper() == "GUROBI":
+        cfg.tolerances.optimality = build_settings.solver_optimality
+    
     grb_model = getattr(getattr(cfg, "problem", None), "problem", None)  # `getattr` to handle non-gurobi solvers
-    if grb_model is not None:
+    if grb_model is not None and solver.upper() == "GUROBI":
         grb_model.Params.MIPGap = build_settings.gurobi_mipgap
         grb_model.Params.IntegralityFocus = build_settings.gurobi_integrality_focus
         grb_model.Params.NumericFocus = build_settings.gurobi_numeric_focus

From 8f21380b8d6d99ef67638afe73a8463be401758c Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:13:28 -0500
Subject: [PATCH 4/8] refactor: import required modules

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index 1beba460..d3c76074 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -7,9 +7,8 @@
 from collections.abc import Sequence
 from io import TextIOWrapper
 from math import isfinite
-from multiprocessing import Value
 from pathlib import Path
-from typing import Literal, Never, TextIO, cast, reveal_type
+from typing import Literal, TextIO, cast
 
 import cobamp.core.optimization
 import cobra
@@ -17,6 +16,7 @@
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+import scanpy as sc
 from loguru import logger
 from troppo.methods.reconstruction.corda import CORDA, CORDAProperties
 from troppo.methods.reconstruction.fastcore import FASTcore, FastcoreProperties

From bddf2a3e80810b52e3516d455b91a19b7c333c56 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:15:03 -0500
Subject: [PATCH 5/8] feat: allow zFPKM cutoffs (e.g., -3)

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 74 +++++++++++++++++-----
 1 file changed, 57 insertions(+), 17 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index d3c76074..2ba73bd7 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -412,6 +412,8 @@ def _map_expression_to_reaction(
     taxon: int | str,
     low_percentile: int,
     high_percentile: int,
+    low_bin_cutoff: int,
+    high_bin_cutoff: int,
 ) -> tuple[collections.OrderedDict[str, float], float, float, float]:
     """Map gene ids to a reaction based on GPR (gene to protein to reaction) association rules.
 
@@ -424,6 +426,9 @@ def _map_expression_to_reaction(
         taxon: Taxon ID or Taxon object for gene ID conversion.
         low_percentile: Low percentile for threshold calculation
         high_percentile: High percentile for threshold calculation
+        low_bin_cutoff: Values lower than this (e.g., -3 zFPKM or 25th percentile) will be placed in the low bin.
+        high_bin_cutoff: Values equal to or greater than this (e.g., 0 zFPKM or 75th percentile) will be placed in the high bin.
+        activity_is_zfpkm_data: Does `gene_expression_file` contain zFPKM-normalized data?
 
     Returns:
         [0]: An ordered dictionary mapping reaction IDs to their corresponding expression values.
@@ -434,7 +439,15 @@ def _map_expression_to_reaction(
     Raises:
         ValueError: If neither 'entrez_gene_id' nor 'ensembl_gene_id' columns are found in the gene expression file.
     """
-    expression_data = pd.read_csv(gene_expression_file)
+    if gene_expression_file.suffix in {".csv", ".tsv"}:
+        expression_data = pd.read_csv(gene_expression_file)
+    elif gene_expression_file.suffix == ".h5ad":
+        adata = sc.read_h5ad(gene_expression_file)
+        expression_data = adata.to_df()
+    else:
+        raise ValueError(
+            f"Unknown file extension '{gene_expression_file.suffix}' for gene expression file: {gene_expression_file}"
+        )
     gene_activity = split_gene_expression_data(
         expression_data, identifier_column="entrez_gene_id", recon_algorithm=recon_algorithm
     )
@@ -444,10 +457,30 @@ def _map_expression_to_reaction(
     min_val = float(gene_arr[np.isfinite(gene_arr)].min())
     if min_val < 0:
         gene_arr[np.isfinite(gene_arr)] += abs(min_val)
-
-    # We are computing percentiles on non-zero data because we need to make sure that
-    #   zero values are definitely placed in the low bin
-    low_expr, high_expr = np.nanpercentile(gene_arr[gene_arr > 0], [low_percentile, high_percentile])
+    
+    if not activity_is_zfpkm_data and low_bin_cutoff < 1:
+        logger.warning(
+            f"Low bin cutoff was less than 1, but not using zFPKM data (got: {low_bin_cutoff}). "
+            f"Assuming this is a percentile value, it should be in the range [1, 100], converting now."
+        )
+        low_bin_cutoff *= 100
+    if not activity_is_zfpkm_data and high_bin_cutoff < 1:
+        logger.warning(
+            f"High bin cutoff was less than 1, but not with zFPKM data (got: {high_bin_cutoff}). "
+            "Assuming this is a percentile value, it should be in the range [1, 100], converting now."
+        )
+        high_bin_cutoff *= 100
+    
+    # If the user provided zFPKM-normalized data, the low and high bin cutoffs should be kept constant
+    # Otherwise, we will calculate percentile expression values
+    if activity_is_zfpkm_data:
+        low_expr = low_bin_cutoff
+        high_expr = high_bin_cutoff
+    else:
+        # We are computing percentiles on non-zero data because we need to make sure that
+        #   zero values are definitely placed in the low bin
+        low_expr, high_expr = np.nanpercentile(gene_arr[gene_arr > 0], [low_bin_cutoff, high_bin_cutoff])
+    
     gene_activity["active"] = gene_arr
     gene_activity.index = gene_activity.index.astype(str)  # maintain strings for reference model gene_ids
 
@@ -546,14 +579,15 @@ def _build_model(
     lower_bounds: list[float],
     upper_bounds: list[float],
     solver: str,
-    low_percentile: int,
-    high_percentile: int,
+    low_bin_cutoff: int,
+    high_bin_cutoff: int,
     output_flux_result_filepath: Path,
     taxon: int | str,
     objective_direction: Literal["min", "max"],
     build_settings: ModelBuildSettings,
     *,
     force_boundary_rxn_inclusion: bool,
+    activity_is_zfpkm_data: bool,
 ) -> cobra.Model:
     """Seed a context specific reference_model.
 
@@ -573,9 +607,12 @@ def _build_model(
         lower_bounds: List of lower bounds corresponding to boundary reactions
         upper_bounds: List of upper bounds corresponding to boundary reactions
         solver: Solver to use (e.g., 'glpk', 'cplex', 'gurobi')
+        low_bin_cutoff: Values lower than this (e.g., -3 zFPKM or 25th percentile) will be placed in the low bin.
+        high_bin_cutoff: Values equal to or greater than this (e.g., 0 zFPKM or 75th percentile) will be placed in the high bin.
         output_flux_result_filepath: Path to save flux results (for iMAT only)
         taxon: Taxon ID or Taxon object for gene ID conversion.
         force_boundary_rxn_inclusion: If True, ensure that all boundary reactions are included in the final model.
+        activity_is_zfpkm_data: Does the `gene_expression_file` contain zFPKM-normalized data?
 
     Returns:
         A _BuildResults object containing:
@@ -621,6 +658,8 @@ def _build_model(
         taxon=taxon,
         low_percentile=low_percentile,
         high_percentile=high_percentile,
+        low_bin_cutoff=low_bin_cutoff,
+        high_bin_cutoff=high_bin_cutoff,
     )
     expression_vector: npt.NDArray[np.floating] = np.asarray(list(reaction_expression.values()), dtype=float)
     for rxn_id in force_reactions:
@@ -842,6 +881,9 @@ def create_context_specific_model(  # noqa: C901
     output_infeasible_reactions_filepath: Path,
     output_flux_result_filepath: Path,
     output_model_filepaths: Path | list[Path],
+    activity_is_zfpkm_data: bool,
+    low_bin_cutoff: int,
+    high_bin_cutoff: int,
     output_fastcore_expression_index_filepath: Path | None = None,
     objective: str = "biomass_reaction",
     objective_direction: Literal["min", "max"] = "max",
@@ -850,8 +892,6 @@ def create_context_specific_model(  # noqa: C901
     exclude_rxns_filepath: str | Path | None = None,
     force_rxns_filepath: str | Path | None = None,
     algorithm: Algorithm = Algorithm.GIMME,
-    low_percentile: int | None = None,
-    high_percentile: int | None = None,
     solver: Solver = Solver.GLPK,
     log_level: LogLevel = LogLevel.INFO,
     log_location: str | TextIO | TextIOWrapper = sys.stderr,
@@ -877,8 +917,8 @@ def create_context_specific_model(  # noqa: C901
     :param exclude_rxns_filepath: Optional path to reactions to exclude file (csv, tsv, or Excel).
     :param force_rxns_filepath: Optional path to reactions to force include file (csv, tsv, or Excel).
     :param algorithm: Algorithm to use for reconstruction.
-    :param low_percentile: Low percentile for gene expression threshold calculation.
-    :param high_percentile: High percentile for gene expression threshold calculation.
+    :param low_bin_cutoff: Values lower than this (e.g., -3 zFPKM or 25th percentile) will be placed in the low bin.
+    :param high_bin_cutoff: Values equal to or greater than this (e.g., 0 zFPKM or 75th percentile) will be placed in the high bin.
     :param solver: Solver to use. One of Solver.GLPK, Solver.CPLEX, Solver.GUROBI
     :param log_level: Logging level
     :param log_location: Location for log output. Can be a file path or sys.stderr/sys.stdout.
@@ -889,10 +929,10 @@ def create_context_specific_model(  # noqa: C901
     :raises ImportError: If Gurobi solver is selected but gurobipy is not installed.
     """  # noqa: E501
     set_up_logging(level=log_level, location=log_location)
-    if low_percentile is None:
-        raise ValueError("low_percentile must be provided")
-    if high_percentile is None:
-        raise ValueError("high_percentile must be provided")
+    if not activity_is_zfpkm_data and low_bin_cutoff < 0:
+        raise ValueError(f"Percentiles should be in the range [1, 100]. Got `{low_bin_cutoff=}`")
+    if not activity_is_zfpkm_data and high_bin_cutoff < 0:
+        raise ValueError(f"Percentiles should be in the range [1, 100]. Got `{high_bin_cutoff=}`")
 
     boundary_rxns_filepath = Path(boundary_rxns_filepath) if boundary_rxns_filepath else None
     output_model_filepaths = (
@@ -987,8 +1027,8 @@ def create_context_specific_model(  # noqa: C901
         lower_bounds=boundary_reactions.lower_bounds,
         upper_bounds=boundary_reactions.upper_bounds,
         solver=solver.value.lower(),
-        low_percentile=low_percentile,
-        high_percentile=high_percentile,
+        low_bin_cutoff=low_bin_cutoff,
+        high_bin_cutoff=high_bin_cutoff,
         output_flux_result_filepath=output_flux_result_filepath,
         force_boundary_rxn_inclusion=force_boundary_rxn_inclusion,
         taxon=taxon_id,

From 6617b6234c2852f571a73a7ad4dc4558de0f7f72 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:15:12 -0500
Subject: [PATCH 6/8] feat: allow zFPKM cutoffs (e.g., -3)

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index 2ba73bd7..250afd67 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -410,10 +410,11 @@ def _map_expression_to_reaction(
     gene_expression_file: Path,
     recon_algorithm: Algorithm,
     taxon: int | str,
-    low_percentile: int,
-    high_percentile: int,
     low_bin_cutoff: int,
     high_bin_cutoff: int,
+    *,
+    force_add_base_exchange_reactions: bool,
+    activity_is_zfpkm_data: bool,
 ) -> tuple[collections.OrderedDict[str, float], float, float, float]:
     """Map gene ids to a reaction based on GPR (gene to protein to reaction) association rules.
 
@@ -424,10 +425,12 @@ def _map_expression_to_reaction(
         gene_expression_file: Path to a gene expression file (.csv, .tsv, .xlsx, or .xls)
         recon_algorithm: Algorithm to use for reconstruction (GIMME, FASTCORE, iMAT, or tINIT)
         taxon: Taxon ID or Taxon object for gene ID conversion.
-        low_percentile: Low percentile for threshold calculation
-        high_percentile: High percentile for threshold calculation
         low_bin_cutoff: Values lower than this (e.g., -3 zFPKM or 25th percentile) will be placed in the low bin.
         high_bin_cutoff: Values equal to or greater than this (e.g., 0 zFPKM or 75th percentile) will be placed in the high bin.
+        force_add_base_exchange_reactions: If True, add a set of basic exchange reactions to the model
+            before mapping expression to reactions.
+            This is required for some algorithms (e.g., FASTCORE) to ensure that key exchange reactions are included
+            in the output model, but can be skipped if these reactions are already included in the reference model.
         activity_is_zfpkm_data: Does `gene_expression_file` contain zFPKM-normalized data?
 
     Returns:
@@ -587,6 +590,7 @@ def _build_model(
     build_settings: ModelBuildSettings,
     *,
     force_boundary_rxn_inclusion: bool,
+    force_add_base_exchange_reactions: bool,
     activity_is_zfpkm_data: bool,
 ) -> cobra.Model:
     """Seed a context specific reference_model.
@@ -656,10 +660,10 @@ def _build_model(
         gene_expression_file,
         recon_algorithm,
         taxon=taxon,
-        low_percentile=low_percentile,
-        high_percentile=high_percentile,
         low_bin_cutoff=low_bin_cutoff,
         high_bin_cutoff=high_bin_cutoff,
+        force_add_base_exchange_reactions=force_add_base_exchange_reactions,
+        activity_is_zfpkm_data=activity_is_zfpkm_data,
     )
     expression_vector: npt.NDArray[np.floating] = np.asarray(list(reaction_expression.values()), dtype=float)
     for rxn_id in force_reactions:

From b721bcaefa03c305095c08c74a8edf921a683c31 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:15:45 -0500
Subject: [PATCH 7/8] feat: allow forcibly adding basal exchange reactions

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 52 +++++++++++++++++++---
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index 250afd67..cde23957 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -504,6 +504,31 @@ def _map_expression_to_reaction(
         else 0 if recon_algorithm == Algorithm.FASTCORE
         else 1
     )
+    
+    # Some models use bracket notation for reactions: EX_glc_D[e]
+    #   and others use underscores: EX_glc_D_e
+    # As a result, we need to make sure we can match either of these identifiers
+    basal_metabolite_ids: tuple[str, ...] = (
+        # Ions
+        "fe3", "ca2", "na1", "cl", "k", "h", "h2o",
+        "o2", "pi", "ppi", "co2", "nh4", "no2", "co",
+        # Amino acids
+        # We are including single-underscore and double-underscore amino acids here because
+        #   some models (e.g., Recon3D) use single underscores in the extracellular compartment,
+        #   while others (e.g., Human-GEM) use double underscores
+        "ala_L", "arg_L", "asn_L", "asp_L", "Lcystin", "glu_L", "gln_L",
+        "gly", "his_L", "ile_L", "leu_L", "lys_L", "met_L", "phe_L",
+        "pro_L", "ser_L", "thr_L", "trp_L", "tyr_L", "val_L", "4hpro_LT",
+        #   Dunder amino acids
+        "ala__L", "arg__L", "asn__L", "asp__L", "Lcystin", "glu__L", "gln__L",
+        "gly", "his__L", "ile__L", "leu__L", "lys__L", "met__L", "phe__L",
+        "pro__L", "ser__L", "thr__L", "trp__L", "tyr__L", "val__L", "4hpro__LT",
+        # Lipids
+        "lnlc", "lnlnca", "eicostet", "5eipenc", "hdca",
+        # Other
+        "ac",
+    )
+    basal_exchange_reactions: set[str] = {f"EX_{i}{suffix}" for i in basal_metabolite_ids for suffix in ("[e]", "_e")}
     # fmt: on
 
     reaction_expression: collections.OrderedDict[str, float] = collections.OrderedDict()
@@ -513,7 +538,12 @@ def _map_expression_to_reaction(
     for rxn in reference_model.reactions:
         rxn: cobra.Reaction
         gene_reaction_rule = rxn.gene_reaction_rule
-
+        
+        # Forcibly set exchange reaction expressions in the high bin, irrespective of its GPR
+        if force_add_base_exchange_reactions and rxn.id in basal_exchange_reactions:
+            reaction_expression[rxn.id] = high_expr + 1
+            continue
+        
         if not gene_reaction_rule:
             reaction_expression[rxn.id] = missing_gpr
             continue
@@ -656,9 +686,9 @@ def _build_model(
         raise ValueError("Upper bounds contains unfilled values!")
 
     reaction_expression, min_expr_val, low_expr, high_expr = _map_expression_to_reaction(
-        reference_model,
-        gene_expression_file,
-        recon_algorithm,
+        reference_model=reference_model,
+        gene_expression_file=gene_expression_file,
+        recon_algorithm=recon_algorithm,
         taxon=taxon,
         low_bin_cutoff=low_bin_cutoff,
         high_bin_cutoff=high_bin_cutoff,
@@ -751,8 +781,12 @@ def _build_model(
         cast(cobra.Reaction, context_model_cobra.reactions.get_by_id(rxn.id)).subsystem = cast(
             cobra.Reaction, reference_model.reactions.get_by_id(rxn.id)
         ).subsystem
-
-    context_model_cobra.objective = objective
+    
+    try:
+        context_model_cobra.objective = objective
+    except ValueError:
+        logger.critical(f"Unable to set the model's objective value to '{objective}'.")
+    
     flux_sol: cobra.Solution = context_model_cobra.optimize()
     fluxes = flux_sol.fluxes
     model_reactions: list[str] = [reaction.id for reaction in context_model_cobra.reactions]
@@ -901,6 +935,7 @@ def create_context_specific_model(  # noqa: C901
     log_location: str | TextIO | TextIOWrapper = sys.stderr,
     build_settings: ModelBuildSettings | None = None,
     *,
+    force_add_base_exchange_reactions: bool = False,
     force_boundary_rxn_inclusion: bool = False,
     close_unlisted_exchanges: bool = False,
 ) -> cobra.Model:
@@ -913,6 +948,7 @@ def create_context_specific_model(  # noqa: C901
     :param output_infeasible_reactions_filepath: Path to save infeasible reactions (csv).
     :param output_flux_result_filepath: Path to save flux results (csv).
     :param output_model_filepaths: Path or list of paths to save the context-specific model (.xml, .mat, or .json).
+    :param activity_is_zfpkm_data: Does the `active_genes_filepath` file contains zFPKM-normalized data
     :param output_fastcore_expression_index_filepath: Path to save Fastcore expression indices (txt).
     :param objective: Objective function reaction ID.
     :param objective_direction: Direction of the objective function, either 'min' or 'max'.
@@ -926,6 +962,8 @@ def create_context_specific_model(  # noqa: C901
     :param solver: Solver to use. One of Solver.GLPK, Solver.CPLEX, Solver.GUROBI
     :param log_level: Logging level
     :param log_location: Location for log output. Can be a file path or sys.stderr/sys.stdout.
+    :param force_add_base_exchange_reactions: If True, ensure that basal metabolite exchange reactions are included in the final model.
+        View the file `add_reactions` to see which reactions are added to include basal metabolites.
     :param force_boundary_rxn_inclusion: If True, ensure that all provided boundary reactions are included in the final model.
     :param build_settings: Optional ModelBuildSettings object to customize model building parameters.
     :param close_unlisted_exchanges: If True, all exchanges not listed in the boundary reactions input will be closed
@@ -1037,6 +1075,8 @@ def create_context_specific_model(  # noqa: C901
         force_boundary_rxn_inclusion=force_boundary_rxn_inclusion,
         taxon=taxon_id,
         build_settings=build_settings or ModelBuildSettings(),
+        force_add_base_exchange_reactions=force_add_base_exchange_reactions,
+        activity_is_zfpkm_data=activity_is_zfpkm_data,
     )
     context_model.id = contextualized_model_id or context_name
 

From 76bfcd8746bf7518b3a65174d041aee65b94cae9 Mon Sep 17 00:00:00 2001
From: Josh Loecker <joshloecker@icloud.com>
Date: Mon, 16 Mar 2026 21:16:01 -0500
Subject: [PATCH 8/8] refactor: re-add legacy column names for CC integration

Signed-off-by: Josh Loecker <joshloecker@icloud.com>
---
 main/como/create_context_specific_model.py | 52 +++++++++++++---------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/main/como/create_context_specific_model.py b/main/como/create_context_specific_model.py
index cde23957..3f7c583e 100644
--- a/main/como/create_context_specific_model.py
+++ b/main/como/create_context_specific_model.py
@@ -815,28 +815,34 @@ def _collect_boundary_reactions(
     close_unlisted_exchanges: bool,
 ) -> _BoundaryReactions:
     df: pd.DataFrame = _create_df(path, lowercase_col_names=True)
-
-    for column in df.columns:
-        if column not in [
-            "reaction",
-            "abbreviation",
-            "compartment",
-            "lower bound",
-            "upper bound",
-        ]:
-            raise ValueError(
-                f"Boundary reactions file must have columns named 'reaction', 'abbreviation', 'compartment', "
-                f"'lower bound', and 'upper bound'. Found: '{column}'"
-            )
-
-    for compartment in df["compartment"].unique():
+    
+    required = {
+        "reaction",
+        "abbreviation",
+        "compartment",
+        "lower bound",
+        "upper bound",
+        # The following columns are legacy column names retained for backwards compatibility.
+        # All future code should work on the presumption of 'lower bound' and 'upper bound'
+        "minimum reaction rate",
+        "maximum reaction rate",
+    }
+    extra_columns = set(df.columns) - required
+    if extra_columns:
+        raise ValueError(f"Boundary reactions file has invalid columns. Extra: {', '.join(extra_columns)}. ")
+    df = df.rename(
+        columns={"minimum reaction rate": "lower bound", "maximum reaction rate": "upper bound"},
+        errors="ignore",
+    )
+    
+    for compartment in df["compartment"].str.lower().unique():
         as_shorthand = CobraCompartments.get_shorthand(compartment) if len(compartment) > 2 else compartment
-        if as_shorthand not in reference_model.compartments:
-            raise ValueError(f"Unknown compartment: '{compartment}'")
+        if not as_shorthand:
+            raise ValueError(f"Unknown compartment: {compartment}")
 
     boundary_map = {"exchange": "EX", "demand": "DM", "sink": "SK"}
     for exchange in df["reaction"].str.lower().unique():
-        if boundary_map.get(exchange) is None:
+        if not boundary_map.get(exchange):
             raise ValueError(
                 f"Unable to determine reaction type for: '{exchange}'\n"
                 f"Validate the boundary reactions file has a 'Reaction' value of: 'Exchange', 'Demand', or 'Sink'."
@@ -1019,17 +1025,21 @@ def create_context_specific_model(  # noqa: C901
     exclude_rxns: list[str] = []
     if exclude_rxns_filepath:
         exclude_rxns_filepath = Path(exclude_rxns_filepath)
-        df = _create_df(exclude_rxns_filepath)
-        if "abbreviation" not in df.columns:
+        df = _create_df(exclude_rxns_filepath, lowercase_col_names=True)
+        # `reaction id` is a legacy column name; all assumptions going forward should use `abbreviation`
+        if "abbreviation" not in df.columns and "reaction id" not in df.columns:
             raise ValueError("The exclude reactions file should have a single column with a header named Abbreviation")
+        df = df.rename(columns={"reaction id": "abbreviation"}, errors="ignore")
         exclude_rxns = df["abbreviation"].tolist()
 
     force_rxns: list[str] = []
     if force_rxns_filepath:
         force_rxns_filepath = Path(force_rxns_filepath)
         df = _create_df(force_rxns_filepath, lowercase_col_names=True)
-        if "abbreviation" not in df.columns:
+        # `reaction id` is a legacy column name; all assumptions going forward should use `abbreviation`
+        if "abbreviation" not in df.columns and "reaction id" not in df.columns:
             raise ValueError("The force reactions file should have a single column with a header named Abbreviation")
+        df = df.rename(columns={"reaction id": "abbreviation"}, errors="ignore")
         force_rxns = df["abbreviation"].tolist()
 
     # Test that gurobi is using a valid license file