From c60cfde51817d618374efa33f7e59e1e7f5867c5 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 11:16:50 +0200
Subject: [PATCH 01/17] fix(kpplot): add explicit re-export for KpPlotBase in
 init

---
 krakenparser/kpplot/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/krakenparser/kpplot/__init__.py b/krakenparser/kpplot/__init__.py
index 147afc0..c353a3e 100644
--- a/krakenparser/kpplot/__init__.py
+++ b/krakenparser/kpplot/__init__.py
@@ -1 +1,3 @@
 from .base import KpPlotBase
+
+__all__ = ["KpPlotBase"]

From ea101872c108512f660a2885b101fc8ce066e511 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 11:17:11 +0200
Subject: [PATCH 02/17] ci: migrate workflow from flake8 to ruff

---
 .github/workflows/python-package.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 553faa6..fe4fe56 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -27,14 +27,14 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 setuptools wheel
+        python -m pip install ruff setuptools wheel
         pip install -e ".[dev]" --no-build-isolation
-    - name: Lint with flake8
+    - name: Lint with ruff
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        # Run linter and fail on any rule violation
+        ruff check .
+        # Check if the code format matches Ruff style guide
+        ruff format --check .
     - name: Test with pytest
       run: |
         pytest --cov=krakenparser --cov-report=xml

From 28eed0799f4a0db35faa5dcd5ba469d847386b00 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 11:17:37 +0200
Subject: [PATCH 03/17] style(pipeline): move logger below imports (E402)

---
 krakenparser/pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/krakenparser/pipeline.py b/krakenparser/pipeline.py
index c54b4e9..768b5d8 100644
--- a/krakenparser/pipeline.py
+++ b/krakenparser/pipeline.py
@@ -7,8 +7,6 @@
 import sys
 from pathlib import Path
 
-_log = logging.getLogger(__name__)
-
 import pandas as pd
 
 from krakenparser.counts.convert2csv import convert_to_csv
@@ -19,6 +17,8 @@
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
 from krakenparser.stats.relabund import calculate_rel_abund
 
+_log = logging.getLogger(__name__)
+
 
 def _is_processable(path: Path) -> bool:
     """Return False for hidden files, files with null bytes, or non-UTF-8 files."""

From 15267a471d18a9f4c95a8aa907c8b6ef12c374b8 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 11:41:53 +0200
Subject: [PATCH 04/17] refactor(types): fix Pylance warnings in calc_beta_div

---
 krakenparser/stats/diversity.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/krakenparser/stats/diversity.py b/krakenparser/stats/diversity.py
index dd76098..ad49651 100644
--- a/krakenparser/stats/diversity.py
+++ b/krakenparser/stats/diversity.py
@@ -61,30 +61,31 @@ def calc_alpha_div(df, output_path):
 
 def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
     rng = np.random.default_rng(seed)
-    rarefied_counts = []
-    sample_ids = []
+    rarefied_counts: list[np.ndarray] = []
+    sample_ids: list[str] = []
 
     for sample, row in df.iterrows():
         counts = np.round(row.values).astype(int)
         if counts.sum() >= rarefaction_depth:
             rarefied = _subsample_counts(counts, n=rarefaction_depth, rng=rng)
             rarefied_counts.append(rarefied)
-            sample_ids.append(sample)
+            sample_ids.append(str(sample))
 
     if len(rarefied_counts) < 2:
         raise ValueError("Not enough samples passed the rarefaction threshold.")
 
     X = np.array(rarefied_counts, dtype=float)
+    idx = pd.Index(sample_ids)
 
     bray_df = pd.DataFrame(
         squareform(pdist(X, metric="braycurtis")),
-        index=sample_ids,
-        columns=sample_ids,
+        index=idx,
+        columns=idx,
     )
     jaccard_df = pd.DataFrame(
         squareform(pdist(X.astype(bool).astype(float), metric="jaccard")),
-        index=sample_ids,
-        columns=sample_ids,
+        index=idx,
+        columns=idx,
     )
 
     bray_df.to_csv(output_path / "beta_div_bray.csv")

From 26afe67ded0cf9f18f0ea0d4e4b14cd1ecfd0d59 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:10:44 +0200
Subject: [PATCH 05/17] feat(utils): add ensure_output_dir helper

---
 krakenparser/utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 krakenparser/utils.py

diff --git a/krakenparser/utils.py b/krakenparser/utils.py
new file mode 100644
index 0000000..7f9a780
--- /dev/null
+++ b/krakenparser/utils.py
@@ -0,0 +1,10 @@
+# krakenparser/utils.py
+from pathlib import Path
+
+
+def ensure_output_dir(path: str | Path, is_file: bool = True) -> Path:
+    """Create parent directory for a file output, or the directory itself."""
+    p = Path(path)
+    target = p.parent if is_file else p
+    target.mkdir(parents=True, exist_ok=True)
+    return p

From 399380630960a6925db4e7e25a6fd42dc2d9bd5c Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:11:14 +0200
Subject: [PATCH 06/17] refactor(mpa): use ensure_output_dir, add logging and
 input validation

---
 krakenparser/mpa/mpa_table.py     | 11 ++++++++++-
 krakenparser/mpa/transform2mpa.py | 13 +++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/krakenparser/mpa/mpa_table.py b/krakenparser/mpa/mpa_table.py
index c808383..2837972 100644
--- a/krakenparser/mpa/mpa_table.py
+++ b/krakenparser/mpa/mpa_table.py
@@ -3,17 +3,25 @@
 
 import argparse
 import logging
+from pathlib import Path
+
+from krakenparser.utils import ensure_output_dir
 
 _log = logging.getLogger(__name__)
 
 
 def combine_mpa(in_files: list[str], o_file: str) -> None:
+    out_path = ensure_output_dir(o_file, is_file=True)
     # Plain dict preserves insertion order (Python 3.7+).
     taxa: dict[str, dict[int, str]] = {}
     sample_names: list[str] = []
 
     _log.info("Number of files to parse: %d", len(in_files))
 
+    for in_path in in_files:
+        if not Path(in_path).is_file():
+            raise FileNotFoundError(f"Input file not found: {in_path}")
+
     for idx, in_path in enumerate(in_files):
         sample_name = f"Sample #{idx + 1}"
         with open(in_path) as fh:
@@ -39,7 +47,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:
     n_taxa = len(taxa)
     _log.info("Number of classifications to write: %d", n_taxa)
 
-    with open(o_file, "w") as fh:
+    with open(out_path, "w") as fh:
         fh.write("#Classification\t" + "\t".join(sample_names) + "\n")
         for taxon, counts in taxa.items():
             row = [counts.get(i, "0") for i in range(n_samples)]
@@ -49,6 +57,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Combine MPA files into a single tab-delimited table."
     )
diff --git a/krakenparser/mpa/transform2mpa.py b/krakenparser/mpa/transform2mpa.py
index a28cde6..8d362f7 100644
--- a/krakenparser/mpa/transform2mpa.py
+++ b/krakenparser/mpa/transform2mpa.py
@@ -2,10 +2,13 @@
 """Convert a Kraken2 report to MetaPhlAn (MPA) format."""
 
 import argparse
+import logging
 import os
 import sys
 from pathlib import Path
 
+from krakenparser.utils import ensure_output_dir
+
 # Maps Kraken2 single-letter rank codes to MPA prefixes
 _RANK_PREFIX = {
     "D": "d",
@@ -18,6 +21,8 @@
     "S": "s",
 }
 
+_log = logging.getLogger(__name__)
+
 
 def _parse_line(line: str):
     """
@@ -67,10 +72,13 @@ def kreport_to_mpa(
     depth d is encountered, all stack entries with depth >= d are popped
     before the new entry is pushed, keeping the path consistent.
     """
+    if not Path(report_path).is_file():
+        raise FileNotFoundError(f"Input file not found: {report_path}")
+    out_path = ensure_output_dir(output_path, is_file=True)
     # Stack entries: (structural_depth, mpa_segment, is_standard_rank)
     stack: list[tuple[int, str, bool]] = []
 
-    with open(report_path) as r_fh, open(output_path, "w") as o_fh:
+    with open(report_path) as r_fh, open(out_path, "w") as o_fh:
         if display_header:
             o_fh.write("#Classification\t" + os.path.basename(report_path) + "\n")
 
@@ -110,6 +118,7 @@ def kreport_to_mpa(
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Convert a Kraken2 report to MetaPhlAn (MPA) format."
     )
@@ -203,7 +212,7 @@ def main() -> None:
                 continue
             out_name = f.name.replace(".kreport", ".MPA.TXT")
             kreport_to_mpa(str(f), str(output_dir / out_name), **kwargs)
-        print(f"Converted to MPA successfully. Output stored in {output_dir}")
+        _log.info(f"Converted to MPA successfully. Output stored in {output_dir}")
     else:
         kreport_to_mpa(args.r_file, args.o_file, **kwargs)
 

From 40e401d8221d7dd2771a8e6e635d856376e97272 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:11:34 +0200
Subject: [PATCH 07/17] refactor(counts): use ensure_output_dir and extract
 main()

---
 krakenparser/counts/convert2csv.py | 17 +++++++++--------
 krakenparser/counts/split_mpa.py   |  7 +++++--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/krakenparser/counts/convert2csv.py b/krakenparser/counts/convert2csv.py
index 1177b91..9fb763e 100755
--- a/krakenparser/counts/convert2csv.py
+++ b/krakenparser/counts/convert2csv.py
@@ -6,6 +6,8 @@
 
 import pandas as pd
 
+from krakenparser.utils import ensure_output_dir
+
 _log = logging.getLogger(__name__)
 
 
@@ -13,17 +15,15 @@ def convert_to_csv(input_file, output_file):
     in_path = Path(input_file)
     if not in_path.is_file():
         raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = Path(output_file)
-    if not out_path.parent.exists():
-        raise FileNotFoundError(f"Output directory does not exist: {out_path.parent}")
+    out_path = ensure_output_dir(output_file, is_file=True)
 
     data = pd.read_csv(in_path, sep="\t", index_col=0)
     data.T.to_csv(out_path, index_label="Sample_id")
     _log.info("Data converted and saved as '%s'.", output_file)
 
 
-if __name__ == "__main__":
-    # Use argparse to handle command-line arguments
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Reads a TXT file, reorganizes the data, and converts it into a CSV file."
     )
@@ -39,8 +39,9 @@ def convert_to_csv(input_file, output_file):
         required=True,
         help="Path to the output CSV file. The script will restructure the data and save it here.",
     )
-
     args = parser.parse_args()
-
-    # Call function with parsed arguments
     convert_to_csv(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/krakenparser/counts/split_mpa.py b/krakenparser/counts/split_mpa.py
index 0ee87b8..1af110e 100644
--- a/krakenparser/counts/split_mpa.py
+++ b/krakenparser/counts/split_mpa.py
@@ -9,6 +9,8 @@
 import re
 from pathlib import Path
 
+from krakenparser.utils import ensure_output_dir
+
 _log = logging.getLogger(__name__)
 
 
@@ -53,8 +55,8 @@ def split_mpa(
     in_path = Path(input_file)
     if not in_path.is_file():
         raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = Path(output_dir)
-    (out_path / "txt").mkdir(parents=True, exist_ok=True)
+    out_path = ensure_output_dir(output_dir, is_file=False)
+    (out_path / "txt").mkdir(exist_ok=True)
 
     lines = in_path.read_text().splitlines()
     data_lines = [ln for ln in lines if not ln.startswith("#") and ln.strip()]
@@ -86,6 +88,7 @@ def split_mpa(
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Split a combined MPA table into per-rank TXT files."
     )

From 9e2d738502644b6a3badb754e7b4af359e18b4a3 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:11:51 +0200
Subject: [PATCH 08/17] refactor(counts): extract main(), add logging, keep
 dest-file validation

---
 krakenparser/counts/processing_script.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/krakenparser/counts/processing_script.py b/krakenparser/counts/processing_script.py
index 831910e..0d6d59a 100755
--- a/krakenparser/counts/processing_script.py
+++ b/krakenparser/counts/processing_script.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python
 
 import argparse
+import logging
 import os
 import tempfile
 from pathlib import Path
 
+_log = logging.getLogger(__name__)
+
 
 def modify_taxa_names(line):
     prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
@@ -47,11 +50,11 @@ def process_files(source_file, destination_file):
         tmp_path = tmp.name
     os.replace(tmp_path, dest_path)
 
-    print(f"Processed {destination_file} successfully.")
+    _log.info(f"Processed {destination_file} successfully.")
 
 
-if __name__ == "__main__":
-    # Use argparse to parse command-line arguments
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Reads a source file, processes its first line, modifies taxa names in a destination file, and updates it."
     )
@@ -67,8 +70,9 @@ def process_files(source_file, destination_file):
         required=True,
         help="Path to the destination file. This file's contents will be updated with cleaned taxa names.",
     )
-
     args = parser.parse_args()
-
-    # Call the function with parsed arguments
     process_files(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()

From c320f0f1139218b12d111354691675589cbc628b Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:12:10 +0200
Subject: [PATCH 09/17] refactor(stats): use ensure_output_dir, add logging and
 startup info

---
 krakenparser/stats/diversity.py | 34 ++++++++++++++++++++++++++-------
 krakenparser/stats/relabund.py  | 15 ++++++++++-----
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/krakenparser/stats/diversity.py b/krakenparser/stats/diversity.py
index ad49651..dc39eb4 100644
--- a/krakenparser/stats/diversity.py
+++ b/krakenparser/stats/diversity.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import argparse
+import logging
 import sys
 from pathlib import Path
 
@@ -8,6 +9,10 @@
 import pandas as pd
 from scipy.spatial.distance import pdist, squareform
 
+from krakenparser.utils import ensure_output_dir
+
+_log = logging.getLogger(__name__)
+
 
 def shannon_index(counts):
     counts = np.array(counts)
@@ -44,6 +49,7 @@ def _subsample_counts(
 
 
 def calc_alpha_div(df, output_path):
+    out_path = ensure_output_dir(output_path, is_file=False)
     results = []
     for sample_id, row in df.iterrows():
         counts = row.values
@@ -56,10 +62,15 @@ def calc_alpha_div(df, output_path):
             }
         )
     alpha_df = pd.DataFrame(results).set_index("Sample")
-    alpha_df.to_csv(output_path / "alpha_div.csv")
+    alpha_df.to_csv(out_path / "alpha_div.csv")
+
+    _log.info(
+        f"α-diversity has been successfully calculated and saved to '{output_path}'."
+    )
 
 
 def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
+    out_path = ensure_output_dir(output_path, is_file=False)
     rng = np.random.default_rng(seed)
     rarefied_counts: list[np.ndarray] = []
     sample_ids: list[str] = []
@@ -88,11 +99,16 @@ def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
         columns=idx,
     )
 
-    bray_df.to_csv(output_path / "beta_div_bray.csv")
-    jaccard_df.to_csv(output_path / "beta_div_jaccard.csv")
+    bray_df.to_csv(out_path / "beta_div_bray.csv")
+    jaccard_df.to_csv(out_path / "beta_div_jaccard.csv")
 
+    _log.info(
+        f"β-diversity has been successfully calculated and saved to '{output_path}'."
+    )
 
-if __name__ == "__main__":
+
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(description="Calculate α & β-diversities.")
     parser.add_argument(
         "-i",
@@ -117,6 +133,9 @@ def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
     )
     args = parser.parse_args()
 
+    seed_label = str(args.seed) if args.seed is not None else "not set (results will vary between runs)"
+    _log.info("Rarefaction depth: %d | seed: %s", args.depth, seed_label)
+
     input_file = Path(args.input)
     if not input_file.is_file():
         sys.exit(f"Error: input file not found: {input_file}")
@@ -127,6 +146,7 @@ def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
 
     calc_alpha_div(df, output_dir)
     calc_beta_div(df, output_dir, args.depth, seed=args.seed)
-    print(
-        f"α & β-diversities have been successfully calculated and saved to '{output_dir}'."
-    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/krakenparser/stats/relabund.py b/krakenparser/stats/relabund.py
index 5b1b0c5..0957329 100644
--- a/krakenparser/stats/relabund.py
+++ b/krakenparser/stats/relabund.py
@@ -7,6 +7,8 @@
 
 import pandas as pd
 
+from krakenparser.utils import ensure_output_dir
+
 _log = logging.getLogger(__name__)
 
 
@@ -14,9 +16,7 @@ def calculate_rel_abund(input_file, output_file, other_threshold=None):
     in_path = Path(input_file)
     if not in_path.is_file():
         raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = Path(output_file)
-    if not out_path.parent.exists():
-        raise FileNotFoundError(f"Output directory does not exist: {out_path.parent}")
+    out_path = ensure_output_dir(output_file, is_file=True)
 
     # Load counts table
     df = pd.read_csv(in_path)
@@ -59,11 +59,12 @@ def calculate_rel_abund(input_file, output_file, other_threshold=None):
     )
 
     # Save to CSV
-    result.to_csv(output_file, index=False)
+    result.to_csv(out_path, index=False)
     _log.info("Relative abundance saved as '%s'.", output_file)
 
 
-if __name__ == "__main__":
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Calculates taxa relative abundance and saves it to a CSV file."
     )
@@ -81,3 +82,7 @@ def calculate_rel_abund(input_file, output_file, other_threshold=None):
 
     args = parser.parse_args()
     calculate_rel_abund(args.input, args.output, args.other)
+
+
+if __name__ == "__main__":
+    main()

From 17393de5d31573befc477ae0e4c4400a8664db03 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:12:27 +0200
Subject: [PATCH 10/17] refactor(pipeline): raise exceptions instead of
 sys.exit in run_pipeline

---
 krakenparser/pipeline.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/krakenparser/pipeline.py b/krakenparser/pipeline.py
index 768b5d8..4f51e10 100644
--- a/krakenparser/pipeline.py
+++ b/krakenparser/pipeline.py
@@ -47,7 +47,7 @@ def run_pipeline(
 ) -> None:
     source_dir = Path(input_dir)
     if not source_dir.is_dir():
-        sys.exit(f"Error: input directory not found: {source_dir}")
+        raise FileNotFoundError(f"Input directory not found: {source_dir}")
 
     out_dir = Path(output_dir) if output_dir else source_dir.parent
     out_dir.mkdir(parents=True, exist_ok=True)
@@ -55,8 +55,8 @@ def run_pipeline(
     existing = [out_dir / d for d in _OUTPUT_SUBDIRS if (out_dir / d).exists()]
     if existing and not overwrite:
         names = ", ".join(d.name for d in existing)
-        sys.exit(
-            f"Error: output already exists in '{out_dir}' ({names}).\n"
+        raise FileExistsError(
+            f"Output already exists in '{out_dir}' ({names}).\n"
             "Use --overwrite to overwrite it."
         )
     if overwrite:
@@ -164,14 +164,17 @@ def main() -> None:
         help="Overwrite the output directory if it already exists",
     )
     args = parser.parse_args()
-    run_pipeline(
-        args.input,
-        args.output,
-        keep_human=args.keep_human,
-        rarefaction_depth=args.depth,
-        seed=args.seed,
-        overwrite=args.overwrite,
-    )
+    try:
+        run_pipeline(
+            args.input,
+            args.output,
+            keep_human=args.keep_human,
+            rarefaction_depth=args.depth,
+            seed=args.seed,
+            overwrite=args.overwrite,
+        )
+    except (FileNotFoundError, FileExistsError) as e:
+        sys.exit(f"Error: {e}")
 
 
 if __name__ == "__main__":

From 2f51ffbb1fff8612eac4d80ac62a2745115d8996 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:12:40 +0200
Subject: [PATCH 11/17] test: add ensure_output_dir unit tests

---
 tests/test_units.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tests/test_units.py b/tests/test_units.py
index 39435d3..4f977e1 100644
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -1,12 +1,14 @@
 """Pure-function unit tests — no I/O, fully deterministic."""
 
 import math
+from pathlib import Path
 
 import pytest
 
 from krakenparser.counts.processing_script import modify_taxa_names
 from krakenparser.mpa.transform2mpa import _parse_line
 from krakenparser.stats.diversity import chao1_index, pielou_evenness, shannon_index
+from krakenparser.utils import ensure_output_dir
 
 # ---------------------------------------------------------------------------
 # _parse_line
@@ -147,3 +149,37 @@ def test_modify_taxa_names_count_fields_not_modified():
     # Underscores in tab-separated count fields must be preserved
     result = modify_taxa_names("s__My_taxon\t1_000\t2_000")
     assert result == "My taxon\t1_000\t2_000"
+
+
+# ---------------------------------------------------------------------------
+# ensure_output_dir
+# ---------------------------------------------------------------------------
+
+
+def test_ensure_output_dir_file_creates_parent(tmp_path):
+    p = ensure_output_dir(tmp_path / "subdir" / "output.csv", is_file=True)
+    assert (tmp_path / "subdir").is_dir()
+    assert not p.exists()  # only the parent is created, not the file itself
+
+
+def test_ensure_output_dir_dir_creates_directory(tmp_path):
+    p = ensure_output_dir(tmp_path / "output_dir", is_file=False)
+    assert p.is_dir()
+
+
+def test_ensure_output_dir_nested_creates_all_parents(tmp_path):
+    p = ensure_output_dir(tmp_path / "a" / "b" / "c", is_file=False)
+    assert p.is_dir()
+
+
+def test_ensure_output_dir_returns_path_object(tmp_path):
+    p = ensure_output_dir(str(tmp_path / "out.csv"), is_file=True)
+    assert isinstance(p, Path)
+
+
+def test_ensure_output_dir_idempotent_for_existing_dir(tmp_path):
+    existing = tmp_path / "already_exists"
+    existing.mkdir()
+    p = ensure_output_dir(existing, is_file=False)
+    assert p == existing
+    assert p.is_dir()

From 758b07ecc4b41b8bd4b89020a2161fb6f46fd24b Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:12:52 +0200
Subject: [PATCH 12/17] test: cover auto-create dirs, new validations, new
 exception types

---
 tests/test_integration.py | 80 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/tests/test_integration.py b/tests/test_integration.py
index 35913d3..18d747f 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -9,6 +9,7 @@
 from krakenparser.counts.convert2csv import convert_to_csv
 from krakenparser.counts.processing_script import process_files
 from krakenparser.counts.split_mpa import split_mpa
+from krakenparser.mpa.mpa_table import combine_mpa
 from krakenparser.mpa.transform2mpa import kreport_to_mpa
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
 from krakenparser.stats.relabund import calculate_rel_abund
@@ -361,3 +362,82 @@ def test_split_mpa_genus_excludes_species_lines(combined_mpa_file, tmp_path):
 def test_split_mpa_missing_input_raises(tmp_path):
     with pytest.raises(FileNotFoundError):
         split_mpa(str(tmp_path / "ghost.txt"), str(tmp_path / "out"))
+
+
+# ---------------------------------------------------------------------------
+# auto-create output directories (ensure_output_dir behaviour)
+# ---------------------------------------------------------------------------
+
+
+def test_kreport_to_mpa_creates_output_dir(kreport_file, tmp_path):
+    out = tmp_path / "new_subdir" / "out.MPA.TXT"
+    kreport_to_mpa(str(kreport_file), str(out))
+    assert out.exists()
+
+
+def test_kreport_to_mpa_missing_input_raises(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        kreport_to_mpa(str(tmp_path / "ghost.kreport"), str(tmp_path / "out.MPA.TXT"))
+
+
+def test_convert_to_csv_creates_output_dir(counts_txt_file, tmp_path):
+    out = tmp_path / "new_subdir" / "counts.csv"
+    convert_to_csv(str(counts_txt_file), str(out))
+    assert out.exists()
+
+
+def test_relabund_creates_output_dir(counts_csv_file, tmp_path):
+    out = tmp_path / "new_subdir" / "ra.csv"
+    calculate_rel_abund(str(counts_csv_file), str(out))
+    assert out.exists()
+
+
+def test_alpha_div_creates_output_dir(counts_csv_file, tmp_path):
+    df = pd.read_csv(counts_csv_file, index_col=0)
+    out_dir = tmp_path / "new_dir" / "nested"
+    calc_alpha_div(df, out_dir)
+    assert (out_dir / "alpha_div.csv").exists()
+
+
+def test_beta_div_creates_output_dir(counts_csv_file, tmp_path):
+    df = pd.read_csv(counts_csv_file, index_col=0)
+    out_dir = tmp_path / "new_dir" / "nested"
+    calc_beta_div(df, out_dir, rarefaction_depth=1000, seed=42)
+    assert (out_dir / "beta_div_bray.csv").exists()
+
+
+# ---------------------------------------------------------------------------
+# combine_mpa — new input validation
+# ---------------------------------------------------------------------------
+
+SAMPLE_MPA_A = "#Classification\tsample1\nd__Bacteria|s__Pseudomonas_aeruginosa\t300\n"
+SAMPLE_MPA_B = "#Classification\tsample2\nd__Bacteria|s__Pseudomonas_aeruginosa\t100\n"
+
+
+def test_combine_mpa_creates_output_dir(tmp_path):
+    a = tmp_path / "a.MPA.TXT"
+    b = tmp_path / "b.MPA.TXT"
+    a.write_text(SAMPLE_MPA_A)
+    b.write_text(SAMPLE_MPA_B)
+    out = tmp_path / "new_subdir" / "COMBINED.txt"
+    combine_mpa([str(a), str(b)], str(out))
+    assert out.exists()
+
+
+def test_combine_mpa_missing_input_raises(tmp_path):
+    existing = tmp_path / "a.MPA.TXT"
+    existing.write_text(SAMPLE_MPA_A)
+    with pytest.raises(FileNotFoundError):
+        combine_mpa([str(existing), str(tmp_path / "ghost.MPA.TXT")], str(tmp_path / "out.txt"))
+
+
+# ---------------------------------------------------------------------------
+# process_files — destination must already exist (in-place modifier)
+# ---------------------------------------------------------------------------
+
+
+def test_process_files_missing_dest_still_raises(tmp_path):
+    source = tmp_path / "COMBINED.txt"
+    source.write_text("#Classification\tsample1.kreport\n")
+    with pytest.raises(FileNotFoundError):
+        process_files(str(source), str(tmp_path / "nonexistent.txt"))

From 87b9da04b319f6d8509d3af7ecbcfc745327fc4f Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:13:06 +0200
Subject: [PATCH 13/17] =?UTF-8?q?test(pipeline):=20fix=20overwrite=20test?=
 =?UTF-8?q?=20=E2=80=94=20FileExistsError,=20not=20SystemExit?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_full_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_full_pipeline.py b/tests/test_full_pipeline.py
index efa6854..72df80f 100644
--- a/tests/test_full_pipeline.py
+++ b/tests/test_full_pipeline.py
@@ -61,8 +61,8 @@ def test_pipeline_overwrite_protection(demo_run):
 
     run_pipeline(str(kreports_path))
 
-    # Second run without --overwrite must exit
-    with pytest.raises(SystemExit):
+    # Second run without --overwrite must raise (library function, not sys.exit)
+    with pytest.raises(FileExistsError):
         run_pipeline(str(kreports_path))
 
 

From fff6234b4e9eeeeb4027a678d4176e0d33a7d070 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:24:09 +0200
Subject: [PATCH 14/17] test: add CLI smoke tests for all main() entry points

---
 tests/test_cli.py | 199 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)
 create mode 100644 tests/test_cli.py

diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..27eadd5
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,199 @@
+"""Smoke tests for CLI entry-points (main() functions via sys.argv monkeypatching)."""
+
+import shutil
+import sys
+import warnings
+
+import pandas as pd
+import pytest
+
+from krakenparser.counts.convert2csv import main as convert2csv_main
+from krakenparser.counts.processing_script import main as processing_main
+from krakenparser.counts.split_mpa import main as split_mpa_main
+from krakenparser.mpa.mpa_table import main as mpa_table_main
+from krakenparser.mpa.transform2mpa import main as transform2mpa_main
+from krakenparser.pipeline import main as pipeline_main
+from krakenparser.stats.diversity import main as diversity_main
+from krakenparser.stats.relabund import main as relabund_main
+
+_MPA_A = "#Classification\tsample1\nd__Bacteria|s__Pseudomonas_aeruginosa\t300\n"
+_MPA_B = "#Classification\tsample2\nd__Bacteria|s__Pseudomonas_aeruginosa\t100\n"
+
+_COMBINED_MPA = (
+    "#Classification\tsample1\tsample2\n"
+    "d__Bacteria|p__Pseudomonadota|g__Pseudomonas|s__Pseudomonas_aeruginosa\t300\t100\n"
+    "d__Bacteria|p__Bacteroidota\t100\t80\n"
+)
+
+
+# ---------------------------------------------------------------------------
+# convert2csv
+# ---------------------------------------------------------------------------
+
+
+def test_convert2csv_main(counts_txt_file, tmp_path, monkeypatch):
+    out = tmp_path / "out.csv"
+    monkeypatch.setattr(sys, "argv", ["c2c", "-i", str(counts_txt_file), "-o", str(out)])
+    convert2csv_main()
+    assert out.exists()
+
+
+# ---------------------------------------------------------------------------
+# processing_script
+# ---------------------------------------------------------------------------
+
+
+def test_processing_main(tmp_path, monkeypatch):
+    source = tmp_path / "COMBINED.txt"
+    source.write_text("#Classification\tsample1.kreport\n")
+    dest = tmp_path / "counts.txt"
+    dest.write_text("s__Pseudomonas_aeruginosa\t100\n")
+    monkeypatch.setattr(sys, "argv", ["ps", "-i", str(source), "-o", str(dest)])
+    processing_main()
+
+
+# ---------------------------------------------------------------------------
+# split_mpa
+# ---------------------------------------------------------------------------
+
+
+def test_split_mpa_main(tmp_path, monkeypatch):
+    combined = tmp_path / "COMBINED.txt"
+    combined.write_text(_COMBINED_MPA)
+    out = tmp_path / "out"
+    monkeypatch.setattr(sys, "argv", ["sm", "-i", str(combined), "-o", str(out)])
+    split_mpa_main()
+    assert (out / "txt" / "counts_species.txt").exists()
+
+
+def test_split_mpa_main_viruses_only(tmp_path, monkeypatch):
+    combined = tmp_path / "COMBINED.txt"
+    combined.write_text(_COMBINED_MPA + "d__Viruses|s__Virus_X\t5\t3\n")
+    out = tmp_path / "out"
+    monkeypatch.setattr(
+        sys, "argv", ["sm", "-i", str(combined), "-o", str(out), "--viruses-only"]
+    )
+    split_mpa_main()
+
+
+def test_split_mpa_main_keep_human(tmp_path, monkeypatch):
+    combined = tmp_path / "COMBINED.txt"
+    combined.write_text(_COMBINED_MPA)
+    out = tmp_path / "out"
+    monkeypatch.setattr(
+        sys, "argv", ["sm", "-i", str(combined), "-o", str(out), "--keep-human"]
+    )
+    split_mpa_main()
+
+
+# ---------------------------------------------------------------------------
+# mpa_table
+# ---------------------------------------------------------------------------
+
+
+def test_mpa_table_main(tmp_path, monkeypatch):
+    a, b = tmp_path / "a.MPA.TXT", tmp_path / "b.MPA.TXT"
+    a.write_text(_MPA_A)
+    b.write_text(_MPA_B)
+    out = tmp_path / "COMBINED.txt"
+    monkeypatch.setattr(
+        sys, "argv", ["mt", "-i", str(a), str(b), "-o", str(out)]
+    )
+    mpa_table_main()
+    assert out.exists()
+
+
+# ---------------------------------------------------------------------------
+# transform2mpa
+# ---------------------------------------------------------------------------
+
+
+def test_transform2mpa_main_single(kreport_file, tmp_path, monkeypatch):
+    out = tmp_path / "out.MPA.TXT"
+    monkeypatch.setattr(
+        sys, "argv", ["t2m", "-r", str(kreport_file), "-o", str(out)]
+    )
+    transform2mpa_main()
+    assert out.exists()
+
+
+def test_transform2mpa_main_batch(kreport_file, tmp_path, monkeypatch):
+    kreports_dir = tmp_path / "kreports"
+    kreports_dir.mkdir()
+    shutil.copy(kreport_file, kreports_dir / kreport_file.name)
+    out_dir = tmp_path / "mpa_out"
+    monkeypatch.setattr(
+        sys, "argv", ["t2m", "-i", str(kreports_dir), "-o", str(out_dir)]
+    )
+    transform2mpa_main()
+    assert out_dir.is_dir()
+
+
+# ---------------------------------------------------------------------------
+# diversity
+# ---------------------------------------------------------------------------
+
+
+def test_diversity_main_with_seed(counts_csv_file, tmp_path, monkeypatch):
+    out_dir = tmp_path / "div"
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["div", "-i", str(counts_csv_file), "-o", str(out_dir), "-d", "1000", "-s", "42"],
+    )
+    diversity_main()
+    assert (out_dir / "alpha_div.csv").exists()
+
+
+def test_diversity_main_no_seed(counts_csv_file, tmp_path, monkeypatch):
+    out_dir = tmp_path / "div"
+    monkeypatch.setattr(
+        sys, "argv", ["div", "-i", str(counts_csv_file), "-o", str(out_dir), "-d", "1000"]
+    )
+    diversity_main()
+
+
+# ---------------------------------------------------------------------------
+# relabund
+# ---------------------------------------------------------------------------
+
+
+def test_relabund_main(counts_csv_file, tmp_path, monkeypatch):
+    out = tmp_path / "ra.csv"
+    monkeypatch.setattr(sys, "argv", ["ra", "-i", str(counts_csv_file), "-o", str(out)])
+    relabund_main()
+    assert out.exists()
+
+
+def test_relabund_main_with_other_threshold(counts_csv_file, tmp_path, monkeypatch):
+    out = tmp_path / "ra.csv"
+    monkeypatch.setattr(
+        sys, "argv", ["ra", "-i", str(counts_csv_file), "-o", str(out), "-O", "50"]
+    )
+    relabund_main()
+
+
+def test_relabund_warns_zero_abundance_sample(tmp_path):
+    """An all-zero sample row makes calculate_rel_abund warn about zero abundance."""
+    from krakenparser.stats.relabund import calculate_rel_abund
+
+    df = pd.DataFrame(
+        {"Sample_id": ["S1", "S2"], "Taxon_A": [0, 100], "Taxon_B": [0, 200]}
+    )
+    csv_in, out = tmp_path / "counts.csv", tmp_path / "ra.csv"
+    df.to_csv(csv_in, index=False)
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")
+        calculate_rel_abund(str(csv_in), str(out))
+    assert any("zero total abundance" in str(w.message) for w in caught)
+
+
+# ---------------------------------------------------------------------------
+# pipeline (error paths only — success path covered by test_full_pipeline.py)
+# ---------------------------------------------------------------------------
+
+
+def test_pipeline_main_missing_input_exits(tmp_path, monkeypatch):
+    monkeypatch.setattr(sys, "argv", ["pipeline", "-i", str(tmp_path / "ghost")])
+    with pytest.raises(SystemExit):
+        pipeline_main()

From da16a3bcf2ffd06459225401e0ec45c045289613 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:24:30 +0200
Subject: [PATCH 15/17] =?UTF-8?q?test:=20cover=20edge=20cases=20=E2=80=94?=
 =?UTF-8?q?=20=5Fstrip=5Fpath=5Fprefix,=20=5Fis=5Fprocessable,=20t=5F=5F?=
 =?UTF-8?q?=20filter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_integration.py | 47 +++++++++++++++++++++++++++++++++++++++
 tests/test_units.py       | 14 ++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/tests/test_integration.py b/tests/test_integration.py
index 18d747f..5ea580f 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -11,6 +11,7 @@
 from krakenparser.counts.split_mpa import split_mpa
 from krakenparser.mpa.mpa_table import combine_mpa
 from krakenparser.mpa.transform2mpa import kreport_to_mpa
+from krakenparser.pipeline import _is_processable
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
 from krakenparser.stats.relabund import calculate_rel_abund
 
@@ -441,3 +442,49 @@ def test_process_files_missing_dest_still_raises(tmp_path):
     source.write_text("#Classification\tsample1.kreport\n")
     with pytest.raises(FileNotFoundError):
         process_files(str(source), str(tmp_path / "nonexistent.txt"))
+
+
+# ---------------------------------------------------------------------------
+# split_mpa — t__ rows nested under a species must not reach counts_species.txt
+# ---------------------------------------------------------------------------
+
+
+def test_split_mpa_filters_terminal_rank_nodes(tmp_path):
+    combined = tmp_path / "COMBINED.txt"
+    combined.write_text(
+        "#Classification\tsample1\n"
+        "d__Bacteria|p__Pseudomonadota|s__Pseudomonas_aeruginosa\t300\n"
+        "d__Bacteria|p__Pseudomonadota|s__Pseudomonas_aeruginosa|t__strain_X\t10\n"
+    )
+    split_mpa(str(combined), str(tmp_path / "out"))
+    species = (tmp_path / "out" / "txt" / "counts_species.txt").read_text()
+    assert "t__" not in species
+
+
+# ---------------------------------------------------------------------------
+# _is_processable — hidden files, null bytes, non-UTF-8
+# ---------------------------------------------------------------------------
+
+
+def test_is_processable_hidden_file(tmp_path):
+    f = tmp_path / ".hidden"
+    f.write_text("content")
+    assert not _is_processable(f)
+
+
+def test_is_processable_null_bytes(tmp_path):
+    f = tmp_path / "binary.bin"
+    f.write_bytes(b"hello\x00world")
+    assert not _is_processable(f)
+
+
+def test_is_processable_non_utf8(tmp_path):
+    f = tmp_path / "latin1.txt"
+    f.write_bytes(b"\xff\xfe bad encoding")
+    assert not _is_processable(f)
+
+
+def test_is_processable_valid_kreport(tmp_path):
+    f = tmp_path / "sample.kreport"
+    f.write_text("50.0\t500\t100\tS\t1\tBacteria\n")
+    assert _is_processable(f)
diff --git a/tests/test_units.py b/tests/test_units.py
index 4f977e1..ee96f56 100644
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -6,6 +6,7 @@
 import pytest
 
 from krakenparser.counts.processing_script import modify_taxa_names
+from krakenparser.counts.split_mpa import _strip_path_prefix
 from krakenparser.mpa.transform2mpa import _parse_line
 from krakenparser.stats.diversity import chao1_index, pielou_evenness, shannon_index
 from krakenparser.utils import ensure_output_dir
@@ -151,6 +152,19 @@ def test_modify_taxa_names_count_fields_not_modified():
     assert result == "My taxon\t1_000\t2_000"
 
 
+# ---------------------------------------------------------------------------
+# _strip_path_prefix
+# ---------------------------------------------------------------------------
+
+
+def test_strip_path_prefix_tab_less_line():
+    assert _strip_path_prefix("no_tab_here") == "no_tab_here"
+
+
+def test_strip_path_prefix_normal():
+    assert _strip_path_prefix("d__Bacteria|s__E_coli\t100\t200") == "s__E_coli\t100\t200"
+
+
 # ---------------------------------------------------------------------------
 # ensure_output_dir
 # ---------------------------------------------------------------------------

From 672e9532573ed81fb4f781c0c88783c20ff14532 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:24:41 +0200
Subject: [PATCH 16/17] fix(codecov): correct status schema and move patch to
 informational

---
 codecov.yml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/codecov.yml b/codecov.yml
index 4200a1c..433678e 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,4 +1,9 @@
 coverage:
-  patch:
-    target: 78%
-    informational: true
\ No newline at end of file
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+    patch:
+      default:
+        informational: true

From ae1a78cd1db491fbdf014896ecae2524e7a1d20b Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Fri, 22 May 2026 15:27:42 +0200
Subject: [PATCH 17/17] style(scripts): apply Ruff formatter

---
 krakenparser/stats/diversity.py |  6 +++++-
 tests/test_cli.py               | 28 +++++++++++++++++++---------
 tests/test_integration.py       |  4 +++-
 tests/test_units.py             |  4 +++-
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/krakenparser/stats/diversity.py b/krakenparser/stats/diversity.py
index dc39eb4..4977f55 100644
--- a/krakenparser/stats/diversity.py
+++ b/krakenparser/stats/diversity.py
@@ -133,7 +133,11 @@ def main() -> None:
     )
     args = parser.parse_args()
 
-    seed_label = str(args.seed) if args.seed is not None else "not set (results will vary between runs)"
+    seed_label = (
+        str(args.seed)
+        if args.seed is not None
+        else "not set (results will vary between runs)"
+    )
     _log.info("Rarefaction depth: %d | seed: %s", args.depth, seed_label)
 
     input_file = Path(args.input)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 27eadd5..9badb19 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -33,7 +33,9 @@
 
 def test_convert2csv_main(counts_txt_file, tmp_path, monkeypatch):
     out = tmp_path / "out.csv"
-    monkeypatch.setattr(sys, "argv", ["c2c", "-i", str(counts_txt_file), "-o", str(out)])
+    monkeypatch.setattr(
+        sys, "argv", ["c2c", "-i", str(counts_txt_file), "-o", str(out)]
+    )
     convert2csv_main()
     assert out.exists()
 
@@ -96,9 +98,7 @@ def test_mpa_table_main(tmp_path, monkeypatch):
     a.write_text(_MPA_A)
     b.write_text(_MPA_B)
     out = tmp_path / "COMBINED.txt"
-    monkeypatch.setattr(
-        sys, "argv", ["mt", "-i", str(a), str(b), "-o", str(out)]
-    )
+    monkeypatch.setattr(sys, "argv", ["mt", "-i", str(a), str(b), "-o", str(out)])
     mpa_table_main()
     assert out.exists()
 
@@ -110,9 +110,7 @@ def test_mpa_table_main(tmp_path, monkeypatch):
 
 def test_transform2mpa_main_single(kreport_file, tmp_path, monkeypatch):
     out = tmp_path / "out.MPA.TXT"
-    monkeypatch.setattr(
-        sys, "argv", ["t2m", "-r", str(kreport_file), "-o", str(out)]
-    )
+    monkeypatch.setattr(sys, "argv", ["t2m", "-r", str(kreport_file), "-o", str(out)])
     transform2mpa_main()
     assert out.exists()
 
@@ -139,7 +137,17 @@ def test_diversity_main_with_seed(counts_csv_file, tmp_path, monkeypatch):
     monkeypatch.setattr(
         sys,
         "argv",
-        ["div", "-i", str(counts_csv_file), "-o", str(out_dir), "-d", "1000", "-s", "42"],
+        [
+            "div",
+            "-i",
+            str(counts_csv_file),
+            "-o",
+            str(out_dir),
+            "-d",
+            "1000",
+            "-s",
+            "42",
+        ],
     )
     diversity_main()
     assert (out_dir / "alpha_div.csv").exists()
@@ -148,7 +156,9 @@ def test_diversity_main_with_seed(counts_csv_file, tmp_path, monkeypatch):
 def test_diversity_main_no_seed(counts_csv_file, tmp_path, monkeypatch):
     out_dir = tmp_path / "div"
     monkeypatch.setattr(
-        sys, "argv", ["div", "-i", str(counts_csv_file), "-o", str(out_dir), "-d", "1000"]
+        sys,
+        "argv",
+        ["div", "-i", str(counts_csv_file), "-o", str(out_dir), "-d", "1000"],
     )
     diversity_main()
 
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 5ea580f..3fe61e6 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -429,7 +429,9 @@ def test_combine_mpa_missing_input_raises(tmp_path):
     existing = tmp_path / "a.MPA.TXT"
     existing.write_text(SAMPLE_MPA_A)
     with pytest.raises(FileNotFoundError):
-        combine_mpa([str(existing), str(tmp_path / "ghost.MPA.TXT")], str(tmp_path / "out.txt"))
+        combine_mpa(
+            [str(existing), str(tmp_path / "ghost.MPA.TXT")], str(tmp_path / "out.txt")
+        )
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_units.py b/tests/test_units.py
index ee96f56..414d87d 100644
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -162,7 +162,9 @@ def test_strip_path_prefix_tab_less_line():
 
 
 def test_strip_path_prefix_normal():
-    assert _strip_path_prefix("d__Bacteria|s__E_coli\t100\t200") == "s__E_coli\t100\t200"
+    assert (
+        _strip_path_prefix("d__Bacteria|s__E_coli\t100\t200") == "s__E_coli\t100\t200"
+    )
 
 
 # ---------------------------------------------------------------------------