PopovIILab · iliapopov17 · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -27,14 +27,14 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 setuptools wheel
+        python -m pip install ruff setuptools wheel
         pip install -e ".[dev]" --no-build-isolation
-    - name: Lint with flake8
+    - name: Lint with ruff
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        # Run linter and fail on any rule violation
+        ruff check .
+        # Check that the code formatting matches the Ruff style guide
+        ruff format --check .
     - name: Test with pytest
       run: |
         pytest --cov=krakenparser --cov-report=xml

diff --git a/codecov.yml b/codecov.yml
@@ -1,4 +1,9 @@
 coverage:
-  patch:
-    target: 78%
-    informational: true
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+    patch:
+      default:
+        informational: true
diff --git a/krakenparser/counts/convert2csv.py b/krakenparser/counts/convert2csv.py
@@ -6,24 +6,24 @@
 
 import pandas as pd
 
+from krakenparser.utils import ensure_output_dir
+
 _log = logging.getLogger(__name__)
 
 
 def convert_to_csv(input_file, output_file):
     in_path = Path(input_file)
     if not in_path.is_file():
         raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = Path(output_file)
-    if not out_path.parent.exists():
-        raise FileNotFoundError(f"Output directory does not exist: {out_path.parent}")
+    out_path = ensure_output_dir(output_file, is_file=True)
 
     data = pd.read_csv(in_path, sep="\t", index_col=0)
     data.T.to_csv(out_path, index_label="Sample_id")
     _log.info("Data converted and saved as '%s'.", output_file)
 
 
-if __name__ == "__main__":
-    # Use argparse to handle command-line arguments
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Reads a TXT file, reorganizes the data, and converts it into a CSV file."
     )
@@ -39,8 +39,9 @@ def convert_to_csv(input_file, output_file):
         required=True,
         help="Path to the output CSV file. The script will restructure the data and save it here.",
     )
-
     args = parser.parse_args()
-
-    # Call function with parsed arguments
     convert_to_csv(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/krakenparser/counts/processing_script.py b/krakenparser/counts/processing_script.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python
 
 import argparse
+import logging
 import os
 import tempfile
 from pathlib import Path
 
+_log = logging.getLogger(__name__)
+
 
 def modify_taxa_names(line):
     prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
@@ -47,11 +50,11 @@ def process_files(source_file, destination_file):
         tmp_path = tmp.name
     os.replace(tmp_path, dest_path)
 
-    print(f"Processed {destination_file} successfully.")
+    _log.info("Processed %s successfully.", destination_file)
 
 
-if __name__ == "__main__":
-    # Use argparse to parse command-line arguments
+def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Reads a source file, processes its first line, modifies taxa names in a destination file, and updates it."
     )
@@ -67,8 +70,9 @@ def process_files(source_file, destination_file):
         required=True,
         help="Path to the destination file. This file's contents will be updated with cleaned taxa names.",
     )
-
     args = parser.parse_args()
-
-    # Call the function with parsed arguments
     process_files(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/krakenparser/counts/split_mpa.py b/krakenparser/counts/split_mpa.py
@@ -9,6 +9,8 @@
 import re
 from pathlib import Path
 
+from krakenparser.utils import ensure_output_dir
+
 _log = logging.getLogger(__name__)
 
 
@@ -53,8 +55,8 @@ def split_mpa(
     in_path = Path(input_file)
     if not in_path.is_file():
         raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = Path(output_dir)
-    (out_path / "txt").mkdir(parents=True, exist_ok=True)
+    out_path = ensure_output_dir(output_dir, is_file=False)
+    (out_path / "txt").mkdir(parents=True, exist_ok=True)
 
     lines = in_path.read_text().splitlines()
     data_lines = [ln for ln in lines if not ln.startswith("#") and ln.strip()]
@@ -86,6 +88,7 @@ def split_mpa(
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Split a combined MPA table into per-rank TXT files."
     )

diff --git a/krakenparser/kpplot/__init__.py b/krakenparser/kpplot/__init__.py
@@ -1 +1,3 @@
 from .base import KpPlotBase
+
+__all__ = ["KpPlotBase"]
diff --git a/krakenparser/mpa/mpa_table.py b/krakenparser/mpa/mpa_table.py
@@ -3,17 +3,25 @@
 
 import argparse
 import logging
+from pathlib import Path
+
+from krakenparser.utils import ensure_output_dir
 
 _log = logging.getLogger(__name__)
 
 
 def combine_mpa(in_files: list[str], o_file: str) -> None:
+    out_path = ensure_output_dir(o_file, is_file=True)
     # Plain dict preserves insertion order (Python 3.7+).
     taxa: dict[str, dict[int, str]] = {}
     sample_names: list[str] = []
 
     _log.info("Number of files to parse: %d", len(in_files))
 
+    for in_path in in_files:
+        if not Path(in_path).is_file():
+            raise FileNotFoundError(f"Input file not found: {in_path}")
+
     for idx, in_path in enumerate(in_files):
         sample_name = f"Sample #{idx + 1}"
         with open(in_path) as fh:
@@ -39,7 +47,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:
     n_taxa = len(taxa)
     _log.info("Number of classifications to write: %d", n_taxa)
 
-    with open(o_file, "w") as fh:
+    with open(out_path, "w") as fh:
         fh.write("#Classification\t" + "\t".join(sample_names) + "\n")
         for taxon, counts in taxa.items():
             row = [counts.get(i, "0") for i in range(n_samples)]
@@ -49,6 +57,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Combine MPA files into a single tab-delimited table."
     )

diff --git a/krakenparser/mpa/transform2mpa.py b/krakenparser/mpa/transform2mpa.py
@@ -2,10 +2,13 @@
 """Convert a Kraken2 report to MetaPhlAn (MPA) format."""
 
 import argparse
+import logging
 import os
 import sys
 from pathlib import Path
 
+from krakenparser.utils import ensure_output_dir
+
 # Maps Kraken2 single-letter rank codes to MPA prefixes
 _RANK_PREFIX = {
     "D": "d",
@@ -18,6 +21,8 @@
     "S": "s",
 }
 
+_log = logging.getLogger(__name__)
+
 
 def _parse_line(line: str):
     """
@@ -67,10 +72,13 @@ def kreport_to_mpa(
     depth d is encountered, all stack entries with depth >= d are popped
     before the new entry is pushed, keeping the path consistent.
     """
+    if not Path(report_path).is_file():
+        raise FileNotFoundError(f"Input file not found: {report_path}")
+    out_path = ensure_output_dir(output_path, is_file=True)
     # Stack entries: (structural_depth, mpa_segment, is_standard_rank)
     stack: list[tuple[int, str, bool]] = []
 
-    with open(report_path) as r_fh, open(output_path, "w") as o_fh:
+    with open(report_path) as r_fh, open(out_path, "w") as o_fh:
         if display_header:
             o_fh.write("#Classification\t" + os.path.basename(report_path) + "\n")
 
@@ -110,6 +118,7 @@ def kreport_to_mpa(
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
     parser = argparse.ArgumentParser(
         description="Convert a Kraken2 report to MetaPhlAn (MPA) format."
     )
@@ -203,7 +212,7 @@ def main() -> None:
                 continue
             out_name = f.name.replace(".kreport", ".MPA.TXT")
             kreport_to_mpa(str(f), str(output_dir / out_name), **kwargs)
-        print(f"Converted to MPA successfully. Output stored in {output_dir}")
+        _log.info("Converted to MPA successfully. Output stored in %s", output_dir)
     else:
         kreport_to_mpa(args.r_file, args.o_file, **kwargs)
 

diff --git a/krakenparser/pipeline.py b/krakenparser/pipeline.py
@@ -7,8 +7,6 @@
 import sys
 from pathlib import Path
 
-_log = logging.getLogger(__name__)
-
 import pandas as pd
 
 from krakenparser.counts.convert2csv import convert_to_csv
@@ -19,6 +17,8 @@
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
 from krakenparser.stats.relabund import calculate_rel_abund
 
+_log = logging.getLogger(__name__)
+
 
 def _is_processable(path: Path) -> bool:
     """Return False for hidden files, files with null bytes, or non-UTF-8 files."""
@@ -47,16 +47,16 @@ def run_pipeline(
 ) -> None:
     source_dir = Path(input_dir)
     if not source_dir.is_dir():
-        sys.exit(f"Error: input directory not found: {source_dir}")
+        raise FileNotFoundError(f"Input directory not found: {source_dir}")
 
     out_dir = Path(output_dir) if output_dir else source_dir.parent
     out_dir.mkdir(parents=True, exist_ok=True)
 
     existing = [out_dir / d for d in _OUTPUT_SUBDIRS if (out_dir / d).exists()]
     if existing and not overwrite:
         names = ", ".join(d.name for d in existing)
-        sys.exit(
-            f"Error: output already exists in '{out_dir}' ({names}).\n"
+        raise FileExistsError(
+            f"Output already exists in '{out_dir}' ({names}).\n"
             "Use --overwrite to overwrite it."
         )
     if overwrite:
@@ -164,14 +164,17 @@ def main() -> None:
         help="Overwrite the output directory if it already exists",
     )
     args = parser.parse_args()
-    run_pipeline(
-        args.input,
-        args.output,
-        keep_human=args.keep_human,
-        rarefaction_depth=args.depth,
-        seed=args.seed,
-        overwrite=args.overwrite,
-    )
+    try:
+        run_pipeline(
+            args.input,
+            args.output,
+            keep_human=args.keep_human,
+            rarefaction_depth=args.depth,
+            seed=args.seed,
+            overwrite=args.overwrite,
+        )
+    except (FileNotFoundError, FileExistsError) as e:
+        sys.exit(f"Error: {e}")
 
 
 if __name__ == "__main__":
Original file line number	Diff line number	Diff line change
		@@ -1 +1,3 @@
		from .base import KpPlotBase

		__all__ = ["KpPlotBase"]