Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
c60cfde
fix(kpplot): add explicit re-export for KpPlotBase in init
iliapopov17 May 22, 2026
ea10187
ci: migrate workflow from flake8 to ruff
iliapopov17 May 22, 2026
28eed07
style(script): apply Ruff import sorting
iliapopov17 May 22, 2026
15267a4
refactor(types): fix Pylance warnings in calc_beta_div
iliapopov17 May 22, 2026
26afe67
feat(utils): add ensure_output_dir helper
iliapopov17 May 22, 2026
3993806
refactor(mpa): use ensure_output_dir, add logging and input validation
iliapopov17 May 22, 2026
40e401d
refactor(counts): use ensure_output_dir and extract main()
iliapopov17 May 22, 2026
9e2d738
refactor(counts): extract main(), add logging, restore dest-file validation
iliapopov17 May 22, 2026
c320f0f
refactor(stats): use ensure_output_dir, add logging and startup info
iliapopov17 May 22, 2026
17393de
refactor(pipeline): raise exceptions instead of sys.exit in run_pipeline
iliapopov17 May 22, 2026
2f51ffb
test: add ensure_output_dir unit tests
iliapopov17 May 22, 2026
758b07e
test: cover auto-create dirs, new validations, new exception types
iliapopov17 May 22, 2026
87b9da0
test(pipeline): fix overwrite test — FileExistsError, not SystemExit
iliapopov17 May 22, 2026
fff6234
test: add CLI smoke tests for all main() entry points
iliapopov17 May 22, 2026
da16a3b
test: cover edge cases — _strip_path_prefix, _is_processable, t__ filter
iliapopov17 May 22, 2026
672e953
fix(codecov): correct status schema and move patch to informational
iliapopov17 May 22, 2026
ae1a78c
style(scripts): apply Ruff formatter
iliapopov17 May 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 setuptools wheel
python -m pip install ruff setuptools wheel
pip install -e ".[dev]" --no-build-isolation
- name: Lint with flake8
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# Run the linter and fail on any rule violation
ruff check .
# Check that the code formatting matches the Ruff style guide
ruff format --check .
- name: Test with pytest
run: |
pytest --cov=krakenparser --cov-report=xml
Expand Down
11 changes: 8 additions & 3 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
coverage:
patch:
target: 78%
informational: true
status:
project:
default:
target: auto
threshold: 1%
patch:
default:
informational: true
17 changes: 9 additions & 8 deletions krakenparser/counts/convert2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@

import pandas as pd

from krakenparser.utils import ensure_output_dir

_log = logging.getLogger(__name__)


def convert_to_csv(input_file, output_file):
in_path = Path(input_file)
if not in_path.is_file():
raise FileNotFoundError(f"Input file not found: {in_path}")
out_path = Path(output_file)
if not out_path.parent.exists():
raise FileNotFoundError(f"Output directory does not exist: {out_path.parent}")
out_path = ensure_output_dir(output_file, is_file=True)

data = pd.read_csv(in_path, sep="\t", index_col=0)
data.T.to_csv(out_path, index_label="Sample_id")
_log.info("Data converted and saved as '%s'.", output_file)


if __name__ == "__main__":
# Use argparse to handle command-line arguments
def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(message)s")
parser = argparse.ArgumentParser(
description="Reads a TXT file, reorganizes the data, and converts it into a CSV file."
)
Expand All @@ -39,8 +39,9 @@ def convert_to_csv(input_file, output_file):
required=True,
help="Path to the output CSV file. The script will restructure the data and save it here.",
)

args = parser.parse_args()

# Call function with parsed arguments
convert_to_csv(args.input, args.output)


if __name__ == "__main__":
main()
16 changes: 10 additions & 6 deletions krakenparser/counts/processing_script.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#!/usr/bin/env python

import argparse
import logging
import os
import tempfile
from pathlib import Path

_log = logging.getLogger(__name__)


def modify_taxa_names(line):
prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
Expand Down Expand Up @@ -47,11 +50,11 @@ def process_files(source_file, destination_file):
tmp_path = tmp.name
os.replace(tmp_path, dest_path)

print(f"Processed {destination_file} successfully.")
_log.info(f"Processed {destination_file} successfully.")


if __name__ == "__main__":
# Use argparse to parse command-line arguments
def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(message)s")
parser = argparse.ArgumentParser(
description="Reads a source file, processes its first line, modifies taxa names in a destination file, and updates it."
)
Expand All @@ -67,8 +70,9 @@ def process_files(source_file, destination_file):
required=True,
help="Path to the destination file. This file's contents will be updated with cleaned taxa names.",
)

args = parser.parse_args()

# Call the function with parsed arguments
process_files(args.input, args.output)


if __name__ == "__main__":
main()
7 changes: 5 additions & 2 deletions krakenparser/counts/split_mpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import re
from pathlib import Path

from krakenparser.utils import ensure_output_dir

_log = logging.getLogger(__name__)


Expand Down Expand Up @@ -53,8 +55,8 @@ def split_mpa(
in_path = Path(input_file)
if not in_path.is_file():
raise FileNotFoundError(f"Input file not found: {in_path}")
out_path = Path(output_dir)
(out_path / "txt").mkdir(parents=True, exist_ok=True)
out_path = ensure_output_dir(output_dir, is_file=False)
(out_path / "txt").mkdir(exist_ok=True)

lines = in_path.read_text().splitlines()
data_lines = [ln for ln in lines if not ln.startswith("#") and ln.strip()]
Expand Down Expand Up @@ -86,6 +88,7 @@ def split_mpa(


def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(message)s")
parser = argparse.ArgumentParser(
description="Split a combined MPA table into per-rank TXT files."
)
Expand Down
2 changes: 2 additions & 0 deletions krakenparser/kpplot/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .base import KpPlotBase

__all__ = ["KpPlotBase"]
11 changes: 10 additions & 1 deletion krakenparser/mpa/mpa_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,25 @@

import argparse
import logging
from pathlib import Path

from krakenparser.utils import ensure_output_dir

_log = logging.getLogger(__name__)


def combine_mpa(in_files: list[str], o_file: str) -> None:
out_path = ensure_output_dir(o_file, is_file=True)
# Plain dict preserves insertion order (Python 3.7+).
taxa: dict[str, dict[int, str]] = {}
sample_names: list[str] = []

_log.info("Number of files to parse: %d", len(in_files))

for in_path in in_files:
if not Path(in_path).is_file():
raise FileNotFoundError(f"Input file not found: {in_path}")

for idx, in_path in enumerate(in_files):
sample_name = f"Sample #{idx + 1}"
with open(in_path) as fh:
Expand All @@ -39,7 +47,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:
n_taxa = len(taxa)
_log.info("Number of classifications to write: %d", n_taxa)

with open(o_file, "w") as fh:
with open(out_path, "w") as fh:
fh.write("#Classification\t" + "\t".join(sample_names) + "\n")
for taxon, counts in taxa.items():
row = [counts.get(i, "0") for i in range(n_samples)]
Expand All @@ -49,6 +57,7 @@ def combine_mpa(in_files: list[str], o_file: str) -> None:


def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(message)s")
parser = argparse.ArgumentParser(
description="Combine MPA files into a single tab-delimited table."
)
Expand Down
13 changes: 11 additions & 2 deletions krakenparser/mpa/transform2mpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
"""Convert a Kraken2 report to MetaPhlAn (MPA) format."""

import argparse
import logging
import os
import sys
from pathlib import Path

from krakenparser.utils import ensure_output_dir

# Maps Kraken2 single-letter rank codes to MPA prefixes
_RANK_PREFIX = {
"D": "d",
Expand All @@ -18,6 +21,8 @@
"S": "s",
}

_log = logging.getLogger(__name__)


def _parse_line(line: str):
"""
Expand Down Expand Up @@ -67,10 +72,13 @@ def kreport_to_mpa(
depth d is encountered, all stack entries with depth >= d are popped
before the new entry is pushed, keeping the path consistent.
"""
if not Path(report_path).is_file():
raise FileNotFoundError(f"Input file not found: {report_path}")
out_path = ensure_output_dir(output_path, is_file=True)
# Stack entries: (structural_depth, mpa_segment, is_standard_rank)
stack: list[tuple[int, str, bool]] = []

with open(report_path) as r_fh, open(output_path, "w") as o_fh:
with open(report_path) as r_fh, open(out_path, "w") as o_fh:
if display_header:
o_fh.write("#Classification\t" + os.path.basename(report_path) + "\n")

Expand Down Expand Up @@ -110,6 +118,7 @@ def kreport_to_mpa(


def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(message)s")
parser = argparse.ArgumentParser(
description="Convert a Kraken2 report to MetaPhlAn (MPA) format."
)
Expand Down Expand Up @@ -203,7 +212,7 @@ def main() -> None:
continue
out_name = f.name.replace(".kreport", ".MPA.TXT")
kreport_to_mpa(str(f), str(output_dir / out_name), **kwargs)
print(f"Converted to MPA successfully. Output stored in {output_dir}")
_log.info(f"Converted to MPA successfully. Output stored in {output_dir}")
else:
kreport_to_mpa(args.r_file, args.o_file, **kwargs)

Expand Down
29 changes: 16 additions & 13 deletions krakenparser/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import sys
from pathlib import Path

_log = logging.getLogger(__name__)

import pandas as pd

from krakenparser.counts.convert2csv import convert_to_csv
Expand All @@ -19,6 +17,8 @@
from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
from krakenparser.stats.relabund import calculate_rel_abund

_log = logging.getLogger(__name__)


def _is_processable(path: Path) -> bool:
"""Return False for hidden files, files with null bytes, or non-UTF-8 files."""
Expand Down Expand Up @@ -47,16 +47,16 @@ def run_pipeline(
) -> None:
source_dir = Path(input_dir)
if not source_dir.is_dir():
sys.exit(f"Error: input directory not found: {source_dir}")
raise FileNotFoundError(f"Input directory not found: {source_dir}")

out_dir = Path(output_dir) if output_dir else source_dir.parent
out_dir.mkdir(parents=True, exist_ok=True)

existing = [out_dir / d for d in _OUTPUT_SUBDIRS if (out_dir / d).exists()]
if existing and not overwrite:
names = ", ".join(d.name for d in existing)
sys.exit(
f"Error: output already exists in '{out_dir}' ({names}).\n"
raise FileExistsError(
f"Output already exists in '{out_dir}' ({names}).\n"
"Use --overwrite to overwrite it."
)
if overwrite:
Expand Down Expand Up @@ -164,14 +164,17 @@ def main() -> None:
help="Overwrite the output directory if it already exists",
)
args = parser.parse_args()
run_pipeline(
args.input,
args.output,
keep_human=args.keep_human,
rarefaction_depth=args.depth,
seed=args.seed,
overwrite=args.overwrite,
)
try:
run_pipeline(
args.input,
args.output,
keep_human=args.keep_human,
rarefaction_depth=args.depth,
seed=args.seed,
overwrite=args.overwrite,
)
except (FileNotFoundError, FileExistsError) as e:
sys.exit(f"Error: {e}")


if __name__ == "__main__":
Expand Down
Loading
Loading