PopovIILab · iliapopov17 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/krakenparser/__init__.py b/krakenparser/__init__.py
@@ -2,7 +2,7 @@
 from .kpplot.stackedbar import stacked_barplot
 from .kpplot.streamgraph import streamgraph
 
-__all__ = [
+__all__: list[str] = [
     "stacked_barplot",
     "streamgraph",
     "clustermap",

diff --git a/krakenparser/counts/convert2csv.py b/krakenparser/counts/convert2csv.py
@@ -1,6 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+"""Matrix manipulation utility for restructuring metagenomic abundance tables.
+
+This module converts tab-delimited abundance tables (traditionally structured
+with features/taxa as rows and samples as columns) into standardized,
+transposed CSV sheets conforming to the tidy data format (samples as rows).
+"""
+
 import logging
-import sys
 from pathlib import Path
 from typing import Optional
 
@@ -9,40 +15,59 @@
 
 from krakenparser.utils import ensure_output_dir
 
-_log = logging.getLogger(__name__)
+# Module-level logger, named after this module via __name__
+_log: logging.Logger = logging.getLogger(__name__)
 
-app = typer.Typer(
+# Typer application for this module, registered under the name "csv"
+app: typer.Typer = typer.Typer(
     name="csv",
     add_completion=False,
     context_settings={"help_option_names": ["-h", "--help"]},
 )
 
 
-def convert_to_csv(input_file: str, output_file: str) -> None:
-    in_path = Path(input_file)
-    if not in_path.is_file():
-        raise FileNotFoundError(f"Input file not found: {in_path}")
-    out_path = ensure_output_dir(output_file, is_file=True)
+def convert_to_csv(input_file: Path, output_file: Path) -> None:
+    """Transpose a tab-separated matrix and export it as a sample-centric CSV.
+
+    Reads a matrix where columns represent samples and rows represent taxa,
+    performs an algebraic transposition operation (.T), and locks the new row
+    index under the canonical 'Sample_id' header label.
+
+    Args:
+        input_file: Path to the validated incoming tab-separated matrix file.
+        output_file: Target path where the restructured CSV matrix will be dumped.
 
-    data = pd.read_csv(in_path, sep="\t", index_col=0)
+    Raises:
+        FileNotFoundError: If input_file does not exist or is not a regular file.
+    """
+    if not input_file.is_file():
+        raise FileNotFoundError(f"Input file not found: {input_file}")
+
+    out_path: Path = ensure_output_dir(output_file, is_file=True)
+
+    # Load high-dimensional matrix (Rows: Taxa, Columns: Samples)
+    data: pd.DataFrame = pd.read_csv(input_file, sep="\t", index_col=0)
+
+    # Execute matrix transposition to shift samples to rows (Tidy Data layout)
     data.T.to_csv(out_path, index_label="Sample_id")
-    _log.info("Data converted and saved as '%s'.", output_file)
+
+    _log.info("Data successfully transposed and saved to '%s'.", output_file)
 
 
 @app.callback(invoke_without_command=True)
 def main(
     ctx: typer.Context,
-    input_file: Optional[str] = typer.Option(
+    input_file: Optional[Path] = typer.Option(
         None,
         "-i",
         "--input",
-        help="Path to the input TXT file. This file should contain sample names in columns and microbial taxa in rows.",
+        help="Path to the input tab-delimited TXT file (samples in columns, taxa in rows).",
     ),
-    output_file: Optional[str] = typer.Option(
+    output_file: Optional[Path] = typer.Option(
         None,
         "-o",
         "--output",
-        help="Path to the output CSV file. The script will restructure the data and save it here.",
+        help="Path to the output transposed CSV file.",
     ),
 ) -> None:
     """Reads a TXT file, reorganizes the data, and converts it into a CSV file."""
@@ -53,16 +78,13 @@ def main(
         raise typer.Exit()
 
     if not input_file or not output_file:
-        print(
-            "Error: Missing required options '-i / --input' and '-o / --output'.",
-            file=sys.stderr,
-        )
+        typer.echo("Error: Missing required options '-i / --input' and '-o / --output'.", err=True)  # usage errors go to stderr
         raise typer.Exit(code=1)
 
     try:
         convert_to_csv(input_file, output_file)
     except FileNotFoundError as e:
-        print(f"Error: {e}", file=sys.stderr)
+        typer.echo(f"Error: {e}", err=True)  # keep diagnostics on stderr so stdout stays clean
         raise typer.Exit(code=1)
 
 

diff --git a/krakenparser/counts/processing_script.py b/krakenparser/counts/processing_script.py
@@ -1,4 +1,11 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+"""Post-processing matrix utility for metadata refinement and taxonomic sanitization.
+
+This module cleans upstream pipeline artifacts by removing technical file extensions
+from sample headers and restoring canonical spaces to underscore-separated taxonomic
+nomenclature strings (e.g., converting 's__Escherichia_coli' to 'Escherichia coli').
+File mutations are executed via atomic filesystem transactions.
+"""
 
 import logging
 import os
@@ -9,73 +16,107 @@
 
 import typer
 
-_log = logging.getLogger(__name__)
+# Module-level logger, named after this module via __name__
+_log: logging.Logger = logging.getLogger(__name__)
 
-app = typer.Typer(
+# Typer application for this module, registered under the name "process"
+app: typer.Typer = typer.Typer(
     name="process",
     add_completion=False,
     context_settings={"help_option_names": ["-h", "--help"]},
 )
 
 
 def modify_taxa_names(line: str) -> str:
-    prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
+    """Sanitize taxonomic names by replacing internal underscores with spaces.
+
+    Checks whether the line starts with a taxonomic rank prefix (s__, g__, etc.).
+    If so, the prefix is stripped and underscores in the first tab-separated field
+    are replaced with spaces; any trailing tab-separated fields are left unchanged.
+
+    Args:
+        line: A raw text row from the matrix containing taxonomic descriptors.
+
+    Returns:
+        str: The structurally preserved string with restored space characters.
+    """
+    prefixes: list[str] = ["s__", "g__", "f__", "o__", "c__", "p__"]
     for prefix in prefixes:
         if line.startswith(prefix):
-            parts = line[len(prefix) :].split("\t")
+            # Drop the rank prefix, then split the row into its tab-separated fields
+            parts: list[str] = line.removeprefix(prefix).split("\t")
             parts[0] = parts[0].replace("_", " ")
             return "\t".join(parts)
     return line
 
 
-def process_files(source_file: str, destination_file: str) -> None:
-    src_path = Path(source_file)
-    if not src_path.is_file():
-        raise FileNotFoundError(f"Source file not found: {src_path}")
-    dest_path = Path(destination_file)
-    if not dest_path.is_file():
-        raise FileNotFoundError(f"Destination file not found: {dest_path}")
+def process_files(source_file: Path, destination_file: Path) -> None:
+    """Synchronize matrix headers and sanitize taxonomic profiles atomically.
+
+    Reads the first line of the source file and truncates each whitespace-separated
+    token at its first '.', cleans the taxa name on every destination line, then
+    prepends that header and replaces the destination file via a temp-file swap.
 
-    # Read the first line from the source file and modify it
-    with open(src_path, "r") as file:
-        first_line_source = file.readline()
-    modified_first_line = "\t".join(
+    Args:
+        source_file: Validated Path to the template matrix containing pristine headers.
+        destination_file: Target Path to the file undergoing line-by-line taxonomy cleaning.
+
+    Raises:
+        FileNotFoundError: Triggered if either the source or destination targets are absent.
+    """
+    if not source_file.is_file():
+        raise FileNotFoundError(f"Source file not found: {source_file}")
+    if not destination_file.is_file():
+        raise FileNotFoundError(f"Destination file not found: {destination_file}")
+
+    # Step 1: Read and truncate raw pipeline suffixes from sample headers
+    with open(source_file, "r", encoding="utf-8") as file:
+        first_line_source: str = file.readline()
+
+    modified_first_line: str = "\t".join(
         word.split(".")[0] for word in first_line_source.split()
     )
 
-    # Read all content from the destination file and modify taxa names
-    with open(dest_path, "r") as file:
-        lines = file.readlines()
-    modified_lines = [modify_taxa_names(line.strip()) for line in lines]
+    # Step 2: Read every destination line and clean its taxa name (eager, in memory)
+    with open(destination_file, "r", encoding="utf-8") as file:
+        lines: list[str] = file.readlines()
+
+    modified_lines: list[str] = [modify_taxa_names(line.strip()) for line in lines]
 
-    # Combine the modified first line with the modified content of the destination file
-    updated_content = modified_first_line + "\n" + "\n".join(modified_lines)
+    # Step 3: Prepend the cleaned header to the cleaned destination lines
+    joined_lines: str = "\n".join(modified_lines)
+    updated_content: str = f"{modified_first_line}\n{joined_lines}"
 
-    # Write atomically: write to a temp file in the same directory, then replace
+    # Write to a temp file in the destination's directory, then swap it into place
     with tempfile.NamedTemporaryFile(
-        mode="w", dir=dest_path.parent, delete=False, suffix=".tmp"
+        mode="w",
+        dir=destination_file.parent,
+        delete=False,
+        suffix=".tmp",
+        encoding="utf-8",
     ) as tmp:
         tmp.write(updated_content)
-        tmp_path = tmp.name
-    os.replace(tmp_path, dest_path)
+        tmp_path: str = tmp.name
 
-    _log.info(f"Processed {destination_file} successfully.")
+    # os.replace swaps the temp file in for the destination (the rename is atomic on POSIX)
+    os.replace(tmp_path, destination_file)
+    _log.info("Processed '%s' successfully.", destination_file)
 
 
 @app.callback(invoke_without_command=True)
 def main(
     ctx: typer.Context,
-    input_file: Optional[str] = typer.Option(
+    input_file: Optional[Path] = typer.Option(
         None,
         "-i",
         "--input",
-        help="Path to the source file. This file's first line will be read and modified.",
+        help="Path to the source file (used to extract and truncate header labels).",
     ),
-    output_file: Optional[str] = typer.Option(
+    output_file: Optional[Path] = typer.Option(
         None,
         "-o",
         "--output",
-        help="Path to the destination file. This file's contents will be updated with cleaned taxa names.",
+        help="Path to the destination matrix undergoing taxonomic name sanitation.",
     ),
 ) -> None:
     """Reads a source file, processes its first line, modifies taxa names in a destination file, and updates it."""