From fdf4adcc263cc80606442cad44d2963ea6bf050e Mon Sep 17 00:00:00 2001
From: m-jahn <jahn@mpusp.mpg.de>
Date: Mon, 30 Mar 2026 16:05:32 +0200
Subject: [PATCH 1/8] fix: update CI workflows

---
 .github/workflows/conventional-prs.yml | 12 ++----
 .github/workflows/deploy-apptainer.yml | 17 ++++++++
 .github/workflows/deploy_apptainer.yml | 60 --------------------------
 .github/workflows/main.yml             | 57 ------------------------
 .github/workflows/release-please.yml   | 14 ++----
 .github/workflows/snakemake-tests.yml  | 12 ++++++
 6 files changed, 37 insertions(+), 135 deletions(-)
 create mode 100644 .github/workflows/deploy-apptainer.yml
 delete mode 100644 .github/workflows/deploy_apptainer.yml
 delete mode 100644 .github/workflows/main.yml
 create mode 100644 .github/workflows/snakemake-tests.yml

diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml
index 82028b7..d0f5164 100644
--- a/.github/workflows/conventional-prs.yml
+++ b/.github/workflows/conventional-prs.yml
@@ -1,4 +1,5 @@
-name: Lint PR
+name: Conventional PRs
+
 on:
   pull_request_target:
     types:
@@ -11,10 +12,5 @@ permissions:
   pull-requests: read
 
 jobs:
-  main:
-    name: Validate PR title
-    runs-on: ubuntu-latest
-    steps:
-      - uses: amannn/action-semantic-pull-request@v5
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  conventional-prs:
+    uses: MPUSP/mpusp-github-actions/.github/workflows/conventional-prs.yml@main
diff --git a/.github/workflows/deploy-apptainer.yml b/.github/workflows/deploy-apptainer.yml
new file mode 100644
index 0000000..aa93cbc
--- /dev/null
+++ b/.github/workflows/deploy-apptainer.yml
@@ -0,0 +1,17 @@
+name: Deploy Apptainer
+
+on:
+  workflow_run:
+    workflows: ["Release Please"]
+    types:
+      - completed
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  deploy-apptainer:
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
+    uses: MPUSP/mpusp-github-actions/.github/workflows/deploy-apptainer.yml@main
diff --git a/.github/workflows/deploy_apptainer.yml b/.github/workflows/deploy_apptainer.yml
deleted file mode 100644
index e7b8081..0000000
--- a/.github/workflows/deploy_apptainer.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: Deploy Apptainer
-
-on:
-  workflow_run:
-    workflows: ["release-please"]
-    types:
-      - completed
-  workflow_dispatch:
-
-jobs:
-  build_and_push:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
-    steps:
-      - name: checkout repo
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: add apptainer source
-        shell: bash
-        run: |
-          sudo add-apt-repository -y ppa:apptainer/ppa
-          sudo apt-get update
-
-      - name: create dockerfile
-        uses: snakemake/snakemake-github-action@v2
-        with:
-          directory: .
-          snakefile: workflow/Snakefile
-          install-apptainer: true
-          args: "--cores 1"
-          task: containerize
-
-      - name: create apptainer recipe
-        shell: bash
-        run: |
-          pip install spython
-          sed -i "2i RUN apt-get update && apt-get install -y curl" Dockerfile
-          spython recipe Dockerfile > apptainer.def
-          sed -i 's/\/environment.yaml\/environment.yaml$/\/environment.yaml/' apptainer.def
-
-      - name: create apptainer image
-        shell: bash
-        run: |
-          sudo apt-get install -y uidmap
-          apptainer build --fakeroot apptainer.sif apptainer.def
-
-      - name: authenticate to GHCR
-        run: |
-          echo ${{ secrets.GITHUB_TOKEN }} | apptainer registry login -u ${{ github.actor }} --password-stdin oras://ghcr.io
-
-      - name: push apptainer to GHCR
-        run: |
-          REPO=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
-          echo "Pushing apptainer to: oras://ghcr.io/${REPO}:latest"
-          apptainer push apptainer.sif "oras://ghcr.io/${REPO}:latest"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index f1d5e40..0000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  Formatting:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor != 'github-actions[bot]' }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Test formatting
-        uses: super-linter/super-linter@v7
-        env:
-          VALIDATE_ALL_CODEBASE: false
-          DEFAULT_BRANCH: main
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VALIDATE_SNAKEMAKE_SNAKEFMT: true
-          VALIDATE_YAML_PRETTIER: true
-
-  Linting:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor != 'github-actions[bot]' }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Test linting workflow
-        uses: snakemake/snakemake-github-action@v2.0.0
-        with:
-          directory: .
-          snakefile: workflow/Snakefile
-          args: "--lint"
-
-  Testing:
-    runs-on: ubuntu-latest
-    if: ${{ github.actor != 'github-actions[bot]' }}
-    needs:
-      - Formatting
-    steps:
-      - uses: actions/checkout@v4
-      - name: Test run workflow
-        uses: snakemake/snakemake-github-action@v2.0.0
-        with:
-          directory: .test
-          snakefile: workflow/Snakefile
-          args: "--sdm conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache"
-
-      - name: Test report
-        uses: snakemake/snakemake-github-action@v2.0.0
-        with:
-          directory: .test
-          snakefile: workflow/Snakefile
-          args: "--cores 1 --report report.zip -n"
diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
index 78dcfea..b103aa0 100644
--- a/.github/workflows/release-please.yml
+++ b/.github/workflows/release-please.yml
@@ -1,20 +1,14 @@
+name: Release Please
+
 on:
   push:
-    branches:
-      - main
+    branches: [main]
 
 permissions:
   contents: write
   pull-requests: write
   issues: write
 
-name: release-please
-
 jobs:
   release-please:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: googleapis/release-please-action@v4
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          release-type: simple
+    uses: MPUSP/mpusp-github-actions/.github/workflows/release-please.yml@main
diff --git a/.github/workflows/snakemake-tests.yml b/.github/workflows/snakemake-tests.yml
new file mode 100644
index 0000000..7e58b06
--- /dev/null
+++ b/.github/workflows/snakemake-tests.yml
@@ -0,0 +1,12 @@
+name: Snakemake Tests
+
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  snakemake-tests:
+    uses: MPUSP/mpusp-github-actions/.github/workflows/snakemake-tests.yml@main
+    with:
+      cores: 2
+      dryrun: false

From 20a6fc9fb3f75a3ba54b8ef719b311ab2412a871 Mon Sep 17 00:00:00 2001
From: m-jahn <jahn@mpusp.mpg.de>
Date: Mon, 30 Mar 2026 16:20:25 +0200
Subject: [PATCH 2/8] fix: schema and README update

---
 README.md                        |  2 +-
 config/README.md                 | 30 ------------------------------
 config/schemas/config.schema.yml | 29 +++++++++++++++++++++++++++--
 3 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 1368e45..def7e9b 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # snakemake-assembly-postprocessing
 
 [![Snakemake](https://img.shields.io/badge/snakemake-≥8.24.1-brightgreen.svg)](https://snakemake.github.io)
-[![GitHub actions status](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml)
+[![GitHub Actions](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with apptainer](https://img.shields.io/badge/run%20with-apptainer-1D355C.svg?labelColor=000000)](https://apptainer.org/)
 [![workflow catalog](https://img.shields.io/badge/Snakemake%20workflow%20catalog-darkgreen)](https://snakemake.github.io/snakemake-workflow-catalog/docs/workflows/MPUSP/snakemake-assembly-postprocessing)
diff --git a/config/README.md b/config/README.md
index d864e26..367bc06 100644
--- a/config/README.md
+++ b/config/README.md
@@ -23,33 +23,3 @@ The samplesheet table has the following layout:
 | ...    | ...                      | ...    | ...       | ...            |
 
 **Note:** Pangenome analysis with `Panaroo` requires at least two samples.
-
-### Parameters
-
-This table lists all parameters that can be used to run the workflow.
-
-| Parameter | Type | Details | Default |
-|:---|:---|:---|:---|
-| **samplesheet** | string | Path to the sample sheet file in csv format | |
-| **tool** | array[string] | Annotation tool to use (one of `prokka`, `pgap`, `bakta`) | |
-| **pgap** | | PGAP configuration object |  |
-| bin | string | Path to the PGAP script | |
-| use_yaml_config | boolean | Whether to use YAML configuration for PGAP | `False` |
-| _prepare_yaml_files_ | | Paths to YAML templates for PGAP | |
-| generic | string | Path to the generic YAML configuration file | |
-| submol | string | Path to the submol YAML configuration file | |
-| **prokka** | | Prokka configuration object | |
-| center | string | Center name for Prokka annotation (used in sequence IDs) | |
-| extra | string | Extra command-line arguments for Prokka | `--addgenes` |
-| **bakta** | | Bakta configuration object | |
-| download_db | string | Bakta database type (`full`, `light`, or `none`) | `light` |
-| existing_db | string | Path to an existing Bakta database (optional). Needs to be combined with `download_db='none'` | `--keep-contig-headers --compliant` |
-| extra | string | Extra command-line arguments for Bakta | |
-| **quast** | | QUAST configuration object | |
-| reference_fasta | string | Path to the reference genome for QUAST | |
-| reference_gff | string | Path to the reference annotation for QUAST |
-| extra | string | Extra command-line arguments for QUAST | |
-| **panaroo** | | Panaroo configuration object | |
-| remove_source | string | Source types to remove in Panaroo (regex supported) | `cmsearch` |
-| remove_feature | string | Feature types to remove in Panaroo (regex supported) | `tRNA\|rRNA\|ncRNA\|exon\|sequence_feature` |
-| extra | string | Extra command-line arguments for Panaroo | `--clean-mode strict --remove-invalid-genes` |
diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml
index 33215e4..9e5524f 100644
--- a/config/schemas/config.schema.yml
+++ b/config/schemas/config.schema.yml
@@ -5,12 +5,16 @@ type: object
 properties:
   samplesheet:
     type: string
-    description: Path to the sample sheet file
+    description: Path to the sample sheet in CSV format
+    default: "config/samples.csv"
   tool:
     type: array
-    description: Annotation tool to use
+    description: Annotation tool(s) to use
+    default: ["prokka"]
     items:
       type: string
+      description: Name of the annotation tool
+      default: "prokka"
       enum:
         - prokka
         - pgap
@@ -21,18 +25,22 @@ properties:
       bin:
         type: string
         description: Path to the PGAP script
+        default: "path/to/pgap.py"
       use_yaml_config:
         type: boolean
         description: Whether to use YAML configuration for PGAP
+        default: true
       prepare_yaml_files:
         type: object
         properties:
           generic:
             type: string
             description: Path to the generic YAML configuration file
+            default: "config/generic.yaml"
           submol:
             type: string
             description: Path to the submol YAML configuration file
+            default: "config/submol.yaml"
         required:
           - generic
           - submol
@@ -46,9 +54,11 @@ properties:
       center:
         type: string
         description: Center name for Prokka annotation (used in sequence IDs)
+        default: ""
       extra:
         type: string
         description: Extra command-line arguments for Prokka
+        default: "--addgenes"
     required:
       - center
       - extra
@@ -58,12 +68,15 @@ properties:
       download_db:
         type: string
         description: Bakta database type, one of 'full', 'light', or 'none' if existing is used
+        default: "light"
       existing_db:
         type: string
         description: Path to an existing Bakta database (optional)
+        default: ""
       extra:
         type: string
         description: Extra command-line arguments for Bakta
+        default: "--keep-contig-headers --compliant"
     required:
       - download_db
       - existing_db
@@ -74,27 +87,38 @@ properties:
       reference_fasta:
         type: string
         description: Path to the reference genome for QUAST
+        default: ""
       reference_gff:
         type: string
         description: Path to the reference annotation for QUAST
+        default: ""
       extra:
         type: string
         description: Extra command-line arguments for QUAST
+        default: ""
+    required:
+      - reference_fasta
+      - reference_gff
+      - extra
   panaroo:
     type: object
     properties:
       skip:
         type: boolean
         description: Whether to skip Panaroo analysis
+        default: false
       remove_source:
         type: string
         description: Source types to remove in Panaroo (regex supported)
+        default: "cmsearch"
       remove_feature:
         type: string
         description: Feature types to remove in Panaroo (regex supported)
+        default: "tRNA|rRNA|ncRNA|exon|sequence_feature"
       extra:
         type: string
         description: Extra command-line arguments for Panaroo
+        default: "--clean-mode strict --remove-invalid-genes"
 
 required:
   - samplesheet
@@ -103,3 +127,4 @@ required:
   - prokka
   - bakta
   - quast
+  - panaroo

From 50f20dae6e9187f5c46b22f749ced971deac4e69 Mon Sep 17 00:00:00 2001
From: m-jahn <jahn@mpusp.mpg.de>
Date: Mon, 30 Mar 2026 16:51:30 +0200
Subject: [PATCH 3/8] fix: new snakefmt directive order

---
 workflow/Snakefile          |  2 +-
 workflow/rules/annotate.smk | 48 ++++++++++++++++++-------------------
 workflow/rules/common.smk   |  1 -
 workflow/rules/qc.smk       | 28 +++++++++++-----------
 4 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index aafcb6a..4acfad8 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -66,6 +66,6 @@ onerror:
 # target rules
 # -----------------------------------------------------
 rule all:
+    default_target: True
     input:
         get_final_input,
-    default_target: True
diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk
index 4b9aab0..1d660ba 100644
--- a/workflow/rules/annotate.smk
+++ b/workflow/rules/annotate.smk
@@ -3,10 +3,10 @@ rule get_fasta:
         get_fasta,
     output:
         fasta="results/annotation/pgap/prepare_files/{sample}/genome.fasta",
-    conda:
-        "../envs/base.yml"
     log:
         "results/annotation/pgap/prepare_files/logs/{sample}_get_fasta.log",
+    conda:
+        "../envs/base.yml"
     shell:
         "INPUT=$(realpath {input}); "
         "ln -s ${{INPUT}} {output}; "
@@ -19,6 +19,8 @@ rule prepare_yaml_files:
     output:
         input_yaml="results/annotation/pgap/prepare_files/{sample}/input.yaml",
         submol_yaml="results/annotation/pgap/prepare_files/{sample}/submol.yaml",
+    log:
+        "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log",
     conda:
         "../envs/base.yml"
     params:
@@ -28,8 +30,6 @@ rule prepare_yaml_files:
         submol=config["pgap"]["prepare_yaml_files"]["submol"],
         sample="{sample}",
         pd_samples=samples,
-    log:
-        "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log",
     script:
         "../scripts/prepare_yaml_files.py"
 
@@ -44,18 +44,18 @@ rule annotate_pgap:
     output:
         gff="results/annotation/pgap/{sample}/{sample}.gff",
         fasta="results/annotation/pgap/{sample}/{sample}.fna",
+    log:
+        "results/annotation/pgap/logs/{sample}_pgap.log",
     conda:
         "../envs/base.yml"
-    message:
-        """--- Running PGAP annotation for sample {wildcards.sample} ---"""
+    threads: 1
     params:
         pgap=config["pgap"]["bin"],
         use_yaml_config=config["pgap"]["use_yaml_config"],
         species=lambda wc: samples.loc[wc.sample]["species"],
         outdir=lambda wc, output: os.path.dirname(output[0]),
-    threads: 1
-    log:
-        "results/annotation/pgap/logs/{sample}_pgap.log",
+    message:
+        """--- Running PGAP annotation for sample {wildcards.sample} ---"""
     shell:
         "rm -rf {params.outdir}; "
         "if [ {params.use_yaml_config} == 'True' ]; then "
@@ -83,10 +83,11 @@ rule annotate_prokka:
     output:
         gff="results/annotation/prokka/{sample}/{sample}.gff",
         fasta="results/annotation/prokka/{sample}/{sample}.fna",
+    log:
+        "results/annotation/prokka/logs/{sample}_prokka.log",
     conda:
         "../envs/prokka.yml"
-    message:
-        """--- Running PROKKA annotation for sample {wildcards.sample} ---"""
+    threads: workflow.cores * 0.25
     params:
         prefix=lambda wc: wc.sample,
         locustag=lambda wc: samples.loc[wc.sample]["id_prefix"],
@@ -95,9 +96,8 @@ rule annotate_prokka:
         strain=lambda wc: samples.loc[wc.sample]["strain"],
         outdir=lambda wc, output: os.path.dirname(output[0]),
         extra=config["prokka"]["extra"],
-    threads: workflow.cores * 0.25
-    log:
-        "results/annotation/prokka/logs/{sample}_prokka.log",
+    message:
+        """--- Running PROKKA annotation for sample {wildcards.sample} ---"""
     shell:
         """
         prokka \
@@ -123,17 +123,17 @@ rule get_bakta_db:
                 "none": directory("results/annotation/bakta/database/custom"),
             },
         ),
+    log:
+        "results/annotation/bakta/database/db.log",
     conda:
         "../envs/bakta.yml"
-    message:
-        """--- Getting BAKTA database for annotation ---"""
+    threads: workflow.cores * 0.25
     params:
         download_db=config["bakta"]["download_db"],
         existing_db=config["bakta"]["existing_db"],
         outdir=lambda wc, output: os.path.dirname(output[0]),
-    threads: workflow.cores * 0.25
-    log:
-        "results/annotation/bakta/database/db.log",
+    message:
+        """--- Getting BAKTA database for annotation ---"""
     shell:
         """
         if [ {params.download_db} != 'none' ]; then
@@ -156,10 +156,11 @@ rule annotate_bakta:
     output:
         gff="results/annotation/bakta/{sample}/{sample}.gff",
         fasta="results/annotation/bakta/{sample}/{sample}.fna",
+    log:
+        "results/annotation/bakta/logs/{sample}_bakta.log",
     conda:
         "../envs/bakta.yml"
-    message:
-        """--- Running BAKTA annotation for sample {wildcards.sample} ---"""
+    threads: workflow.cores * 0.25
     params:
         prefix=lambda wc: wc.sample,
         locustag=lambda wc: format_bakta_locustag(samples.loc[wc.sample]["id_prefix"]),
@@ -167,9 +168,8 @@ rule annotate_bakta:
         strain=lambda wc: samples.loc[wc.sample]["strain"],
         outdir=lambda wc, output: os.path.dirname(output[0]),
         extra=config["bakta"]["extra"],
-    threads: workflow.cores * 0.25
-    log:
-        "results/annotation/bakta/logs/{sample}_bakta.log",
+    message:
+        """--- Running BAKTA annotation for sample {wildcards.sample} ---"""
     shell:
         """
         bakta \
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 47364a7..3d6466d 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -4,7 +4,6 @@ import re
 from snakemake import logging
 from snakemake.utils import validate
 
-
 # read sample sheet
 samples = (
     pd.read_csv(config["samplesheet"], sep=",", dtype={"sample": str})
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 6135aa1..f994e3f 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -3,10 +3,11 @@ rule quast:
         fasta=get_quast_fasta,
     output:
         report="results/qc/quast/{tool}/report.txt",
+    log:
+        "results/qc/quast/{tool}/quast.log",
     conda:
         "../envs/quast.yml"
-    message:
-        """--- Running QUAST quality check for all assemblies ---"""
+    threads: 4
     params:
         outdir=lambda wc, output: os.path.dirname(output.report),
         ref_fasta=(
@@ -20,9 +21,8 @@ rule quast:
             else []
         ),
         extra=config["quast"]["extra"],
-    threads: 4
-    log:
-        "results/qc/quast/{tool}/quast.log",
+    message:
+        """--- Running QUAST quality check for all assemblies ---"""
     shell:
         """
         quast \
@@ -43,15 +43,15 @@ rule prepare_panaroo:
     output:
         fasta="results/qc/panaroo/{tool}/prepare/{sample}.fna",
         gff="results/qc/panaroo/{tool}/prepare/{sample}.gff",
+    log:
+        "results/qc/panaroo/{tool}/prepare/{sample}.log",
     conda:
         "../envs/panaroo.yml"
-    message:
-        """--- Prepare input files for pan-genome alignment ---"""
     params:
         remove_source=config["panaroo"]["remove_source"],
         remove_feature=config["panaroo"]["remove_feature"],
-    log:
-        "results/qc/panaroo/{tool}/prepare/{sample}.log",
+    message:
+        """--- Prepare input files for pan-genome alignment ---"""
     shell:
         """
         echo 'Preparing annotation for Panaroo:' > {log};
@@ -70,16 +70,16 @@ rule panaroo:
         fasta=get_panaroo_fasta,
     output:
         stats="results/qc/panaroo/{tool}/summary_statistics.txt",
+    log:
+        "results/qc/panaroo/{tool}/panaroo.log",
     conda:
         "../envs/panaroo.yml"
-    message:
-        """--- Running PANAROO to create pangenome from all annotations ---"""
+    threads: 4
     params:
         outdir=lambda wc, output: os.path.dirname(output.stats),
         extra=config["panaroo"]["extra"],
-    threads: 4
-    log:
-        "results/qc/panaroo/{tool}/panaroo.log",
+    message:
+        """--- Running PANAROO to create pangenome from all annotations ---"""
     shell:
         """
         printf '%s\n' {input.gff} | \

From 5d6c8100bfdf335047c37a98cd60d8cd9eec94bf Mon Sep 17 00:00:00 2001
From: m-jahn <jahn@mpusp.mpg.de>
Date: Tue, 31 Mar 2026 13:34:39 +0200
Subject: [PATCH 4/8] feat: fix panaroo bug, added fastANI rule, harmonized
 multi-threading

---
 .test/config/config.yml          |  4 ++++
 config/README.md                 |  3 ++-
 config/config.yml                |  4 ++++
 config/schemas/config.schema.yml | 13 ++++++++++-
 workflow/envs/fastani.yml        |  7 ++++++
 workflow/envs/panaroo.yml        |  3 ++-
 workflow/rules/annotate.smk      |  7 +++---
 workflow/rules/common.smk        |  5 +++++
 workflow/rules/qc.smk            | 38 ++++++++++++++++++++++++++++++--
 9 files changed, 75 insertions(+), 9 deletions(-)
 create mode 100644 workflow/envs/fastani.yml

diff --git a/.test/config/config.yml b/.test/config/config.yml
index 773cadc..9e99678 100644
--- a/.test/config/config.yml
+++ b/.test/config/config.yml
@@ -27,3 +27,7 @@ panaroo:
   remove_source: "cmsearch"
   remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature"
   extra: "--clean-mode strict --remove-invalid-genes"
+
+fastani:
+  skip: False
+  extra: ""
diff --git a/config/README.md b/config/README.md
index 367bc06..43c77d8 100644
--- a/config/README.md
+++ b/config/README.md
@@ -9,6 +9,7 @@ A Snakemake workflow for the post-processing of microbial genome assemblies.
    3. [bakta](https://github.com/oschwengers/bakta), a fast, alignment-free annotation tool. Note: Bakta will automatically download its companion database from zenodo (light: 1.5 GB, full: 40 GB)
 3. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast)
 4. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/)
+5. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances.
 
 ## Running the workflow
 
@@ -22,4 +23,4 @@ The samplesheet table has the following layout:
 | EC2224 | "Streptococcus pyogenes" | SF370  | SPY       | assembly.fasta |
 | ...    | ...                      | ...    | ...       | ...            |
 
-**Note:** Pangenome analysis with `Panaroo` requires at least two samples.
+**Note:** Pangenome analysis with `Panaroo` and pairwise similarity analysis with `FastANI` require at least two samples.
diff --git a/config/config.yml b/config/config.yml
index 773cadc..9e99678 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -27,3 +27,7 @@ panaroo:
   remove_source: "cmsearch"
   remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature"
   extra: "--clean-mode strict --remove-invalid-genes"
+
+fastani:
+  skip: False
+  extra: ""
diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml
index 9e5524f..00adbe4 100644
--- a/config/schemas/config.schema.yml
+++ b/config/schemas/config.schema.yml
@@ -119,7 +119,17 @@ properties:
         type: string
         description: Extra command-line arguments for Panaroo
         default: "--clean-mode strict --remove-invalid-genes"
-
+  fastani:
+    type: object
+    properties:
+      skip:
+        type: boolean
+        description: Whether to skip FastANI analysis
+        default: false
+      extra:
+        type: string
+        description: Extra command-line arguments for FastANI
+        default: ""
 required:
   - samplesheet
   - tool
@@ -128,3 +138,4 @@ required:
   - bakta
   - quast
   - panaroo
+  - fastani
\ No newline at end of file
diff --git a/workflow/envs/fastani.yml b/workflow/envs/fastani.yml
new file mode 100644
index 0000000..d815b6d
--- /dev/null
+++ b/workflow/envs/fastani.yml
@@ -0,0 +1,7 @@
+name: fastani
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - fastani=1.34
\ No newline at end of file
diff --git a/workflow/envs/panaroo.yml b/workflow/envs/panaroo.yml
index 43ee14a..b423d91 100644
--- a/workflow/envs/panaroo.yml
+++ b/workflow/envs/panaroo.yml
@@ -6,4 +6,5 @@ channels:
 dependencies:
   - numpy=1.26.4
   - scipy=1.11.4
-  - panaroo=1.5.2
+  - biopython=1.84
+  - panaroo=1.6.0
diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk
index 1d660ba..e09065a 100644
--- a/workflow/rules/annotate.smk
+++ b/workflow/rules/annotate.smk
@@ -48,7 +48,6 @@ rule annotate_pgap:
         "results/annotation/pgap/logs/{sample}_pgap.log",
     conda:
         "../envs/base.yml"
-    threads: 1
     params:
         pgap=config["pgap"]["bin"],
         use_yaml_config=config["pgap"]["use_yaml_config"],
@@ -87,7 +86,7 @@ rule annotate_prokka:
         "results/annotation/prokka/logs/{sample}_prokka.log",
     conda:
         "../envs/prokka.yml"
-    threads: workflow.cores * 0.25
+    threads: max(workflow.cores * 0.5, 1)
     params:
         prefix=lambda wc: wc.sample,
         locustag=lambda wc: samples.loc[wc.sample]["id_prefix"],
@@ -127,7 +126,7 @@ rule get_bakta_db:
         "results/annotation/bakta/database/db.log",
     conda:
         "../envs/bakta.yml"
-    threads: workflow.cores * 0.25
+    threads: max(workflow.cores * 0.25, 1)
     params:
         download_db=config["bakta"]["download_db"],
         existing_db=config["bakta"]["existing_db"],
@@ -160,7 +159,7 @@ rule annotate_bakta:
         "results/annotation/bakta/logs/{sample}_bakta.log",
     conda:
         "../envs/bakta.yml"
-    threads: workflow.cores * 0.25
+    threads: max(workflow.cores * 0.25, 1)
     params:
         prefix=lambda wc: wc.sample,
         locustag=lambda wc: format_bakta_locustag(samples.loc[wc.sample]["id_prefix"]),
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 3d6466d..41be1d4 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -62,6 +62,11 @@ def get_final_input(wildcards):
             "results/qc/panaroo/{tool}/summary_statistics.txt",
             tool=config["tool"],
         )
+    if len(samples.index) > 1 and not config["fastani"]["skip"]:
+        inputs += expand(
+            "results/qc/fastani/{tool}/summary.txt",
+            tool=config["tool"],
+        )
     return inputs
 
 
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index f994e3f..86f5049 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -7,7 +7,7 @@ rule quast:
         "results/qc/quast/{tool}/quast.log",
     conda:
         "../envs/quast.yml"
-    threads: 4
+    threads: max(workflow.cores * 0.5, 1)
     params:
         outdir=lambda wc, output: os.path.dirname(output.report),
         ref_fasta=(
@@ -36,6 +36,40 @@ rule quast:
         """
 
 
+rule fastani:
+    input:
+        fasta=get_quast_fasta,
+    output:
+        txt="results/qc/fastani/{tool}/summary.txt",
+    log:
+        "results/qc/fastani/{tool}/fastani.log",
+    conda:
+        "../envs/fastani.yml"
+    threads: max(workflow.cores * 0.5, 1)
+    params:
+        outdir=lambda wc, output: os.path.dirname(output.txt),
+        ref_fasta=(
+            " ".join(["-r", config["quast"]["reference_fasta"]])
+            if config["quast"]["reference_fasta"]
+            else []
+        ),
+        extra=config["fastani"]["extra"],
+    message:
+        """--- Running FastANI to compare genome similarity (all vs all) ---"""
+    shell:
+        """
+        printf '%s\n' {input.fasta} > {params.outdir}/input_files.txt;
+        {params.ref_fasta} >> {params.outdir}/input_files.txt;
+        fastANI \
+          --ql {params.outdir}/input_files.txt \
+          --rl {params.outdir}/input_files.txt \
+          --output {output.txt} \
+          --threads {threads} \
+          {params.extra} \
+          > {log} 2>&1
+        """
+
+
 rule prepare_panaroo:
     input:
         fasta="results/annotation/{tool}/{sample}/{sample}.fna",
@@ -74,7 +108,7 @@ rule panaroo:
         "results/qc/panaroo/{tool}/panaroo.log",
     conda:
         "../envs/panaroo.yml"
-    threads: 4
+    threads: max(workflow.cores * 0.5, 1)
     params:
         outdir=lambda wc, output: os.path.dirname(output.stats),
         extra=config["panaroo"]["extra"],

From e72502dd8db409fe0cc9b96b8d78a7f18b2f260c Mon Sep 17 00:00:00 2001
From: m-jahn <jahn@mpusp.mpg.de>
Date: Tue, 31 Mar 2026 13:45:57 +0200
Subject: [PATCH 5/8] fix: typos

---
 config/schemas/config.schema.yml | 2 +-
 workflow/envs/fastani.yml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml
index 00adbe4..4807425 100644
--- a/config/schemas/config.schema.yml
+++ b/config/schemas/config.schema.yml
@@ -138,4 +138,4 @@ required:
   - bakta
   - quast
   - panaroo
-  - fastani
\ No newline at end of file
+  - fastani
diff --git a/workflow/envs/fastani.yml b/workflow/envs/fastani.yml
index d815b6d..7104421 100644
--- a/workflow/envs/fastani.yml
+++ b/workflow/envs/fastani.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults
 dependencies:
-  - fastani=1.34
\ No newline at end of file
+  - fastani=1.34

From 7778d93faa157eb56eb8f2aea8e0f8d5dcc8f265 Mon Sep 17 00:00:00 2001
From: jahn <jahn@mpusp.mpg.de>
Date: Tue, 31 Mar 2026 14:09:58 +0200
Subject: [PATCH 6/8] fix: reference input

---
 workflow/rules/qc.smk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 86f5049..eac1084 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -49,7 +49,7 @@ rule fastani:
     params:
         outdir=lambda wc, output: os.path.dirname(output.txt),
         ref_fasta=(
-            " ".join(["-r", config["quast"]["reference_fasta"]])
+            [config["quast"]["reference_fasta"]]
             if config["quast"]["reference_fasta"]
             else []
         ),
@@ -59,7 +59,7 @@ rule fastani:
     shell:
         """
         printf '%s\n' {input.fasta} > {params.outdir}/input_files.txt;
-        {params.ref_fasta} >> {params.outdir}/input_files.txt;
+        printf '%s\n' {params.ref_fasta} >> {params.outdir}/input_files.txt;
         fastANI \
           --ql {params.outdir}/input_files.txt \
           --rl {params.outdir}/input_files.txt \

From 6f3e4c24b138eadb882629c62386237b986af0af Mon Sep 17 00:00:00 2001
From: Rina Ahmed-Begrich <begrich@mpusp.mpg.de>
Date: Thu, 2 Apr 2026 16:32:22 +0200
Subject: [PATCH 7/8] fix: adjust threads of prokka run

---
 workflow/rules/annotate.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk
index e09065a..e52b7f7 100644
--- a/workflow/rules/annotate.smk
+++ b/workflow/rules/annotate.smk
@@ -86,7 +86,7 @@ rule annotate_prokka:
         "results/annotation/prokka/logs/{sample}_prokka.log",
     conda:
         "../envs/prokka.yml"
-    threads: max(workflow.cores * 0.5, 1)
+    threads: max(workflow.cores * 0.25, 1)
     params:
         prefix=lambda wc: wc.sample,
         locustag=lambda wc: samples.loc[wc.sample]["id_prefix"],

From 328c40a50be8e475a28bc3349b1311750ce84932 Mon Sep 17 00:00:00 2001
From: Rina Ahmed-Begrich <begrich@mpusp.mpg.de>
Date: Thu, 2 Apr 2026 17:09:57 +0200
Subject: [PATCH 8/8] fix: introduced general reference parameter in config
 file. refactored some qc rules.

---
 .test/config/config.yml          |  6 ++++--
 config/config.yml                |  6 ++++--
 config/schemas/config.schema.yml | 24 ++++++++++++++----------
 workflow/rules/common.smk        | 15 +++++----------
 workflow/rules/qc.smk            | 24 +++++++++++-------------
 5 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/.test/config/config.yml b/.test/config/config.yml
index 9e99678..dd50d27 100644
--- a/.test/config/config.yml
+++ b/.test/config/config.yml
@@ -1,6 +1,10 @@
 samplesheet: "config/samples.csv"
 tool: ["prokka"]
 
+reference:
+  fasta: ""
+  gff: ""
+
 pgap:
   bin: "path/to/pgap.py"
   use_yaml_config: True
@@ -18,8 +22,6 @@ bakta:
   extra: "--keep-contig-headers --compliant"
 
 quast:
-  reference_fasta: ""
-  reference_gff: ""
   extra: ""
 
 panaroo:
diff --git a/config/config.yml b/config/config.yml
index 9e99678..dd50d27 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -1,6 +1,10 @@
 samplesheet: "config/samples.csv"
 tool: ["prokka"]
 
+reference:
+  fasta: ""
+  gff: ""
+
 pgap:
   bin: "path/to/pgap.py"
   use_yaml_config: True
@@ -18,8 +22,6 @@ bakta:
   extra: "--keep-contig-headers --compliant"
 
 quast:
-  reference_fasta: ""
-  reference_gff: ""
   extra: ""
 
 panaroo:
diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml
index 4807425..b7f32c9 100644
--- a/config/schemas/config.schema.yml
+++ b/config/schemas/config.schema.yml
@@ -19,6 +19,19 @@ properties:
         - prokka
         - pgap
         - bakta
+  reference:
+    type: object
+    properties:
+      fasta:
+        type: string
+        description: Path to the reference genome in FASTA format (optional; leave empty if no reference is used)
+        default: ""
+      gff:
+        type: string
+        description: Path to the reference annotation in GFF format (optional)
+        default: ""
+    required:
+      - fasta
   pgap:
     type: object
     properties:
@@ -84,21 +97,11 @@ properties:
   quast:
     type: object
     properties:
-      reference_fasta:
-        type: string
-        description: Path to the reference genome for QUAST
-        default: ""
-      reference_gff:
-        type: string
-        description: Path to the reference annotation for QUAST
-        default: ""
       extra:
         type: string
         description: Extra command-line arguments for QUAST
         default: ""
     required:
-      - reference_fasta
-      - reference_gff
       - extra
   panaroo:
     type: object
@@ -133,6 +136,7 @@ properties:
 required:
   - samplesheet
   - tool
+  - reference
   - pgap
   - prokka
   - bakta
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 41be1d4..26f2d8d 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -27,12 +27,9 @@ def get_fasta(wildcards):
     return samples.loc[sample, "file"]
 
 
-def get_quast_fasta(wildcards):
-    return expand(
-        "results/annotation/{tool}/{sample}/{sample}.fna",
-        tool=wildcards.tool,
-        sample=samples.index,
-    )
+def get_all_fasta(wildcards):
+    """Get all input fasta files for all samples."""
+    return [samples.loc[s, "file"] for s in samples.index]
 
 
 def get_panaroo_gff(wildcards):
@@ -54,8 +51,7 @@ def get_panaroo_fasta(wildcards):
 def get_final_input(wildcards):
     inputs = []
     inputs += expand(
-        "results/qc/quast/{tool}/report.txt",
-        tool=config["tool"],
+        "results/qc/quast/report.txt",
     )
     if len(samples.index) > 1 and not config["panaroo"]["skip"]:
         inputs += expand(
@@ -64,8 +60,7 @@ def get_final_input(wildcards):
         )
     if len(samples.index) > 1 and not config["fastani"]["skip"]:
         inputs += expand(
-            "results/qc/fastani/{tool}/summary.txt",
-            tool=config["tool"],
+            "results/qc/fastani/summary.txt",
         )
     return inputs
 
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index eac1084..c146e26 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -1,23 +1,23 @@
 rule quast:
     input:
-        fasta=get_quast_fasta,
+        fasta=get_all_fasta,
     output:
-        report="results/qc/quast/{tool}/report.txt",
+        report="results/qc/quast/report.txt",
     log:
-        "results/qc/quast/{tool}/quast.log",
+        "results/qc/quast/quast.log",
     conda:
         "../envs/quast.yml"
     threads: max(workflow.cores * 0.5, 1)
     params:
         outdir=lambda wc, output: os.path.dirname(output.report),
         ref_fasta=(
-            " ".join(["-r", config["quast"]["reference_fasta"]])
-            if config["quast"]["reference_fasta"]
+            " ".join(["-r", config["reference"]["fasta"]])
+            if config["reference"]["fasta"]
             else []
         ),
         ref_gff=(
-            " ".join(["-g", config["quast"]["reference_gff"]])
-            if config["quast"]["reference_gff"]
+            " ".join(["-g", config["reference"]["gff"]])
+            if config["reference"]["gff"]
             else []
         ),
         extra=config["quast"]["extra"],
@@ -38,20 +38,18 @@ rule quast:
 
 rule fastani:
     input:
-        fasta=get_quast_fasta,
+        fasta=get_all_fasta,
     output:
-        txt="results/qc/fastani/{tool}/summary.txt",
+        txt="results/qc/fastani/summary.txt",
     log:
-        "results/qc/fastani/{tool}/fastani.log",
+        "results/qc/fastani/fastani.log",
     conda:
         "../envs/fastani.yml"
     threads: max(workflow.cores * 0.5, 1)
     params:
         outdir=lambda wc, output: os.path.dirname(output.txt),
         ref_fasta=(
-            [config["quast"]["reference_fasta"]]
-            if config["quast"]["reference_fasta"]
-            else []
+            [config["reference"]["fasta"]] if config["reference"]["fasta"] else []
         ),
         extra=config["fastani"]["extra"],
     message: