From fdf4adcc263cc80606442cad44d2963ea6bf050e Mon Sep 17 00:00:00 2001 From: m-jahn Date: Mon, 30 Mar 2026 16:05:32 +0200 Subject: [PATCH 1/8] fix: update CI workflows --- .github/workflows/conventional-prs.yml | 12 ++---- .github/workflows/deploy-apptainer.yml | 17 ++++++++ .github/workflows/deploy_apptainer.yml | 60 -------------------------- .github/workflows/main.yml | 57 ------------------------ .github/workflows/release-please.yml | 14 ++---- .github/workflows/snakemake-tests.yml | 12 ++++++ 6 files changed, 37 insertions(+), 135 deletions(-) create mode 100644 .github/workflows/deploy-apptainer.yml delete mode 100644 .github/workflows/deploy_apptainer.yml delete mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/snakemake-tests.yml diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml index 82028b7..d0f5164 100644 --- a/.github/workflows/conventional-prs.yml +++ b/.github/workflows/conventional-prs.yml @@ -1,4 +1,5 @@ -name: Lint PR +name: Conventional PRs + on: pull_request_target: types: @@ -11,10 +12,5 @@ permissions: pull-requests: read jobs: - main: - name: Validate PR title - runs-on: ubuntu-latest - steps: - - uses: amannn/action-semantic-pull-request@v5 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + conventional-prs: + uses: MPUSP/mpusp-github-actions/.github/workflows/conventional-prs.yml@main diff --git a/.github/workflows/deploy-apptainer.yml b/.github/workflows/deploy-apptainer.yml new file mode 100644 index 0000000..aa93cbc --- /dev/null +++ b/.github/workflows/deploy-apptainer.yml @@ -0,0 +1,17 @@ +name: Deploy Apptainer + +on: + workflow_run: + workflows: ["Release Please"] + types: + - completed + workflow_dispatch: + +permissions: + contents: read + packages: write + +jobs: + deploy-apptainer: + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} + uses: MPUSP/mpusp-github-actions/.github/workflows/deploy-apptainer.yml@main diff --git a/.github/workflows/deploy_apptainer.yml b/.github/workflows/deploy_apptainer.yml deleted file mode 100644 index e7b8081..0000000 --- a/.github/workflows/deploy_apptainer.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Deploy Apptainer - -on: - workflow_run: - workflows: ["release-please"] - types: - - completed - workflow_dispatch: - -jobs: - build_and_push: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} - steps: - - name: checkout repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: add apptainer source - shell: bash - run: | - sudo add-apt-repository -y ppa:apptainer/ppa - sudo apt-get update - - - name: create dockerfile - uses: snakemake/snakemake-github-action@v2 - with: - directory: . - snakefile: workflow/Snakefile - install-apptainer: true - args: "--cores 1" - task: containerize - - - name: create apptainer recipe - shell: bash - run: | - pip install spython - sed -i "2i RUN apt-get update && apt-get install -y curl" Dockerfile - spython recipe Dockerfile > apptainer.def - sed -i 's/\/environment.yaml\/environment.yaml$/\/environment.yaml/' apptainer.def - - - name: create apptainer image - shell: bash - run: | - sudo apt-get install -y uidmap - apptainer build --fakeroot apptainer.sif apptainer.def - - - name: authenticate to GHCR - run: | - echo ${{ secrets.GITHUB_TOKEN }} | apptainer registry login -u ${{ github.actor }} --password-stdin oras://ghcr.io - - - name: push apptainer to GHCR - run: | - REPO=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') - echo "Pushing apptainer to: oras://ghcr.io/${REPO}:latest" - apptainer push apptainer.sif "oras://ghcr.io/${REPO}:latest" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index f1d5e40..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: CI - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - Formatting: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Test formatting - uses: super-linter/super-linter@v7 - env: - VALIDATE_ALL_CODEBASE: false - DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - VALIDATE_SNAKEMAKE_SNAKEFMT: true - VALIDATE_YAML_PRETTIER: true - - Linting: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - steps: - - uses: actions/checkout@v4 - - name: Test linting workflow - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: . - snakefile: workflow/Snakefile - args: "--lint" - - Testing: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - needs: - - Formatting - steps: - - uses: actions/checkout@v4 - - name: Test run workflow - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--sdm conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache" - - - name: Test report - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--cores 1 --report report.zip -n" diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 78dcfea..b103aa0 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -1,20 +1,14 @@ +name: Release Please + on: push: - branches: - - main + branches: [main] permissions: contents: write pull-requests: write issues: write -name: release-please - jobs: release-please: - runs-on: ubuntu-latest - steps: - - uses: googleapis/release-please-action@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - release-type: simple + uses: MPUSP/mpusp-github-actions/.github/workflows/release-please.yml@main diff --git a/.github/workflows/snakemake-tests.yml b/.github/workflows/snakemake-tests.yml new file mode 100644 index 0000000..7e58b06 --- /dev/null +++ b/.github/workflows/snakemake-tests.yml @@ -0,0 +1,12 @@ +name: Snakemake Tests + +on: + pull_request: + branches: [main] + +jobs: + snakemake-tests: + uses: MPUSP/mpusp-github-actions/.github/workflows/snakemake-tests.yml@main + with: + cores: 2 + dryrun: false From 20a6fc9fb3f75a3ba54b8ef719b311ab2412a871 Mon Sep 17 00:00:00 2001 From: m-jahn Date: Mon, 30 Mar 2026 16:20:25 +0200 Subject: [PATCH 2/8] fix: schema and README update --- README.md | 2 +- config/README.md | 30 ------------------------------ config/schemas/config.schema.yml | 29 +++++++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 1368e45..def7e9b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # snakemake-assembly-postprocessing [![Snakemake](https://img.shields.io/badge/snakemake-≥8.24.1-brightgreen.svg)](https://snakemake.github.io) -[![GitHub actions status](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml) +[![GitHub Actions](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with apptainer](https://img.shields.io/badge/run%20with-apptainer-1D355C.svg?labelColor=000000)](https://apptainer.org/) [![workflow catalog](https://img.shields.io/badge/Snakemake%20workflow%20catalog-darkgreen)](https://snakemake.github.io/snakemake-workflow-catalog/docs/workflows/MPUSP/snakemake-assembly-postprocessing) diff --git a/config/README.md b/config/README.md index d864e26..367bc06 100644 --- a/config/README.md +++ b/config/README.md @@ -23,33 +23,3 @@ The samplesheet table has the following layout: | ... | ... | ... | ... | ... | **Note:** Pangenome analysis with `Panaroo` requires at least two samples. - -### Parameters - -This table lists all parameters that can be used to run the workflow. - -| Parameter | Type | Details | Default | -|:---|:---|:---|:---| -| **samplesheet** | string | Path to the sample sheet file in csv format | | -| **tool** | array[string] | Annotation tool to use (one of `prokka`, `pgap`, `bakta`) | | -| **pgap** | | PGAP configuration object | | -| bin | string | Path to the PGAP script | | -| use_yaml_config | boolean | Whether to use YAML configuration for PGAP | `False` | -| _prepare_yaml_files_ | | Paths to YAML templates for PGAP | | -| generic | string | Path to the generic YAML configuration file | | -| submol | string | Path to the submol YAML configuration file | | -| **prokka** | | Prokka configuration object | | -| center | string | Center name for Prokka annotation (used in sequence IDs) | | -| extra | string | Extra command-line arguments for Prokka | `--addgenes` | -| **bakta** | | Bakta configuration object | | -| download_db | string | Bakta database type (`full`, `light`, or `none`) | `light` | -| existing_db | string | Path to an existing Bakta database (optional). Needs to be combined with `download_db='none'` | `--keep-contig-headers --compliant` | -| extra | string | Extra command-line arguments for Bakta | | -| **quast** | | QUAST configuration object | | -| reference_fasta | string | Path to the reference genome for QUAST | | -| reference_gff | string | Path to the reference annotation for QUAST | -| extra | string | Extra command-line arguments for QUAST | | -| **panaroo** | | Panaroo configuration object | | -| remove_source | string | Source types to remove in Panaroo (regex supported) | `cmsearch` | -| remove_feature | string | Feature types to remove in Panaroo (regex supported) | `tRNA\|rRNA\|ncRNA\|exon\|sequence_feature` | -| extra | string | Extra command-line arguments for Panaroo | `--clean-mode strict --remove-invalid-genes` | diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index 33215e4..9e5524f 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -5,12 +5,16 @@ type: object properties: samplesheet: type: string - description: Path to the sample sheet file + description: Path to the sample sheet in CSV format + default: "config/samples.csv" tool: type: array - description: Annotation tool to use + description: Annotation tool(s) to use + default: ["prokka"] items: type: string + description: Name of the annotation tool + default: "prokka" enum: - prokka - pgap @@ -21,18 +25,22 @@ properties: bin: type: string description: Path to the PGAP script + default: "path/to/pgap.py" use_yaml_config: type: boolean description: Whether to use YAML configuration for PGAP + default: true prepare_yaml_files: type: object properties: generic: type: string description: Path to the generic YAML configuration file + default: "config/generic.yaml" submol: type: string description: Path to the submol YAML configuration file + default: "config/submol.yaml" required: - generic - submol @@ -46,9 +54,11 @@ properties: center: type: string description: Center name for Prokka annotation (used in sequence IDs) + default: "" extra: type: string description: Extra command-line arguments for Prokka + default: "--addgenes" required: - center - extra @@ -58,12 +68,15 @@ properties: download_db: type: string description: Bakta database type, one of 'full', 'light', or 'none' if existing is used + default: "light" existing_db: type: string description: Path to an existing Bakta database (optional) + default: "" extra: type: string description: Extra command-line arguments for Bakta + default: "--keep-contig-headers --compliant" required: - download_db - existing_db @@ -74,27 +87,38 @@ properties: reference_fasta: type: string description: Path to the reference genome for QUAST + default: "" reference_gff: type: string description: Path to the reference annotation for QUAST + default: "" extra: type: string description: Extra command-line arguments for QUAST + default: "" + required: + - reference_fasta + - reference_gff + - extra panaroo: type: object properties: skip: type: boolean description: Whether to skip Panaroo analysis + default: false remove_source: type: string description: Source types to remove in Panaroo (regex supported) + default: "cmsearch" remove_feature: type: string description: Feature types to remove in Panaroo (regex supported) + default: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: type: string description: Extra command-line arguments for Panaroo + default: "--clean-mode strict --remove-invalid-genes" required: - samplesheet @@ -103,3 +127,4 @@ required: - prokka - bakta - quast + - panaroo From 50f20dae6e9187f5c46b22f749ced971deac4e69 Mon Sep 17 00:00:00 2001 From: m-jahn Date: Mon, 30 Mar 2026 16:51:30 +0200 Subject: [PATCH 3/8] fix: new snakefmt directive order --- workflow/Snakefile | 2 +- workflow/rules/annotate.smk | 48 ++++++++++++++++++------------------- workflow/rules/common.smk | 1 - workflow/rules/qc.smk | 28 +++++++++++----------- 4 files changed, 39 insertions(+), 40 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index aafcb6a..4acfad8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -66,6 +66,6 @@ onerror: # target rules # ----------------------------------------------------- rule all: + default_target: True input: get_final_input, - default_target: True diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index 4b9aab0..1d660ba 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -3,10 +3,10 @@ rule get_fasta: get_fasta, output: fasta="results/annotation/pgap/prepare_files/{sample}/genome.fasta", - conda: - "../envs/base.yml" log: "results/annotation/pgap/prepare_files/logs/{sample}_get_fasta.log", + conda: + "../envs/base.yml" shell: "INPUT=$(realpath {input}); " "ln -s ${{INPUT}} {output}; " @@ -19,6 +19,8 @@ rule prepare_yaml_files: output: input_yaml="results/annotation/pgap/prepare_files/{sample}/input.yaml", submol_yaml="results/annotation/pgap/prepare_files/{sample}/submol.yaml", + log: + "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log", conda: "../envs/base.yml" params: @@ -28,8 +30,6 @@ rule prepare_yaml_files: submol=config["pgap"]["prepare_yaml_files"]["submol"], sample="{sample}", pd_samples=samples, - log: - "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log", script: "../scripts/prepare_yaml_files.py" @@ -44,18 +44,18 @@ rule annotate_pgap: output: gff="results/annotation/pgap/{sample}/{sample}.gff", fasta="results/annotation/pgap/{sample}/{sample}.fna", + log: + "results/annotation/pgap/logs/{sample}_pgap.log", conda: "../envs/base.yml" - message: - """--- Running PGAP annotation for sample {wildcards.sample} ---""" + threads: 1 params: pgap=config["pgap"]["bin"], use_yaml_config=config["pgap"]["use_yaml_config"], species=lambda wc: samples.loc[wc.sample]["species"], outdir=lambda wc, output: os.path.dirname(output[0]), - threads: 1 - log: - "results/annotation/pgap/logs/{sample}_pgap.log", + message: + """--- Running PGAP annotation for sample {wildcards.sample} ---""" shell: "rm -rf {params.outdir}; " "if [ {params.use_yaml_config} == 'True' ]; then " @@ -83,10 +83,11 @@ rule annotate_prokka: output: gff="results/annotation/prokka/{sample}/{sample}.gff", fasta="results/annotation/prokka/{sample}/{sample}.fna", + log: + "results/annotation/prokka/logs/{sample}_prokka.log", conda: "../envs/prokka.yml" - message: - """--- Running PROKKA annotation for sample {wildcards.sample} ---""" + threads: workflow.cores * 0.25 params: prefix=lambda wc: wc.sample, locustag=lambda wc: samples.loc[wc.sample]["id_prefix"], @@ -95,9 +96,8 @@ rule annotate_prokka: strain=lambda wc: samples.loc[wc.sample]["strain"], outdir=lambda wc, output: os.path.dirname(output[0]), extra=config["prokka"]["extra"], - threads: workflow.cores * 0.25 - log: - "results/annotation/prokka/logs/{sample}_prokka.log", + message: + """--- Running PROKKA annotation for sample {wildcards.sample} ---""" shell: """ prokka \ @@ -123,17 +123,17 @@ rule get_bakta_db: "none": directory("results/annotation/bakta/database/custom"), }, ), + log: + "results/annotation/bakta/database/db.log", conda: "../envs/bakta.yml" - message: - """--- Getting BAKTA database for annotation ---""" + threads: workflow.cores * 0.25 params: download_db=config["bakta"]["download_db"], existing_db=config["bakta"]["existing_db"], outdir=lambda wc, output: os.path.dirname(output[0]), - threads: workflow.cores * 0.25 - log: - "results/annotation/bakta/database/db.log", + message: + """--- Getting BAKTA database for annotation ---""" shell: """ if [ {params.download_db} != 'none' ]; then @@ -156,10 +156,11 @@ rule annotate_bakta: output: gff="results/annotation/bakta/{sample}/{sample}.gff", fasta="results/annotation/bakta/{sample}/{sample}.fna", + log: + "results/annotation/bakta/logs/{sample}_bakta.log", conda: "../envs/bakta.yml" - message: - """--- Running BAKTA annotation for sample {wildcards.sample} ---""" + threads: workflow.cores * 0.25 params: prefix=lambda wc: wc.sample, locustag=lambda wc: format_bakta_locustag(samples.loc[wc.sample]["id_prefix"]), @@ -167,9 +168,8 @@ rule annotate_bakta: strain=lambda wc: samples.loc[wc.sample]["strain"], outdir=lambda wc, output: os.path.dirname(output[0]), extra=config["bakta"]["extra"], - threads: workflow.cores * 0.25 - log: - "results/annotation/bakta/logs/{sample}_bakta.log", + message: + """--- Running BAKTA annotation for sample {wildcards.sample} ---""" shell: """ bakta \ diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 47364a7..3d6466d 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -4,7 +4,6 @@ import re from snakemake import logging from snakemake.utils import validate - # read sample sheet samples = ( pd.read_csv(config["samplesheet"], sep=",", dtype={"sample": str}) diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 6135aa1..f994e3f 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -3,10 +3,11 @@ rule quast: fasta=get_quast_fasta, output: report="results/qc/quast/{tool}/report.txt", + log: + "results/qc/quast/{tool}/quast.log", conda: "../envs/quast.yml" - message: - """--- Running QUAST quality check for all assemblies ---""" + threads: 4 params: outdir=lambda wc, output: os.path.dirname(output.report), ref_fasta=( @@ -20,9 +21,8 @@ rule quast: else [] ), extra=config["quast"]["extra"], - threads: 4 - log: - "results/qc/quast/{tool}/quast.log", + message: + """--- Running QUAST quality check for all assemblies ---""" shell: """ quast \ @@ -43,15 +43,15 @@ rule prepare_panaroo: output: fasta="results/qc/panaroo/{tool}/prepare/{sample}.fna", gff="results/qc/panaroo/{tool}/prepare/{sample}.gff", + log: + "results/qc/panaroo/{tool}/prepare/{sample}.log", conda: "../envs/panaroo.yml" - message: - """--- Prepare input files for pan-genome alignment ---""" params: remove_source=config["panaroo"]["remove_source"], remove_feature=config["panaroo"]["remove_feature"], - log: - "results/qc/panaroo/{tool}/prepare/{sample}.log", + message: + """--- Prepare input files for pan-genome alignment ---""" shell: """ echo 'Preparing annotation for Panaroo:' > {log}; @@ -70,16 +70,16 @@ rule panaroo: fasta=get_panaroo_fasta, output: stats="results/qc/panaroo/{tool}/summary_statistics.txt", + log: + "results/qc/panaroo/{tool}/panaroo.log", conda: "../envs/panaroo.yml" - message: - """--- Running PANAROO to create pangenome from all annotations ---""" + threads: 4 params: outdir=lambda wc, output: os.path.dirname(output.stats), extra=config["panaroo"]["extra"], - threads: 4 - log: - "results/qc/panaroo/{tool}/panaroo.log", + message: + """--- Running PANAROO to create pangenome from all annotations ---""" shell: """ printf '%s\n' {input.gff} | \ From 5d6c8100bfdf335047c37a98cd60d8cd9eec94bf Mon Sep 17 00:00:00 2001 From: m-jahn Date: Tue, 31 Mar 2026 13:34:39 +0200 Subject: [PATCH 4/8] feat: fix panaroo bug, added fastANI rule, harmonized multi-threading --- .test/config/config.yml | 4 ++++ config/README.md | 3 ++- config/config.yml | 4 ++++ config/schemas/config.schema.yml | 13 ++++++++++- workflow/envs/fastani.yml | 7 ++++++ workflow/envs/panaroo.yml | 3 ++- workflow/rules/annotate.smk | 7 +++--- workflow/rules/common.smk | 5 +++++ workflow/rules/qc.smk | 38 ++++++++++++++++++++++++++++++-- 9 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 workflow/envs/fastani.yml diff --git a/.test/config/config.yml b/.test/config/config.yml index 773cadc..9e99678 100644 --- a/.test/config/config.yml +++ b/.test/config/config.yml @@ -27,3 +27,7 @@ panaroo: remove_source: "cmsearch" remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: "--clean-mode strict --remove-invalid-genes" + +fastani: + skip: False + extra: "" diff --git a/config/README.md b/config/README.md index 367bc06..43c77d8 100644 --- a/config/README.md +++ b/config/README.md @@ -9,6 +9,7 @@ A Snakemake workflow for the post-processing of microbial genome assemblies. 3. [bakta](https://github.com/oschwengers/bakta), a fast, alignment-free annotation tool. Note: Bakta will automatically download its companion database from zenodo (light: 1.5 GB, full: 40 GB) 3. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) 4. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) +5. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances. ## Running the workflow @@ -22,4 +23,4 @@ The samplesheet table has the following layout: | EC2224 | "Streptococcus pyogenes" | SF370 | SPY | assembly.fasta | | ... | ... | ... | ... | ... | -**Note:** Pangenome analysis with `Panaroo` requires at least two samples. +**Note:** Pangenome analysis with `Panaroo` and pairwise similarity analysis with `FastANI` requires at least two samples. diff --git a/config/config.yml b/config/config.yml index 773cadc..9e99678 100644 --- a/config/config.yml +++ b/config/config.yml @@ -27,3 +27,7 @@ panaroo: remove_source: "cmsearch" remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: "--clean-mode strict --remove-invalid-genes" + +fastani: + skip: False + extra: "" diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index 9e5524f..00adbe4 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -119,7 +119,17 @@ properties: type: string description: Extra command-line arguments for Panaroo default: "--clean-mode strict --remove-invalid-genes" - + fastani: + type: object + properties: + skip: + type: boolean + description: Whether to skip FastANI analysis + default: false + extra: + type: string + description: Extra command-line arguments for FastANI + default: "" required: - samplesheet - tool @@ -128,3 +138,4 @@ required: - bakta - quast - panaroo + - fastani \ No newline at end of file diff --git a/workflow/envs/fastani.yml b/workflow/envs/fastani.yml new file mode 100644 index 0000000..d815b6d --- /dev/null +++ b/workflow/envs/fastani.yml @@ -0,0 +1,7 @@ +name: panaroo +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - fastani=1.34 \ No newline at end of file diff --git a/workflow/envs/panaroo.yml b/workflow/envs/panaroo.yml index 43ee14a..b423d91 100644 --- a/workflow/envs/panaroo.yml +++ b/workflow/envs/panaroo.yml @@ -6,4 +6,5 @@ channels: dependencies: - numpy=1.26.4 - scipy=1.11.4 - - panaroo=1.5.2 + - biopython=1.84 + - panaroo=1.6.0 diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index 1d660ba..e09065a 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -48,7 +48,6 @@ rule annotate_pgap: "results/annotation/pgap/logs/{sample}_pgap.log", conda: "../envs/base.yml" - threads: 1 params: pgap=config["pgap"]["bin"], use_yaml_config=config["pgap"]["use_yaml_config"], @@ -87,7 +86,7 @@ rule annotate_prokka: "results/annotation/prokka/logs/{sample}_prokka.log", conda: "../envs/prokka.yml" - threads: workflow.cores * 0.25 + threads: max(workflow.cores * 0.5, 1) params: prefix=lambda wc: wc.sample, locustag=lambda wc: samples.loc[wc.sample]["id_prefix"], @@ -127,7 +126,7 @@ rule get_bakta_db: "results/annotation/bakta/database/db.log", conda: "../envs/bakta.yml" - threads: workflow.cores * 0.25 + threads: max(workflow.cores * 0.25, 1) params: download_db=config["bakta"]["download_db"], existing_db=config["bakta"]["existing_db"], @@ -160,7 +159,7 @@ rule annotate_bakta: "results/annotation/bakta/logs/{sample}_bakta.log", conda: "../envs/bakta.yml" - threads: workflow.cores * 0.25 + threads: max(workflow.cores * 0.25, 1) params: prefix=lambda wc: wc.sample, locustag=lambda wc: format_bakta_locustag(samples.loc[wc.sample]["id_prefix"]), diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 3d6466d..41be1d4 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -62,6 +62,11 @@ def get_final_input(wildcards): "results/qc/panaroo/{tool}/summary_statistics.txt", tool=config["tool"], ) + if len(samples.index) > 1 and not config["fastani"]["skip"]: + inputs += expand( + "results/qc/fastani/{tool}/summary.txt", + tool=config["tool"], + ) return inputs diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index f994e3f..86f5049 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -7,7 +7,7 @@ rule quast: "results/qc/quast/{tool}/quast.log", conda: "../envs/quast.yml" - threads: 4 + threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.report), ref_fasta=( @@ -36,6 +36,40 @@ rule quast: """ +rule fastani: + input: + fasta=get_quast_fasta, + output: + txt="results/qc/fastani/{tool}/summary.txt", + log: + "results/qc/fastani/{tool}/fastani.log", + conda: + "../envs/fastani.yml" + threads: max(workflow.cores * 0.5, 1) + params: + outdir=lambda wc, output: os.path.dirname(output.txt), + ref_fasta=( + " ".join(["-r", config["quast"]["reference_fasta"]]) + if config["quast"]["reference_fasta"] + else [] + ), + extra=config["fastani"]["extra"], + message: + """--- Running FastANI to compare genome similarity (all vs all) ---""" + shell: + """ + printf '%s\n' {input.fasta} > {params.outdir}/input_files.txt; + {params.ref_fasta} >> {params.outdir}/input_files.txt; + fastANI \ + --ql {params.outdir}/input_files.txt \ + --rl {params.outdir}/input_files.txt \ + --output {output.txt} \ + --threads {threads} \ + {params.extra} \ + > {log} 2>&1 + """ + + rule prepare_panaroo: input: fasta="results/annotation/{tool}/{sample}/{sample}.fna", @@ -74,7 +108,7 @@ rule panaroo: "results/qc/panaroo/{tool}/panaroo.log", conda: "../envs/panaroo.yml" - threads: 4 + threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.stats), extra=config["panaroo"]["extra"], From e72502dd8db409fe0cc9b96b8d78a7f18b2f260c Mon Sep 17 00:00:00 2001 From: m-jahn Date: Tue, 31 Mar 2026 13:45:57 +0200 Subject: [PATCH 5/8] fix: typos --- config/schemas/config.schema.yml | 2 +- workflow/envs/fastani.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index 00adbe4..4807425 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -138,4 +138,4 @@ required: - bakta - quast - panaroo - - fastani \ No newline at end of file + - fastani diff --git a/workflow/envs/fastani.yml b/workflow/envs/fastani.yml index d815b6d..7104421 100644 --- a/workflow/envs/fastani.yml +++ b/workflow/envs/fastani.yml @@ -4,4 +4,4 @@ channels: - bioconda - nodefaults dependencies: - - fastani=1.34 \ No newline at end of file + - fastani=1.34 From 7778d93faa157eb56eb8f2aea8e0f8d5dcc8f265 Mon Sep 17 00:00:00 2001 From: jahn Date: Tue, 31 Mar 2026 14:09:58 +0200 Subject: [PATCH 6/8] fix: reference input --- workflow/rules/qc.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 86f5049..eac1084 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -49,7 +49,7 @@ rule fastani: params: outdir=lambda wc, output: os.path.dirname(output.txt), ref_fasta=( - " ".join(["-r", config["quast"]["reference_fasta"]]) + [config["quast"]["reference_fasta"]] if config["quast"]["reference_fasta"] else [] ), @@ -59,7 +59,7 @@ rule fastani: shell: """ printf '%s\n' {input.fasta} > {params.outdir}/input_files.txt; - {params.ref_fasta} >> {params.outdir}/input_files.txt; + printf '%s\n' {params.ref_fasta} >> {params.outdir}/input_files.txt; fastANI \ --ql {params.outdir}/input_files.txt \ --rl {params.outdir}/input_files.txt \ From 6f3e4c24b138eadb882629c62386237b986af0af Mon Sep 17 00:00:00 2001 From: Rina Ahmed-Begrich Date: Thu, 2 Apr 2026 16:32:22 +0200 Subject: [PATCH 7/8] fix: adjust threads of prokka run --- workflow/rules/annotate.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index e09065a..e52b7f7 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -86,7 +86,7 @@ rule annotate_prokka: "results/annotation/prokka/logs/{sample}_prokka.log", conda: "../envs/prokka.yml" - threads: max(workflow.cores * 0.5, 1) + threads: max(workflow.cores * 0.25, 1) params: prefix=lambda wc: wc.sample, locustag=lambda wc: samples.loc[wc.sample]["id_prefix"], From 328c40a50be8e475a28bc3349b1311750ce84932 Mon Sep 17 00:00:00 2001 From: Rina Ahmed-Begrich Date: Thu, 2 Apr 2026 17:09:57 +0200 Subject: [PATCH 8/8] fix: introduced general reference parameter in config file. refactored some qc rules. --- .test/config/config.yml | 6 ++++-- config/config.yml | 6 ++++-- config/schemas/config.schema.yml | 24 ++++++++++++++---------- workflow/rules/common.smk | 15 +++++---------- workflow/rules/qc.smk | 24 +++++++++++------------- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/.test/config/config.yml b/.test/config/config.yml index 9e99678..dd50d27 100644 --- a/.test/config/config.yml +++ b/.test/config/config.yml @@ -1,6 +1,10 @@ samplesheet: "config/samples.csv" tool: ["prokka"] +reference: + fasta: "" + gff: "" + pgap: bin: "path/to/pgap.py" use_yaml_config: True @@ -18,8 +22,6 @@ bakta: extra: "--keep-contig-headers --compliant" quast: - reference_fasta: "" - reference_gff: "" extra: "" panaroo: diff --git a/config/config.yml b/config/config.yml index 9e99678..dd50d27 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,6 +1,10 @@ samplesheet: "config/samples.csv" tool: ["prokka"] +reference: + fasta: "" + gff: "" + pgap: bin: "path/to/pgap.py" use_yaml_config: True @@ -18,8 +22,6 @@ bakta: extra: "--keep-contig-headers --compliant" quast: - reference_fasta: "" - reference_gff: "" extra: "" panaroo: diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index 4807425..b7f32c9 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -19,6 +19,19 @@ properties: - prokka - pgap - bakta + reference: + type: object + properties: + fasta: + type: string + description: Path to the reference genome in FASTA format + default: "" + gff: + type: string + description: Path to the reference annotation in GFF format (optional) + default: "" + required: + - fasta pgap: type: object properties: @@ -84,21 +97,11 @@ properties: quast: type: object properties: - reference_fasta: - type: string - description: Path to the reference genome for QUAST - default: "" - reference_gff: - type: string - description: Path to the reference annotation for QUAST - default: "" extra: type: string description: Extra command-line arguments for QUAST default: "" required: - - reference_fasta - - reference_gff - extra panaroo: type: object @@ -133,6 +136,7 @@ properties: required: - samplesheet - tool + - reference - pgap - prokka - bakta diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 41be1d4..26f2d8d 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -27,12 +27,9 @@ def get_fasta(wildcards): return samples.loc[sample, "file"] -def get_quast_fasta(wildcards): - return expand( - "results/annotation/{tool}/{sample}/{sample}.fna", - tool=wildcards.tool, - sample=samples.index, - ) +def get_all_fasta(wildcards): + """Get all input fasta files for all samples.""" + return [samples.loc[s, "file"] for s in samples.index] def get_panaroo_gff(wildcards): @@ -54,8 +51,7 @@ def get_panaroo_fasta(wildcards): def get_final_input(wildcards): inputs = [] inputs += expand( - "results/qc/quast/{tool}/report.txt", - tool=config["tool"], + "results/qc/quast/report.txt", ) if len(samples.index) > 1 and not config["panaroo"]["skip"]: inputs += expand( @@ -64,8 +60,7 @@ def get_final_input(wildcards): ) if len(samples.index) > 1 and not config["fastani"]["skip"]: inputs += expand( - "results/qc/fastani/{tool}/summary.txt", - tool=config["tool"], + "results/qc/fastani/summary.txt", ) return inputs diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index eac1084..c146e26 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -1,23 +1,23 @@ rule quast: input: - fasta=get_quast_fasta, + fasta=get_all_fasta, output: - report="results/qc/quast/{tool}/report.txt", + report="results/qc/quast/report.txt", log: - "results/qc/quast/{tool}/quast.log", + "results/qc/quast/quast.log", conda: "../envs/quast.yml" threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.report), ref_fasta=( - " ".join(["-r", config["quast"]["reference_fasta"]]) - if config["quast"]["reference_fasta"] + " ".join(["-r", config["reference"]["fasta"]]) + if config["reference"]["fasta"] else [] ), ref_gff=( - " ".join(["-g", config["quast"]["reference_gff"]]) - if config["quast"]["reference_gff"] + " ".join(["-g", config["reference"]["gff"]]) + if config["reference"]["gff"] else [] ), extra=config["quast"]["extra"], @@ -38,20 +38,18 @@ rule quast: rule fastani: input: - fasta=get_quast_fasta, + fasta=get_all_fasta, output: - txt="results/qc/fastani/{tool}/summary.txt", + txt="results/qc/fastani/summary.txt", log: - "results/qc/fastani/{tool}/fastani.log", + "results/qc/fastani/fastani.log", conda: "../envs/fastani.yml" threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.txt), ref_fasta=( - [config["quast"]["reference_fasta"]] - if config["quast"]["reference_fasta"] - else [] + [config["reference"]["fasta"]] if config["reference"]["fasta"] else [] ), extra=config["fastani"]["extra"], message: