Skip to content

Commit cc63250

Browse files
authored
Default Resource Usage (#113)
* revert: import Layout and PrepMethod * Revert "revert: import Layout and PrepMethod" This reverts commit 7aa3f8c. * refactor: set `copy_*` rules as local so they are not submitted as a cluster job * refactor: set fastq_dump_{paired,single} as a network rule because it downloads files * refactor: increase memory requirement of trimming * refactor: increase runtime requirement of fragment_size * refactor: use a submission script for slurm submissions * refactor: move network slots into workflow resources, not set as a default resource This fixes a bug that occured when network slots was included as a default resource, all jobs then required 5 network slots, meaning only 1 job could run at a time * feat: commit the slurm submission script * refactor: move network slots into workflow resources, not set as a default resource
1 parent 5526b8a commit cc63250

4 files changed

Lines changed: 68 additions & 77 deletions

File tree

Snakefile

Lines changed: 26 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,18 @@ rule all:
9494
)
9595

9696
rule copy_config:
97-
input:
98-
"config.yaml"
99-
output:
100-
f"{cfg.data_root}/{{tissue}}/{{tissue}}_config.yaml"
101-
resources:
102-
mem_mb=256,
103-
runtime=1,
104-
tissue="" # intentionally left blank; reference: github.com/jdblischak/smk-simple-slurm/issues/20
105-
shell: "cp --verbose {input} {output}"
97+
localrule: True
98+
input: "config.yaml"
99+
output: f"{cfg.data_root}/{{tissue}}/{{tissue}}_config.yaml"
100+
shell: "cp {input} {output}"
106101

107102
rule preroundup:
103+
localrule: True
108104
output:
109105
layout=f"{cfg.data_root}/{{tissue}}/layouts/{{tissue}}_{{tag}}_layout.txt",
110106
preparation=f"{cfg.data_root}/{{tissue}}/prepMethods/{{tissue}}_{{tag}}_prep_method.txt",
111107
params:
112-
sample_name=lambda wildcards: f"{wildcards.tissue}_{wildcards.tag}",
113-
resources:
114-
mem_mb=lambda wildcards, attempt: 1024 * attempt,
115-
runtime=lambda wildcards, attempt: 1 * attempt,
116-
tissue=lambda wildcards: wildcards.tissue,
108+
sample_name=lambda wildcards: f"{wildcards.tissue}_{wildcards.tag}"
117109
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/preroundup/preroundup_{{tissue}}_{{tag}}.benchmark", cfg.benchmark_count)
118110
run:
119111
# example row: SRR12873784,effectorcd8_S1R1,PE,total
@@ -354,6 +346,7 @@ rule fastq_dump_single:
354346
resources:
355347
mem_mb=lambda wildcards, attempt: 4096 * attempt,
356348
runtime=lambda wildcards, attempt: 30 * attempt,
349+
network_slots=1,
357350
tissue=lambda wildcards: wildcards.tissue
358351
threads: 4
359352
conda: "envs/SRAtools.yaml"
@@ -477,7 +470,7 @@ rule trim_paired:
477470
r2_fastq=f"{cfg.data_root}/{{tissue}}/trim/{{tissue}}_{{tag}}_2.fastq.gz",
478471
r2_report=f"{cfg.data_root}/{{tissue}}/trim/{{tissue}}_{{tag}}_2_trimming_report.txt",
479472
resources:
480-
mem_mb=lambda wildcards, attempt: 4096 * attempt,
473+
mem_mb=lambda wildcards, attempt: 8096 * attempt,
481474
runtime=lambda wildcards, attempt: 45 * attempt,
482475
tissue=lambda wildcards: wildcards.tissue,
483476
threads: 4
@@ -525,7 +518,7 @@ rule trim_single:
525518
# See the trim_galore `--cores` setting for details on why 16 was chosen
526519
# https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
527520
resources:
528-
mem_mb=lambda wildcards, attempt: 4096 * attempt,
521+
mem_mb=lambda wildcards, attempt: 8096 * attempt,
529522
runtime=lambda wildcards, attempt: 45 * attempt,
530523
tissue=lambda wildcards: wildcards.tissue,
531524
threads: 4
@@ -851,7 +844,7 @@ rule fragment_size:
851844
bed_filepath=rules.download_genome.output,
852845
resources:
853846
mem_mb=lambda wildcards, attempt: 4096 * attempt,
854-
runtime=lambda wildcards, attempt: 20 * attempt,
847+
runtime=lambda wildcards, attempt: 40 * attempt,
855848
tissue=lambda wildcards: wildcards.tissue,
856849
conda: "envs/rseqc.yaml"
857850
threads: 4
@@ -948,57 +941,28 @@ rule rnaseq_metrics:
948941
"""
949942

950943
rule copy_fragment_size:
951-
input:
952-
rules.fragment_size.output,
953-
output:
954-
f"{cfg.como_root}/{{tissue}}/fragmentSizes/{{sample}}/{{tissue}}_{{tag}}_fragment_size.txt"
955-
resources:
956-
mem_mb=256,
957-
runtime=1,
958-
tissue=lambda wildcards: wildcards.tissue,
959-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_fragment_size/copy_fragment_size_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
960-
shell:
961-
"""cp --verbose {input} {output}"""
944+
localrule: True
945+
input: rules.fragment_size.output,
946+
output: f"{cfg.como_root}/{{tissue}}/fragmentSizes/{{sample}}/{{tissue}}_{{tag}}_fragment_size.txt"
947+
shell: """cp {input} {output}"""
962948

963949
rule copy_insert_size:
964-
input:
965-
rules.insert_size.output.txt,
966-
output:
967-
f"{cfg.como_root}/{{tissue}}/insertSizeMetrics/{{sample}}/{{tissue}}_{{tag}}_insert_size.txt"
968-
resources:
969-
mem_mb=256,
970-
runtime=1,
971-
tissue=lambda wildcards: wildcards.tissue,
972-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_insert_size/copy_insert_size_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
973-
shell:
974-
"""cp --verbose {input} {output}"""
950+
localrule: True
951+
input: rules.insert_size.output.txt,
952+
output: f"{cfg.como_root}/{{tissue}}/insertSizeMetrics/{{sample}}/{{tissue}}_{{tag}}_insert_size.txt"
953+
shell: """cp {input} {output}"""
975954

976955
rule copy_rnaseq_metrics:
977-
input:
978-
rules.rnaseq_metrics.output.strand,
979-
output:
980-
f"{cfg.como_root}/{{tissue}}/strandedness/{{sample}}/{{tissue}}_{{tag}}_strandedness.txt"
981-
resources:
982-
mem_mb=256,
983-
runtime=1,
984-
tissue=lambda wildcards: wildcards.tissue,
985-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_rnaseq_metrics/copy_rnaseq_metrics_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
986-
shell:
987-
"""cp --verbose {input} {output}"""
988-
956+
localrule: True
957+
input: rules.rnaseq_metrics.output.strand,
958+
output: f"{cfg.como_root}/{{tissue}}/strandedness/{{sample}}/{{tissue}}_{{tag}}_strandedness.txt"
959+
shell: """cp {input} {output}"""
989960

990961
rule copy_gene_counts:
991-
input:
992-
rules.align.output.gene_table,
993-
output:
994-
f"{cfg.como_root}/{{tissue}}/geneCounts/{{sample}}/{{tissue}}_{{tag}}.tab"
995-
resources:
996-
mem_mb=256,
997-
runtime=1,
998-
tissue=lambda wildcards: wildcards.tissue,
999-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_gene_counts/copy_gene_counts_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
1000-
shell:
1001-
"""cp --verbose {input} {output}"""
962+
localrule: True
963+
input: rules.align.output.gene_table,
964+
output: f"{cfg.como_root}/{{tissue}}/geneCounts/{{sample}}/{{tissue}}_{{tag}}.tab"
965+
shell: """cp {input} {output}"""
1002966

1003967

1004968
def multiqc_contamination_input(wildcards) -> list[str]:

profiles/cluster/config.v8+.yaml

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,20 @@
11
# This file configures various settings for snakemake to execute jobs to a SLURM cluster.
22
# FROM: https://github.com/jdblischak/smk-simple-slurm
33

4-
executor: cluster-generic
5-
cluster-generic-submit-cmd:
6-
mkdir -p logs/{rule}/{resources.tissue} &&
7-
sbatch
8-
--job-name=smk-{rule}-{wildcards}
9-
--account=helikarlab
10-
--cpus-per-task={threads}
11-
--output=logs/{rule}/{resources.tissue}/{rule}-{wildcards}.out
12-
--mem={resources.mem_mb}
13-
--time={resources.runtime}
14-
--parsable
4+
resources:
5+
network_slots: 5
156

16-
# Define tissue name
177
default-resources:
188
mem_mb: 2048
19-
network_slots: 5
9+
2010

2111
# Default job submission
2212
retries: 3
13+
executor: cluster-generic
14+
cluster-generic-submit-cmd: profiles/cluster/submit.sh --job-name=smk-{rule}-{wildcards} --cpus-per-task={threads} --output=logs/{rule}/{resources.tissue}/{rule}-{wildcards}.out --mem={resources.mem_mb} --time={resources.runtime} --parsable
15+
cluster-generic-status-cmd: status.sh
2316
cluster-generic-cancel-cmd: scancel
2417
cluster-generic-cancel-nargs: 50
25-
cluster-generic-status-cmd: status.sh
2618
max-jobs-per-second: 10
2719
max-status-checks-per-second: 5
2820
latency-wait: 60

profiles/cluster/submit.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
3+
#######################################
# Resolve the SLURM account that jobs should be billed to.
# Precedence:
#   1. SNAKEMAKE_SLURM_ACCOUNT environment variable, if set and non-empty.
#   2. The user's SLURM default account (sacctmgr DefaultAccount).
#   3. The first account sacctmgr lists for the user.
# Globals:   SNAKEMAKE_SLURM_ACCOUNT (read), USER (read)
# Outputs:   the account name on stdout, or an empty string if none found
# Returns:   0 always; an empty result signals "no account determined"
#######################################
select_account() {
  # Explicit override always wins.
  if [[ -n "${SNAKEMAKE_SLURM_ACCOUNT:-}" ]]; then
    echo "${SNAKEMAKE_SLURM_ACCOUNT}"
    return 0
  fi

  # Fall back to `id -un` so the lookup still works when $USER is unset
  # (e.g. under cron or with `set -u` enabled).
  local user
  user="${USER:-$(id -un)}"

  # `2>/dev/null … || true` keeps a missing or failing sacctmgr from
  # producing noise or aborting the script; we simply fall through.
  local acct=""
  acct="$(sacctmgr --noheader --parsable2 show user "$user" format=DefaultAccount 2>/dev/null | head -n1 | tr -d '[:space:]' || true)"
  if [[ -n "${acct}" && "${acct}" != "Unknown" ]]; then
    echo "$acct"
    return 0
  fi

  # Otherwise pick the first account returned for the user.
  acct="$(sacctmgr --noheader --parsable2 show user "$user" format=Account 2>/dev/null | head -n1 | tr -d '[:space:]' || true)"
  if [[ -n "${acct}" && "${acct}" != "Unknown" ]]; then
    echo "$acct"
    return 0
  fi

  # No account could be determined; callers treat empty as "omit --account".
  echo ""
}
25+
26+
# Detect whether the caller already supplied an account flag; if so we must
# not inject our own, or sbatch would receive conflicting --account options.
# NOTE: an explicit `if` is used instead of `[[ … ]] && … && break` — the
# short-circuit form leaves the loop body with a non-zero status on every
# non-matching argument, which would abort the script under `set -e`.
have_account_arg=0
for arg in "$@"; do
  if [[ "$arg" == --account* || "$arg" == -A* ]]; then
    have_account_arg=1
    break
  fi
done

# Submit via sbatch, injecting the auto-detected account only when the
# caller did not pass one and detection succeeded. `exec` replaces this
# shell so sbatch's exit status propagates directly to Snakemake.
acct="$(select_account)"
if [[ "$have_account_arg" -eq 0 && -n "${acct}" ]]; then
  exec sbatch --account="$acct" "$@"
else
  exec sbatch "$@"
fi

profiles/local/config.v8+.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ resources:
33

44
default-resources:
55
mem_mb: 2048
6-
network_slots: 5
76

87
cores: 'all'
98

0 commit comments

Comments
 (0)