Skip to content

Commit cc63250

Browse files
authored
Default Resource Usage (#113)
* revert: import Layout and PrepMethod * Revert "revert: import Layout and PrepMethod" This reverts commit 7aa3f8c. * refactor: set `copy_*` rules as local so they are not submitted as a cluster job * refactor: set fastq_dump_{paired,single} as a network rule because it downloads files * refactor: increase memory requirement of trimming * refactor: increase runtime requirement of fragment_size * refactor: use a submission script for slurm submissions * refactor: move network slots into workflow resources, not set as a default resource This fixes a bug that occured when network slots was included as a default resource, all jobs then required 5 network slots, meaning only 1 job could run at a time * feat: commit the slurm submission script * refactor: move network slots into workflow resources, not set as a default resource
1 parent 5526b8a commit cc63250

4 files changed

Lines changed: 68 additions & 77 deletions

File tree

Snakefile

Lines changed: 26 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,18 @@ rule all:
9494
)
9595

9696
rule copy_config:
97-
input:
98-
"config.yaml"
99-
output:
100-
f"{cfg.data_root}/{{tissue}}/{{tissue}}_config.yaml"
101-
resources:
102-
mem_mb=256,
103-
runtime=1,
104-
tissue="" # intentionally left blank; reference: github.com/jdblischak/smk-simple-slurm/issues/20
105-
shell: "cp --verbose {input} {output}"
97+
localrule: True
98+
input: "config.yaml"
99+
output: f"{cfg.data_root}/{{tissue}}/{{tissue}}_config.yaml"
100+
shell: "cp {input} {output}"
106101

107102
rule preroundup:
103+
localrule: True
108104
output:
109105
layout=f"{cfg.data_root}/{{tissue}}/layouts/{{tissue}}_{{tag}}_layout.txt",
110106
preparation=f"{cfg.data_root}/{{tissue}}/prepMethods/{{tissue}}_{{tag}}_prep_method.txt",
111107
params:
112-
sample_name=lambda wildcards: f"{wildcards.tissue}_{wildcards.tag}",
113-
resources:
114-
mem_mb=lambda wildcards, attempt: 1024 * attempt,
115-
runtime=lambda wildcards, attempt: 1 * attempt,
116-
tissue=lambda wildcards: wildcards.tissue,
108+
sample_name=lambda wildcards: f"{wildcards.tissue}_{wildcards.tag}"
117109
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/preroundup/preroundup_{{tissue}}_{{tag}}.benchmark", cfg.benchmark_count)
118110
run:
119111
# example row: SRR12873784,effectorcd8_S1R1,PE,total
@@ -354,6 +346,7 @@ rule fastq_dump_single:
354346
resources:
355347
mem_mb=lambda wildcards, attempt: 4096 * attempt,
356348
runtime=lambda wildcards, attempt: 30 * attempt,
349+
network_slots=1,
357350
tissue=lambda wildcards: wildcards.tissue
358351
threads: 4
359352
conda: "envs/SRAtools.yaml"
@@ -477,7 +470,7 @@ rule trim_paired:
477470
r2_fastq=f"{cfg.data_root}/{{tissue}}/trim/{{tissue}}_{{tag}}_2.fastq.gz",
478471
r2_report=f"{cfg.data_root}/{{tissue}}/trim/{{tissue}}_{{tag}}_2_trimming_report.txt",
479472
resources:
480-
mem_mb=lambda wildcards, attempt: 4096 * attempt,
473+
mem_mb=lambda wildcards, attempt: 8096 * attempt,
481474
runtime=lambda wildcards, attempt: 45 * attempt,
482475
tissue=lambda wildcards: wildcards.tissue,
483476
threads: 4
@@ -525,7 +518,7 @@ rule trim_single:
525518
# See the trim_galore `--cores` setting for details on why 16 was chosen
526519
# https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
527520
resources:
528-
mem_mb=lambda wildcards, attempt: 4096 * attempt,
521+
mem_mb=lambda wildcards, attempt: 8096 * attempt,
529522
runtime=lambda wildcards, attempt: 45 * attempt,
530523
tissue=lambda wildcards: wildcards.tissue,
531524
threads: 4
@@ -851,7 +844,7 @@ rule fragment_size:
851844
bed_filepath=rules.download_genome.output,
852845
resources:
853846
mem_mb=lambda wildcards, attempt: 4096 * attempt,
854-
runtime=lambda wildcards, attempt: 20 * attempt,
847+
runtime=lambda wildcards, attempt: 40 * attempt,
855848
tissue=lambda wildcards: wildcards.tissue,
856849
conda: "envs/rseqc.yaml"
857850
threads: 4
@@ -948,57 +941,28 @@ rule rnaseq_metrics:
948941
"""
949942

950943
rule copy_fragment_size:
951-
input:
952-
rules.fragment_size.output,
953-
output:
954-
f"{cfg.como_root}/{{tissue}}/fragmentSizes/{{sample}}/{{tissue}}_{{tag}}_fragment_size.txt"
955-
resources:
956-
mem_mb=256,
957-
runtime=1,
958-
tissue=lambda wildcards: wildcards.tissue,
959-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_fragment_size/copy_fragment_size_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
960-
shell:
961-
"""cp --verbose {input} {output}"""
944+
localrule: True
945+
input: rules.fragment_size.output,
946+
output: f"{cfg.como_root}/{{tissue}}/fragmentSizes/{{sample}}/{{tissue}}_{{tag}}_fragment_size.txt"
947+
shell: """cp {input} {output}"""
962948

963949
rule copy_insert_size:
964-
input:
965-
rules.insert_size.output.txt,
966-
output:
967-
f"{cfg.como_root}/{{tissue}}/insertSizeMetrics/{{sample}}/{{tissue}}_{{tag}}_insert_size.txt"
968-
resources:
969-
mem_mb=256,
970-
runtime=1,
971-
tissue=lambda wildcards: wildcards.tissue,
972-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_insert_size/copy_insert_size_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
973-
shell:
974-
"""cp --verbose {input} {output}"""
950+
localrule: True
951+
input: rules.insert_size.output.txt,
952+
output: f"{cfg.como_root}/{{tissue}}/insertSizeMetrics/{{sample}}/{{tissue}}_{{tag}}_insert_size.txt"
953+
shell: """cp {input} {output}"""
975954

976955
rule copy_rnaseq_metrics:
977-
input:
978-
rules.rnaseq_metrics.output.strand,
979-
output:
980-
f"{cfg.como_root}/{{tissue}}/strandedness/{{sample}}/{{tissue}}_{{tag}}_strandedness.txt"
981-
resources:
982-
mem_mb=256,
983-
runtime=1,
984-
tissue=lambda wildcards: wildcards.tissue,
985-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_rnaseq_metrics/copy_rnaseq_metrics_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
986-
shell:
987-
"""cp --verbose {input} {output}"""
988-
956+
localrule: True
957+
input: rules.rnaseq_metrics.output.strand,
958+
output: f"{cfg.como_root}/{{tissue}}/strandedness/{{sample}}/{{tissue}}_{{tag}}_strandedness.txt"
959+
shell: """cp {input} {output}"""
989960

990961
rule copy_gene_counts:
991-
input:
992-
rules.align.output.gene_table,
993-
output:
994-
f"{cfg.como_root}/{{tissue}}/geneCounts/{{sample}}/{{tissue}}_{{tag}}.tab"
995-
resources:
996-
mem_mb=256,
997-
runtime=1,
998-
tissue=lambda wildcards: wildcards.tissue,
999-
benchmark: repeat(f"{cfg.benchmark_dir}/{{tissue}}/copy_gene_counts/copy_gene_counts_{{sample}}/{{tissue}}_{{tag}}.benchmark",cfg.benchmark_count)
1000-
shell:
1001-
"""cp --verbose {input} {output}"""
962+
localrule: True
963+
input: rules.align.output.gene_table,
964+
output: f"{cfg.como_root}/{{tissue}}/geneCounts/{{sample}}/{{tissue}}_{{tag}}.tab"
965+
shell: """cp {input} {output}"""
1002966

1003967

1004968
def multiqc_contamination_input(wildcards) -> list[str]:

profiles/cluster/config.v8+.yaml

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,20 @@
11
# This file configures various settings for snakemake to execute jobs to a SLURM cluster.
22
# FROM: https://github.com/jdblischak/smk-simple-slurm
33

4-
executor: cluster-generic
5-
cluster-generic-submit-cmd:
6-
mkdir -p logs/{rule}/{resources.tissue} &&
7-
sbatch
8-
--job-name=smk-{rule}-{wildcards}
9-
--account=helikarlab
10-
--cpus-per-task={threads}
11-
--output=logs/{rule}/{resources.tissue}/{rule}-{wildcards}.out
12-
--mem={resources.mem_mb}
13-
--time={resources.runtime}
14-
--parsable
4+
resources:
5+
network_slots: 5
156

16-
# Define tissue name
177
default-resources:
188
mem_mb: 2048
19-
network_slots: 5
9+
2010

2111
# Default job submission
2212
retries: 3
13+
executor: cluster-generic
14+
cluster-generic-submit-cmd: profiles/cluster/submit.sh --job-name=smk-{rule}-{wildcards} --cpus-per-task={threads} --output=logs/{rule}/{resources.tissue}/{rule}-{wildcards}.out --mem={resources.mem_mb} --time={resources.runtime} --parsable
15+
cluster-generic-status-cmd: status.sh
2316
cluster-generic-cancel-cmd: scancel
2417
cluster-generic-cancel-nargs: 50
25-
cluster-generic-status-cmd: status.sh
2618
max-jobs-per-second: 10
2719
max-status-checks-per-second: 5
2820
latency-wait: 60

profiles/cluster/submit.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
3+
#######################################
# Resolve the SLURM account that jobs should be billed to.
# Precedence:
#   1. SNAKEMAKE_SLURM_ACCOUNT environment variable, if set and non-empty.
#   2. The user's SLURM default account (sacctmgr DefaultAccount).
#   3. The first account sacctmgr lists for the user.
# Globals:   SNAKEMAKE_SLURM_ACCOUNT (read), USER (read)
# Outputs:   the account name on stdout, or an empty string if none found
# Returns:   0 always; an empty result signals "no account determined"
#######################################
select_account() {
  # Explicit override always wins.
  if [[ -n "${SNAKEMAKE_SLURM_ACCOUNT:-}" ]]; then
    echo "${SNAKEMAKE_SLURM_ACCOUNT}"
    return 0
  fi

  # Fall back to `id -un` so the lookup still works when $USER is unset
  # (e.g. under cron or with `set -u` enabled).
  local user
  user="${USER:-$(id -un)}"

  # `2>/dev/null … || true` keeps a missing or failing sacctmgr from
  # producing noise or aborting the script; we simply fall through.
  local acct=""
  acct="$(sacctmgr --noheader --parsable2 show user "$user" format=DefaultAccount 2>/dev/null | head -n1 | tr -d '[:space:]' || true)"
  if [[ -n "${acct}" && "${acct}" != "Unknown" ]]; then
    echo "$acct"
    return 0
  fi

  # Otherwise pick the first account returned for the user.
  acct="$(sacctmgr --noheader --parsable2 show user "$user" format=Account 2>/dev/null | head -n1 | tr -d '[:space:]' || true)"
  if [[ -n "${acct}" && "${acct}" != "Unknown" ]]; then
    echo "$acct"
    return 0
  fi

  # No account could be determined; callers treat empty as "omit --account".
  echo ""
}
25+
26+
# Detect whether the caller already supplied an account flag; if so we must
# not inject our own, or sbatch would receive conflicting --account options.
# NOTE: an explicit `if` is used instead of `[[ … ]] && … && break` — the
# short-circuit form leaves the loop body with a non-zero status on every
# non-matching argument, which would abort the script under `set -e`.
have_account_arg=0
for arg in "$@"; do
  if [[ "$arg" == --account* || "$arg" == -A* ]]; then
    have_account_arg=1
    break
  fi
done

# Submit via sbatch, injecting the auto-detected account only when the
# caller did not pass one and detection succeeded. `exec` replaces this
# shell so sbatch's exit status propagates directly to Snakemake.
acct="$(select_account)"
if [[ "$have_account_arg" -eq 0 && -n "${acct}" ]]; then
  exec sbatch --account="$acct" "$@"
else
  exec sbatch "$@"
fi

profiles/local/config.v8+.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ resources:
33

44
default-resources:
55
mem_mb: 2048
6-
network_slots: 5
76

87
cores: 'all'
98

0 commit comments

Comments
 (0)