Skip to content

Commit 22e64cd

Browse files
authored
fix: SRA files that have dependencies were not being downloaded (#114)
This commit makes prefetch download each SRA accession into a directory instead of a single file. The accession's dependencies are downloaded along with it, and fasterq-dump recognizes and handles them automatically.
1 parent cc63250 commit 22e64cd

1 file changed

Lines changed: 26 additions & 25 deletions

File tree

Snakefile

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -321,17 +321,21 @@ rule fastq_dump_paired:
321321
tmpdir=$(mktemp -d)
322322
trap "rm -rf $tmpdir" EXIT
323323
324-
sra_temp="$tmpdir/{wildcards.tissue}_{wildcards.tag}.sra"
325-
prefetch --max-size u --progress --log-level info --output-file "$sra_temp" {params.srr} 1>{log} 2>&1
326-
327-
tmp_forward="$tmpdir/{wildcards.tissue}_{wildcards.tag}_1.fastq"
328-
tmp_reverse="$tmpdir/{wildcards.tissue}_{wildcards.tag}_2.fastq"
329-
fasterq-dump --force --split-files --progress --threads {threads} --temp "$tmpdir" --outdir "$tmpdir" "$sra_temp" 1>>{log} 2>&1
330-
pigz --processes {threads} --force "$tmp_forward" "$tmp_reverse"
331-
332-
printf "\n\n" >> {log}
333-
mv --verbose "$tmp_forward.gz" "{output.r1}" 1>>{log} 2>&1 &
334-
mv --verbose "$tmp_reverse.gz" "{output.r2}" 1>>{log} 2>&1 &
324+
sra_cache="$tmpdir/sra_cache"
325+
fastq_cache="$tmpdir/fastq_cache"
326+
mkdir -p "$sra_cache" "$fastq_cache"
327+
328+
prefetch --max-size u --progress --log-level info --force ALL --output-directory "$sra_cache" {params.srr} 1>{log} 2>&1
329+
330+
sra_temp="$sra_cache/{params.srr}.sra"
331+
fq_forward="$fastq_cache/{params.srr}_1.fastq"
332+
fq_reverse="$fastq_cache/{params.srr}_2.fastq"
333+
fasterq-dump --force --split-files --progress --threads {threads} --temp "$fastq_cache" --outdir "$fastq_cache" "$sra_temp" 1>>{log} 2>&1
334+
printf "\nGzipping:\n1) $fq_forward\n2) $fq_reverse" >> {log}
335+
pigz --processes {threads} --force "$fq_forward" "$fq_reverse"
336+
337+
mv --verbose "$fq_forward.gz" "{output.r1}" 1>>{log} 2>&1 &
338+
mv --verbose "$fq_reverse.gz" "{output.r2}" 1>>{log} 2>&1 &
335339
336340
wait
337341
"""
@@ -357,15 +361,19 @@ rule fastq_dump_single:
357361
tmpdir=$(mktemp -d)
358362
trap "rm -rf $tmpdir" EXIT
359363
360-
sra_temp="$tmpdir/{wildcards.tissue}_{wildcards.tag}.sra"
361-
prefetch --max-size u --progress --log-level info --output-file "$sra_temp" {params.srr} 1>>{log} 2>&1
364+
sra_cache="$tmpdir/sra_cache"
365+
fastq_cache="$tmpdir/fastq_cache"
366+
mkdir -p "$sra_cache" "$fastq_cache"
362367
363-
tmpfile="$tmpdir/{wildcards.tissue}_{wildcards.tag}.fastq"
364-
fasterq-dump --force --concatenate-reads --progress --threads {threads} --temp "$tmpdir" --outdir "$tmpdir" "$sra_temp" 1>>{log} 2>&1
365-
printf "\nGzipping $tmpfile file\n\n" >> {log}
366-
pigz -6 --processes 4 --force "$tmpfile"
368+
prefetch --max-size u --progress --log-level info --force ALL --output-directory "$sra_cache" {params.srr} 1>>{log} 2>&1
369+
370+
sra_file="$sra_cache/{params.srr}/{params.srr}.sra"
371+
fastq_file="$fastq_cache/{params.srr}.fastq"
372+
fasterq-dump --force --concatenate-reads --progress --threads {threads} --temp "$fastq_cache" --outdir "$fastq_cache" "$sra_file" 1>>{log} 2>&1
373+
printf "\nGzipping: $fastq_file\n\n" >> {log}
374+
pigz --processes {threads} --force "$fastq_file"
367375
368-
mv --verbose "$tmpfile.gz" {output.S} 1>>{log} 2>&1
376+
mv --verbose "$fastq_file.gz" {output.S} 1>>{log} 2>&1
369377
"""
370378

371379

@@ -412,7 +420,6 @@ rule qc_raw_fastq_paired:
412420
413421
fastqc {input.reads} --threads {threads} -o "$tmpdir" 1>{log} 2>&1
414422
415-
printf "\n\n" >> {log}
416423
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1_fastqc.zip" "{output.r1_zip}" 1>>{log} 2>&1
417424
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1_fastqc.html" "{output.r1_html}" 1>>{log} 2>&1
418425
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_2_fastqc.zip" "{output.r2_zip}" 1>>{log} 2>&1
@@ -440,7 +447,6 @@ rule qc_raw_fastq_single:
440447
441448
fastqc {input} --threads 5 -o "$tmpdir" 1>{log} 2>&1
442449
443-
printf "\n\n" >> {log}
444450
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_S_fastqc.zip" "{output.s_zip}" 1>>{log} 2>&1
445451
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_S_fastqc.html" "{output.s_html}" 1>>{log} 2>&1
446452
"""
@@ -483,7 +489,6 @@ rule trim_paired:
483489
trap "rm -rf $tmpdir" EXIT
484490
trim_galore --paired --cores 4 -o "$tmpdir" {input.r1} {input.r2} 1>{log} 2>&1
485491
486-
printf "\n\n" >> {log}
487492
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1_val_1.fq.gz" "{output.r1_fastq}" 1>>{log} 2>&1
488493
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1.fastq.gz_trimming_report.txt" "{output.r1_report}" 1>>{log} 2>&1
489494
@@ -561,7 +566,6 @@ rule qc_trim_fastq_paired:
561566
562567
fastqc {input} --threads {threads} -o "$tmpdir" 1>{log} 2>&1
563568
564-
printf "\n\n" >> {log}
565569
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1_fastqc.zip" "{output.r1_zip}" 1>>{log} 2>&1
566570
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_1_fastqc.html" "{output.r1_html}" 1>>{log} 2>&1
567571
mv --verbose "$tmpdir/{wildcards.tissue}_{wildcards.tag}_2_fastqc.zip" "{output.r2_zip}" 1>>{log} 2>&1
@@ -591,7 +595,6 @@ rule qc_trim_fastq_single:
591595
592596
fastqc {input} --threads {threads} -o "$tmpdir" 1>{log} 2>&1
593597
594-
printf "\n\n" >> {log}
595598
mv --verbose "$tmp_zip" "{output.s_zip}" 1>>{log} 2>&1
596599
mv --verbose "$tmp_html" "{output.s_html}" 1>>{log} 2>&1
597600
"""
@@ -670,7 +673,6 @@ rule align:
670673
--outSAMattributes Standard \
671674
--quantMode GeneCounts TranscriptomeSAM 1>{log} 2>&1
672675
673-
printf "\n\n" >> {log}
674676
mv --verbose $tmpdir/* "$(dirname {output.gene_table})/" 1>>{log} 2>&1
675677
mv --verbose {params.gene_table} {output.gene_table} 1>>{log} 2>&1
676678
mv --verbose {params.bam_output} {output.bam_file} 1>>{log} 2>&1
@@ -724,7 +726,6 @@ rule salmon_quantification:
724726
--output {params.outdir} \
725727
--seqBias --gcBias --posBias --useVBOpt 1>{log} 2>&1
726728
727-
printf "\n\n" >> {log}
728729
mv --verbose {params.outdir}/quant.sf {output.quant} 1>>{log} 2>&1
729730
mv --verbose {params.outdir}/cmd_info.json {output.meta} 1>>{log} 2>&1
730731
"""

0 commit comments

Comments
 (0)