Skip to content

Commit e6b14ae

Browse files
committed
separate rsem into multiple rules
1 parent cc59df6 commit e6b14ae

1 file changed

Lines changed: 71 additions & 13 deletions

File tree

workflow/rules/rsem.smk

Lines changed: 71 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,35 +34,43 @@ awk -v OFS="\\t" '{{print $2,$4,$5,$1,"0",$3,$6,$7,"0",$8,$9,$10}}' ${{bn}}.tmp
3434
rm -f ${{bn}}.tmp
3535
"""
3636

37-
rule get_rsem_counts:
37+
rule get_strand_info:
3838
input:
3939
starbam=rules.star.output.starbam, # used only for strandinfo
40+
output:
41+
strandinfo=join(WORKDIR,"rsem","{sample}","{sample}.strandinfo")
42+
envmodules: TOOLS['rseqc']['version'],
43+
params:
44+
sample="{sample}",
45+
rsemdir=join(WORKDIR,"rsem")
46+
shell:"""
47+
set -ex -o pipefail
48+
cd {params.rsemdir}
49+
# inter strandedness
50+
# samtools index -@{threads} {input.starbam}
51+
infer_experiment.py -r {input.bed12} -i {input.starbam} -s 1000000 > {output.strandinfo}
52+
"""
53+
54+
rule get_rsem_counts:
55+
input:
56+
strandinfo=rule.get_strand_info.output.strandinfo,
4057
tbam=rules.star.output.tbam,
41-
mutatedtbam=rules.split_tbam_by_mutation.output.mutatedtbam,
42-
unmutatedtbam=rules.split_tbam_by_mutation.output.unmutatedtbam,
4358
bed12=rules.create_bed12.output.bed12,
4459
ump=rules.create_rsem_index.output.ump
4560
output:
46-
strandinfo=join(WORKDIR,"rsem","{sample}","{sample}.strandinfo"),
4761
gcounts=join(WORKDIR,"rsem","{sample}","{sample}.RSEM.genes.results"),
4862
tcounts=join(WORKDIR,"rsem","{sample}","{sample}.RSEM.isoforms.results"),
49-
mutatedgcounts=join(WORKDIR,"rsem","{sample}","{sample}_mutated.RSEM.genes.results"),
50-
mutatedtcounts=join(WORKDIR,"rsem","{sample}","{sample}_mutated.RSEM.isoforms.results"),
51-
unmutatedgcounts=join(WORKDIR,"rsem","{sample}","{sample}_unmutated.RSEM.genes.results"),
52-
unmutatedtcounts=join(WORKDIR,"rsem","{sample}","{sample}_unmutated.RSEM.isoforms.results"),
5363
envmodules: TOOLS['rseqc']['version'], TOOLS['rsem']['version'], TOOLS["samtools"]["version"]
5464
threads: getthreads("get_rsem_counts")
5565
params:
5666
sample="{sample}",
5767
rsemdir=join(WORKDIR,"rsem")
5868
shell:"""
59-
set -exuf -o pipefail
69+
set -ex -o pipefail
6070
cd {params.rsemdir}
61-
# inter strandedness
62-
# samtools index -@{threads} {input.starbam}
63-
infer_experiment.py -r {input.bed12} -i {input.starbam} -s 1000000 > {output.strandinfo}
71+
6472
# Get strandedness to calculate Forward Probability
65-
fp=$(tail -n1 {output.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
73+
fp=$(tail -n1 {input.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
6674
echo "Forward Probability Passed to RSEM: $fp"
6775
rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
6876
@@ -72,12 +80,62 @@ rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
7280
mv {params.sample}.genes.results {output.gcounts}
7381
mv {params.sample}.isoforms.results {output.tcounts}
7482
83+
"""
84+
85+
rule get_rsem_counts_mutated:
86+
input:
87+
strandinfo=rule.get_strand_info.output.strandinfo,
88+
mutatedtbam=rules.split_tbam_by_mutation.output.mutatedtbam,
89+
bed12=rules.create_bed12.output.bed12,
90+
ump=rules.create_rsem_index.output.ump
91+
output:
92+
mutatedgcounts=join(WORKDIR,"rsem","{sample}","{sample}_mutated.RSEM.genes.results"),
93+
mutatedtcounts=join(WORKDIR,"rsem","{sample}","{sample}_mutated.RSEM.isoforms.results"),
94+
envmodules: TOOLS['rseqc']['version'], TOOLS['rsem']['version'], TOOLS["samtools"]["version"]
95+
threads: getthreads("get_rsem_counts")
96+
params:
97+
sample="{sample}",
98+
rsemdir=join(WORKDIR,"rsem")
99+
shell:"""
100+
set -ex -o pipefail
101+
cd {params.rsemdir}
102+
103+
# Get strandedness to calculate Forward Probability
104+
fp=$(tail -n1 {input.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
105+
echo "Forward Probability Passed to RSEM: $fp"
106+
rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
107+
75108
rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
76109
--bam --paired-end -p {threads} {input.mutatedtbam} $rsemindex {params.sample}_mutated --time \
77110
--temporary-folder /lscratch/$SLURM_JOBID --keep-intermediate-files --forward-prob=${{fp}} --estimate-rspd
78111
mv {params.sample}_mutated.genes.results {output.mutatedgcounts}
79112
mv {params.sample}_mutated.isoforms.results {output.mutatedtcounts}
80113
114+
"""
115+
116+
rule get_rsem_counts_unmutated:
117+
input:
118+
strandinfo=rule.get_strand_info.output.strandinfo,
119+
unmutatedtbam=rules.split_tbam_by_mutation.output.unmutatedtbam,
120+
bed12=rules.create_bed12.output.bed12,
121+
ump=rules.create_rsem_index.output.ump
122+
output:
123+
unmutatedgcounts=join(WORKDIR,"rsem","{sample}","{sample}_unmutated.RSEM.genes.results"),
124+
unmutatedtcounts=join(WORKDIR,"rsem","{sample}","{sample}_unmutated.RSEM.isoforms.results"),
125+
envmodules: TOOLS['rseqc']['version'], TOOLS['rsem']['version'], TOOLS["samtools"]["version"]
126+
threads: getthreads("get_rsem_counts")
127+
params:
128+
sample="{sample}",
129+
rsemdir=join(WORKDIR,"rsem")
130+
shell:"""
131+
set -ex -o pipefail
132+
cd {params.rsemdir}
133+
134+
# Get strandedness to calculate Forward Probability
135+
fp=$(tail -n1 {input.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
136+
echo "Forward Probability Passed to RSEM: $fp"
137+
rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
138+
81139
rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
82140
--bam --paired-end -p {threads} {input.unmutatedtbam} $rsemindex {params.sample}_unmutated --time \
83141
--temporary-folder /lscratch/$SLURM_JOBID --keep-intermediate-files --forward-prob=${{fp}} --estimate-rspd

0 commit comments

Comments
 (0)