@@ -34,35 +34,43 @@ awk -v OFS="\\t" '{{print $2,$4,$5,$1,"0",$3,$6,$7,"0",$8,$9,$10}}' ${{bn}}.tmp
3434rm -f ${{bn}}.tmp
3535"""
3636
37- rule get_rsem_counts :
37+ rule get_strand_info :
3838 input :
3939 starbam = rules .star .output .starbam , # used only for strandinfo
40+ output :
41+ strandinfo = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}.strandinfo" )
42+ envmodules : TOOLS ['rseqc' ]['version' ],
43+ params :
44+ sample = "{sample}" ,
45+ rsemdir = join (WORKDIR ,"rsem" )
46+ shell :"""
47+ set -ex -o pipefail
48+ cd {params.rsemdir}
49+ # inter strandedness
50+ # samtools index -@{threads} {input.starbam}
51+ infer_experiment.py -r {input.bed12} -i {input.starbam} -s 1000000 > {output.strandinfo}
52+ """
53+
54+ rule get_rsem_counts :
55+ input :
56+ strandinfo = rule .get_strand_info .output .strandinfo ,
4057 tbam = rules .star .output .tbam ,
41- mutatedtbam = rules .split_tbam_by_mutation .output .mutatedtbam ,
42- unmutatedtbam = rules .split_tbam_by_mutation .output .unmutatedtbam ,
4358 bed12 = rules .create_bed12 .output .bed12 ,
4459 ump = rules .create_rsem_index .output .ump
4560 output :
46- strandinfo = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}.strandinfo" ),
4761 gcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}.RSEM.genes.results" ),
4862 tcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}.RSEM.isoforms.results" ),
49- mutatedgcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_mutated.RSEM.genes.results" ),
50- mutatedtcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_mutated.RSEM.isoforms.results" ),
51- unmutatedgcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_unmutated.RSEM.genes.results" ),
52- unmutatedtcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_unmutated.RSEM.isoforms.results" ),
5363 envmodules : TOOLS ['rseqc' ]['version' ], TOOLS ['rsem' ]['version' ], TOOLS ["samtools" ]["version" ]
5464 threads : getthreads ("get_rsem_counts" )
5565 params :
5666 sample = "{sample}" ,
5767 rsemdir = join (WORKDIR ,"rsem" )
5868 shell :"""
59- set -exuf -o pipefail
69+ set -ex -o pipefail
6070cd {params.rsemdir}
61- # inter strandedness
62- # samtools index -@{threads} {input.starbam}
63- infer_experiment.py -r {input.bed12} -i {input.starbam} -s 1000000 > {output.strandinfo}
71+
6472# Get strandedness to calculate Forward Probability
65- fp=$(tail -n1 {output .strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
73+ fp=$(tail -n1 {input .strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
6674echo "Forward Probability Passed to RSEM: $fp"
6775rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
6876
@@ -72,12 +80,62 @@ rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
7280mv {params.sample}.genes.results {output.gcounts}
7381mv {params.sample}.isoforms.results {output.tcounts}
7482
83+ """
84+
85+ rule get_rsem_counts_mutated :
86+ input :
87+ strandinfo = rule .get_strand_info .output .strandinfo ,
88+ mutatedtbam = rules .split_tbam_by_mutation .output .mutatedtbam ,
89+ bed12 = rules .create_bed12 .output .bed12 ,
90+ ump = rules .create_rsem_index .output .ump
91+ output :
92+ mutatedgcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_mutated.RSEM.genes.results" ),
93+ mutatedtcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_mutated.RSEM.isoforms.results" ),
94+ envmodules : TOOLS ['rseqc' ]['version' ], TOOLS ['rsem' ]['version' ], TOOLS ["samtools" ]["version" ]
95+ threads : getthreads ("get_rsem_counts" )
96+ params :
97+ sample = "{sample}" ,
98+ rsemdir = join (WORKDIR ,"rsem" )
99+ shell :"""
100+ set -ex -o pipefail
101+ cd {params.rsemdir}
102+
103+ # Get strandedness to calculate Forward Probability
104+ fp=$(tail -n1 {input.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
105+ echo "Forward Probability Passed to RSEM: $fp"
106+ rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
107+
75108rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
76109 --bam --paired-end -p {threads} {input.mutatedtbam} $rsemindex {params.sample}_mutated --time \
77110 --temporary-folder /lscratch/$SLURM_JOBID --keep-intermediate-files --forward-prob=${{fp}} --estimate-rspd
78111mv {params.sample}_mutated.genes.results {output.mutatedgcounts}
79112mv {params.sample}_mutated.isoforms.results {output.mutatedtcounts}
80113
114+ """
115+
116+ rule get_rsem_counts_unmutated :
117+ input :
118+ strandinfo = rule .get_strand_info .output .strandinfo ,
119+ unmutatedtbam = rules .split_tbam_by_mutation .output .unmutatedtbam ,
120+ bed12 = rules .create_bed12 .output .bed12 ,
121+ ump = rules .create_rsem_index .output .ump
122+ output :
123+ unmutatedgcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_unmutated.RSEM.genes.results" ),
124+ unmutatedtcounts = join (WORKDIR ,"rsem" ,"{sample}" ,"{sample}_unmutated.RSEM.isoforms.results" ),
125+ envmodules : TOOLS ['rseqc' ]['version' ], TOOLS ['rsem' ]['version' ], TOOLS ["samtools" ]["version" ]
126+ threads : getthreads ("get_rsem_counts" )
127+ params :
128+ sample = "{sample}" ,
129+ rsemdir = join (WORKDIR ,"rsem" )
130+ shell :"""
131+ set -ex -o pipefail
132+ cd {params.rsemdir}
133+
134+ # Get strandedness to calculate Forward Probability
135+ fp=$(tail -n1 {input.strandinfo} | awk '{{if($NF > 0.75) print "0.0"; else if ($NF<0.25) print "1.0"; else print "0.5";}}')
136+ echo "Forward Probability Passed to RSEM: $fp"
137+ rsemindex=$(echo {input.bed12}|sed "s@.bed12@@g")
138+
81139rsem-calculate-expression --no-bam-output --calc-ci --seed 12345 \
82140 --bam --paired-end -p {threads} {input.unmutatedtbam} $rsemindex {params.sample}_unmutated --time \
83141 --temporary-folder /lscratch/$SLURM_JOBID --keep-intermediate-files --forward-prob=${{fp}} --estimate-rspd
0 commit comments