Skip to content

Commit 0fdadcb

Browse files
committed
nfragment data collection updated
1 parent a70236e commit 0fdadcb

2 files changed

Lines changed: 90 additions & 27 deletions

File tree

workflow/rules/qc.smk

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,21 @@ k=62 extend2=200 rem ecct -Xmx{params.mem}
4747

4848
rule get_nfragments_json:
4949
input:
50-
rules.get_fastq_nreads.output.o1,
51-
rules.get_fastq_nreads.output.o2,
52-
rules.get_fastq_nreads.output.o3,
53-
rules.star.output.postsecondarysupplementaryfilterbamflagstat,
54-
rules.star.output.postinsertionfilterbamflagstat,
55-
rules.star.output.bamflagstat,
56-
rules.star.output.plusbamflagstat,
57-
rules.star.output.minusbamflagstat,
58-
rules.split_bam_by_mutation.output.mutatedbamflagstat,
59-
rules.split_bam_by_mutation.output.unmutatedbamflagstat,
60-
rules.call_mutations.output.vcf,
61-
rules.call_mutations.output.plusvcf,
62-
rules.call_mutations.output.minusvcf,
50+
rawlanestxt=rules.get_fastq_nreads.output.o1, # raw lanes.txt
51+
trimmedlanestxt=rules.get_fastq_nreads.output.o2, # trimmed lanes.txt
52+
fastuniqlanestxt=rules.get_fastq_nreads.output.o3, # fastuniq lanes.txt
53+
postsecondarysupplementaryfilterbamflagstat=rules.star.output.postsecondarysupplementaryfilterbamflagstat,
54+
postinsertionfilterbamflagstat=rules.star.output.postinsertionfilterbamflagstat,
55+
postmapqwidowfilterbamflagstat=rules.star.output.bamflagstat,
56+
pluspostmapqwidowfilterbamflagstat=rules.star.output.plusbamflagstat,
57+
minuspostmapqwidowfilterbamflagstat=rules.star.output.minusbamflagstat,
58+
plustoSNPcallingbamflagstat=rules.create_toSNPcalling_BAM.plustoSNPcallingbamflagstat,
59+
minustoSNPcallingbamflagstat=rules.create_toSNPcalling_BAM.minustoSNPcallingbamflagstat,
60+
mutatedbamflagstat=rules.split_bam_by_mutation.output.mutatedbamflagstat,
61+
unmutatedbamflagstat=rules.split_bam_by_mutation.output.unmutatedbamflagstat,
62+
vcf=rules.call_mutations.output.vcf,
63+
plusvcf=rules.call_mutations.output.plusvcf,
64+
minusvcf=rules.call_mutations.output.minusvcf,
6365
output:
6466
json=join(WORKDIR,"qc","nfragments","{sample}.json")
6567
params:
@@ -70,7 +72,22 @@ rule get_nfragments_json:
7072
shell:"""
7173
set -exuf -o pipefail
7274
cd {params.workdir}
73-
python {params.pyscript} {params.sample} {output.json}
75+
python {params.pyscript} {params.sample} {output.json} \
76+
{input.rawlanestxt} \
77+
{input.trimmedlanestxt} \
78+
{input.fastuniqlanestxt} \
79+
{input.postsecondarysupplementaryfilterbamflagstat} \
80+
{input.postinsertionfilterbamflagstat} \
81+
{input.postmapqwidowfilterbamflagstat} \
82+
{input.pluspostmapqwidowfilterbamflagstat} \
83+
{input.minuspostmapqwidowfilterbamflagstat} \
84+
{input.plustoSNPcallingbamflagstat} \
85+
{input.minustoSNPcallingbamflagstat} \
86+
{input.mutatedbamflagstat} \
87+
{input.unmutatedbamflagstat} \
88+
{input.vcf} \
89+
{input.plusvcf} \
90+
{input.minusvcf}
7491
"""
7592

7693
rule create_nfragments_table:

workflow/scripts/get_per_sample_nfragments.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,77 @@
22
import json
33
import sys
44

5+
# python {params.pyscript} {params.sample} {output.json} \
6+
# {input.rawlanestxt} \
7+
# {input.trimmedlanestxt} \
8+
# {input.fastuniqlanestxt} \
9+
# {input.postsecondarysupplementaryfilterbamflagstat} \
10+
# {input.postinsertionfilterbamflagstat} \
11+
# {input.postmapqwidowfilterbamflagstat} \
12+
# {input.pluspostmapqwidowfilterbamflagstat} \
13+
# {input.minuspostmapqwidowfilterbamflagstat} \
14+
# {input.plustoSNPcallingbamflagstat} \
15+
# {input.minustoSNPcallingbamflagstat} \
16+
# {input.mutatedbamflagstat} \
17+
# {input.unmutatedbamflagstat} \
18+
# {input.vcf} \
19+
# {input.plusvcf} \
20+
# {input.minusvcf}
21+
522
def get_nreads_from_labels(filename,samplename):
623
f=open(filename)
724
for l in f.readlines():
825
l=l.strip().split("\t")
926
if l[0]=="#" and l[1]==samplename:
1027
return l[2]
1128

29+
def get_nfragments_from_flagstat(filename):
30+
nf=int(float(subprocess.check_output("grep properly "+filename+"|awk '{print $1/2}'",shell=True).strip()))
31+
return nf
32+
33+
34+
def get_nmutations_from_vcf(filename):
35+
nm=int(float(subprocess.check_output("zcat "+filename+" | grep -v ^# |wc -l",shell=True).strip()))
36+
return nm
37+
38+
39+
1240
samplename=sys.argv[1] #sample name
1341
outfile=sys.argv[2] #output json name.... sample.json
42+
rawlanestxt=sys.argv[3] # raw lanes.txt
43+
trimmedlanestxt=sys.argv[4] # trimmed lanes.txt
44+
fastuniqlanestxt=sys.argv[5] # fastuniq lanes.txt
45+
postsecondarysupplementaryfilterbamflagstat=sys.argv[6]
46+
postinsertionfilterbamflagstat=sys.argv[7]
47+
postmapqwidowfilterbamflagstat=sys.argv[8]
48+
pluspostmapqwidowfilterbamflagstat=sys.argv[9]
49+
minuspostmapqwidowfilterbamflagstat=sys.argv[10]
50+
plustoSNPcallingbamflagstat=sys.argv[11]
51+
minustoSNPcallingbamflagstat=sys.argv[12]
52+
mutatedbamflagstat=sys.argv[13]
53+
unmutatedbamflagstat=sys.argv[14]
54+
vcf=sys.argv[15]
55+
plusvcf=sys.argv[16]
56+
minusvcf=sys.argv[17]
57+
1458
data=dict()
1559
data['samplename']=samplename
1660
data['nfragments']=dict()
17-
data['nfragments']['raw'] = get_nreads_from_labels("raw_fastq/lanes.txt",samplename)
18-
data['nfragments']['trim'] = get_nreads_from_labels("trim/lanes.txt",samplename)
19-
data['nfragments']['fastuniq'] = get_nreads_from_labels("fastuniq/lanes.txt",samplename)
20-
data['nfragments']['post_secondary_supplementary_filter'] = int(float(subprocess.check_output("grep properly hisat2/"+samplename+".post_secondary_supplementary_filter.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
21-
data['nfragments']['post_insertion_filter'] = int(float(subprocess.check_output("grep properly hisat2/"+samplename+".post_insertion_filter.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
22-
data['nfragments']['post_mapq_widow_filter'] = int(float(subprocess.check_output("grep properly hisat2/"+samplename+".bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
23-
data['nfragments']['to_SNP_calling_plus_strand'] = int(float(subprocess.check_output("grep properly hisat2/"+samplename+".plus.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
24-
data['nfragments']['to_SNP_calling_minus_strand'] = int(float(subprocess.check_output("grep properly hisat2/"+samplename+".minus.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
25-
data['nfragments']['mutated'] = int(float(subprocess.check_output("grep properly bams/"+samplename+".mutated.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
26-
data['nfragments']['unmutated'] = int(float(subprocess.check_output("grep properly bams/"+samplename+".unmutated.bam.flagstat|awk '{print $1/2}'",shell=True).strip()))
27-
data['nmutations'] = int(float(subprocess.check_output("zcat vcf/"+samplename+".vcf.gz | grep -v ^# |wc -l",shell=True).strip()))
28-
data['nmutations_plus'] = int(float(subprocess.check_output("zcat vcf/"+samplename+".plus.vcf.gz | grep -v ^# |wc -l",shell=True).strip()))
29-
data['nmutations_minus'] = int(float(subprocess.check_output("zcat vcf/"+samplename+".minus.vcf.gz | grep -v ^# |wc -l",shell=True).strip()))
61+
data['nfragments']['raw'] = get_nreads_from_labels(rawlanestxt,samplename)
62+
data['nfragments']['trim'] = get_nreads_from_labels(trimmedlanestxt,samplename)
63+
data['nfragments']['fastuniq'] = get_nreads_from_labels(fastuniqlanestxt,samplename)
64+
data['nfragments']['post_secondary_supplementary_filter'] = get_nfragments_from_flagstat(postsecondarysupplementaryfilterbamflagstat)
65+
data['nfragments']['post_insertion_filter'] = get_nfragments_from_flagstat(postinsertionfilterbamflagstat)
66+
data['nfragments']['post_mapq_widow_filter'] = get_nfragments_from_flagstat(postmapqwidowfilterbamflagstat)
67+
data['nfragments']['post_mapq_widow_filter_plus_strand'] = get_nfragments_from_flagstat(pluspostmapqwidowfilterbamflagstat)
68+
data['nfragments']['post_mapq_widow_filter_minus_strand'] = get_nfragments_from_flagstat(minuspostmapqwidowfilterbamflagstat)
69+
data['nfragments']['to_SNP_calling_plus_strand'] = get_nfragments_from_flagstat(plustoSNPcallingbamflagstat)
70+
data['nfragments']['to_SNP_calling_minus_strand'] = get_nfragments_from_flagstat(minustoSNPcallingbamflagstat)
71+
data['nfragments']['mutated'] = get_nfragments_from_flagstat(mutatedbamflagstat)
72+
data['nfragments']['unmutated'] = get_nfragments_from_flagstat(unmutatedbamflagstat)
73+
data['nmutations'] = get_nmutations_from_vcf(vcf)
74+
data['nmutations_plus'] = get_nmutations_from_vcf(plusvcf)
75+
data['nmutations_minus'] = get_nmutations_from_vcf(minusvcf)
3076
out_file = open(outfile, "w")
3177
json.dump(data, out_file, indent = 6)
3278
out_file.close()

0 commit comments

Comments
 (0)