|
2 | 2 | import json |
3 | 3 | import sys |
4 | 4 |
|
| 5 | +# python {params.pyscript} {params.sample} {output.json} \ |
| 6 | +# {input.rawlanestxt} \ |
| 7 | +# {input.trimmedlanestxt} \ |
| 8 | +# {input.fastuniqlanestxt} \ |
| 9 | +# {input.postsecondarysupplementaryfilterbamflagstat} \ |
| 10 | +# {input.postinsertionfilterbamflagstat} \ |
| 11 | +# {input.postmapqwidowfilterbamflagstat} \ |
| 12 | +# {input.pluspostmapqwidowfilterbamflagstat} \ |
| 13 | +# {input.minuspostmapqwidowfilterbamflagstat} \ |
| 14 | +# {input.plustoSNPcallingbamflagstat} \ |
| 15 | +# {input.minustoSNPcallingbamflagstat} \ |
| 16 | +# {input.mutatedbamflagstat} \ |
| 17 | +# {input.unmutatedbamflagstat} \ |
| 18 | +# {input.vcf} \ |
| 19 | +# {input.plusvcf} \ |
| 20 | +# {input.minusvcf} |
| 21 | + |
def get_nreads_from_labels(filename, samplename):
    """Return the count recorded for *samplename* in a lanes.txt-style file.

    The file is read as tab-separated text. The first line whose first field
    is "#" and whose second field equals *samplename* is selected, and its
    third field is returned unchanged (as a string).

    Args:
        filename: path to the tab-separated lanes file.
        samplename: sample identifier to look up in the second column.

    Returns:
        The third field of the matching line as a string, or None when no
        line matches.
    """
    # The context manager closes the handle even on the early return.
    with open(filename) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            # Skip lines too short to carry a count instead of raising
            # IndexError on them.
            if len(fields) < 3:
                continue
            if fields[0] == "#" and fields[1] == samplename:
                return fields[2]
    return None
11 | 28 |
|
def get_nfragments_from_flagstat(filename):
    """Return half of the count on the "properly" line of a flagstat file.

    The first line containing the word "properly" is located, its first
    whitespace-separated field is read as an integer, and that value is
    halved with integer division.

    Parsing is done in Python rather than through a `grep | awk` shell
    pipeline: this avoids passing the path through a shell, and avoids awk's
    default "%.6g" output format, which rounds large non-integer quotients.

    Args:
        filename: path to the flagstat text file.

    Returns:
        The halved count as an int (truncated for odd counts).

    Raises:
        ValueError: if no line contains "properly", or if the first field of
            the matching line is not an integer.
    """
    with open(filename) as handle:
        for line in handle:
            if "properly" in line:
                return int(line.split()[0]) // 2
    raise ValueError("no 'properly' line found in flagstat file: " + filename)
| 32 | + |
| 33 | + |
def get_nmutations_from_vcf(filename):
    """Count the lines of a gzip-compressed file that do not start with "#".

    This replaces the `zcat | grep -v ^# | wc -l` shell pipeline with
    in-process decompression, so the path is never interpreted by a shell and
    no external tools are required.

    Args:
        filename: path to the gzip-compressed VCF file.

    Returns:
        The number of lines not beginning with "#", as an int.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    import gzip

    # Binary mode: lines are compared byte-wise, so no text decoding is
    # needed.
    with gzip.open(filename, "rb") as handle:
        return sum(1 for line in handle if not line.startswith(b"#"))
| 37 | + |
| 38 | + |
| 39 | + |
# Positional command-line arguments, in the order shown in the usage comment
# at the top of this file. Fail early with a readable message instead of a
# bare IndexError when too few are given.
if len(sys.argv) < 18:
    sys.exit(
        "usage: " + sys.argv[0] + " sample output.json rawlanestxt "
        "trimmedlanestxt fastuniqlanestxt "
        "postsecondarysupplementaryfilterbamflagstat "
        "postinsertionfilterbamflagstat postmapqwidowfilterbamflagstat "
        "pluspostmapqwidowfilterbamflagstat "
        "minuspostmapqwidowfilterbamflagstat plustoSNPcallingbamflagstat "
        "minustoSNPcallingbamflagstat mutatedbamflagstat "
        "unmutatedbamflagstat vcf plusvcf minusvcf"
    )

samplename = sys.argv[1]  # sample name
outfile = sys.argv[2]  # output json name.... sample.json
rawlanestxt = sys.argv[3]  # raw lanes.txt
trimmedlanestxt = sys.argv[4]  # trimmed lanes.txt
fastuniqlanestxt = sys.argv[5]  # fastuniq lanes.txt
# flagstat files, one per filtering stage
postsecondarysupplementaryfilterbamflagstat = sys.argv[6]
postinsertionfilterbamflagstat = sys.argv[7]
postmapqwidowfilterbamflagstat = sys.argv[8]
pluspostmapqwidowfilterbamflagstat = sys.argv[9]
minuspostmapqwidowfilterbamflagstat = sys.argv[10]
plustoSNPcallingbamflagstat = sys.argv[11]
minustoSNPcallingbamflagstat = sys.argv[12]
mutatedbamflagstat = sys.argv[13]
unmutatedbamflagstat = sys.argv[14]
# VCF files: combined, plus strand, minus strand
vcf = sys.argv[15]
plusvcf = sys.argv[16]
minusvcf = sys.argv[17]
| 57 | + |
# Assemble the per-sample summary. Every input path comes from the command
# line rather than from hard-coded directory layouts.
data = dict()
data['samplename'] = samplename
data['nfragments'] = dict()
# Counts looked up by sample name in the lanes.txt files.
data['nfragments']['raw'] = get_nreads_from_labels(rawlanestxt, samplename)
data['nfragments']['trim'] = get_nreads_from_labels(trimmedlanestxt, samplename)
data['nfragments']['fastuniq'] = get_nreads_from_labels(fastuniqlanestxt, samplename)
# Counts derived from the flagstat file of each stage.
data['nfragments']['post_secondary_supplementary_filter'] = get_nfragments_from_flagstat(postsecondarysupplementaryfilterbamflagstat)
data['nfragments']['post_insertion_filter'] = get_nfragments_from_flagstat(postinsertionfilterbamflagstat)
data['nfragments']['post_mapq_widow_filter'] = get_nfragments_from_flagstat(postmapqwidowfilterbamflagstat)
data['nfragments']['post_mapq_widow_filter_plus_strand'] = get_nfragments_from_flagstat(pluspostmapqwidowfilterbamflagstat)
data['nfragments']['post_mapq_widow_filter_minus_strand'] = get_nfragments_from_flagstat(minuspostmapqwidowfilterbamflagstat)
data['nfragments']['to_SNP_calling_plus_strand'] = get_nfragments_from_flagstat(plustoSNPcallingbamflagstat)
data['nfragments']['to_SNP_calling_minus_strand'] = get_nfragments_from_flagstat(minustoSNPcallingbamflagstat)
data['nfragments']['mutated'] = get_nfragments_from_flagstat(mutatedbamflagstat)
data['nfragments']['unmutated'] = get_nfragments_from_flagstat(unmutatedbamflagstat)
# Non-header line counts of the VCF files.
data['nmutations'] = get_nmutations_from_vcf(vcf)
data['nmutations_plus'] = get_nmutations_from_vcf(plusvcf)
data['nmutations_minus'] = get_nmutations_from_vcf(minusvcf)

# The context manager guarantees the output file is flushed and closed even
# if serialization raises.
with open(outfile, "w") as out_file:
    json.dump(data, out_file, indent=6)
0 commit comments