File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 2626 run : conda config --show-sources
2727 - name : Run tests
2828 run : |
29+ export NXF_VER=22.04.5
2930 make
Original file line number Diff line number Diff line change 2929 bash test/scripts/run_test_11.sh
3030 bash test/scripts/run_test_12.sh
3131 bash test/scripts/run_test_13.sh
32+ bash test/scripts/run_test_14.sh
Original file line number Diff line number Diff line change @@ -268,6 +268,25 @@ These annotations are provided by VAFator (https://github.com/TRON-Bioinformatic
268268
269269No technical annotations are performed if the parameter ` --input_bams ` is not passed.
270270
271+ ## Phasing with WhatsHap
272+
273+ The phase of the mutations can be inferred from the reads pileup.
274+ This information is relevant to determine the impact of nearby mutations.
275+ WhatsHap adds the phasing information when possible to the VCF following the VCF specification for that purpose.
276+ Genotypes with phase information use the ` | ` instead of the ` / ` and ` 0|1 ` and ` 1|0 ` represent the two different phases.
277+ A homozygous mutations in phase is represented as ` 1|1 ` .
278+ Furthermore, because phasing may be incomplete, to define the different phased blocks the INFO/PS annotation used.
279+ All mutations belonging to the same phase block share the same PS number.
280+
281+ BAM files need to be provided to perform phasing with ` --input_bams ` and the phasing sample has to be determined with
282+ ` --phasing_sample ` .
283+
284+ This approach has some limitations:
285+ - We only support diploid genotyped VCF files. WhatsHap does support polyploid samples. But the particular case of
286+ somatic mutations is not supported.
287+ - The sample name chosen to perform the phasing has to be the same across all VCFs.
288+ Not to be mistaken with the patient name. Eg: only ` normal ` samples across all patients can be used for phasing
289+
271290## Functional annotations
272291
273292The functional annotations provide a biological context for every variant. Such as the overlapping genes or the effect
Original file line number Diff line number Diff line change @@ -6,10 +6,13 @@ include { FILTER_VCF } from './modules/01_filter'
66include { BCFTOOLS_NORM ; VT_DECOMPOSE_COMPLEX ; REMOVE_DUPLICATES } from ' ./modules/02_normalization'
77include { SUMMARY_VCF ; SUMMARY_VCF as SUMMARY_VCF_2 } from ' ./modules/03_summary'
88include { VAFATOR ; MULTIALLELIC_FILTER } from ' ./modules/04_vafator'
9- include { VARIANT_ANNOTATION_SNPEFF ; VARIANT_ANNOTATION_BCFTOOLS } from ' ./modules/05_variant_annotation'
9+ include { WHATSHAP } from ' ./modules/05_phasing'
10+ include { VARIANT_ANNOTATION_SNPEFF ; VARIANT_ANNOTATION_BCFTOOLS } from ' ./modules/06_variant_annotation'
11+
1012
1113params. help= false
1214params. input_vcfs = false
15+ params. input_bams = false
1316params. input_vcf = false
1417
1518// optional VAFator inputs
@@ -32,6 +35,7 @@ params.skip_multiallelic_filter = false
3235// SnpEff input
3336params. snpeff_organism = false
3437params. snpeff_datadir = false
38+ params. phasing_sample = false
3539
3640
3741if (params. help) {
@@ -136,6 +140,10 @@ workflow {
136140 final_vcfs = MULTIALLELIC_FILTER (final_vcfs)
137141 final_vcfs = MULTIALLELIC_FILTER . out. filtered_vcf
138142 }
143+ if (params. phasing_sample) {
144+ WHATSHAP (final_vcfs. join(input_bams. groupTuple()))
145+ final_vcfs = WHATSHAP . out. phased_vcf
146+ }
139147 }
140148
141149 if (params. snpeff_organism) {
Original file line number Diff line number Diff line change 1+ params. phasing_sample = false
2+ params. cpus = 1
3+ params. memory = ' 3g'
4+
5+
6+ process WHATSHAP {
7+ cpus params. cpus
8+ memory params. memory
9+ tag " ${ patient_name} "
10+ publishDir " ${ params.output} /${ patient_name} " , mode: " copy"
11+
12+ conda (params. enable_conda ? " bioconda::whatshap=1.4" : null )
13+
14+ input:
15+ tuple val(patient_name), file(vcf), val(bams)
16+
17+ output:
18+ tuple val(patient_name), file(" ${ patient_name} .phased.vcf" ), emit: phased_vcf
19+
20+ script:
21+ phasing_bams = bams. toList(). stream()
22+ .filter{ b -> b. split(' :' )[0 ] == " ${ params.phasing_sample} " }
23+ .collect{ b -> b. split(" :" )[1 ] }
24+ .join(" " ) ;
25+ """
26+ whatshap phase \
27+ --indel \
28+ -o ${ patient_name} .phased.vcf \
29+ --reference=${ params.reference} \
30+ ${ vcf} \
31+ ${ phasing_bams}
32+ """
33+ }
Original file line number Diff line number Diff line change @@ -49,6 +49,7 @@ process VARIANT_ANNOTATION_BCFTOOLS {
4949 --fasta-ref ${ params.reference} \\
5050 --gff-annot ${ params.gff} ${ vcf} \\
5151 --output-type v \\
52- --output ${ name} .annotated.vcf
52+ --output ${ name} .annotated.vcf \\
53+ --phase a
5354 """
5455}
Original file line number Diff line number Diff line change 2727// Capture exit codes from upstream processes when piping
2828process. shell = [' /bin/bash' , ' -euo' , ' pipefail' ]
2929
30- VERSION = ' 2.5 .0'
30+ VERSION = ' 3.0 .0'
3131
3232manifest {
3333 name = ' TRON-Bioinformatics/tronflow-vcf-postprocessing'
@@ -87,6 +87,7 @@ Optional input:
8787 * --snpeff_memory: for some samples SnpEff may require to use more memory (default: 3g)
8888 * --mapping_quality: VAFator minimum mapping quality (default: 0)
8989 * --base_call_quality: VAFator minimum base call quality (default: 0)
90+ * --phasing-sample: enables phasing with whatshap and defines the sample BAM to phase the VCF (BAMs need to be provided)
9091
9192Output:
9293 * Normalized VCF file
Original file line number Diff line number Diff line change 1+ single_sample test/data/test_single_sample.vcf
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+
3+
4+ source test/scripts/assert.sh
5+ output_folder=test/output/test14
6+ echo -e " tumor_normal\tprimary:" ` pwd` " /test/data/TESTX_S1_L001.bam" > test/data/test_bams.txt
7+ echo -e " tumor_normal\tnormal:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
8+ echo -e " single_sample\ttumor:" ` pwd` " /test/data/TESTX_S1_L001.bam" >> test/data/test_bams.txt
9+ echo -e " single_sample\ttumor:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
10+ echo -e " single_sample\tnormal:" ` pwd` " /test/data/TESTX_S1_L001.bam" >> test/data/test_bams.txt
11+ echo -e " single_sample\tnormal:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
12+
13+ nextflow main.nf -profile test,conda --output $output_folder --input_bams test/data/test_bams.txt \
14+ --phasing_sample normal --input_vcfs test/data/test_input_single_sample.txt
15+
16+ test -s $output_folder /single_sample/single_sample.phased.vcf || { echo " Missing test 10 output file!" ; exit 1; }
You can’t perform that action at this time.
0 commit comments