1616import org .labkey .api .exp .api .ExperimentService ;
1717import org .labkey .api .pipeline .PipelineJob ;
1818import org .labkey .api .pipeline .PipelineJobException ;
19+ import org .labkey .api .pipeline .PipelineJobService ;
1920import org .labkey .api .query .FieldKey ;
2021import org .labkey .api .sequenceanalysis .SequenceAnalysisService ;
2122import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
@@ -84,6 +85,14 @@ public Provider()
8485 put ("valueField" , "rowid" );
8586 put ("allowBlank" , false );
8687 }}, null ),
88+ ToolParameterDescriptor .create ("useCassandra" , "Use Cassandra" , "If checked, Cassandra will be run." , "checkbox" , new JSONObject ()
89+ {{
90+ put ("checked" , true );
91+ }}, true ),
92+ ToolParameterDescriptor .create ("useFuncotator" , "Use Funcotator" , "If checked, Extended Funcotator will be run." , "checkbox" , new JSONObject ()
93+ {{
94+ put ("checked" , true );
95+ }}, true ),
8796 ToolParameterDescriptor .create ("dropFiltered" , "Drop Filtered Sites" , "If checked, filtered sites will be discarded, which can substantially improve speed." , "checkbox" , new JSONObject ()
8897 {{
8998 put ("checked" , true );
@@ -147,6 +156,13 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
147156 throw new PipelineJobException ("Unable to find file: " + dbnsfpFile .getPath ());
148157 }
149158
159+ boolean useFuncotator = getProvider ().getParameterByName ("useFuncotator" ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , false );
160+ File funcotatorSourceDir = new File (PipelineJobService .get ().getAppProperties ().getToolsDirectory (), "funcotatorDataSource" );
161+ if (useFuncotator && !funcotatorSourceDir .exists ())
162+ {
163+ throw new PipelineJobException ("Unable to find file: " + funcotatorSourceDir .getPath ());
164+ }
165+
150166 getPipelineCtx ().getLogger ().info ("processing file: " + inputVCF .getName ());
151167
152168 ReferenceGenome originalGenome = getPipelineCtx ().getSequenceSupport ().getCachedGenome (genome .getGenomeId ());
@@ -310,13 +326,29 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
310326 output .addIntermediateFile (clinvarAnnotated );
311327 output .addIntermediateFile (new File (clinvarAnnotated .getPath () + ".tbi" ));
312328
329+ //backport ClinVar
330+ getPipelineCtx ().getLogger ().info ("backport ClinVar 2.0 to source genome" );
331+ File clinvarAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (clinvarAnnotated .getName ()) + ".bp.vcf.gz" );
332+ if (forceRecreate || !indexExists (clinvarAnnotatedBackport ))
333+ {
334+ BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner (getPipelineCtx ().getLogger ());
335+ bpRunner .execute (clinvarAnnotated , originalGenome .getWorkingFastaFile (), grch37Genome .getWorkingFastaFile (), clinvarAnnotatedBackport );
336+ }
337+ else
338+ {
339+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + clinvarAnnotatedBackport .getPath ());
340+ }
341+ output .addOutput (clinvarAnnotatedBackport , "VCF Annotated With Clinvar, Backported" );
342+ output .addIntermediateFile (clinvarAnnotatedBackport );
343+ output .addIntermediateFile (new File (clinvarAnnotatedBackport .getPath () + ".tbi" ));
344+
313345 //annotate with SnpSift
314- getPipelineCtx ().getLogger ().info ("annotating with SnpSift" );
346+ getPipelineCtx ().getLogger ().info ("annotating with SnpSift/dbnsfp " );
315347 File snpSiftAnnotated = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".snpSift.vcf.gz" );
316348 if (forceRecreate || !indexExists (snpSiftAnnotated ))
317349 {
318350 SnpSiftWrapper ssRunner = new SnpSiftWrapper (getPipelineCtx ().getLogger ());
319- ssRunner .runSnpSift (dbnsfpFile , clinvarAnnotated , snpSiftAnnotated );
351+ ssRunner .runSnpSift (dbnsfpFile , liftedToGRCh37 , snpSiftAnnotated );
320352 }
321353 else
322354 {
@@ -326,48 +358,46 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
326358 output .addIntermediateFile (snpSiftAnnotated );
327359 output .addIntermediateFile (new File (snpSiftAnnotated .getPath () + ".tbi" ));
328360
329- //annotate with cassandra
330- getPipelineCtx ().getLogger ().info ("annotating with Cassandra" );
331- String basename = SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".cassandra" ;
332- File cassandraAnnotated = new File (outputDirectory , basename + ".vcf.gz" );
333- if (forceRecreate || !indexExists (cassandraAnnotated ))
361+ //backport SnpSift
362+ getPipelineCtx ().getLogger ().info ("Backport SnpSift to source genome" );
363+ File snpSiftAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (snpSiftAnnotated .getName ()) + ".bp.vcf.gz" );
364+ if (forceRecreate || !indexExists (snpSiftAnnotatedBackport ))
334365 {
335- //we can assume splitting happened upstream, so run over the full VCF
336- cassandraAnnotated = runCassandra ( liftedToGRCh37 , cassandraAnnotated , output , forceRecreate );
366+ BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner ( getPipelineCtx (). getLogger ());
367+ bpRunner . execute ( snpSiftAnnotated , originalGenome . getWorkingFastaFile (), grch37Genome . getWorkingFastaFile (), snpSiftAnnotatedBackport );
337368 }
338369 else
339370 {
340- getPipelineCtx ().getLogger ().info ("resuming with existing file: " + cassandraAnnotated .getPath ());
371+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + snpSiftAnnotatedBackport .getPath ());
341372 }
373+ output .addOutput (snpSiftAnnotatedBackport , "VCF Annotated With SnpSift, Backported" );
374+ output .addIntermediateFile (snpSiftAnnotatedBackport );
375+ output .addIntermediateFile (new File (snpSiftAnnotatedBackport .getPath () + ".tbi" ));
342376
343- if (cassandraAnnotated != null )
377+ //annotate with cassandra
378+ File cassandraAnnotatedBackport = null ;
379+ if (getProvider ().getParameterByName ("useCassandra" ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , false ))
344380 {
381+ getPipelineCtx ().getLogger ().info ("annotating with Cassandra" );
382+ String basename = SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".cassandra" ;
383+ File cassandraAnnotated = new File (outputDirectory , basename + ".vcf.gz" );
384+ if (forceRecreate || !indexExists (cassandraAnnotated ))
385+ {
386+ //we can assume splitting happened upstream, so run over the full VCF
387+ runCassandra (liftedToGRCh37 , cassandraAnnotated , output , forceRecreate );
388+ }
389+ else
390+ {
391+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + cassandraAnnotated .getPath ());
392+ }
393+
345394 output .addOutput (cassandraAnnotated , "VCF Annotated With Cassandra" );
346395 output .addIntermediateFile (cassandraAnnotated );
347396 output .addIntermediateFile (new File (cassandraAnnotated .getPath () + ".tbi" ));
348- }
349-
350- //backport ClinVar
351- getPipelineCtx ().getLogger ().info ("backport ClinVar 2.0 to source genome" );
352- File clinvarAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (clinvarAnnotated .getName ()) + ".bp.vcf.gz" );
353- if (forceRecreate || !indexExists (clinvarAnnotatedBackport ))
354- {
355- BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner (getPipelineCtx ().getLogger ());
356- bpRunner .execute (clinvarAnnotated , originalGenome .getWorkingFastaFile (), grch37Genome .getWorkingFastaFile (), clinvarAnnotatedBackport );
357- }
358- else
359- {
360- getPipelineCtx ().getLogger ().info ("resuming with existing file: " + clinvarAnnotatedBackport .getPath ());
361- }
362- output .addOutput (clinvarAnnotatedBackport , "VCF Annotated With Clinvar, Backported" );
363- output .addIntermediateFile (clinvarAnnotatedBackport );
364- output .addIntermediateFile (new File (clinvarAnnotatedBackport .getPath () + ".tbi" ));
365397
366- //backport Cassandra
367- getPipelineCtx ().getLogger ().info ("backport Cassandra to source genome" );
368- File cassandraAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (cassandraAnnotated .getName ()) + ".bp.vcf.gz" );
369- if (cassandraAnnotated != null )
370- {
398+ //backport Cassandra
399+ getPipelineCtx ().getLogger ().info ("backport Cassandra to source genome" );
400+ cassandraAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (cassandraAnnotated .getName ()) + ".bp.vcf.gz" );
371401 if (forceRecreate || !indexExists (cassandraAnnotatedBackport ))
372402 {
373403 BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner (getPipelineCtx ().getLogger ());
@@ -383,8 +413,50 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
383413 }
384414 else
385415 {
386- getPipelineCtx ().getLogger ().info ("No cassandra output, will not backport" );
387- cassandraAnnotatedBackport = null ;
416+ getPipelineCtx ().getLogger ().debug ("Cassandra will be skipped" );
417+ }
418+
419+ //annotate with funcotator
420+ File funcotatorAnnotatedBackport = null ;
421+ if (useFuncotator )
422+ {
423+ getPipelineCtx ().getLogger ().info ("annotating with Funcotator" );
424+ String basename = SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".funcotator" ;
425+ File funcotatorAnnotated = new File (outputDirectory , basename + ".vcf.gz" );
426+ if (forceRecreate || !indexExists (funcotatorAnnotated ))
427+ {
428+ //we can assume splitting happened upstream, so run over the full VCF
429+ FuncotatorWrapper fr = new FuncotatorWrapper (getPipelineCtx ().getLogger ());
430+ fr .runFuncotator (funcotatorSourceDir , liftedToGRCh37 , snpSiftAnnotated , genome );
431+ }
432+ else
433+ {
434+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + funcotatorAnnotated .getPath ());
435+ }
436+
437+ output .addOutput (funcotatorAnnotated , "VCF Annotated With Funcotator" );
438+ output .addIntermediateFile (funcotatorAnnotated );
439+ output .addIntermediateFile (new File (funcotatorAnnotated .getPath () + ".tbi" ));
440+
441+ //backport Funcotator
442+ getPipelineCtx ().getLogger ().info ("backport Funcotator to source genome" );
443+ funcotatorAnnotatedBackport = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (funcotatorAnnotated .getName ()) + ".bp.vcf.gz" );
444+ if (forceRecreate || !indexExists (funcotatorAnnotatedBackport ))
445+ {
446+ BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner (getPipelineCtx ().getLogger ());
447+ bpRunner .execute (funcotatorAnnotated , originalGenome .getWorkingFastaFile (), grch37Genome .getWorkingFastaFile (), funcotatorAnnotatedBackport );
448+ }
449+ else
450+ {
451+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + funcotatorAnnotatedBackport .getPath ());
452+ }
453+ output .addOutput (funcotatorAnnotatedBackport , "VCF Annotated With Funcotator, Backported" );
454+ output .addIntermediateFile (funcotatorAnnotatedBackport );
455+ output .addIntermediateFile (new File (funcotatorAnnotatedBackport .getPath () + ".tbi" ));
456+ }
457+ else
458+ {
459+ getPipelineCtx ().getLogger ().debug ("Funcotator will be skipped" );
388460 }
389461
390462 //multiannotator
@@ -405,7 +477,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
405477 needToSubsetToInterval = false ;
406478 }
407479
408- maRunner .execute (inputVCF , cassandraAnnotatedBackport , clinvarAnnotatedBackport , liftoverRejects , multiAnnotated , options );
480+ maRunner .execute (inputVCF , cassandraAnnotatedBackport , clinvarAnnotatedBackport , liftoverRejects , funcotatorAnnotatedBackport , snpSiftAnnotatedBackport , multiAnnotated , options );
409481 }
410482 else
411483 {
@@ -422,7 +494,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
422494 return output ;
423495 }
424496
425- private File runCassandra (File liftedToGRCh37 , File finalOutput , VariantProcessingStepOutputImpl output , boolean forceRecreate ) throws PipelineJobException
497+ private void runCassandra (File liftedToGRCh37 , File finalOutput , VariantProcessingStepOutputImpl output , boolean forceRecreate ) throws PipelineJobException
426498 {
427499 List <String > extraArgs = new ArrayList <>();
428500
@@ -489,8 +561,6 @@ private File runCassandra(File liftedToGRCh37, File finalOutput, VariantProcessi
489561 {
490562 throw new PipelineJobException (e );
491563 }
492-
493- return finalOutput ;
494564 }
495565
496566 protected static boolean indexExists (File vcf )
0 commit comments