@@ -51,9 +51,11 @@ public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner>
5151{
5252 public static final String GRCH37 = "genome37" ;
5353 private static final String CLINVAR_VCF = "clinvar37" ;
54+ private static final String DBNSFP_FILE = "dbnsfpFile" ;
55+
5456 public static final String CHAIN_FILE = "CHAIN_FILE" ;
5557
56- public AnnotationStep (PipelineStepProvider provider , PipelineContext ctx )
// Constructs this pipeline step from the given provider and pipeline context.
// Both arguments are forwarded unchanged to the superclass constructor.
58+ public AnnotationStep (PipelineStepProvider <?> provider , PipelineContext ctx )
5759 {
// The third superclass argument is a new CassandraRunner built with the context's logger.
5860 super (provider , ctx , new CassandraRunner (ctx .getLogger ()));
5961 }
@@ -67,6 +69,10 @@ public Provider()
6769 {{
6870 put ("allowBlank" , false );
6971 }}, null ),
72+ ToolParameterDescriptor .createExpDataParam (DBNSFP_FILE , "dbNSFP Database (GRCh37)" , "This is the DataId of the dbNSFP database (txt.gz file) using the GRCh37 genome." , "ldk-expdatafield" , new JSONObject ()
73+ {{
74+ put ("allowBlank" , false );
75+ }}, null ),
7076 ToolParameterDescriptor .create (GRCH37 , "GRCh37 Genome" , "The genome that matches human GRCh37." , "ldk-simplelabkeycombo" , new JSONObject ()
7177 {{
7278 put ("width" , 400 );
@@ -126,10 +132,21 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
126132 VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl ();
127133
128134 File clinvarVCF = getPipelineCtx ().getSequenceSupport ().getCachedData (getProvider ().getParameterByName (CLINVAR_VCF ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Integer .class ));
135+ if (!clinvarVCF .exists ())
136+ {
137+ throw new PipelineJobException ("Unable to find file: " + clinvarVCF .getPath ());
138+ }
139+
129140 ReferenceGenome grch37Genome = getPipelineCtx ().getSequenceSupport ().getCachedGenome (getProvider ().getParameterByName (GRCH37 ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Integer .class ));
130141 Integer chainFileId = getPipelineCtx ().getSequenceSupport ().getCachedObject (CHAIN_FILE , Integer .class );
131142 File chainFile = getPipelineCtx ().getSequenceSupport ().getCachedData (chainFileId );
132143
144+ File dbnsfpFile = getPipelineCtx ().getSequenceSupport ().getCachedData (getProvider ().getParameterByName (DBNSFP_FILE ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Integer .class ));
145+ if (!dbnsfpFile .exists ())
146+ {
147+ throw new PipelineJobException ("Unable to find file: " + dbnsfpFile .getPath ());
148+ }
149+
133150 getPipelineCtx ().getLogger ().info ("processing file: " + inputVCF .getName ());
134151
135152 ReferenceGenome originalGenome = getPipelineCtx ().getSequenceSupport ().getCachedGenome (genome .getGenomeId ());
@@ -293,6 +310,22 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
293310 output .addIntermediateFile (clinvarAnnotated );
294311 output .addIntermediateFile (new File (clinvarAnnotated .getPath () + ".tbi" ));
295312
313+ //annotate with SnpSift
314+ getPipelineCtx ().getLogger ().info ("annotating with SnpSift" );
315+ File snpSiftAnnotated = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".snpSift.vcf.gz" );
316+ if (forceRecreate || !indexExists (snpSiftAnnotated ))
317+ {
318+ SnpSiftWrapper ssRunner = new SnpSiftWrapper (getPipelineCtx ().getLogger ());
319+ ssRunner .runSnpSift (dbnsfpFile , clinvarAnnotated , snpSiftAnnotated );
320+ }
321+ else
322+ {
323+ getPipelineCtx ().getLogger ().info ("resuming with existing file: " + snpSiftAnnotated .getPath ());
324+ }
325+ output .addOutput (snpSiftAnnotated , "VCF Annotated With SnpSift" );
326+ output .addIntermediateFile (snpSiftAnnotated );
327+ output .addIntermediateFile (new File (snpSiftAnnotated .getPath () + ".tbi" ));
328+
296329 //annotate with cassandra
297330 getPipelineCtx ().getLogger ().info ("annotating with Cassandra" );
298331 String basename = SequenceAnalysisService .get ().getUnzippedBaseName (liftedToGRCh37 .getName ()) + ".cassandra" ;
0 commit comments