Skip to content

Commit 054c1f6

Browse files
authored
Merge pull request #166 from LabKey/fb_merge_23.3_to_develop
Merge discvr-23.3 to develop
2 parents 0f983db + 12d6043 commit 054c1f6

12 files changed

Lines changed: 374 additions & 72 deletions

File tree

mGAP/resources/schemas/mgap.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -689,7 +689,7 @@
689689
<table tableName="releaseTrackSubsets" tableDbType="TABLE" useColumnOrder="true">
690690
<javaCustomizer class="org.labkey.ldk.query.DefaultTableCustomizer" />
691691
<pkColumnName>rowid</pkColumnName>
692-
<tableTitle>mGAP Release Track Sample Sets</tableTitle>
692+
<tableTitle>mGAP Samples To Include Per Track</tableTitle>
693693
<auditLogging>DETAILED</auditLogging>
694694
<columns>
695695
<column columnName="rowid">

mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java

Lines changed: 34 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,11 @@ public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner>
5151
{
5252
public static final String GRCH37 = "genome37";
5353
private static final String CLINVAR_VCF = "clinvar37";
54+
private static final String DBNSFP_FILE = "dbnsfpFile";
55+
5456
public static final String CHAIN_FILE = "CHAIN_FILE";
5557

56-
public AnnotationStep(PipelineStepProvider provider, PipelineContext ctx)
58+
public AnnotationStep(PipelineStepProvider<?> provider, PipelineContext ctx)
5759
{
5860
super(provider, ctx, new CassandraRunner(ctx.getLogger()));
5961
}
@@ -67,6 +69,10 @@ public Provider()
6769
{{
6870
put("allowBlank", false);
6971
}}, null),
72+
ToolParameterDescriptor.createExpDataParam(DBNSFP_FILE, "dbNSFP Database (GRCh37)", "This is the DataId of the dbNSFP database (txt.gz file) using the GRCh37 genome.", "ldk-expdatafield", new JSONObject()
73+
{{
74+
put("allowBlank", false);
75+
}}, null),
7076
ToolParameterDescriptor.create(GRCH37, "GRCh37 Genome", "The genome that matches human GRCh37.", "ldk-simplelabkeycombo", new JSONObject()
7177
{{
7278
put("width", 400);
@@ -126,10 +132,21 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
126132
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
127133

128134
File clinvarVCF = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(CLINVAR_VCF).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
135+
if (!clinvarVCF.exists())
136+
{
137+
throw new PipelineJobException("Unable to find file: " + clinvarVCF.getPath());
138+
}
139+
129140
ReferenceGenome grch37Genome = getPipelineCtx().getSequenceSupport().getCachedGenome(getProvider().getParameterByName(GRCH37).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
130141
Integer chainFileId = getPipelineCtx().getSequenceSupport().getCachedObject(CHAIN_FILE, Integer.class);
131142
File chainFile = getPipelineCtx().getSequenceSupport().getCachedData(chainFileId);
132143

144+
File dbnsfpFile = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(DBNSFP_FILE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
145+
if (!dbnsfpFile.exists())
146+
{
147+
throw new PipelineJobException("Unable to find file: " + dbnsfpFile.getPath());
148+
}
149+
133150
getPipelineCtx().getLogger().info("processing file: " + inputVCF.getName());
134151

135152
ReferenceGenome originalGenome = getPipelineCtx().getSequenceSupport().getCachedGenome(genome.getGenomeId());
@@ -293,6 +310,22 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
293310
output.addIntermediateFile(clinvarAnnotated);
294311
output.addIntermediateFile(new File(clinvarAnnotated.getPath() + ".tbi"));
295312

313+
//annotate with SnpSift
314+
getPipelineCtx().getLogger().info("annotating with SnpSift");
315+
File snpSiftAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".snpSift.vcf.gz");
316+
if (forceRecreate || !indexExists(snpSiftAnnotated))
317+
{
318+
SnpSiftWrapper ssRunner = new SnpSiftWrapper(getPipelineCtx().getLogger());
319+
ssRunner.runSnpSift(dbnsfpFile, clinvarAnnotated, snpSiftAnnotated);
320+
}
321+
else
322+
{
323+
getPipelineCtx().getLogger().info("resuming with existing file: " + snpSiftAnnotated.getPath());
324+
}
325+
output.addOutput(snpSiftAnnotated, "VCF Annotated With SnpSift");
326+
output.addIntermediateFile(snpSiftAnnotated);
327+
output.addIntermediateFile(new File(snpSiftAnnotated.getPath() + ".tbi"));
328+
296329
//annotate with cassandra
297330
getPipelineCtx().getLogger().info("annotating with Cassandra");
298331
String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cassandra";

0 commit comments

Comments
 (0)