Skip to content

Commit 0a87f3c

Browse files
committed
Add option to exclude specific samples in RenameSamplesForMgapStep
1 parent 5522ccf commit 0a87f3c

1 file changed

Lines changed: 28 additions & 3 deletions

File tree

mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
3434
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
3535
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
36+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
3637
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
3738
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
3839
import org.labkey.api.util.PageFlowUtil;
@@ -43,6 +44,7 @@
4344
import java.io.IOException;
4445
import java.sql.SQLException;
4546
import java.util.ArrayList;
47+
import java.util.Arrays;
4648
import java.util.Collection;
4749
import java.util.Collections;
4850
import java.util.HashMap;
@@ -53,7 +55,9 @@
5355

5456
public class RenameSamplesForMgapStep extends AbstractPipelineStep implements VariantProcessingStep
5557
{
56-
public RenameSamplesForMgapStep(PipelineStepProvider provider, PipelineContext ctx)
58+
/**
 * Name of the step parameter holding the sample name(s) that should be left un-renamed.
 * Declared final so this shared key cannot be reassigned at runtime.
 */
public static final String SAMPLE_EXCLUDE = "sampleNameToExclude";
59+
60+
/**
 * Creates the step by forwarding both arguments unchanged to the superclass constructor.
 *
 * @param provider the step provider passed through to the superclass
 * @param ctx the pipeline context passed through to the superclass
 */
public RenameSamplesForMgapStep(PipelineStepProvider<?> provider, PipelineContext ctx)
5761
{
5862
super(provider, ctx);
5963
}
@@ -62,7 +66,9 @@ public static class Provider extends AbstractVariantProcessingStepProvider<Renam
6266
{
6367
/**
 * Registers the step's name, label and description, plus a single optional parameter
 * ({@link #SAMPLE_EXCLUDE}) listing samples that should be left un-renamed.
 * The parameter uses the "sequenceanalysis-trimmingtextarea" field type, so the matching
 * TrimmingTextArea.js script is passed alongside it.
 */
public Provider()
{
    super("RenameSamplesForMgap", "Rename Sample For mGAP", "RenameSamplesForMgapStep", "This will rename the samples in the VCF based on the mGAP animal mapping table. If the VCF contains samples not found in this table it will throw an error.", List.of(
            // Label/help text describe what the step does with these samples: they are skipped by the rename, not dropped from the VCF.
            ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Sample(s) To Exclude From Rename", "The following samples will be excluded from renaming and will retain their original names in the output.", "sequenceanalysis-trimmingtextarea", null, null)
    ), List.of("sequenceanalysis/field/TrimmingTextArea.js"), null);
}
6773

6874
@Override
@@ -144,9 +150,16 @@ private File renameSamples(File currentVCF, File outputDirectory, ReferenceGenom
144150
try (VCFFileReader reader = new VCFFileReader(currentVCF); VariantContextWriter writer = builder.build())
145151
{
146152
VCFHeader header = reader.getFileHeader();
147-
List<String> samples = header.getGenotypeSamples();
153+
List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
148154
getPipelineCtx().getLogger().debug("Original samples:" + StringUtils.join(samples, ","));
149155

156+
List<String> excludeFromRename = new ArrayList<>();
157+
String toExcludeStr = StringUtils.trimToNull(getProvider().getParameterByName(SAMPLE_EXCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, null));
158+
if (toExcludeStr != null)
159+
{
160+
excludeFromRename.addAll(Arrays.asList(toExcludeStr.split(";")));
161+
}
162+
150163
List<String> remappedSamples = new ArrayList<>();
151164

152165
for (String sample : samples)
@@ -155,6 +168,10 @@ private File renameSamples(File currentVCF, File outputDirectory, ReferenceGenom
155168
{
156169
remappedSamples.add(sampleMap.get(sample));
157170
}
171+
else if (excludeFromRename.contains(sample))
172+
{
173+
remappedSamples.add(sample);
174+
}
158175
else
159176
{
160177
throw new PipelineJobException("No alternate name provided for sample: " + sample);
@@ -249,6 +266,14 @@ private Map<String, String> getSamplesToAlias(File input) throws PipelineJobExce
249266
Set<String> sampleNames = new HashSet<>(header.getSampleNamesInOrder());
250267
getPipelineCtx().getLogger().info("total samples in input VCF: " + sampleNames.size());
251268

269+
String toExcludeStr = StringUtils.trimToNull(getProvider().getParameterByName(SAMPLE_EXCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, null));
270+
if (toExcludeStr != null)
271+
{
272+
List<String> excludeFromRename = Arrays.asList(toExcludeStr.split(";"));
273+
sampleNames.removeAll(excludeFromRename);
274+
getPipelineCtx().getLogger().info("after exclusion: " + sampleNames.size());
275+
}
276+
252277
// Pass 1: match on proper ID:
253278
querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("subjectname"), sampleNames, CompareType.IN), sampleNames);
254279

0 commit comments

Comments
 (0)