Skip to content

Commit cd3bef8

Browse files
committed
Editing PotentialRegionFilter
Applying blacklist filter to entire chromosome before potential regions are found. This brings MultiGPS potential region filtering in line with ChExMix's approach.
1 parent 185adfa commit cd3bef8

1 file changed

Lines changed: 40 additions & 4 deletions

File tree

src/org/seqcode/projects/multigps/framework/PotentialRegionFilter.java

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,25 @@ public PotentialRegionFilter(EventsConfig ec, MultiGPSConfig c, ExptConfig econ,
120120
*/
121121
public List<Region> execute(){
122122
//TODO: check config for defined subset of regions
123-
Iterator<Region> testRegions = new ChromosomeGenerator().execute(config.getGenome());
123+
Iterator<Region> testRegionsIter = new ChromosomeGenerator().execute(config.getGenome());
124+
List<Region> testRegions = new ArrayList<Region>();
125+
while(testRegionsIter.hasNext())
126+
testRegions.add(testRegionsIter.next());
127+
128+
//If we put the exclude filter here, we can deal with large regions that overlap exclude regions more smoothly
129+
//However, this method of filtering is safest when excluding regions from whole chromosomes or other large regions
130+
testRegions = filterExcludedLeaveRemaining(testRegions);
124131

125132
//Threading divides analysis over entire chromosomes. This approach is not compatible with file caching.
126133
int numThreads = econfig.getCacheAllData() ? config.getMaxThreads() : 1;
127134

128135
Thread[] threads = new Thread[numThreads];
129-
ArrayList<Region> threadRegions[] = new ArrayList[numThreads];
136+
List<Region> threadRegions[] = new ArrayList[numThreads];
130137
int i = 0;
131138
for (i = 0 ; i < threads.length; i++) {
132139
threadRegions[i] = new ArrayList<Region>();
133140
}i=0;
134-
while(testRegions.hasNext()){
135-
Region r = testRegions.next();
141+
for(Region r : testRegions){
136142
threadRegions[(i++) % numThreads].add(r);
137143
}
138144

@@ -169,6 +175,36 @@ public List<Region> execute(){
169175
return potentialRegions;
170176
}
171177

178+
179+
/**
180+
* Filter out pre-defined regions to ignore (e.g. blacklist regions)
181+
* This version returns segments of regions that don't overlap.
182+
* Safest to apply to whole chromosomes, I think
183+
*/
184+
protected List<Region> filterExcludedLeaveRemaining(List<Region> testRegions) {
185+
if(config.getRegionsToIgnore().size()==0)
186+
return testRegions;
187+
188+
List<Region> filtered = new ArrayList<Region>();
189+
filtered.addAll(testRegions);
190+
for(Region i : config.getRegionsToIgnore()){
191+
boolean overlaps = false;
192+
int x=0;
193+
while(x < filtered.size() && overlaps==false){
194+
Region t = filtered.get(x);
195+
if(t.overlaps(i)){
196+
overlaps = true;
197+
Collection<Region> subFrags = t.getSubtractionFragments(i);
198+
filtered.remove(x);
199+
if(subFrags.size()>0)
200+
filtered.addAll(subFrags);
201+
}
202+
x++;
203+
}
204+
}
205+
return filtered;
206+
}
207+
172208
/**
173209
* Print potential regions to a file.
174210
* TESTING ONLY

0 commit comments

Comments
 (0)