Skip to content

Commit e93cbc6

Browse files
committed
Editing PotentialRegionFilter
Adds an extra filter step at the very start that removes blacklist regions from chromosomes before any potential regions are examined. This should make it cleaner to handle cases where a large potential region overlaps an excluded region.
1 parent f8c17b1 commit e93cbc6

1 file changed

Lines changed: 48 additions & 9 deletions

File tree

src/org/seqcode/projects/chexmix/framework/PotentialRegionFilter.java

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,19 +115,25 @@ public PotentialRegionFilter(EventsConfig ec, ChExMixConfig c, ExptConfig econ,
115115
*/
116116
public List<Region> execute(){
117117
//TODO: check config for defined subset of regions
118-
Iterator<Region> testRegions = new ChromosomeGenerator().execute(config.getGenome());
118+
Iterator<Region> testRegionsIter = new ChromosomeGenerator().execute(config.getGenome());
119+
List<Region> testRegions = new ArrayList<Region>();
120+
while(testRegionsIter.hasNext())
121+
testRegions.add(testRegionsIter.next());
122+
123+
//If we put the exclude filter here, we can deal with large regions that overlap excluded regions more smoothly
124+
//However, this method of filtering is safest when excluding regions from whole chromosomes or other large regions
125+
testRegions = filterExcludedLeaveRemaining(testRegions);
119126

120127
//Threading divides analysis over entire chromosomes. This approach is not compatible with file caching.
121128
int numThreads = econfig.getCacheAllData() ? config.getMaxThreads() : 1;
122129

123130
Thread[] threads = new Thread[numThreads];
124-
ArrayList<Region> threadRegions[] = new ArrayList[numThreads];
131+
List<Region> threadRegions[] = new ArrayList[numThreads];
125132
int i = 0;
126133
for (i = 0 ; i < threads.length; i++) {
127134
threadRegions[i] = new ArrayList<Region>();
128135
}i=0;
129-
while(testRegions.hasNext()){
130-
Region r = testRegions.next();
136+
for(Region r : testRegions){
131137
threadRegions[(i++) % numThreads].add(r);
132138
}
133139

@@ -187,7 +193,7 @@ public List<Region> executeInitialPositionFiler(){
187193
chrStartExcluded.add(r);
188194
}
189195

190-
potentialRegions = filterExcluded(chrStartExcluded);
196+
potentialRegions = filterExcludedAnyOverlap(chrStartExcluded);
191197

192198
// signal and control counts from potential regions
193199
countReadsInRegionsNoLandscape(potentialRegions);
@@ -325,8 +331,11 @@ protected void countReadsInRegionsByRepNoLandscape(List<Region> regs){
325331
}
326332
}
327333

328-
//Filter out pre-defined regions to ignore (e.g. tower regions)
329-
protected List<Region> filterExcluded(List<Region> testRegions) {
334+
/**
335+
* Filter out pre-defined regions to ignore (e.g. blacklist regions)
336+
* This version filters out any region that overlaps an excluded region by >=1bp
337+
*/
338+
protected List<Region> filterExcludedAnyOverlap(List<Region> testRegions) {
330339
List<Region> filtered = new ArrayList<Region>();
331340
if(config.getRegionsToIgnore().size()==0)
332341
return testRegions;
@@ -345,6 +354,35 @@ protected List<Region> filterExcluded(List<Region> testRegions) {
345354
}
346355
return filtered;
347356
}
357+
358+
/**
359+
* Filter out pre-defined regions to ignore (e.g. blacklist regions)
360+
* This version returns segments of regions that don't overlap.
361+
* Safest to apply to whole chromosomes, I think
362+
*/
363+
protected List<Region> filterExcludedLeaveRemaining(List<Region> testRegions) {
364+
if(config.getRegionsToIgnore().size()==0)
365+
return testRegions;
366+
367+
List<Region> filtered = new ArrayList<Region>();
368+
filtered.addAll(testRegions);
369+
for(Region i : config.getRegionsToIgnore()){
370+
boolean overlaps = false;
371+
int x=0;
372+
while(x < filtered.size() && overlaps==false){
373+
Region t = filtered.get(x);
374+
if(t.overlaps(i)){
375+
overlaps = true;
376+
Collection<Region> subFrags = t.getSubtractionFragments(i);
377+
filtered.remove(x);
378+
if(subFrags.size()>0)
379+
filtered.addAll(subFrags);
380+
}
381+
x++;
382+
}
383+
}
384+
return filtered;
385+
}
348386

349387
/**
350388
* Print potential regions to a file.
@@ -472,7 +510,8 @@ public void run() {
472510
//Count all "signal" reads overlapping the regions in currPotRegions (including the lastPotential)
473511
if(lastPotential!=null)
474512
currPotRegions.add(lastPotential);
475-
currPotRegions = filterExcluded(currPotRegions);
513+
//The exclude filter here is very likely redundant with the first-pass filter in the main execute method, but I'm leaving it here in case there are other execution modes.
514+
currPotRegions = filterExcludedAnyOverlap(currPotRegions);
476515
countReadsInRegions(currPotRegions, ipHits, backHits, y==currentRegion.getEnd() ? y : y-expansion);
477516
countReadsInRegionsByRep(currPotRegions, ipHitsByRep, y==currentRegion.getEnd() ? y : y-expansion);
478517
//Note: it looks like currPotRegions and threadPotentials are redundant in the above, but they are not.
@@ -495,7 +534,7 @@ public void run() {
495534
threadPotentials.add(p);
496535
}
497536
}
498-
threadPotentials = filterExcluded(threadPotentials);
537+
threadPotentials = filterExcludedAnyOverlap(threadPotentials);
499538
}
500539
if(threadPotentials.size()>0){
501540
synchronized(potentialRegions){

0 commit comments

Comments
 (0)