@@ -115,19 +115,25 @@ public PotentialRegionFilter(EventsConfig ec, ChExMixConfig c, ExptConfig econ,
115115 */
116116 public List <Region > execute (){
117117 //TODO: check config for defined subset of regions
118- Iterator <Region > testRegions = new ChromosomeGenerator ().execute (config .getGenome ());
118+ Iterator <Region > testRegionsIter = new ChromosomeGenerator ().execute (config .getGenome ());
119+ List <Region > testRegions = new ArrayList <Region >();
120+ while (testRegionsIter .hasNext ())
121+ testRegions .add (testRegionsIter .next ());
122+
123+ //If we put the exclude filter here, we can deal with large regions that overlap exclude regions more smoothly
124+ //However, this method of filtering is safest when excluding regions from whole chromosomes or other large regions
125+ testRegions = filterExcludedLeaveRemaining (testRegions );
119126
120127 //Threading divides analysis over entire chromosomes. This approach is not compatible with file caching.
121128 int numThreads = econfig .getCacheAllData () ? config .getMaxThreads () : 1 ;
122129
123130 Thread [] threads = new Thread [numThreads ];
124- ArrayList <Region > threadRegions [] = new ArrayList [numThreads ];
131+ List <Region > threadRegions [] = new ArrayList [numThreads ];
125132 int i = 0 ;
126133 for (i = 0 ; i < threads .length ; i ++) {
127134 threadRegions [i ] = new ArrayList <Region >();
128135 }i =0 ;
129- while (testRegions .hasNext ()){
130- Region r = testRegions .next ();
136+ for (Region r : testRegions ){
131137 threadRegions [(i ++) % numThreads ].add (r );
132138 }
133139
@@ -187,7 +193,7 @@ public List<Region> executeInitialPositionFiler(){
187193 chrStartExcluded .add (r );
188194 }
189195
190- potentialRegions = filterExcluded (chrStartExcluded );
196+ potentialRegions = filterExcludedAnyOverlap (chrStartExcluded );
191197
192198 // signal and control counts from potential regions
193199 countReadsInRegionsNoLandscape (potentialRegions );
@@ -325,8 +331,11 @@ protected void countReadsInRegionsByRepNoLandscape(List<Region> regs){
325331 }
326332 }
327333
328- //Filter out pre-defined regions to ignore (e.g. tower regions)
329- protected List <Region > filterExcluded (List <Region > testRegions ) {
334+ /**
335+ * Filter out pre-defined regions to ignore (e.g. blacklist regions)
336+ * This version filters regions that touch an excluded region >=1bp
337+ */
338+ protected List <Region > filterExcludedAnyOverlap (List <Region > testRegions ) {
330339 List <Region > filtered = new ArrayList <Region >();
331340 if (config .getRegionsToIgnore ().size ()==0 )
332341 return testRegions ;
@@ -345,6 +354,35 @@ protected List<Region> filterExcluded(List<Region> testRegions) {
345354 }
346355 return filtered ;
347356 }
357+
358+ /**
359+ * Filter out pre-defined regions to ignore (e.g. blacklist regions)
360+ * This version returns segments of regions that don't overlap.
361+ * Safest to apply to whole chromosomes, I think
362+ */
363+ protected List <Region > filterExcludedLeaveRemaining (List <Region > testRegions ) {
364+ if (config .getRegionsToIgnore ().size ()==0 )
365+ return testRegions ;
366+
367+ List <Region > filtered = new ArrayList <Region >();
368+ filtered .addAll (testRegions );
369+ for (Region i : config .getRegionsToIgnore ()){
370+ boolean overlaps = false ;
371+ int x =0 ;
372+ while (x < filtered .size () && overlaps ==false ){
373+ Region t = filtered .get (x );
374+ if (t .overlaps (i )){
375+ overlaps = true ;
376+ Collection <Region > subFrags = t .getSubtractionFragments (i );
377+ filtered .remove (x );
378+ if (subFrags .size ()>0 )
379+ filtered .addAll (subFrags );
380+ }
381+ x ++;
382+ }
383+ }
384+ return filtered ;
385+ }
348386
349387 /**
350388 * Print potential regions to a file.
@@ -472,7 +510,8 @@ public void run() {
472510 //Count all "signal" reads overlapping the regions in currPotRegions (including the lastPotential)
473511 if (lastPotential !=null )
474512 currPotRegions .add (lastPotential );
475- currPotRegions = filterExcluded (currPotRegions );
513+ //The exclude filter here is very likely redundant with the first-pass filter in the main execute method, but I'm leaving it here in case there are other execution modes.
514+ currPotRegions = filterExcludedAnyOverlap (currPotRegions );
476515 countReadsInRegions (currPotRegions , ipHits , backHits , y ==currentRegion .getEnd () ? y : y -expansion );
477516 countReadsInRegionsByRep (currPotRegions , ipHitsByRep , y ==currentRegion .getEnd () ? y : y -expansion );
478517 //Note: it looks like currPotRegions and threadPotentials are redundant in the above, but they are not.
@@ -495,7 +534,7 @@ public void run() {
495534 threadPotentials .add (p );
496535 }
497536 }
498- threadPotentials = filterExcluded (threadPotentials );
537+ threadPotentials = filterExcludedAnyOverlap (threadPotentials );
499538 }
500539 if (threadPotentials .size ()>0 ){
501540 synchronized (potentialRegions ){
0 commit comments