Remove _mergedUntil filter from extractRegionGraph, reuse full agglomeration graph

Donglai Wei · claude · Donglai Wei · commit 1322a9abbf9d · 2026-03-26T21:08:54.000-04:00
extractRegionGraph was filtering edges with score < _mergedUntil (the
agglomeration threshold), discarding high-affinity edges that dust merge
needs. Self-edges are already handled by the _deleted filter.

Remove the threshold filter so extractRegionGraph returns the full graph
with accumulated scoring statistics. This lets dust merge reuse the
agglomeration's region graph directly — no rebuild needed, and results
match the original behavior.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/connectomics/decoding/decoders/waterz.py b/connectomics/decoding/decoders/waterz.py
@@ -73,18 +73,6 @@ def _merge_function_to_scoring(shorthand: str) -> str:
     )
 
 
-def _strip_oneminus(scoring_function: str) -> str:
-    """Strip ``OneMinus<...>`` or ``One255Minus<...>`` wrapper.
-
-    ``merge_dust`` / ``buildRegionGraphOnly`` expects the raw scoring
-    function (high score = strong connection), not the inverted wrapper
-    used by the agglomeration priority queue.
-    """
-    for prefix in ("OneMinus<", "One255Minus<"):
-        if scoring_function.startswith(prefix) and scoring_function.endswith(">"):
-            return scoring_function[len(prefix):-1]
-    return scoring_function
-
 
 def decode_waterz(
     predictions: np.ndarray,
@@ -159,11 +147,11 @@ def decode_waterz(
         min_instance_size: Minimum instance size in voxels. Instances smaller
             than this are removed (set to background). Set to 0 to disable.
             Default: 0
-        dust_merge: Enable dust postprocessing.  Rebuilds the region graph
-            via ``waterz.merge_dust`` using the same scoring function as
-            agglomeration (e.g. p85 histogram quantile), ensuring consistent
-            edge weights.  When False, the dust merge and dust removal
-            thresholds below are ignored. Default: True
+        dust_merge: Enable dust postprocessing.  Reuses the agglomeration's
+            full region graph (with accumulated scoring statistics) via
+            ``waterz.merge_segments`` — no graph rebuild needed.
+            When False, the dust merge and dust removal thresholds below
+            are ignored. Default: True
         dust_merge_size: Size+affinity dust merge (zwatershed-style).
             Segments with fewer voxels than this are merged into their
             highest-affinity neighbor.  Unlike *min_instance_size* which
@@ -308,29 +296,50 @@ def decode_waterz(
         waterz_kwargs["fragments"] = fragments.astype(np.uint64, copy=False)
 
     do_dust_merge = bool(dust_merge) and dust_merge_size > 0
-
-    # For dust merge, strip OneMinus/One255Minus so buildRegionGraphOnly
-    # uses the same scoring function as agglomeration (e.g. p85 histogram)
-    # but returns raw affinities (high = strong) instead of inverted scores.
-    dust_scoring = _strip_oneminus(scoring_function) if do_dust_merge else ""
+    waterz_kwargs["return_region_graph"] = do_dust_merge
 
     # waterz.waterz() runs watershed + region-graph once, then incrementally
     # merges for each threshold.  Returns all segmentations (copied).
     seg_list = waterz.waterz(affs, thresholds=thresholds_list, **waterz_kwargs)
 
     # Post-process each result
     processed: List[np.ndarray] = []
-    for seg in seg_list:
-        # Size+affinity dust merge via buildRegionGraphOnly with the same
-        # scoring function as agglomeration (not MeanAffinity default).
+    for waterz_result in seg_list:
+        if do_dust_merge:
+            seg, region_graph = waterz_result
+        else:
+            seg = waterz_result
+
+        # Size+affinity dust merge reusing the agglomeration's full region
+        # graph (extractRegionGraph returns all non-deleted edges with
+        # accumulated scores from the agglomeration process).
         if do_dust_merge:
             seg = seg.astype(np.uint64, copy=False)
-            waterz.merge_dust(
-                seg, affs,
+            n_edges = len(region_graph)
+            rg_affs = np.empty(n_edges, dtype=np.float32)
+            id1 = np.empty(n_edges, dtype=np.uint64)
+            id2 = np.empty(n_edges, dtype=np.uint64)
+            # Invert OneMinus/One255Minus scores to raw affinities.
+            score_max = 255.0 if is_uint8 else 1.0
+            for idx, edge in enumerate(region_graph):
+                rg_affs[idx] = score_max - float(edge["score"])
+                id1[idx] = int(edge["u"])
+                id2[idx] = int(edge["v"])
+            if n_edges:
+                np.clip(rg_affs, 0.0, score_max, out=rg_affs)
+                order = np.argsort(rg_affs)[::-1]
+                rg_affs = np.ascontiguousarray(rg_affs[order])
+                id1 = np.ascontiguousarray(id1[order])
+                id2 = np.ascontiguousarray(id2[order])
+            ids, cnts = np.unique(seg, return_counts=True)
+            max_id = int(ids.max()) if len(ids) else 0
+            counts = np.zeros(max_id + 1, dtype=np.uint64)
+            counts[ids] = cnts
+            waterz.merge_segments(
+                seg, rg_affs, id1, id2, counts,
                 size_th=dust_merge_size,
                 weight_th=dust_merge_affinity,
                 dust_th=dust_remove_size,
-                scoring_function=dust_scoring,
             )
         # Branch merge: resolve false splits via z-slice IOU analysis
         if branch_merge:
diff --git a/tests/unit/test_decode_waterz.py b/tests/unit/test_decode_waterz.py
@@ -11,25 +11,33 @@ class _FakeWaterzModule:
     """Minimal waterz stub for testing wrapper behavior."""
 
     def __init__(self):
-        self.merge_dust_calls = []
+        self.merge_segments_calls = []
         self.waterz_calls = []
 
     def waterz(self, affs, thresholds, **kwargs):
         self.waterz_calls.append(kwargs.copy())
         seg = np.zeros(affs.shape[1:], dtype=np.uint64)
         seg[:, :, :2] = 1
         seg[:, :, 2:] = 2
+        if kwargs.get("return_region_graph", False):
+            # ScoredEdge dicts from extractRegionGraph.
+            # OneMinus score 0.2 → affinity = 1.0 - 0.2 = 0.8
+            rg = [{"u": 1, "v": 2, "score": 0.2}]
+            return [(seg.copy(), list(rg)) for _ in thresholds]
         return [seg.copy() for _ in thresholds]
 
-    def merge_dust(self, seg, affs, size_th, weight_th, dust_th, scoring_function, channels="all"):
-        self.merge_dust_calls.append(
+    def merge_segments(self, seg, rg_affs, id1, id2, counts,
+                       size_th, weight_th, dust_th):
+        self.merge_segments_calls.append(
             {
                 "seg_shape": seg.shape,
-                "aff_shape": affs.shape,
+                "rg_affs": rg_affs.tolist(),
+                "id1": id1.tolist(),
+                "id2": id2.tolist(),
+                "counts": counts.tolist(),
                 "size_th": size_th,
                 "weight_th": weight_th,
                 "dust_th": dust_th,
-                "scoring_function": scoring_function,
             }
         )
 
@@ -56,13 +64,14 @@ def test_decode_waterz_skips_dust_postprocessing_when_disabled(monkeypatch):
             "scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>>",
             "aff_threshold_low": 0.0001,
             "aff_threshold_high": 0.9999,
+            "return_region_graph": False,
         }
     ]
-    assert fake_waterz.merge_dust_calls == []
+    assert fake_waterz.merge_segments_calls == []
 
 
-def test_decode_waterz_dust_merge_uses_same_scoring_function(monkeypatch):
-    """Dust merge rebuilds graph with same scoring as agglomeration (OneMinus stripped)."""
+def test_decode_waterz_reuses_agglomeration_region_graph_for_dust(monkeypatch):
+    """Dust merge reuses agglomeration's region graph with inverted scores."""
     fake_waterz = _FakeWaterzModule()
     monkeypatch.setattr(waterz_decoder, "waterz", fake_waterz)
     monkeypatch.setattr(waterz_decoder, "WATERZ_AVAILABLE", True)
@@ -72,7 +81,6 @@ def test_decode_waterz_dust_merge_uses_same_scoring_function(monkeypatch):
     waterz_decoder.decode_waterz(
         predictions,
         thresholds=0.4,
-        merge_function="aff85_his256",
         dust_merge=True,
         dust_merge_size=100,
         dust_merge_affinity=0.3,
@@ -81,40 +89,21 @@ def test_decode_waterz_dust_merge_uses_same_scoring_function(monkeypatch):
 
     assert fake_waterz.waterz_calls == [
         {
-            "scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 85, ScoreValue, 256>>",
+            "scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>>",
             "aff_threshold_low": 0.0001,
             "aff_threshold_high": 0.9999,
+            "return_region_graph": True,
         }
     ]
-    assert fake_waterz.merge_dust_calls == [
+    assert fake_waterz.merge_segments_calls == [
         {
             "seg_shape": (4, 4, 4),
-            "aff_shape": (3, 4, 4, 4),
+            "rg_affs": [0.800000011920929],
+            "id1": [1],
+            "id2": [2],
+            "counts": [0, 32, 32],
             "size_th": 100,
             "weight_th": 0.3,
             "dust_th": 50,
-            "scoring_function": "HistogramQuantileAffinity<RegionGraphType, 85, ScoreValue, 256>",
         }
     ]
-
-
-def test_decode_waterz_dust_merge_strips_one255minus(monkeypatch):
-    """One255Minus wrapper is also stripped for dust merge scoring."""
-    fake_waterz = _FakeWaterzModule()
-    monkeypatch.setattr(waterz_decoder, "waterz", fake_waterz)
-    monkeypatch.setattr(waterz_decoder, "WATERZ_AVAILABLE", True)
-
-    predictions = np.ones((3, 4, 4, 4), dtype=np.float32)
-
-    waterz_decoder.decode_waterz(
-        predictions,
-        thresholds=0.4,
-        merge_function="aff50_his256_ran255",
-        dust_merge=True,
-        dust_merge_size=100,
-        dust_merge_affinity=0.3,
-        dust_remove_size=50,
-    )
-
-    call = fake_waterz.merge_dust_calls[0]
-    assert call["scoring_function"] == "HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>"