Skip to content

Commit df6d5a0

Browse files
Donglai Wei and claude committed
Revert extractRegionGraph change, use merge_dust with matched scoring
Removing the _mergedUntil filter from extractRegionGraph caused bad results because stale edges have inconsistent scores and non-root endpoint IDs after agglomeration. Instead, use merge_dust (rebuilds graph via buildRegionGraphOnly) with the OneMinus wrapper stripped so it uses the same scoring function as agglomeration (e.g. HistogramQuantileAffinity p85). This gives correct and consistent edge weights without reusing the problematic internal agglomeration graph. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4f4d64a commit df6d5a0

2 files changed

Lines changed: 64 additions & 61 deletions

File tree

connectomics/decoding/decoders/waterz.py

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,18 @@ def _merge_function_to_scoring(shorthand: str) -> str:
7373
)
7474

7575

76+
def _strip_oneminus(scoring_function: str) -> str:
77+
"""Strip ``OneMinus<...>`` or ``One255Minus<...>`` wrapper.
78+
79+
``merge_dust`` / ``buildRegionGraphOnly`` expects the raw scoring
80+
function (high score = strong connection), not the inverted wrapper
81+
used by the agglomeration priority queue.
82+
"""
83+
for prefix in ("OneMinus<", "One255Minus<"):
84+
if scoring_function.startswith(prefix) and scoring_function.endswith(">"):
85+
return scoring_function[len(prefix):-1]
86+
return scoring_function
87+
7688

7789
def decode_waterz(
7890
predictions: np.ndarray,
@@ -147,11 +159,11 @@ def decode_waterz(
147159
min_instance_size: Minimum instance size in voxels. Instances smaller
148160
than this are removed (set to background). Set to 0 to disable.
149161
Default: 0
150-
dust_merge: Enable dust postprocessing. Reuses the agglomeration's
151-
full region graph (with accumulated scoring statistics) via
152-
``waterz.merge_segments`` — no graph rebuild needed.
153-
When False, the dust merge and dust removal thresholds below
154-
are ignored. Default: True
162+
dust_merge: Enable dust postprocessing. Rebuilds the region graph
163+
via ``waterz.merge_dust`` using the same scoring function as
164+
agglomeration (e.g. p85 histogram quantile), ensuring consistent
165+
edge weights. When False, the dust merge and dust removal
166+
thresholds below are ignored. Default: True
155167
dust_merge_size: Size+affinity dust merge (zwatershed-style).
156168
Segments with fewer voxels than this are merged into their
157169
highest-affinity neighbor. Unlike *min_instance_size* which
@@ -296,50 +308,29 @@ def decode_waterz(
296308
waterz_kwargs["fragments"] = fragments.astype(np.uint64, copy=False)
297309

298310
do_dust_merge = bool(dust_merge) and dust_merge_size > 0
299-
waterz_kwargs["return_region_graph"] = do_dust_merge
311+
312+
# For dust merge, strip OneMinus/One255Minus so buildRegionGraphOnly
313+
# uses the same scoring function as agglomeration (e.g. p85 histogram)
314+
# but returns raw affinities (high = strong) instead of inverted scores.
315+
dust_scoring = _strip_oneminus(scoring_function) if do_dust_merge else ""
300316

301317
# waterz.waterz() runs watershed + region-graph once, then incrementally
302318
# merges for each threshold. Returns all segmentations (copied).
303319
seg_list = waterz.waterz(affs, thresholds=thresholds_list, **waterz_kwargs)
304320

305321
# Post-process each result
306322
processed: List[np.ndarray] = []
307-
for waterz_result in seg_list:
308-
if do_dust_merge:
309-
seg, region_graph = waterz_result
310-
else:
311-
seg = waterz_result
312-
313-
# Size+affinity dust merge reusing the agglomeration's full region
314-
# graph (extractRegionGraph returns all non-deleted edges with
315-
# accumulated scores from the agglomeration process).
323+
for seg in seg_list:
324+
# Size+affinity dust merge via buildRegionGraphOnly with the same
325+
# scoring function as agglomeration (not MeanAffinity default).
316326
if do_dust_merge:
317327
seg = seg.astype(np.uint64, copy=False)
318-
n_edges = len(region_graph)
319-
rg_affs = np.empty(n_edges, dtype=np.float32)
320-
id1 = np.empty(n_edges, dtype=np.uint64)
321-
id2 = np.empty(n_edges, dtype=np.uint64)
322-
# Invert OneMinus/One255Minus scores to raw affinities.
323-
score_max = 255.0 if is_uint8 else 1.0
324-
for idx, edge in enumerate(region_graph):
325-
rg_affs[idx] = score_max - float(edge["score"])
326-
id1[idx] = int(edge["u"])
327-
id2[idx] = int(edge["v"])
328-
if n_edges:
329-
np.clip(rg_affs, 0.0, score_max, out=rg_affs)
330-
order = np.argsort(rg_affs)[::-1]
331-
rg_affs = np.ascontiguousarray(rg_affs[order])
332-
id1 = np.ascontiguousarray(id1[order])
333-
id2 = np.ascontiguousarray(id2[order])
334-
ids, cnts = np.unique(seg, return_counts=True)
335-
max_id = int(ids.max()) if len(ids) else 0
336-
counts = np.zeros(max_id + 1, dtype=np.uint64)
337-
counts[ids] = cnts
338-
waterz.merge_segments(
339-
seg, rg_affs, id1, id2, counts,
328+
waterz.merge_dust(
329+
seg, affs,
340330
size_th=dust_merge_size,
341331
weight_th=dust_merge_affinity,
342332
dust_th=dust_remove_size,
333+
scoring_function=dust_scoring,
343334
)
344335
# Branch merge: resolve false splits via z-slice IOU analysis
345336
if branch_merge:

tests/unit/test_decode_waterz.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,26 @@ class _FakeWaterzModule:
1111
"""Minimal waterz stub for testing wrapper behavior."""
1212

1313
def __init__(self):
14-
self.merge_segments_calls = []
14+
self.merge_dust_calls = []
1515
self.waterz_calls = []
1616

1717
def waterz(self, affs, thresholds, **kwargs):
1818
self.waterz_calls.append(kwargs.copy())
1919
seg = np.zeros(affs.shape[1:], dtype=np.uint64)
2020
seg[:, :, :2] = 1
2121
seg[:, :, 2:] = 2
22-
if kwargs.get("return_region_graph", False):
23-
# ScoredEdge dicts from extractRegionGraph.
24-
# OneMinus score 0.2 → affinity = 1.0 - 0.2 = 0.8
25-
rg = [{"u": 1, "v": 2, "score": 0.2}]
26-
return [(seg.copy(), list(rg)) for _ in thresholds]
2722
return [seg.copy() for _ in thresholds]
2823

29-
def merge_segments(self, seg, rg_affs, id1, id2, counts,
30-
size_th, weight_th, dust_th):
31-
self.merge_segments_calls.append(
24+
def merge_dust(self, seg, affs, size_th, weight_th, dust_th,
25+
scoring_function, channels="all"):
26+
self.merge_dust_calls.append(
3227
{
3328
"seg_shape": seg.shape,
34-
"rg_affs": rg_affs.tolist(),
35-
"id1": id1.tolist(),
36-
"id2": id2.tolist(),
37-
"counts": counts.tolist(),
29+
"aff_shape": affs.shape,
3830
"size_th": size_th,
3931
"weight_th": weight_th,
4032
"dust_th": dust_th,
33+
"scoring_function": scoring_function,
4134
}
4235
)
4336

@@ -64,14 +57,13 @@ def test_decode_waterz_skips_dust_postprocessing_when_disabled(monkeypatch):
6457
"scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>>",
6558
"aff_threshold_low": 0.0001,
6659
"aff_threshold_high": 0.9999,
67-
"return_region_graph": False,
6860
}
6961
]
70-
assert fake_waterz.merge_segments_calls == []
62+
assert fake_waterz.merge_dust_calls == []
7163

7264

73-
def test_decode_waterz_reuses_agglomeration_region_graph_for_dust(monkeypatch):
74-
"""Dust merge reuses agglomeration's region graph with inverted scores."""
65+
def test_decode_waterz_dust_merge_uses_same_scoring_function(monkeypatch):
66+
"""Dust merge rebuilds graph with same scoring as agglomeration (OneMinus stripped)."""
7567
fake_waterz = _FakeWaterzModule()
7668
monkeypatch.setattr(waterz_decoder, "waterz", fake_waterz)
7769
monkeypatch.setattr(waterz_decoder, "WATERZ_AVAILABLE", True)
@@ -81,6 +73,7 @@ def test_decode_waterz_reuses_agglomeration_region_graph_for_dust(monkeypatch):
8173
waterz_decoder.decode_waterz(
8274
predictions,
8375
thresholds=0.4,
76+
merge_function="aff85_his256",
8477
dust_merge=True,
8578
dust_merge_size=100,
8679
dust_merge_affinity=0.3,
@@ -89,21 +82,40 @@ def test_decode_waterz_reuses_agglomeration_region_graph_for_dust(monkeypatch):
8982

9083
assert fake_waterz.waterz_calls == [
9184
{
92-
"scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>>",
85+
"scoring_function": "OneMinus<HistogramQuantileAffinity<RegionGraphType, 85, ScoreValue, 256>>",
9386
"aff_threshold_low": 0.0001,
9487
"aff_threshold_high": 0.9999,
95-
"return_region_graph": True,
9688
}
9789
]
98-
assert fake_waterz.merge_segments_calls == [
90+
assert fake_waterz.merge_dust_calls == [
9991
{
10092
"seg_shape": (4, 4, 4),
101-
"rg_affs": [0.800000011920929],
102-
"id1": [1],
103-
"id2": [2],
104-
"counts": [0, 32, 32],
93+
"aff_shape": (3, 4, 4, 4),
10594
"size_th": 100,
10695
"weight_th": 0.3,
10796
"dust_th": 50,
97+
"scoring_function": "HistogramQuantileAffinity<RegionGraphType, 85, ScoreValue, 256>",
10898
}
10999
]
100+
101+
102+
def test_decode_waterz_dust_merge_strips_one255minus(monkeypatch):
103+
"""One255Minus wrapper is also stripped for dust merge scoring."""
104+
fake_waterz = _FakeWaterzModule()
105+
monkeypatch.setattr(waterz_decoder, "waterz", fake_waterz)
106+
monkeypatch.setattr(waterz_decoder, "WATERZ_AVAILABLE", True)
107+
108+
predictions = np.ones((3, 4, 4, 4), dtype=np.float32)
109+
110+
waterz_decoder.decode_waterz(
111+
predictions,
112+
thresholds=0.4,
113+
merge_function="aff50_his256_ran255",
114+
dust_merge=True,
115+
dust_merge_size=100,
116+
dust_merge_affinity=0.3,
117+
dust_remove_size=50,
118+
)
119+
120+
call = fake_waterz.merge_dust_calls[0]
121+
assert call["scoring_function"] == "HistogramQuantileAffinity<RegionGraphType, 50, ScoreValue, 256>"

0 commit comments

Comments
 (0)