Update large decode config, simplify launcher, add justfile targets

Donglai Wei · claude · Donglai Wei · commit 66beddaeb17d · 2026-03-29T00:17:46.000-04:00
- waterz_decoding_large.yaml: rename border params to match face_merge API (adds border_threshold: 0.3)
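- decode_large.py: simplify param passing to LargeDecodeRunner (build a LargeDecodeConfig via from_dict instead of passing each field to LargeDecodeRunner.create)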
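- main.py: minor decode-only mode fixes (_invert_save_prediction_transform now casts to float32 only when it actually inverts intensity scaling; otherwise the input dtype is kept)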
- justfile: add decode-large targets; visualize-volumes now accepts a port override (port=N, --port N, or --port=N)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/justfile b/justfile
@@ -273,8 +273,45 @@ visualize-files image='' label='' port='9999' *ARGS='':
     python -i scripts/visualize_neuroglancer.py $args {{ARGS}}
 
 # Visualize multiple volumes with custom names (e.g., just visualize-volumes image:path/img.tif label:path/lbl.h5)
+# Override the default port (9999) with `port=8080`, `--port 8080`, or `--port=8080`.
 visualize-volumes +volumes:
-    python -i scripts/visualize_neuroglancer.py --volumes {{volumes}}
+    #!/usr/bin/env bash
+    set -euo pipefail
+    port=9999
+    args=({{volumes}})
+    volume_args=()
+
+    i=0
+    while [ $i -lt ${#args[@]} ]; do
+        arg="${args[$i]}"
+        case "$arg" in
+            port=*)
+                port="${arg#port=}"
+                ;;
+            --port=*)
+                port="${arg#--port=}"
+                ;;
+            --port)
+                i=$((i + 1))
+                if [ $i -ge ${#args[@]} ]; then
+                    echo "ERROR: --port requires a value" >&2
+                    exit 1
+                fi
+                port="${args[$i]}"
+                ;;
+            *)
+                volume_args+=("$arg")
+                ;;
+        esac
+        i=$((i + 1))
+    done
+
+    if [ ${#volume_args[@]} -eq 0 ]; then
+        echo "ERROR: At least one volume spec is required" >&2
+        exit 1
+    fi
+
+    python -i scripts/visualize_neuroglancer.py --port "$port" --volumes "${volume_args[@]}"
 
 # Visualize with remote access (use 0.0.0.0 for public IP, e.g., just visualize-remote 8080 tutorials/monai_lucchi.yaml)
 visualize-remote port config *ARGS='':
diff --git a/scripts/decode_large.py b/scripts/decode_large.py
@@ -4,7 +4,10 @@
     # Serial (single process, all stages)
     python scripts/decode_large.py --config tutorials/waterz_decoding_large.yaml
 
-    # Initialize workflow only (for parallel launch)
+    # Parallel (N workers on one machine)
+    python scripts/decode_large.py --config tutorials/waterz_decoding_large.yaml --parallel 4
+
+    # Initialize workflow only (for SLURM parallel launch)
     python scripts/decode_large.py --config tutorials/waterz_decoding_large.yaml --init-only
 
     # Run as a worker (claims tasks from shared workflow dir)
@@ -17,7 +20,6 @@
 import argparse
 import os
 import sys
-from pathlib import Path
 
 import yaml
 
@@ -30,16 +32,14 @@ def main():
     parser.add_argument("--wait", action="store_true", help="Wait for all tasks to complete")
     parser.add_argument("--assemble", action="store_true", help="Assemble final output volume")
     parser.add_argument("--parallel", type=int, default=None,
-                        help="Run N worker processes on this machine (e.g. --parallel 8)")
+                        help="Run N worker processes on this machine")
     parser.add_argument("--max-tasks", type=int, default=None, help="Max tasks per worker")
     parser.add_argument("--idle-timeout", type=float, default=60.0, help="Worker idle timeout (seconds)")
     parser.add_argument("--worker-id", type=str, default=None, help="Worker identifier")
     parser.add_argument("--job-id", type=str, default=None, help="SLURM job ID")
-    # Allow CLI overrides in key=value format
     parser.add_argument("overrides", nargs="*", help="Config overrides (key=value)")
     args = parser.parse_args()
 
-    # Load config
     with open(args.config) as f:
         cfg = yaml.safe_load(f)
 
@@ -51,7 +51,6 @@ def main():
             print(f"Warning: skipping invalid override '{override}' (expected key=value)")
             continue
         key, value = override.split("=", 1)
-        # Try numeric conversion
         try:
             value = int(value)
         except ValueError:
@@ -70,44 +69,21 @@ def main():
 
     os.environ.setdefault("CCACHE_DISABLE", "1")
 
-    from waterz import LargeDecodeRunner
-
-    # Parse config
-    chunk_shape = large_cfg.get("chunk_shape", [256, 512, 512])
-    if isinstance(chunk_shape, list):
-        chunk_shape = tuple(chunk_shape)
-
-    thresholds = large_cfg.get("thresholds", [0.5])
-    if isinstance(thresholds, (int, float)):
-        thresholds = [thresholds]
-
-    runner = LargeDecodeRunner.create(
-        affinity_path=large_cfg["affinity_path"],
-        workflow_root=large_cfg["workflow_root"],
-        chunk_shape=chunk_shape,
-        thresholds=thresholds,
-        merge_function=large_cfg.get("merge_function", "aff85_his256"),
-        aff_threshold_low=float(large_cfg.get("aff_threshold_low", 0.1)),
-        aff_threshold_high=float(large_cfg.get("aff_threshold_high", 0.999)),
-        channel_order=large_cfg.get("channel_order", "xyz"),
-        write_output=bool(large_cfg.get("write_output", True)),
-        output_path=large_cfg.get("output_path") or None,
-        min_overlap=int(large_cfg.get("min_overlap", 1)),
-        iou_threshold=float(large_cfg.get("iou_threshold", 0.0)),
-        one_sided_threshold=float(large_cfg.get("one_sided_threshold", 0.9)),
-        one_sided_min_size=int(large_cfg.get("one_sided_min_size", 0)),
-        affinity_threshold=float(large_cfg.get("affinity_threshold", 0.0)),
-        compression=large_cfg.get("compression", "gzip"),
-        compression_level=int(large_cfg.get("compression_level", 4)),
-    )
+    from waterz import LargeDecodeConfig, LargeDecodeRunner
+
+    # Build config from yaml dict — from_dict handles all field mapping
+    config = LargeDecodeConfig.from_dict(large_cfg)
+    runner = LargeDecodeRunner(config)
+    runner.initialize()
 
     chunks = runner.chunks
     borders = runner.borders
-    print(f"Volume shape: {runner.config.volume_shape}")
-    print(f"Chunk shape:  {runner.config.chunk_shape}")
+    print(f"Volume shape: {config.volume_shape}")
+    print(f"Chunk shape:  {config.chunk_shape}")
+    print(f"Overlap:      {config.overlap}")
     print(f"Chunks:       {len(chunks)}")
     print(f"Borders:      {len(borders)}")
-    print(f"Workflow:     {runner.config.workflow_root}")
+    print(f"Workflow:     {config.workflow_root}")
 
     if args.init_only:
         print("Workflow initialized. Launch workers to execute tasks.")
@@ -130,22 +106,20 @@ def main():
         print("Waiting for all tasks to complete...")
         runner.wait(timeout=None)
         print("All tasks completed.")
-        if args.assemble and runner.config.write_output:
+        if args.assemble and config.write_output:
             print("Assembling output...")
             runner.handle_assemble_output(None)
-            print(f"Output: {runner.config.resolved_output_path}")
+            print(f"Output: {config.resolved_output_path}")
         return
 
     if args.parallel and args.parallel > 1:
-        # Multi-process on one machine
         import multiprocessing as mp
 
         n_workers = args.parallel
         print(f"Running parallel decode with {n_workers} workers...")
 
         def _worker_fn(worker_idx):
             os.environ["CCACHE_DISABLE"] = "1"
-            # Each process loads its own runner from disk
             from waterz import LargeDecodeRunner as _LDR
             w = _LDR.load(large_cfg["workflow_root"])
             return w.run_worker(
@@ -159,7 +133,6 @@ def _worker_fn(worker_idx):
         n = sum(counts)
         print(f"Completed {n} tasks across {n_workers} workers.")
     else:
-        # Default: run serial (all stages in one process)
         print("Running serial decode...")
         n = runner.run_serial()
         print(f"Completed {n} tasks.")
diff --git a/scripts/main.py b/scripts/main.py
@@ -620,14 +620,19 @@ def _invert_save_prediction_transform(cfg: Config, data):
     save_pred_cfg = inference_cfg.save_prediction
     intensity_scale = getattr(save_pred_cfg, "intensity_scale", None)
 
-    data = data.astype(np.float32)
     if intensity_scale is not None and intensity_scale > 0 and intensity_scale != 1.0:
+        data = data.astype(np.float32, copy=False)
         data = data / float(intensity_scale)
         print(f"  Inverted intensity scaling by {intensity_scale}")
-    elif intensity_scale is not None and intensity_scale < 0:
+        return data
+
+    if intensity_scale is not None and intensity_scale < 0:
         print(
-            f"  INFO: Intensity scaling was disabled (scale={intensity_scale}), no inversion needed"
+            f"  INFO: Intensity scaling was disabled (scale={intensity_scale}), keeping dtype "
+            f"{data.dtype}"
         )
+    else:
+        print(f"  INFO: No intensity inversion needed, keeping dtype {data.dtype}")
 
     return data
 
diff --git a/tutorials/waterz_decoding_large.yaml b/tutorials/waterz_decoding_large.yaml
@@ -36,6 +36,7 @@ large_decode:
   merge_function: aff85_his256
   aff_threshold_low: 0
   aff_threshold_high: 1
+  border_threshold: 0.3
   channel_order: xyz
   #use_aff_uint8: true            # uint8 affinities (4x less aff memory)
   #use_seg_uint32: true           # uint32 segment IDs (2x less seg memory)