docs

crusaderky · crusaderky · commit 5058e721cd9d · 2026-03-08T11:55:41.000Z
diff --git a/recursive_diff/core.py b/recursive_diff/core.py
@@ -596,16 +596,18 @@ def _fast_dask_nonzero(mask: Array) -> tuple[tuple[Array, ...], Array]:
     """Variant of da.nonzero(mask), which is much faster when the number of
     nonzero elements is much smaller than the total.
 
-    Returns
+    Returns:
 
-    - tuple of single-chunk arrays of shape (mask.ndim, number of differences),
-      ordered as it would be returned by da.nonzero(mask)
-    - single-chunk array of shape (number of differences, ) which is to be used
-      by _fast_dask_mask to reorder the output.
+    - tuple of arrays of shape (nan, ), one array per axis, one point per nonzero
+      element, just like da.nonzero(mask)
+    - matching array of shape (nan, ) which is to be used by _fast_dask_mask to reorder
+      the output.
     """
     import dask
     import dask.array as da
 
+    # 1. Apply np.nonzero() to each chunk independently and add the
+    # coordinates of the top-left corner of the chunk to the output
     chunk_offsets: list[list[int]] = [
         [0, *np.cumsum(c[:-1]).tolist()] for c in mask.chunks
     ]
@@ -617,20 +619,22 @@ def _fast_dask_nonzero(mask: Array) -> tuple[tuple[Array, ...], Array]:
             itertools.product(*chunk_offsets),
         )
     ]
+    # 2. rechunk to a single chunk (needed for sorting)
     rechunked = dask.delayed(np.concatenate, pure=True)(delayeds, axis=1)
     nz = da.from_delayed(
         rechunked,
         shape=(mask.ndim, math.nan),
         dtype=int,
         meta=np.array([[]], dtype=int),
     )
+    # 3. Get the order in which np.nonzero() would have returned the output
     sort_indices = nz[::-1, :].map_blocks(
         np.lexsort,
         dtype=int,
         meta=np.array([], dtype=int),
         drop_axis=0,
     )
-
+    # 4. Reorder
     nz_sorted = nz.T.map_blocks(
         operator.getitem,
         sort_indices,
@@ -648,14 +652,15 @@ def _fast_dask_nonzero_chunk(
 
 
 def _fast_dask_mask(a: Array, mask: Array, sort_indices: Array) -> Array:
-    """Variant of a[mask], which does not preserve the order of the returned elements,
-    which is much faster on Dask for 2+ dimensions arrays because it does not need
-    rechunnking. Applying this function to multiple identically shaped **and chunked**
+    """Variant of a[mask], which is much faster when the number of
+    True points in the mask is much smaller than the total.
+    Applying this function to multiple identically shaped **and chunked**
     arrays with the same mask will return objects in the same order.
     """
     import dask
     import dask.array as da
 
+    # 1. Apply a[mask] to each chunk independently
     f = dask.delayed(operator.getitem, pure=True)
     delayeds = [
         f(a_i, mask_i)
@@ -664,7 +669,10 @@ def _fast_dask_mask(a: Array, mask: Array, sort_indices: Array) -> Array:
             mask.to_delayed().reshape(-1),
         )
     ]
+    # 2. rechunk to a single chunk (needed by a[b], where a has shape=(nan, )
+    #    and b is an integer array)
     rechunked = dask.delayed(np.concatenate, pure=True)(delayeds)
+    # 3. Sort the results to match a[mask]
     sorted = dask.delayed(operator.getitem, pure=True)(rechunked, sort_indices)
     return da.from_delayed(
         sorted,