Skip to content

Commit 47f1c2b

Browse files
Copilotakhanf
andauthored
Add batch histogram computation, aggregation, Otsu threshold, and QC rules
Agent-Logs-Url: https://github.com/khanlab/SPIMquant/sessions/1911c21b-2dbc-4019-96d4-4c15c24c890a Co-authored-by: akhanf <11492701+akhanf@users.noreply.github.com>
1 parent a8692fd commit 47f1c2b

7 files changed

Lines changed: 696 additions & 37 deletions

File tree

spimquant/workflow/Snakefile

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,32 @@ active ``seg_method`` contains at least one ``otsu+k{}i{}`` entry.
252252
else [],
253253

254254

255+
rule all_batch_otsu_hist_qc:
256+
"""Target rule to generate batch-wide Otsu threshold sweep QC reports.
257+
258+
Produces one HTML report per stain/method combination (across all subjects)
259+
for every ``otsu+k{}i{}`` segmentation method. Each report shows the
260+
aggregated histogram with Multi-Otsu threshold positions and one mid-volume
261+
crop per subject at each threshold, enabling visual selection of a threshold
262+
that generalises across the entire acquisition batch. Only meaningful when
263+
the active ``seg_method`` contains at least one ``otsu+k{}i{}`` entry.
264+
"""
265+
input:
266+
expand(
267+
bids(
268+
root=root,
269+
datatype="group",
270+
stain="{stain}",
271+
desc="{desc}",
272+
suffix="batchotsuthreshqc.html",
273+
),
274+
stain=stains_for_seg,
275+
desc=otsu_seg_methods,
276+
)
277+
if (do_seg and len(otsu_seg_methods) > 0)
278+
else [],
279+
280+
255281
rule all_register:
256282
input:
257283
inputs["spim"].expand(

spimquant/workflow/rules/qc.smk

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,62 @@ Only applicable when ``seg_method`` uses the ``otsu+k{}i{}`` pattern.
459459
"../scripts/qc_otsu_threshold_sweep.py"
460460

461461

462+
rule qc_batch_otsu_threshold_sweep:
463+
"""Batch threshold sweep QC HTML report for multiotsu segmentation.
464+
465+
Uses the batch-wide Multi-Otsu thresholds (computed from the aggregated
466+
histogram across all subjects in the dataset) to sweep over a range of
467+
threshold values and display one mid-volume 2D crop per subject at each
468+
threshold. Subjects are shown as columns so that the user can immediately
469+
see whether a given threshold generalises across the entire acquisition batch.
470+
471+
The aggregated histogram PNG is embedded at the top of the report.
472+
473+
Only applicable when ``seg_method`` uses the ``otsu+k{}i{}`` pattern.
474+
"""
475+
input:
476+
corrected_zarrs=inputs["spim"].expand(
477+
bids(
478+
root=work,
479+
datatype="seg",
480+
stain="{stain}",
481+
level=str(config["segmentation_level"]),
482+
desc="corrected{method}".format(method=config["correction_method"]),
483+
suffix="SPIM.ome.zarr",
484+
**inputs["spim"].wildcards,
485+
),
486+
allow_missing=True,
487+
),
488+
thresholds_png=bids(
489+
root=root,
490+
datatype="group",
491+
stain="{stain}",
492+
level=str(config["segmentation_level"]),
493+
desc="{desc}",
494+
suffix="thresholds.png",
495+
),
496+
output:
497+
html=bids(
498+
root=root,
499+
datatype="group",
500+
stain="{stain}",
501+
desc="{desc}",
502+
suffix="batchotsuthreshqc.html",
503+
),
504+
threads: 4
505+
resources:
506+
mem_mb=32000,
507+
runtime=60,
508+
params:
509+
n_thresholds=10,
510+
patch_size=300,
511+
level=config["segmentation_level"],
512+
hist_percentile_range=[float(x) for x in config["seg_hist_percentile_range"]],
513+
zarrnii_kwargs={"orientation": config["orientation"]},
514+
script:
515+
"../scripts/qc_batch_otsu_threshold_sweep.py"
516+
517+
462518
rule qc_roi_summary:
463519
"""Per-ROI summary QC: top-region bar plots for a single subject.
464520

spimquant/workflow/rules/segmentation.smk

Lines changed: 154 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Segmentation and quantification workflow for SPIMquant.
33
4-
This module performs intensity correction, segmentation, and quantitative analysis of
4+
This module performs intensity correction, segmentation, and quantitative analysis of
55
pathology markers from SPIM microscopy data. It handles multi-channel data with different
66
stains (e.g., beta-amyloid, alpha-synuclein, Iba1) and produces per-region statistics.
77
@@ -27,9 +27,6 @@ rule gaussian_biasfield:
2727
"""simple bias field correction with gaussian"""
2828
input:
2929
spim=inputs["spim"].path,
30-
params:
31-
proc_level=5,
32-
zarrnii_kwargs={"orientation": config["orientation"]},
3330
output:
3431
corrected=temp(
3532
directory(
@@ -64,6 +61,9 @@ rule gaussian_biasfield:
6461
mem_mb=256000,
6562
disk_mb=2097152,
6663
runtime=15,
64+
params:
65+
proc_level=5,
66+
zarrnii_kwargs={"orientation": config["orientation"]},
6767
script:
6868
"../scripts/gaussian_biasfield.py"
6969

@@ -72,11 +72,6 @@ rule n4_biasfield:
7272
"""N4 bias field correction with antspyx"""
7373
input:
7474
spim=inputs["spim"].path,
75-
params:
76-
proc_level=5,
77-
zarrnii_kwargs={"orientation": config["orientation"]},
78-
shrink_factor=16 if config["sloppy"] else 1,
79-
target_chunk_size=512, #this sets the chunk size for this and downstream masks
8075
output:
8176
corrected=temp(
8277
directory(
@@ -96,18 +91,23 @@ rule n4_biasfield:
9691
resources:
9792
mem_mb=500000 if config["dask_scheduler"] == "distributed" else 250000,
9893
runtime=180,
94+
params:
95+
proc_level=5,
96+
zarrnii_kwargs={"orientation": config["orientation"]},
97+
shrink_factor=16 if config["sloppy"] else 1,
98+
target_chunk_size=512, #this sets the chunk size for this and downstream masks
9999
script:
100100
"../scripts/n4_biasfield.py"
101101

102102

103103
rule multiotsu:
104104
"""Perform multi-Otsu thresholding for segmentation.
105-
106-
Applies multi-level Otsu thresholding to identify intensity classes in the
107-
corrected image. The k parameter determines number of classes, and the i parameter
108-
selects which threshold index to use for creating the binary mask. Outputs a
109-
histogram visualization of the threshold selection.
110-
"""
105+
106+
Applies multi-level Otsu thresholding to identify intensity classes in the
107+
corrected image. The k parameter determines number of classes, and the i parameter
108+
selects which threshold index to use for creating the binary mask. Outputs a
109+
histogram visualization of the threshold selection.
110+
"""
111111
input:
112112
corrected=bids(
113113
root=work,
@@ -118,12 +118,6 @@ rule multiotsu:
118118
suffix="SPIM.ome.zarr",
119119
**inputs["spim"].wildcards,
120120
),
121-
params:
122-
hist_bin_width=float(config["seg_hist_bin_width"]),
123-
hist_percentile_range=[float(x) for x in config["seg_hist_percentile_range"]],
124-
otsu_k=lambda wildcards: int(wildcards.k),
125-
otsu_threshold_index=lambda wildcards: int(wildcards.i),
126-
zarrnii_kwargs={"orientation": config["orientation"]},
127121
output:
128122
mask=bids(
129123
root=root,
@@ -148,16 +142,23 @@ rule multiotsu:
148142
mem_mb=500000 if config["dask_scheduler"] == "distributed" else 250000,
149143
disk_mb=2097152,
150144
runtime=180,
145+
params:
146+
hist_bin_width=float(config["seg_hist_bin_width"]),
147+
hist_percentile_range=[float(x) for x in config["seg_hist_percentile_range"]],
148+
otsu_k=lambda wildcards: int(wildcards.k),
149+
otsu_threshold_index=lambda wildcards: int(wildcards.i),
150+
zarrnii_kwargs={"orientation": config["orientation"]},
151151
script:
152152
"../scripts/multiotsu.py"
153153

154154

155155
rule threshold:
156156
"""Apply simple intensity threshold for segmentation.
157-
158-
Creates binary mask by thresholding the corrected image at a specified intensity value.
159-
Simpler alternative to multi-Otsu for cases where the threshold is known a priori.
160-
"""
157+
158+
Creates binary mask by thresholding the corrected image at a specified intensity value.
159+
Simpler alternative to multi-Otsu for cases where the threshold is known a priori.
160+
161+
"""
161162
input:
162163
corrected=bids(
163164
root=work,
@@ -168,9 +169,6 @@ rule threshold:
168169
suffix="SPIM.ome.zarr",
169170
**inputs["spim"].wildcards,
170171
),
171-
params:
172-
threshold=lambda wildcards: int(wildcards.threshold),
173-
zarrnii_kwargs={"orientation": config["orientation"]},
174172
output:
175173
mask=bids(
176174
root=root,
@@ -185,17 +183,136 @@ rule threshold:
185183
resources:
186184
mem_mb=500000 if config["dask_scheduler"] == "distributed" else 250000,
187185
runtime=180,
186+
params:
187+
threshold=lambda wildcards: int(wildcards.threshold),
188+
zarrnii_kwargs={"orientation": config["orientation"]},
188189
script:
189190
"../scripts/threshold.py"
190191

191192

193+
rule compute_spim_histogram:
194+
"""Compute an intensity histogram from a raw SPIM channel at a given level.
195+
196+
Reads the OME-Zarr at the specified pyramid *level* using Dask and accumulates
197+
a histogram over a fixed intensity range with a fixed number of bins. The bin
198+
edges and counts are stored as a compressed NumPy archive (``.npz``) for later
199+
aggregation and batch-wide threshold optimisation.
200+
"""
201+
input:
202+
spim=inputs["spim"].path,
203+
output:
204+
histogram=bids(
205+
root=root,
206+
datatype="seg",
207+
stain="{stain}",
208+
level="{level}",
209+
suffix="histogram.npz",
210+
**inputs["spim"].wildcards,
211+
),
212+
threads: 32
213+
resources:
214+
mem_mb=32000,
215+
runtime=30,
216+
params:
217+
level="{level}",
218+
hist_bins=config.get("spim_hist_bins", 500),
219+
hist_range=config.get("spim_hist_range", [0, 65535]),
220+
zarrnii_kwargs={"orientation": config["orientation"]},
221+
script:
222+
"../scripts/compute_spim_histogram.py"
223+
224+
225+
rule aggregate_histograms:
226+
"""Aggregate per-subject histograms into a single batch histogram.
227+
228+
Collects histogram ``.npz`` files from all subjects (for a given stain and
229+
pyramid level) and produces a single combined histogram by summing counts.
230+
All input histograms must have been computed with the same ``hist_range`` and
231+
``hist_bins`` parameters.
232+
"""
233+
input:
234+
histograms=inputs["spim"].expand(
235+
bids(
236+
root=root,
237+
datatype="seg",
238+
stain="{stain}",
239+
level="{level}",
240+
suffix="histogram.npz",
241+
**inputs["spim"].wildcards,
242+
),
243+
allow_missing=True,
244+
),
245+
output:
246+
histogram=bids(
247+
root=root,
248+
datatype="group",
249+
stain="{stain}",
250+
level="{level}",
251+
suffix="histogram.npz",
252+
),
253+
threads: 1
254+
resources:
255+
mem_mb=4000,
256+
runtime=10,
257+
script:
258+
"../scripts/aggregate_histograms.py"
259+
260+
261+
rule multiotsu_from_batch_histogram:
262+
"""Compute Multi-Otsu thresholds from the batch-aggregated histogram.
263+
264+
Reads the aggregated histogram, optionally clips to a percentile-based
265+
intensity range, and applies multi-level Otsu thresholding. This yields a
266+
single set of thresholds that is consistent across all subjects in the batch
267+
(i.e. acquisitions with the same imaging parameters).
268+
269+
Outputs a TSV of threshold index → value and a histogram PNG with the
270+
threshold positions marked.
271+
"""
272+
input:
273+
histogram=bids(
274+
root=root,
275+
datatype="group",
276+
stain="{stain}",
277+
level="{level}",
278+
suffix="histogram.npz",
279+
),
280+
output:
281+
thresholds_png=bids(
282+
root=root,
283+
datatype="group",
284+
stain="{stain}",
285+
level="{level}",
286+
desc="otsu+k{k,[0-9]+}i{i,[0-9]+}",
287+
suffix="thresholds.png",
288+
),
289+
thresholds_tsv=bids(
290+
root=root,
291+
datatype="group",
292+
stain="{stain}",
293+
level="{level}",
294+
desc="otsu+k{k,[0-9]+}i{i,[0-9]+}",
295+
suffix="thresholds.tsv",
296+
),
297+
threads: 1
298+
resources:
299+
mem_mb=4000,
300+
runtime=10,
301+
params:
302+
hist_percentile_range=[float(x) for x in config["seg_hist_percentile_range"]],
303+
otsu_k=lambda wildcards: int(wildcards.k),
304+
otsu_threshold_index=lambda wildcards: int(wildcards.i),
305+
script:
306+
"../scripts/multiotsu_from_batch_histogram.py"
307+
308+
192309
rule clean_segmentation:
193310
"""Clean segmentation mask by removing edge artifacts and small objects.
194-
195-
Performs connected component analysis to identify and exclude objects that
196-
extend too close to the image boundaries (likely artifacts). Creates both
197-
a cleaned mask and an exclusion mask showing what was removed.
198-
"""
311+
312+
Performs connected component analysis to identify and exclude objects that
313+
extend too close to the image boundaries (likely artifacts). Creates both
314+
a cleaned mask and an exclusion mask showing what was removed.
315+
"""
199316
input:
200317
mask=bids(
201318
root=root,
@@ -206,10 +323,6 @@ rule clean_segmentation:
206323
suffix="mask.ozx",
207324
**inputs["spim"].wildcards,
208325
),
209-
params:
210-
max_extent=0.15,
211-
proc_level=2, #level at which to calculate conncomp
212-
zarrnii_kwargs={"orientation": config["orientation"]},
213326
output:
214327
exclude_mask=bids(
215328
root=root,
@@ -234,5 +347,9 @@ rule clean_segmentation:
234347
mem_mb=256000,
235348
disk_mb=2097152,
236349
runtime=30,
350+
params:
351+
max_extent=0.15,
352+
proc_level=2, #level at which to calculate conncomp
353+
zarrnii_kwargs={"orientation": config["orientation"]},
237354
script:
238355
"../scripts/clean_segmentation.py"

0 commit comments

Comments
 (0)