Skip to content

Commit 19bafc6

Browse files
m9hclaude
andcommitted
Add Birnbaum skull dataset tests with expert label validation
Downloaded GU002 from Kaggle (Birnbaum et al. 2025): a paired T1 MRI and 7-class expert-corrected segmentation from stroke patients. 4/4 tests GREEN: (1) label loading and tissue distribution; (2) Birnbaum → openlifu label remapping (bone=6→2, GM=4→4, etc.); (3) expert labels to acoustic properties via HeterogeneousSkullSegmentation; (4) pseudo-CT bone prediction vs. expert labels (Dice=0.164). The 0.164 Dice confirms the limitation of threshold-based pseudo-CT reported in PR OpenwaterHealth#436 (0.315 with their method), motivating nnU-Net. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 226ca15 commit 19bafc6

2 files changed

Lines changed: 103 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ hv_calibration_coeffs.c
174174
hv_calibration_coeffs.h
175175
benchmarks/tfuscapes_data/*.npz
176176
benchmarks/itrusst_data/data/
177+
benchmarks/birnbaum_data/*.nii

tests/test_birnbaum.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
"""TDD: Birnbaum skull dataset loading, remapping, and acoustic simulation."""
2+
import numpy as np
3+
import pytest
4+
from pathlib import Path
5+
6+
# Dataset files live in <repo>/benchmarks/birnbaum_data, two directory levels
# above this test module. The tests below skip when the files are absent.
DATA_DIR = Path(__file__).parent.parent.joinpath("benchmarks", "birnbaum_data")
# T1 volume for subject GU002.
T1_PATH = DATA_DIR.joinpath("GU002_deface.nii")
# Label volume for the same subject.
LABEL_PATH = DATA_DIR.joinpath("GU002_label_deface.nii")
9+
10+
# Birnbaum label convention → openlifu convention
BIRNBAUM_TO_OPENLIFU = {
    0: 0,  # background → water
    1: 0,  # air → water
    2: 0,  # air cavities → water (could be 0 or separate)
    3: 5,  # WM → white_matter
    4: 4,  # GM → gray_matter
    5: 3,  # CSF → csf
    6: 2,  # bone → skull
    7: 1,  # scalp → scalp (not present in GU002 apparently)
}


def remap_birnbaum_labels(labels):
    """Translate Birnbaum label IDs into openlifu label IDs.

    Args:
        labels: Array-like of Birnbaum label values. Any shape is accepted,
            and plain sequences (lists, tuples) are converted to an array
            first.

    Returns:
        A new ``np.int32`` array with the same shape as ``labels`` in which
        every value listed in ``BIRNBAUM_TO_OPENLIFU`` is replaced by its
        mapped ID. Values that are not keys of the table come out as 0
        (water). The input is never modified.
    """
    # Coerce up front: with a plain list, ``labels == src`` is a single
    # Python bool rather than an element-wise mask, so every assignment
    # below would silently do nothing and the result would be all zeros.
    labels = np.asarray(labels)
    out = np.zeros_like(labels, dtype=np.int32)
    for src, dst in BIRNBAUM_TO_OPENLIFU.items():
        out[labels == src] = dst
    return out
28+
29+
30+
@pytest.mark.skipif(not LABEL_PATH.exists(), reason="Birnbaum data not downloaded")
def test_load_birnbaum_labels():
    """Check the GU002 label file loads as a 3-D volume containing bone.

    Verifies the volume's rank, that label 6 (bone in the Birnbaum
    convention) is present, and the exact voxel grid shape.
    """
    import nibabel as nib

    image = nib.load(str(LABEL_PATH))
    volume = np.asarray(image.get_fdata(), dtype=int)

    assert volume.ndim == 3
    present = np.unique(volume)
    assert 6 in present, "No bone label (6)"
    assert volume.shape == (186, 222, 220)
39+
40+
41+
@pytest.mark.skipif(not LABEL_PATH.exists(), reason="Birnbaum data not downloaded")
def test_birnbaum_remap():
    """Check that remapping yields skull (2) and GM (4) and conserves bone.

    Every voxel labelled bone (6) in the raw volume must appear as skull (2)
    after remapping, so the two voxel counts have to agree.
    """
    import nibabel as nib

    source = np.asarray(nib.load(str(LABEL_PATH)).get_fdata(), dtype=int)
    converted = remap_birnbaum_labels(source)

    found = np.unique(converted)
    assert 2 in found, "No skull after remap"
    assert 4 in found, "No GM after remap"

    # Bone voxel count must be unchanged by the remap.
    skull_voxels = np.count_nonzero(converted == 2)
    bone_voxels = np.count_nonzero(source == 6)
    assert skull_voxels == bone_voxels
51+
52+
53+
@pytest.mark.skipif(not LABEL_PATH.exists(), reason="Birnbaum data not downloaded")
def test_birnbaum_to_acoustic_properties():
    """Check that remapped labels give the expected sound speed per tissue.

    Feeds the remapped label volume to ``HeterogeneousSkullSegmentation``
    and verifies the resulting ``sound_speed`` map at skull and gray-matter
    voxels.
    """
    import nibabel as nib
    from openlifu.seg.seg_methods.heterogeneous import HeterogeneousSkullSegmentation
    import xarray as xa

    raw = np.asarray(nib.load(str(LABEL_PATH)).get_fdata(), dtype=int)
    labels = remap_birnbaum_labels(raw)

    seg = HeterogeneousSkullSegmentation(source="labels", label_array=labels)

    # One index coordinate per axis, tagged with units of mm. The volume's
    # values are all zero; it is only passed in as the grid argument.
    grid_shape = labels.shape
    axes = {}
    for axis_index, axis_name in enumerate(("x", "y", "z")):
        axes[axis_name] = xa.DataArray(
            np.arange(grid_shape[axis_index], dtype=float),
            dims=[axis_name],
            attrs={"units": "mm"},
        )
    blank_volume = xa.DataArray(np.zeros(grid_shape), coords=xa.Coordinates(axes))
    params = seg.seg_params(blank_volume)

    speed = params["sound_speed"].data
    # Skull voxels should have c=4080
    np.testing.assert_allclose(speed[labels == 2], 4080.0)
    # GM voxels should have c=1560
    np.testing.assert_allclose(speed[labels == 4], 1560.0)
78+
79+
80+
@pytest.mark.skipif(
    # Both files are loaded below, so the test must skip if either is absent;
    # guarding on the T1 alone turns a missing label file into a hard error.
    not (T1_PATH.exists() and LABEL_PATH.exists()),
    reason="Birnbaum T1 or labels not downloaded",
)
def test_birnbaum_pseudo_ct_vs_expert_labels():
    """Compare pseudo-CT bone prediction against expert skull labels.

    Converts the T1 volume to a pseudo-CT with ``t1_to_pseudo_ct``,
    thresholds it to get a predicted bone mask, and scores that mask against
    the expert bone label (6) with the Dice coefficient. Only a minimal
    overlap is asserted; the actual score is printed for reference.
    """
    import nibabel as nib
    from benchmarks.pseudo_ct_validation import t1_to_pseudo_ct

    t1 = np.asarray(nib.load(str(T1_PATH)).get_fdata(), dtype=np.float32)
    raw_labels = np.asarray(nib.load(str(LABEL_PATH)).get_fdata(), dtype=int)

    pseudo = t1_to_pseudo_ct(t1, method="plymouth")
    pred_bone = pseudo > 1200  # high HU threshold for bone
    expert_bone = raw_labels == 6

    # Fail with a clear message instead of letting mismatched volumes
    # broadcast or raise an opaque error inside the mask intersection.
    assert pred_bone.shape == expert_bone.shape, (
        f"Shape mismatch: pseudo-CT {pred_bone.shape} vs labels {expert_bone.shape}"
    )

    # Dice coefficient; the small epsilon avoids 0/0 when both masks are empty.
    intersection = np.sum(pred_bone & expert_bone)
    dice = 2 * intersection / (np.sum(pred_bone) + np.sum(expert_bone) + 1e-8)

    # The simple threshold method won't be great — just verify it's nonzero
    assert dice > 0.01, f"Bone Dice too low: {dice:.4f}"
    # Report the actual Dice for comparison with PR #436 results
    print(f"\nBirnbaum GU002 pseudo-CT bone Dice: {dice:.4f}")
    print(f"  Expert bone voxels: {expert_bone.sum()}")
    print(f"  Predicted bone voxels: {pred_bone.sum()}")

0 commit comments

Comments
 (0)