Skip to content

Commit 1abd275

Browse files
Carole Sudre
authored and committed
Updating figures and tests
1 parent cd4cccc commit 1abd275

5 files changed

Lines changed: 151 additions & 71 deletions

File tree

MetricsReloaded/metrics/calibration_measures.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import numpy as np
3232
import math
3333
from scipy.special import gamma
34-
34+
import warnings
3535
# from metrics.pairwise_measures import CacheFunctionOutput
3636
from MetricsReloaded.utility.utils import (
3737
CacheFunctionOutput,
@@ -150,6 +150,7 @@ def expectation_calibration_error(self):
150150
if "bins_ece" in self.dict_args:
151151
nbins = self.dict_args["bins_ece"]
152152
else:
153+
warnings.warn("Bins ECE not specified in optional arguments dictionary - default set to 10")
153154
nbins = 10
154155
step = 1.0 / nbins
155156
range_values = np.arange(0, 1.00001, step)
@@ -176,7 +177,55 @@ def expectation_calibration_error(self):
176177
else:
177178
list_values.append(nsamples * np.abs(prop - np.mean(pred_sel)))
178179
numb_samples += nsamples
179-
return np.sum(np.asarray(list_values)) / numb_samples
180+
ece = np.sum(np.asarray(list_values)) / numb_samples
181+
return ece
182+
183+
184+
def maximum_calibration_error(self):
    r"""
    Derives the maximum calibration error (MCE) in the case of a binary task.

    The predicted probabilities for the positive class (second column of
    ``self.pred``) are partitioned into equal-width bins; within each
    non-empty bin the absolute gap between the observed proportion of
    positive references and the mean predicted probability is computed,
    and the maximum gap over all bins is returned.

    ``bins_mce`` is the key in the optional-arguments dictionary for the
    number of bins to consider. Default is 10.

    .. math::

        MCE = \max_m \left|\dfrac{1}{|B_m|}\sum_{i \in B_m}
        \mathbb{1}(pred_i = ref_i)
        - \dfrac{1}{|B_m|}\sum_{i \in B_m} pred_i\right|

    :return: mce
    """
    if "bins_mce" in self.dict_args:
        nbins = self.dict_args["bins_mce"]
    else:
        warnings.warn("Bins MCE not specified in optional arguments dictionary - default set to 10")
        nbins = 10
    step = 1.0 / nbins
    # Upper edge slightly above 1 so that a probability of exactly 1.0
    # falls into the last bin despite float rounding in arange.
    range_values = np.arange(0, 1.00001, step)
    list_values = []
    pred_prob = self.pred[:, 1]
    ref_arr = np.asarray(self.ref)
    for (low, high) in zip(range_values[:-1], range_values[1:]):
        # Bins are half-open (low, high]; note a probability of exactly 0
        # is excluded from the first bin (matches the ECE implementation).
        in_bin = np.logical_and(pred_prob > low, pred_prob <= high)
        ref_sel = ref_arr[in_bin]
        nsamples = np.size(ref_sel)
        if nsamples == 0:
            # Empty bin contributes no calibration gap; guard BEFORE the
            # division so we never evaluate 0/0 (previously raised a
            # RuntimeWarning and produced a discarded nan).
            list_values.append(0)
        else:
            prop = np.sum(ref_sel) / nsamples
            pred_sel = pred_prob[in_bin]
            list_values.append(np.abs(prop - np.mean(pred_sel)))
    mce = np.max(np.asarray(list_values))
    return mce
228+
180229

181230
def brier_score(self):
182231
"""

MetricsReloaded/metrics/pairwise_measures.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ def __init__(
265265
"fbeta": (self.fbeta, "FBeta"),
266266
"dsc":(self.dsc, "DSC"),
267267
"youden_ind": (self.youden_index, "YoudenInd"),
268+
"ppv":(self.positive_predictive_value,'PPV'),
269+
"npv":(self.negative_predictive_value,'NPV'),
270+
"ior":(self.intersection_over_reference,"IoR"),
271+
"sensitivity":(self.sensitivity,"Sens"),
272+
"specificity":(self.specificity,"Spec"),
268273
"mcc": (self.matthews_correlation_coefficient, "MCC"),
269274
"cldice": (self.centreline_dsc, "CentreLineDSC"),
270275
"assd": (self.measured_average_distance, "ASSD"),
@@ -693,7 +698,7 @@ def pred_in_ref(self):
693698
else:
694699
return 0
695700

696-
def positive_predictive_values(self):
701+
def positive_predictive_value(self):
697702
"""
698703
Calculates the positive predictive value
699704
@@ -785,10 +790,10 @@ def fbeta(self):
785790
warnings.warn("beta value not specified in option - default set to 1")
786791
beta = 1
787792
numerator = (
788-
(1 + np.square(beta)) * self.positive_predictive_values() * self.recall()
793+
(1 + np.square(beta)) * self.positive_predictive_value() * self.recall()
789794
)
790795
denominator = (
791-
np.square(beta) * self.positive_predictive_values() + self.recall()
796+
np.square(beta) * self.positive_predictive_value() + self.recall()
792797
)
793798
if np.isnan(denominator):
794799
if self.fp() + self.fn() > 0:
@@ -830,7 +835,7 @@ def net_benefit_treated(self):
830835
net_benefit = tp / n - fp / n * er
831836
return net_benefit
832837

833-
def negative_predictive_values(self):
838+
def negative_predictive_value(self):
834839
"""
835840
This function calculates the negative predictive value ratio between
836841
the number of true negatives and the total number of negative elements

test/test_metrics/test_calibration_metrics.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
from scipy.special import gamma
55
from MetricsReloaded.utility.utils import median_heuristic
66

7-
8-
def test_expected_calibration_error():
9-
f40_pred = [[1-0.22, 0.22 ],
7+
pred_224 = [[1-0.22, 0.22 ],
108
[1-0.48, 0.48],
119
[0.51,0.49],
1210
[0.04, 0.96],
@@ -17,15 +15,38 @@ def test_expected_calibration_error():
1715
[0.66, 0.34],
1816
[0.13, 0.87]]
1917
#f40_pred = [0.22, 0.48, 0.49, 0.96, 0.55, 0.64, 0.78, 0.82, 0.34, 0.87]
20-
f40_ref = [0, 1, 0, 0, 1, 1, 1, 1, 1, 0]
21-
ppm = CalibrationMeasures(f40_pred, f40_ref)
22-
ppm1 = CalibrationMeasures(f40_pred, f40_ref, dict_args={"bins_ece": 2})
23-
value_test2 = ppm.expectation_calibration_error()
18+
ref_224 = [0, 1, 0, 0, 1, 1, 1, 1, 1, 0]
19+
20+
def test_expected_calibration_error():
21+
"""
22+
Using as reference SN 2.24 p67
23+
"""
24+
ppm1 = CalibrationMeasures(pred_224, ref_224, dict_args={"bins_ece": 2})
25+
ppm2 = CalibrationMeasures(pred_224, ref_224, dict_args={'bins_ece':5})
26+
ppm3 = CalibrationMeasures(pred_224, ref_224)
2427
value_test1 = ppm1.expectation_calibration_error()
28+
value_test2 = ppm2.expectation_calibration_error()
29+
value_test3 = ppm3.expectation_calibration_error()
2530
expected_ece1 = 0.11
26-
expected_ece2 = 0.36
31+
expected_ece2 = 0.32
32+
expected_ece3 = 0.36
33+
assert_allclose(value_test1, expected_ece1, atol=0.01)
34+
assert_allclose(value_test2, expected_ece2, atol=0.01)
35+
assert_allclose(value_test3, expected_ece3, atol=0.01)
36+
37+
def test_maximum_calibration_error():
38+
ppm1 = CalibrationMeasures(pred_224, ref_224, dict_args={"bins_mce": 2})
39+
ppm2 = CalibrationMeasures(pred_224, ref_224, dict_args={'bins_mce':5})
40+
ppm3 = CalibrationMeasures(pred_224, ref_224)
41+
value_test1 = ppm1.maximum_calibration_error()
42+
value_test2 = ppm2.maximum_calibration_error()
43+
value_test3 = ppm3.maximum_calibration_error()
44+
expected_ece1 = 0.12
45+
expected_ece2 = 0.55
46+
expected_ece3 = 0.96
2747
assert_allclose(value_test1, expected_ece1, atol=0.01)
2848
assert_allclose(value_test2, expected_ece2, atol=0.01)
49+
assert_allclose(value_test3, expected_ece3, atol=0.01)
2950

3051

3152
def test_logarithmic_score():

test/test_metrics/test_pairwise_measures.py

Lines changed: 54 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@
4646
ppm212_1 = PM(pred212, ref212)
4747
ppm212_2 = PM(pred212,ref212,dict_args={'boundary_dist':2})
4848

49+
#Data for figure 5c (Hausdoff with annotation error p14 Pitfalls)
50+
ref5c = np.zeros([14, 14])
51+
ref5c[1, 1] = 1
52+
ref5c[9:12, 9:12] = 1
53+
pred5c = np.zeros([14, 14])
54+
pred5c [9:12, 9:12] = 1
55+
bpm5c = PM(pred5c, ref5c, dict_args={'hd_perc':95})
56+
4957
### Small size of structures relative to pixel/voxel size (DSC)
5058
## Larger structure
5159
p_large_ref = np.zeros((11, 11))
@@ -77,7 +85,7 @@
7785
f27_ref2 = f27_pred1
7886
f27_pred2 = f27_ref1
7987

80-
# Figure ClDice p 53 S2.14
88+
# Figure ClDice p 53 S2.14 pitfalls paper
8189
ref214 = np.zeros([24,24])
8290
ref214[1:10,7:12]=1
8391
ref214[10:12,3:19]=1
@@ -116,26 +124,26 @@
116124

117125

118126

119-
# panoptic quality
120-
pq_pred1 = np.zeros([21, 21])
121-
pq_pred1[5:7, 2:5] = 1
122-
pq_pred2 = np.zeros([21, 21])
123-
pq_pred2[14:18, 4:6] = 1
124-
pq_pred2[16, 3] = 1
125-
pq_pred3 = np.zeros([21, 21])
126-
pq_pred3[14:18, 7:12] = 1
127-
pq_pred4 = np.zeros([21, 21])
128-
pq_pred4[2:8, 13:16] = 1
129-
pq_pred4[2:4, 12] = 1
130-
131-
pq_ref1 = np.zeros([21, 21])
132-
pq_ref1[8:11, 3] = 1
133-
pq_ref1[9, 2:5] = 1
134-
pq_ref2 = np.zeros([21, 21])
135-
pq_ref2[14:19, 7:13] = 1
136-
pq_ref3 = np.zeros([21, 21])
137-
pq_ref3[2:7, 14:17] = 1
138-
pq_ref3[2:4, 12:14] = 1
127+
# panoptic quality Figure 3.51 p96
128+
pq_pred1 = np.zeros([18, 18])
129+
pq_pred1[ 3:7,1:3] = 1
130+
pq_pred1[3:6,3:7]=1
131+
pq_pred2 = np.zeros([18, 18])
132+
pq_pred2[13:16,4:6] = 1
133+
pq_pred3 = np.zeros([18, 18])
134+
pq_pred3[7:12,13:17] = 1
135+
pq_pred4 = np.zeros([18, 18])
136+
pq_pred4[13:15,13:17] = 1
137+
pq_pred4[15,15] = 1
138+
139+
pq_ref1 = np.zeros([18, 18])
140+
pq_ref1[2:7, 1:3] = 1
141+
pq_ref1[2:5,3:6] = 1
142+
pq_ref2 = np.zeros([18, 18])
143+
pq_ref2[6:12,12:17] = 1
144+
pq_ref3 = np.zeros([18, 18])
145+
pq_ref3[14:15:,7:10] = 1
146+
pq_ref3[13:16,8:9] = 1
139147

140148
f27_pred = np.concatenate([np.ones([81]), np.zeros([9]), np.ones([2]), np.zeros([8])])
141149
f27_ref = np.concatenate([np.ones([90]), np.zeros([10])])
@@ -324,16 +332,6 @@ def test_fn_map():
324332
fn2 = ppm210_2.fn()
325333
expected_fn1 = 12
326334
expected_fn2 = 0
327-
# fn_map_1 = ppm210_1.__fn_map()
328-
# expected_fn_map1 = np.zeros([14,14])
329-
# expected_fn_map1[5:6,5:9] = 1
330-
# expected_fn_map1[8:9,5:9] = 1
331-
# expected_fn_map1[5:9,5:6] = 1
332-
# expected_fn_map1[5:9,8:9] = 1
333-
# fn_map_2 = ppm210_2.__fn_map()
334-
# expected_fn_map2 = np.zeros([14,14])
335-
# assert_array_equal(fn_map_1, expected_fn_map1)
336-
# assert_array_equal(fn_map_2, expected_fn_map2)
337335
assert fn1 == 12
338336
assert fn2 == 0
339337

@@ -553,8 +551,8 @@ def test_negative_predictive_value():
553551
"""
554552
Taking figure SN 2.9 as inspiration p49 Pitfalls
555553
"""
556-
value_test1 = ppm29_1.negative_predictive_values()
557-
value_test2 = ppm29_2.negative_predictive_values()
554+
value_test1 = ppm29_1.negative_predictive_value()
555+
value_test2 = ppm29_2.negative_predictive_value()
558556
expected_npv1 = 0.889
559557
expected_npv2 = 0.47
560558
assert_allclose(value_test1, expected_npv1, atol=0.001)
@@ -699,23 +697,27 @@ def test_nsd2():
699697
assert_allclose(value_test, expected_nsd2, atol=0.01)
700698

701699

702-
def test_iou():
700+
def test_intersection_over_union():
703701
bpm = PM(p_pred, p_ref)
704702
value_test = bpm.intersection_over_union()
705703
print("IoU ", value_test)
706704
expected_iou = 0.76
707705
assert_allclose(value_test, expected_iou, atol=0.01)
708706

709707

710-
def test_fbeta():
711-
pm = PM(p_large_pred1, p_large_ref)
712-
pm2 = PM(p_large_pred1, p_large_ref, dict_args={"beta": 1})
713-
value_test = pm.fbeta()
714-
value_test2 = pm2.fbeta()
715-
print(value_test)
716-
expected_fbeta = 0.986
717-
assert_allclose(value_test, expected_fbeta, atol=0.001)
718-
assert_allclose(value_test2, expected_fbeta, atol=0.001)
708+
def test_fbeta_beta_value():
709+
"""
710+
Taking inspiration from SN 2.9 - p49 Pitfalls
711+
"""
712+
expected_f11 = 0.86
713+
expected_f12 = 0.94
714+
ppm29_1.dict_args={'beta':1}
715+
ppm29_2.dict_args={'beta':1}
716+
value_test1 = ppm29_1.fbeta()
717+
value_test2 = ppm29_2.fbeta()
718+
assert_allclose(value_test1, expected_f11, atol=0.01)
719+
assert_allclose(value_test2, expected_f12, atol=0.01)
720+
719721

720722
def test_sensitivity():
721723
"""
@@ -749,13 +751,13 @@ def test_sens():
749751
assert_allclose(value_test, expected_sens, atol=0.01)
750752

751753

752-
def test_ppv():
754+
def test_positive_predictive_value():
753755
"""
754756
Taking as inspiration figure SN2.9 p49 Pitfalls
755757
"""
756758

757-
value_test1 = ppm29_1.positive_predictive_values()
758-
value_test2 = ppm29_2.positive_predictive_values()
759+
value_test1 = ppm29_1.positive_predictive_value()
760+
value_test2 = ppm29_2.positive_predictive_value()
759761
expected_ppv1 = 0.82
760762
expected_ppv2 = 0.98
761763
assert_allclose(value_test1, expected_ppv1, atol=0.01)
@@ -817,15 +819,12 @@ def test_nsd_s210():
817819
assert_allclose(nsd_1,expected_nsd1,atol=0.01)
818820
assert_allclose(nsd_2,expected_nsd2,atol=0.01)
819821

820-
def test_hd():
821-
f20_ref = np.zeros([14, 14])
822-
f20_ref[1, 1] = 1
823-
f20_ref[9:12, 9:12] = 1
824-
f20_pred = np.zeros([14, 14])
825-
f20_pred[9:12, 9:12] = 1
826-
bpm = PM(f20_pred, f20_ref, dict_args={"hd_perc": 95})
827-
hausdorff_distance = bpm.measured_hausdorff_distance()
828-
hausdorff_distance_perc = bpm.measured_hausdorff_distance_perc()
822+
def test_hausdorff_distance_5c():
823+
"""
824+
Using figure 5c p14 as illustration for calculation of HD and HD95
825+
"""
826+
hausdorff_distance = bpm5c.measured_hausdorff_distance()
827+
hausdorff_distance_perc = bpm5c.measured_hausdorff_distance_perc()
829828
print(hausdorff_distance_perc)
830829
expected_hausdorff_distance = 11.31
831830
expected_hausdorff_distance_perc = 6.79

test/test_metrics/test_prob_pairwise_measures.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
from MetricsReloaded.metrics.prob_pairwise_measures import ProbabilityPairwiseMeasures
1515

1616

17-
def test_auc():
17+
def test_auroc():
18+
"""
19+
Based on SN2.18 p60 of Pitfalls paper
20+
"""
1821
ref = np.asarray([0, 0, 0, 1, 1, 1])
1922
pred_proba = np.asarray([0.21, 0.35, 0.63, 0.92, 0.32, 0.79])
2023
ppm = ProbabilityPairwiseMeasures(pred_proba, ref)
@@ -24,7 +27,10 @@ def test_auc():
2427
assert_allclose(value_test, expected_auc, atol=0.01)
2528

2629

27-
def test_ap():
30+
def test_average_precision():
31+
"""
32+
Based on SN2.18 p60 of pitfalls paper
33+
"""
2834
ref = np.asarray([0, 0, 0, 1, 1, 1])
2935
pred_proba = np.asarray([0.21, 0.35, 0.63, 0.92, 0.32, 0.79])
3036
ppm = ProbabilityPairwiseMeasures(pred_proba, ref)

0 commit comments

Comments
 (0)