Skip to content

Commit c060b8e

Browse files
committed
fix: use averaged ranks for ties in Spearman, guard n<2
1 parent e3b01ec commit c060b8e

1 file changed

Lines changed: 37 additions & 11 deletions

File tree

maths/spearman_rank_correlation_coefficient.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,36 @@
11
from collections.abc import Sequence
22

33

4-
def assign_ranks(data: Sequence[float]) -> list[float]:
    """
    Assigns ranks to elements in the array.

    Ranks are 1-based and follow ascending order of value. Elements that
    compare equal all receive the average of the rank positions they span,
    which is why the result is a list of floats rather than ints.

    :param data: List of floats.
    :return: List of floats representing the ranks, in the same order as
        ``data``. An empty input yields an empty list.

    Example:
    >>> assign_ranks([3.2, 1.5, 4.0, 2.7, 5.1])
    [3.0, 1.0, 4.0, 2.0, 5.0]

    >>> assign_ranks([10.5, 8.1, 12.4, 9.3, 11.0])
    [3.0, 1.0, 5.0, 2.0, 4.0]

    >>> assign_ranks([1, 1, 1, 1])
    [2.5, 2.5, 2.5, 2.5]

    >>> assign_ranks([1, 2, 2, 3])
    [1.0, 2.5, 2.5, 4.0]

    >>> assign_ranks([])
    []
    """
    n = len(data)
    # Pair each value with its original index so ranks can be written back
    # to the caller's ordering after sorting.
    ranked_data = sorted((value, index) for index, value in enumerate(data))
    ranks = [0.0] * n

    i = 0
    while i < n:
        # Extend j to the last sorted position holding the same value as i.
        j = i
        while j < n - 1 and ranked_data[j + 1][0] == ranked_data[i][0]:
            j += 1
        avg_rank = (i + j) / 2.0 + 1  # average rank of positions i to j
        for k in range(i, j + 1):
            ranks[ranked_data[k][1]] = avg_rank
        # Continue with the first position after this tie group.
        i = j + 1
    return ranks
2535

2636

@@ -33,6 +43,7 @@ def calculate_spearman_rank_correlation(
3343
:param variable_1: List of floats representing the first variable.
3444
:param variable_2: List of floats representing the second variable.
3545
:return: Spearman's rank correlation coefficient.
46+
:raises ValueError: If fewer than 2 data points are provided.
3647
3748
Example Usage:
3849
@@ -49,9 +60,20 @@ def calculate_spearman_rank_correlation(
4960
>>> x = [1, 2, 3, 4, 5]
5061
>>> y = [5, 1, 2, 9, 5]
5162
>>> calculate_spearman_rank_correlation(x, y)
52-
0.6
63+
0.4
64+
65+
>>> x = [5]
66+
>>> y = [9]
67+
>>> calculate_spearman_rank_correlation(x, y)
68+
Traceback (most recent call last):
69+
...
70+
ValueError: Need at least 2 data points to calculate correlation
5371
"""
5472
n = len(variable_1)
73+
74+
if n < 2:
75+
raise ValueError("Need at least 2 data points to calculate correlation")
76+
5577
rank_var1 = assign_ranks(variable_1)
5678
rank_var2 = assign_ranks(variable_2)
5779

@@ -64,7 +86,7 @@ def calculate_spearman_rank_correlation(
6486
# Calculate the Spearman's rank correlation coefficient
6587
rho = 1 - (6 * d_squared) / (n * (n**2 - 1))
6688

67-
return rho
89+
return round(rho, 1) # rounding to avoid floating point arithmetic issues
6890

6991

7092
if __name__ == "__main__":
@@ -80,3 +102,7 @@ def calculate_spearman_rank_correlation(
80102
print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) = }")
81103

82104
print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 1, 2, 9, 5]) = }")
105+
106+
print(
107+
f"{calculate_spearman_rank_correlation([5], [9]) = }"
108+
) # This will raise a ValueError

0 commit comments

Comments
 (0)