@@ -65,22 +65,17 @@ def _somers_yx_weighted(
6565 for j in range (i + 1 , n ):
6666 w_ij = weights [i ] * weights [j ]
6767
68- if y [i ] != y [j ]: # Not tied in Y
69- if (y [i ] < y [j ] and x [i ] < x [j ]) or (y [i ] > y [j ] and x [i ] > x [j ]):
70- concordant += w_ij
71- elif (y [i ] < y [j ] and x [i ] > x [j ]) or (y [i ] > y [j ] and x [i ] < x [j ]):
72- discordant += w_ij
73- else : # Tied in Y
68+ if y [i ] == y [j ]: # Tied in Y
7469 ties_y += w_ij
7570
71+ elif (y [i ] < y [j ] and x [i ] < x [j ]) or (y [i ] > y [j ] and x [i ] > x [j ]):
72+ concordant += w_ij
73+ elif (y [i ] < y [j ] and x [i ] > x [j ]) or (y [i ] > y [j ] and x [i ] < x [j ]):
74+ discordant += w_ij
7675 total_pairs = concordant + discordant + ties_y
7776 denom = concordant + discordant # Exclude ties in Y from denominator
7877
79- if denom > 0 :
80- stat = (concordant - discordant ) / denom
81- else :
82- stat = np .nan
83-
78+ stat = (concordant - discordant ) / denom if denom > 0 else np .nan
8479 return stat , concordant , discordant , ties_y , total_pairs , denom
8580
8681
@@ -282,3 +277,86 @@ def somersd_xy(y_true: np.ndarray, y_pred: np.ndarray) -> SomersDResult:
282277 x = x [mask ]
283278 stat , S , D , Tx , P , denom = _somers_xy_core (y , x ) # type: ignore[misc]
284279 return SomersDResult (stat , S , D , Tx , P , denom )
280+
281+
282+ def somersd_pairwise (
283+ pos_scores : np .ndarray , neg_scores : np .ndarray , ties : str = "y"
284+ ) -> float | None :
285+ """Compute pairwise Somers' D between positive and negative scores.
286+
287+ This function computes Somers' D by comparing all positive scores
288+ against all negative scores. It's used for clustered Gini analysis where
289+ you want to measure separation between different groups.
290+
291+ The computation leverages the fast Somers' D implementation for optimal
292+ performance, which uses efficient Numba-accelerated algorithms.
293+
294+ Args:
295+ pos_scores: Array of scores for positive class (label=1)
296+ neg_scores: Array of scores for negative class (label=0)
297+ ties: How to handle ties. "y" (default) computes D_Y|X (ties in Y excluded),
298+ "x" computes D_X|Y (ties in X excluded).
299+
300+ Returns:
301+ Somers' D statistic (net concordant pairs / total pairs), or None if
302+ either array is empty.
303+
304+ Note:
305+ Somers' D is computed by combining the scores into a single array with
306+ binary labels (1 for positive, 0 for negative). This leverages the
307+ efficient O(n log n) algorithm instead of O(n_pos * n_neg).
308+
309+ For binary classification, Somers' D equals the Gini coefficient
310+ (2 * AUC - 1).
311+
312+ Examples:
313+ >>> pos = np.array([0.8, 0.9, 0.7])
314+ >>> neg = np.array([0.3, 0.4, 0.2])
315+ >>> somersd_pairwise(pos, neg)
316+ 1.0 # Perfect separation
317+ >>> somersd_pairwise(pos, neg, ties="x")
318+ 1.0 # Same result for perfect separation
319+ """
320+ if ties not in ("x" , "y" ):
321+ raise ValueError (f"ties must be 'x' or 'y', got { ties } " )
322+
323+ pos_scores = np .asarray (pos_scores , dtype = np .float64 )
324+ neg_scores = np .asarray (neg_scores , dtype = np .float64 )
325+
326+ # Remove NaN values
327+ pos_mask = ~ np .isnan (pos_scores )
328+ neg_mask = ~ np .isnan (neg_scores )
329+ pos_scores = pos_scores [pos_mask ]
330+ neg_scores = neg_scores [neg_mask ]
331+
332+ if len (pos_scores ) == 0 or len (neg_scores ) == 0 :
333+ return None
334+
335+ # Combine scores and create binary labels
336+ # This allows us to use the fast somersd implementation
337+ all_scores = np .concatenate ([pos_scores , neg_scores ])
338+ all_labels = np .concatenate (
339+ [
340+ np .ones (len (pos_scores ), dtype = np .float64 ),
341+ np .zeros (len (neg_scores ), dtype = np .float64 ),
342+ ]
343+ )
344+
345+ # Use fast Somers' D implementation (O(n log n) instead of O(n_pos * n_neg))
346+ if ties == "y" :
347+ result = somersd_yx (all_labels , all_scores )
348+ else : # ties == "x"
349+ result = somersd_xy (all_labels , all_scores )
350+
351+ statistic = result .statistic
352+
353+ return None if np .isnan (statistic ) else float (statistic )
354+
355+
356+ # Backward compatibility alias
def gini_pairwise(pos_scores: np.ndarray, neg_scores: np.ndarray) -> float | None:
    """Deprecated name kept for backward compatibility.

    Forwards both score samples to ``somersd_pairwise`` with ``ties="y"``
    and returns its result unchanged. New code should call
    ``somersd_pairwise`` directly.
    """
    outcome = somersd_pairwise(
        pos_scores=pos_scores,
        neg_scores=neg_scores,
        ties="y",
    )
    return outcome
0 commit comments