@@ -111,9 +111,24 @@ def __repr__(self):
111111 return f"KMedoidsResult(loss={ self .loss } , labels={ self .labels } , medoids={ self .medoids } , n_iter={ self .n_iter } , n_swaps={ self .n_swap } )"
112112
113113
class DynkResult:
    """
    K-medoids clustering result with automatic number of clusters

    The parameters are listed in constructor order. ``dynmsc`` builds this
    object by unpacking the tuple returned by the native routine positionally,
    so the order of the constructor arguments must not be changed.

    :param loss: Loss of this clustering (sum of deviations)
    :type loss: float

    :param labels: Cluster assignment
    :type labels: ndarray

    :param medoids: Chosen medoid indexes
    :type medoids: ndarray

    :param bestk: Best k by Medoid Silhouette
    :type bestk: int

    :param losses: Losses recorded while searching for the best k
        (presumably one entry per k in ``rangek`` -- confirm against the
        native routine)
    :type losses: presumably ndarray -- confirm

    :param rangek: range of k
    :type rangek: range

    :param n_iter: Number of iterations
    :type n_iter: int

    :param n_swap: Number of swaps performed
    :type n_swap: int
    """

    def __init__(self, loss, labels, medoids, bestk, losses, rangek, n_iter=None, n_swap=None):
        # Result of the selected clustering.
        self.loss = loss
        self.labels = labels
        self.medoids = medoids
        # Information about the search over the number of clusters.
        self.bestk = bestk
        self.losses = losses
        self.rangek = rangek
        # Optional statistics; they stay None when not supplied.
        self.n_iter = n_iter
        self.n_swap = n_swap

    def __repr__(self):
        # Every label matches the attribute / constructor keyword it shows;
        # in particular the swap counter is printed as "n_swap", not "n_swaps".
        return (
            f"DynkResult(loss={self.loss}, labels={self.labels}, "
            f"medoids={self.medoids}, bestk={self.bestk}, "
            f"losses={self.losses}, rangek={self.rangek}, "
            f"n_iter={self.n_iter}, n_swap={self.n_swap})"
        )
134154
135155def _check_medoids (diss , medoids , init , random_state ):
136156 """Check the medoids and random_state parameters."""
@@ -609,8 +629,8 @@ def dynmsc(diss, medoids, max_iter=100, init="random", random_state=None):
609629 :param random_state: random seed if no medoids are given
610630 :type random_state: int, RandomState instance or None
611631
612- :return: k-medoids clustering result
613- :rtype: KMedoidsResult
632+ :return: k-medoids clustering with automatic number of clusters
633+ :rtype: DynkResult
614634 """
615635 import numpy as np
616636 from .kmedoids import _dynmsc_f32 , _dynmsc_f64
@@ -623,53 +643,9 @@ def dynmsc(diss, medoids, max_iter=100, init="random", random_state=None):
623643 if isinstance (diss , np .ndarray ):
624644 dtype = diss .dtype
625645 if dtype == np .float32 :
626- return KMedoidsResult (* _dynmsc_f32 (diss , medoids .astype (np .uint64 ), max_iter ))
627- elif dtype == np .float64 :
628- return KMedoidsResult (* _dynmsc_f64 (diss , medoids .astype (np .uint64 ), max_iter ))
629- raise ValueError ("Input data not supported. Use a numpy array of floats." )
630-
631- def bestk (diss , medoids = 100 , max_iter = 100 , init = "random" , random_state = None ):
632- """Optimal number of clusters according to the Medoid Silhouette
633-
634- This version uses DynMSC to choose the ptimal number of clusters according
635- to the Medoid Silhouette, that performs DynMSC for k = 2 to the number of input medoids
636- and returns k with the highest Average Medoid Silhouette.
637-
638- References:
639-
640- | Lars Lenssen, Erich Schubert:
641- | Medoid silhouette clustering with automatic cluster number selection
642- | Information Systems (120), 2024, 102290
643- | <https://doi.org/10.1016/j.is.2023.102290>
644-
645- :param diss: square numpy array of dissimilarities
646- :type diss: ndarray
647- :param medoids: maximum number of clusters to find or existing medoids with length of maximum number of clusters to find
648- :type medoids: int or ndarray
649- :param max_iter: maximum number of iterations
650- :type max_iter: int
651- :param init: initialization method
652- :type init: str, "random", "first" or "build"
653- :param random_state: random seed if no medoids are given
654- :type random_state: int, RandomState instance or None
655-
656- :return: Result of choosing the optimal number of clusters according to the Medoid Silhouette
657- :rtype: BestkResult
658- """
659- import numpy as np
660- from .kmedoids import _bestk_f32 , _bestk_f64
661-
662- if not isinstance (diss , np .ndarray ):
663- diss = np .array (diss )
664-
665- medoids = _check_medoids (diss , medoids , init , random_state )
666-
667- if isinstance (diss , np .ndarray ):
668- dtype = diss .dtype
669- if dtype == np .float32 :
670- return BestkResult (* _bestk_f32 (diss , medoids .astype (np .uint64 ), max_iter ))
646+ return DynkResult (* _dynmsc_f32 (diss , medoids .astype (np .uint64 ), max_iter ))
671647 elif dtype == np .float64 :
672- return BestkResult ( * _bestk_f64 (diss , medoids .astype (np .uint64 ), max_iter ))
648+ return DynkResult ( * _dynmsc_f64 (diss , medoids .astype (np .uint64 ), max_iter ))
673649 raise ValueError ("Input data not supported. Use a numpy array of floats." )
674650
675651def alternating (diss , medoids , max_iter = 100 , init = "random" , random_state = None ):
0 commit comments