Skip to content

Commit ef2fe52

Browse files
committed
fix silhouette evaluation (Closes: #23)
1 parent ebdc2ca commit ef2fe52

6 files changed

Lines changed: 18 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@
22

33
For changes to the main Rust package, please see <https://github.com/kno10/rust-kmedoids/blob/main/CHANGELOG.md>
44

5+
## kmedoids 0.4.3 (2023-04-20)
6+
7+
- fix silhouette evaluation for k > 2 (in Rust)
8+
- use np.unique in Python wrapper to ensure labels are 0..k-1
9+
510
## kmedoids 0.4.2 (2023-03-07)
611

712
- fix predict for non-precomputed distances

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ authors:
1010
title: "Fast k-medoids Clustering in Rust and Python"
1111
journal: "J. Open Source Softw."
1212
doi: 10.21105/joss.04183
13-
version: 0.4.2
14-
date-released: 2022-09-24
13+
version: 0.4.3
14+
date-released: 2023-04-20
1515
license: GPL-3.0
1616
preferred-citation:
1717
title: "Fast k-medoids Clustering in Rust and Python"

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[package]
22
edition = "2021"
33
name = "kmedoids"
4-
version = "0.4.2"
4+
version = "0.4.3"
55
authors = ["Erich Schubert <erich.schubert@tu-dortmund.de>", "Lars Lenssen <lars.lenssen@tu-dortmund.de>"]
66
description = "k-Medoids clustering with the FasterPAM algorithm"
77
homepage = "https://github.com/kno10/python-kmedoids"
@@ -14,7 +14,7 @@ name = "kmedoids"
1414
crate-type = ["cdylib"]
1515

1616
[dependencies]
17-
rustkmedoids = { version = "0.4.2", package = "kmedoids", git = "https://github.com/kno10/rust-kmedoids" }
17+
rustkmedoids = { version = "0.4.3", package = "kmedoids", git = "https://github.com/kno10/rust-kmedoids" }
1818
numpy = "0.18"
1919
ndarray = "0.15"
2020
rand = "0.8"

kmedoids/__init__.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -617,8 +617,7 @@ def silhouette(diss, labels, samples=False, n_cpu=-1):
617617

618618
if not isinstance(diss, np.ndarray):
619619
diss = np.array(diss)
620-
if not isinstance(labels, np.ndarray):
621-
labels = np.array(labels, dtype=np.uint64)
620+
labels = np.unique(labels, return_inverse=True)[1].astype(np.uint64) # ensure labels are 0..k-1
622621

623622
if isinstance(diss, np.ndarray):
624623
dtype = diss.dtype
@@ -628,20 +627,20 @@ def silhouette(diss, labels, samples=False, n_cpu=-1):
628627
if n_cpu > 1:
629628
assert not samples, "samples=true currently may only be used with n_cpu=1"
630629
if dtype == np.float32:
631-
return (_par_silhouette_f32(diss, labels.astype(np.uint64), n_cpu), [])
630+
return (_par_silhouette_f32(diss, labels, n_cpu), [])
632631
elif dtype == np.float64:
633-
return (_par_silhouette_f64(diss, labels.astype(np.uint64), n_cpu), [])
632+
return (_par_silhouette_f64(diss, labels, n_cpu), [])
634633
elif dtype == np.int32:
635-
return (_par_silhouette_i32(diss, labels.astype(np.uint64), n_cpu), [])
634+
return (_par_silhouette_i32(diss, labels, n_cpu), [])
636635
elif dtype == np.int64:
637636
raise ValueError("Input of int64 is currently not supported, as it could overflow the float64 used internally when computing Silhouette. Use diss.astype(numpy.float64) if that is acceptable and you have the necessary memory for this copy.")
638637
else:
639638
if dtype == np.float32:
640-
return _silhouette_f32(diss, labels.astype(np.uint64), samples)
639+
return _silhouette_f32(diss, labels, samples)
641640
elif dtype == np.float64:
642-
return _silhouette_f64(diss, labels.astype(np.uint64), samples)
641+
return _silhouette_f64(diss, labels, samples)
643642
elif dtype == np.int32:
644-
return _silhouette_i32(diss, labels.astype(np.uint64), samples)
643+
return _silhouette_i32(diss, labels, samples)
645644
elif dtype == np.int64:
646645
raise ValueError("Input of int64 is currently not supported, as it could overflow the float64 used internally when computing Silhouette. Use diss.astype(numpy.float64) if that is acceptable and you have the necessary memory for this copy.")
647646
raise ValueError("Input data not supported. Use a numpy array of floats.")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "kmedoids"
7-
version = "0.4.2"
7+
version = "0.4.3"
88
description = "k-Medoids Clustering in Python with FasterPAM"
99
requires-dist = ["numpy"]
1010
classifier = [

tests/test_integration.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_pamsil(self):
4747
dist = np.array([[0, 2, 3, 4, 5], [2, 0, 6, 7, 8], [3, 6, 0, 9, 10], [4, 7, 9, 0, 11], [5, 8, 10, 11, 0]], dtype=np.float32)
4848
pamsil = kmedoids.pamsil(dist, 2)
4949
pamsil_rust = kmedoids.kmedoids._pamsil_swap_f32(dist, pamsil.medoids, 100)
50-
assert pamsil.loss == 0.5137878787878788
50+
assert pamsil.loss == 0.3137878787878788
5151
assert pamsil.loss == pamsil_rust[0]
5252
assert np.array_equal(pamsil.medoids, pamsil_rust[2])
5353

0 commit comments

Comments
 (0)