Skip to content

Commit de72e85

Browse files
authored
Feature/k shape (#80)
* Adding KShape, it is not working yet, more work is needed. * Adding kshape implementation + tests. * Adding bindings for kshape * Adding full implementation for kshape and kmeans bindings * Adding optimised implementation and tests to check correctness. * Adding kshape benchmark * Fixing bug * Improving Eigen Vectors computation * Adding tests for kmeans and kshape with initial values. * Adding changes to follow coding rules * Deleting unused function to pass code coverage * Fix bug * Fix formatting
1 parent 1536190 commit de72e85

11 files changed

Lines changed: 743 additions & 47 deletions

File tree

benchmarks/clusteringBench.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,29 @@ void Kmeans(benchmark::State &state) {
2323
while (state.KeepRunning()) {
2424
af::array means;
2525
af::array labels;
26-
khiva::clustering::kmeans(t, k, means, labels);
26+
khiva::clustering::kMeans(t, k, means, labels);
27+
means.eval();
28+
af::sync();
29+
}
30+
addMemoryCounters(state);
31+
}
32+
33+
template <af::Backend BE, int D>
34+
void Kshape(benchmark::State &state) {
35+
af::setBackend(BE);
36+
af::setDevice(D);
37+
38+
auto n = state.range(0);
39+
auto m = state.range(1);
40+
int k = state.range(2);
41+
42+
auto t = af::randu(n, m, f32);
43+
44+
af::sync();
45+
while (state.KeepRunning()) {
46+
af::array means;
47+
af::array labels;
48+
khiva::clustering::kShape(t, k, means, labels);
2749
means.eval();
2850
af::sync();
2951
}
@@ -35,20 +57,35 @@ void cudaBenchmarks() {
3557
->RangeMultiplier(2)
3658
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
3759
->Unit(benchmark::TimeUnit::kMicrosecond);
60+
61+
BENCHMARK_TEMPLATE(Kshape, af::Backend::AF_BACKEND_CUDA, CUDA_BENCHMARKING_DEVICE)
62+
->RangeMultiplier(2)
63+
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
64+
->Unit(benchmark::TimeUnit::kMicrosecond);
3865
}
3966

4067
// Registers the clustering benchmarks (Kmeans and Kshape) for the OpenCL backend on
// OPENCL_BENCHMARKING_DEVICE. Both registrations sweep the same three argument ranges with a
// multiplier of 2 and report timings in microseconds. The three ranges are the values the
// benchmark bodies read through state.range(0), state.range(1) and state.range(2).
void openclBenchmarks() {
4168
BENCHMARK_TEMPLATE(Kmeans, af::Backend::AF_BACKEND_OPENCL, OPENCL_BENCHMARKING_DEVICE)
4269
->RangeMultiplier(2)
4370
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
4471
->Unit(benchmark::TimeUnit::kMicrosecond);
72+
73+
BENCHMARK_TEMPLATE(Kshape, af::Backend::AF_BACKEND_OPENCL, OPENCL_BENCHMARKING_DEVICE)
74+
->RangeMultiplier(2)
75+
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
76+
->Unit(benchmark::TimeUnit::kMicrosecond);
4577
}
4678

4779
// Registers the clustering benchmarks (Kmeans and Kshape) for the CPU backend on
// CPU_BENCHMARKING_DEVICE. Both registrations sweep the same three argument ranges with a
// multiplier of 2 and report timings in microseconds. The three ranges are the values the
// benchmark bodies read through state.range(0), state.range(1) and state.range(2).
void cpuBenchmarks() {
4880
BENCHMARK_TEMPLATE(Kmeans, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
4981
->RangeMultiplier(2)
5082
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
5183
->Unit(benchmark::TimeUnit::kMicrosecond);
84+
85+
BENCHMARK_TEMPLATE(Kshape, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
86+
->RangeMultiplier(2)
87+
->Ranges({{1 << 10, 256 << 10}, {16, 128}, {8, 16}})
88+
->Unit(benchmark::TimeUnit::kMicrosecond);
5289
}
5390

5491
KHIVA_BENCHMARK_MAIN(cudaBenchmarks, openclBenchmarks, cpuBenchmarks)

bindings/c/include/khiva_c.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
66

77
#include <khiva_c/array.h>
8+
#include <khiva_c/clustering.h>
89
#include <khiva_c/defines.h>
910
#include <khiva_c/dimensionality.h>
1011
#include <khiva_c/distances.h>
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Copyright (c) 2018 Shapelets.io
2+
//
3+
// This Source Code Form is subject to the terms of the Mozilla Public
4+
// License, v. 2.0. If a copy of the MPL was not distributed with this
5+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
#include <khiva_c/defines.h>
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
/**
14+
* @brief Calculates the k-means algorithm.
15+
*
16+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same) and
17+
* dimension one indicates the number of time series.
18+
* @param k The number of means to be computed.
19+
* @param centroids The resulting means or centroids.
20+
* @param labels The resulting labels of each time series which is the closest centroid.
21+
* @param tolerance The error tolerance to stop the computation of the centroids.
22+
* @param max_iterations The maximum number of iterations allowed.
23+
*/
24+
KHIVAAPI void k_means(khiva_array *tss, int *k, khiva_array *centroids, khiva_array *labels, float *tolerance,
25+
int *max_iterations);
26+
27+
/**
28+
* @brief Calculates the k-means algorithm, starting from the given initial centroids and labels.
29+
*
30+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same)
31+
* and dimension one indicates the number of time series.
32+
* @param k The number of means to be computed.
33+
* @param initial_centroids The initial means or centroids.
* @param initial_labels The initial labels of each time series.
34+
* @param centroids The resulting means or centroids.
35+
* @param labels The resulting labels of each time series which is the closest centroid.
36+
* @param tolerance The error tolerance to stop the computation of the centroids.
37+
* @param max_iterations The maximum number of iterations allowed.
38+
*/
39+
KHIVAAPI void k_means_initial_values(khiva_array *tss, int *k, khiva_array *initial_centroids, khiva_array *centroids,
40+
khiva_array *initial_labels, khiva_array *labels, float *tolerance,
41+
int *max_iterations);
42+
43+
/**
44+
* @brief Calculates the clusterization based on SBD.
45+
*
46+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same) and
47+
* dimension one indicates the number of time series.
48+
* @param k The number of means to be computed.
49+
* @param centroids The resulting means or centroids.
50+
* @param labels The resulting labels of each time series which is the closest centroid.
51+
* @param tolerance The error tolerance to stop the computation of the centroids.
52+
* @param max_iterations The maximum number of iterations allowed.
53+
*/
54+
KHIVAAPI void k_shape(khiva_array *tss, int *k, khiva_array *centroids, khiva_array *labels, float *tolerance,
55+
int *max_iterations);
56+
57+
/**
58+
* @brief Calculates the clusterization based on SBD, starting from the given initial centroids and labels.
59+
*
60+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same)
61+
* and dimension one indicates the number of time series.
62+
* @param k The number of means to be computed.
63+
* @param initial_centroids The initial means or centroids.
* @param initial_labels The initial labels of each time series.
64+
* @param centroids The resulting means or centroids.
65+
* @param labels The resulting labels of each time series which is the closest centroid.
66+
* @param tolerance The error tolerance to stop the computation of the centroids.
67+
* @param max_iterations The maximum number of iterations allowed.
68+
*/
69+
KHIVAAPI void k_shape_initial_values(khiva_array *tss, int *k, khiva_array *initial_centroids, khiva_array *centroids,
70+
khiva_array *initial_labels, khiva_array *labels, float *tolerance,
71+
int *max_iterations);
72+
73+
#ifdef __cplusplus
74+
}
75+
#endif

bindings/c/src/clustering.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright (c) 2018 Shapelets.io
2+
//
3+
// This Source Code Form is subject to the terms of the Mozilla Public
4+
// License, v. 2.0. If a copy of the MPL was not distributed with this
5+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
#include <arrayfire.h>
8+
#include <khiva/clustering.h>
9+
#include <khiva_c/clustering.h>
10+
#include <iostream>
11+
12+
/**
 * C binding that forwards to khiva::clustering::kMeans.
 *
 * All arguments are passed by pointer. `centroids` and `labels` are outputs only: the handles they
 * hold on entry are never read and are overwritten with handles to the computed results.
 */
void k_means(khiva_array *tss, int *k, khiva_array *centroids, khiva_array *labels, float *tolerance,
             int *max_iterations) {
    // Wrap the caller's handle in a C++ array, then retain it back into *tss.
    // NOTE(review): presumably required because the af::array wrapper releases the handle it was
    // built from when it goes out of scope -- confirm against the ArrayFire documentation.
    af::array series = af::array(*tss);
    af_retain_array(tss, series.get());

    // Filled in by kMeans.
    af::array computedLabels;
    af::array computedCentroids;
    khiva::clustering::kMeans(series, *k, computedCentroids, computedLabels, *tolerance, *max_iterations);

    // Hand retained handles to the results back to the caller.
    af_retain_array(labels, computedLabels.get());
    af_retain_array(centroids, computedCentroids.get());
}
24+
25+
/**
 * C binding that forwards to khiva::clustering::kMeans, seeding it with caller-supplied centroids
 * and labels.
 *
 * All arguments are passed by pointer. The arrays wrapped from `initial_centroids` and
 * `initial_labels` are passed to kMeans as its centroid/label arguments, and whatever they hold
 * after the call is returned through `centroids` and `labels`.
 */
void k_means_initial_values(khiva_array *tss, int *k, khiva_array *initial_centroids, khiva_array *centroids,
                            khiva_array *initial_labels, khiva_array *labels, float *tolerance, int *max_iterations) {
    // Each input handle is wrapped in a C++ array and then retained back into the pointer it came
    // from.
    // NOTE(review): presumably required because the af::array wrapper releases the handle it was
    // built from when it goes out of scope -- confirm against the ArrayFire documentation.
    af::array var_tss = af::array(*tss);
    af_retain_array(tss, var_tss.get());

    af::array var_initial_centroids = af::array(*initial_centroids);
    af_retain_array(initial_centroids, var_initial_centroids.get());

    af::array var_initial_labels = af::array(*initial_labels);
    // Fix: retain into `initial_labels`, not `tss`. The previous code overwrote the caller's tss
    // handle with a handle to the labels array and never re-retained initial_labels.
    af_retain_array(initial_labels, var_initial_labels.get());

    khiva::clustering::kMeans(var_tss, *k, var_initial_centroids, var_initial_labels, *tolerance, *max_iterations);

    // Hand retained handles to the (updated) centroids and labels back to the caller.
    af_retain_array(labels, var_initial_labels.get());
    af_retain_array(centroids, var_initial_centroids.get());
}
41+
42+
/**
 * C binding that forwards to khiva::clustering::kShape.
 *
 * All arguments are passed by pointer. `centroids` and `labels` are outputs only: the handles they
 * hold on entry are never read and are overwritten with handles to the computed results.
 */
void k_shape(khiva_array *tss, int *k, khiva_array *centroids, khiva_array *labels, float *tolerance,
             int *max_iterations) {
    // Wrap the caller's handle in a C++ array, then retain it back into *tss.
    // NOTE(review): presumably required because the af::array wrapper releases the handle it was
    // built from when it goes out of scope -- confirm against the ArrayFire documentation.
    af::array series = af::array(*tss);
    af_retain_array(tss, series.get());

    // Filled in by kShape.
    af::array computedLabels;
    af::array computedCentroids;
    khiva::clustering::kShape(series, *k, computedCentroids, computedLabels, *tolerance, *max_iterations);

    // Hand retained handles to the results back to the caller.
    af_retain_array(labels, computedLabels.get());
    af_retain_array(centroids, computedCentroids.get());
}
54+
55+
/**
 * C binding that forwards to khiva::clustering::kShape, seeding it with caller-supplied centroids
 * and labels.
 *
 * All arguments are passed by pointer. The arrays wrapped from `initial_centroids` and
 * `initial_labels` are passed to kShape as its centroid/label arguments, and whatever they hold
 * after the call is returned through `centroids` and `labels`.
 */
void k_shape_initial_values(khiva_array *tss, int *k, khiva_array *initial_centroids, khiva_array *centroids,
                            khiva_array *initial_labels, khiva_array *labels, float *tolerance, int *max_iterations) {
    // Each input handle is wrapped in a C++ array and then retained back into the pointer it came
    // from.
    // NOTE(review): presumably required because the af::array wrapper releases the handle it was
    // built from when it goes out of scope -- confirm against the ArrayFire documentation.
    af::array var_tss = af::array(*tss);
    af_retain_array(tss, var_tss.get());

    af::array var_initial_centroids = af::array(*initial_centroids);
    af_retain_array(initial_centroids, var_initial_centroids.get());

    af::array var_initial_labels = af::array(*initial_labels);
    // Fix: retain into `initial_labels`, not `tss`. The previous code overwrote the caller's tss
    // handle with a handle to the labels array and never re-retained initial_labels.
    af_retain_array(initial_labels, var_initial_labels.get());

    khiva::clustering::kShape(var_tss, *k, var_initial_centroids, var_initial_labels, *tolerance, *max_iterations);

    // Hand retained handles to the (updated) centroids and labels back to the caller.
    af_retain_array(labels, var_initial_labels.get());
    af_retain_array(centroids, var_initial_centroids.get());
}

bindings/jni/include/khiva_jni.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
66

77
#include <khiva_jni/array.h>
8+
#include <khiva_jni/clustering.h>
89
#include <khiva_jni/dimensionality.h>
910
#include <khiva_jni/distances.h>
1011
#include <khiva_jni/features.h>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Copyright (c) 2018 Shapelets.io
2+
//
3+
// This Source Code Form is subject to the terms of the Mozilla Public
4+
// License, v. 2.0. If a copy of the MPL was not distributed with this
5+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
#include <jni.h>
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
/**
14+
* @brief Calculates the kMeans algorithm.
15+
*
16+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same) and
17+
* dimension one indicates the number of time series.
18+
*
19+
* @param k The number of means to be computed.
20+
* @param ref_centroids The resulting means or centroids.
21+
* @param ref_labels The resulting labels of each time series which is the closest centroid.
22+
* @param tolerance The error tolerance to stop the computation of the centroids.
23+
* @param maxIterations The maximum number of iterations allowed.
24+
*/
25+
JNIEXPORT jlongArray JNICALL Java_io_shapelets_khiva_Clustering_kMeans(JNIEnv *env, jobject, jlong ref_tss, jint k,
26+
jlong ref_centroids, jlong ref_labels,
27+
jfloat tolerance, jint maxIterations);
28+
29+
/**
30+
* @brief Computes the kShape algorithm.
31+
*
32+
* @param tss Expects an input array whose dimension zero is the length of the time series (all the same) and
33+
* dimension one indicates the number of time series.
34+
* @brief Calculates The clusterization based on SBD.
35+
* @param k The number of means to be computed.
36+
* @param ref_centroids The resulting means or centroids.
37+
* @param ref_labels The resulting labels of each time series which is the closest centroid.
38+
* @param tolerance The error tolerance to stop the computation of the centroids.
39+
* @param maxIterations The maximum number of iterations allowed.
40+
*/
41+
JNIEXPORT jlongArray JNICALL Java_io_shapelets_khiva_Clustering_kShape(JNIEnv *env, jobject, jlong ref_tss, jint k,
42+
jlong ref_centroids, jlong ref_labels,
43+
jfloat tolerance, jint maxIterations);
44+
45+
#ifdef __cplusplus
46+
}
47+
#endif

bindings/jni/src/clustering.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright (c) 2018 Shapelets.io
2+
//
3+
// This Source Code Form is subject to the terms of the Mozilla Public
4+
// License, v. 2.0. If a copy of the MPL was not distributed with this
5+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
#include <jni.h>
8+
#include <khiva/clustering.h>
9+
#include <khiva_jni/clustering.h>
10+
11+
/**
 * JNI entry point that forwards to khiva::clustering::kMeans.
 *
 * `ref_tss` carries a native array handle encoded as a jlong. `ref_centroids` and `ref_labels` are
 * accepted but not used by this implementation.
 *
 * @return A Java long[3] holding native handles, in this order: the (re-retained) input time
 *         series, the computed labels, the computed centroids.
 */
JNIEXPORT jlongArray JNICALL Java_io_shapelets_khiva_Clustering_kMeans(JNIEnv *env, jobject, jlong ref_tss, jint k,
                                                                       jlong ref_centroids, jlong ref_labels,
                                                                       jfloat tolerance, jint maxIterations) {
    const jint handleCount = 3;
    jlongArray result = env->NewLongArray(handleCount);

    // Decode the input handle, wrap it in a C++ array, and retain it so a handle can be returned.
    af_array tssHandle = reinterpret_cast<af_array>(ref_tss);
    af::array series = af::array(tssHandle);
    af_retain_array(&tssHandle, series.get());

    // Filled in by kMeans.
    af::array computedLabels;
    af::array computedCentroids;
    khiva::clustering::kMeans(series, static_cast<int>(k), computedCentroids, computedLabels,
                              static_cast<float>(tolerance), static_cast<int>(maxIterations));

    // Output handles start out null and are overwritten by the retain calls.
    af_array labelsHandle = nullptr;
    af_array centroidsHandle = nullptr;
    af_retain_array(&labelsHandle, computedLabels.get());
    af_retain_array(&centroidsHandle, computedCentroids.get());

    const jlong handles[handleCount] = {reinterpret_cast<jlong>(tssHandle), reinterpret_cast<jlong>(labelsHandle),
                                        reinterpret_cast<jlong>(centroidsHandle)};
    env->SetLongArrayRegion(result, 0, handleCount, handles);
    return result;
}
45+
46+
/**
 * JNI entry point that forwards to khiva::clustering::kShape.
 *
 * `ref_tss` carries a native array handle encoded as a jlong. `ref_centroids` and `ref_labels` are
 * accepted but not used by this implementation.
 *
 * @return A Java long[3] holding native handles, in this order: the (re-retained) input time
 *         series, the computed labels, the computed centroids.
 */
JNIEXPORT jlongArray JNICALL Java_io_shapelets_khiva_Clustering_kShape(JNIEnv *env, jobject, jlong ref_tss, jint k,
                                                                       jlong ref_centroids, jlong ref_labels,
                                                                       jfloat tolerance, jint maxIterations) {
    const jint handleCount = 3;
    jlongArray result = env->NewLongArray(handleCount);

    // Decode the input handle, wrap it in a C++ array, and retain it so a handle can be returned.
    af_array tssHandle = reinterpret_cast<af_array>(ref_tss);
    af::array series = af::array(tssHandle);
    af_retain_array(&tssHandle, series.get());

    // Filled in by kShape.
    af::array computedLabels;
    af::array computedCentroids;
    khiva::clustering::kShape(series, static_cast<int>(k), computedCentroids, computedLabels,
                              static_cast<float>(tolerance), static_cast<int>(maxIterations));

    // Output handles start out null and are overwritten by the retain calls.
    af_array labelsHandle = nullptr;
    af_array centroidsHandle = nullptr;
    af_retain_array(&labelsHandle, computedLabels.get());
    af_retain_array(&centroidsHandle, computedCentroids.get());

    const jlong handles[handleCount] = {reinterpret_cast<jlong>(tssHandle), reinterpret_cast<jlong>(labelsHandle),
                                        reinterpret_cast<jlong>(centroidsHandle)};
    env->SetLongArrayRegion(result, 0, handleCount, handles);
    return result;
}

include/khiva.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
// Core header files
88
#include <khiva/array.h>
9+
#include <khiva/clustering.h>
910
#include <khiva/dimensionality.h>
1011
#include <khiva/distances.h>
1112
#include <khiva/features.h>

0 commit comments

Comments
 (0)