Skip to content

Commit 864fbdc

Browse files
authored
Merge pull request #13 from dgoeries/rm-memcopy
perf: Remove memcpy, speed up ltd
2 parents 9c8a9fe + 121de37 commit 864fbdc

1 file changed

Lines changed: 76 additions & 56 deletions

File tree

src/downsample/_ltd.c

Lines changed: 76 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -188,46 +188,6 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
188188
}
189189

190190

191-
static double calculate_sse_for_bucket(PyArrayObject *bucket) {
192-
npy_intp num_points = PyArray_DIM(bucket, 0);
193-
double *data = (double *)PyArray_DATA(bucket);
194-
195-
double sum_x = 0.0, sum_y = 0.0;
196-
double a_numerator = 0.0;
197-
double a_denominator = 0.0;
198-
199-
for (npy_intp i = 0; i < num_points; i++) {
200-
sum_x += data[i * 2];
201-
sum_y += data[i * 2 + 1];
202-
}
203-
double avg_x = sum_x / (double)num_points;
204-
double avg_y = sum_y / (double)num_points;
205-
206-
for (npy_intp i = 0; i < num_points; i++) {
207-
double deviation_x = data[i * 2] - avg_x;
208-
double deviation_y = data[i * 2 + 1] - avg_y;
209-
a_numerator += deviation_x * deviation_y;
210-
a_denominator += deviation_x * deviation_x;
211-
}
212-
double a, b;
213-
if (a_denominator == 0.0) {
214-
a = (a_numerator > 0) ? INFINITY : -INFINITY;
215-
b = avg_y;
216-
} else {
217-
a = a_numerator / a_denominator;
218-
b = avg_y - a * avg_x;
219-
}
220-
double sse = 0.0;
221-
for (npy_intp i = 0; i < num_points; i++) {
222-
double x = data[i * 2];
223-
double y = data[i * 2 + 1];
224-
double standardError = y - (a * x + b);
225-
sse += standardError * standardError;
226-
}
227-
return sse;
228-
}
229-
230-
231191
static npy_intp find_highest_sse_bucket_index(PyObject *buckets_list,
232192
PyArrayObject *sse_array) {
233193
npy_intp sse_len = PyArray_DIM(sse_array, 0);
@@ -282,19 +242,91 @@ static npy_intp find_lowest_sse_adjacent_buckets_index(PyArrayObject *sse_array,
282242
}
283243

284244

245+
static double calculate_sse_bucket(double *prev_point, double *bucket_data,
246+
npy_intp bucket_len, double *next_point) {
247+
248+
// Total points = 1 (prev) + bucket_len + 1 (next)
249+
npy_intp total_points = bucket_len + 2;
250+
double sum_x = 0.0, sum_y = 0.0;
251+
npy_intp i;
252+
253+
sum_x += prev_point[0];
254+
sum_y += prev_point[1];
255+
256+
for (i = 0; i < bucket_len; i++) {
257+
sum_x += bucket_data[i * 2];
258+
sum_y += bucket_data[i * 2 + 1];
259+
}
260+
261+
sum_x += next_point[0];
262+
sum_y += next_point[1];
263+
double avg_x = sum_x / total_points;
264+
double avg_y = sum_y / total_points;
265+
266+
// Calculate linear regression
267+
double numerator = 0.0;
268+
double denominator = 0.0;
269+
double deviation_x, deviation_y;
270+
271+
deviation_x = prev_point[0] - avg_x;
272+
deviation_y = prev_point[1] - avg_y;
273+
numerator += deviation_x * deviation_y;
274+
denominator += deviation_x * deviation_x;
275+
276+
for (i = 0; i < bucket_len; i++) {
277+
deviation_x = bucket_data[i * 2] - avg_x;
278+
deviation_y = bucket_data[i * 2 + 1] - avg_y;
279+
numerator += deviation_x * deviation_y;
280+
denominator += deviation_x * deviation_x;
281+
}
282+
283+
deviation_x = next_point[0] - avg_x;
284+
deviation_y = next_point[1] - avg_y;
285+
numerator += deviation_x * deviation_y;
286+
denominator += deviation_x * deviation_x;
287+
288+
double a, b;
289+
if (denominator == 0.0) {
290+
a = (numerator > 0) ? INFINITY : -INFINITY;
291+
b = avg_y;
292+
} else {
293+
a = numerator / denominator;
294+
b = avg_y - a * avg_x;
295+
}
296+
297+
// Calculate error
298+
double sse = 0.0;
299+
double standardError;
300+
301+
standardError = prev_point[1] - (a * prev_point[0] + b);
302+
sse += standardError * standardError;
303+
304+
for (i = 0; i < bucket_len; i++) {
305+
double bx = bucket_data[i * 2];
306+
double by = bucket_data[i * 2 + 1];
307+
standardError = by - (a * bx + b);
308+
sse += standardError * standardError;
309+
}
310+
311+
standardError = next_point[1] - (a * next_point[0] + b);
312+
sse += standardError * standardError;
313+
314+
return sse;
315+
}
316+
285317
static PyObject *calculate_sse_for_buckets(PyObject *buckets_list) {
286318
Py_ssize_t num_buckets = PyList_Size(buckets_list);
287319
if (num_buckets < 3) {
288320
PyErr_SetString(PyExc_ValueError,
289321
"Not enough buckets to calculate SSE");
290322
return NULL;
291323
}
324+
292325
npy_intp sse_len = num_buckets;
293326
PyObject *sse_array =
294327
PyArray_Zeros(1, &sse_len, PyArray_DescrFromType(NPY_DOUBLE), 0);
295328

296329
double *sse_data = (double *)PyArray_DATA((PyArrayObject *)sse_array);
297-
298330
for (Py_ssize_t i = 1; i < num_buckets - 1; i++) {
299331
PyArrayObject *prev_bucket =
300332
(PyArrayObject *)PyList_GetItem(buckets_list, i - 1);
@@ -303,28 +335,16 @@ static PyObject *calculate_sse_for_buckets(PyObject *buckets_list) {
303335
PyArrayObject *next_bucket =
304336
(PyArrayObject *)PyList_GetItem(buckets_list, i + 1);
305337

306-
npy_intp curr_rows = PyArray_DIM(curr_bucket, 0);
307-
npy_intp cols = PyArray_DIM(curr_bucket, 1);
308-
309-
npy_intp combined_dims[2] = {curr_rows + 2, cols};
310-
PyArrayObject *bucket_with_adj =
311-
(PyArrayObject *)PyArray_SimpleNew(2, combined_dims, NPY_DOUBLE);
312-
313-
double *bucket_adj_data = (double *)PyArray_DATA(bucket_with_adj);
314338
double *prev_last_row = (double *)PyArray_GETPTR2(
315339
prev_bucket, PyArray_DIM(prev_bucket, 0) - 1, 0);
316-
memcpy(bucket_adj_data, prev_last_row, cols * __DOUBLE_SIZE__);
317340

318341
double *curr_data = (double *)PyArray_DATA(curr_bucket);
319-
memcpy(bucket_adj_data + cols, curr_data,
320-
curr_rows * cols * __DOUBLE_SIZE__);
342+
npy_intp curr_len = PyArray_DIM(curr_bucket, 0);
321343

322344
double *next_first_row = (double *)PyArray_GETPTR2(next_bucket, 0, 0);
323-
memcpy(bucket_adj_data + (curr_rows + 1) * cols, next_first_row,
324-
cols * __DOUBLE_SIZE__);
325345

326-
sse_data[i] = calculate_sse_for_bucket(bucket_with_adj);
327-
Py_DECREF(bucket_with_adj);
346+
sse_data[i] = calculate_sse_bucket(prev_last_row, curr_data, curr_len,
347+
next_first_row);
328348
}
329349

330350
return sse_array;

0 commit comments

Comments
 (0)