Skip to content

Commit 1bdd2ff

Browse files
authored
Merge pull request #11 from dgoeries/stable-release
feat: Create stable release, fix memory leak in ltd
2 parents 5750fb9 + eeb1b21 commit 1bdd2ff

5 files changed

Lines changed: 49 additions & 59 deletions

File tree

.github/workflows/python-app.yml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,31 @@ on:
88

99
jobs:
1010
build:
11-
1211
runs-on: ${{ matrix.os }}
1312
strategy:
1413
matrix:
1514
os: [ubuntu-latest]
16-
python-version: ["3.10", "3.11", "3.12"]
15+
python-version: ["3.11", "3.12", "3.13"]
1716

1817
steps:
19-
- uses: actions/checkout@v4
18+
- name: Check out repository
19+
uses: actions/checkout@v4
20+
2021
- name: Set up Python ${{ matrix.python-version }}
2122
uses: actions/setup-python@v5
2223
with:
2324
python-version: ${{ matrix.python-version }}
25+
cache: 'pip'
26+
2427
- name: Install dependencies
2528
run: |
2629
python -m pip install --upgrade pip
27-
python -m pip install setuptools
28-
python -m pip install flake8 pytest
30+
python -m pip install setuptools flake8 pytest
31+
2932
- name: Build package
3033
run: |
3134
python -m pip install -e . -v
35+
3236
- name: Test with pytest
3337
run: |
34-
pytest
38+
pytest

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ repos:
2727
rev: v3.19.0
2828
hooks:
2929
- id: pyupgrade
30-
args: [ --py310-plus ]
30+
args: [ --py311-plus ]

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "downsample"
11-
version = "0.0.5"
11+
version = "0.1.0"
1212
readme = {file = "README.txt", content-type = "text/markdown"}
1313
description = "Downsample algorithms module for Python written in C"
14-
requires-python = ">=3.10"
14+
requires-python = ">=3.11"
1515
authors = [
1616
{ name = "Dennis Göries", email = "dennis@goeries.de" }
1717
]
@@ -24,7 +24,7 @@ dependencies = [
2424

2525
license = { text = "MIT" }
2626
classifiers = [
27-
"Development Status :: 4 - Beta",
27+
"Development Status :: 5 - Production/Stable",
2828
"Environment :: Console",
2929
"Intended Audience :: Developers",
3030
"Intended Audience :: Science/Research",

src/downsample/_ltd.c

Lines changed: 30 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ static PyObject *split_bucket_at(PyObject *buckets_list, int index) {
3939
// Get the bucket of interest and index
4040
PyArrayObject *bucket =
4141
(PyArrayObject *)PyList_GetItem(buckets_list, index);
42+
4243
npy_intp bucket_size = PyArray_DIM(bucket, 0);
4344
npy_intp dim = PyArray_DIM(bucket, 1);
4445

4546
// calculate split sizes and split the bucket into half
4647
npy_intp bucket_a_length = (npy_intp)ceil((double)bucket_size / 2.0);
4748
npy_intp bucket_b_length = bucket_size - bucket_a_length;
48-
4949
npy_intp dims_a[2] = {bucket_a_length, dim};
5050
npy_intp dims_b[2] = {bucket_b_length, dim};
5151

@@ -61,13 +61,19 @@ static PyObject *split_bucket_at(PyObject *buckets_list, int index) {
6161

6262
// The resulting bucket count increases by 1
6363
PyObject *result_buckets = PyList_New(bucket_count + 1);
64-
// Repopulate result_buckets with the items from buckets_list
64+
6565
for (Py_ssize_t i = 0; i < bucket_count; i++) {
66+
// Note: Skip the bucket at 'index'; its two halves are inserted below.
67+
if (i == index) {
68+
continue;
69+
}
6670
PyObject *item = PyList_GetItem(buckets_list, i);
6771
Py_INCREF(item);
6872
PyList_SET_ITEM(result_buckets, (i < index) ? i : i + 1, item);
6973
}
74+
7075
// Insert the two halves of the split bucket into the new list at index and index + 1
76+
// Note: PyList_SET_ITEM steals the references to bucket_a and bucket_b.
7177
PyList_SET_ITEM(result_buckets, index, bucket_a);
7278
PyList_SET_ITEM(result_buckets, index + 1, bucket_b);
7379

@@ -109,6 +115,7 @@ static PyObject *merge_bucket_at(PyObject *buckets_list, int index) {
109115
}
110116
// place the merged bucket at index
111117
PyList_SET_ITEM(result_buckets, index, merged_bucket);
118+
112119
// And finally copy buckets after index + 1 from buckets_list to
113120
// result_buckets, shifting each index by one (due to the removal of one
114121
// bucket).
@@ -117,11 +124,10 @@ static PyObject *merge_bucket_at(PyObject *buckets_list, int index) {
117124
Py_INCREF(item);
118125
PyList_SET_ITEM(result_buckets, i - 1, item);
119126
}
120-
127+
Py_DECREF(buckets_list);
121128
return result_buckets;
122129
}
123130

124-
125131
static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
126132
Py_ssize_t bucket_count = PyList_Size(buckets_list);
127133
npy_intp num_points = bucket_count;
@@ -131,17 +137,14 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
131137
double *x_data = (double *)PyArray_DATA((PyArrayObject *)x_array);
132138
double *y_data = (double *)PyArray_DATA((PyArrayObject *)y_array);
133139

134-
// Get the first point of the first bucket and initialize sampled data
135140
PyArrayObject *first_bucket =
136141
(PyArrayObject *)PyList_GetItem(buckets_list, 0);
137142

138143
double *first_point_data = (double *)PyArray_GETPTR2(first_bucket, 0, 0);
139144
x_data[0] = first_point_data[0];
140145
y_data[0] = first_point_data[1];
141-
// Store the last selected data point
142146
double *last_selected_data = first_point_data;
143147

144-
// Main LTTB loop
145148
for (Py_ssize_t i = 1; i < bucket_count - 1; i++) {
146149
PyArrayObject *bucket =
147150
(PyArrayObject *)PyList_GetItem(buckets_list, i);
@@ -154,8 +157,8 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
154157

155158
double max_area = -1.0;
156159
npy_intp max_area_index = -1;
157-
158160
npy_intp bucket_size = PyArray_DIM(bucket, 0);
161+
159162
for (npy_intp j = 0; j < bucket_size; j++) {
160163
double *point_data = (double *)PyArray_GETPTR2(bucket, j, 0);
161164
double area = calculate_triangle_area(
@@ -173,17 +176,16 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
173176
Py_DECREF(average_point);
174177
}
175178

176-
// Append the first point of the last bucket
177179
PyArrayObject *last_bucket =
178180
(PyArrayObject *)PyList_GetItem(buckets_list, bucket_count - 1);
179181
double *last_point_data = (double *)PyArray_GETPTR2(last_bucket, 0, 0);
180182
x_data[bucket_count - 1] = last_point_data[0];
181183
y_data[bucket_count - 1] = last_point_data[1];
182-
// Return x and y arrays as a tuple
184+
183185
PyObject *result = PyTuple_Pack(2, x_array, y_array);
184186
Py_DECREF(x_array);
185187
Py_DECREF(y_array);
186-
Py_DECREF(buckets_list);
188+
// This function borrows the list, hence it should not destroy it (DECREF).
187189

188190
return result;
189191
}
@@ -345,75 +347,59 @@ static PyObject *calculate_sse_for_buckets(PyObject *buckets_list) {
345347
return sse_array;
346348
}
347349

348-
349350
static PyObject *ltd_for_buckets(PyObject *buckets_list) {
350-
// 1: The data has been split into an almost equal number of buckets as the
351-
// threshold
352-
// - first bucket only containing the first data point
353-
// - last bucket containing only the last data point .
354-
// First and last buckets will then excluded in the bucket resizing
355-
356-
// 2: Calculate the SSE for the buckets with one point in
357-
// adjacent buckets overlapping
358-
// 3: while halting condition is not met continue
359-
// 4: Find the bucket F with the highest SSE
360-
// 5: Find the pair of adjacent buckets A and B with the lowest SSE sum.
361-
// The pair should not contain F
362-
// 6: Split bucket F into roughly two equal buckets.
363-
// 7: Merge the buckets A and B
364-
// 8: Calculate the SSE of the newly split up and merged buckets
365-
// 9: end
366-
// 10: Use the Largest-Triangle-Three-Buckets algorithm on the resulting
367-
// buckets for point selection
368-
369-
// 1.
370-
Py_ssize_t num_buckets = PyList_Size(buckets_list);
351+
// We modify the local 'buckets_list' variable (swap it),
352+
// so we must own a reference to it initially.
353+
Py_INCREF(buckets_list);
371354

355+
Py_ssize_t num_buckets = PyList_Size(buckets_list);
372356
int threshold = (int)num_buckets;
373357
int num_iterations = ((int)num_buckets * 10) / threshold;
374358

375359
for (int i = 0; i < num_iterations; i++) {
376-
// 2. + 3.
377360
PyObject *sse_for_buckets = calculate_sse_for_buckets(buckets_list);
378-
// 4.
361+
379362
npy_intp highest_sse_bucket_index = find_highest_sse_bucket_index(
380363
buckets_list, (PyArrayObject *)sse_for_buckets);
364+
381365
if (highest_sse_bucket_index < 0) {
382366
Py_DECREF(sse_for_buckets);
383367
break;
384368
}
385-
// 5.
369+
386370
npy_intp lowest_sse_adjacent_bucket_index =
387371
find_lowest_sse_adjacent_buckets_index(
388372
(PyArrayObject *)sse_for_buckets, highest_sse_bucket_index);
373+
389374
if (lowest_sse_adjacent_bucket_index < 0) {
390375
Py_DECREF(sse_for_buckets);
391376
break;
392377
}
393378

394-
// 6.
379+
// Split
395380
PyObject *updated_buckets =
396381
split_bucket_at(buckets_list, highest_sse_bucket_index);
382+
397383
Py_DECREF(buckets_list);
398384
buckets_list = updated_buckets;
399385

400386
if (lowest_sse_adjacent_bucket_index > highest_sse_bucket_index) {
401387
lowest_sse_adjacent_bucket_index += 1;
402388
}
403-
// 7.
389+
404390
PyObject *merged_buckets =
405391
merge_bucket_at(buckets_list, lowest_sse_adjacent_bucket_index);
406-
// 8.
392+
407393
Py_DECREF(buckets_list);
408394
buckets_list = merged_buckets;
409395

410-
Py_DECREF(
411-
sse_for_buckets); // Release SSE array for the current iteration
396+
Py_DECREF(sse_for_buckets);
412397
}
413-
// end 9.
414-
// 10.
398+
415399
PyObject *lttb_result = LTTB_for_buckets(buckets_list);
416-
// Don't forget to release the final reference of buckets_list
400+
401+
// Finally, release the final buckets_list (balances the initial INCREF
402+
// or creation in loop).
417403
Py_DECREF(buckets_list);
418404

419405
return lttb_result;

src/downsample/tests/test_memory_leak.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1+
import gc
12
import tracemalloc
23

34
import numpy as np
45
import pytest
56

6-
from downsample import ltob, lttb
7+
from downsample import ltd, lttb
78

89

9-
@pytest.mark.parametrize("func", [lttb, ltob])
10+
@pytest.mark.parametrize("func", [lttb, ltd])
1011
def test_memory_leak(func):
1112
"""
1213
Test memory leak for different LTTB functions.
@@ -18,8 +19,8 @@ def test_memory_leak(func):
1819

1920
# Test parameters (shared for all functions)
2021
size = 1_000_000
21-
threshold = 100
22-
iterations = 1_000
22+
threshold = 1000
23+
iterations = 1000
2324

2425
# Generate test data
2526
x = np.linspace(0, 10, size)
@@ -31,7 +32,6 @@ def test_memory_leak(func):
3132
for _ in range(iterations):
3233
result = func(x, y, threshold)
3334
del result
34-
import gc
3535
gc.collect()
3636

3737
# Snapshot after function execution

0 commit comments

Comments
 (0)