@@ -39,13 +39,13 @@ static PyObject *split_bucket_at(PyObject *buckets_list, int index) {
3939 // Get the bucket of interest and index
4040 PyArrayObject * bucket =
4141 (PyArrayObject * )PyList_GetItem (buckets_list , index );
42+
4243 npy_intp bucket_size = PyArray_DIM (bucket , 0 );
4344 npy_intp dim = PyArray_DIM (bucket , 1 );
4445
4546 // calculate split sizes and split the bucket into half
4647 npy_intp bucket_a_length = (npy_intp )ceil ((double )bucket_size / 2.0 );
4748 npy_intp bucket_b_length = bucket_size - bucket_a_length ;
48-
4949 npy_intp dims_a [2 ] = {bucket_a_length , dim };
5050 npy_intp dims_b [2 ] = {bucket_b_length , dim };
5151
@@ -61,13 +61,19 @@ static PyObject *split_bucket_at(PyObject *buckets_list, int index) {
6161
6262 // the result list has one more entry than bucket_count (one bucket is split in two)
6363 PyObject * result_buckets = PyList_New (bucket_count + 1 );
64- // Repopulate result_buckets with the items from buckets_list
64+
6565 for (Py_ssize_t i = 0 ; i < bucket_count ; i ++ ) {
66+ // Note: Don't copy bucket at 'index', it is split later.
67+ if (i == index ) {
68+ continue ;
69+ }
6670 PyObject * item = PyList_GetItem (buckets_list , i );
6771 Py_INCREF (item );
6872 PyList_SET_ITEM (result_buckets , (i < index ) ? i : i + 1 , item );
6973 }
74+
7075 // Insert to the new list the split bucket at index and index +1
76+ // Note: PyList_SET_ITEM steals the references of bucket_a and bucket_b
7177 PyList_SET_ITEM (result_buckets , index , bucket_a );
7278 PyList_SET_ITEM (result_buckets , index + 1 , bucket_b );
7379
@@ -109,6 +115,7 @@ static PyObject *merge_bucket_at(PyObject *buckets_list, int index) {
109115 }
110116 // place the merged bucket at index
111117 PyList_SET_ITEM (result_buckets , index , merged_bucket );
118+
112119 // And finally copy buckets after index + 1 from buckets_list to
113120 // result_buckets, shifting each index by one (due to the removal of one
114121 // bucket).
@@ -117,11 +124,10 @@ static PyObject *merge_bucket_at(PyObject *buckets_list, int index) {
117124 Py_INCREF (item );
118125 PyList_SET_ITEM (result_buckets , i - 1 , item );
119126 }
120-
127+ Py_DECREF ( buckets_list );
121128 return result_buckets ;
122129}
123130
124-
125131static PyObject * LTTB_for_buckets (PyObject * buckets_list ) {
126132 Py_ssize_t bucket_count = PyList_Size (buckets_list );
127133 npy_intp num_points = bucket_count ;
@@ -131,17 +137,14 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
131137 double * x_data = (double * )PyArray_DATA ((PyArrayObject * )x_array );
132138 double * y_data = (double * )PyArray_DATA ((PyArrayObject * )y_array );
133139
134- // Get the first point of the first bucket and initialize sampled data
135140 PyArrayObject * first_bucket =
136141 (PyArrayObject * )PyList_GetItem (buckets_list , 0 );
137142
138143 double * first_point_data = (double * )PyArray_GETPTR2 (first_bucket , 0 , 0 );
139144 x_data [0 ] = first_point_data [0 ];
140145 y_data [0 ] = first_point_data [1 ];
141- // Store the last selected data point
142146 double * last_selected_data = first_point_data ;
143147
144- // Main LTTB loop
145148 for (Py_ssize_t i = 1 ; i < bucket_count - 1 ; i ++ ) {
146149 PyArrayObject * bucket =
147150 (PyArrayObject * )PyList_GetItem (buckets_list , i );
@@ -154,8 +157,8 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
154157
155158 double max_area = -1.0 ;
156159 npy_intp max_area_index = -1 ;
157-
158160 npy_intp bucket_size = PyArray_DIM (bucket , 0 );
161+
159162 for (npy_intp j = 0 ; j < bucket_size ; j ++ ) {
160163 double * point_data = (double * )PyArray_GETPTR2 (bucket , j , 0 );
161164 double area = calculate_triangle_area (
@@ -173,17 +176,16 @@ static PyObject *LTTB_for_buckets(PyObject *buckets_list) {
173176 Py_DECREF (average_point );
174177 }
175178
176- // Append the first point of the last bucket
177179 PyArrayObject * last_bucket =
178180 (PyArrayObject * )PyList_GetItem (buckets_list , bucket_count - 1 );
179181 double * last_point_data = (double * )PyArray_GETPTR2 (last_bucket , 0 , 0 );
180182 x_data [bucket_count - 1 ] = last_point_data [0 ];
181183 y_data [bucket_count - 1 ] = last_point_data [1 ];
182- // Return x and y arrays as a tuple
184+
183185 PyObject * result = PyTuple_Pack (2 , x_array , y_array );
184186 Py_DECREF (x_array );
185187 Py_DECREF (y_array );
186- Py_DECREF ( buckets_list );
188+ // This function borrows the list, hence it should not destroy it (DECREF).
187189
188190 return result ;
189191}
@@ -345,75 +347,59 @@ static PyObject *calculate_sse_for_buckets(PyObject *buckets_list) {
345347 return sse_array ;
346348}
347349
348-
349350static PyObject * ltd_for_buckets (PyObject * buckets_list ) {
350- // 1: The data has been split into an almost equal number of buckets as the
351- // threshold
352- // - first bucket only containing the first data point
353- // - last bucket containing only the last data point .
354- // First and last buckets will then excluded in the bucket resizing
355-
356- // 2: Calculate the SSE for the buckets with one point in
357- // adjacent buckets overlapping
358- // 3: while halting condition is not met continue
359- // 4: Find the bucket F with the highest SSE
360- // 5: Find the pair of adjacent buckets A and B with the lowest SSE sum.
361- // The pair should not contain F
362- // 6: Split bucket F into roughly two equal buckets.
363- // 7: Merge the buckets A and B
364- // 8: Calculate the SSE of the newly split up and merged buckets
365- // 9: end
366- // 10: Use the Largest-Triangle-Three-Buckets algorithm on the resulting
367- // buckets for point selection
368-
369- // 1.
370- Py_ssize_t num_buckets = PyList_Size (buckets_list );
351+ // We modify the local 'buckets_list' variable (swap it),
352+ // so we must own a reference to it initially.
353+ Py_INCREF (buckets_list );
371354
355+ Py_ssize_t num_buckets = PyList_Size (buckets_list );
372356 int threshold = (int )num_buckets ;
373357 int num_iterations = ((int )num_buckets * 10 ) / threshold ;
374358
375359 for (int i = 0 ; i < num_iterations ; i ++ ) {
376- // 2. + 3.
377360 PyObject * sse_for_buckets = calculate_sse_for_buckets (buckets_list );
378- // 4.
361+
379362 npy_intp highest_sse_bucket_index = find_highest_sse_bucket_index (
380363 buckets_list , (PyArrayObject * )sse_for_buckets );
364+
381365 if (highest_sse_bucket_index < 0 ) {
382366 Py_DECREF (sse_for_buckets );
383367 break ;
384368 }
385- // 5.
369+
386370 npy_intp lowest_sse_adjacent_bucket_index =
387371 find_lowest_sse_adjacent_buckets_index (
388372 (PyArrayObject * )sse_for_buckets , highest_sse_bucket_index );
373+
389374 if (lowest_sse_adjacent_bucket_index < 0 ) {
390375 Py_DECREF (sse_for_buckets );
391376 break ;
392377 }
393378
394- // 6.
379+ // Split
395380 PyObject * updated_buckets =
396381 split_bucket_at (buckets_list , highest_sse_bucket_index );
382+
397383 Py_DECREF (buckets_list );
398384 buckets_list = updated_buckets ;
399385
400386 if (lowest_sse_adjacent_bucket_index > highest_sse_bucket_index ) {
401387 lowest_sse_adjacent_bucket_index += 1 ;
402388 }
403- // 7.
389+
404390 PyObject * merged_buckets =
405391 merge_bucket_at (buckets_list , lowest_sse_adjacent_bucket_index );
406- // 8.
392+
407393 Py_DECREF (buckets_list );
408394 buckets_list = merged_buckets ;
409395
410- Py_DECREF (
411- sse_for_buckets ); // Release SSE array for the current iteration
396+ Py_DECREF (sse_for_buckets );
412397 }
413- // end 9.
414- // 10.
398+
415399 PyObject * lttb_result = LTTB_for_buckets (buckets_list );
416- // Don't forget to release the final reference of buckets_list
400+
401+ // Finally, release the final buckets_list (balances the initial INCREF
402+ // or the reference to the list created in the last loop iteration).
417403 Py_DECREF (buckets_list );
418404
419405 return lttb_result ;
0 commit comments