From a8349efb0fb1bb3187a36bd178185750dcb3b52a Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 1 Apr 2026 18:06:16 -0700 Subject: [PATCH 1/4] wrap malloc/alloc --- include/problem.h | 1 + src/atoms/affine/add.c | 3 +- src/atoms/affine/broadcast.c | 5 ++- src/atoms/affine/const_scalar_mult.c | 3 +- src/atoms/affine/const_vector_mult.c | 7 ++-- src/atoms/affine/constant.c | 3 +- src/atoms/affine/diag_vec.c | 5 ++- src/atoms/affine/hstack.c | 5 ++- src/atoms/affine/index.c | 9 ++-- src/atoms/affine/left_matmul.c | 15 +++---- src/atoms/affine/neg.c | 3 +- src/atoms/affine/promote.c | 3 +- src/atoms/affine/reshape.c | 3 +- src/atoms/affine/right_matmul.c | 5 ++- src/atoms/affine/sum.c | 9 ++-- src/atoms/affine/trace.c | 9 ++-- src/atoms/affine/transpose.c | 5 ++- src/atoms/affine/variable.c | 3 +- src/atoms/affine/vstack.c | 3 +- src/atoms/bivariate_full_dom/matmul.c | 9 ++-- src/atoms/bivariate_full_dom/multiply.c | 7 ++-- .../bivariate_restricted_dom/quad_over_lin.c | 9 ++-- src/atoms/bivariate_restricted_dom/rel_entr.c | 3 +- .../rel_entr_scalar_vector.c | 3 +- .../rel_entr_vector_scalar.c | 3 +- src/atoms/elementwise_full_dom/common.c | 8 ++-- src/atoms/elementwise_full_dom/power.c | 3 +- src/atoms/elementwise_restricted_dom/common.c | 3 +- src/atoms/other/prod.c | 3 +- src/atoms/other/prod_axis_one.c | 9 ++-- src/atoms/other/prod_axis_zero.c | 9 ++-- src/atoms/other/quad_form.c | 5 ++- src/expr.c | 7 ++-- src/old-code/linear_op.c | 6 ++- src/problem.c | 41 ++++++++++++++----- src/utils/COO_Matrix.c | 19 +++++---- src/utils/CSC_Matrix.c | 17 ++++---- src/utils/CSR_Matrix.c | 11 ++--- src/utils/CSR_sum.c | 3 +- src/utils/dense_matrix.c | 9 ++-- src/utils/int_double_pair.c | 3 +- src/utils/linalg_dense_sparse_matmuls.c | 7 ++-- src/utils/linalg_sparse_matmuls.c | 5 ++- src/utils/sparse_matrix.c | 5 ++- 44 files changed, 185 insertions(+), 121 deletions(-) diff --git a/include/problem.h b/include/problem.h index 25d1517..459a6a4 100644 --- a/include/problem.h +++ 
b/include/problem.h @@ -38,6 +38,7 @@ typedef struct int nnz_hessian; int n_vars; int total_constraint_size; + size_t memory_bytes; } Diff_engine_stats; typedef struct problem diff --git a/src/atoms/affine/add.c b/src/atoms/affine/add.c index 59b6223..05b5822 100644 --- a/src/atoms/affine/add.c +++ b/src/atoms/affine/add.c @@ -17,6 +17,7 @@ */ #include "atoms/affine.h" #include "utils/CSR_sum.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -91,7 +92,7 @@ static bool is_affine(const expr *node) expr *new_add(expr *left, expr *right) { assert(left->d1 == right->d1 && left->d2 == right->d2); - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, left->d1, left->d2, left->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = left; diff --git a/src/atoms/affine/broadcast.c b/src/atoms/affine/broadcast.c index 9e3ddae..2090223 100644 --- a/src/atoms/affine/broadcast.c +++ b/src/atoms/affine/broadcast.c @@ -18,6 +18,7 @@ #include "atoms/affine.h" #include "subexpr.h" #include "utils/mini_numpy.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -194,7 +195,7 @@ static void wsum_hess_init_impl(expr *node) node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); /* allocate space for weight vector */ - node->work->dwork = malloc(node->size * sizeof(double)); + node->work->dwork = SP_MALLOC(node->size * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) @@ -273,7 +274,7 @@ expr *new_broadcast(expr *child, int d1, int d2) exit(1); } - broadcast_expr *bcast = (broadcast_expr *) calloc(1, sizeof(broadcast_expr)); + broadcast_expr *bcast = (broadcast_expr *) SP_CALLOC(1, sizeof(broadcast_expr)); expr *node = (expr *) bcast; // -------------------------------------------------------------------------- diff --git a/src/atoms/affine/const_scalar_mult.c b/src/atoms/affine/const_scalar_mult.c index 
abc4651..f410d4a 100644 --- a/src/atoms/affine/const_scalar_mult.c +++ b/src/atoms/affine/const_scalar_mult.c @@ -17,6 +17,7 @@ */ #include "atoms/affine.h" #include "subexpr.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -97,7 +98,7 @@ static bool is_affine(const expr *node) expr *new_const_scalar_mult(double a, expr *child) { const_scalar_mult_expr *mult_node = - (const_scalar_mult_expr *) calloc(1, sizeof(const_scalar_mult_expr)); + (const_scalar_mult_expr *) SP_CALLOC(1, sizeof(const_scalar_mult_expr)); expr *node = &mult_node->base; init_expr(node, child->d1, child->d2, child->n_vars, forward, jacobian_init_impl, diff --git a/src/atoms/affine/const_vector_mult.c b/src/atoms/affine/const_vector_mult.c index 3b6346e..8d965a4 100644 --- a/src/atoms/affine/const_vector_mult.c +++ b/src/atoms/affine/const_vector_mult.c @@ -17,6 +17,7 @@ */ #include "atoms/affine.h" #include "subexpr.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -77,7 +78,7 @@ static void wsum_hess_init_impl(expr *node) /* same sparsity as child */ node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); - node->work->dwork = (double *) malloc(node->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) @@ -112,7 +113,7 @@ static bool is_affine(const expr *node) expr *new_const_vector_mult(const double *a, expr *child) { const_vector_mult_expr *vnode = - (const_vector_mult_expr *) calloc(1, sizeof(const_vector_mult_expr)); + (const_vector_mult_expr *) SP_CALLOC(1, sizeof(const_vector_mult_expr)); expr *node = &vnode->base; init_expr(node, child->d1, child->d2, child->n_vars, forward, jacobian_init_impl, @@ -122,7 +123,7 @@ expr *new_const_vector_mult(const double *a, expr *child) expr_retain(child); /* copy a vector */ - vnode->a = (double *) malloc(child->size * sizeof(double)); + vnode->a = (double *) SP_MALLOC(child->size * sizeof(double)); memcpy(vnode->a, a, 
child->size * sizeof(double)); return node; diff --git a/src/atoms/affine/constant.c b/src/atoms/affine/constant.c index b7a5bfc..607b019 100644 --- a/src/atoms/affine/constant.c +++ b/src/atoms/affine/constant.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include @@ -60,7 +61,7 @@ static bool is_affine(const expr *node) expr *new_constant(int d1, int d2, int n_vars, const double *values) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, d1, d2, n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); memcpy(node->value, values, node->size * sizeof(double)); diff --git a/src/atoms/affine/diag_vec.c b/src/atoms/affine/diag_vec.c index 0c887dd..5c1374a 100644 --- a/src/atoms/affine/diag_vec.c +++ b/src/atoms/affine/diag_vec.c @@ -18,6 +18,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -100,7 +101,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* workspace for extracting diagonal weights */ - node->work->dwork = (double *) calloc(x->size, sizeof(double)); + node->work->dwork = (double *) SP_CALLOC(x->size, sizeof(double)); /* Copy child's Hessian structure (diag_vec is linear, so its own Hessian is * zero) */ @@ -136,7 +137,7 @@ expr *new_diag_vec(expr *child) /* n is the number of elements (works for both row and column vectors) */ int n = child->size; - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, n, n, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = child; diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c index 1be5fb9..185b3c7 100644 --- a/src/atoms/affine/hstack.c +++ b/src/atoms/affine/hstack.c @@ 
-17,6 +17,7 @@ */ #include "atoms/affine.h" #include "utils/CSR_sum.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -185,14 +186,14 @@ expr *new_hstack(expr **args, int n_args, int n_vars) } /* Allocate the type-specific struct */ - hstack_expr *hnode = (hstack_expr *) calloc(1, sizeof(hstack_expr)); + hstack_expr *hnode = (hstack_expr *) SP_CALLOC(1, sizeof(hstack_expr)); expr *node = &hnode->base; init_expr(node, args[0]->d1, d2, n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, wsum_hess_eval, free_type_data); /* Set type-specific fields (deep copy args array) */ - hnode->args = (expr **) calloc(n_args, sizeof(expr *)); + hnode->args = (expr **) SP_CALLOC(n_args, sizeof(expr *)); hnode->n_args = n_args; for (int i = 0; i < n_args; i++) { diff --git a/src/atoms/affine/index.c b/src/atoms/affine/index.c index 57b8af7..026d17f 100644 --- a/src/atoms/affine/index.c +++ b/src/atoms/affine/index.c @@ -17,6 +17,7 @@ */ #include "atoms/affine.h" #include "subexpr.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -28,7 +29,7 @@ * Returns true if duplicates exist, false otherwise. 
*/ static bool check_for_duplicates(const int *indices, int n_idxs, int max_idx) { - bool *seen = (bool *) calloc(max_idx, sizeof(bool)); + bool *seen = (bool *) SP_CALLOC(max_idx, sizeof(bool)); bool has_dup = false; for (int i = 0; i < n_idxs && !has_dup; i++) { @@ -104,7 +105,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* for setting weight vector to evaluate hessian of child */ - node->work->dwork = (double *) calloc(x->size, sizeof(double)); + node->work->dwork = (double *) SP_CALLOC(x->size, sizeof(double)); /* in the implementation of eval_wsum_hess we evaluate the child's hessian with a weight vector that has w[i] = 0 @@ -163,7 +164,7 @@ expr *new_index(expr *child, int d1, int d2, const int *indices, int n_idxs) { assert(d1 * d2 == n_idxs); /* allocate type-specific struct */ - index_expr *idx = (index_expr *) calloc(1, sizeof(index_expr)); + index_expr *idx = (index_expr *) SP_CALLOC(1, sizeof(index_expr)); expr *node = &idx->base; init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl, @@ -174,7 +175,7 @@ expr *new_index(expr *child, int d1, int d2, const int *indices, int n_idxs) expr_retain(child); /* copy indices */ - idx->indices = (int *) malloc(n_idxs * sizeof(int)); + idx->indices = (int *) SP_MALLOC(n_idxs * sizeof(int)); memcpy(idx->indices, indices, n_idxs * sizeof(int)); idx->n_idxs = n_idxs; diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index ec7e082..c8f8e19 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -46,6 +46,7 @@ vector-valued or matrix-valued. 
*/ +#include "utils/tracked_alloc.h" #include "utils/utils.h" static void forward(expr *node, const double *u) @@ -125,7 +126,7 @@ static void wsum_hess_init_impl(expr *node) /* work for computing A^T w*/ int n_blocks = ((left_matmul_expr *) node)->n_blocks; int dim = ((left_matmul_expr *) node)->AT->m * n_blocks; - node->work->dwork = (double *) malloc(dim * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(dim * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) @@ -167,7 +168,7 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A) /* Allocate the type-specific struct */ left_matmul_expr *lnode = - (left_matmul_expr *) calloc(1, sizeof(left_matmul_expr)); + (left_matmul_expr *) SP_CALLOC(1, sizeof(left_matmul_expr)); expr *node = &lnode->base; init_expr(node, d1, d2, u->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); @@ -178,8 +179,8 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A) (requiring size node->n_vars) and for transposing A (requiring size A->n). 
csc_to_csr_work is used for converting J_CSC to CSR (requiring node->size) */ - node->work->iwork = (int *) malloc(MAX(A->n, node->n_vars) * sizeof(int)); - lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(MAX(A->n, node->n_vars) * sizeof(int)); + lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); lnode->n_blocks = n_blocks; /* store A and AT */ @@ -212,15 +213,15 @@ expr *new_left_matmul_dense(expr *u, int m, int n, const double *data) } left_matmul_expr *lnode = - (left_matmul_expr *) calloc(1, sizeof(left_matmul_expr)); + (left_matmul_expr *) SP_CALLOC(1, sizeof(left_matmul_expr)); expr *node = &lnode->base; init_expr(node, d1, d2, u->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); node->left = u; expr_retain(u); - node->work->iwork = (int *) malloc(MAX(n, node->n_vars) * sizeof(int)); - lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(MAX(n, node->n_vars) * sizeof(int)); + lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); lnode->n_blocks = n_blocks; lnode->A = new_dense_matrix(m, n, data); diff --git a/src/atoms/affine/neg.c b/src/atoms/affine/neg.c index 3bc4cbe..14c706f 100644 --- a/src/atoms/affine/neg.c +++ b/src/atoms/affine/neg.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -87,7 +88,7 @@ static bool is_affine(const expr *node) expr *new_neg(expr *child) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, child->d1, child->d2, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = child; diff --git a/src/atoms/affine/promote.c b/src/atoms/affine/promote.c index 61bd232..24db54d 100644 --- a/src/atoms/affine/promote.c +++ b/src/atoms/affine/promote.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -106,7 +107,7 @@ static bool is_affine(const expr *node) expr *new_promote(expr *child, int d1, int d2) { assert(child->size == 1); - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = child; diff --git a/src/atoms/affine/reshape.c b/src/atoms/affine/reshape.c index 1444724..f11abae 100644 --- a/src/atoms/affine/reshape.c +++ b/src/atoms/affine/reshape.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -67,7 +68,7 @@ static bool is_affine(const expr *node) expr *new_reshape(expr *child, int d1, int d2) { assert(d1 * d2 == child->size); - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = child; diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index 723086e..b49a1a9 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -18,6 +18,7 @@ #include "atoms/affine.h" #include "utils/CSR_Matrix.h" +#include "utils/tracked_alloc.h" #include /* This file implements the atom 'right_matmul' corresponding to the operation y = @@ -28,7 +29,7 @@ expr *new_right_matmul(expr *u, const CSR_Matrix *A) { /* We can express right matmul using left matmul and transpose: u @ A = (A^T @ u^T)^T. */ - int *work_transpose = (int *) malloc(A->n * sizeof(int)); + int *work_transpose = (int *) SP_MALLOC(A->n * sizeof(int)); CSR_Matrix *AT = transpose(A, work_transpose); expr *u_transpose = new_transpose(u); @@ -44,7 +45,7 @@ expr *new_right_matmul_dense(expr *u, int m, int n, const double *data) { /* We express: u @ A = (A^T @ u^T)^T A is m x n, so A^T is n x m. 
*/ - double *AT = (double *) malloc(n * m * sizeof(double)); + double *AT = (double *) SP_MALLOC(n * m * sizeof(double)); for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) diff --git a/src/atoms/affine/sum.c b/src/atoms/affine/sum.c index bdd7140..3d3c583 100644 --- a/src/atoms/affine/sum.c +++ b/src/atoms/affine/sum.c @@ -19,6 +19,7 @@ #include "utils/CSR_sum.h" #include "utils/int_double_pair.h" #include "utils/mini_numpy.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -89,8 +90,8 @@ static void jacobian_init_impl(expr *node) /* we never have to store more than the child's nnz */ node->jacobian = new_csr_matrix(node->size, node->n_vars, x->jacobian->nnz); node->work->iwork = - malloc(MAX(node->jacobian->n, x->jacobian->nnz) * sizeof(int)); - snode->idx_map = malloc(x->jacobian->nnz * sizeof(int)); + SP_MALLOC(MAX(node->jacobian->n, x->jacobian->nnz) * sizeof(int)); + snode->idx_map = SP_MALLOC(x->jacobian->nnz * sizeof(int)); /* the idx_map array maps each nonzero entry j in x->jacobian to the corresponding index in the output row matrix C. 
Specifically, for @@ -135,7 +136,7 @@ static void wsum_hess_init_impl(expr *node) /* we never have to store more than the child's nnz */ node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); - node->work->dwork = malloc(x->size * sizeof(double)); + node->work->dwork = SP_MALLOC(x->size * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) @@ -195,7 +196,7 @@ expr *new_sum(expr *child, int axis) } /* Allocate the type-specific struct */ - sum_expr *snode = (sum_expr *) calloc(1, sizeof(sum_expr)); + sum_expr *snode = (sum_expr *) SP_CALLOC(1, sizeof(sum_expr)); expr *node = &snode->base; /* to be consistent with CVXPY and NumPy we treat the result from diff --git a/src/atoms/affine/trace.c b/src/atoms/affine/trace.c index 7a2f045..a57e8a9 100644 --- a/src/atoms/affine/trace.c +++ b/src/atoms/affine/trace.c @@ -18,6 +18,7 @@ #include "atoms/affine.h" #include "utils/CSR_sum.h" #include "utils/int_double_pair.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -69,14 +70,14 @@ static void jacobian_init_impl(expr *node) // fill sparsity pattern and idx_map // --------------------------------------------------------------- trace_expr *tnode = (trace_expr *) node; - node->work->iwork = malloc(MAX(node->jacobian->n, total_nnz) * sizeof(int)); + node->work->iwork = SP_MALLOC(MAX(node->jacobian->n, total_nnz) * sizeof(int)); /* the idx_map array maps each nonzero entry j in the original matrix A (from the selected, evenly spaced rows) to the corresponding index in the output row matrix C. Specifically, for each nonzero entry j in A (from the selected rows), idx_map[j] gives the position in C->x where the value from A->x[j] should be accumulated. 
*/ - tnode->idx_map = malloc(x->jacobian->nnz * sizeof(int)); + tnode->idx_map = SP_MALLOC(x->jacobian->nnz * sizeof(int)); sum_spaced_rows_into_row_csr_alloc(A, node->jacobian, row_spacing, node->work->iwork, tnode->idx_map); } @@ -103,7 +104,7 @@ static void wsum_hess_init_impl(expr *node) /* initialize child's hessian */ wsum_hess_init(x); - node->work->dwork = (double *) calloc(x->size, sizeof(double)); + node->work->dwork = (double *) SP_CALLOC(x->size, sizeof(double)); /* We copy over the sparsity pattern from the child. This also includes the contribution to wsum_hess of entries of the child that will always have @@ -143,7 +144,7 @@ static void free_type_data(expr *node) expr *new_trace(expr *child) { - trace_expr *tnode = (trace_expr *) calloc(1, sizeof(trace_expr)); + trace_expr *tnode = (trace_expr *) SP_CALLOC(1, sizeof(trace_expr)); expr *node = &tnode->base; init_expr(node, 1, 1, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); diff --git a/src/atoms/affine/transpose.c b/src/atoms/affine/transpose.c index 56b7326..5e2f0d8 100644 --- a/src/atoms/affine/transpose.c +++ b/src/atoms/affine/transpose.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -95,7 +96,7 @@ static void wsum_hess_init_impl(expr *node) node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); /* for computing Kw where K is the commutation matrix */ - node->work->dwork = (double *) malloc(node->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) { @@ -126,7 +127,7 @@ static bool is_affine(const expr *node) expr *new_transpose(expr *child) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, child->d2, child->d1, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = child; diff --git a/src/atoms/affine/variable.c b/src/atoms/affine/variable.c index 23844d1..393ff11 100644 --- a/src/atoms/affine/variable.c +++ b/src/atoms/affine/variable.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include @@ -63,7 +64,7 @@ static bool is_affine(const expr *node) expr *new_variable(int d1, int d2, int var_id, int n_vars) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, d1, d2, n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, wsum_hess_eval, NULL); node->var_id = var_id; diff --git a/src/atoms/affine/vstack.c b/src/atoms/affine/vstack.c index 8f72630..b4fb073 100644 --- a/src/atoms/affine/vstack.c +++ b/src/atoms/affine/vstack.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "utils/tracked_alloc.h" #include #include @@ -34,7 +35,7 @@ expr *new_vstack(expr **args, int n_args, int n_vars) assert(args[i]->d2 == args[0]->d2); } - expr **transposed = (expr **) malloc(n_args * sizeof(expr *)); + expr **transposed = (expr **) SP_MALLOC(n_args * sizeof(expr *)); for (int i = 0; i < n_args; i++) { transposed[i] = new_transpose(args[i]); diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c index cfe3ceb..d8ead81 100644 --- a/src/atoms/bivariate_full_dom/matmul.c +++ b/src/atoms/bivariate_full_dom/matmul.c @@ -23,6 +23,7 @@ #include "utils/linalg_dense_sparse_matmuls.h" #include "utils/linalg_sparse_matmuls.h" #include "utils/mini_numpy.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -420,12 +421,12 @@ static void wsum_hess_init_chain_rule(expr *node) mnode->B = build_cross_hessian_sparsity(m, k, n); mnode->BJg = csr_csc_matmul_alloc(mnode->B, Jg); int max_alloc = MAX(mnode->BJg->m, mnode->BJg->n); - mnode->BJg_csc_work = (int *) malloc(max_alloc * sizeof(int)); + mnode->BJg_csc_work = (int *) SP_MALLOC(max_alloc * sizeof(int)); mnode->BJg_CSC = csr_to_csc_alloc(mnode->BJg, mnode->BJg_csc_work); mnode->C = BTA_alloc(mnode->BJg_CSC, Jf); /* initialize C^T */ - node->work->iwork = (int *) malloc(mnode->C->m * sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(mnode->C->m * sizeof(int)); mnode->CT = AT_alloc(mnode->C, node->work->iwork); /* initialize Hessians of children */ @@ -445,7 +446,7 @@ static void wsum_hess_init_chain_rule(expr *node) if (!f->is_affine(f) || !g->is_affine(g)) { node->work->dwork = - (double *) malloc(MAX(f->size, g->size) * sizeof(double)); + (double *) SP_MALLOC(MAX(f->size, g->size) * sizeof(double)); } } @@ -526,7 +527,7 @@ expr *new_matmul(expr *x, expr *y) } /* Allocate the expression node */ - expr *node = (expr *) calloc(1, sizeof(matmul_expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(matmul_expr)); 
/* Choose no-chain-rule or chain-rule function pointers */ bool use_chain_rule = !(x->var_id != NOT_A_VARIABLE && diff --git a/src/atoms/bivariate_full_dom/multiply.c b/src/atoms/bivariate_full_dom/multiply.c index d1e2a6f..69b3ba5 100644 --- a/src/atoms/bivariate_full_dom/multiply.c +++ b/src/atoms/bivariate_full_dom/multiply.c @@ -18,6 +18,7 @@ #include "atoms/bivariate_full_dom.h" #include "subexpr.h" #include "utils/CSR_sum.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -141,7 +142,7 @@ static void wsum_hess_init_impl(expr *node) /* used for computing weights to wsum_hess of children */ if (!x->is_affine(x) || !y->is_affine(y)) { - node->work->dwork = (double *) malloc(node->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); } /* prepare sparsity pattern of csc conversion */ @@ -152,7 +153,7 @@ static void wsum_hess_init_impl(expr *node) /* compute sparsity of C and prepare CT */ CSR_Matrix *C = BTA_alloc(Jg1, Jg2); - node->work->iwork = (int *) malloc(C->m * sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(C->m * sizeof(int)); CSR_Matrix *CT = AT_alloc(C, node->work->iwork); /* initialize wsum_hessians of children */ @@ -283,7 +284,7 @@ static bool is_affine(const expr *node) expr *new_elementwise_mult(expr *left, expr *right) { elementwise_mult_expr *mul_node = - (elementwise_mult_expr *) calloc(1, sizeof(elementwise_mult_expr)); + (elementwise_mult_expr *) SP_CALLOC(1, sizeof(elementwise_mult_expr)); expr *node = &mul_node->base; init_expr(node, left->d1, left->d2, left->n_vars, forward, jacobian_init_impl, diff --git a/src/atoms/bivariate_restricted_dom/quad_over_lin.c b/src/atoms/bivariate_restricted_dom/quad_over_lin.c index 4ad795a..6b32648 100644 --- a/src/atoms/bivariate_restricted_dom/quad_over_lin.c +++ b/src/atoms/bivariate_restricted_dom/quad_over_lin.c @@ -18,6 +18,7 @@ #include "atoms/bivariate_restricted_dom.h" #include "subexpr.h" #include "utils/CSC_Matrix.h" +#include 
"utils/tracked_alloc.h" #include #include #include @@ -81,10 +82,10 @@ static void jacobian_init_impl(expr *node) } else /* left node is not a variable (guaranteed to be a linear operator) */ { - node->work->dwork = (double *) malloc(x->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(x->size * sizeof(double)); /* compute required allocation and allocate jacobian */ - bool *col_nz = (bool *) calloc( + bool *col_nz = (bool *) SP_CALLOC( node->n_vars, sizeof(bool)); /* TODO: could use iwork here instead*/ int nonzero_cols = count_nonzero_cols(x->jacobian, col_nz); node->jacobian = new_csr_matrix(1, node->n_vars, nonzero_cols + 1); @@ -110,7 +111,7 @@ static void jacobian_init_impl(expr *node) node->jacobian->p[1] = node->jacobian->nnz; /* find position where y should be inserted */ - node->work->iwork = (int *) malloc(sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(sizeof(int)); for (int j = 0; j < node->jacobian->nnz; j++) { if (node->jacobian->i[j] == y->var_id) @@ -335,7 +336,7 @@ expr *new_quad_over_lin(expr *left, expr *right) exit(EXIT_FAILURE); } - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, 1, 1, left->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); node->left = left; diff --git a/src/atoms/bivariate_restricted_dom/rel_entr.c b/src/atoms/bivariate_restricted_dom/rel_entr.c index 62772f8..aa3dc7e 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/bivariate_restricted_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -203,7 +204,7 @@ expr *new_rel_entr_vector_args(expr *left, expr *right) exit(EXIT_FAILURE); } - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, left->d1, left->d2, left->n_vars, forward_vector_args, jacobian_init_vectors_args, eval_jacobian_vector_args, is_affine, wsum_hess_init_vector_args, eval_wsum_hess_vector_args, NULL); diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c index 28a3c0d..006c872 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/bivariate_restricted_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -217,7 +218,7 @@ static bool is_affine(const expr *node) expr *new_rel_entr_first_arg_scalar(expr *left, expr *right) { assert(left->d1 == 1 && left->d2 == 1); - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, right->d1, right->d2, left->n_vars, forward_scalar_vector, jacobian_init_scalar_vector, eval_jacobian_scalar_vector, is_affine, wsum_hess_init_scalar_vector, eval_wsum_hess_scalar_vector, NULL); diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c index b8e4c2c..0535376 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/bivariate_restricted_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -218,7 +219,7 @@ static bool is_affine(const expr *node) expr *new_rel_entr_second_arg_scalar(expr *left, expr *right) { assert(right->d1 == 1 && right->d2 == 1); - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); init_expr(node, left->d1, left->d2, left->n_vars, forward_vector_scalar, jacobian_init_vector_scalar, eval_jacobian_vector_scalar, is_affine, wsum_hess_init_vector_scalar, eval_wsum_hess_vector_scalar, NULL); diff --git a/src/atoms/elementwise_full_dom/common.c b/src/atoms/elementwise_full_dom/common.c index 3a6df35..f5811fb 100644 --- a/src/atoms/elementwise_full_dom/common.c +++ b/src/atoms/elementwise_full_dom/common.c @@ -3,6 +3,7 @@ #include "utils/CSC_Matrix.h" #include "utils/CSR_Matrix.h" #include "utils/CSR_sum.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -28,8 +29,9 @@ void jacobian_init_elementwise(expr *node) jacobian_init(child); CSR_Matrix *Jg = child->jacobian; node->jacobian = new_csr_copy_sparsity(Jg); - node->work->dwork = (double *) malloc(node->size * sizeof(double)); - node->work->local_jac_diag = (double *) malloc(node->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); + node->work->local_jac_diag = + (double *) SP_MALLOC(node->size * sizeof(double)); } } @@ -182,7 +184,7 @@ void init_elementwise(expr *node, expr *child) expr *new_elementwise(expr *child) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); if (!node) return NULL; init_elementwise(node, child); diff --git a/src/atoms/elementwise_full_dom/power.c b/src/atoms/elementwise_full_dom/power.c index 1564433..67876fb 100644 --- a/src/atoms/elementwise_full_dom/power.c +++ b/src/atoms/elementwise_full_dom/power.c @@ -1,5 +1,6 @@ #include "atoms/elementwise_full_dom.h" #include "subexpr.h" +#include 
"utils/tracked_alloc.h" #include #include @@ -43,7 +44,7 @@ static void local_wsum_hess(expr *node, double *out, const double *w) expr *new_power(expr *child, double p) { /* Allocate the type-specific struct */ - power_expr *pnode = (power_expr *) calloc(1, sizeof(power_expr)); + power_expr *pnode = (power_expr *) SP_CALLOC(1, sizeof(power_expr)); expr *node = &pnode->base; init_elementwise(node, child); node->forward = forward; diff --git a/src/atoms/elementwise_restricted_dom/common.c b/src/atoms/elementwise_restricted_dom/common.c index 67d8484..fd73fa7 100644 --- a/src/atoms/elementwise_restricted_dom/common.c +++ b/src/atoms/elementwise_restricted_dom/common.c @@ -1,4 +1,5 @@ #include "atoms/elementwise_restricted_dom.h" +#include "utils/tracked_alloc.h" #include void jacobian_init_restricted(expr *node) @@ -42,7 +43,7 @@ bool is_affine_restricted(const expr *node) expr *new_restricted(expr *child) { - expr *node = (expr *) calloc(1, sizeof(expr)); + expr *node = (expr *) SP_CALLOC(1, sizeof(expr)); if (!node) return NULL; init_expr(node, child->d1, child->d2, child->n_vars, NULL, diff --git a/src/atoms/other/prod.c b/src/atoms/other/prod.c index 5826167..0856b3a 100644 --- a/src/atoms/other/prod.c +++ b/src/atoms/other/prod.c @@ -1,4 +1,5 @@ #include "atoms/non_elementwise_full_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -195,7 +196,7 @@ I think they return row vectors.*/ expr *new_prod(expr *child) { /* Output is scalar: 1 x 1 */ - prod_expr *pnode = (prod_expr *) calloc(1, sizeof(prod_expr)); + prod_expr *pnode = (prod_expr *) SP_CALLOC(1, sizeof(prod_expr)); expr *node = &pnode->base; init_expr(node, 1, 1, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); diff --git a/src/atoms/other/prod_axis_one.c b/src/atoms/other/prod_axis_one.c index 72cd12a..b7efa4d 100644 --- a/src/atoms/other/prod_axis_one.c +++ b/src/atoms/other/prod_axis_one.c @@ -1,4 +1,5 @@ 
#include "atoms/non_elementwise_full_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -378,7 +379,7 @@ static void free_type_data(expr *node) expr *new_prod_axis_one(expr *child) { - prod_axis *pnode = (prod_axis *) calloc(1, sizeof(prod_axis)); + prod_axis *pnode = (prod_axis *) SP_CALLOC(1, sizeof(prod_axis)); expr *node = &pnode->base; /* output is always a row vector 1 x d1 (one product per row) */ @@ -387,9 +388,9 @@ expr *new_prod_axis_one(expr *child) free_type_data); /* allocate arrays to store per-row statistics */ - pnode->num_of_zeros = (int *) calloc(child->d1, sizeof(int)); - pnode->zero_index = (int *) calloc(child->d1, sizeof(int)); - pnode->prod_nonzero = (double *) calloc(child->d1, sizeof(double)); + pnode->num_of_zeros = (int *) SP_CALLOC(child->d1, sizeof(int)); + pnode->zero_index = (int *) SP_CALLOC(child->d1, sizeof(int)); + pnode->prod_nonzero = (double *) SP_CALLOC(child->d1, sizeof(double)); node->left = child; expr_retain(child); diff --git a/src/atoms/other/prod_axis_zero.c b/src/atoms/other/prod_axis_zero.c index f6411de..67f3f72 100644 --- a/src/atoms/other/prod_axis_zero.c +++ b/src/atoms/other/prod_axis_zero.c @@ -1,4 +1,5 @@ #include "atoms/non_elementwise_full_dom.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -338,7 +339,7 @@ static void free_type_data(expr *node) /* TODO: refactor to remove diagonal entry as nonzero since it's always zero */ expr *new_prod_axis_zero(expr *child) { - prod_axis *pnode = (prod_axis *) calloc(1, sizeof(prod_axis)); + prod_axis *pnode = (prod_axis *) SP_CALLOC(1, sizeof(prod_axis)); expr *node = &pnode->base; /* output is always a row vector 1 x d2 - TODO: is that correct? 
*/ @@ -347,9 +348,9 @@ expr *new_prod_axis_zero(expr *child) free_type_data); /* allocate arrays to store per-column statistics */ - pnode->num_of_zeros = (int *) calloc(child->d2, sizeof(int)); - pnode->zero_index = (int *) calloc(child->d2, sizeof(int)); - pnode->prod_nonzero = (double *) calloc(child->d2, sizeof(double)); + pnode->num_of_zeros = (int *) SP_CALLOC(child->d2, sizeof(int)); + pnode->zero_index = (int *) SP_CALLOC(child->d2, sizeof(int)); + pnode->prod_nonzero = (double *) SP_CALLOC(child->d2, sizeof(double)); node->left = child; expr_retain(child); diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 02dba70..a26b40e 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -3,6 +3,7 @@ #include "utils/CSC_Matrix.h" #include "utils/CSR_sum.h" #include "utils/cblas_wrapper.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -32,7 +33,7 @@ static void jacobian_init_impl(expr *node) expr *x = node->left; /* dwork stores the result of Q @ f(x) in the forward pass */ - node->work->dwork = (double *) malloc(x->size * sizeof(double)); + node->work->dwork = (double *) SP_MALLOC(x->size * sizeof(double)); if (x->var_id != NOT_A_VARIABLE) { @@ -231,7 +232,7 @@ static bool is_affine(const expr *node) expr *new_quad_form(expr *left, CSR_Matrix *Q) { assert(left->d1 == 1 || left->d2 == 1); /* left must be a vector */ - quad_form_expr *qnode = (quad_form_expr *) calloc(1, sizeof(quad_form_expr)); + quad_form_expr *qnode = (quad_form_expr *) SP_CALLOC(1, sizeof(quad_form_expr)); expr *node = &qnode->base; init_expr(node, 1, 1, left->n_vars, forward, jacobian_init_impl, eval_jacobian, diff --git a/src/expr.c b/src/expr.c index 01e5b49..697417c 100644 --- a/src/expr.c +++ b/src/expr.c @@ -18,6 +18,7 @@ #include "expr.h" #include "utils/CSC_Matrix.h" #include "utils/int_double_pair.h" +#include "utils/tracked_alloc.h" #include #include @@ -31,7 +32,7 @@ void init_expr(expr *node, int d1, int d2, int n_vars, 
forward_fn forward, node->size = d1 * d2; node->n_vars = n_vars; node->refcount = 0; - node->value = (double *) calloc(d1 * d2, sizeof(double)); + node->value = (double *) SP_CALLOC(d1 * d2, sizeof(double)); node->var_id = NOT_A_VARIABLE; node->forward = forward; node->jacobian_init_impl = jacobian_init; @@ -40,7 +41,7 @@ void init_expr(expr *node, int d1, int d2, int n_vars, forward_fn forward, node->wsum_hess_init_impl = wsum_hess_init; node->eval_wsum_hess = eval_wsum_hess; node->free_type_data = free_type_data; - node->work = (Expr_Work *) calloc(1, sizeof(Expr_Work)); + node->work = (Expr_Work *) SP_CALLOC(1, sizeof(Expr_Work)); } void jacobian_csc_init(expr *node) @@ -49,7 +50,7 @@ void jacobian_csc_init(expr *node) { return; } - node->work->csc_work = (int *) malloc(node->n_vars * sizeof(int)); + node->work->csc_work = (int *) SP_MALLOC(node->n_vars * sizeof(int)); node->work->jacobian_csc = csr_to_csc_alloc(node->jacobian, node->work->csc_work); } diff --git a/src/old-code/linear_op.c b/src/old-code/linear_op.c index c1dfc12..8edce0f 100644 --- a/src/old-code/linear_op.c +++ b/src/old-code/linear_op.c @@ -18,6 +18,7 @@ #include "old-code/old_affine.h" #include "subexpr.h" #include "utils/CSR_Matrix.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -87,7 +88,8 @@ expr *new_linear(expr *u, const CSR_Matrix *A, const double *b) { assert(u->d2 == 1); /* Allocate the type-specific struct */ - linear_op_expr *lin_node = (linear_op_expr *) calloc(1, sizeof(linear_op_expr)); + linear_op_expr *lin_node = + (linear_op_expr *) SP_CALLOC(1, sizeof(linear_op_expr)); expr *node = &lin_node->base; init_expr(node, A->m, 1, u->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); @@ -101,7 +103,7 @@ expr *new_linear(expr *u, const CSR_Matrix *A, const double *b) /* Initialize offset (copy b if provided, otherwise NULL) */ if (b != NULL) { - lin_node->b = (double *) malloc(A->m * sizeof(double)); 
+ lin_node->b = (double *) SP_MALLOC(A->m * sizeof(double)); memcpy(lin_node->b, b, A->m * sizeof(double)); } else diff --git a/src/problem.c b/src/problem.c index 99b6fd1..79118ab 100644 --- a/src/problem.c +++ b/src/problem.c @@ -17,6 +17,7 @@ */ #include "problem.h" #include "utils/CSR_sum.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -29,7 +30,8 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork); problem *new_problem(expr *objective, expr **constraints, int n_constraints, bool verbose) { - problem *prob = (problem *) calloc(1, sizeof(problem)); + g_allocated_bytes = 0; + problem *prob = (problem *) SP_CALLOC(1, sizeof(problem)); if (!prob) return NULL; /* objective */ @@ -43,7 +45,7 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints, prob->n_constraints = n_constraints; if (n_constraints > 0) { - prob->constraints = (expr **) malloc(n_constraints * sizeof(expr *)); + prob->constraints = (expr **) SP_MALLOC(n_constraints * sizeof(expr *)); for (int i = 0; i < n_constraints; i++) { prob->constraints[i] = constraints[i]; @@ -54,8 +56,8 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints, /* allocation */ prob->constraint_values = - (double *) calloc(prob->total_constraint_size, sizeof(double)); - prob->gradient_values = (double *) calloc(prob->n_vars, sizeof(double)); + (double *) SP_CALLOC(prob->total_constraint_size, sizeof(double)); + prob->gradient_values = (double *) SP_CALLOC(prob->n_vars, sizeof(double)); /* Initialize statistics */ prob->stats.time_init_derivatives = 0.0; @@ -231,8 +233,8 @@ void problem_init_hessian(problem *prob) prob->lagrange_hessian = new_csr_matrix(prob->n_vars, prob->n_vars, nnz); memset(prob->lagrange_hessian->x, 0, nnz * sizeof(double)); /* affine shortcut */ prob->stats.nnz_hessian = nnz; - prob->hess_idx_map = (int *) malloc(nnz * sizeof(int)); - int *iwork = (int *) malloc(MAX(nnz, prob->n_vars) * sizeof(int)); 
+ prob->hess_idx_map = (int *) SP_MALLOC(nnz * sizeof(int)); + int *iwork = (int *) SP_MALLOC(MAX(nnz, prob->n_vars) * sizeof(int)); problem_lagrange_hess_fill_sparsity(prob, iwork); free(iwork); @@ -267,6 +269,16 @@ void problem_init_derivatives(problem *prob) problem_init_hessian(prob); } +static inline void format_memory(size_t bytes, char *buf, size_t buf_size) +{ + if (bytes < 1024) + snprintf(buf, buf_size, "%zu B", bytes); + else if (bytes < 1024 * 1024) + snprintf(buf, buf_size, "%.2f KB", (double) bytes / 1024.0); + else + snprintf(buf, buf_size, "%.2f MB", (double) bytes / (1024.0 * 1024.0)); +} + static inline void print_end_message(const Diff_engine_stats *stats) { printf("\n" @@ -284,6 +296,11 @@ static inline void print_end_message(const Diff_engine_stats *stats) printf(" Jacobian nonlinear constraints (nnz): %d\n", stats->nnz_nonlinear); printf(" Lagrange Hessian (nnz): %d\n", stats->nnz_hessian); + char mem_buf[64]; + format_memory(stats->memory_bytes, mem_buf, sizeof(mem_buf)); + printf("\nMemory:\n"); + printf(" Total tracked allocations: %12s\n", mem_buf); + printf("\nTiming (seconds):\n"); printf(" Derivative structure (sparsity): %8.3f\n", stats->time_init_derivatives); @@ -308,12 +325,19 @@ void free_problem(problem *prob) { if (prob == NULL) return; + if (prob->verbose) + { + prob->stats.memory_bytes = g_allocated_bytes; + print_end_message(&prob->stats); + } + /* Free allocated arrays */ free(prob->constraint_values); free(prob->gradient_values); free_csr_matrix(prob->jacobian); free_csr_matrix(prob->lagrange_hessian); free_coo_matrix(prob->jacobian_coo); + free_coo_matrix(prob->lagrange_hessian_coo); free(prob->hess_idx_map); /* Release expression references (decrements refcount) */ @@ -324,11 +348,6 @@ void free_problem(problem *prob) } free(prob->constraints); - if (prob->verbose) - { - print_end_message(&prob->stats); - } - /* Free problem struct */ free(prob); } diff --git a/src/utils/COO_Matrix.c b/src/utils/COO_Matrix.c index 
1a39df4..956073b 100644 --- a/src/utils/COO_Matrix.c +++ b/src/utils/COO_Matrix.c @@ -16,18 +16,19 @@ * limitations under the License. */ #include "utils/COO_Matrix.h" +#include "utils/tracked_alloc.h" #include #include COO_Matrix *new_coo_matrix(const CSR_Matrix *A) { - COO_Matrix *coo = (COO_Matrix *) malloc(sizeof(COO_Matrix)); + COO_Matrix *coo = (COO_Matrix *) SP_MALLOC(sizeof(COO_Matrix)); coo->m = A->m; coo->n = A->n; coo->nnz = A->nnz; - coo->rows = (int *) malloc(A->nnz * sizeof(int)); - coo->cols = (int *) malloc(A->nnz * sizeof(int)); - coo->x = (double *) malloc(A->nnz * sizeof(double)); + coo->rows = (int *) SP_MALLOC(A->nnz * sizeof(int)); + coo->cols = (int *) SP_MALLOC(A->nnz * sizeof(int)); + coo->x = (double *) SP_MALLOC(A->nnz * sizeof(double)); coo->value_map = NULL; for (int r = 0; r < A->m; r++) @@ -59,14 +60,14 @@ COO_Matrix *new_coo_matrix_lower_triangular(const CSR_Matrix *A) } } - COO_Matrix *coo = (COO_Matrix *) malloc(sizeof(COO_Matrix)); + COO_Matrix *coo = (COO_Matrix *) SP_MALLOC(sizeof(COO_Matrix)); coo->m = A->m; coo->n = A->n; coo->nnz = count; - coo->rows = (int *) malloc(count * sizeof(int)); - coo->cols = (int *) malloc(count * sizeof(int)); - coo->x = (double *) malloc(count * sizeof(double)); - coo->value_map = (int *) malloc(count * sizeof(int)); + coo->rows = (int *) SP_MALLOC(count * sizeof(int)); + coo->cols = (int *) SP_MALLOC(count * sizeof(int)); + coo->x = (double *) SP_MALLOC(count * sizeof(double)); + coo->value_map = (int *) SP_MALLOC(count * sizeof(int)); /* Pass 2: fill arrays */ int idx = 0; diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c index 9d4e078..d397678 100644 --- a/src/utils/CSC_Matrix.c +++ b/src/utils/CSC_Matrix.c @@ -17,18 +17,19 @@ */ #include "utils/CSC_Matrix.h" #include "utils/iVec.h" +#include "utils/tracked_alloc.h" #include #include #include CSC_Matrix *new_csc_matrix(int m, int n, int nnz) { - CSC_Matrix *matrix = (CSC_Matrix *) malloc(sizeof(CSC_Matrix)); + CSC_Matrix *matrix = 
(CSC_Matrix *) SP_MALLOC(sizeof(CSC_Matrix)); if (!matrix) return NULL; - matrix->p = (int *) malloc((n + 1) * sizeof(int)); - matrix->i = (int *) malloc(nnz * sizeof(int)); - matrix->x = (double *) malloc(nnz * sizeof(double)); + matrix->p = (int *) SP_MALLOC((n + 1) * sizeof(int)); + matrix->i = (int *) SP_MALLOC(nnz * sizeof(int)); + matrix->x = (double *) SP_MALLOC(nnz * sizeof(double)); if (!matrix->p || !matrix->i || !matrix->x) { @@ -66,7 +67,7 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A) int i, j, ii, jj; /* row ptr and column idxs for upper triangular part of C = A^T A */ - int *Cp = (int *) malloc((n + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((n + 1) * sizeof(int)); iVec *Ci = iVec_new(m); Cp[0] = 0; @@ -340,7 +341,7 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) int i, j, ii, jj; /* row ptr and column idxs for C = B^T A */ - int *Cp = (int *) malloc((p + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((p + 1) * sizeof(int)); iVec *Ci = iVec_new(n); Cp[0] = 0; @@ -486,13 +487,13 @@ CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A) int i, j, k, jj, ii, ell; /* marker[row] = last column j that registered row as nonzero */ - int *marker = (int *) malloc(m * sizeof(int)); + int *marker = (int *) SP_MALLOC(m * sizeof(int)); for (i = 0; i < m; i++) { marker[i] = -1; } - int *Cp = (int *) malloc((n + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((n + 1) * sizeof(int)); iVec *Ci = iVec_new(A->nnz); Cp[0] = 0; diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c index 5fa8eb2..cbc3851 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_Matrix.c @@ -17,6 +17,7 @@ */ #include "utils/CSR_Matrix.h" #include "utils/int_double_pair.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -26,10 +27,10 @@ CSR_Matrix *new_csr_matrix(int m, int n, int nnz) { - CSR_Matrix *matrix = (CSR_Matrix *) malloc(sizeof(CSR_Matrix)); - matrix->p = (int *) calloc(m + 1, sizeof(int)); - matrix->i = 
(int *) calloc(nnz, sizeof(int)); - matrix->x = (double *) malloc(nnz * sizeof(double)); + CSR_Matrix *matrix = (CSR_Matrix *) SP_MALLOC(sizeof(CSR_Matrix)); + matrix->p = (int *) SP_CALLOC(m + 1, sizeof(int)); + matrix->i = (int *) SP_CALLOC(nnz, sizeof(int)); + matrix->x = (double *) SP_MALLOC(nnz * sizeof(double)); matrix->m = m; matrix->n = n; matrix->nnz = nnz; @@ -257,7 +258,7 @@ void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C) int i, j, col; /* Count entries per row */ - int *counts = (int *) calloc(m, sizeof(int)); + int *counts = (int *) SP_CALLOC(m, sizeof(int)); for (i = 0; i < m; i++) { for (j = Ap[i]; j < Ap[i + 1]; j++) diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c index 8c3e5f6..dc5240c 100644 --- a/src/utils/CSR_sum.c +++ b/src/utils/CSR_sum.c @@ -18,6 +18,7 @@ #include "utils/CSR_sum.h" #include "utils/CSR_Matrix.h" #include "utils/int_double_pair.h" +#include "utils/tracked_alloc.h" #include "utils/utils.h" #include #include @@ -372,7 +373,7 @@ CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *out = new_csr_matrix(m, n, nnz_ub); for (int k = 0; k < 4; k++) { - idx_maps[k] = (int *) malloc(inputs[k]->nnz * sizeof(int)); + idx_maps[k] = (int *) SP_MALLOC(inputs[k]->nnz * sizeof(int)); } /* 4-way sorted merge per row */ diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c index 8a2dd2c..a973abe 100644 --- a/src/utils/dense_matrix.c +++ b/src/utils/dense_matrix.c @@ -18,6 +18,7 @@ #include "utils/dense_matrix.h" #include "utils/cblas_wrapper.h" #include "utils/linalg_dense_sparse_matmuls.h" +#include "utils/tracked_alloc.h" #include #include @@ -51,16 +52,16 @@ static void dense_free(Matrix *A) Matrix *new_dense_matrix(int m, int n, const double *data) { - Dense_Matrix *dm = (Dense_Matrix *) calloc(1, sizeof(Dense_Matrix)); + Dense_Matrix *dm = (Dense_Matrix *) SP_CALLOC(1, sizeof(Dense_Matrix)); dm->base.m = m; dm->base.n = n; dm->base.block_left_mult_vec = 
dense_block_left_mult_vec; dm->base.block_left_mult_sparsity = I_kron_A_alloc; dm->base.block_left_mult_values = I_kron_A_fill_values; dm->base.free_fn = dense_free; - dm->x = (double *) malloc(m * n * sizeof(double)); + dm->x = (double *) SP_MALLOC(m * n * sizeof(double)); memcpy(dm->x, data, m * n * sizeof(double)); - dm->work = (double *) malloc(n * sizeof(double)); + dm->work = (double *) SP_MALLOC(n * sizeof(double)); return &dm->base; } @@ -68,7 +69,7 @@ Matrix *dense_matrix_trans(const Dense_Matrix *A) { int m = A->base.m; int n = A->base.n; - double *AT_x = (double *) malloc(m * n * sizeof(double)); + double *AT_x = (double *) SP_MALLOC(m * n * sizeof(double)); for (int i = 0; i < m; i++) { diff --git a/src/utils/int_double_pair.c b/src/utils/int_double_pair.c index 6b49021..4a43602 100644 --- a/src/utils/int_double_pair.c +++ b/src/utils/int_double_pair.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "utils/int_double_pair.h" +#include "utils/tracked_alloc.h" #include static int compare_int_double_pair(const void *a, const void *b) @@ -30,7 +31,7 @@ static int compare_int_double_pair(const void *a, const void *b) int_double_pair *new_int_double_pair_array(int size) { - return (int_double_pair *) malloc(size * sizeof(int_double_pair)); + return (int_double_pair *) SP_MALLOC(size * sizeof(int_double_pair)); } void set_int_double_pair_array(int_double_pair *pair, int *ints, double *doubles, diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index a073349..f90c4e3 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -20,6 +20,7 @@ #include "utils/cblas_wrapper.h" #include "utils/dense_matrix.h" #include "utils/iVec.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -34,7 +35,7 @@ CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p) int n = A->n; int i, j, jj, block, block_start, block_end, block_jj_start, row_offset; 
- int *Cp = (int *) malloc((J->n + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((J->n + 1) * sizeof(int)); iVec *Ci = iVec_new(J->n * m); Cp[0] = 0; @@ -173,7 +174,7 @@ CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J) // --------------------------------------------------------------- // build sparsity pattern per blk_row // --------------------------------------------------------------- - iVec **pattern = (iVec **) malloc(m * sizeof(iVec *)); + iVec **pattern = (iVec **) SP_MALLOC(m * sizeof(iVec *)); total_nnz = 0; for (blk_row = 0; blk_row < m; blk_row++) { @@ -261,7 +262,7 @@ CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J) * nonzero in row range [blk*k, blk*k + k). */ int i, j, ii, blk; - iVec **pattern = (iVec **) malloc(n * sizeof(iVec *)); + iVec **pattern = (iVec **) SP_MALLOC(n * sizeof(iVec *)); int total_nnz = 0; for (blk = 0; blk < n; blk++) { diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c index 9a7b164..ac7252b 100644 --- a/src/utils/linalg_sparse_matmuls.c +++ b/src/utils/linalg_sparse_matmuls.c @@ -18,6 +18,7 @@ #include "utils/CSC_Matrix.h" #include "utils/CSR_Matrix.h" #include "utils/iVec.h" +#include "utils/tracked_alloc.h" #include #include #include @@ -113,7 +114,7 @@ CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A, row_offset; /* allocate column pointers and an estimate of row indices */ - int *Cp = (int *) malloc((J->n + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((J->n + 1) * sizeof(int)); iVec *Ci = iVec_new(J->n * m); Cp[0] = 0; @@ -276,7 +277,7 @@ CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B) int len_a, len_b; - int *Cp = (int *) malloc((m + 1) * sizeof(int)); + int *Cp = (int *) SP_MALLOC((m + 1) * sizeof(int)); iVec *Ci = iVec_new(m); Cp[0] = 0; diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 24ed539..e327886 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ 
-17,6 +17,7 @@ */ #include "utils/linalg_sparse_matmuls.h" #include "utils/matrix.h" +#include "utils/tracked_alloc.h" #include static void sparse_block_left_mult_vec(const Matrix *self, const double *x, @@ -49,7 +50,7 @@ static void sparse_free(Matrix *self) Matrix *new_sparse_matrix(const CSR_Matrix *A) { - Sparse_Matrix *sm = (Sparse_Matrix *) calloc(1, sizeof(Sparse_Matrix)); + Sparse_Matrix *sm = (Sparse_Matrix *) SP_CALLOC(1, sizeof(Sparse_Matrix)); sm->base.m = A->m; sm->base.n = A->n; sm->base.block_left_mult_vec = sparse_block_left_mult_vec; @@ -63,7 +64,7 @@ Matrix *new_sparse_matrix(const CSR_Matrix *A) Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork) { CSR_Matrix *AT = transpose(self->csr, iwork); - Sparse_Matrix *sm = (Sparse_Matrix *) calloc(1, sizeof(Sparse_Matrix)); + Sparse_Matrix *sm = (Sparse_Matrix *) SP_CALLOC(1, sizeof(Sparse_Matrix)); sm->base.m = AT->m; sm->base.n = AT->n; sm->base.block_left_mult_vec = sparse_block_left_mult_vec; From 351cae1d67dadb9e772410a96654498a3a6467a4 Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 1 Apr 2026 18:14:21 -0700 Subject: [PATCH 2/4] message --- src/problem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/problem.c b/src/problem.c index 79118ab..399079b 100644 --- a/src/problem.c +++ b/src/problem.c @@ -272,11 +272,17 @@ void problem_init_derivatives(problem *prob) static inline void format_memory(size_t bytes, char *buf, size_t buf_size) { if (bytes < 1024) + { snprintf(buf, buf_size, "%zu B", bytes); + } else if (bytes < 1024 * 1024) + { snprintf(buf, buf_size, "%.2f KB", (double) bytes / 1024.0); + } else + { snprintf(buf, buf_size, "%.2f MB", (double) bytes / (1024.0 * 1024.0)); + } } static inline void print_end_message(const Diff_engine_stats *stats) @@ -295,11 +301,9 @@ static inline void print_end_message(const Diff_engine_stats *stats) printf(" Affine constraints (nnz): %d\n", stats->nnz_affine); printf(" Jacobian nonlinear constraints (nnz): 
%d\n", stats->nnz_nonlinear); printf(" Lagrange Hessian (nnz): %d\n", stats->nnz_hessian); - char mem_buf[64]; format_memory(stats->memory_bytes, mem_buf, sizeof(mem_buf)); - printf("\nMemory:\n"); - printf(" Total tracked allocations: %12s\n", mem_buf); + printf(" Allocated memory: %s\n", mem_buf); printf("\nTiming (seconds):\n"); printf(" Derivative structure (sparsity): %8.3f\n", From 3eedf7cba69691735c7acb956c9bcc1985b16e53 Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 1 Apr 2026 18:17:20 -0700 Subject: [PATCH 3/4] alloc file --- include/utils/tracked_alloc.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/utils/tracked_alloc.h diff --git a/include/utils/tracked_alloc.h b/include/utils/tracked_alloc.h new file mode 100644 index 0000000..e8242da --- /dev/null +++ b/include/utils/tracked_alloc.h @@ -0,0 +1,23 @@ +#ifndef TRACKED_ALLOC_H +#define TRACKED_ALLOC_H + +#include <stddef.h> +#include <stdlib.h> + +extern size_t g_allocated_bytes; + +static inline void *SP_MALLOC(size_t size) +{ + void *ptr = malloc(size); + if (ptr) g_allocated_bytes += size; + return ptr; +} + +static inline void *SP_CALLOC(size_t count, size_t size) +{ + void *ptr = calloc(count, size); + if (ptr) g_allocated_bytes += count * size; + return ptr; +} + +#endif /* TRACKED_ALLOC_H */ From 4aa04b5defc166660fc9e8331b2f83a72da419ce Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 1 Apr 2026 18:17:32 -0700 Subject: [PATCH 4/4] alloc file again --- src/utils/tracked_alloc.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 src/utils/tracked_alloc.c diff --git a/src/utils/tracked_alloc.c b/src/utils/tracked_alloc.c new file mode 100644 index 0000000..336643a --- /dev/null +++ b/src/utils/tracked_alloc.c @@ -0,0 +1,3 @@ +#include "utils/tracked_alloc.h" + +size_t g_allocated_bytes = 0;