diff --git a/include/expr.h b/include/expr.h
index b2ea35c..4068399 100644
--- a/include/expr.h
+++ b/include/expr.h
@@ -21,7 +21,7 @@
 #include "utils/CSC_Matrix.h"
 #include "utils/CSR_Matrix.h"
 #include <stdbool.h>
-#include <stddef.h>
+#include <stddef.h> /* size_t */
 #include <string.h>
 
 #define JAC_IDXS_NOT_SET -1
@@ -63,6 +63,8 @@ typedef struct expr
     //                         general quantities
     // ------------------------------------------------------------------------
     int d1, d2, size, n_vars, refcount, var_id;
+    size_t bytes;
+    bool visited;
     struct expr *left;
     struct expr *right;
 
diff --git a/include/problem.h b/include/problem.h
index 25d1517..48fa3e8 100644
--- a/include/problem.h
+++ b/include/problem.h
@@ -38,6 +38,7 @@ typedef struct
     int nnz_hessian;
     int n_vars;
     int total_constraint_size;
+    size_t bytes;
 } Diff_engine_stats;
 
 typedef struct problem
diff --git a/include/utils/COO_Matrix.h b/include/utils/COO_Matrix.h
index 16a9aad..05924d3 100644
--- a/include/utils/COO_Matrix.h
+++ b/include/utils/COO_Matrix.h
@@ -2,6 +2,7 @@
 #define COO_MATRIX_H
 
 #include "CSR_Matrix.h"
+#include <stddef.h>
 
 /* COO (Coordinate) Sparse Matrix Format
  *
@@ -40,4 +41,8 @@ void refresh_lower_triangular_coo(COO_Matrix *coo, const double *vals);
 
 void free_coo_matrix(COO_Matrix *matrix);
 
+/* Returns total bytes used by rows, cols, x, value_map arrays
+   (0 if A is NULL) */
+size_t coo_bytes(const COO_Matrix *A);
+
 #endif /* COO_MATRIX_H */
diff --git a/include/utils/CSC_Matrix.h b/include/utils/CSC_Matrix.h
index 8270120..647e74f 100644
--- a/include/utils/CSC_Matrix.h
+++ b/include/utils/CSC_Matrix.h
@@ -2,6 +2,7 @@
 #define CSC_MATRIX_H
 
 #include "CSR_Matrix.h"
+#include <stddef.h>
 
 /* CSC (Compressed Sparse Column) Matrix Format
  *
@@ -23,18 +24,19 @@ typedef struct CSC_Matrix
     int nnz;
 } CSC_Matrix;
 
-/* constructor and destructor */
-CSC_Matrix *new_csc_matrix(int m, int n, int nnz);
+/* constructor and destructor.
+   If mem is non-NULL, *mem is incremented by the bytes allocated. */
+CSC_Matrix *new_csc_matrix(int m, int n, int nnz, size_t *mem);
 void free_csc_matrix(CSC_Matrix *matrix);
 
 /* Fill sparsity of C = A^T D A for diagonal D */
-CSR_Matrix *ATA_alloc(const CSC_Matrix *A);
+CSR_Matrix *ATA_alloc(const CSC_Matrix *A, size_t *mem);
 
 /* Fill sparsity of C = B^T D A for diagonal D */
-CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B);
+CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B, size_t *mem);
 
 /* Fill sparsity of C = BA, where B is symmetric. */
-CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A);
+CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A, size_t *mem);
 
 /* Compute values for C = A^T D A (null d corresponds to D as identity) */
 void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C);
@@ -53,11 +55,14 @@ void yTA_fill_values(const CSC_Matrix *A, const double *x, CSR_Matrix *C);
 int count_nonzero_cols_csc(const CSC_Matrix *A);
 
 /* convert from CSR to CSC format */
-CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork);
+CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork, size_t *mem);
 void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork);
 
 /* convert from CSC to CSR format */
-CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork);
+CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork, size_t *mem);
 void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork);
 
+/* Returns total bytes used by p, i, x arrays (0 if A is NULL) */
+size_t csc_bytes(const CSC_Matrix *A);
+
 #endif /* CSC_MATRIX_H */
diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_Matrix.h
index a6758c8..1f5cac4 100644
--- a/include/utils/CSR_Matrix.h
+++ b/include/utils/CSR_Matrix.h
@@ -1,6 +1,7 @@
 #ifndef CSR_MATRIX_H
 #define CSR_MATRIX_H
 #include <stdbool.h>
+#include <stddef.h>
 
 /* CSR (Compressed Sparse Row) Matrix Format
  *
@@ -22,16 +23,17 @@ typedef struct CSR_Matrix
     int nnz;
 } CSR_Matrix;
 
-/* constructors and destructors */
-CSR_Matrix *new_csr_matrix(int m, int n, int nnz);
-CSR_Matrix *new_csr(const CSR_Matrix *A);
-CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A);
+/* constructors and destructors.
+   If mem is non-NULL, *mem is incremented by the bytes allocated. */
+CSR_Matrix *new_csr_matrix(int m, int n, int nnz, size_t *mem);
+CSR_Matrix *new_csr(const CSR_Matrix *A, size_t *mem);
+CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A, size_t *mem);
 void free_csr_matrix(CSR_Matrix *matrix);
 void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C);
 
 /* transpose functionality (iwork must be of size A->n) */
 CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork);
-CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork);
+CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork, size_t *mem);
 void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork);
 
 /* computes dense y = Ax */
@@ -49,6 +51,9 @@ void insert_idx(int idx, int *arr, int len);
 /* get value at position (row, col) in A */
 double csr_get_value(const CSR_Matrix *A, int row, int col);
 
+/* Returns total bytes used by p, i, x arrays (0 if A is NULL) */
+size_t csr_bytes(const CSR_Matrix *A);
+
 /* Expand symmetric CSR matrix A to full matrix C. A is assumed to store
    only upper triangle. C must be pre-allocated with sufficient nnz */
 void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C);
diff --git a/include/utils/CSR_sum.h b/include/utils/CSR_sum.h
index 85fd334..534ffda 100644
--- a/include/utils/CSR_sum.h
+++ b/include/utils/CSR_sum.h
@@ -46,7 +46,7 @@ void sum_spaced_rows_into_row_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C,
 /* Compute sparsity pattern of out = A + B + C + D */
 CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B,
                             const CSR_Matrix *C, const CSR_Matrix *D,
-                            int *idx_maps[4]);
+                            int *idx_maps[4], size_t *mem);
 // ------------------------------------------------------------------------------------
 
 /* Accumulates values from A according to map. Must memset to zero before calling. */
diff --git a/include/utils/linalg_dense_sparse_matmuls.h b/include/utils/linalg_dense_sparse_matmuls.h
index 8404940..6bedce4 100644
--- a/include/utils/linalg_dense_sparse_matmuls.h
+++ b/include/utils/linalg_dense_sparse_matmuls.h
@@ -8,18 +8,18 @@
 /* C = (I_p kron A) @ J via the polymorphic Matrix interface.
  * A is dense m x n, J is (n*p) x k in CSC, C is (m*p) x k in CSC. */
 // TODO: maybe we can replace these with I_kron_X functionality?
-CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p);
+CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p, size_t *mem);
 void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C);
 
 /* Sparsity and values of C = (Y^T kron I_m) @ J where Y is k x n, J is (m*k) x p,
    and C is (m*n) x p. Y is given in column-major dense format. */
-CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J);
+CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J, size_t *mem);
 void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_Matrix *J,
                            CSR_Matrix *C);
 
 /* Sparsity and values of C = (I_n kron X) @ J where X is m x k (col-major dense),
    J is (k*n) x p, and C is (m*n) x p. */
-CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J);
+CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J, size_t *mem);
 void I_kron_X_fill_values(int m, int k, int n, const double *X, const CSC_Matrix *J,
                           CSR_Matrix *C);
 
diff --git a/include/utils/linalg_sparse_matmuls.h b/include/utils/linalg_sparse_matmuls.h
index 7a40c92..815577c 100644
--- a/include/utils/linalg_sparse_matmuls.h
+++ b/include/utils/linalg_sparse_matmuls.h
@@ -14,8 +14,8 @@
     * Mathematically it corresponds to  C = [A @ J1; A @ J2; ...; A @ Jp],
       where J = [J1; J2; ...; Jp]
 */
-CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A,
-                                              const CSC_Matrix *J, int p);
+CSC_Matrix *block_left_multiply_alloc(const CSR_Matrix *A, const CSC_Matrix *J,
+                                      int p, size_t *mem);
 
 void block_left_multiply_fill_values(const CSR_Matrix *A, const CSC_Matrix *J,
                                      CSC_Matrix *C);
@@ -36,6 +36,7 @@ void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B,
 /* C = A @ B where A is CSR, B is CSC. Result C is CSR.
  * Allocates and precomputes sparsity pattern. No workspace required.
  */
-CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B);
+CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B,
+                                 size_t *mem);
 
 #endif /* LINALG_H */
diff --git a/include/utils/matrix.h b/include/utils/matrix.h
index 478fabd..326994f 100644
--- a/include/utils/matrix.h
+++ b/include/utils/matrix.h
@@ -28,7 +28,7 @@ typedef struct Matrix
     void (*block_left_mult_vec)(const struct Matrix *self, const double *x,
                                 double *y, int p);
     CSC_Matrix *(*block_left_mult_sparsity)(const struct Matrix *self,
-                                            const CSC_Matrix *J, int p);
+                                            const CSC_Matrix *J, int p, size_t *mem);
     void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J,
                                    CSC_Matrix *C);
     void (*free_fn)(struct Matrix *self);
diff --git a/src/atoms/affine/add.c b/src/atoms/affine/add.c
index 59b6223..5b65038 100644
--- a/src/atoms/affine/add.c
+++ b/src/atoms/affine/add.c
@@ -42,7 +42,7 @@ static void jacobian_init_impl(expr *node)
 
     /* we never have to store more than the sum of children's nnz */
     int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max, &node->bytes);
 
     /* fill sparsity pattern  */
     sum_csr_alloc(node->left->jacobian, node->right->jacobian, node->jacobian);
@@ -66,7 +66,8 @@ static void wsum_hess_init_impl(expr *node)
 
     /* we never have to store more than the sum of children's nnz */
     int nnz_max = node->left->wsum_hess->nnz + node->right->wsum_hess->nnz;
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz_max);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, nnz_max, &node->bytes);
 
     /* fill sparsity pattern of hessian */
     sum_csr_alloc(node->left->wsum_hess, node->right->wsum_hess, node->wsum_hess);
diff --git a/src/atoms/affine/broadcast.c b/src/atoms/affine/broadcast.c
index 9e3ddae..71974e9 100644
--- a/src/atoms/affine/broadcast.c
+++ b/src/atoms/affine/broadcast.c
@@ -92,7 +92,8 @@ static void jacobian_init_impl(expr *node)
         total_nnz = x->jacobian->nnz * node->size;
     }
 
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, total_nnz);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, total_nnz, &node->bytes);
 
     // ---------------------------------------------------------------------
     //                 fill sparsity pattern
@@ -191,10 +192,11 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* Same sparsity as child - weights get summed */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 
     /* allocate space for weight vector */
     node->work->dwork = malloc(node->size * sizeof(double));
+    node->bytes += node->size * sizeof(double);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/const_scalar_mult.c b/src/atoms/affine/const_scalar_mult.c
index abc4651..6420aac 100644
--- a/src/atoms/affine/const_scalar_mult.c
+++ b/src/atoms/affine/const_scalar_mult.c
@@ -47,7 +47,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     /* same sparsity as child */
-    node->jacobian = new_csr_copy_sparsity(x->jacobian);
+    node->jacobian = new_csr_copy_sparsity(x->jacobian, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -73,7 +73,7 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* same sparsity as child */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/const_vector_mult.c b/src/atoms/affine/const_vector_mult.c
index 3b6346e..f720c3d 100644
--- a/src/atoms/affine/const_vector_mult.c
+++ b/src/atoms/affine/const_vector_mult.c
@@ -46,7 +46,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     /* same sparsity as child */
-    node->jacobian = new_csr_copy_sparsity(x->jacobian);
+    node->jacobian = new_csr_copy_sparsity(x->jacobian, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -75,9 +75,10 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* same sparsity as child */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 
     node->work->dwork = (double *) malloc(node->size * sizeof(double));
+    node->bytes += node->size * sizeof(double);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
@@ -123,6 +124,7 @@ expr *new_const_vector_mult(const double *a, expr *child)
 
     /* copy a vector */
     vnode->a = (double *) malloc(child->size * sizeof(double));
+    node->bytes += child->size * sizeof(double);
     memcpy(vnode->a, a, child->size * sizeof(double));
 
     return node;
diff --git a/src/atoms/affine/constant.c b/src/atoms/affine/constant.c
index b7a5bfc..dd71015 100644
--- a/src/atoms/affine/constant.c
+++ b/src/atoms/affine/constant.c
@@ -30,7 +30,7 @@ static void jacobian_init_impl(expr *node)
 {
     /* Constant jacobian is all zeros: size x n_vars with 0 nonzeros.
      * new_csr_matrix uses calloc for row pointers, so they're already 0. */
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, 0);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, 0, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -42,7 +42,7 @@ static void eval_jacobian(expr *node)
 static void wsum_hess_init_impl(expr *node)
 {
     /* Constant Hessian is all zeros: n_vars x n_vars with 0 nonzeros. */
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0);
+    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/diag_vec.c b/src/atoms/affine/diag_vec.c
index 0c887dd..5b66c7f 100644
--- a/src/atoms/affine/diag_vec.c
+++ b/src/atoms/affine/diag_vec.c
@@ -51,7 +51,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     CSR_Matrix *Jx = x->jacobian;
-    CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz);
+    CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz, &node->bytes);
 
     /* Output has n^2 rows but only n diagonal positions are non-empty.
      * Diagonal position i is at row i*(n+1) in Fortran order. */
@@ -101,11 +101,12 @@ static void wsum_hess_init_impl(expr *node)
 
     /* workspace for extracting diagonal weights */
     node->work->dwork = (double *) calloc(x->size, sizeof(double));
+    node->bytes += x->size * sizeof(double);
 
     /* Copy child's Hessian structure (diag_vec is linear, so its own Hessian is
      * zero) */
     CSR_Matrix *Hx = x->wsum_hess;
-    node->wsum_hess = new_csr_copy_sparsity(Hx);
+    node->wsum_hess = new_csr_copy_sparsity(Hx, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c
index 1be5fb9..dcbe5b5 100644
--- a/src/atoms/affine/hstack.c
+++ b/src/atoms/affine/hstack.c
@@ -55,7 +55,7 @@ static void jacobian_init_impl(expr *node)
         nnz += hnode->args[i]->jacobian->nnz;
     }
 
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz, &node->bytes);
 
     /* precompute sparsity pattern of this node's jacobian */
     int row_offset = 0;
@@ -113,8 +113,8 @@ static void wsum_hess_init_impl(expr *node)
 
     /* worst-case scenario the nnz of node->wsum_hess is the sum of children's
        nnz */
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz);
-    hnode->CSR_work = new_csr_matrix(node->n_vars, node->n_vars, nnz);
+    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz, &node->bytes);
+    hnode->CSR_work = new_csr_matrix(node->n_vars, node->n_vars, nnz, &node->bytes);
 
     /* fill sparsity pattern */
     CSR_Matrix *H = node->wsum_hess;
@@ -193,6 +193,7 @@ expr *new_hstack(expr **args, int n_args, int n_vars)
 
     /* Set type-specific fields (deep copy args array) */
     hnode->args = (expr **) calloc(n_args, sizeof(expr *));
+    node->bytes += n_args * sizeof(expr *);
     hnode->n_args = n_args;
     for (int i = 0; i < n_args; i++)
     {
diff --git a/src/atoms/affine/index.c b/src/atoms/affine/index.c
index 57b8af7..1732a67 100644
--- a/src/atoms/affine/index.c
+++ b/src/atoms/affine/index.c
@@ -64,7 +64,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     CSR_Matrix *Jx = x->jacobian;
-    CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz);
+    CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz, &node->bytes);
 
     /* set sparsity pattern */
     J->p[0] = 0;
@@ -105,6 +105,7 @@ static void wsum_hess_init_impl(expr *node)
 
     /* for setting weight vector to evaluate hessian of child */
     node->work->dwork = (double *) calloc(x->size, sizeof(double));
+    node->bytes += x->size * sizeof(double);
 
     /* in the implementation of eval_wsum_hess we evaluate the
        child's hessian with a weight vector that has w[i] = 0
@@ -113,7 +114,7 @@ static void wsum_hess_init_impl(expr *node)
        structural zeros, but we do not try to exploit that sparsity
        right now. */
     CSR_Matrix *Hx = x->wsum_hess;
-    node->wsum_hess = new_csr_copy_sparsity(Hx);
+    node->wsum_hess = new_csr_copy_sparsity(Hx, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
@@ -175,6 +176,7 @@ expr *new_index(expr *child, int d1, int d2, const int *indices, int n_idxs)
 
     /* copy indices */
     idx->indices = (int *) malloc(n_idxs * sizeof(int));
+    node->bytes += n_idxs * sizeof(int);
     memcpy(idx->indices, indices, n_idxs * sizeof(int));
     idx->n_idxs = n_idxs;
 
diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c
index ec7e082..8e9fa90 100644
--- a/src/atoms/affine/left_matmul.c
+++ b/src/atoms/affine/left_matmul.c
@@ -88,12 +88,14 @@ static void jacobian_init_impl(expr *node)
 
     /* initialize child's jacobian and precompute sparsity of its CSC */
     jacobian_init(x);
-    lnode->Jchild_CSC = csr_to_csc_alloc(x->jacobian, node->work->iwork);
+    lnode->Jchild_CSC =
+        csr_to_csc_alloc(x->jacobian, node->work->iwork, &node->bytes);
 
     /* precompute sparsity of this node's jacobian in CSC and CSR */
     lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC,
-                                                      lnode->n_blocks);
-    node->jacobian = csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work);
+                                                      lnode->n_blocks, &node->bytes);
+    node->jacobian =
+        csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -120,12 +122,13 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* allocate this node's hessian with the same sparsity as child's */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 
     /* work for computing A^T w*/
     int n_blocks = ((left_matmul_expr *) node)->n_blocks;
     int dim = ((left_matmul_expr *) node)->AT->m * n_blocks;
     node->work->dwork = (double *) malloc(dim * sizeof(double));
+    node->bytes += dim * sizeof(double);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
@@ -178,8 +181,11 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A)
        (requiring size node->n_vars) and for transposing A (requiring size A->n).
        csc_to_csr_work is used for converting J_CSC to CSR (requiring
        node->size) */
-    node->work->iwork = (int *) malloc(MAX(A->n, node->n_vars) * sizeof(int));
+    int iwork_count = MAX(A->n, node->n_vars);
+    node->work->iwork = (int *) malloc(iwork_count * sizeof(int));
+    node->bytes += iwork_count * sizeof(int);
     lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
+    node->bytes += node->size * sizeof(int);
     lnode->n_blocks = n_blocks;
 
     /* store A and AT */
@@ -219,8 +225,11 @@ expr *new_left_matmul_dense(expr *u, int m, int n, const double *data)
     node->left = u;
     expr_retain(u);
 
-    node->work->iwork = (int *) malloc(MAX(n, node->n_vars) * sizeof(int));
+    int iwork_count = MAX(n, node->n_vars);
+    node->work->iwork = (int *) malloc(iwork_count * sizeof(int));
+    node->bytes += iwork_count * sizeof(int);
     lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
+    node->bytes += node->size * sizeof(int);
     lnode->n_blocks = n_blocks;
 
     lnode->A = new_dense_matrix(m, n, data);
diff --git a/src/atoms/affine/neg.c b/src/atoms/affine/neg.c
index 3bc4cbe..d2d30ec 100644
--- a/src/atoms/affine/neg.c
+++ b/src/atoms/affine/neg.c
@@ -39,7 +39,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     /* same sparsity pattern as child */
-    node->jacobian = new_csr_copy_sparsity(x->jacobian);
+    node->jacobian = new_csr_copy_sparsity(x->jacobian, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -64,7 +64,7 @@ static void wsum_hess_init_impl(expr *node)
 
     /* same sparsity pattern as child */
     CSR_Matrix *child_hess = x->wsum_hess;
-    node->wsum_hess = new_csr_copy_sparsity(child_hess);
+    node->wsum_hess = new_csr_copy_sparsity(child_hess, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/promote.c b/src/atoms/affine/promote.c
index 61bd232..fc8ab32 100644
--- a/src/atoms/affine/promote.c
+++ b/src/atoms/affine/promote.c
@@ -41,7 +41,7 @@ static void jacobian_init_impl(expr *node)
 
     /* each output row copies the single row from child's jacobian */
     int nnz = node->size * x->jacobian->nnz;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz, &node->bytes);
 
     /* fill sparsity pattern */
     CSR_Matrix *J = node->jacobian;
@@ -78,7 +78,7 @@ static void wsum_hess_init_impl(expr *node)
 
     /* same sparsity as child since we're summing weights */
     CSR_Matrix *child_hess = node->left->wsum_hess;
-    node->wsum_hess = new_csr_copy_sparsity(child_hess);
+    node->wsum_hess = new_csr_copy_sparsity(child_hess, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/reshape.c b/src/atoms/affine/reshape.c
index 1444724..973028b 100644
--- a/src/atoms/affine/reshape.c
+++ b/src/atoms/affine/reshape.c
@@ -35,7 +35,7 @@ static void jacobian_init_impl(expr *node)
 {
     expr *x = node->left;
     jacobian_init(x);
-    node->jacobian = new_csr_copy_sparsity(x->jacobian);
+    node->jacobian = new_csr_copy_sparsity(x->jacobian, &node->bytes);
 }
 
 static void eval_jacobian(expr *node)
@@ -49,7 +49,7 @@ static void wsum_hess_init_impl(expr *node)
 {
     expr *x = node->left;
     wsum_hess_init(x);
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/sum.c b/src/atoms/affine/sum.c
index bdd7140..46dd634 100644
--- a/src/atoms/affine/sum.c
+++ b/src/atoms/affine/sum.c
@@ -87,10 +87,13 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(x);
 
     /* we never have to store more than the child's nnz */
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, x->jacobian->nnz);
-    node->work->iwork =
-        malloc(MAX(node->jacobian->n, x->jacobian->nnz) * sizeof(int));
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, x->jacobian->nnz, &node->bytes);
+    int iwork_count = MAX(node->jacobian->n, x->jacobian->nnz);
+    node->work->iwork = malloc(iwork_count * sizeof(int));
+    node->bytes += iwork_count * sizeof(int);
     snode->idx_map = malloc(x->jacobian->nnz * sizeof(int));
+    node->bytes += x->jacobian->nnz * sizeof(int);
 
     /* the idx_map array maps each nonzero entry j in x->jacobian
        to the corresponding index in the output row matrix C. Specifically, for
@@ -134,8 +137,9 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* we never have to store more than the child's nnz */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
     node->work->dwork = malloc(x->size * sizeof(double));
+    node->bytes += x->size * sizeof(double);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/trace.c b/src/atoms/affine/trace.c
index 7a2f045..e28902a 100644
--- a/src/atoms/affine/trace.c
+++ b/src/atoms/affine/trace.c
@@ -63,13 +63,15 @@ static void jacobian_init_impl(expr *node)
         total_nnz += A->p[row + 1] - A->p[row];
     }
 
-    node->jacobian = new_csr_matrix(1, node->n_vars, total_nnz);
+    node->jacobian = new_csr_matrix(1, node->n_vars, total_nnz, &node->bytes);
 
     // ---------------------------------------------------------------
     // fill sparsity pattern and idx_map
     // ---------------------------------------------------------------
     trace_expr *tnode = (trace_expr *) node;
-    node->work->iwork = malloc(MAX(node->jacobian->n, total_nnz) * sizeof(int));
+    int iwork_count = MAX(node->jacobian->n, total_nnz);
+    node->work->iwork = malloc(iwork_count * sizeof(int));
+    node->bytes += iwork_count * sizeof(int);
 
     /* the idx_map array maps each nonzero entry j in the original matrix A (from the
        selected, evenly spaced rows) to the corresponding index in the output row
@@ -77,6 +79,7 @@ static void jacobian_init_impl(expr *node)
        rows), idx_map[j] gives the position in C->x where the value from A->x[j]
        should be accumulated. */
     tnode->idx_map = malloc(x->jacobian->nnz * sizeof(int));
+    node->bytes += x->jacobian->nnz * sizeof(int);
     sum_spaced_rows_into_row_csr_alloc(A, node->jacobian, row_spacing,
                                        node->work->iwork, tnode->idx_map);
 }
@@ -104,12 +107,13 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     node->work->dwork = (double *) calloc(x->size, sizeof(double));
+    node->bytes += x->size * sizeof(double);
 
     /* We copy over the sparsity pattern from the child. This also includes the
        contribution to wsum_hess of entries of the child that will always have
        zero weight in eval_wsum_hess. We do this for simplicity. But the Hessian
        can for sure be made more sophisticated. */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
diff --git a/src/atoms/affine/transpose.c b/src/atoms/affine/transpose.c
index 56b7326..cefaf96 100644
--- a/src/atoms/affine/transpose.c
+++ b/src/atoms/affine/transpose.c
@@ -45,7 +45,7 @@ static void jacobian_init_impl(expr *node)
     expr *child = node->left;
     jacobian_init(child);
     CSR_Matrix *Jc = child->jacobian;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, Jc->nnz);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, Jc->nnz, &node->bytes);
 
     /* fill sparsity */
     CSR_Matrix *J = node->jacobian;
@@ -92,10 +92,11 @@ static void wsum_hess_init_impl(expr *node)
     wsum_hess_init(x);
 
     /* same sparsity pattern as child */
-    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess);
+    node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 
     /* for computing Kw where K is the commutation matrix */
     node->work->dwork = (double *) malloc(node->size * sizeof(double));
+    node->bytes += node->size * sizeof(double);
 }
 static void eval_wsum_hess(expr *node, const double *w)
 {
diff --git a/src/atoms/affine/variable.c b/src/atoms/affine/variable.c
index 23844d1..89c739b 100644
--- a/src/atoms/affine/variable.c
+++ b/src/atoms/affine/variable.c
@@ -26,7 +26,8 @@ static void forward(expr *node, const double *u)
 
 static void jacobian_init_impl(expr *node)
 {
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, node->size, &node->bytes);
     for (int j = 0; j < node->size; j++)
     {
         node->jacobian->p[j] = j;
@@ -45,7 +46,7 @@ static void eval_jacobian(expr *node)
 static void wsum_hess_init_impl(expr *node)
 {
     /* Variables have zero Hessian */
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0);
+    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0, &node->bytes);
 }
 
 static void wsum_hess_eval(expr *node, const double *w)
diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c
index cfe3ceb..f5e901f 100644
--- a/src/atoms/bivariate_full_dom/matmul.c
+++ b/src/atoms/bivariate_full_dom/matmul.c
@@ -40,10 +40,10 @@
 // column positions (offset by j in the Y-variable indexing).
 // ------------------------------------------------------------------------------
 
-static CSR_Matrix *build_cross_hessian_sparsity(int m, int k, int n)
+static CSR_Matrix *build_cross_hessian_sparsity(int m, int k, int n, size_t *mem)
 {
     int total_nnz = m * k * n;
-    CSR_Matrix *B = new_csr_matrix(m * k, k * n, total_nnz);
+    CSR_Matrix *B = new_csr_matrix(m * k, k * n, total_nnz, mem);
     int idx = 0;
 
     for (int j = 0; j < k; j++)
@@ -138,7 +138,7 @@ static void jacobian_init_no_chain_rule(expr *node)
     int k = x->d2;
     int n = y->d2;
     int nnz = m * n * 2 * k;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz, &node->bytes);
 
     int nnz_idx = 0;
     for (int i = 0; i < node->size; i++)
@@ -230,10 +230,10 @@ static void jacobian_init_chain_rule(expr *node)
     jacobian_csc_init(g);
 
     /* initialize term1, term2, and their sum */
-    mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc);
-    mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc);
+    mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc, &node->bytes);
+    mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc, &node->bytes);
     int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, max_nnz);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, max_nnz, &node->bytes);
     sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, node->jacobian);
 }
 
@@ -271,7 +271,8 @@ static void wsum_hess_init_no_chain_rule(expr *node)
     int k = x->d2;
     int n = y->d2;
     int total_nnz = 2 * m * k * n;
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, total_nnz);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, total_nnz, &node->bytes);
     int nnz = 0;
     int *Hi = node->wsum_hess->i;
     int *Hp = node->wsum_hess->p;
@@ -417,16 +418,18 @@ static void wsum_hess_init_chain_rule(expr *node)
     CSC_Matrix *Jg = g->work->jacobian_csc;
 
     /* initialize C = Jf^T @ B @ Jg = Jf^T @ (B @ Jg) */
-    mnode->B = build_cross_hessian_sparsity(m, k, n);
-    mnode->BJg = csr_csc_matmul_alloc(mnode->B, Jg);
+    mnode->B = build_cross_hessian_sparsity(m, k, n, &node->bytes);
+    mnode->BJg = csr_csc_matmul_alloc(mnode->B, Jg, &node->bytes);
     int max_alloc = MAX(mnode->BJg->m, mnode->BJg->n);
     mnode->BJg_csc_work = (int *) malloc(max_alloc * sizeof(int));
-    mnode->BJg_CSC = csr_to_csc_alloc(mnode->BJg, mnode->BJg_csc_work);
-    mnode->C = BTA_alloc(mnode->BJg_CSC, Jf);
+    node->bytes += max_alloc * sizeof(int);
+    mnode->BJg_CSC = csr_to_csc_alloc(mnode->BJg, mnode->BJg_csc_work, &node->bytes);
+    mnode->C = BTA_alloc(mnode->BJg_CSC, Jf, &node->bytes);
 
     /* initialize C^T */
     node->work->iwork = (int *) malloc(mnode->C->m * sizeof(int));
-    mnode->CT = AT_alloc(mnode->C, node->work->iwork);
+    node->bytes += mnode->C->m * sizeof(int);
+    mnode->CT = AT_alloc(mnode->C, node->work->iwork, &node->bytes);
 
     /* initialize Hessians of children */
     wsum_hess_init(f);
@@ -434,8 +437,8 @@ static void wsum_hess_init_chain_rule(expr *node)
 
     /* sum the four terms and fill idx maps */
     int *maps[4];
-    node->wsum_hess =
-        sum_4_csr_alloc(mnode->C, mnode->CT, f->wsum_hess, g->wsum_hess, maps);
+    node->wsum_hess = sum_4_csr_alloc(mnode->C, mnode->CT, f->wsum_hess,
+                                      g->wsum_hess, maps, &node->bytes);
     mnode->idx_map_C = maps[0];
     mnode->idx_map_CT = maps[1];
     mnode->idx_map_Hf = maps[2];
@@ -446,6 +449,7 @@ static void wsum_hess_init_chain_rule(expr *node)
     {
         node->work->dwork =
             (double *) malloc(MAX(f->size, g->size) * sizeof(double));
+        node->bytes += MAX(f->size, g->size) * sizeof(double);
     }
 }
 
diff --git a/src/atoms/bivariate_full_dom/multiply.c b/src/atoms/bivariate_full_dom/multiply.c
index d1e2a6f..c7d5685 100644
--- a/src/atoms/bivariate_full_dom/multiply.c
+++ b/src/atoms/bivariate_full_dom/multiply.c
@@ -49,7 +49,7 @@ static void jacobian_init_impl(expr *node)
     jacobian_init(node->left);
     jacobian_init(node->right);
     int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz;
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max);
+    node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max, &node->bytes);
 
     /* fill sparsity pattern */
     sum_csr_alloc(node->left->jacobian, node->right->jacobian, node->jacobian);
@@ -79,7 +79,8 @@ static void wsum_hess_init_impl(expr *node)
         x->var_id != y->var_id)
     {
         assert(y->var_id != NOT_A_VARIABLE);
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 2 * node->size);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, 2 * node->size, &node->bytes);
 
         int i, var1_id, var2_id;
 
@@ -142,6 +143,7 @@ static void wsum_hess_init_impl(expr *node)
         if (!x->is_affine(x) || !y->is_affine(y))
         {
             node->work->dwork = (double *) malloc(node->size * sizeof(double));
+            node->bytes += node->size * sizeof(double);
         }
 
         /* prepare sparsity pattern of csc conversion */
@@ -151,9 +153,10 @@ static void wsum_hess_init_impl(expr *node)
         CSC_Matrix *Jg2 = y->work->jacobian_csc;
 
         /* compute sparsity of C and prepare CT */
-        CSR_Matrix *C = BTA_alloc(Jg1, Jg2);
+        CSR_Matrix *C = BTA_alloc(Jg1, Jg2, &node->bytes);
         node->work->iwork = (int *) malloc(C->m * sizeof(int));
-        CSR_Matrix *CT = AT_alloc(C, node->work->iwork);
+        node->bytes += C->m * sizeof(int);
+        CSR_Matrix *CT = AT_alloc(C, node->work->iwork, &node->bytes);
 
         /* initialize wsum_hessians of children */
         wsum_hess_init(x);
@@ -167,7 +170,8 @@ static void wsum_hess_init_impl(expr *node)
            fill index maps telling us where to accumulate each element of each
            matrix in the sum) */
         int *maps[4];
-        node->wsum_hess = sum_4_csr_alloc(C, CT, x->wsum_hess, y->wsum_hess, maps);
+        node->wsum_hess =
+            sum_4_csr_alloc(C, CT, x->wsum_hess, y->wsum_hess, maps, &node->bytes);
         mul_node->idx_map_C = maps[0];
         mul_node->idx_map_CT = maps[1];
         mul_node->idx_map_Hx = maps[2];
diff --git a/src/atoms/bivariate_restricted_dom/quad_over_lin.c b/src/atoms/bivariate_restricted_dom/quad_over_lin.c
index 4ad795a..962cf53 100644
--- a/src/atoms/bivariate_restricted_dom/quad_over_lin.c
+++ b/src/atoms/bivariate_restricted_dom/quad_over_lin.c
@@ -57,7 +57,7 @@ static void jacobian_init_impl(expr *node)
     /* if left node is a variable */
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(1, node->n_vars, x->size + 1);
+        node->jacobian = new_csr_matrix(1, node->n_vars, x->size + 1, &node->bytes);
         node->jacobian->p[0] = 0;
         node->jacobian->p[1] = x->size + 1;
 
@@ -82,12 +82,14 @@ static void jacobian_init_impl(expr *node)
     else /* left node is not a variable (guaranteed to be a linear operator) */
     {
         node->work->dwork = (double *) malloc(x->size * sizeof(double));
+        node->bytes += x->size * sizeof(double);
 
         /* compute required allocation and allocate jacobian */
         bool *col_nz = (bool *) calloc(
             node->n_vars, sizeof(bool)); /* TODO: could use iwork here instead*/
         int nonzero_cols = count_nonzero_cols(x->jacobian, col_nz);
-        node->jacobian = new_csr_matrix(1, node->n_vars, nonzero_cols + 1);
+        node->jacobian =
+            new_csr_matrix(1, node->n_vars, nonzero_cols + 1, &node->bytes);
 
         /* precompute column indices */
         node->jacobian->nnz = 0;
@@ -111,6 +113,7 @@ static void jacobian_init_impl(expr *node)
 
         /* find position where y should be inserted */
         node->work->iwork = (int *) malloc(sizeof(int));
+        node->bytes += sizeof(int);
         for (int j = 0; j < node->jacobian->nnz; j++)
         {
             if (node->jacobian->i[j] == y->var_id)
@@ -183,8 +186,8 @@ static void wsum_hess_init_impl(expr *node)
     /* if left node is a variable */
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->wsum_hess =
-            new_csr_matrix(node->n_vars, node->n_vars, 3 * x->size + 1);
+        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 3 * x->size + 1,
+                                         &node->bytes);
         CSR_Matrix *H = node->wsum_hess;
 
         /* if x has lower idx than y*/
diff --git a/src/atoms/bivariate_restricted_dom/rel_entr.c b/src/atoms/bivariate_restricted_dom/rel_entr.c
index 62772f8..5b60149 100644
--- a/src/atoms/bivariate_restricted_dom/rel_entr.c
+++ b/src/atoms/bivariate_restricted_dom/rel_entr.c
@@ -44,7 +44,8 @@ static void forward_vector_args(expr *node, const double *u)
 
 static void jacobian_init_vectors_args(expr *node)
 {
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, 2 * node->size, &node->bytes);
 
     expr *x = node->left;
     expr *y = node->right;
@@ -100,7 +101,8 @@ static void eval_jacobian_vector_args(expr *node)
 
 static void wsum_hess_init_vector_args(expr *node)
 {
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 4 * node->size);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, 4 * node->size, &node->bytes);
     expr *x = node->left;
     expr *y = node->right;
 
diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c
index 28a3c0d..22dd7b1 100644
--- a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c
+++ b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c
@@ -49,7 +49,8 @@ static void jacobian_init_scalar_vector(expr *node)
     assert(x->var_id != NOT_A_VARIABLE && y->var_id != NOT_A_VARIABLE);
     assert(x->var_id != y->var_id);
 
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, 2 * node->size, &node->bytes);
 
     if (x->var_id < y->var_id)
     {
@@ -103,7 +104,8 @@ static void wsum_hess_init_scalar_vector(expr *node)
     int var_id_x = x->var_id;
     int var_id_y = y->var_id;
 
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1, &node->bytes);
     CSR_Matrix *H = node->wsum_hess;
 
     if (var_id_x < var_id_y)
diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c
index b8e4c2c..60e2fcb 100644
--- a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c
+++ b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c
@@ -49,7 +49,8 @@ static void jacobian_init_vector_scalar(expr *node)
     assert(x->var_id != NOT_A_VARIABLE && y->var_id != NOT_A_VARIABLE);
     assert(x->var_id != y->var_id);
 
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, 2 * node->size, &node->bytes);
 
     if (x->var_id < y->var_id)
     {
@@ -103,7 +104,8 @@ static void wsum_hess_init_vector_scalar(expr *node)
     int var_id_x = x->var_id;
     int var_id_y = y->var_id;
 
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1, &node->bytes);
     CSR_Matrix *H = node->wsum_hess;
 
     if (var_id_x < var_id_y)
diff --git a/src/atoms/elementwise_full_dom/common.c b/src/atoms/elementwise_full_dom/common.c
index 3a6df35..470e837 100644
--- a/src/atoms/elementwise_full_dom/common.c
+++ b/src/atoms/elementwise_full_dom/common.c
@@ -14,7 +14,8 @@ void jacobian_init_elementwise(expr *node)
     /* if the variable is a child */
     if (child->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size);
+        node->jacobian =
+            new_csr_matrix(node->size, node->n_vars, node->size, &node->bytes);
         for (int j = 0; j < node->size; j++)
         {
             node->jacobian->p[j] = j;
@@ -27,9 +28,11 @@ void jacobian_init_elementwise(expr *node)
         /* jacobian of h(x) = f(g(x)) is Jf @ Jg, and here Jf is diagonal */
         jacobian_init(child);
         CSR_Matrix *Jg = child->jacobian;
-        node->jacobian = new_csr_copy_sparsity(Jg);
+        node->jacobian = new_csr_copy_sparsity(Jg, &node->bytes);
         node->work->dwork = (double *) malloc(node->size * sizeof(double));
+        node->bytes += node->size * sizeof(double);
         node->work->local_jac_diag = (double *) malloc(node->size * sizeof(double));
+        node->bytes += node->size * sizeof(double);
     }
 }
 
@@ -62,7 +65,8 @@ void wsum_hess_init_elementwise(expr *node)
     /* if the variable is a child */
     if (id != NOT_A_VARIABLE)
     {
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, node->size);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, node->size, &node->bytes);
 
         for (i = 0; i < node->size; i++)
         {
@@ -87,21 +91,22 @@ void wsum_hess_init_elementwise(expr *node)
 
         if (child->is_affine(child))
         {
-            node->wsum_hess = ATA_alloc(Jg);
+            node->wsum_hess = ATA_alloc(Jg, &node->bytes);
         }
         else
         {
             /* term1: Jg^T @ D @ Jg */
-            node->work->hess_term1 = ATA_alloc(Jg);
+            node->work->hess_term1 = ATA_alloc(Jg, &node->bytes);
 
             /* term2: child's Hessian */
             wsum_hess_init(child);
             CSR_Matrix *Hg = child->wsum_hess;
-            node->work->hess_term2 = new_csr_copy_sparsity(Hg);
+            node->work->hess_term2 = new_csr_copy_sparsity(Hg, &node->bytes);
 
             /* wsum_hess = term1 + term2 */
             int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz;
-            node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz);
+            node->wsum_hess =
+                new_csr_matrix(node->n_vars, node->n_vars, max_nnz, &node->bytes);
             sum_csr_alloc(node->work->hess_term1, node->work->hess_term2,
                           node->wsum_hess);
         }
diff --git a/src/atoms/elementwise_restricted_dom/common.c b/src/atoms/elementwise_restricted_dom/common.c
index 67d8484..0eec75d 100644
--- a/src/atoms/elementwise_restricted_dom/common.c
+++ b/src/atoms/elementwise_restricted_dom/common.c
@@ -5,7 +5,8 @@ void jacobian_init_restricted(expr *node)
 {
     expr *child = node->left;
 
-    node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size);
+    node->jacobian =
+        new_csr_matrix(node->size, node->n_vars, node->size, &node->bytes);
     for (int j = 0; j < node->size; j++)
     {
         node->jacobian->p[j] = j;
@@ -20,7 +21,8 @@ void wsum_hess_init_restricted(expr *node)
     int id = child->var_id;
     int i;
 
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, node->size);
+    node->wsum_hess =
+        new_csr_matrix(node->n_vars, node->n_vars, node->size, &node->bytes);
 
     for (i = 0; i < node->size; i++)
     {
diff --git a/src/atoms/other/prod.c b/src/atoms/other/prod.c
index 5826167..6c196ed 100644
--- a/src/atoms/other/prod.c
+++ b/src/atoms/other/prod.c
@@ -53,7 +53,7 @@ static void jacobian_init_impl(expr *node)
     /* if x is a variable */
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(1, node->n_vars, x->size);
+        node->jacobian = new_csr_matrix(1, node->n_vars, x->size, &node->bytes);
         node->jacobian->p[0] = 0;
         node->jacobian->p[1] = x->size;
         for (int j = 0; j < x->size; j++)
@@ -113,7 +113,8 @@ static void wsum_hess_init_impl(expr *node)
         /* allocate n_vars x n_vars CSR matrix with dense block */
         int block_size = x->size;
         int nnz = block_size * block_size;
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, nnz, &node->bytes);
 
         /* fill row pointers for the dense block */
         for (int i = 0; i < block_size; i++)
diff --git a/src/atoms/other/prod_axis_one.c b/src/atoms/other/prod_axis_one.c
index 72cd12a..4514e08 100644
--- a/src/atoms/other/prod_axis_one.c
+++ b/src/atoms/other/prod_axis_one.c
@@ -63,7 +63,8 @@ static void jacobian_init_impl(expr *node)
     /* if x is a variable */
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(node->size, node->n_vars, x->size);
+        node->jacobian =
+            new_csr_matrix(node->size, node->n_vars, x->size, &node->bytes);
 
         /* set row pointers (each row has d2 nnzs) */
         for (int row = 0; row < x->d1; row++)
@@ -143,7 +144,8 @@ static void wsum_hess_init_impl(expr *node)
         /* each row i has d2-1 non-zero entries, with column indices corresponding to
            the columns in that row (except the diagonal element). */
         int nnz = x->d1 * x->d2 * (x->d2 - 1);
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, nnz, &node->bytes);
         CSR_Matrix *H = node->wsum_hess;
 
         /* fill sparsity pattern */
@@ -388,8 +390,11 @@ expr *new_prod_axis_one(expr *child)
 
     /* allocate arrays to store per-row statistics */
     pnode->num_of_zeros = (int *) calloc(child->d1, sizeof(int));
+    node->bytes += child->d1 * sizeof(int);
     pnode->zero_index = (int *) calloc(child->d1, sizeof(int));
+    node->bytes += child->d1 * sizeof(int);
     pnode->prod_nonzero = (double *) calloc(child->d1, sizeof(double));
+    node->bytes += child->d1 * sizeof(double);
 
     node->left = child;
     expr_retain(child);
diff --git a/src/atoms/other/prod_axis_zero.c b/src/atoms/other/prod_axis_zero.c
index f6411de..7ca1872 100644
--- a/src/atoms/other/prod_axis_zero.c
+++ b/src/atoms/other/prod_axis_zero.c
@@ -58,7 +58,8 @@ static void jacobian_init_impl(expr *node)
     /* if x is a variable */
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(node->size, node->n_vars, x->size);
+        node->jacobian =
+            new_csr_matrix(node->size, node->n_vars, x->size, &node->bytes);
 
         /* set row pointers (each row has d1 nnzs) */
         for (int row = 0; row < x->d2; row++)
@@ -136,7 +137,8 @@ static void wsum_hess_init_impl(expr *node)
     {
         /* Hessian has block diagonal structure: d2 blocks of size d1 x d1 */
         int nnz = x->d2 * x->d1 * x->d1;
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, nnz, &node->bytes);
         CSR_Matrix *H = node->wsum_hess;
 
         /* fill row pointers for the variable's rows (block diagonal) */
@@ -348,8 +350,11 @@ expr *new_prod_axis_zero(expr *child)
 
     /* allocate arrays to store per-column statistics */
     pnode->num_of_zeros = (int *) calloc(child->d2, sizeof(int));
+    node->bytes += child->d2 * sizeof(int);
     pnode->zero_index = (int *) calloc(child->d2, sizeof(int));
+    node->bytes += child->d2 * sizeof(int);
     pnode->prod_nonzero = (double *) calloc(child->d2, sizeof(double));
+    node->bytes += child->d2 * sizeof(double);
 
     node->left = child;
     expr_retain(child);
diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c
index 02dba70..5a5b815 100644
--- a/src/atoms/other/quad_form.c
+++ b/src/atoms/other/quad_form.c
@@ -33,10 +33,11 @@ static void jacobian_init_impl(expr *node)
 
     /* dwork stores the result of Q @ f(x) in the forward pass */
     node->work->dwork = (double *) malloc(x->size * sizeof(double));
+    node->bytes += x->size * sizeof(double);
 
     if (x->var_id != NOT_A_VARIABLE)
     {
-        node->jacobian = new_csr_matrix(1, node->n_vars, x->size);
+        node->jacobian = new_csr_matrix(1, node->n_vars, x->size, &node->bytes);
         node->jacobian->p[0] = 0;
         node->jacobian->p[1] = x->size;
 
@@ -54,7 +55,7 @@ static void jacobian_init_impl(expr *node)
 
         /* allocate the right number of nnz */
         int nnz = count_nonzero_cols_csc(J_csc);
-        node->jacobian = new_csr_matrix(1, node->n_vars, nnz);
+        node->jacobian = new_csr_matrix(1, node->n_vars, nnz, &node->bytes);
         node->jacobian->p[0] = 0;
         node->jacobian->p[1] = nnz;
 
@@ -112,7 +113,8 @@ static void wsum_hess_init_impl(expr *node)
 
     if (x->var_id != NOT_A_VARIABLE)
     {
-        CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz);
+        CSR_Matrix *H =
+            new_csr_matrix(node->n_vars, node->n_vars, Q->nnz, &node->bytes);
 
         /* set global row pointers */
         memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int));
@@ -145,17 +147,18 @@ static void wsum_hess_init_impl(expr *node)
         CSC_Matrix *Jf = x->work->jacobian_csc;
 
         /* term1 = Jf^T W Jf = Jf^T B*/
-        CSC_Matrix *B = symBA_alloc(Q, Jf);
+        CSC_Matrix *B = symBA_alloc(Q, Jf, &node->bytes);
         qnode->QJf = B;
-        node->work->hess_term1 = BTA_alloc(Jf, B);
+        node->work->hess_term1 = BTA_alloc(Jf, B, &node->bytes);
 
         /* term2 = sum_i (Qf(x))_i nabla^2 f_i */
         wsum_hess_init(x);
-        node->work->hess_term2 = new_csr_copy_sparsity(x->wsum_hess);
+        node->work->hess_term2 = new_csr_copy_sparsity(x->wsum_hess, &node->bytes);
 
         /* hess = term1 + term2 */
         int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz;
-        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz);
+        node->wsum_hess =
+            new_csr_matrix(node->n_vars, node->n_vars, max_nnz, &node->bytes);
         sum_csr_alloc(node->work->hess_term1, node->work->hess_term2,
                       node->wsum_hess);
     }
@@ -240,7 +243,7 @@ expr *new_quad_form(expr *left, CSR_Matrix *Q)
     expr_retain(left);
 
     /* Set type-specific field */
-    qnode->Q = new_csr_matrix(Q->m, Q->n, Q->nnz);
+    qnode->Q = new_csr_matrix(Q->m, Q->n, Q->nnz, &node->bytes);
     copy_csr_matrix(Q, qnode->Q);
     return node;
 }
diff --git a/src/expr.c b/src/expr.c
index 01e5b49..e8e8bed 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -31,7 +31,10 @@ void init_expr(expr *node, int d1, int d2, int n_vars, forward_fn forward,
     node->size = d1 * d2;
     node->n_vars = n_vars;
     node->refcount = 0;
+    node->bytes = 0;
+    node->visited = false;
     node->value = (double *) calloc(d1 * d2, sizeof(double));
+    node->bytes += (size_t) (d1 * d2) * sizeof(double);
     node->var_id = NOT_A_VARIABLE;
     node->forward = forward;
     node->jacobian_init_impl = jacobian_init;
@@ -50,8 +53,9 @@ void jacobian_csc_init(expr *node)
         return;
     }
     node->work->csc_work = (int *) malloc(node->n_vars * sizeof(int));
+    node->bytes += (size_t) node->n_vars * sizeof(int);
     node->work->jacobian_csc =
-        csr_to_csc_alloc(node->jacobian, node->work->csc_work);
+        csr_to_csc_alloc(node->jacobian, node->work->csc_work, &node->bytes);
 }
 
 void free_expr(expr *node)
diff --git a/src/old-code/linear_op.c b/src/old-code/linear_op.c
index c1dfc12..78defe8 100644
--- a/src/old-code/linear_op.c
+++ b/src/old-code/linear_op.c
@@ -73,7 +73,7 @@ static void eval_jacobian(expr *node)
 static void wsum_hess_init_impl(expr *node)
 {
     /* Linear operator Hessian is always zero */
-    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0);
+    node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0, NULL);
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
@@ -95,7 +95,7 @@ expr *new_linear(expr *u, const CSR_Matrix *A, const double *b)
     expr_retain(u);
 
     /* Store A directly as the jacobian (linear op jacobian is constant) */
-    node->jacobian = new_csr_matrix(A->m, A->n, A->nnz);
+    node->jacobian = new_csr_matrix(A->m, A->n, A->nnz, NULL);
     copy_csr_matrix(A, node->jacobian);
 
     /* Initialize offset (copy b if provided, otherwise NULL) */
diff --git a/src/old-code/old_CSR.c b/src/old-code/old_CSR.c
index 3876fa6..ab776a7 100644
--- a/src/old-code/old_CSR.c
+++ b/src/old-code/old_CSR.c
@@ -28,7 +28,7 @@ CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p)
     int n = A->n;
     int nnz = A->nnz;
 
-    CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p);
+    CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p, NULL);
 
     int nnz_cursor = 0;
     for (int block = 0; block < p; block++)
@@ -63,7 +63,7 @@ CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p)
     int n = A->n;
     int nnz = A->nnz;
 
-    CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p);
+    CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p, NULL);
 
     int nnz_cursor = 0;
     for (int row_block = 0; row_block < m; row_block++)
diff --git a/src/problem.c b/src/problem.c
index 99b6fd1..585f40e 100644
--- a/src/problem.c
+++ b/src/problem.c
@@ -68,6 +68,7 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints,
     prob->stats.nnz_hessian = 0;
     prob->stats.n_vars = prob->n_vars;
     prob->stats.total_constraint_size = prob->total_constraint_size;
+    prob->stats.bytes = 0;
 
     prob->verbose = verbose;
 
@@ -185,7 +186,8 @@ void problem_init_jacobian(problem *prob)
         }
     }
 
-    prob->jacobian = new_csr_matrix(prob->total_constraint_size, prob->n_vars, nnz);
+    prob->jacobian =
+        new_csr_matrix(prob->total_constraint_size, prob->n_vars, nnz, NULL);
 
     /* set sparsity pattern of jacobian */
     CSR_Matrix *H = prob->jacobian;
@@ -228,7 +230,7 @@ void problem_init_hessian(problem *prob)
         nnz += prob->constraints[i]->wsum_hess->nnz;
     }
 
-    prob->lagrange_hessian = new_csr_matrix(prob->n_vars, prob->n_vars, nnz);
+    prob->lagrange_hessian = new_csr_matrix(prob->n_vars, prob->n_vars, nnz, NULL);
     memset(prob->lagrange_hessian->x, 0, nnz * sizeof(double)); /* affine shortcut */
     prob->stats.nnz_hessian = nnz;
     prob->hess_idx_map = (int *) malloc(nnz * sizeof(int));
@@ -267,6 +269,37 @@ void problem_init_derivatives(problem *prob)
     problem_init_hessian(prob);
 }
 
+/* NOTE: hstack children (stored in args[]) are not traversed here.
+   Their memory is counted at the hstack node itself but not at the
+   hstack children's subtrees. */
+static size_t sum_dag_memory(expr *node)
+{
+    if (node == NULL || node->visited) return 0;
+    node->visited = true;
+    size_t total = node->bytes;
+    total += sum_dag_memory(node->left);
+    total += sum_dag_memory(node->right);
+    return total;
+}
+
+static void reset_visited(expr *node)
+{
+    if (node == NULL || !node->visited) return;
+    node->visited = false;
+    reset_visited(node->left);
+    reset_visited(node->right);
+}
+
+static inline void format_memory(size_t bytes, char *buf, size_t buf_size)
+{
+    if (bytes < 1024)
+        snprintf(buf, buf_size, "%zu B", bytes);
+    else if (bytes < 1024 * 1024)
+        snprintf(buf, buf_size, "%.2f KB", (double) bytes / 1024.0);
+    else
+        snprintf(buf, buf_size, "%.2f MB", (double) bytes / (1024.0 * 1024.0));
+}
+
 static inline void print_end_message(const Diff_engine_stats *stats)
 {
     printf("\n"
@@ -284,6 +317,11 @@ static inline void print_end_message(const Diff_engine_stats *stats)
     printf("  Jacobian nonlinear constraints (nnz):   %d\n", stats->nnz_nonlinear);
     printf("  Lagrange Hessian (nnz):                 %d\n", stats->nnz_hessian);
 
+    char mem_buf[64];
+    format_memory(stats->bytes, mem_buf, sizeof(mem_buf));
+    printf("\nMemory:\n");
+    printf("  Total tracked allocations:       %12s\n", mem_buf);
+
     printf("\nTiming (seconds):\n");
     printf("  Derivative structure (sparsity):     %8.3f\n",
            stats->time_init_derivatives);
@@ -308,12 +346,33 @@ void free_problem(problem *prob)
 {
     if (prob == NULL) return;
 
+    /* Compute memory stats and print before freeing */
+    if (prob->verbose)
+    {
+        size_t mem = sum_dag_memory(prob->objective);
+        for (int i = 0; i < prob->n_constraints; i++)
+            mem += sum_dag_memory(prob->constraints[i]);
+        reset_visited(prob->objective);
+        for (int i = 0; i < prob->n_constraints; i++)
+            reset_visited(prob->constraints[i]);
+
+        /* problem-level allocations */
+        mem += csr_bytes(prob->jacobian);
+        mem += csr_bytes(prob->lagrange_hessian);
+        mem += coo_bytes(prob->jacobian_coo);
+        mem += coo_bytes(prob->lagrange_hessian_coo);
+
+        prob->stats.bytes = mem;
+        print_end_message(&prob->stats);
+    }
+
     /* Free allocated arrays */
     free(prob->constraint_values);
     free(prob->gradient_values);
     free_csr_matrix(prob->jacobian);
     free_csr_matrix(prob->lagrange_hessian);
     free_coo_matrix(prob->jacobian_coo);
+    free_coo_matrix(prob->lagrange_hessian_coo);
     free(prob->hess_idx_map);
 
     /* Release expression references (decrements refcount) */
@@ -324,11 +383,6 @@ void free_problem(problem *prob)
     }
     free(prob->constraints);
 
-    if (prob->verbose)
-    {
-        print_end_message(&prob->stats);
-    }
-
     /* Free problem struct */
     free(prob);
 }
diff --git a/src/utils/COO_Matrix.c b/src/utils/COO_Matrix.c
index 1a39df4..dd00f63 100644
--- a/src/utils/COO_Matrix.c
+++ b/src/utils/COO_Matrix.c
@@ -107,3 +107,13 @@ void free_coo_matrix(COO_Matrix *matrix)
         free(matrix);
     }
 }
+
+size_t coo_bytes(const COO_Matrix *A)
+{
+    if (!A) return 0;
+    size_t bytes = (size_t) A->nnz * sizeof(int)       /* rows */
+                   + (size_t) A->nnz * sizeof(int)     /* cols */
+                   + (size_t) A->nnz * sizeof(double); /* x */
+    if (A->value_map) bytes += (size_t) A->nnz * sizeof(int);
+    return bytes;
+}
diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c
index 9d4e078..76b10e1 100644
--- a/src/utils/CSC_Matrix.c
+++ b/src/utils/CSC_Matrix.c
@@ -21,7 +21,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-CSC_Matrix *new_csc_matrix(int m, int n, int nnz)
+CSC_Matrix *new_csc_matrix(int m, int n, int nnz, size_t *mem)
 {
     CSC_Matrix *matrix = (CSC_Matrix *) malloc(sizeof(CSC_Matrix));
     if (!matrix) return NULL;
@@ -43,6 +43,7 @@ CSC_Matrix *new_csc_matrix(int m, int n, int nnz)
     matrix->n = n;
     matrix->nnz = nnz;
 
+    if (mem) *mem += csc_bytes(matrix);
     return matrix;
 }
 
@@ -57,7 +58,14 @@ void free_csc_matrix(CSC_Matrix *matrix)
     }
 }
 
-CSR_Matrix *ATA_alloc(const CSC_Matrix *A)
+size_t csc_bytes(const CSC_Matrix *A)
+{
+    if (!A) return 0;
+    return (size_t) (A->n + 1) * sizeof(int) + (size_t) A->nnz * sizeof(int) +
+           (size_t) A->nnz * sizeof(double);
+}
+
+CSR_Matrix *ATA_alloc(const CSC_Matrix *A, size_t *mem)
 {
     /* A is m x n, A^T A is n x n */
     int n = A->n;
@@ -101,7 +109,7 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A)
     }
 
     /* Allocate C and symmetrize it */
-    CSR_Matrix *C = new_csr_matrix(n, n, nnz);
+    CSR_Matrix *C = new_csr_matrix(n, n, nnz, mem);
     symmetrize_csr(Cp, Ci->data, n, C);
 
     /* free workspace */
@@ -202,9 +210,9 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
     }
 }
 
-CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork)
+CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork, size_t *mem)
 {
-    CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz);
+    CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz, mem);
 
     int i, j;
     int *count = iwork;
@@ -265,9 +273,9 @@ void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork)
     }
 }
 
-CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork)
+CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork, size_t *mem)
 {
-    CSR_Matrix *C = new_csr_matrix(A->m, A->n, A->nnz);
+    CSR_Matrix *C = new_csr_matrix(A->m, A->n, A->nnz, mem);
 
     int i, j;
     int *count = iwork;
@@ -331,7 +339,7 @@ void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork)
     }
 }
 
-CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B)
+CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B, size_t *mem)
 {
     /* A is m x n, B is m x p, C = B^T A is p x n */
     int n = A->n;
@@ -377,7 +385,7 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B)
     }
 
     /* Allocate C */
-    CSR_Matrix *C = new_csr_matrix(p, n, nnz);
+    CSR_Matrix *C = new_csr_matrix(p, n, nnz, mem);
     memcpy(C->p, Cp, (p + 1) * sizeof(int));
     memcpy(C->i, Ci->data, nnz * sizeof(int));
 
@@ -466,7 +474,7 @@ void BA_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A, CSC_Matrix *C)
     }
 }
 
-CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A)
+CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A, size_t *mem)
 {
     /* Allocate C = B * A (sparsity only). B must be symmetric.
      * B is CSR (m x m), A is CSC (m x n), C is CSC (m x n).
@@ -524,7 +532,7 @@ CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A)
 
     /* allocate C and copy the computed structure */
     int total_nnz = Cp[n];
-    CSC_Matrix *C = new_csc_matrix(m, n, total_nnz);
+    CSC_Matrix *C = new_csc_matrix(m, n, total_nnz, mem);
     memcpy(C->p, Cp, (n + 1) * sizeof(int));
     memcpy(C->i, Ci->data, total_nnz * sizeof(int));
 
diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_Matrix.c
index 5fa8eb2..50dac40 100644
--- a/src/utils/CSR_Matrix.c
+++ b/src/utils/CSR_Matrix.c
@@ -24,7 +24,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-CSR_Matrix *new_csr_matrix(int m, int n, int nnz)
+CSR_Matrix *new_csr_matrix(int m, int n, int nnz, size_t *mem)
 {
     CSR_Matrix *matrix = (CSR_Matrix *) malloc(sizeof(CSR_Matrix));
     matrix->p = (int *) calloc(m + 1, sizeof(int));
@@ -33,21 +33,22 @@ CSR_Matrix *new_csr_matrix(int m, int n, int nnz)
     matrix->m = m;
     matrix->n = n;
     matrix->nnz = nnz;
+    if (mem) *mem += csr_bytes(matrix);
     return matrix;
 }
 
-CSR_Matrix *new_csr(const CSR_Matrix *A)
+CSR_Matrix *new_csr(const CSR_Matrix *A, size_t *mem)
 {
-    CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz);
+    CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz, mem);
     memcpy(copy->p, A->p, (A->m + 1) * sizeof(int));
     memcpy(copy->i, A->i, A->nnz * sizeof(int));
     memcpy(copy->x, A->x, A->nnz * sizeof(double));
     return copy;
 }
 
-CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A)
+CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A, size_t *mem)
 {
-    CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz);
+    CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz, mem);
     memcpy(copy->p, A->p, (A->m + 1) * sizeof(int));
     memcpy(copy->i, A->i, A->nnz * sizeof(int));
     return copy;
@@ -64,6 +65,13 @@ void free_csr_matrix(CSR_Matrix *matrix)
     }
 }
 
+size_t csr_bytes(const CSR_Matrix *A)
+{
+    if (!A) return 0;
+    return (size_t) (A->m + 1) * sizeof(int) + (size_t) A->nnz * sizeof(int) +
+           (size_t) A->nnz * sizeof(double);
+}
+
 void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C)
 {
     C->m = A->m;
@@ -134,7 +142,7 @@ void DA_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C)
 
 CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork)
 {
-    CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz);
+    CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz, NULL);
 
     int i, j;
     int *count = iwork;
@@ -177,10 +185,10 @@ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork)
     return AT;
 }
 
-CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork)
+CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork, size_t *mem)
 {
     /* Allocate A^T and compute sparsity pattern without filling values */
-    CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz);
+    CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz, mem);
 
     int i, j;
     int *count = iwork;
diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c
index 8c3e5f6..7567836 100644
--- a/src/utils/CSR_sum.c
+++ b/src/utils/CSR_sum.c
@@ -361,7 +361,7 @@ void sum_all_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, int *iwork,
  */
 CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B,
                             const CSR_Matrix *C, const CSR_Matrix *D,
-                            int *idx_maps[4])
+                            int *idx_maps[4], size_t *mem)
 {
     const CSR_Matrix *inputs[4] = {A, B, C, D};
     int m = A->m;
@@ -369,10 +369,11 @@ CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B,
     int nnz_ub = A->nnz + B->nnz + C->nnz + D->nnz;
 
     /* allocate output and index maps */
-    CSR_Matrix *out = new_csr_matrix(m, n, nnz_ub);
+    CSR_Matrix *out = new_csr_matrix(m, n, nnz_ub, mem);
     for (int k = 0; k < 4; k++)
     {
         idx_maps[k] = (int *) malloc(inputs[k]->nnz * sizeof(int));
+        if (mem) *mem += (size_t) inputs[k]->nnz * sizeof(int);
     }
 
     /* 4-way sorted merge per row */
diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c
index a073349..4bf82d7 100644
--- a/src/utils/linalg_dense_sparse_matmuls.c
+++ b/src/utils/linalg_dense_sparse_matmuls.c
@@ -28,7 +28,7 @@
  * C = (I_p kron A) @ J  via the polymorphic Matrix interface.
  * A is dense m x n, J is (n*p) x k CSC, C is (m*p) x k CSC.
  * --------------------------------------------------------------- */
-CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p)
+CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p, size_t *mem)
 {
     int m = A->m;
     int n = A->n;
@@ -81,7 +81,7 @@ CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p)
         Cp[j + 1] = Ci->len;
     }
 
-    CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len);
+    CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len, mem);
     memcpy(C->p, Cp, (J->n + 1) * sizeof(int));
     memcpy(C->i, Ci->data, Ci->len * sizeof(int));
     free(Cp);
@@ -157,7 +157,7 @@ void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C)
  * C = (Y^T kron I_m) @ J
  * Y is k x n (col-major), J is (m*k) x p CSC, C is (m*n) x p CSR
  * --------------------------------------------------------------- */
-CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J)
+CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J, size_t *mem)
 {
     (void) k;
     /* C has n blocks of m rows.  All rows at the same position within
@@ -197,7 +197,7 @@ CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J)
     // ---------------------------------------------------------------
     //           replicate sparsity pattern across blocks
     // ---------------------------------------------------------------
-    CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz);
+    CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz, mem);
     int idx = 0;
     for (i = 0; i < m * n; i++)
     {
@@ -255,7 +255,7 @@ void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_Matri
     }
 }
 
-CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J)
+CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J, size_t *mem)
 {
     /* Step 1: for each block, find which columns of J have any
      *         nonzero in row range [blk*k, blk*k + k). */
@@ -286,7 +286,7 @@ CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J)
 
     /* Step 2: replicate each block's pattern for all m rows
      *         within that block. */
-    CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz);
+    CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz, mem);
     int idx = 0;
     for (i = 0; i < m * n; i++)
     {
diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c
index 9a7b164..c835a34 100644
--- a/src/utils/linalg_sparse_matmuls.c
+++ b/src/utils/linalg_sparse_matmuls.c
@@ -103,8 +103,8 @@ static inline double sparse_dot_offset(const double *a_x, const int *a_idx,
     return sum;
 }
 
-CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A,
-                                              const CSC_Matrix *J, int p)
+CSC_Matrix *block_left_multiply_alloc(const CSR_Matrix *A, const CSC_Matrix *J,
+                                      int p, size_t *mem)
 {
     /* A is m x n, J is (n*p) x k, C is (m*p) x k */
     int m = A->m;
@@ -174,7 +174,7 @@ CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A,
         Cp[j + 1] = Ci->len;
     }
 
-    CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len);
+    CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len, mem);
     memcpy(C->p, Cp, (J->n + 1) * sizeof(int));
     memcpy(C->i, Ci->data, Ci->len * sizeof(int));
     free(Cp);
@@ -269,7 +269,8 @@ void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B,
 
 /* C = A @ B where A is CSR (m x n), B is CSC (n x p). Result C is CSR (m x p)
   with precomputed sparsity pattern */
-CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B)
+CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B,
+                                 size_t *mem)
 {
     int m = A->m;
     int p = B->n;
@@ -303,7 +304,7 @@ CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B)
         Cp[i + 1] = nnz;
     }
 
-    CSR_Matrix *C = new_csr_matrix(m, p, nnz);
+    CSR_Matrix *C = new_csr_matrix(m, p, nnz, mem);
     memcpy(C->p, Cp, (m + 1) * sizeof(int));
     memcpy(C->i, Ci->data, nnz * sizeof(int));
     free(Cp);
diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c
index 24ed539..c6f49e6 100644
--- a/src/utils/sparse_matrix.c
+++ b/src/utils/sparse_matrix.c
@@ -27,10 +27,11 @@ static void sparse_block_left_mult_vec(const Matrix *self, const double *x,
 }
 
 static CSC_Matrix *sparse_block_left_mult_sparsity(const Matrix *self,
-                                                   const CSC_Matrix *J, int p)
+                                                   const CSC_Matrix *J, int p,
+                                                   size_t *mem)
 {
     const Sparse_Matrix *sm = (const Sparse_Matrix *) self;
-    return block_left_multiply_fill_sparsity(sm->csr, J, p);
+    return block_left_multiply_alloc(sm->csr, J, p, mem);
 }
 
 static void sparse_block_left_mult_values(const Matrix *self, const CSC_Matrix *J,
@@ -56,7 +57,7 @@ Matrix *new_sparse_matrix(const CSR_Matrix *A)
     sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity;
     sm->base.block_left_mult_values = sparse_block_left_mult_values;
     sm->base.free_fn = sparse_free;
-    sm->csr = new_csr(A);
+    sm->csr = new_csr(A, NULL);
     return &sm->base;
 }
 
diff --git a/tests/forward_pass/affine/test_linear_op.h b/tests/forward_pass/affine/test_linear_op.h
index 801a403..a121201 100644
--- a/tests/forward_pass/affine/test_linear_op.h
+++ b/tests/forward_pass/affine/test_linear_op.h
@@ -18,7 +18,7 @@ const char *test_linear_op(void)
     double Ax[7] = {2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0};
     int Ai[7] = {2, 3, 2, 4, 2, 3, 4};
     int Ap[4] = {0, 2, 4, 7};
-    CSR_Matrix *A = new_csr_matrix(3, 6, 7);
+    CSR_Matrix *A = new_csr_matrix(3, 6, 7, NULL);
     memcpy(A->x, Ax, 7 * sizeof(double));
     memcpy(A->i, Ai, 7 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
diff --git a/tests/jacobian_tests/affine/test_left_matmul.h b/tests/jacobian_tests/affine/test_left_matmul.h
index c4968d6..2efd83c 100644
--- a/tests/jacobian_tests/affine/test_left_matmul.h
+++ b/tests/jacobian_tests/affine/test_left_matmul.h
@@ -33,7 +33,7 @@ const char *test_jacobian_left_matmul_log(void)
     expr *x = new_variable(3, 1, 0, 3);
 
     /* Create sparse matrix A in CSR format */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
@@ -77,7 +77,7 @@ const char *test_jacobian_left_matmul_log_matrix(void)
     expr *x = new_variable(3, 2, 0, 6);
 
     /* Create sparse matrix A in CSR format (4x3) */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
@@ -116,7 +116,7 @@ const char *test_jacobian_left_matmul_exp_composite(void)
     expr *x = new_variable(3, 1, 0, 3);
 
     /* Create B matrix (3x3 all ones) */
-    CSR_Matrix *B = new_csr_matrix(3, 3, 9);
+    CSR_Matrix *B = new_csr_matrix(3, 3, 9, NULL);
     int B_p[4] = {0, 3, 6, 9};
     int B_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2};
     double B_x[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1};
@@ -125,7 +125,7 @@ const char *test_jacobian_left_matmul_exp_composite(void)
     memcpy(B->x, B_x, 9 * sizeof(double));
 
     /* Create A matrix */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1, 2, 3, 4, 5, 6, 7};
diff --git a/tests/jacobian_tests/affine/test_right_matmul.h b/tests/jacobian_tests/affine/test_right_matmul.h
index e9f40da..6ddbd8f 100644
--- a/tests/jacobian_tests/affine/test_right_matmul.h
+++ b/tests/jacobian_tests/affine/test_right_matmul.h
@@ -18,7 +18,7 @@ const char *test_jacobian_right_matmul_log(void)
     expr *x = new_variable(2, 2, 0, 4);
 
     /* Create sparse matrix A in CSR format (2x3) */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 4, NULL);
     int A_p[3] = {0, 2, 4};
     int A_i[4] = {0, 2, 0, 2};
     double A_x[4] = {1.0, 2.0, 3.0, 4.0};
@@ -67,7 +67,7 @@ const char *test_jacobian_right_matmul_log_vector(void)
     expr *x = new_variable(1, 3, 0, 3);
 
     /* Create sparse matrix A in CSR format (3x2) */
-    CSR_Matrix *A = new_csr_matrix(3, 2, 4);
+    CSR_Matrix *A = new_csr_matrix(3, 2, 4, NULL);
     int A_p[4] = {0, 1, 3, 4};
     int A_i[4] = {0, 0, 1, 1};
     double A_x[4] = {1.0, 2.0, 3.0, 4.0};
diff --git a/tests/jacobian_tests/affine/test_transpose.h b/tests/jacobian_tests/affine/test_transpose.h
index 4419315..28ec5f6 100644
--- a/tests/jacobian_tests/affine/test_transpose.h
+++ b/tests/jacobian_tests/affine/test_transpose.h
@@ -11,7 +11,7 @@
 const char *test_jacobian_transpose(void)
 {
     // A = [1 2; 3 4]
-    CSR_Matrix *A = new_csr_matrix(2, 2, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 2, 4, NULL);
     int A_p[3] = {0, 2, 4};
     int A_i[4] = {0, 1, 0, 1};
     double A_x[4] = {1, 2, 3, 4};
diff --git a/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h b/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h
index 9e7e6e5..82e172b 100644
--- a/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h
+++ b/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h
@@ -66,7 +66,7 @@ const char *test_jacobian_elementwise_mult_3(void)
          0 0 1 1 3 0 0 0 0 0
          0 0 1 -1 1 0 0 0 0 0]
     */
-    CSR_Matrix *A = new_csr_matrix(3, 10, 9);
+    CSR_Matrix *A = new_csr_matrix(3, 10, 9, NULL);
     double Ax_vals[9] = {1.0, 2.0, 1.0, 1.0, 3.0, 1.0, -1.0, 1.0};
     int Ai[9] = {2, 3, 2, 3, 4, 2, 3, 4};
     int Ap[4] = {0, 2, 5, 8};
@@ -80,7 +80,7 @@ const char *test_jacobian_elementwise_mult_3(void)
          0 0 0 0 0 0 0 1 -2 1]
 
     */
-    CSR_Matrix *B = new_csr_matrix(3, 10, 9);
+    CSR_Matrix *B = new_csr_matrix(3, 10, 9, NULL);
     double Bx_vals[9] = {1.0, 3.0, 1.0, 1.0, 4.0, 1.0, -2.0, 1.0};
     int Bi[9] = {7, 8, 7, 8, 9, 7, 8, 9};
     int Bp[4] = {0, 2, 5, 8};
@@ -131,7 +131,7 @@ const char *test_jacobian_elementwise_mult_4(void)
          0 0 1 1 3 0 0 0 0 0
          0 0 1 -1 1 0 0 0 0 0]
     */
-    CSR_Matrix *A = new_csr_matrix(3, 10, 9);
+    CSR_Matrix *A = new_csr_matrix(3, 10, 9, NULL);
     double Ax_vals[9] = {1.0, 2.0, 1.0, 1.0, 3.0, 1.0, -1.0, 1.0};
     int Ai[9] = {2, 3, 2, 3, 4, 2, 3, 4};
     int Ap[4] = {0, 2, 5, 8};
diff --git a/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h b/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h
index e1007f8..b6efc2f 100644
--- a/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h
+++ b/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h
@@ -63,7 +63,7 @@ const char *test_quad_over_lin3(void)
     // A = [0 0 1 2 3 0 0 0
     //      0 0 4 5 6 0 0]
 
-    CSR_Matrix *A = new_csr_matrix(2, 8, 6);
+    CSR_Matrix *A = new_csr_matrix(2, 8, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {2, 3, 4, 2, 3, 4};
     int Ap[3] = {0, 3, 6};
@@ -103,7 +103,7 @@ const char *test_quad_over_lin4(void)
     //      0 0 0 0 0 4 5 6
     //
 
-    CSR_Matrix *A = new_csr_matrix(2, 8, 6);
+    CSR_Matrix *A = new_csr_matrix(2, 8, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {5, 6, 7, 5, 6, 7};
     int Ap[3] = {0, 3, 6};
@@ -143,7 +143,7 @@ const char *test_quad_over_lin5(void)
     //      0 2 0 0 0 4 5 6
     //
 
-    CSR_Matrix *A = new_csr_matrix(2, 8, 9);
+    CSR_Matrix *A = new_csr_matrix(2, 8, 9, NULL);
     double Ax[9] = {1, 3, 1.0, 2.0, 3.0, 2, 4.0, 5.0, 6.0};
     int Ai[9] = {0, 3, 5, 6, 7, 1, 5, 6, 7};
     int Ap[3] = {0, 5, 9};
diff --git a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h
index 8139a33..bfad35c 100644
--- a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h
+++ b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h
@@ -123,7 +123,7 @@ const char *test_jacobian_quad_form_Ax(void)
     CSR_Matrix *A = new_csr_random(3, 4, 1.0);
 
     /* Q = [1 2 0; 2 3 0; 0 0 4] */
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
@@ -151,7 +151,7 @@ const char *test_jacobian_quad_form_exp(void)
     double u_vals[3] = {0.5, 1.0, 1.5};
 
     /* Q = [1 2 0; 2 3 0; 0 0 4] */
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
diff --git a/tests/jacobian_tests/composite/test_composite_exp.h b/tests/jacobian_tests/composite/test_composite_exp.h
index 725e153..8eb241e 100644
--- a/tests/jacobian_tests/composite/test_composite_exp.h
+++ b/tests/jacobian_tests/composite/test_composite_exp.h
@@ -11,7 +11,7 @@ const char *test_jacobian_composite_exp(void)
 {
     double u_vals[6] = {0, 0, 1, 2, 3, 0};
 
-    CSR_Matrix *A = new_csr_matrix(2, 6, 6);
+    CSR_Matrix *A = new_csr_matrix(2, 6, 6, NULL);
     double Ax[6] = {3, 2, 1, 2, 1, 1};
     int Ai[6] = {2, 3, 4, 2, 3, 4};
     int Ap[3] = {0, 3, 6};
@@ -46,7 +46,7 @@ const char *test_jacobian_composite_exp_add(void)
 {
     double u_vals[7] = {0, 0, 1, 1, 1, 2, 2};
 
-    CSR_Matrix *A = new_csr_matrix(3, 7, 9);
+    CSR_Matrix *A = new_csr_matrix(3, 7, 9, NULL);
     double Ax[9] = {1, 1, 1, 2, 2, 2, 3, 3, 3};
     int Ai[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4};
     int Ap[4] = {0, 3, 6, 9};
@@ -54,7 +54,7 @@ const char *test_jacobian_composite_exp_add(void)
     memcpy(A->i, Ai, 9 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
 
-    CSR_Matrix *B = new_csr_matrix(3, 7, 6);
+    CSR_Matrix *B = new_csr_matrix(3, 7, 6, NULL);
     double Bx[6] = {1, 1, 2, 2, 3, 3};
     int Bi[6] = {5, 6, 5, 6, 5, 6};
     int Bp[4] = {0, 2, 4, 6};
diff --git a/tests/jacobian_tests/other/test_quad_form.h b/tests/jacobian_tests/other/test_quad_form.h
index d832a83..2752824 100644
--- a/tests/jacobian_tests/other/test_quad_form.h
+++ b/tests/jacobian_tests/other/test_quad_form.h
@@ -14,7 +14,7 @@ const char *test_quad_form(void)
     // Q = [1 2 0; 2 3 0; 0 0 4]
     double u_vals[5] = {0, 0, 1, 2, 3};
     expr *x = new_variable(3, 1, 2, 5);
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
@@ -51,7 +51,7 @@ src/other/quad_form.c. const char *test_quad_form2(void)
     //         1 0 0 2 0 1]
 double u_vals[6] = {1, 2, 3, 4, 5, 6};
 expr *u = new_variable(6, 1, 0, 6);
-CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
 double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
 int Qi[5] = {0, 1, 0, 1, 2};
 int Qp[4] = {0, 2, 4, 5};
@@ -59,7 +59,7 @@ memcpy(Q->x, Qx, 5 * sizeof(double));
 memcpy(Q->i, Qi, 5 * sizeof(int));
 memcpy(Q->p, Qp, 4 * sizeof(int));
 
-CSR_Matrix *A = new_csr_matrix(3, 6, 10);
+CSR_Matrix *A = new_csr_matrix(3, 6, 10, NULL);
 double Ax[10] = {1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6, 1.0, 2.0, 1.0};
 int Ai[10] = {0, 2, 3, 4, 2, 3, 4, 0, 3, 5};
 int Ap[4] = {0, 4, 7, 10};
diff --git a/tests/numerical_diff/test_numerical_diff.h b/tests/numerical_diff/test_numerical_diff.h
index f77e3e9..05fe24f 100644
--- a/tests/numerical_diff/test_numerical_diff.h
+++ b/tests/numerical_diff/test_numerical_diff.h
@@ -9,7 +9,7 @@ const char *test_check_jacobian_composite_exp(void)
 {
     double u_vals[6] = {0, 0, 1, 2, 3, 0};
 
-    CSR_Matrix *A = new_csr_matrix(2, 6, 6);
+    CSR_Matrix *A = new_csr_matrix(2, 6, 6, NULL);
     double Ax[6] = {3, 2, 1, 2, 1, 1};
     int Ai[6] = {2, 3, 4, 2, 3, 4};
     int Ap[3] = {0, 3, 6};
@@ -36,7 +36,7 @@ const char *test_check_wsum_hess_exp_composite(void)
     double Ax[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
     int Ai[] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4};
     int Ap[] = {0, 5, 10, 15};
-    CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15);
+    CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15, NULL);
     memcpy(A_csr->x, Ax, 15 * sizeof(double));
     memcpy(A_csr->i, Ai, 15 * sizeof(int));
     memcpy(A_csr->p, Ap, 4 * sizeof(int));
diff --git a/tests/profiling/profile_left_matmul.h b/tests/profiling/profile_left_matmul.h
index df80a90..7663ed9 100644
--- a/tests/profiling/profile_left_matmul.h
+++ b/tests/profiling/profile_left_matmul.h
@@ -16,7 +16,7 @@ const char *profile_left_matmul(void)
     /* A @ X where A is 50 x 50 dense stored in CSR and X is 50 x 50 variable */
     int n = 100;
     expr *X = new_variable(n, n, 0, n * n);
-    CSR_Matrix *A = new_csr_matrix(n, n, n * n);
+    CSR_Matrix *A = new_csr_matrix(n, n, n * n, NULL);
     for (int i = 0; i < n * n; i++)
     {
         A->x[i] = 1.0; /* dense matrix of all ones */
diff --git a/tests/test_helpers.c b/tests/test_helpers.c
index 90d8446..86594c3 100644
--- a/tests/test_helpers.c
+++ b/tests/test_helpers.c
@@ -87,7 +87,7 @@ CSR_Matrix *new_csr_random(int m, int n, double density)
     }
     tmp_p[m] = nnz;
 
-    CSR_Matrix *A = new_csr_matrix(m, n, nnz);
+    CSR_Matrix *A = new_csr_matrix(m, n, nnz, NULL);
     memcpy(A->p, tmp_p, ((size_t) m + 1) * sizeof(int));
     memcpy(A->i, tmp_i, (size_t) nnz * sizeof(int));
     memcpy(A->x, tmp_x, (size_t) nnz * sizeof(double));
diff --git a/tests/utils/test_coo_matrix.h b/tests/utils/test_coo_matrix.h
index 53fa307..a5fc89c 100644
--- a/tests/utils/test_coo_matrix.h
+++ b/tests/utils/test_coo_matrix.h
@@ -13,7 +13,7 @@ const char *test_csr_to_coo(void)
      * [0.0  3.0  4.0]
      * [5.0  0.0  6.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 3, 6);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {0, 1, 1, 2, 0, 2};
     int Ap[4] = {0, 2, 4, 6};
@@ -48,7 +48,7 @@ const char *test_csr_to_coo_lower_triangular(void)
      * [2  5  6]
      * [3  6  9]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 3, 9);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 9, NULL);
     int Ap[4] = {0, 3, 6, 9};
     int Ai[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2};
     double Ax[9] = {1, 2, 3, 2, 5, 6, 3, 6, 9};
@@ -81,7 +81,7 @@ const char *test_csr_to_coo_lower_triangular(void)
 
 const char *test_refresh_lower_triangular_coo(void)
 {
-    CSR_Matrix *A = new_csr_matrix(3, 3, 9);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 9, NULL);
     int Ap[4] = {0, 3, 6, 9};
     int Ai[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2};
     double Ax[9] = {1, 2, 3, 2, 5, 6, 3, 6, 9};
diff --git a/tests/utils/test_csc_matrix.h b/tests/utils/test_csc_matrix.h
index 5459021..f9753d2 100644
--- a/tests/utils/test_csc_matrix.h
+++ b/tests/utils/test_csc_matrix.h
@@ -21,14 +21,14 @@
  */
 const char *test_ATA_alloc_simple(void)
 {
-    CSC_Matrix *A = new_csc_matrix(4, 3, 6);
+    CSC_Matrix *A = new_csc_matrix(4, 3, 6, NULL);
     int Ap[4] = {0, 2, 3, 6};
     int Ai[5] = {0, 2, 1, 2, 1};
     memcpy(A->p, Ap, 4 * sizeof(int));
     memcpy(A->i, Ai, 5 * sizeof(int));
 
     /* Compute C = A^T A */
-    CSR_Matrix *C = ATA_alloc(A);
+    CSR_Matrix *C = ATA_alloc(A, NULL);
     int expected_p[4] = {0, 2, 3, 5};
     int expected_i[5] = {0, 2, 1, 0, 2};
 
@@ -58,12 +58,12 @@ const char *test_ATA_alloc_simple(void)
 const char *test_ATA_alloc_diagonal_like(void)
 {
     /* Create A in CSC format (3 rows, 4 cols, 4 nonzeros) */
-    CSC_Matrix *A = new_csc_matrix(3, 4, 4);
+    CSC_Matrix *A = new_csc_matrix(3, 4, 4, NULL);
     int Ap[5] = {0, 1, 2, 3, 4};
     int Ai[4] = {0, 1, 2, 0};
     memcpy(A->p, Ap, 5 * sizeof(int));
     memcpy(A->i, Ai, 4 * sizeof(int));
-    CSR_Matrix *C = ATA_alloc(A);
+    CSR_Matrix *C = ATA_alloc(A, NULL);
 
     int expected_p[5] = {0, 2, 3, 4, 6};
     int expected_i[6] = {0, 3, 1, 2, 0, 3};
@@ -81,14 +81,14 @@ const char *test_ATA_alloc_diagonal_like(void)
 const char *test_ATA_alloc_random(void)
 {
     /* Create A in CSC format  */
-    CSC_Matrix *A = new_csc_matrix(10, 15, 15);
+    CSC_Matrix *A = new_csc_matrix(10, 15, 15, NULL);
     int Ap[16] = {0, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 11, 11, 11, 13, 15};
     int Ai[15] = {5, 0, 6, 9, 0, 5, 1, 3, 6, 0, 6, 3, 6, 6, 8};
     double Ax[15] = {7, 4, 8, 5, 7, 3, 7, 8, 5, 4, 8, 8, 3, 6, 5};
     memcpy(A->p, Ap, 16 * sizeof(int));
     memcpy(A->i, Ai, 15 * sizeof(int));
     memcpy(A->x, Ax, 15 * sizeof(double));
-    CSR_Matrix *C = ATA_alloc(A);
+    CSR_Matrix *C = ATA_alloc(A, NULL);
 
     int expected_p[16] = {0, 2, 2, 2, 2, 8, 11, 13, 14, 16, 21, 27, 27, 27, 33, 38};
     int expected_i[38] = {0,  6, 4,  5, 9,  10, 13, 14, 4, 5,  10, 0,  6,
@@ -120,7 +120,7 @@ const char *test_ATA_alloc_random2(void)
     /* Create A in CSC format  */
     int m = 15;
     int n = 10;
-    CSC_Matrix *A = new_csc_matrix(m, n, 15);
+    CSC_Matrix *A = new_csc_matrix(m, n, 15, NULL);
     int Ap[11] = {0, 2, 4, 6, 6, 9, 12, 12, 14, 14, 15};
     int Ai[15] = {9, 12, 3, 4, 1, 6, 4, 8, 13, 1, 3, 7, 5, 13, 6};
     double Ax[15] = {0.99, 0.9,  0.51, 0.64, 0.39, 0.29, 0.26, 0.91,
@@ -128,7 +128,7 @@ const char *test_ATA_alloc_random2(void)
     memcpy(A->p, Ap, 11 * sizeof(int));
     memcpy(A->i, Ai, 15 * sizeof(int));
     memcpy(A->x, Ax, 15 * sizeof(double));
-    CSR_Matrix *C = ATA_alloc(A);
+    CSR_Matrix *C = ATA_alloc(A, NULL);
 
     int expected_p[11] = {0, 1, 4, 7, 7, 10, 13, 13, 15, 15, 17};
     int expected_i[17] = {0, 1, 4, 5, 2, 5, 9, 1, 4, 7, 1, 2, 5, 4, 7, 2, 9};
@@ -162,7 +162,7 @@ const char *test_BTA_alloc_and_BTDA_fill(void)
      */
     int m = 4;
     int n = 3;
-    CSC_Matrix *A = new_csc_matrix(m, n, 6);
+    CSC_Matrix *A = new_csc_matrix(m, n, 6, NULL);
     int Ap_A[4] = {0, 2, 4, 6};
     int Ai_A[6] = {0, 2, 1, 3, 0, 2};
     double Ax_A[6] = {1.0, 4.0, 3.0, 6.0, 2.0, 5.0};
@@ -177,7 +177,7 @@ const char *test_BTA_alloc_and_BTDA_fill(void)
      * [0.0  4.0]
      */
     int p = 2;
-    CSC_Matrix *B = new_csc_matrix(m, p, 4);
+    CSC_Matrix *B = new_csc_matrix(m, p, 4, NULL);
     int Bp[3] = {0, 2, 4};
     int Bi[4] = {0, 2, 1, 3};
     double Bx[4] = {1.0, 3.0, 2.0, 4.0};
@@ -186,7 +186,7 @@ const char *test_BTA_alloc_and_BTDA_fill(void)
     memcpy(B->x, Bx, 4 * sizeof(double));
 
     /* Allocate C = B^T A (should be 2x3) */
-    CSR_Matrix *C = BTA_alloc(A, B);
+    CSR_Matrix *C = BTA_alloc(A, B, NULL);
 
     /* Sparsity pattern check before filling values */
     int expected_p[3] = {0, 2, 3};
diff --git a/tests/utils/test_csr_csc_conversion.h b/tests/utils/test_csr_csc_conversion.h
index efbdc9e..a9c101c 100644
--- a/tests/utils/test_csr_csc_conversion.h
+++ b/tests/utils/test_csr_csc_conversion.h
@@ -17,7 +17,7 @@ const char *test_csr_to_csc_split(void)
      * [0.0  2.0  0.0  0.0  0.0]
      * [0.0  0.0  0.0  4.0  0.0]
      */
-    CSR_Matrix *A = new_csr_matrix(4, 5, 5);
+    CSR_Matrix *A = new_csr_matrix(4, 5, 5, NULL);
     double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0};
     int Ai[5] = {0, 4, 2, 1, 3};
     int Ap[5] = {0, 2, 3, 4, 5};
@@ -29,7 +29,7 @@ const char *test_csr_to_csc_split(void)
     int *iwork = (int *) malloc(A->n * sizeof(int));
 
     /* First, fill sparsity pattern */
-    CSC_Matrix *C = csr_to_csc_alloc(A, iwork);
+    CSC_Matrix *C = csr_to_csc_alloc(A, iwork, NULL);
 
     /* Check sparsity pattern */
     int Cp_correct[6] = {0, 1, 2, 3, 4, 5};
@@ -62,7 +62,7 @@ const char *test_csc_to_csr_sparsity(void)
      * [0.0  4.0  0.0  0.0  0.0]
      * [0.0  0.0  0.0  5.0  0.0]
      */
-    CSC_Matrix *A = new_csc_matrix(4, 5, 5);
+    CSC_Matrix *A = new_csc_matrix(4, 5, 5, NULL);
     double Ax[5] = {1.0, 4.0, 3.0, 5.0, 2.0};
     int Ai[5] = {0, 2, 1, 3, 0};
     int Ap[6] = {0, 1, 2, 3, 4, 5};
@@ -74,7 +74,7 @@ const char *test_csc_to_csr_sparsity(void)
     int *iwork = (int *) malloc(A->m * sizeof(int));
 
     /* Fill sparsity pattern */
-    CSR_Matrix *C = csc_to_csr_alloc(A, iwork);
+    CSR_Matrix *C = csc_to_csr_alloc(A, iwork, NULL);
 
     /* Expected CSR format:
      * Row 0: [1.0 at col 0, 2.0 at col 4]
@@ -101,7 +101,7 @@ const char *test_csc_to_csr_sparsity(void)
 const char *test_csc_to_csr_values(void)
 {
     /* Create a 4x5 CSC matrix A */
-    CSC_Matrix *A = new_csc_matrix(4, 5, 5);
+    CSC_Matrix *A = new_csc_matrix(4, 5, 5, NULL);
     double Ax[5] = {1.0, 4.0, 3.0, 5.0, 2.0};
     int Ai[5] = {0, 2, 1, 3, 0};
     int Ap[6] = {0, 1, 2, 3, 4, 5};
@@ -113,7 +113,7 @@ const char *test_csc_to_csr_values(void)
     int *iwork = (int *) malloc(A->m * sizeof(int));
 
     /* Fill sparsity pattern */
-    CSR_Matrix *C = csc_to_csr_alloc(A, iwork);
+    CSR_Matrix *C = csc_to_csr_alloc(A, iwork, NULL);
 
     /* Fill values */
     csc_to_csr_fill_values(A, C, iwork);
@@ -138,7 +138,7 @@ const char *test_csr_csc_csr_roundtrip(void)
      * [0.0  4.0  5.0  0.0]
      * [6.0  0.0  7.0  8.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 4, 8);
+    CSR_Matrix *A = new_csr_matrix(3, 4, 8, NULL);
     double Ax[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
     int Ai[8] = {0, 1, 3, 1, 2, 0, 2, 3};
     int Ap[4] = {0, 3, 5, 8};
@@ -148,12 +148,12 @@ const char *test_csr_csc_csr_roundtrip(void)
 
     /* Convert CSR to CSC */
     int *iwork_csc = (int *) malloc(A->n * sizeof(int));
-    CSC_Matrix *B = csr_to_csc_alloc(A, iwork_csc);
+    CSC_Matrix *B = csr_to_csc_alloc(A, iwork_csc, NULL);
     csr_to_csc_fill_values(A, B, iwork_csc);
 
     /* Convert CSC back to CSR */
     int *iwork_csr = (int *) malloc(B->m * sizeof(int));
-    CSR_Matrix *C = csc_to_csr_alloc(B, iwork_csr);
+    CSR_Matrix *C = csc_to_csr_alloc(B, iwork_csr, NULL);
     csc_to_csr_fill_values(B, C, iwork_csr);
 
     /* C should match A */
diff --git a/tests/utils/test_csr_matrix.h b/tests/utils/test_csr_matrix.h
index 68e0128..ae455b0 100644
--- a/tests/utils/test_csr_matrix.h
+++ b/tests/utils/test_csr_matrix.h
@@ -17,7 +17,7 @@ const char *test_diag_csr_mult(void)
      * [0.0  3.0  4.0]
      * [5.0  0.0  6.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 3, 6);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {0, 1, 1, 2, 0, 2};
     int Ap[4] = {0, 2, 4, 6};
@@ -31,7 +31,7 @@ const char *test_diag_csr_mult(void)
      * [0.0  9.0  12.0]
      * [2.5  0.0  3.0]
      */
-    CSR_Matrix *C = new_csr_matrix(3, 3, 6);
+    CSR_Matrix *C = new_csr_matrix(3, 3, 6, NULL);
     diag_csr_mult(d, A, C);
 
     double Ax_correct[6] = {2.0, 4.0, 9.0, 12.0, 2.5, 3.0};
@@ -55,7 +55,7 @@ const char *test_diag_csr_mult(void)
 */
 const char *test_csr_sum(void)
 {
-    CSR_Matrix *A = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 5, NULL);
     double Ax[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
     int Ai[5] = {0, 2, 1, 0, 2};
     int Ap[4] = {0, 2, 3, 5};
@@ -63,7 +63,7 @@ const char *test_csr_sum(void)
     memcpy(A->i, Ai, 5 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
 
-    CSR_Matrix *B = new_csr_matrix(3, 3, 4);
+    CSR_Matrix *B = new_csr_matrix(3, 3, 4, NULL);
     double Bx[4] = {1.0, 2.0, 3.0, 6.0};
     int Bi[4] = {1, 0, 2, 1};
     int Bp[4] = {0, 1, 3, 4};
@@ -71,7 +71,7 @@ const char *test_csr_sum(void)
     memcpy(B->i, Bi, 4 * sizeof(int));
     memcpy(B->p, Bp, 4 * sizeof(int));
 
-    CSR_Matrix *C = new_csr_matrix(3, 3, 9);
+    CSR_Matrix *C = new_csr_matrix(3, 3, 9, NULL);
     sum_csr_matrices(A, B, C);
 
     double Cx_correct[9] = {1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 5.0};
@@ -97,7 +97,7 @@ const char *test_csr_sum(void)
 */
 const char *test_csr_sum2(void)
 {
-    CSR_Matrix *A = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *A = new_csr_matrix(3, 3, 5, NULL);
     double Ax[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
     int Ai[5] = {0, 2, 2, 0, 2};
     int Ap[4] = {0, 2, 3, 5};
@@ -105,7 +105,7 @@ const char *test_csr_sum2(void)
     memcpy(A->i, Ai, 5 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
 
-    CSR_Matrix *B = new_csr_matrix(3, 3, 4);
+    CSR_Matrix *B = new_csr_matrix(3, 3, 4, NULL);
     double Bx[4] = {1.0, 2.0, 3.0, 6.0};
     int Bi[4] = {1, 0, 2, 1};
     int Bp[4] = {0, 1, 3, 4};
@@ -113,7 +113,7 @@ const char *test_csr_sum2(void)
     memcpy(B->i, Bi, 4 * sizeof(int));
     memcpy(B->p, Bp, 4 * sizeof(int));
 
-    CSR_Matrix *C = new_csr_matrix(3, 3, 8);
+    CSR_Matrix *C = new_csr_matrix(3, 3, 8, NULL);
     sum_csr_matrices(A, B, C);
 
     double Cx_correct[8] = {1, 1, 2, 2, 6, 4, 6, 5};
@@ -134,7 +134,7 @@ const char *test_csr_sum2(void)
 
 const char *test_transpose(void)
 {
-    CSR_Matrix *A = new_csr_matrix(4, 5, 5);
+    CSR_Matrix *A = new_csr_matrix(4, 5, 5, NULL);
     double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0};
     int Ai[5] = {0, 4, 1, 0, 1};
     int Ap[5] = {0, 2, 3, 4, 5};
@@ -167,7 +167,7 @@ A = [1 0 0 0 1
 */
 const char *test_csr_vecmat_values_sparse(void)
 {
-    CSR_Matrix *A = new_csr_matrix(4, 5, 5);
+    CSR_Matrix *A = new_csr_matrix(4, 5, 5, NULL);
     double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0};
     int Ai[5] = {0, 4, 1, 0, 1};
     int Ap[5] = {0, 2, 3, 4, 5};
@@ -177,7 +177,7 @@ const char *test_csr_vecmat_values_sparse(void)
 
     double z[4] = {1.0, 2.0, 3.0, 4.0};
 
-    CSR_Matrix *C = new_csr_matrix(1, 3, 3);
+    CSR_Matrix *C = new_csr_matrix(1, 3, 3, NULL);
     double Cx[3] = {0.0, 0.0, 0.0};
     int Ci[3] = {0, 1, 4};
     int Cp[2] = {0, 3};
@@ -212,14 +212,14 @@ const char *test_sum_all_rows_csr(void)
      * Sum all rows should give:
      * [6.0  5.0  10.0  7.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 4, 7);
+    CSR_Matrix *A = new_csr_matrix(3, 4, 7, NULL);
     double Ax[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
     int Ai[7] = {0, 1, 1, 2, 0, 2, 3};
     int Ap[4] = {0, 2, 4, 7};
     memcpy(A->x, Ax, 7 * sizeof(double));
     memcpy(A->i, Ai, 7 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
-    CSR_Matrix *C = new_csr_matrix(1, 4, 4);
+    CSR_Matrix *C = new_csr_matrix(1, 4, 4, NULL);
     int_double_pair *pairs = new_int_double_pair_array(7);
     sum_all_rows_csr(A, C, pairs);
     double Cx_correct[4] = {6.0, 5.0, 10.0, 7.0};
@@ -260,7 +260,7 @@ const char *test_sum_block_of_rows_csr(void)
      *
      * Result C should be 3x4 matrix with the sums above
      */
-    CSR_Matrix *A = new_csr_matrix(9, 4, 18);
+    CSR_Matrix *A = new_csr_matrix(9, 4, 18, NULL);
 
     double Ax[18] = {1.0, 2.0,  /* row 0 */
                      3.0, 1.0,  /* row 1 */
@@ -289,7 +289,7 @@ const char *test_sum_block_of_rows_csr(void)
     memcpy(A->p, Ap, 10 * sizeof(int));
 
     /* Allocate C for 3 blocks and enough space for all nonzeros */
-    CSR_Matrix *C = new_csr_matrix(3, 4, 12);
+    CSR_Matrix *C = new_csr_matrix(3, 4, 12, NULL);
     int_double_pair *pairs = new_int_double_pair_array(18);
 
     sum_block_of_rows_csr(A, C, pairs, 3);
@@ -338,7 +338,7 @@ const char *test_sum_evenly_spaced_rows_csr(void)
             row 1: sum of rows 1, 4, 7 = [1 4 6 0]
             row 2: sum of rows 2, 5, 8 = [3 2 4 11]
     */
-    CSR_Matrix *A = new_csr_matrix(9, 4, 18);
+    CSR_Matrix *A = new_csr_matrix(9, 4, 18, NULL);
 
     double Ax[18] = {1.0, 2.0,  /* row 0 */
                      3.0, 1.0,  /* row 1 */
@@ -367,7 +367,7 @@ const char *test_sum_evenly_spaced_rows_csr(void)
     memcpy(A->p, Ap, 10 * sizeof(int));
 
     /* Allocate C for 3 rows (row_spacing=3) and enough space for all nonzeros */
-    CSR_Matrix *C = new_csr_matrix(3, 4, 10);
+    CSR_Matrix *C = new_csr_matrix(3, 4, 10, NULL);
     int_double_pair *pairs = new_int_double_pair_array(18);
 
     sum_evenly_spaced_rows_csr(A, C, pairs, 3);
@@ -401,7 +401,7 @@ const char *test_AT_alloc_and_fill(void)
      * [0.0  3.0  0.0  4.0]
      * [5.0  0.0  6.0  0.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 4, 6);
+    CSR_Matrix *A = new_csr_matrix(3, 4, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {0, 2, 1, 3, 0, 2};
     int Ap[4] = {0, 2, 4, 6};
@@ -411,7 +411,7 @@ const char *test_AT_alloc_and_fill(void)
 
     /* Allocate A^T (should be 4x3) */
     int *iwork = (int *) malloc(A->n * sizeof(int));
-    CSR_Matrix *AT = AT_alloc(A, iwork);
+    CSR_Matrix *AT = AT_alloc(A, iwork, NULL);
 
     /* Fill values of A^T */
     AT_fill_values(A, AT, iwork);
@@ -454,7 +454,7 @@ const char *test_kron_identity_csr(void)
      * [3  0 | 0  0 | 4  0]
      * [0  3 | 0  0 | 0  4]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 4, NULL);
     double Ax[4] = {1.0, 2.0, 3.0, 4.0};
     int Ai[4] = {0, 2, 0, 2};
     int Ap[3] = {0, 2, 4};
diff --git a/tests/utils/test_linalg_sparse_matmuls.h b/tests/utils/test_linalg_sparse_matmuls.h
index 120f99b..8bb58b5 100644
--- a/tests/utils/test_linalg_sparse_matmuls.h
+++ b/tests/utils/test_linalg_sparse_matmuls.h
@@ -9,14 +9,14 @@
 #include "utils/CSR_Matrix.h"
 #include "utils/linalg_sparse_matmuls.h"
 
-/* Test block_left_multiply_fill_sparsity with simple case: single block */
+/* Test block_left_multiply_alloc with simple case: single block */
 const char *test_block_left_multiply_single_block(void)
 {
     /* A is 2x3 CSR:
      * [1.0  0.0  0.0]
      * [0.0  1.0  1.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 3);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 3, NULL);
     double Ax[3] = {1.0, 1.0, 1.0};
     int Ai[3] = {0, 1, 2};
     int Ap[3] = {0, 1, 3};
@@ -29,7 +29,7 @@ const char *test_block_left_multiply_single_block(void)
      * [1.0  0.0]
      * [0.0  1.0]
      */
-    CSC_Matrix *J = new_csc_matrix(3, 2, 3);
+    CSC_Matrix *J = new_csc_matrix(3, 2, 3, NULL);
     double Jx[3] = {1.0, 1.0, 1.0};
     int Ji[3] = {0, 1, 2};
     int Jp[3] = {0, 2, 3};
@@ -38,7 +38,7 @@ const char *test_block_left_multiply_single_block(void)
     memcpy(J->p, Jp, 3 * sizeof(int));
 
     /* Compute C = A @ J1 (p=1 means just one block) */
-    CSC_Matrix *C = block_left_multiply_fill_sparsity(A, J, 1);
+    CSC_Matrix *C = block_left_multiply_alloc(A, J, 1, NULL);
 
     /* Expected C is 2x2:
      * C[0,0] = A[0,:] @ J[:,0] = 1.0 * 1.0 = 1.0 (row 0 has column 0, J col 0 has
@@ -60,14 +60,14 @@ const char *test_block_left_multiply_single_block(void)
     return NULL;
 }
 
-/* Test block_left_multiply_fill_sparsity with two blocks */
+/* Test block_left_multiply_alloc with two blocks */
 const char *test_block_left_multiply_two_blocks(void)
 {
     /* A is 2x2 CSR:
      * [1.0  0.0]
      * [0.0  1.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 2, 2);
+    CSR_Matrix *A = new_csr_matrix(2, 2, 2, NULL);
     double Ax[2] = {1.0, 1.0};
     int Ai[2] = {0, 1};
     int Ap[3] = {0, 1, 2};
@@ -88,7 +88,7 @@ const char *test_block_left_multiply_two_blocks(void)
      * [0.0  1.0  0.0]
      * [0.0  0.0  1.0]
      */
-    CSC_Matrix *J = new_csc_matrix(4, 3, 3);
+    CSC_Matrix *J = new_csc_matrix(4, 3, 3, NULL);
     double Jx[3] = {1.0, 1.0, 1.0};
     int Ji[3] = {0, 2, 3};
     int Jp[4] = {0, 1, 2, 3};
@@ -109,7 +109,7 @@ const char *test_block_left_multiply_two_blocks(void)
      * [0.0  0.0  0.0]
      * [0.0  1.0  1.0]
      */
-    CSC_Matrix *C = block_left_multiply_fill_sparsity(A, J, 2);
+    CSC_Matrix *C = block_left_multiply_alloc(A, J, 2, NULL);
     block_left_multiply_fill_values(A, J, C);
 
     int expected_p2[4] = {0, 1, 2, 3};
@@ -127,11 +127,11 @@ const char *test_block_left_multiply_two_blocks(void)
     return NULL;
 }
 
-/* Test block_left_multiply_fill_sparsity with all zero column in J */
+/* Test block_left_multiply_alloc with all zero column in J */
 const char *test_block_left_multiply_zero_column(void)
 {
     /* A is 2x2 CSR (identity) */
-    CSR_Matrix *A = new_csr_matrix(2, 2, 2);
+    CSR_Matrix *A = new_csr_matrix(2, 2, 2, NULL);
     double Ax[2] = {1.0, 1.0};
     int Ai[2] = {0, 1};
     int Ap[3] = {0, 1, 2};
@@ -143,7 +143,7 @@ const char *test_block_left_multiply_zero_column(void)
      * [1.0  0.0]
      * [0.0  0.0]
      */
-    CSC_Matrix *J = new_csc_matrix(2, 2, 1);
+    CSC_Matrix *J = new_csc_matrix(2, 2, 1, NULL);
     double Jx[1] = {1.0};
     int Ji[1] = {0};
     int Jp[3] = {0, 1, 1}; /* Column 0 has one nonzero, column 1 is empty */
@@ -151,7 +151,7 @@ const char *test_block_left_multiply_zero_column(void)
     memcpy(J->i, Ji, 1 * sizeof(int));
     memcpy(J->p, Jp, 3 * sizeof(int));
 
-    CSC_Matrix *C = block_left_multiply_fill_sparsity(A, J, 1);
+    CSC_Matrix *C = block_left_multiply_alloc(A, J, 1, NULL);
 
     int expected_p3[3] = {0, 1, 1};
     int expected_i3[1] = {0};
@@ -174,7 +174,7 @@ const char *test_csr_csc_matmul_alloc_basic(void)
      * [0.0  1.0]
      * [1.0  1.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 2, 4);
+    CSR_Matrix *A = new_csr_matrix(3, 2, 4, NULL);
     double Ax[4] = {1.0, 1.0, 1.0, 1.0};
     int Ai[4] = {0, 1, 0, 1};
     int Ap[4] = {0, 1, 2, 4};
@@ -186,7 +186,7 @@ const char *test_csr_csc_matmul_alloc_basic(void)
      * [1.0  0.0  1.0]
      * [0.0  1.0  1.0]
      */
-    CSC_Matrix *B = new_csc_matrix(2, 3, 4);
+    CSC_Matrix *B = new_csc_matrix(2, 3, 4, NULL);
     double Bx[4] = {1.0, 1.0, 1.0, 1.0};
     int Bi[4] = {0, 1, 0, 1};
     int Bp[4] = {0, 1, 2, 4};
@@ -199,7 +199,7 @@ const char *test_csr_csc_matmul_alloc_basic(void)
      *      [0, 1, 1],
      *      [1, 1, 2]]
      */
-    CSR_Matrix *C = csr_csc_matmul_alloc(A, B);
+    CSR_Matrix *C = csr_csc_matmul_alloc(A, B, NULL);
 
     int expected_p4[4] = {0, 2, 4, 7};
     int expected_i4[7] = {0, 2, 1, 2, 0, 1, 2};
@@ -221,7 +221,7 @@ const char *test_csr_csc_matmul_alloc_sparse(void)
      * [1.0  0.0  0.0]
      * [0.0  0.0  1.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 2);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 2, NULL);
     double Ax[2] = {1.0, 1.0};
     int Ai[2] = {0, 2};
     int Ap[3] = {0, 1, 2};
@@ -234,7 +234,7 @@ const char *test_csr_csc_matmul_alloc_sparse(void)
      * [0.0  0.0]
      * [0.0  1.0]
      */
-    CSC_Matrix *B = new_csc_matrix(3, 2, 2);
+    CSC_Matrix *B = new_csc_matrix(3, 2, 2, NULL);
     double Bx[2] = {1.0, 1.0};
     int Bi[2] = {0, 2};
     int Bp[3] = {0, 1, 2};
@@ -246,7 +246,7 @@ const char *test_csr_csc_matmul_alloc_sparse(void)
      * C = [[1, 0],
      *      [0, 1]]
      */
-    CSR_Matrix *C = csr_csc_matmul_alloc(A, B);
+    CSR_Matrix *C = csr_csc_matmul_alloc(A, B, NULL);
 
     int expected_p5[3] = {0, 1, 2};
     int expected_i5[2] = {0, 1};
@@ -268,7 +268,7 @@ const char *test_block_left_multiply_vec_single_block(void)
      * [1.0  0.0  2.0]
      * [0.0  3.0  0.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 3);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 3, NULL);
     double Ax[3] = {1.0, 3.0, 2.0};
     int Ai[3] = {0, 1, 2};
     int Ap[3] = {0, 2, 3};
@@ -300,7 +300,7 @@ const char *test_block_left_multiply_vec_two_blocks(void)
      * [1.0  2.0  0.0]
      * [0.0  3.0  4.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 4, NULL);
     double Ax[4] = {1.0, 2.0, 3.0, 4.0};
     int Ai[4] = {0, 1, 1, 2};
     int Ap[3] = {0, 2, 4};
@@ -335,7 +335,7 @@ const char *test_block_left_multiply_vec_sparse(void)
      * [0.0  0.0  3.0  0.0]
      * [0.0  0.0  0.0  4.0]
      */
-    CSR_Matrix *A = new_csr_matrix(3, 4, 3);
+    CSR_Matrix *A = new_csr_matrix(3, 4, 3, NULL);
     double Ax[3] = {2.0, 3.0, 4.0};
     int Ai[3] = {0, 2, 3};
     int Ap[4] = {0, 1, 2, 3};
@@ -369,7 +369,7 @@ const char *test_block_left_multiply_vec_three_blocks(void)
      * [1.0  2.0]
      * [3.0  4.0]
      */
-    CSR_Matrix *A = new_csr_matrix(2, 2, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 2, 4, NULL);
     double Ax[4] = {1.0, 2.0, 3.0, 4.0};
     int Ai[4] = {0, 1, 0, 1};
     int Ap[3] = {0, 2, 4};
diff --git a/tests/utils/test_linalg_utils_matmul_chain_rule.h b/tests/utils/test_linalg_utils_matmul_chain_rule.h
index 0c66589..a2ae8ed 100644
--- a/tests/utils/test_linalg_utils_matmul_chain_rule.h
+++ b/tests/utils/test_linalg_utils_matmul_chain_rule.h
@@ -34,7 +34,7 @@ const char *test_YT_kron_I(void)
     int m = 2, k = 2, n = 2;
 
     /* J is 4x3 CSC */
-    CSC_Matrix *J = new_csc_matrix(4, 3, 5);
+    CSC_Matrix *J = new_csc_matrix(4, 3, 5, NULL);
     int Jp[4] = {0, 2, 3, 5};
     int Ji[5] = {0, 2, 1, 0, 3};
     double Jx[5] = {1.0, 3.0, 1.0, 2.0, 1.0};
@@ -45,7 +45,7 @@ const char *test_YT_kron_I(void)
     /* Y col-major: Y[0,0]=1, Y[1,0]=2, Y[0,1]=3, Y[1,1]=4 */
     double Y[4] = {1.0, 2.0, 3.0, 4.0};
 
-    CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J);
+    CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J, NULL);
 
     /* Expected CSR (from scipy) */
     int exp_p[5] = {0, 2, 4, 6, 8};
@@ -86,7 +86,7 @@ const char *test_YT_kron_I_larger(void)
     int m = 3, k = 2, n = 3;
 
     /* J is 6x4 CSC */
-    CSC_Matrix *J = new_csc_matrix(6, 4, 8);
+    CSC_Matrix *J = new_csc_matrix(6, 4, 8, NULL);
     int Jp[5] = {0, 2, 4, 6, 8};
     int Ji[8] = {0, 3, 2, 4, 1, 5, 0, 3};
     double Jx[8] = {1.0, 2.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0};
@@ -97,7 +97,7 @@ const char *test_YT_kron_I_larger(void)
     /* Y col-major */
     double Y[6] = {1.0, 3.0, 0.5, 1.0, 2.0, 0.5};
 
-    CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J);
+    CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J, NULL);
 
     /* Expected CSR (from scipy) */
     int exp_p[10] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18};
@@ -144,7 +144,7 @@ const char *test_I_kron_X(void)
     int m = 2, k = 2, n = 2;
 
     /* J is 4x3 CSC */
-    CSC_Matrix *J = new_csc_matrix(4, 3, 5);
+    CSC_Matrix *J = new_csc_matrix(4, 3, 5, NULL);
     int Jp[4] = {0, 2, 3, 5};
     int Ji[5] = {0, 2, 1, 0, 3};
     double Jx[5] = {1.0, 3.0, 1.0, 2.0, 1.0};
@@ -155,7 +155,7 @@ const char *test_I_kron_X(void)
     /* X col-major */
     double X[4] = {1.0, 2.0, 3.0, 4.0};
 
-    CSR_Matrix *C = I_kron_X_alloc(m, k, n, J);
+    CSR_Matrix *C = I_kron_X_alloc(m, k, n, J, NULL);
 
     /* Expected CSR */
     int exp_p[5] = {0, 3, 6, 8, 10};
@@ -195,7 +195,7 @@ const char *test_I_kron_X_larger(void)
     int m = 3, k = 2, n = 2;
 
     /* J is 4x4 CSC */
-    CSC_Matrix *J = new_csc_matrix(4, 4, 7);
+    CSC_Matrix *J = new_csc_matrix(4, 4, 7, NULL);
     int Jp[5] = {0, 2, 3, 5, 7};
     int Ji[7] = {0, 3, 1, 1, 2, 0, 3};
     double Jx[7] = {1.0, 2.0, 3.0, 1.0, 4.0, 2.0, 1.0};
@@ -206,7 +206,7 @@ const char *test_I_kron_X_larger(void)
     /* X col-major */
     double X[6] = {1.0, 2.0, 3.0, 0.5, 1.0, 0.5};
 
-    CSR_Matrix *C = I_kron_X_alloc(m, k, n, J);
+    CSR_Matrix *C = I_kron_X_alloc(m, k, n, J, NULL);
 
     /* Expected CSR */
     int exp_p[7] = {0, 4, 8, 12, 15, 18, 21};
diff --git a/tests/utils/test_matrix.h b/tests/utils/test_matrix.h
index c329a16..269e612 100644
--- a/tests/utils/test_matrix.h
+++ b/tests/utils/test_matrix.h
@@ -52,7 +52,7 @@ const char *test_dense_matrix_mult_vec_blocks(void)
 const char *test_sparse_vs_dense_mult_vec(void)
 {
     /* Build CSR for A = [1 2 3; 4 5 6] */
-    CSR_Matrix *csr = new_csr_matrix(2, 3, 6);
+    CSR_Matrix *csr = new_csr_matrix(2, 3, 6, NULL);
     int Ap[3] = {0, 3, 6};
     int Ai[6] = {0, 1, 2, 0, 1, 2};
     double Ax[6] = {1, 2, 3, 4, 5, 6};
@@ -104,7 +104,7 @@ const char *test_dense_matrix_trans(void)
    A = [1 2; 3 4], x = [1; 2; 3; 4], p = 2 */
 const char *test_sparse_vs_dense_mult_vec_blocks(void)
 {
-    CSR_Matrix *csr = new_csr_matrix(2, 2, 4);
+    CSR_Matrix *csr = new_csr_matrix(2, 2, 4, NULL);
     int Ap[3] = {0, 2, 4};
     int Ai[4] = {0, 1, 0, 1};
     double Ax[4] = {1, 2, 3, 4};
diff --git a/tests/wsum_hess/affine/test_left_matmul.h b/tests/wsum_hess/affine/test_left_matmul.h
index f5441b9..228e34b 100644
--- a/tests/wsum_hess/affine/test_left_matmul.h
+++ b/tests/wsum_hess/affine/test_left_matmul.h
@@ -54,7 +54,7 @@ const char *test_wsum_hess_left_matmul(void)
     expr *x = new_variable(3, 1, 0, 3);
 
     /* Create sparse matrix A in CSR format */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
@@ -99,7 +99,7 @@ const char *test_wsum_hess_left_matmul_exp_composite(void)
     expr *x = new_variable(3, 1, 0, 3);
 
     /* Create B matrix (3x3 all ones) */
-    CSR_Matrix *B = new_csr_matrix(3, 3, 9);
+    CSR_Matrix *B = new_csr_matrix(3, 3, 9, NULL);
     int B_p[4] = {0, 3, 6, 9};
     int B_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2};
     double B_x[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
@@ -108,7 +108,7 @@ const char *test_wsum_hess_left_matmul_exp_composite(void)
     memcpy(B->x, B_x, 9 * sizeof(double));
 
     /* Create A matrix */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
@@ -161,7 +161,7 @@ const char *test_wsum_hess_left_matmul_matrix(void)
     expr *x = new_variable(3, 2, 0, 6);
 
     /* Create sparse matrix A in CSR format */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 7);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 7, NULL);
     int A_p[5] = {0, 2, 4, 6, 7};
     int A_i[7] = {0, 2, 0, 2, 0, 2, 0};
     double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
diff --git a/tests/wsum_hess/affine/test_right_matmul.h b/tests/wsum_hess/affine/test_right_matmul.h
index a74df5c..ac3a6a9 100644
--- a/tests/wsum_hess/affine/test_right_matmul.h
+++ b/tests/wsum_hess/affine/test_right_matmul.h
@@ -24,7 +24,7 @@ const char *test_wsum_hess_right_matmul(void)
     expr *x = new_variable(2, 2, 0, 4);
 
     /* Create sparse matrix A in CSR format (2x3) */
-    CSR_Matrix *A = new_csr_matrix(2, 3, 4);
+    CSR_Matrix *A = new_csr_matrix(2, 3, 4, NULL);
     int A_p[3] = {0, 2, 4};
     int A_i[4] = {0, 2, 0, 2};
     double A_x[4] = {1.0, 2.0, 3.0, 4.0};
@@ -74,7 +74,7 @@ const char *test_wsum_hess_right_matmul_vector(void)
     expr *x = new_variable(1, 3, 0, 3);
 
     /* Create sparse matrix A in CSR format (3x2) */
-    CSR_Matrix *A = new_csr_matrix(3, 2, 4);
+    CSR_Matrix *A = new_csr_matrix(3, 2, 4, NULL);
     int A_p[4] = {0, 1, 3, 4};
     int A_i[4] = {0, 0, 1, 1};
     double A_x[4] = {1.0, 2.0, 3.0, 4.0};
diff --git a/tests/wsum_hess/affine/test_sum.h b/tests/wsum_hess/affine/test_sum.h
index 8ade78b..183375b 100644
--- a/tests/wsum_hess/affine/test_sum.h
+++ b/tests/wsum_hess/affine/test_sum.h
@@ -16,7 +16,7 @@ const char *test_wsum_hess_sum_exp_linear(void)
     double Ax[6] = {1, 1, 2, 3, 1, -1};
     int Ai[6] = {0, 1, 0, 1, 0, 1};
     int Ap[4] = {0, 2, 4, 6};
-    CSR_Matrix *A = new_csr_matrix(3, 2, 6);
+    CSR_Matrix *A = new_csr_matrix(3, 2, 6, NULL);
     memcpy(A->x, Ax, 6 * sizeof(double));
     memcpy(A->i, Ai, 6 * sizeof(int));
     memcpy(A->p, Ap, 4 * sizeof(int));
diff --git a/tests/wsum_hess/bivariate_full_dom/test_multiply.h b/tests/wsum_hess/bivariate_full_dom/test_multiply.h
index e85c7a1..a4ee7da 100644
--- a/tests/wsum_hess/bivariate_full_dom/test_multiply.h
+++ b/tests/wsum_hess/bivariate_full_dom/test_multiply.h
@@ -48,7 +48,7 @@ const char *test_wsum_hess_multiply_sparse_random(void)
      */
 
     /* Create A matrix (5x10) */
-    CSR_Matrix *A = new_csr_matrix(5, 10, 10);
+    CSR_Matrix *A = new_csr_matrix(5, 10, 10, NULL);
     double Ax[10] = {-1.44165273, -1.13687223, 0.55892257,  0.24912193,  0.84959744,
                      -0.23998915, 0.5913356,   -1.21627912, -0.50379166, 0.41531801};
     int Ai[10] = {1, 2, 4, 8, 2, 3, 8, 9, 1, 2};
@@ -58,7 +58,7 @@ const char *test_wsum_hess_multiply_sparse_random(void)
     memcpy(A->p, Ap, 6 * sizeof(int));
 
     /* Create B matrix (5x10) */
-    CSR_Matrix *B = new_csr_matrix(5, 10, 10);
+    CSR_Matrix *B = new_csr_matrix(5, 10, 10, NULL);
     double Bx[10] = {1.27549062,  0.04194731, -0.4356034,  0.405574,   1.34670487,
                      -0.57738638, 0.9411464,  -0.31563179, 1.90831766, -0.89802958};
     int Bi[10] = {0, 3, 5, 7, 0, 5, 0, 3, 7, 9};
@@ -132,7 +132,7 @@ const char *test_wsum_hess_multiply_linear_ops(void)
      */
 
     /* Create CSR matrix A */
-    CSR_Matrix *A = new_csr_matrix(4, 3, 6);
+    CSR_Matrix *A = new_csr_matrix(4, 3, 6, NULL);
     double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
     int Ai[6] = {0, 2, 1, 0, 2, 1};
     int Ap[5] = {0, 2, 3, 5, 6};
@@ -141,7 +141,7 @@ const char *test_wsum_hess_multiply_linear_ops(void)
     memcpy(A->p, Ap, 5 * sizeof(int));
 
     /* Create CSR matrix B */
-    CSR_Matrix *B = new_csr_matrix(4, 3, 8);
+    CSR_Matrix *B = new_csr_matrix(4, 3, 8, NULL);
     double Bx[8] = {1.0, 4.0, 2.0, 7.0, 3.0, 2.0, 4.0, -1.0};
     int Bi[8] = {0, 2, 1, 2, 0, 2, 1, 2};
     int Bp[5] = {0, 2, 4, 6, 8};
diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h
index ca414f7..df22bfa 100644
--- a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h
+++ b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h
@@ -208,7 +208,7 @@ const char *test_wsum_hess_quad_form_Ax(void)
     CSR_Matrix *A = new_csr_random(3, 4, 1.0);
 
     /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
@@ -237,7 +237,7 @@ const char *test_wsum_hess_quad_form_sin_Ax(void)
     CSR_Matrix *A = new_csr_random(3, 4, 1.0);
 
     /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
@@ -368,7 +368,7 @@ const char *test_wsum_hess_quad_form_exp(void)
     double w = 3.0;
 
     /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */
-    CSR_Matrix *Q = new_csr_matrix(3, 3, 5);
+    CSR_Matrix *Q = new_csr_matrix(3, 3, 5, NULL);
     double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0};
     int Qi[5] = {0, 1, 0, 1, 2};
     int Qp[4] = {0, 2, 4, 5};
diff --git a/tests/wsum_hess/elementwise_restricted_dom/test_log.h b/tests/wsum_hess/elementwise_restricted_dom/test_log.h
index 024ea35..9c1f1d4 100644
--- a/tests/wsum_hess/elementwise_restricted_dom/test_log.h
+++ b/tests/wsum_hess/elementwise_restricted_dom/test_log.h
@@ -58,7 +58,7 @@ const char *test_wsum_hess_exp_composite(void)
     double Ax[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
     int Ai[] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4};
     int Ap[] = {0, 5, 10, 15};
-    CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15);
+    CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15, NULL);
     memcpy(A_csr->x, Ax, 15 * sizeof(double));
     memcpy(A_csr->i, Ai, 15 * sizeof(int));
     memcpy(A_csr->p, Ap, 4 * sizeof(int));
diff --git a/tests/wsum_hess/other/test_quad_form.h b/tests/wsum_hess/other/test_quad_form.h
index 73f8a1d..91e35a2 100644
--- a/tests/wsum_hess/other/test_quad_form.h
+++ b/tests/wsum_hess/other/test_quad_form.h
@@ -17,7 +17,7 @@ const char *test_wsum_hess_quad_form(void)
      * [0 3 4 1]
      * [0 0 1 6]
      */
-    CSR_Matrix *Q = new_csr_matrix(4, 4, 10);
+    CSR_Matrix *Q = new_csr_matrix(4, 4, 10, NULL);
     double Qx[10] = {1.0, 2.0, 2.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 6.0};
     int Qi[10] = {0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
     int Qp[5] = {0, 2, 5, 8, 10};