From fe8cf7b0555bd1c5a48454d8e83c7e2798a6afff Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Wed, 25 Mar 2026 16:30:24 -0400 Subject: [PATCH 01/14] Unify constants and parameters into single parameter_expr type Constants and updatable parameters are now represented by a single parameter_expr node. Constants use param_id == PARAM_FIXED (-1), while updatable parameters use param_id >= 0. Bivariate ops (left_matmul, right_matmul, scalar_mult, vector_mult) accept an optional param_source node and refresh their internal data on forward/jacobian/hessian evaluation. Adds problem_register_params and problem_update_params for problem-level parameter management. Also adds update_values to the Matrix interface for parameter refresh. Co-Authored-By: Claude Opus 4.6 (1M context) --- include/affine.h | 2 +- include/bivariate.h | 28 +- include/problem.h | 8 + include/subexpr.h | 31 +- include/utils/matrix.h | 1 + src/affine/{constant.c => parameter.c} | 24 +- src/bivariate/left_matmul.c | 84 ++++- src/bivariate/right_matmul.c | 71 +++- .../{const_scalar_mult.c => scalar_mult.c} | 32 +- .../{const_vector_mult.c => vector_mult.c} | 29 +- src/problem.c | 32 ++ src/utils/dense_matrix.c | 7 + src/utils/sparse_matrix.c | 9 + tests/all_tests.c | 33 +- tests/forward_pass/affine/test_add.h | 2 +- tests/forward_pass/affine/test_sum.h | 6 +- ...e_constant.h => test_variable_parameter.h} | 2 +- tests/forward_pass/composite/test_composite.h | 2 +- tests/forward_pass/test_left_matmul_dense.h | 2 +- tests/forward_pass/test_prod_axis_one.h | 2 +- tests/forward_pass/test_prod_axis_zero.h | 2 +- tests/jacobian_tests/test_broadcast.h | 2 +- tests/jacobian_tests/test_left_matmul.h | 6 +- tests/jacobian_tests/test_right_matmul.h | 4 +- ...const_scalar_mult.h => test_scalar_mult.h} | 18 +- tests/jacobian_tests/test_transpose.h | 2 +- ...const_vector_mult.h => test_vector_mult.h} | 25 +- tests/problem/test_param_prob.h | 348 ++++++++++++++++++ tests/profiling/profile_left_matmul.h | 2 +- 
tests/wsum_hess/test_left_matmul.h | 6 +- tests/wsum_hess/test_right_matmul.h | 4 +- ...const_scalar_mult.h => test_scalar_mult.h} | 31 +- ...const_vector_mult.h => test_vector_mult.h} | 26 +- 33 files changed, 723 insertions(+), 160 deletions(-) rename src/affine/{constant.c => parameter.c} (70%) rename src/bivariate/{const_scalar_mult.c => scalar_mult.c} (78%) rename src/bivariate/{const_vector_mult.c => vector_mult.c} (81%) rename tests/forward_pass/affine/{test_variable_constant.h => test_variable_parameter.h} (91%) rename tests/jacobian_tests/{test_const_scalar_mult.h => test_scalar_mult.h} (81%) rename tests/jacobian_tests/{test_const_vector_mult.h => test_vector_mult.h} (76%) create mode 100644 tests/problem/test_param_prob.h rename tests/wsum_hess/{test_const_scalar_mult.h => test_scalar_mult.h} (71%) rename tests/wsum_hess/{test_const_vector_mult.h => test_vector_mult.h} (72%) diff --git a/include/affine.h b/include/affine.h index ada58f1..a07fbe2 100644 --- a/include/affine.h +++ b/include/affine.h @@ -32,7 +32,7 @@ expr *new_hstack(expr **args, int n_args, int n_vars); expr *new_promote(expr *child, int d1, int d2); expr *new_trace(expr *child); -expr *new_constant(int d1, int d2, int n_vars, const double *values); +expr *new_parameter(int d1, int d2, int param_id, int n_vars, const double *values); expr *new_variable(int d1, int d2, int var_id, int n_vars); expr *new_index(expr *child, int d1, int d2, const int *indices, int n_idxs); diff --git a/include/bivariate.h b/include/bivariate.h index 7260f33..8f5521c 100644 --- a/include/bivariate.h +++ b/include/bivariate.h @@ -30,22 +30,26 @@ expr *new_rel_entr_second_arg_scalar(expr *left, expr *right); /* Matrix multiplication: Z = X @ Y */ expr *new_matmul(expr *x, expr *y); -/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix */ -expr *new_left_matmul(expr *u, const CSR_Matrix *A); +/* Left matrix multiplication: A @ f(x) where A is a constant or parameter + * sparse matrix. 
param_node is NULL for fixed constants. */ +expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A); -/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix - * (row-major, m x n). Uses CBLAS for efficient computation. */ -expr *new_left_matmul_dense(expr *u, int m, int n, const double *data); +/* Left matrix multiplication: A @ f(x) where A is a constant or parameter + * dense matrix (row-major, m x n). Uses CBLAS for efficient computation. */ +expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, + const double *data); -/* Right matrix multiplication: f(x) @ A where A is a constant matrix */ -expr *new_right_matmul(expr *u, const CSR_Matrix *A); +/* Right matrix multiplication: f(x) @ A where A is a constant or parameter + * matrix. */ +expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A); -expr *new_right_matmul_dense(expr *u, int m, int n, const double *data); +expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n, + const double *data); -/* Constant scalar multiplication: a * f(x) where a is a constant double */ -expr *new_const_scalar_mult(double a, expr *child); +/* Scalar multiplication: a * f(x) where a comes from param_node */ +expr *new_scalar_mult(expr *param_node, expr *child); -/* Constant vector elementwise multiplication: a ∘ f(x) where a is constant */ -expr *new_const_vector_mult(const double *a, expr *child); +/* Vector elementwise multiplication: a ∘ f(x) where a comes from param_node */ +expr *new_vector_mult(expr *param_node, expr *child); #endif /* BIVARIATE_H */ diff --git a/include/problem.h b/include/problem.h index 9d7a446..3dc448e 100644 --- a/include/problem.h +++ b/include/problem.h @@ -46,6 +46,11 @@ typedef struct problem int n_vars; int total_constraint_size; + /* parameter support */ + expr **param_nodes; + int n_param_nodes; + int total_parameter_size; + /* allocated by new_problem */ double *constraint_values; double *gradient_values; @@ -76,6 +81,9 
@@ void problem_init_jacobian_coo(problem *prob); void problem_init_hessian_coo_lower_triangular(problem *prob); void free_problem(problem *prob); +void problem_register_params(problem *prob, expr **param_nodes, int n_param_nodes); +void problem_update_params(problem *prob, const double *theta); + double problem_objective_forward(problem *prob, const double *u); void problem_constraint_forward(problem *prob, const double *u); void problem_gradient(problem *prob); diff --git a/include/subexpr.h b/include/subexpr.h index b4b427c..9cf51bf 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -26,8 +26,20 @@ /* Forward declaration */ struct int_double_pair; +/* Parameter ID for fixed constants (not updatable) */ +#define PARAM_FIXED -1 + /* Type-specific expression structures that "inherit" from expr */ +/* Unified constant/parameter node. Constants use param_id == PARAM_FIXED. + * Updatable parameters use param_id >= 0 (offset into global theta). */ +typedef struct parameter_expr +{ + expr base; + int param_id; + bool has_been_refreshed; +} parameter_expr; + /* Linear operator: y = A * x + b */ typedef struct linear_op_expr { @@ -114,6 +126,8 @@ typedef struct left_matmul_expr CSC_Matrix *Jchild_CSC; CSC_Matrix *J_CSC; int *csc_to_csr_work; + expr *param_source; + void (*refresh_param_values)(struct left_matmul_expr *); } left_matmul_expr; /* Right matrix multiplication: y = f(x) * A where f(x) is an expression. 
@@ -127,19 +141,20 @@ typedef struct right_matmul_expr CSC_Matrix *CSC_work; } right_matmul_expr; -/* Constant scalar multiplication: y = a * child where a is a constant double */ -typedef struct const_scalar_mult_expr +/* Scalar multiplication: y = a * child where a comes from param_source */ +typedef struct scalar_mult_expr { expr base; - double a; -} const_scalar_mult_expr; + expr *param_source; +} scalar_mult_expr; -/* Constant vector elementwise multiplication: y = a \circ child for constant a */ -typedef struct const_vector_mult_expr +/* Vector elementwise multiplication: y = a \circ child where a comes from + * param_source */ +typedef struct vector_mult_expr { expr base; - double *a; /* length equals node->size */ -} const_vector_mult_expr; + expr *param_source; +} vector_mult_expr; /* Index/slicing: y = child[indices] where indices is a list of flat positions */ typedef struct index_expr diff --git a/include/utils/matrix.h b/include/utils/matrix.h index fe7db5f..49c55a5 100644 --- a/include/utils/matrix.h +++ b/include/utils/matrix.h @@ -31,6 +31,7 @@ typedef struct Matrix const CSC_Matrix *J, int p); void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J, CSC_Matrix *C); + void (*update_values)(struct Matrix *self, const double *new_values); void (*free_fn)(struct Matrix *self); } Matrix; diff --git a/src/affine/constant.c b/src/affine/parameter.c similarity index 70% rename from src/affine/constant.c rename to src/affine/parameter.c index 59da3e2..77bf26d 100644 --- a/src/affine/constant.c +++ b/src/affine/parameter.c @@ -16,38 +16,36 @@ * limitations under the License. 
*/ #include "affine.h" +#include "subexpr.h" #include #include static void forward(expr *node, const double *u) { - /* Constants don't depend on u; values are already set */ + /* Parameters/constants don't depend on u; values are already set */ (void) node; (void) u; } static void jacobian_init(expr *node) { - /* Constant jacobian is all zeros: size x n_vars with 0 nonzeros. - * new_csr_matrix uses calloc for row pointers, so they're already 0. */ + /* Zero jacobian: size x n_vars with 0 nonzeros. */ node->jacobian = new_csr_matrix(node->size, node->n_vars, 0); } static void eval_jacobian(expr *node) { - /* Constant jacobian never changes - nothing to evaluate */ (void) node; } static void wsum_hess_init(expr *node) { - /* Constant Hessian is all zeros: n_vars x n_vars with 0 nonzeros. */ + /* Zero Hessian: n_vars x n_vars with 0 nonzeros. */ node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0); } static void eval_wsum_hess(expr *node, const double *w) { - /* Constant Hessian is always zero - nothing to compute */ (void) node; (void) w; } @@ -58,12 +56,20 @@ static bool is_affine(const expr *node) return true; } -expr *new_constant(int d1, int d2, int n_vars, const double *values) +expr *new_parameter(int d1, int d2, int param_id, int n_vars, const double *values) { - expr *node = (expr *) calloc(1, sizeof(expr)); + parameter_expr *pnode = (parameter_expr *) calloc(1, sizeof(parameter_expr)); + expr *node = &pnode->base; init_expr(node, d1, d2, n_vars, forward, jacobian_init, eval_jacobian, is_affine, wsum_hess_init, eval_wsum_hess, NULL); - memcpy(node->value, values, node->size * sizeof(double)); + + pnode->param_id = param_id; + pnode->has_been_refreshed = true; + + if (values != NULL) + { + memcpy(node->value, values, node->size * sizeof(double)); + } return node; } diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c index 362d4f4..bd3cdb1 100644 --- a/src/bivariate/left_matmul.c +++ b/src/bivariate/left_matmul.c @@ -48,16 +48,34 
@@ #include "utils/utils.h" +static void refresh_param_values(left_matmul_expr *lnode) +{ + if (lnode->param_source == NULL) + { + return; + } + parameter_expr *param = (parameter_expr *) lnode->param_source; + if (param->has_been_refreshed) + { + return; + } + param->has_been_refreshed = true; + lnode->refresh_param_values(lnode); +} + static void forward(expr *node, const double *u) { + left_matmul_expr *lnode = (left_matmul_expr *) node; + refresh_param_values(lnode); + expr *x = node->left; /* child's forward pass */ node->left->forward(node->left, u); /* y = A_kron @ vec(f(x)) */ - Matrix *A = ((left_matmul_expr *) node)->A; - int n_blocks = ((left_matmul_expr *) node)->n_blocks; + Matrix *A = lnode->A; + int n_blocks = lnode->n_blocks; A->block_left_mult_vec(A, x->value, node->value, n_blocks); } @@ -74,11 +92,16 @@ static void free_type_data(expr *node) free_csc_matrix(lnode->Jchild_CSC); free_csc_matrix(lnode->J_CSC); free(lnode->csc_to_csr_work); + if (lnode->param_source != NULL) + { + free_expr(lnode->param_source); + } lnode->A = NULL; lnode->AT = NULL; lnode->Jchild_CSC = NULL; lnode->J_CSC = NULL; lnode->csc_to_csr_work = NULL; + lnode->param_source = NULL; } static void jacobian_init(expr *node) @@ -98,8 +121,9 @@ static void jacobian_init(expr *node) static void eval_jacobian(expr *node) { - expr *x = node->left; left_matmul_expr *lnode = (left_matmul_expr *) node; + refresh_param_values(lnode); + expr *x = node->left; CSC_Matrix *Jchild_CSC = lnode->Jchild_CSC; CSC_Matrix *J_CSC = lnode->J_CSC; @@ -132,9 +156,12 @@ static void wsum_hess_init(expr *node) static void eval_wsum_hess(expr *node, const double *w) { + left_matmul_expr *lnode = (left_matmul_expr *) node; + refresh_param_values(lnode); + /* compute A^T w*/ - Matrix *AT = ((left_matmul_expr *) node)->AT; - int n_blocks = ((left_matmul_expr *) node)->n_blocks; + Matrix *AT = lnode->AT; + int n_blocks = lnode->n_blocks; AT->block_left_mult_vec(AT, w, node->dwork, n_blocks); 
node->left->eval_wsum_hess(node->left, node->dwork); @@ -142,7 +169,33 @@ static void eval_wsum_hess(expr *node, const double *w) node->wsum_hess->nnz * sizeof(double)); } -expr *new_left_matmul(expr *u, const CSR_Matrix *A) +static void refresh_sparse_left(left_matmul_expr *lnode) +{ + Sparse_Matrix *sm_A = (Sparse_Matrix *) lnode->A; + Sparse_Matrix *sm_AT = (Sparse_Matrix *) lnode->AT; + lnode->A->update_values(lnode->A, lnode->param_source->value); + /* Recompute AT values from A */ + AT_fill_values(sm_A->csr, sm_AT->csr, lnode->base.iwork); +} + +static void refresh_dense_left(left_matmul_expr *lnode) +{ + Dense_Matrix *dm_A = (Dense_Matrix *) lnode->A; + int m = dm_A->base.m; + int n = dm_A->base.n; + lnode->A->update_values(lnode->A, lnode->param_source->value); + /* Recompute AT data (transpose of row-major A) */ + Dense_Matrix *dm_AT = (Dense_Matrix *) lnode->AT; + for (int i = 0; i < m; i++) + { + for (int j = 0; j < n; j++) + { + dm_AT->x[j * m + i] = dm_A->x[i * n + j]; + } + } +} + +expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) { /* We expect u->d1 == A->n. However, numpy's broadcasting rules allow users to do A @ u where u is (n, ) which in C is actually (1, n). 
In that case @@ -188,10 +241,19 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A) lnode->A = new_sparse_matrix(A); lnode->AT = sparse_matrix_trans((const Sparse_Matrix *) lnode->A, node->iwork); + /* parameter support */ + lnode->param_source = param_node; + if (param_node != NULL) + { + expr_retain(param_node); + lnode->refresh_param_values = refresh_sparse_left; + } + return node; } -expr *new_left_matmul_dense(expr *u, int m, int n, const double *data) +expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, + const double *data) { int d1, d2, n_blocks; if (u->d1 == n) @@ -227,5 +289,13 @@ expr *new_left_matmul_dense(expr *u, int m, int n, const double *data) lnode->A = new_dense_matrix(m, n, data); lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A); + /* parameter support */ + lnode->param_source = param_node; + if (param_node != NULL) + { + expr_retain(param_node); + lnode->refresh_param_values = refresh_dense_left; + } + return node; } diff --git a/src/bivariate/right_matmul.c b/src/bivariate/right_matmul.c index 1ebff1f..c342f5e 100644 --- a/src/bivariate/right_matmul.c +++ b/src/bivariate/right_matmul.c @@ -17,14 +17,53 @@ */ #include "affine.h" #include "bivariate.h" +#include "subexpr.h" #include "utils/CSR_Matrix.h" #include /* This file implements the atom 'right_matmul' corresponding to the operation y = f(x) @ A, where A is a given matrix and f(x) is an arbitrary expression. We implement this by expressing right matmul in terms of left matmul and - transpose: f(x) @ A = (A^T @ f(x)^T)^T. */ -expr *new_right_matmul(expr *u, const CSR_Matrix *A) + transpose: f(x) @ A = (A^T @ f(x)^T)^T. + + For the parameter case: + - param_source stores A values in CSR data order + - inner left_matmul stores AT as its A-matrix and A as its AT-matrix + - on refresh: update AT (inner's AT, the original A) from param_source, + then recompute A^T (inner's A) from the updated A. 
*/ + +/* Refresh for sparse right_matmul: param stores A in CSR data order. + Inner left_matmul: lnode->A = AT (transposed), lnode->AT = A (original). + So: update lnode->AT from param values, then recompute lnode->A. */ +static void refresh_sparse_right(left_matmul_expr *lnode) +{ + Sparse_Matrix *sm_AT_inner = (Sparse_Matrix *) lnode->A; + Sparse_Matrix *sm_A_inner = (Sparse_Matrix *) lnode->AT; + /* lnode->AT holds the original A; update its values from param */ + lnode->AT->update_values(lnode->AT, lnode->param_source->value); + /* Recompute A^T (lnode->A) from A (lnode->AT) */ + AT_fill_values(sm_A_inner->csr, sm_AT_inner->csr, lnode->base.iwork); +} + +static void refresh_dense_right(left_matmul_expr *lnode) +{ + Dense_Matrix *dm_AT_inner = (Dense_Matrix *) lnode->A; + Dense_Matrix *dm_A_inner = (Dense_Matrix *) lnode->AT; + int m_orig = dm_A_inner->base.m; /* original A is m x n */ + int n_orig = dm_A_inner->base.n; + /* Update original A (inner's AT) from param values */ + lnode->AT->update_values(lnode->AT, lnode->param_source->value); + /* Recompute A^T (inner's A) from A */ + for (int i = 0; i < m_orig; i++) + { + for (int j = 0; j < n_orig; j++) + { + dm_AT_inner->x[j * m_orig + i] = dm_A_inner->x[i * n_orig + j]; + } + } +} + +expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) { /* We can express right matmul using left matmul and transpose: u @ A = (A^T @ u^T)^T. 
*/ @@ -32,7 +71,18 @@ expr *new_right_matmul(expr *u, const CSR_Matrix *A) CSR_Matrix *AT = transpose(A, work_transpose); expr *u_transpose = new_transpose(u); - expr *left_matmul = new_left_matmul(u_transpose, AT); + expr *left_matmul = new_left_matmul(NULL, u_transpose, AT); + + /* If parameterized, attach param_source and custom refresh to inner + left_matmul */ + if (param_node != NULL) + { + left_matmul_expr *lnode = (left_matmul_expr *) left_matmul; + lnode->param_source = param_node; + expr_retain(param_node); + lnode->refresh_param_values = refresh_sparse_right; + } + expr *node = new_transpose(left_matmul); free_csr_matrix(AT); @@ -40,7 +90,8 @@ expr *new_right_matmul(expr *u, const CSR_Matrix *A) return node; } -expr *new_right_matmul_dense(expr *u, int m, int n, const double *data) +expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n, + const double *data) { /* We express: u @ A = (A^T @ u^T)^T A is m x n, so A^T is n x m. */ @@ -54,7 +105,17 @@ expr *new_right_matmul_dense(expr *u, int m, int n, const double *data) } expr *u_transpose = new_transpose(u); - expr *left_matmul_node = new_left_matmul_dense(u_transpose, n, m, AT); + expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); + + /* If parameterized, attach param_source and custom refresh */ + if (param_node != NULL) + { + left_matmul_expr *lnode = (left_matmul_expr *) left_matmul_node; + lnode->param_source = param_node; + expr_retain(param_node); + lnode->refresh_param_values = refresh_dense_right; + } + expr *node = new_transpose(left_matmul_node); free(AT); diff --git a/src/bivariate/const_scalar_mult.c b/src/bivariate/scalar_mult.c similarity index 78% rename from src/bivariate/const_scalar_mult.c rename to src/bivariate/scalar_mult.c index 4898389..5873706 100644 --- a/src/bivariate/const_scalar_mult.c +++ b/src/bivariate/scalar_mult.c @@ -22,17 +22,17 @@ #include #include -/* Constant scalar multiplication: y = a * child where a is a constant double */ 
+/* Scalar multiplication: y = a * child where a comes from param_source */ static void forward(expr *node, const double *u) { expr *child = node->left; + double a = ((scalar_mult_expr *) node)->param_source->value[0]; /* child's forward pass */ child->forward(child, u); /* local forward pass: multiply each element by scalar a */ - double a = ((const_scalar_mult_expr *) node)->a; for (int i = 0; i < node->size; i++) { node->value[i] = a * child->value[i]; @@ -55,7 +55,7 @@ static void jacobian_init(expr *node) static void eval_jacobian(expr *node) { expr *child = node->left; - double a = ((const_scalar_mult_expr *) node)->a; + double a = ((scalar_mult_expr *) node)->param_source->value[0]; /* evaluate child */ child->eval_jacobian(child); @@ -85,7 +85,7 @@ static void eval_wsum_hess(expr *node, const double *w) expr *x = node->left; x->eval_wsum_hess(x, w); - double a = ((const_scalar_mult_expr *) node)->a; + double a = ((scalar_mult_expr *) node)->param_source->value[0]; for (int j = 0; j < x->wsum_hess->nnz; j++) { node->wsum_hess->x[j] = a * x->wsum_hess->x[j]; @@ -94,21 +94,33 @@ static void eval_wsum_hess(expr *node, const double *w) static bool is_affine(const expr *node) { - /* Affine iff the child is affine */ return node->left->is_affine(node->left); } -expr *new_const_scalar_mult(double a, expr *child) +static void free_type_data(expr *node) { - const_scalar_mult_expr *mult_node = - (const_scalar_mult_expr *) calloc(1, sizeof(const_scalar_mult_expr)); + scalar_mult_expr *snode = (scalar_mult_expr *) node; + if (snode->param_source != NULL) + { + free_expr(snode->param_source); + snode->param_source = NULL; + } +} + +expr *new_scalar_mult(expr *param_node, expr *child) +{ + scalar_mult_expr *mult_node = + (scalar_mult_expr *) calloc(1, sizeof(scalar_mult_expr)); expr *node = &mult_node->base; init_expr(node, child->d1, child->d2, child->n_vars, forward, jacobian_init, - eval_jacobian, is_affine, wsum_hess_init, eval_wsum_hess, NULL); + eval_jacobian, 
is_affine, wsum_hess_init, eval_wsum_hess, + free_type_data); node->left = child; - mult_node->a = a; expr_retain(child); + mult_node->param_source = param_node; + expr_retain(param_node); + return node; } diff --git a/src/bivariate/const_vector_mult.c b/src/bivariate/vector_mult.c similarity index 81% rename from src/bivariate/const_vector_mult.c rename to src/bivariate/vector_mult.c index 65823a7..bf973ad 100644 --- a/src/bivariate/const_vector_mult.c +++ b/src/bivariate/vector_mult.c @@ -21,12 +21,13 @@ #include #include -/* Constant vector elementwise multiplication: y = a \circ child */ +/* Vector elementwise multiplication: y = a \circ child + * where a comes from param_source */ static void forward(expr *node, const double *u) { expr *child = node->left; - const double *a = ((const_vector_mult_expr *) node)->a; + const double *a = ((vector_mult_expr *) node)->param_source->value; /* child's forward pass */ child->forward(child, u); @@ -54,7 +55,7 @@ static void jacobian_init(expr *node) static void eval_jacobian(expr *node) { expr *x = node->left; - const double *a = ((const_vector_mult_expr *) node)->a; + const double *a = ((vector_mult_expr *) node)->param_source->value; /* evaluate x */ x->eval_jacobian(x); @@ -87,7 +88,7 @@ static void wsum_hess_init(expr *node) static void eval_wsum_hess(expr *node, const double *w) { expr *x = node->left; - const double *a = ((const_vector_mult_expr *) node)->a; + const double *a = ((vector_mult_expr *) node)->param_source->value; /* scale weights w by a */ for (int i = 0; i < node->size; i++) @@ -103,20 +104,23 @@ static void eval_wsum_hess(expr *node, const double *w) static void free_type_data(expr *node) { - const_vector_mult_expr *vnode = (const_vector_mult_expr *) node; - free(vnode->a); + vector_mult_expr *vnode = (vector_mult_expr *) node; + if (vnode->param_source != NULL) + { + free_expr(vnode->param_source); + vnode->param_source = NULL; + } } static bool is_affine(const expr *node) { - /* Affine iff the 
child is affine */ return node->left->is_affine(node->left); } -expr *new_const_vector_mult(const double *a, expr *child) +expr *new_vector_mult(expr *param_node, expr *child) { - const_vector_mult_expr *vnode = - (const_vector_mult_expr *) calloc(1, sizeof(const_vector_mult_expr)); + vector_mult_expr *vnode = + (vector_mult_expr *) calloc(1, sizeof(vector_mult_expr)); expr *node = &vnode->base; init_expr(node, child->d1, child->d2, child->n_vars, forward, jacobian_init, @@ -125,9 +129,8 @@ expr *new_const_vector_mult(const double *a, expr *child) node->left = child; expr_retain(child); - /* copy a vector */ - vnode->a = (double *) malloc(child->size * sizeof(double)); - memcpy(vnode->a, a, child->size * sizeof(double)); + vnode->param_source = param_node; + expr_retain(param_node); return node; } diff --git a/src/problem.c b/src/problem.c index 6dbd1a6..e32eae3 100644 --- a/src/problem.c +++ b/src/problem.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "problem.h" +#include "subexpr.h" #include "utils/CSR_sum.h" #include "utils/utils.h" #include @@ -302,6 +303,9 @@ void free_problem(problem *prob) { if (prob == NULL) return; + /* Free param_nodes array (weak refs, don't free the nodes) */ + free(prob->param_nodes); + /* Free allocated arrays */ free(prob->constraint_values); free(prob->gradient_values); @@ -327,6 +331,34 @@ void free_problem(problem *prob) free(prob); } +void problem_register_params(problem *prob, expr **param_nodes, int n_param_nodes) +{ + prob->n_param_nodes = n_param_nodes; + prob->param_nodes = (expr **) malloc(n_param_nodes * sizeof(expr *)); + prob->total_parameter_size = 0; + + for (int i = 0; i < n_param_nodes; i++) + { + prob->param_nodes[i] = param_nodes[i]; + prob->total_parameter_size += param_nodes[i]->size; + } +} + +void problem_update_params(problem *prob, const double *theta) +{ + for (int i = 0; i < prob->n_param_nodes; i++) + { + expr *pnode = prob->param_nodes[i]; + parameter_expr *param = (parameter_expr *) 
pnode; + int offset = param->param_id; + memcpy(pnode->value, theta + offset, pnode->size * sizeof(double)); + param->has_been_refreshed = false; + } + + /* Force re-evaluation of affine Jacobians on next call */ + prob->jacobian_called = false; +} + double problem_objective_forward(problem *prob, const double *u) { Timer timer; diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c index 5400bb2..37c9550 100644 --- a/src/utils/dense_matrix.c +++ b/src/utils/dense_matrix.c @@ -154,6 +154,12 @@ static void dense_block_left_mult_values(const Matrix *A, const CSC_Matrix *J, } } +static void dense_update_values(Matrix *self, const double *new_values) +{ + Dense_Matrix *dm = (Dense_Matrix *) self; + memcpy(dm->x, new_values, dm->base.m * dm->base.n * sizeof(double)); +} + static void dense_free(Matrix *A) { Dense_Matrix *dm = (Dense_Matrix *) A; @@ -170,6 +176,7 @@ Matrix *new_dense_matrix(int m, int n, const double *data) dm->base.block_left_mult_vec = dense_block_left_mult_vec; dm->base.block_left_mult_sparsity = dense_block_left_mult_sparsity; dm->base.block_left_mult_values = dense_block_left_mult_values; + dm->base.update_values = dense_update_values; dm->base.free_fn = dense_free; dm->x = (double *) malloc(m * n * sizeof(double)); memcpy(dm->x, data, m * n * sizeof(double)); diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 24ed539..076890a 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -18,6 +18,7 @@ #include "utils/linalg_sparse_matmuls.h" #include "utils/matrix.h" #include +#include static void sparse_block_left_mult_vec(const Matrix *self, const double *x, double *y, int p) @@ -40,6 +41,12 @@ static void sparse_block_left_mult_values(const Matrix *self, const CSC_Matrix * block_left_multiply_fill_values(sm->csr, J, C); } +static void sparse_update_values(Matrix *self, const double *new_values) +{ + Sparse_Matrix *sm = (Sparse_Matrix *) self; + memcpy(sm->csr->x, new_values, sm->csr->nnz * 
sizeof(double)); +} + static void sparse_free(Matrix *self) { Sparse_Matrix *sm = (Sparse_Matrix *) self; @@ -55,6 +62,7 @@ Matrix *new_sparse_matrix(const CSR_Matrix *A) sm->base.block_left_mult_vec = sparse_block_left_mult_vec; sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity; sm->base.block_left_mult_values = sparse_block_left_mult_values; + sm->base.update_values = sparse_update_values; sm->base.free_fn = sparse_free; sm->csr = new_csr(A); return &sm->base; @@ -69,6 +77,7 @@ Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork) sm->base.block_left_mult_vec = sparse_block_left_mult_vec; sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity; sm->base.block_left_mult_values = sparse_block_left_mult_values; + sm->base.update_values = sparse_update_values; sm->base.free_fn = sparse_free; sm->csr = AT; return &sm->base; diff --git a/tests/all_tests.c b/tests/all_tests.c index 8f839f4..273f510 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -11,7 +11,7 @@ #include "forward_pass/affine/test_neg.h" #include "forward_pass/affine/test_promote.h" #include "forward_pass/affine/test_sum.h" -#include "forward_pass/affine/test_variable_constant.h" +#include "forward_pass/affine/test_variable_parameter.h" #include "forward_pass/composite/test_composite.h" #include "forward_pass/elementwise/test_exp.h" #include "forward_pass/elementwise/test_log.h" @@ -22,8 +22,6 @@ #include "forward_pass/test_prod_axis_zero.h" #include "jacobian_tests/test_broadcast.h" #include "jacobian_tests/test_composite.h" -#include "jacobian_tests/test_const_scalar_mult.h" -#include "jacobian_tests/test_const_vector_mult.h" #include "jacobian_tests/test_elementwise_mult.h" #include "jacobian_tests/test_hstack.h" #include "jacobian_tests/test_index.h" @@ -41,9 +39,12 @@ #include "jacobian_tests/test_rel_entr_scalar_vector.h" #include "jacobian_tests/test_rel_entr_vector_scalar.h" #include "jacobian_tests/test_right_matmul.h" +#include 
"jacobian_tests/test_scalar_mult.h" #include "jacobian_tests/test_sum.h" #include "jacobian_tests/test_trace.h" #include "jacobian_tests/test_transpose.h" +#include "jacobian_tests/test_vector_mult.h" +#include "problem/test_param_prob.h" #include "problem/test_problem.h" #include "utils/test_cblas.h" #include "utils/test_coo_matrix.h" @@ -61,8 +62,6 @@ #include "wsum_hess/elementwise/test_trig.h" #include "wsum_hess/elementwise/test_xexp.h" #include "wsum_hess/test_broadcast.h" -#include "wsum_hess/test_const_scalar_mult.h" -#include "wsum_hess/test_const_vector_mult.h" #include "wsum_hess/test_hstack.h" #include "wsum_hess/test_index.h" #include "wsum_hess/test_left_matmul.h" @@ -77,9 +76,11 @@ #include "wsum_hess/test_rel_entr_scalar_vector.h" #include "wsum_hess/test_rel_entr_vector_scalar.h" #include "wsum_hess/test_right_matmul.h" +#include "wsum_hess/test_scalar_mult.h" #include "wsum_hess/test_sum.h" #include "wsum_hess/test_trace.h" #include "wsum_hess/test_transpose.h" +#include "wsum_hess/test_vector_mult.h" #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY @@ -124,10 +125,10 @@ int main(void) mu_run_test(test_jacobian_log_matrix, tests_run); mu_run_test(test_jacobian_composite_log, tests_run); mu_run_test(test_jacobian_composite_log_add, tests_run); - mu_run_test(test_jacobian_const_scalar_mult_log_vector, tests_run); - mu_run_test(test_jacobian_const_scalar_mult_log_matrix, tests_run); - mu_run_test(test_jacobian_const_vector_mult_log_vector, tests_run); - mu_run_test(test_jacobian_const_vector_mult_log_matrix, tests_run); + mu_run_test(test_jacobian_scalar_mult_log_vector, tests_run); + mu_run_test(test_jacobian_scalar_mult_log_matrix, tests_run); + mu_run_test(test_jacobian_vector_mult_log_vector, tests_run); + mu_run_test(test_jacobian_vector_mult_log_matrix, tests_run); mu_run_test(test_jacobian_rel_entr_vector_args_1, tests_run); mu_run_test(test_jacobian_rel_entr_vector_args_2, tests_run); mu_run_test(test_jacobian_rel_entr_matrix_args, tests_run); 
@@ -224,10 +225,10 @@ int main(void) mu_run_test(test_wsum_hess_quad_over_lin_xy, tests_run); mu_run_test(test_wsum_hess_quad_over_lin_yx, tests_run); mu_run_test(test_wsum_hess_quad_form, tests_run); - mu_run_test(test_wsum_hess_const_scalar_mult_log_vector, tests_run); - mu_run_test(test_wsum_hess_const_scalar_mult_log_matrix, tests_run); - mu_run_test(test_wsum_hess_const_vector_mult_log_vector, tests_run); - mu_run_test(test_wsum_hess_const_vector_mult_log_matrix, tests_run); + mu_run_test(test_wsum_hess_scalar_mult_log_vector, tests_run); + mu_run_test(test_wsum_hess_scalar_mult_log_matrix, tests_run); + mu_run_test(test_wsum_hess_vector_mult_log_vector, tests_run); + mu_run_test(test_wsum_hess_vector_mult_log_matrix, tests_run); mu_run_test(test_wsum_hess_multiply_linear_ops, tests_run); mu_run_test(test_wsum_hess_multiply_sparse_random, tests_run); mu_run_test(test_wsum_hess_multiply_1, tests_run); @@ -296,6 +297,12 @@ int main(void) mu_run_test(test_problem_jacobian_multi, tests_run); mu_run_test(test_problem_constraint_forward, tests_run); mu_run_test(test_problem_hessian, tests_run); + + printf("\n--- Parameter Tests ---\n"); + mu_run_test(test_param_scalar_mult_problem, tests_run); + mu_run_test(test_param_vector_mult_problem, tests_run); + mu_run_test(test_param_left_matmul_problem, tests_run); + mu_run_test(test_param_right_matmul_problem, tests_run); #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY diff --git a/tests/forward_pass/affine/test_add.h b/tests/forward_pass/affine/test_add.h index 11fb35c..12f8cb9 100644 --- a/tests/forward_pass/affine/test_add.h +++ b/tests/forward_pass/affine/test_add.h @@ -12,7 +12,7 @@ const char *test_addition(void) double u[2] = {3.0, 4.0}; double c[2] = {1.0, 2.0}; expr *var = new_variable(2, 1, 0, 2); - expr *const_node = new_constant(2, 1, 0, c); + expr *const_node = new_parameter(2, 1, PARAM_FIXED, 0, c); expr *sum = new_add(var, const_node); sum->forward(sum, u); double expected[2] = {4.0, 6.0}; diff --git 
a/tests/forward_pass/affine/test_sum.h b/tests/forward_pass/affine/test_sum.h index ae0bc37..bb3fe47 100644 --- a/tests/forward_pass/affine/test_sum.h +++ b/tests/forward_pass/affine/test_sum.h @@ -17,7 +17,7 @@ const char *test_sum_axis_neg1(void) Stored as: [1, 2, 3, 4, 5, 6] */ double values[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *const_node = new_constant(3, 2, 0, values); + expr *const_node = new_parameter(3, 2, PARAM_FIXED, 0, values); expr *log_node = new_log(const_node); expr *sum_node = new_sum(log_node, -1); sum_node->forward(sum_node, NULL); @@ -42,7 +42,7 @@ const char *test_sum_axis_0(void) Stored as: [1, 2, 3, 4, 5, 6] */ double values[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *const_node = new_constant(3, 2, 0, values); + expr *const_node = new_parameter(3, 2, PARAM_FIXED, 0, values); expr *log_node = new_log(const_node); expr *sum_node = new_sum(log_node, 0); sum_node->forward(sum_node, NULL); @@ -69,7 +69,7 @@ const char *test_sum_axis_1(void) Stored as: [1, 2, 3, 4, 5, 6] */ double values[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *const_node = new_constant(3, 2, 0, values); + expr *const_node = new_parameter(3, 2, PARAM_FIXED, 0, values); expr *log_node = new_log(const_node); expr *sum_node = new_sum(log_node, 1); sum_node->forward(sum_node, NULL); diff --git a/tests/forward_pass/affine/test_variable_constant.h b/tests/forward_pass/affine/test_variable_parameter.h similarity index 91% rename from tests/forward_pass/affine/test_variable_constant.h rename to tests/forward_pass/affine/test_variable_parameter.h index 9df0f1a..a2ed535 100644 --- a/tests/forward_pass/affine/test_variable_constant.h +++ b/tests/forward_pass/affine/test_variable_parameter.h @@ -21,7 +21,7 @@ const char *test_constant(void) { double c[2] = {5.0, 10.0}; double u[2] = {0.0, 0.0}; - expr *const_node = new_constant(2, 1, 0, c); + expr *const_node = new_parameter(2, 1, PARAM_FIXED, 0, c); const_node->forward(const_node, u); mu_assert("Constant test failed", 
cmp_double_array(const_node->value, c, 2)); free_expr(const_node); diff --git a/tests/forward_pass/composite/test_composite.h b/tests/forward_pass/composite/test_composite.h index 8839040..9a16657 100644 --- a/tests/forward_pass/composite/test_composite.h +++ b/tests/forward_pass/composite/test_composite.h @@ -16,7 +16,7 @@ const char *test_composite(void) /* Build tree: log(exp(x) + c) */ expr *var = new_variable(2, 1, 0, 2); expr *exp_node = new_exp(var); - expr *const_node = new_constant(2, 1, 0, c); + expr *const_node = new_parameter(2, 1, PARAM_FIXED, 0, c); expr *sum = new_add(exp_node, const_node); expr *log_node = new_log(sum); diff --git a/tests/forward_pass/test_left_matmul_dense.h b/tests/forward_pass/test_left_matmul_dense.h index a953806..4d9e1fd 100644 --- a/tests/forward_pass/test_left_matmul_dense.h +++ b/tests/forward_pass/test_left_matmul_dense.h @@ -24,7 +24,7 @@ const char *test_left_matmul_dense(void) double A_data[9] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; /* Build expression Z = A @ X */ - expr *Z = new_left_matmul_dense(X, 3, 3, A_data); + expr *Z = new_left_matmul_dense(NULL, X, 3, 3, A_data); /* Variable values in column-major order */ double u[9] = {1.0, 2.0, 3.0, /* first column */ diff --git a/tests/forward_pass/test_prod_axis_one.h b/tests/forward_pass/test_prod_axis_one.h index 7cf74e0..2d74b0a 100644 --- a/tests/forward_pass/test_prod_axis_one.h +++ b/tests/forward_pass/test_prod_axis_one.h @@ -16,7 +16,7 @@ const char *test_forward_prod_axis_one(void) Stored as: [1, 2, 3, 4, 5, 6] */ double values[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *const_node = new_constant(2, 3, 0, values); + expr *const_node = new_parameter(2, 3, PARAM_FIXED, 0, values); expr *prod_node = new_prod_axis_one(const_node); prod_node->forward(prod_node, NULL); diff --git a/tests/forward_pass/test_prod_axis_zero.h b/tests/forward_pass/test_prod_axis_zero.h index f87782a..aec2cb7 100644 --- a/tests/forward_pass/test_prod_axis_zero.h +++ 
b/tests/forward_pass/test_prod_axis_zero.h @@ -16,7 +16,7 @@ const char *test_forward_prod_axis_zero(void) Stored as: [1, 2, 3, 4, 5, 6] */ double values[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *const_node = new_constant(2, 3, 0, values); + expr *const_node = new_parameter(2, 3, PARAM_FIXED, 0, values); expr *prod_node = new_prod_axis_zero(const_node); prod_node->forward(prod_node, NULL); diff --git a/tests/jacobian_tests/test_broadcast.h b/tests/jacobian_tests/test_broadcast.h index e6ffa4f..72c42ea 100644 --- a/tests/jacobian_tests/test_broadcast.h +++ b/tests/jacobian_tests/test_broadcast.h @@ -141,7 +141,7 @@ const char *test_double_broadcast(void) /* form the expression x + b */ expr *x = new_variable(5, 1, 0, 5); - expr *b = new_constant(1, 5, 5, b_vals); + expr *b = new_parameter(1, 5, PARAM_FIXED, 5, b_vals); expr *bcast_x = new_broadcast(x, 5, 5); expr *bcast_b = new_broadcast(b, 5, 5); expr *sum = new_add(bcast_x, bcast_b); diff --git a/tests/jacobian_tests/test_left_matmul.h b/tests/jacobian_tests/test_left_matmul.h index 480679a..a565f1e 100644 --- a/tests/jacobian_tests/test_left_matmul.h +++ b/tests/jacobian_tests/test_left_matmul.h @@ -40,7 +40,7 @@ const char *test_jacobian_left_matmul_log(void) memcpy(A->x, A_x, 7 * sizeof(double)); expr *log_x = new_log(x); - expr *A_log_x = new_left_matmul(log_x, A); + expr *A_log_x = new_left_matmul(NULL, log_x, A); A_log_x->forward(A_log_x, x_vals); A_log_x->jacobian_init(A_log_x); @@ -84,7 +84,7 @@ const char *test_jacobian_left_matmul_log_matrix(void) memcpy(A->x, A_x, 7 * sizeof(double)); expr *log_x = new_log(x); - expr *A_log_x = new_left_matmul(log_x, A); + expr *A_log_x = new_left_matmul(NULL, log_x, A); A_log_x->forward(A_log_x, x_vals); A_log_x->jacobian_init(A_log_x); @@ -156,7 +156,7 @@ const char *test_jacobian_left_matmul_log_composite(void) expr *Bx = new_linear(x, B, NULL); expr *log_Bx = new_log(Bx); - expr *A_log_Bx = new_left_matmul(log_Bx, A); + expr *A_log_Bx = new_left_matmul(NULL, 
log_Bx, A); A_log_Bx->forward(A_log_Bx, x_vals); A_log_Bx->jacobian_init(A_log_Bx); diff --git a/tests/jacobian_tests/test_right_matmul.h b/tests/jacobian_tests/test_right_matmul.h index 6155b19..834e69a 100644 --- a/tests/jacobian_tests/test_right_matmul.h +++ b/tests/jacobian_tests/test_right_matmul.h @@ -27,7 +27,7 @@ const char *test_jacobian_right_matmul_log(void) memcpy(A->x, A_x, 4 * sizeof(double)); expr *log_x = new_log(x); - expr *log_x_A = new_right_matmul(log_x, A); + expr *log_x_A = new_right_matmul(NULL, log_x, A); log_x_A->forward(log_x_A, x_vals); log_x_A->jacobian_init(log_x_A); @@ -76,7 +76,7 @@ const char *test_jacobian_right_matmul_log_vector(void) memcpy(A->x, A_x, 4 * sizeof(double)); expr *log_x = new_log(x); - expr *log_x_A = new_right_matmul(log_x, A); + expr *log_x_A = new_right_matmul(NULL, log_x, A); log_x_A->forward(log_x_A, x_vals); log_x_A->jacobian_init(log_x_A); diff --git a/tests/jacobian_tests/test_const_scalar_mult.h b/tests/jacobian_tests/test_scalar_mult.h similarity index 81% rename from tests/jacobian_tests/test_const_scalar_mult.h rename to tests/jacobian_tests/test_scalar_mult.h index 55aca74..99e9cbd 100644 --- a/tests/jacobian_tests/test_const_scalar_mult.h +++ b/tests/jacobian_tests/test_scalar_mult.h @@ -1,14 +1,16 @@ #include +#include "affine.h" #include "bivariate.h" #include "elementwise_univariate.h" #include "expr.h" #include "minunit.h" +#include "subexpr.h" #include "test_helpers.h" -/* Test: y = a * log(x) where a is a scalar constant */ +/* Test: y = a * log(x) where a is a scalar parameter */ -const char *test_jacobian_const_scalar_mult_log_vector(void) +const char *test_jacobian_scalar_mult_log_vector(void) { /* Create variable x: [1.0, 2.0, 4.0] with 3 elements */ double u_vals[3] = {1.0, 2.0, 4.0}; @@ -18,8 +20,9 @@ const char *test_jacobian_const_scalar_mult_log_vector(void) expr *log_node = new_log(x); /* Create scalar mult node: y = 2.5 * log(x) */ - double a = 2.5; - expr *y = new_const_scalar_mult(a, 
log_node); + double a_val = 2.5; + expr *a_param = new_parameter(1, 1, PARAM_FIXED, 3, &a_val); + expr *y = new_scalar_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -44,7 +47,7 @@ const char *test_jacobian_const_scalar_mult_log_vector(void) return 0; } -const char *test_jacobian_const_scalar_mult_log_matrix(void) +const char *test_jacobian_scalar_mult_log_matrix(void) { /* Create variable x as 2x2 matrix: [[1.0, 2.0], [4.0, 8.0]] */ double u_vals[4] = {1.0, 2.0, 4.0, 8.0}; @@ -54,8 +57,9 @@ const char *test_jacobian_const_scalar_mult_log_matrix(void) expr *log_node = new_log(x); /* Create scalar mult node: y = 3.0 * log(x) */ - double a = 3.0; - expr *y = new_const_scalar_mult(a, log_node); + double a_val = 3.0; + expr *a_param = new_parameter(1, 1, PARAM_FIXED, 4, &a_val); + expr *y = new_scalar_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); diff --git a/tests/jacobian_tests/test_transpose.h b/tests/jacobian_tests/test_transpose.h index 96ae6b2..c9996a5 100644 --- a/tests/jacobian_tests/test_transpose.h +++ b/tests/jacobian_tests/test_transpose.h @@ -21,7 +21,7 @@ const char *test_jacobian_transpose(void) // X = [1 2; 3 4] (columnwise: x = [1 3 2 4]) expr *X = new_variable(2, 2, 0, 4); - expr *AX = new_left_matmul(X, A); + expr *AX = new_left_matmul(NULL, X, A); expr *transpose_AX = new_transpose(AX); double u[4] = {1, 3, 2, 4}; transpose_AX->forward(transpose_AX, u); diff --git a/tests/jacobian_tests/test_const_vector_mult.h b/tests/jacobian_tests/test_vector_mult.h similarity index 76% rename from tests/jacobian_tests/test_const_vector_mult.h rename to tests/jacobian_tests/test_vector_mult.h index 8688433..cd786ec 100644 --- a/tests/jacobian_tests/test_const_vector_mult.h +++ b/tests/jacobian_tests/test_vector_mult.h @@ -1,14 +1,16 @@ #include +#include "affine.h" #include "bivariate.h" #include "elementwise_univariate.h" #include "expr.h" #include "minunit.h" +#include "subexpr.h" #include "test_helpers.h" -/* Test: y 
= a ∘ log(x) where a is a constant vector */ +/* Test: y = a ∘ log(x) where a is a parameter vector */ -const char *test_jacobian_const_vector_mult_log_vector(void) +const char *test_jacobian_vector_mult_log_vector(void) { /* Create variable x: [1.0, 2.0, 4.0] */ double u_vals[3] = {1.0, 2.0, 4.0}; @@ -19,7 +21,8 @@ const char *test_jacobian_const_vector_mult_log_vector(void) /* Create vector mult node: y = [2.0, 3.0, 4.0] ∘ log(x) */ double a[3] = {2.0, 3.0, 4.0}; - expr *y = new_const_vector_mult(a, log_node); + expr *a_param = new_parameter(3, 1, PARAM_FIXED, 3, a); + expr *y = new_vector_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -28,11 +31,6 @@ const char *test_jacobian_const_vector_mult_log_vector(void) y->jacobian_init(y); y->eval_jacobian(y); - /* Expected jacobian (row-wise scaling): - * Row 0: 2.0 * [1/1] = [2.0] - * Row 1: 3.0 * [1/2] = [1.5] - * Row 2: 4.0 * [1/4] = [1.0] - */ double expected_x[3] = {2.0, 1.5, 1.0}; int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; @@ -48,7 +46,7 @@ const char *test_jacobian_const_vector_mult_log_vector(void) return 0; } -const char *test_jacobian_const_vector_mult_log_matrix(void) +const char *test_jacobian_vector_mult_log_matrix(void) { /* Create variable x as 2x2 matrix: [[1.0, 2.0], [4.0, 8.0]] */ double u_vals[4] = {1.0, 2.0, 4.0, 8.0}; @@ -59,7 +57,8 @@ const char *test_jacobian_const_vector_mult_log_matrix(void) /* Create vector mult node: y = [1.5, 2.5, 3.5, 4.5] ∘ log(x) */ double a[4] = {1.5, 2.5, 3.5, 4.5}; - expr *y = new_const_vector_mult(a, log_node); + expr *a_param = new_parameter(4, 1, PARAM_FIXED, 4, a); + expr *y = new_vector_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -68,12 +67,6 @@ const char *test_jacobian_const_vector_mult_log_matrix(void) y->jacobian_init(y); y->eval_jacobian(y); - /* Expected jacobian (row-wise scaling): - * Row 0: 1.5 * [1/1] = [1.5] - * Row 1: 2.5 * [1/2] = [1.25] - * Row 2: 3.5 * [1/4] = [0.875] - * Row 3: 
4.5 * [1/8] = [0.5625] - */ double expected_x[4] = {1.5, 1.25, 0.875, 0.5625}; int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h new file mode 100644 index 0000000..7b9467d --- /dev/null +++ b/tests/problem/test_param_prob.h @@ -0,0 +1,348 @@ +#ifndef TEST_PARAM_PROB_H +#define TEST_PARAM_PROB_H + +#include +#include +#include + +#include "affine.h" +#include "bivariate.h" +#include "elementwise_univariate.h" +#include "expr.h" +#include "minunit.h" +#include "problem.h" +#include "subexpr.h" +#include "test_helpers.h" + +/* + * Test 1: param_scalar_mult in objective + * + * Problem: minimize a * sum(log(x)), no constraints, x size 2 + * a is a scalar parameter (param_id=0) + * + * At x=[1,2], a=3: + * obj = 3*(log(1)+log(2)) = 3*log(2) + * gradient = [3/1, 3/2] = [3.0, 1.5] + * + * After update a=5: + * obj = 5*log(2) + * gradient = [5.0, 2.5] + */ +const char *test_param_scalar_mult_problem(void) +{ + int n_vars = 2; + + /* Build tree: sum(a * log(x)) */ + expr *x = new_variable(2, 1, 0, n_vars); + expr *log_x = new_log(x); + expr *a_param = new_parameter(1, 1, 0, n_vars, NULL); + expr *scaled = new_scalar_mult(a_param, log_x); + expr *objective = new_sum(scaled, -1); + + /* Create problem (no constraints) */ + problem *prob = new_problem(objective, NULL, 0, false); + + /* Register parameter */ + expr *param_nodes[1] = {a_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* Set a=3 and evaluate at x=[1,2] */ + double theta[1] = {3.0}; + problem_update_params(prob, theta); + + double u[2] = {1.0, 2.0}; + double obj_val = problem_objective_forward(prob, u); + problem_gradient(prob); + + double expected_obj = 3.0 * log(2.0); + mu_assert("obj wrong (a=3)", fabs(obj_val - expected_obj) < 1e-10); + + double expected_grad[2] = {3.0, 1.5}; + mu_assert("gradient wrong (a=3)", + cmp_double_array(prob->gradient_values, 
expected_grad, 2)); + + /* Update a=5 and re-evaluate */ + theta[0] = 5.0; + problem_update_params(prob, theta); + + obj_val = problem_objective_forward(prob, u); + problem_gradient(prob); + + expected_obj = 5.0 * log(2.0); + mu_assert("obj wrong (a=5)", fabs(obj_val - expected_obj) < 1e-10); + + double expected_grad2[2] = {5.0, 2.5}; + mu_assert("gradient wrong (a=5)", + cmp_double_array(prob->gradient_values, expected_grad2, 2)); + + free_problem(prob); + + return 0; +} + +/* + * Test 2: param_vector_mult in constraint + * + * Problem: minimize sum(x), subject to p ∘ x, x size 2 + * p is a vector parameter of size 2 (param_id=0) + * + * At x=[1,2], p=[3,4]: + * constraint_values = [3, 8] + * jacobian = diag([3, 4]) + * + * After update p=[5,6]: + * constraint_values = [5, 12] + * jacobian = diag([5, 6]) + */ +const char *test_param_vector_mult_problem(void) +{ + int n_vars = 2; + + /* Objective: sum(x) */ + expr *x_obj = new_variable(2, 1, 0, n_vars); + expr *objective = new_sum(x_obj, -1); + + /* Constraint: p ∘ x */ + expr *x_con = new_variable(2, 1, 0, n_vars); + expr *p_param = new_parameter(2, 1, 0, n_vars, NULL); + expr *constraint = new_vector_mult(p_param, x_con); + + expr *constraints[1] = {constraint}; + + /* Create problem */ + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {p_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* Set p=[3,4] and evaluate at x=[1,2] */ + double theta[2] = {3.0, 4.0}; + problem_update_params(prob, theta); + + double u[2] = {1.0, 2.0}; + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv[2] = {3.0, 8.0}; + mu_assert("constraint values wrong (p=[3,4])", + cmp_double_array(prob->constraint_values, expected_cv, 2)); + + CSR_Matrix *jac = prob->jacobian; + mu_assert("jac rows wrong", jac->m == 2); + mu_assert("jac cols wrong", jac->n == 2); + + int expected_p[3] = {0, 1, 2}; + mu_assert("jac->p wrong 
(p=[3,4])", cmp_int_array(jac->p, expected_p, 3)); + + int expected_i[2] = {0, 1}; + mu_assert("jac->i wrong (p=[3,4])", cmp_int_array(jac->i, expected_i, 2)); + + double expected_x[2] = {3.0, 4.0}; + mu_assert("jac->x wrong (p=[3,4])", cmp_double_array(jac->x, expected_x, 2)); + + /* Update p=[5,6] and re-evaluate */ + double theta2[2] = {5.0, 6.0}; + problem_update_params(prob, theta2); + + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv2[2] = {5.0, 12.0}; + mu_assert("constraint values wrong (p=[5,6])", + cmp_double_array(prob->constraint_values, expected_cv2, 2)); + + double expected_x2[2] = {5.0, 6.0}; + mu_assert("jac->x wrong (p=[5,6])", cmp_double_array(jac->x, expected_x2, 2)); + + free_problem(prob); + + return 0; +} + +/* + * Test 3: left_param_matmul in constraint + * + * Problem: minimize sum(x), subject to A @ x, x size 2, A is 2x2 + * A is a 2x2 matrix parameter (param_id=0, size=4, CSR data order) + * A = [[1,2],[3,4]] → CSR data order theta = [1,2,3,4] + * + * At x=[1,2]: + * constraint_values = [1*1+2*2, 3*1+4*2] = [5, 11] + * jacobian = [[1,2],[3,4]] + * + * After update A = [[5,6],[7,8]] → theta = [5,6,7,8]: + * constraint_values = [5*1+6*2, 7*1+8*2] = [17, 23] + * jacobian = [[5,6],[7,8]] + */ +const char *test_param_left_matmul_problem(void) +{ + int n_vars = 2; + + /* Objective: sum(x) */ + expr *x_obj = new_variable(2, 1, 0, n_vars); + expr *objective = new_sum(x_obj, -1); + + /* Constraint: A @ x */ + expr *x_con = new_variable(2, 1, 0, n_vars); + expr *A_param = new_parameter(2, 2, 0, n_vars, NULL); + + /* Dense 2x2 CSR with placeholder zeros */ + CSR_Matrix *A = new_csr_matrix(2, 2, 4); + int Ap[3] = {0, 2, 4}; + int Ai[4] = {0, 1, 0, 1}; + double Ax[4] = {0.0, 0.0, 0.0, 0.0}; + memcpy(A->p, Ap, 3 * sizeof(int)); + memcpy(A->i, Ai, 4 * sizeof(int)); + memcpy(A->x, Ax, 4 * sizeof(double)); + + expr *constraint = new_left_matmul(A_param, x_con, A); + free_csr_matrix(A); + + expr *constraints[1] = 
{constraint}; + + /* Create problem */ + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {A_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* Set A = [[1,2],[3,4]], CSR data order: [1,2,3,4] */ + double theta[4] = {1.0, 2.0, 3.0, 4.0}; + problem_update_params(prob, theta); + + double u[2] = {1.0, 2.0}; + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv[2] = {5.0, 11.0}; + mu_assert("constraint values wrong (A1)", + cmp_double_array(prob->constraint_values, expected_cv, 2)); + + CSR_Matrix *jac = prob->jacobian; + mu_assert("jac rows wrong", jac->m == 2); + mu_assert("jac cols wrong", jac->n == 2); + + int expected_p[3] = {0, 2, 4}; + mu_assert("jac->p wrong (A1)", cmp_int_array(jac->p, expected_p, 3)); + + int expected_i[4] = {0, 1, 0, 1}; + mu_assert("jac->i wrong (A1)", cmp_int_array(jac->i, expected_i, 4)); + + double expected_x[4] = {1.0, 2.0, 3.0, 4.0}; + mu_assert("jac->x wrong (A1)", cmp_double_array(jac->x, expected_x, 4)); + + /* Update A = [[5,6],[7,8]], CSR data order: [5,6,7,8] */ + double theta2[4] = {5.0, 6.0, 7.0, 8.0}; + problem_update_params(prob, theta2); + + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv2[2] = {17.0, 23.0}; + mu_assert("constraint values wrong (A2)", + cmp_double_array(prob->constraint_values, expected_cv2, 2)); + + double expected_x2[4] = {5.0, 6.0, 7.0, 8.0}; + mu_assert("jac->x wrong (A2)", cmp_double_array(jac->x, expected_x2, 4)); + + free_problem(prob); + + return 0; +} + +/* + * Test 4: right_param_matmul in constraint + * + * Problem: minimize sum(x), subject to x @ A, x size 1x2, A is 2x2 + * A is a 2x2 matrix parameter (param_id=0, size=4, CSR data order) + * A = [[1,2],[3,4]] → CSR data order theta = [1,2,3,4] + * + * At x=[1,2]: + * constraint_values = [1*1+2*3, 1*2+2*4] = [7, 10] + * jacobian = [[1,3],[2,4]] = A^T + * + * After update A = [[5,6],[7,8]] → 
theta = [5,6,7,8]: + * constraint_values = [1*5+2*7, 1*6+2*8] = [19, 22] + * jacobian = [[5,7],[6,8]] = A^T + */ +const char *test_param_right_matmul_problem(void) +{ + int n_vars = 2; + + /* Objective: sum(x) */ + expr *x_obj = new_variable(1, 2, 0, n_vars); + expr *objective = new_sum(x_obj, -1); + + /* Constraint: x @ A */ + expr *x_con = new_variable(1, 2, 0, n_vars); + expr *A_param = new_parameter(2, 2, 0, n_vars, NULL); + + /* Dense 2x2 CSR with placeholder zeros */ + CSR_Matrix *A = new_csr_matrix(2, 2, 4); + int Ap[3] = {0, 2, 4}; + int Ai[4] = {0, 1, 0, 1}; + double Ax[4] = {0.0, 0.0, 0.0, 0.0}; + memcpy(A->p, Ap, 3 * sizeof(int)); + memcpy(A->i, Ai, 4 * sizeof(int)); + memcpy(A->x, Ax, 4 * sizeof(double)); + + expr *constraint = new_right_matmul(A_param, x_con, A); + free_csr_matrix(A); + + expr *constraints[1] = {constraint}; + + /* Create problem */ + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {A_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* Set A = [[1,2],[3,4]], CSR data order: [1,2,3,4] */ + double theta[4] = {1.0, 2.0, 3.0, 4.0}; + problem_update_params(prob, theta); + + double u[2] = {1.0, 2.0}; + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv[2] = {7.0, 10.0}; + mu_assert("constraint values wrong (A1)", + cmp_double_array(prob->constraint_values, expected_cv, 2)); + + CSR_Matrix *jac = prob->jacobian; + mu_assert("jac rows wrong", jac->m == 2); + mu_assert("jac cols wrong", jac->n == 2); + + int expected_p[3] = {0, 2, 4}; + mu_assert("jac->p wrong (A1)", cmp_int_array(jac->p, expected_p, 3)); + + int expected_i[4] = {0, 1, 0, 1}; + mu_assert("jac->i wrong (A1)", cmp_int_array(jac->i, expected_i, 4)); + + double expected_x[4] = {1.0, 3.0, 2.0, 4.0}; + mu_assert("jac->x wrong (A1)", cmp_double_array(jac->x, expected_x, 4)); + + /* Update A = [[5,6],[7,8]], CSR data order: [5,6,7,8] */ + double theta2[4] = {5.0, 
6.0, 7.0, 8.0}; + problem_update_params(prob, theta2); + + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv2[2] = {19.0, 22.0}; + mu_assert("constraint values wrong (A2)", + cmp_double_array(prob->constraint_values, expected_cv2, 2)); + + double expected_x2[4] = {5.0, 7.0, 6.0, 8.0}; + mu_assert("jac->x wrong (A2)", cmp_double_array(jac->x, expected_x2, 4)); + + free_problem(prob); + + return 0; +} + +#endif /* TEST_PARAM_PROB_H */ diff --git a/tests/profiling/profile_left_matmul.h b/tests/profiling/profile_left_matmul.h index 89a5f97..c2211a1 100644 --- a/tests/profiling/profile_left_matmul.h +++ b/tests/profiling/profile_left_matmul.h @@ -31,7 +31,7 @@ const char *profile_left_matmul(void) } A->p[n] = n * n; - expr *AX = new_left_matmul(X, A); + expr *AX = new_left_matmul(NULL, X, A); double *x_vals = (double *) malloc(n * n * sizeof(double)); for (int i = 0; i < n * n; i++) diff --git a/tests/wsum_hess/test_left_matmul.h b/tests/wsum_hess/test_left_matmul.h index 00f587e..d95c00e 100644 --- a/tests/wsum_hess/test_left_matmul.h +++ b/tests/wsum_hess/test_left_matmul.h @@ -62,7 +62,7 @@ const char *test_wsum_hess_left_matmul(void) memcpy(A->x, A_x, 7 * sizeof(double)); expr *log_x = new_log(x); - expr *A_log_x = new_left_matmul(log_x, A); + expr *A_log_x = new_left_matmul(NULL, log_x, A); A_log_x->forward(A_log_x, x_vals); A_log_x->jacobian_init(A_log_x); @@ -161,7 +161,7 @@ const char *test_wsum_hess_left_matmul_composite(void) expr *Bx = new_linear(x, B, NULL); expr *log_Bx = new_log(Bx); - expr *A_log_Bx = new_left_matmul(log_Bx, A); + expr *A_log_Bx = new_left_matmul(NULL, log_Bx, A); A_log_Bx->forward(A_log_Bx, x_vals); A_log_Bx->jacobian_init(A_log_Bx); @@ -233,7 +233,7 @@ const char *test_wsum_hess_left_matmul_matrix(void) memcpy(A->x, A_x, 7 * sizeof(double)); expr *log_x = new_log(x); - expr *A_log_x = new_left_matmul(log_x, A); + expr *A_log_x = new_left_matmul(NULL, log_x, A); A_log_x->forward(A_log_x, x_vals); 
A_log_x->jacobian_init(A_log_x); diff --git a/tests/wsum_hess/test_right_matmul.h b/tests/wsum_hess/test_right_matmul.h index 660e823..9ebb5ac 100644 --- a/tests/wsum_hess/test_right_matmul.h +++ b/tests/wsum_hess/test_right_matmul.h @@ -33,7 +33,7 @@ const char *test_wsum_hess_right_matmul(void) memcpy(A->x, A_x, 4 * sizeof(double)); expr *log_x = new_log(x); - expr *log_x_A = new_right_matmul(log_x, A); + expr *log_x_A = new_right_matmul(NULL, log_x, A); log_x_A->forward(log_x_A, x_vals); log_x_A->jacobian_init(log_x_A); @@ -83,7 +83,7 @@ const char *test_wsum_hess_right_matmul_vector(void) memcpy(A->x, A_x, 4 * sizeof(double)); expr *log_x = new_log(x); - expr *log_x_A = new_right_matmul(log_x, A); + expr *log_x_A = new_right_matmul(NULL, log_x, A); log_x_A->forward(log_x_A, x_vals); log_x_A->jacobian_init(log_x_A); diff --git a/tests/wsum_hess/test_const_scalar_mult.h b/tests/wsum_hess/test_scalar_mult.h similarity index 71% rename from tests/wsum_hess/test_const_scalar_mult.h rename to tests/wsum_hess/test_scalar_mult.h index 34ccb7c..a2f7e9d 100644 --- a/tests/wsum_hess/test_const_scalar_mult.h +++ b/tests/wsum_hess/test_scalar_mult.h @@ -1,15 +1,17 @@ #include #include +#include "affine.h" #include "bivariate.h" #include "elementwise_univariate.h" #include "expr.h" #include "minunit.h" +#include "subexpr.h" #include "test_helpers.h" -/* Test: y = a * log(x) where a is a scalar constant */ +/* Test: y = a * log(x) where a is a scalar parameter */ -const char *test_wsum_hess_const_scalar_mult_log_vector(void) +const char *test_wsum_hess_scalar_mult_log_vector(void) { /* Create variable x: [1.0, 2.0, 4.0] */ double u_vals[3] = {1.0, 2.0, 4.0}; @@ -19,8 +21,9 @@ const char *test_wsum_hess_const_scalar_mult_log_vector(void) expr *log_node = new_log(x); /* Create scalar mult node: y = 2.5 * log(x) */ - double a = 2.5; - expr *y = new_const_scalar_mult(a, log_node); + double a_val = 2.5; + expr *a_param = new_parameter(1, 1, PARAM_FIXED, 3, &a_val); + expr *y = 
new_scalar_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -30,15 +33,6 @@ const char *test_wsum_hess_const_scalar_mult_log_vector(void) double w[3] = {1.0, 0.5, 0.25}; y->eval_wsum_hess(y, w); - /* For y = a * log(x), the Hessian is: - * H = a * H_log = a * diag([-1/x_i^2]) - * With weights w and scalar a: - * H_weighted = a * diag([-w_i / x_i^2]) - * - * Expected diagonal: 2.5 * [-1/1^2, -0.5/2^2, -0.25/4^2] - * = 2.5 * [-1, -0.125, -0.015625] - * = [-2.5, -0.3125, -0.0390625] - */ double expected_x[3] = {-2.5, -0.3125, -0.0390625}; int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; @@ -54,7 +48,7 @@ const char *test_wsum_hess_const_scalar_mult_log_vector(void) return 0; } -const char *test_wsum_hess_const_scalar_mult_log_matrix(void) +const char *test_wsum_hess_scalar_mult_log_matrix(void) { /* Create variable x as 2x2 matrix: [[1.0, 2.0], [4.0, 8.0]] */ double u_vals[4] = {1.0, 2.0, 4.0, 8.0}; @@ -64,8 +58,9 @@ const char *test_wsum_hess_const_scalar_mult_log_matrix(void) expr *log_node = new_log(x); /* Create scalar mult node: y = 3.0 * log(x) */ - double a = 3.0; - expr *y = new_const_scalar_mult(a, log_node); + double a_val = 3.0; + expr *a_param = new_parameter(1, 1, PARAM_FIXED, 4, &a_val); + expr *y = new_scalar_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -75,10 +70,6 @@ const char *test_wsum_hess_const_scalar_mult_log_matrix(void) double w[4] = {1.0, 1.0, 1.0, 1.0}; y->eval_wsum_hess(y, w); - /* Expected diagonal: 3.0 * [-1/1^2, -1/2^2, -1/4^2, -1/8^2] - * = 3.0 * [-1, -0.25, -0.0625, -0.015625] - * = [-3.0, -0.75, -0.1875, -0.046875] - */ double expected_x[4] = {-3.0, -0.75, -0.1875, -0.046875}; int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; diff --git a/tests/wsum_hess/test_const_vector_mult.h b/tests/wsum_hess/test_vector_mult.h similarity index 72% rename from tests/wsum_hess/test_const_vector_mult.h rename to tests/wsum_hess/test_vector_mult.h index 
b0a75b5..da08013 100644 --- a/tests/wsum_hess/test_const_vector_mult.h +++ b/tests/wsum_hess/test_vector_mult.h @@ -1,15 +1,17 @@ #include #include +#include "affine.h" #include "bivariate.h" #include "elementwise_univariate.h" #include "expr.h" #include "minunit.h" +#include "subexpr.h" #include "test_helpers.h" -/* Test: y = a ∘ log(x) where a is a constant vector */ +/* Test: y = a ∘ log(x) where a is a parameter vector */ -const char *test_wsum_hess_const_vector_mult_log_vector(void) +const char *test_wsum_hess_vector_mult_log_vector(void) { /* Create variable x: [1.0, 2.0, 4.0] */ double u_vals[3] = {1.0, 2.0, 4.0}; @@ -20,7 +22,8 @@ const char *test_wsum_hess_const_vector_mult_log_vector(void) /* Create vector mult node: y = [2.0, 3.0, 4.0] ∘ log(x) */ double a[3] = {2.0, 3.0, 4.0}; - expr *y = new_const_vector_mult(a, log_node); + expr *a_param = new_parameter(3, 1, PARAM_FIXED, 3, a); + expr *y = new_vector_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -30,13 +33,6 @@ const char *test_wsum_hess_const_vector_mult_log_vector(void) double w[3] = {1.0, 0.5, 0.25}; y->eval_wsum_hess(y, w); - /* For y = a ∘ log(x), the weighted Hessian is: - * H = diag([a_i * (-w_i / x_i^2)]) - * - * Expected diagonal: [2.0 * (-1 / 1^2), 3.0 * (-0.5 / 2^2), 4.0 * (-0.25 / 4^2)] - * = [2.0 * (-1), 3.0 * (-0.125), 4.0 * (-0.015625)] - * = [-2.0, -0.375, -0.0625] - */ double expected_x[3] = {-2.0, -0.375, -0.0625}; int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; @@ -52,7 +48,7 @@ const char *test_wsum_hess_const_vector_mult_log_vector(void) return 0; } -const char *test_wsum_hess_const_vector_mult_log_matrix(void) +const char *test_wsum_hess_vector_mult_log_matrix(void) { /* Create variable x as 2x2 matrix: [[1.0, 2.0], [4.0, 8.0]] */ double u_vals[4] = {1.0, 2.0, 4.0, 8.0}; @@ -63,7 +59,8 @@ const char *test_wsum_hess_const_vector_mult_log_matrix(void) /* Create vector mult node: y = [1.5, 2.5, 3.5, 4.5] ∘ log(x) */ double a[4] = {1.5, 2.5, 
3.5, 4.5}; - expr *y = new_const_vector_mult(a, log_node); + expr *a_param = new_parameter(4, 1, PARAM_FIXED, 4, a); + expr *y = new_vector_mult(a_param, log_node); /* Forward pass */ y->forward(y, u_vals); @@ -73,11 +70,6 @@ const char *test_wsum_hess_const_vector_mult_log_matrix(void) double w[4] = {1.0, 1.0, 1.0, 1.0}; y->eval_wsum_hess(y, w); - /* Expected diagonal: [1.5 * (-1 / 1^2), 2.5 * (-1 / 2^2), - * 3.5 * (-1 / 4^2), 4.5 * (-1 / 8^2)] - * = [1.5 * (-1), 2.5 * (-0.25), 3.5 * (-0.0625), 4.5 * - * (-0.015625)] = [-1.5, -0.625, -0.21875, -0.0703125] - */ double expected_x[4] = {-1.5, -0.625, -0.21875, -0.0703125}; int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; From 61d657f556c64313fe98e6bf4f006c070984cea9 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Thu, 26 Mar 2026 23:33:15 -0400 Subject: [PATCH 02/14] Address review comments on parameter-support-v2 - Remove redundant refresh_param_values calls from eval_jacobian and eval_wsum_hess in left_matmul (forward always runs first) - Use memcpy in problem_register_params for pointer array copy - Add PARAM_FIXED guard in problem_update_params to skip fixed constants - Remove unused right_matmul_expr struct from subexpr.h - Add test_param_fixed_skip_in_update covering mixed fixed/updatable params - Add CLAUDE.md for Claude Code guidance Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 82 +++++++++++++++++++++++++++++++++ include/subexpr.h | 11 ----- src/bivariate/left_matmul.c | 2 - src/problem.c | 6 ++- tests/all_tests.c | 1 + tests/problem/test_param_prob.h | 82 +++++++++++++++++++++++++++++++++ 6 files changed, 169 insertions(+), 15 deletions(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c96e672 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,82 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
+ +## Project Overview + +SparseDiffEngine is a C library implementing the differentiation engine for CVXPY's DNLP extension. It computes sparse Jacobians and Hessians for nonlinear programming solvers. Used as a backend via Python bindings in the parent SparseDiffPy package. + +## Build & Test Commands + +```bash +# Build standalone (CMake) +mkdir -p build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug && make + +# Run all tests +cd build && ctest --output-on-failure + +# Run tests directly (more verbose output) +cd build && ./all_tests + +# Rebuild after changes (from build/) +make && ctest --output-on-failure + +# Format code +clang-format -i src/**/*.c include/**/*.h +``` + +**Platform dependencies**: macOS uses Accelerate framework (automatic), Linux needs `libopenblas-dev`, Windows uses OpenBLAS via vcpkg. + +## Architecture + +### Expression DAG with Function-Pointer Polymorphism + +Every node is an `expr` struct (`include/expr.h`) containing function pointers that act as a vtable: +- `forward()` — evaluate node given variable values `u` +- `jacobian_init()` / `eval_jacobian()` — sparsity pattern allocation and numeric fill +- `wsum_hess_init()` / `eval_wsum_hess()` — weighted-sum Hessian +- `is_affine()` — used for caching optimizations +- `free_type_data()` — type-specific cleanup + +Type-specific nodes use C struct inheritance: embed `expr base` as the first field, then cast. Defined in `include/subexpr.h` (e.g., `parameter_expr`, `scalar_mult_expr`). + +All node constructors call `init_expr()` to wire up the dispatch table. Children are owned via `left`/`right` pointers with reference counting (`expr_retain`/`free_expr`). 
+ +### Parameter System + +`parameter_expr` (in `subexpr.h`) unifies constants and updatable parameters: +- `param_id == PARAM_FIXED (-1)`: fixed constant, not updatable +- `param_id >= 0`: offset into the parameter vector `theta` + +Bivariate ops that involve parameters (scalar_mult, vector_mult, left_matmul, right_matmul) store a `param_source` pointer and read values during forward/jacobian/hessian evaluation. + +**Workflow**: `new_parameter()` → `problem_register_params()` → `problem_update_params(prob, theta)` copies from `theta` into each parameter node's `value` array using `param_id` as offset. + +### Problem Interface (`include/problem.h`) + +The `problem` struct owns an objective, constraints array, and parameter nodes. Key lifecycle: +1. `new_problem()` — allocates problem, output arrays +2. `problem_register_params()` — registers parameter nodes +3. `problem_init_derivatives()` — allocates sparsity patterns (Jacobian + Hessian) +4. `problem_objective_forward()` / `problem_constraint_forward()` — evaluate +5. `problem_gradient()` / `problem_jacobian()` / `problem_hessian()` — compute derivatives +6. `problem_update_params()` — update parameter values, reset caches + +### Sparse Matrix Formats (`include/utils/`) + +CSR is primary. CSC is intermediate during computation. COO for Python interop; lower-triangular COO for symmetric Hessians. + +## Adding a New Operation + +1. Create `src/<category>/new_op.c` implementing the function pointer interface +2. Add constructor declaration to the appropriate `include/<category>.h` +3. Write test as a `.h` file in `tests/<category>/` +4. Register test in `tests/all_tests.c` with `mu_run_test(test_name, tests_run)` + +## Test Framework + +Uses minunit.h (header-only). Tests are `const char *test_name(void)` functions returning NULL on success. Tolerance: `ABS_TOL=1e-6, REL_TOL=1e-6` via `cmp_double_array()` in `test_helpers.c`. No built-in test filtering — all tests run via single `all_tests` binary. 
+ +## C Code Style + +C99, Allman braces, 4-space indent, 85-column limit, right-aligned pointers (`int *ptr`). Enforced by `.clang-format`. Strict warnings: `-Wall -Wextra -Wpedantic -Wshadow -Wformat=2 -Wcast-qual -Wcast-align -Wdouble-promotion -Wnull-dereference`. diff --git a/include/subexpr.h b/include/subexpr.h index 9cf51bf..7e5504b 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -130,17 +130,6 @@ typedef struct left_matmul_expr void (*refresh_param_values)(struct left_matmul_expr *); } left_matmul_expr; -/* Right matrix multiplication: y = f(x) * A where f(x) is an expression. - * f(x) has shape p x n, A has shape n x q, output y has shape p x q. - * Uses vec(y) = B * vec(f(x)) where B = A^T kron I_p. */ -typedef struct right_matmul_expr -{ - expr base; - CSR_Matrix *B; /* B = A^T kron I_p */ - CSR_Matrix *BT; /* B^T for backpropagating Hessian weights */ - CSC_Matrix *CSC_work; -} right_matmul_expr; - /* Scalar multiplication: y = a * child where a comes from param_source */ typedef struct scalar_mult_expr { diff --git a/src/bivariate/left_matmul.c b/src/bivariate/left_matmul.c index bd3cdb1..c0bd6f6 100644 --- a/src/bivariate/left_matmul.c +++ b/src/bivariate/left_matmul.c @@ -122,7 +122,6 @@ static void jacobian_init(expr *node) static void eval_jacobian(expr *node) { left_matmul_expr *lnode = (left_matmul_expr *) node; - refresh_param_values(lnode); expr *x = node->left; CSC_Matrix *Jchild_CSC = lnode->Jchild_CSC; @@ -157,7 +156,6 @@ static void wsum_hess_init(expr *node) static void eval_wsum_hess(expr *node, const double *w) { left_matmul_expr *lnode = (left_matmul_expr *) node; - refresh_param_values(lnode); /* compute A^T w*/ Matrix *AT = lnode->AT; diff --git a/src/problem.c b/src/problem.c index e32eae3..755cedb 100644 --- a/src/problem.c +++ b/src/problem.c @@ -335,11 +335,11 @@ void problem_register_params(problem *prob, expr **param_nodes, int n_param_node { prob->n_param_nodes = n_param_nodes; prob->param_nodes = (expr **) 
malloc(n_param_nodes * sizeof(expr *)); - prob->total_parameter_size = 0; + memcpy(prob->param_nodes, param_nodes, n_param_nodes * sizeof(expr *)); + prob->total_parameter_size = 0; for (int i = 0; i < n_param_nodes; i++) { - prob->param_nodes[i] = param_nodes[i]; prob->total_parameter_size += param_nodes[i]->size; } } @@ -350,6 +350,8 @@ void problem_update_params(problem *prob, const double *theta) { expr *pnode = prob->param_nodes[i]; parameter_expr *param = (parameter_expr *) pnode; + if (param->param_id == PARAM_FIXED) + continue; int offset = param->param_id; memcpy(pnode->value, theta + offset, pnode->size * sizeof(double)); param->has_been_refreshed = false; diff --git a/tests/all_tests.c b/tests/all_tests.c index 273f510..c7c5063 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -303,6 +303,7 @@ int main(void) mu_run_test(test_param_vector_mult_problem, tests_run); mu_run_test(test_param_left_matmul_problem, tests_run); mu_run_test(test_param_right_matmul_problem, tests_run); + mu_run_test(test_param_fixed_skip_in_update, tests_run); #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 7b9467d..f9538a6 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -345,4 +345,86 @@ const char *test_param_right_matmul_problem(void) return 0; } +/* + * Test 5: PARAM_FIXED params are skipped by problem_update_params + * + * Problem: minimize a * sum(log(x)) + b * sum(x), no constraints, x size 2 + * a is a FIXED scalar parameter (param_id=PARAM_FIXED, value=2.0) + * b is an updatable scalar parameter (param_id=0) + * + * At x=[1,2], a=2, b=3: + * obj = 2*(log(1)+log(2)) + 3*(1+2) = 2*log(2) + 9 + * gradient = [2/1 + 3, 2/2 + 3] = [5.0, 4.0] + * + * After update theta={5.0} (only b changes to 5, a stays 2): + * obj = 2*log(2) + 5*3 = 2*log(2) + 15 + * gradient = [2/1 + 5, 2/2 + 5] = [7.0, 6.0] + */ +const char *test_param_fixed_skip_in_update(void) +{ + 
int n_vars = 2; + + /* Build tree: a * sum(log(x)) + b * sum(x) */ + expr *x1 = new_variable(2, 1, 0, n_vars); + expr *log_x = new_log(x1); + double a_val = 2.0; + expr *a_param = new_parameter(1, 1, PARAM_FIXED, n_vars, &a_val); + expr *a_log = new_scalar_mult(a_param, log_x); + expr *sum_a_log = new_sum(a_log, -1); + + expr *x2 = new_variable(2, 1, 0, n_vars); + expr *b_param = new_parameter(1, 1, 0, n_vars, NULL); + expr *b_x = new_scalar_mult(b_param, x2); + expr *sum_b_x = new_sum(b_x, -1); + + expr *objective = new_add(sum_a_log, sum_b_x); + + /* Create problem and register BOTH params */ + problem *prob = new_problem(objective, NULL, 0, false); + + expr *param_nodes[2] = {a_param, b_param}; + problem_register_params(prob, param_nodes, 2); + problem_init_derivatives(prob); + + /* Set b=3 and evaluate at x=[1,2] */ + double theta[1] = {3.0}; + problem_update_params(prob, theta); + + /* Verify a is still 2.0 (not overwritten) */ + mu_assert("a_param changed after update", + fabs(a_param->value[0] - 2.0) < 1e-10); + + double u[2] = {1.0, 2.0}; + double obj_val = problem_objective_forward(prob, u); + problem_gradient(prob); + + double expected_obj = 2.0 * log(2.0) + 9.0; + mu_assert("obj wrong (b=3)", fabs(obj_val - expected_obj) < 1e-10); + + double expected_grad[2] = {5.0, 4.0}; + mu_assert("gradient wrong (b=3)", + cmp_double_array(prob->gradient_values, expected_grad, 2)); + + /* Update b=5, a should stay 2 */ + theta[0] = 5.0; + problem_update_params(prob, theta); + + mu_assert("a_param changed after second update", + fabs(a_param->value[0] - 2.0) < 1e-10); + + obj_val = problem_objective_forward(prob, u); + problem_gradient(prob); + + double expected_obj2 = 2.0 * log(2.0) + 15.0; + mu_assert("obj wrong (b=5)", fabs(obj_val - expected_obj2) < 1e-10); + + double expected_grad2[2] = {7.0, 6.0}; + mu_assert("gradient wrong (b=5)", + cmp_double_array(prob->gradient_values, expected_grad2, 2)); + + free_problem(prob); + + return 0; +} + #endif /* 
TEST_PARAM_PROB_H */ From b23c79fee4961c00d4763fe60d9b085b628fc4e9 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Thu, 26 Mar 2026 23:52:51 -0400 Subject: [PATCH 03/14] Run clang-format and remove CLAUDE.md Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 82 --------------------------------- src/affine/trace.c | 2 +- src/problem.c | 3 +- tests/problem/test_param_prob.h | 3 +- 4 files changed, 3 insertions(+), 87 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index c96e672..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,82 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -SparseDiffEngine is a C library implementing the differentiation engine for CVXPY's DNLP extension. It computes sparse Jacobians and Hessians for nonlinear programming solvers. Used as a backend via Python bindings in the parent SparseDiffPy package. - -## Build & Test Commands - -```bash -# Build standalone (CMake) -mkdir -p build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug && make - -# Run all tests -cd build && ctest --output-on-failure - -# Run tests directly (more verbose output) -cd build && ./all_tests - -# Rebuild after changes (from build/) -make && ctest --output-on-failure - -# Format code -clang-format -i src/**/*.c include/**/*.h -``` - -**Platform dependencies**: macOS uses Accelerate framework (automatic), Linux needs `libopenblas-dev`, Windows uses OpenBLAS via vcpkg. 
- -## Architecture - -### Expression DAG with Function-Pointer Polymorphism - -Every node is an `expr` struct (`include/expr.h`) containing function pointers that act as a vtable: -- `forward()` — evaluate node given variable values `u` -- `jacobian_init()` / `eval_jacobian()` — sparsity pattern allocation and numeric fill -- `wsum_hess_init()` / `eval_wsum_hess()` — weighted-sum Hessian -- `is_affine()` — used for caching optimizations -- `free_type_data()` — type-specific cleanup - -Type-specific nodes use C struct inheritance: embed `expr base` as the first field, then cast. Defined in `include/subexpr.h` (e.g., `parameter_expr`, `scalar_mult_expr`). - -All node constructors call `init_expr()` to wire up the dispatch table. Children are owned via `left`/`right` pointers with reference counting (`expr_retain`/`free_expr`). - -### Parameter System - -`parameter_expr` (in `subexpr.h`) unifies constants and updatable parameters: -- `param_id == PARAM_FIXED (-1)`: fixed constant, not updatable -- `param_id >= 0`: offset into the parameter vector `theta` - -Bivariate ops that involve parameters (scalar_mult, vector_mult, left_matmul, right_matmul) store a `param_source` pointer and read values during forward/jacobian/hessian evaluation. - -**Workflow**: `new_parameter()` → `problem_register_params()` → `problem_update_params(prob, theta)` copies from `theta` into each parameter node's `value` array using `param_id` as offset. - -### Problem Interface (`include/problem.h`) - -The `problem` struct owns an objective, constraints array, and parameter nodes. Key lifecycle: -1. `new_problem()` — allocates problem, output arrays -2. `problem_register_params()` — registers parameter nodes -3. `problem_init_derivatives()` — allocates sparsity patterns (Jacobian + Hessian) -4. `problem_objective_forward()` / `problem_constraint_forward()` — evaluate -5. `problem_gradient()` / `problem_jacobian()` / `problem_hessian()` — compute derivatives -6. 
`problem_update_params()` — update parameter values, reset caches - -### Sparse Matrix Formats (`include/utils/`) - -CSR is primary. CSC is intermediate during computation. COO for Python interop; lower-triangular COO for symmetric Hessians. - -## Adding a New Operation - -1. Create `src/<category>/new_op.c` implementing the function pointer interface -2. Add constructor declaration to the appropriate `include/<category>.h` -3. Write test as a `.h` file in `tests/<category>/` -4. Register test in `tests/all_tests.c` with `mu_run_test(test_name, tests_run)` - -## Test Framework - -Uses minunit.h (header-only). Tests are `const char *test_name(void)` functions returning NULL on success. Tolerance: `ABS_TOL=1e-6, REL_TOL=1e-6` via `cmp_double_array()` in `test_helpers.c`. No built-in test filtering — all tests run via single `all_tests` binary. - -## C Code Style - -C99, Allman braces, 4-space indent, 85-column limit, right-aligned pointers (`int *ptr`). Enforced by `.clang-format`. Strict warnings: `-Wall -Wextra -Wpedantic -Wshadow -Wformat=2 -Wcast-qual -Wcast-align -Wdouble-promotion -Wnull-dereference`. diff --git a/src/affine/trace.c b/src/affine/trace.c index 014ee65..289dc31 100644 --- a/src/affine/trace.c +++ b/src/affine/trace.c @@ -102,7 +102,7 @@ static void wsum_hess_init(expr *node) /* initialize child's hessian */ x->wsum_hess_init(x); - + node->work->dwork = (double *) calloc(x->size, sizeof(double)); /* We copy over the sparsity pattern from the child. 
This also includes the diff --git a/src/problem.c b/src/problem.c index 755cedb..1328387 100644 --- a/src/problem.c +++ b/src/problem.c @@ -350,8 +350,7 @@ void problem_update_params(problem *prob, const double *theta) { expr *pnode = prob->param_nodes[i]; parameter_expr *param = (parameter_expr *) pnode; - if (param->param_id == PARAM_FIXED) - continue; + if (param->param_id == PARAM_FIXED) continue; int offset = param->param_id; memcpy(pnode->value, theta + offset, pnode->size * sizeof(double)); param->has_been_refreshed = false; diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 36c85d9..d76c84e 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -391,8 +391,7 @@ const char *test_param_fixed_skip_in_update(void) problem_update_params(prob, theta); /* Verify a is still 2.0 (not overwritten) */ - mu_assert("a_param changed after update", - fabs(a_param->value[0] - 2.0) < 1e-10); + mu_assert("a_param changed after update", fabs(a_param->value[0] - 2.0) < 1e-10); double u[2] = {1.0, 2.0}; double obj_val = problem_objective_forward(prob, u); From 0dd827c1c40b7c44f2fb961dcdc0b8604f0c60f3 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Fri, 27 Mar 2026 11:00:06 -0400 Subject: [PATCH 04/14] Set has_been_refreshed to false in parameter constructor Co-Authored-By: Claude Opus 4.6 (1M context) --- src/affine/parameter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/affine/parameter.c b/src/affine/parameter.c index 77bf26d..9bf92b0 100644 --- a/src/affine/parameter.c +++ b/src/affine/parameter.c @@ -64,7 +64,7 @@ expr *new_parameter(int d1, int d2, int param_id, int n_vars, const double *valu wsum_hess_init, eval_wsum_hess, NULL); pnode->param_id = param_id; - pnode->has_been_refreshed = true; + pnode->has_been_refreshed = false; if (values != NULL) { From ff5c33ceca0bb44c05c47402141026833d81f93a Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Fri, 27 Mar 2026 
17:53:30 -0400 Subject: [PATCH 05/14] Remove CLAUDE.md from PR Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 109 ------------------------------------------------------ 1 file changed, 109 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index ae80a5a..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,109 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Build & Test - -```bash -# Build (from repo root) -mkdir -p build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug && make - -# Run all tests -./build/all_tests - -# Run tests via CTest -cd build && ctest --output-on-failure - -# Format check (requires clang-format) -find src include -name '*.c' -o -name '*.h' | xargs clang-format --dry-run -Werror - -# Build with sanitizers (for debugging) -cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-fsanitize=address,undefined" && make -``` - -There is no way to run a single test in isolation — `all_tests` runs the full suite. To test selectively, temporarily comment out `mu_run_test()` calls in `tests/all_tests.c`. - -## Architecture - -### Expr Node (Virtual Dispatch via Function Pointers) - -Every operation is an `expr` node (`include/expr.h`). 
Polymorphism is achieved through function pointers set at construction time via `init_expr()`: - -- `forward` — evaluate node value from input vector `u` -- `jacobian_init` / `eval_jacobian` — allocate then fill sparse Jacobian (CSR) -- `wsum_hess_init` / `eval_wsum_hess` — allocate then fill weighted-sum Hessian (CSR) -- `is_affine` — used to skip zero Hessian computation -- `local_jacobian` / `local_wsum_hess` — element-wise ops only: diagonal f'(g(x)) and f''(g(x)) -- `free_type_data` — cleanup for type-specific allocations (e.g., param_source pointers) - -Type-specific data uses C struct inheritance: cast the first field (`expr base`) to a subtype defined in `include/subexpr.h` (e.g., `parameter_expr`, `scalar_mult_expr`, `left_matmul_expr`). - -### Operation Categories - -| Directory | Description | Chain rule pattern | -|-----------|-------------|-------------------| -| `src/affine/` | Linear ops (variable, parameter, add, index, matmul, etc.) | J = structural combination of child Jacobians | -| `src/elementwise_full_dom/` | Univariate ops (exp, sin, log, power, etc.) | J = diag(f'(g(x))) · J_child; common chain rule in `common.c` | -| `src/elementwise_restricted_dom/` | Univariate ops with domain restrictions (log, entropy) | Same pattern as full_dom | -| `src/bivariate_full_dom/` | Two-arg ops (elementwise multiply) | Product rule on two children | -| `src/bivariate_restricted_dom/` | Two-arg restricted domain (quad_over_lin, rel_entr) | Custom chain rules | -| `src/other/` | Special ops (prod, quad_form) | Custom implementations | - -### Init vs Eval Split - -Derivative computation is split into two phases: -1. **Init** (`jacobian_init`, `wsum_hess_init`) — allocates CSR matrices and sets sparsity patterns. Called once per expression tree. Guarded by `jacobian_init()` / `wsum_hess_init()` wrappers in `expr.c` to handle DAGs where nodes are visited multiple times. -2. 
**Eval** (`eval_jacobian`, `eval_wsum_hess`) — fills numerical values into pre-allocated structures. Called each time input changes. - -### Elementwise Common Chain Rule - -`src/elementwise_full_dom/common.c` and `src/elementwise_restricted_dom/common.c` implement shared chain rule logic for all univariate ops. Each op only provides `local_jacobian` (diagonal of f') and `local_wsum_hess` (diagonal of f''). The common code handles: -- Variable child: Jacobian is diagonal -- Composite child: J = diag(f') · J_child; H = J_child^T · diag(w·f'') · J_child + f' · H_child - -### Parameter System - -Parameters (`src/affine/parameter.c`) unify constants and updatable values: -- `param_id == PARAM_FIXED` (-1): fixed constant, value set at construction -- `param_id >= 0`: updatable parameter, indexed into a global theta array - -Problem-level API (`src/problem.c`): -- `problem_register_params()` — register parameter nodes -- `problem_update_params()` — update all parameter values from theta vector - -Operations that use parameters (scalar_mult, vector_mult, left/right_matmul) store a `param_source` pointer to the parameter node and read its value during forward/jacobian/hessian evaluation. - -### Workspace (`Expr_Work`) - -Each node has a `work` pointer for cached intermediate results: CSC conversion of Jacobian, local Jacobian diagonal, Hessian term1/term2 matrices. Allocated during init, reused across evals. - -## Adding a New Operation - -1. If needed, add a subtype struct in `include/subexpr.h` -2. Declare the constructor in the appropriate header (`include/affine.h`, `include/elementwise_full_dom.h`, etc.) -3. Implement in `src/<category>/<op_name>.c` with static functions for forward, jacobian_init_impl, eval_jacobian, wsum_hess_init_impl, eval_wsum_hess, is_affine -4. For elementwise univariate ops: only implement `local_jacobian` and `local_wsum_hess`, then use `common_jacobian_init`/`common_eval_jacobian`/etc. from `common.c` -5. 
Add tests in `tests/forward_pass/`, `tests/jacobian_tests/`, `tests/wsum_hess/` and register them in `tests/all_tests.c` -6. Use `check_jacobian()` and `check_wsum_hess()` from `tests/numerical_diff.h` to validate against finite differences - -## Code Style - -C99. Enforced by `.clang-format`: Allman braces, 4-space indent, 85-column limit, right-aligned pointers. Run `clang-format -i ` before committing. - -## CI Workflows (`.github/workflows/`) - -- `cmake.yml` — build and test on Linux, macOS, Windows -- `formatting.yml` — clang-format check -- `sanitizer.yml` — ASan, MSan, UBSan -- `valgrind.yml` — memory leak detection -- `release.yml` — release packaging - -## Platform Notes - -- **macOS**: Links Accelerate for BLAS -- **Linux**: Links OpenBLAS (`libopenblas-dev`) -- **Windows**: Links OpenBLAS via vcpkg; no dense matmul in some configurations - -## Test Tolerances - -`ABS_TOL = 1e-6`, `REL_TOL = 1e-6` (defined in test headers). Numerical differentiation step size: `NUMERICAL_DIFF_DEFAULT_H = 1e-7`. 
From c090e0f338702b1fa9851eaf03cf8ef434b2b147 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Mon, 30 Mar 2026 19:00:25 -0400 Subject: [PATCH 06/14] remove unused constructors --- include/subexpr.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index bf44e8c..3a41232 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -171,19 +171,6 @@ typedef struct matmul_expr int *idx_map_Hg; } matmul_expr; -/* Constant scalar multiplication: y = a * child where a is a constant double */ -typedef struct const_scalar_mult_expr -{ - expr base; - double a; -} const_scalar_mult_expr; - -/* Constant vector elementwise multiplication: y = a \circ child for constant a */ -typedef struct const_vector_mult_expr -{ - expr base; - double *a; /* length equals node->size */ -} const_vector_mult_expr; /* Index/slicing: y = child[indices] where indices is a list of flat positions */ typedef struct index_expr From 91e2af24df792783f60b60e88d15d257cbbb8f60 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Mon, 30 Mar 2026 19:09:13 -0400 Subject: [PATCH 07/14] run formatter and push it up --- include/subexpr.h | 1 - .../composite/test_chain_rule_jacobian.h | 22 +++++++++---------- .../composite/test_chain_rule_wsum_hess.h | 2 +- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index 3a41232..2ffd891 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -171,7 +171,6 @@ typedef struct matmul_expr int *idx_map_Hg; } matmul_expr; - /* Index/slicing: y = child[indices] where indices is a list of flat positions */ typedef struct index_expr { diff --git a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h index 1b7fbbd..4d3d433 100644 --- a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h +++ b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h @@ -215,11 +215,11 @@ const char 
*test_jacobian_matmul_Ax_By(void) CSR_Matrix *A = new_csr_random(3, 2, 1.0); CSR_Matrix *B = new_csr_random(2, 3, 1.0); - expr *X = new_variable(2, 2, 0, 10); /* 2x2, vars 0-3 */ - expr *Y = new_variable(3, 2, 4, 10); /* 3x2, vars 4-9 */ - expr *AX = new_left_matmul(NULL, X, A); /* 3x2 */ - expr *BY = new_left_matmul(NULL, Y, B); /* 2x2 */ - expr *Z = new_matmul(AX, BY); /* 3x2 */ + expr *X = new_variable(2, 2, 0, 10); /* 2x2, vars 0-3 */ + expr *Y = new_variable(3, 2, 4, 10); /* 3x2, vars 4-9 */ + expr *AX = new_left_matmul(NULL, X, A); /* 3x2 */ + expr *BY = new_left_matmul(NULL, Y, B); /* 2x2 */ + expr *Z = new_matmul(AX, BY); /* 3x2 */ mu_assert("check_jacobian failed", check_jacobian(Z, u_vals, NUMERICAL_DIFF_DEFAULT_H)); @@ -238,12 +238,12 @@ const char *test_jacobian_matmul_sin_Ax_cos_Bx(void) CSR_Matrix *A = new_csr_random(2, 3, 1.0); CSR_Matrix *B = new_csr_random(2, 3, 1.0); - expr *X = new_variable(3, 2, 0, 6); /* 3x2, vars 0-5 */ - expr *AX = new_left_matmul(NULL, X, A); /* 2x2 */ - expr *BX = new_left_matmul(NULL, X, B); /* 2x2 */ - expr *sin_AX = new_sin(AX); /* 2x2 */ - expr *cos_BX = new_cos(BX); /* 2x2 */ - expr *Z = new_matmul(sin_AX, cos_BX); /* 2x2 */ + expr *X = new_variable(3, 2, 0, 6); /* 3x2, vars 0-5 */ + expr *AX = new_left_matmul(NULL, X, A); /* 2x2 */ + expr *BX = new_left_matmul(NULL, X, B); /* 2x2 */ + expr *sin_AX = new_sin(AX); /* 2x2 */ + expr *cos_BX = new_cos(BX); /* 2x2 */ + expr *Z = new_matmul(sin_AX, cos_BX); /* 2x2 */ mu_assert("check_jacobian failed", check_jacobian(Z, u_vals, NUMERICAL_DIFF_DEFAULT_H)); diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h index 8810e0d..9bc8279 100644 --- a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h +++ b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h @@ -310,7 +310,7 @@ const char *test_wsum_hess_matmul_Ax_By(void) expr *Y = new_variable(3, 2, 4, 10); expr *AX = new_left_matmul(NULL, X, A); /* 3x2 */ 
expr *BY = new_left_matmul(NULL, Y, B); /* 2x2 */ - expr *Z = new_matmul(AX, BY); /* 3x2 */ + expr *Z = new_matmul(AX, BY); /* 3x2 */ mu_assert("check_wsum_hess failed", check_wsum_hess(Z, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); From dc8c22e2a726e0422c2c09f4ea8323a8545a4863 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Tue, 31 Mar 2026 10:59:36 -0400 Subject: [PATCH 08/14] Update includes in test_param_prob.h to use atoms/ prefix Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/problem/test_param_prob.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index d76c84e..7b72da7 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -5,9 +5,8 @@ #include #include -#include "affine.h" -#include "bivariate.h" -#include "elementwise_restricted_dom.h" +#include "atoms/affine.h" +#include "atoms/elementwise_restricted_dom.h" #include "expr.h" #include "minunit.h" #include "problem.h" From 0f2288cd5d509d889a9f0a62c50c21b98b12517c Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Mon, 6 Apr 2026 16:46:05 -0400 Subject: [PATCH 09/14] Fix column-major parameter ordering in parameterized matmul CVXPY sends parameter values in Fortran (column-major) order, but the matmul refresh functions assumed row-major/CSR order via raw memcpy. This produced incorrect matrix values for non-symmetric matrices. For sparse matrices, iterate the CSR pattern and index into the column-major source array. For dense matrices, exploit the fact that column-major A is row-major A^T to memcpy directly into AT, then transpose to get A. Also fixes a latent bug where sparse update_values would blindly copy the first nnz values from the full d1*d2 parameter array, which is wrong for matrices with structural zeros. Adds tests for rectangular (3x2) and sparse (3x3 with zeros) cases. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/atoms/affine/left_matmul.c | 25 ++++- src/atoms/affine/right_matmul.c | 28 ++++-- tests/all_tests.c | 2 + tests/problem/test_param_prob.h | 161 +++++++++++++++++++++++++++++--- 4 files changed, 190 insertions(+), 26 deletions(-) diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index bf1de63..a79161e 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -170,7 +170,19 @@ static void refresh_sparse_left(left_matmul_expr *lnode) { Sparse_Matrix *sm_A = (Sparse_Matrix *) lnode->A; Sparse_Matrix *sm_AT = (Sparse_Matrix *) lnode->AT; - lnode->A->update_values(lnode->A, lnode->param_source->value); + CSR_Matrix *csr = sm_A->csr; + const double *vals = lnode->param_source->value; + int d1 = lnode->param_source->d1; + + /* Parameter values are column-major; extract into CSR data order */ + for (int row = 0; row < csr->m; row++) + { + for (int k = csr->p[row]; k < csr->p[row + 1]; k++) + { + int col = csr->i[k]; + csr->x[k] = vals[col * d1 + row]; + } + } /* Recompute AT values from A */ AT_fill_values(sm_A->csr, sm_AT->csr, lnode->base.work->iwork); } @@ -178,16 +190,19 @@ static void refresh_sparse_left(left_matmul_expr *lnode) static void refresh_dense_left(left_matmul_expr *lnode) { Dense_Matrix *dm_A = (Dense_Matrix *) lnode->A; + Dense_Matrix *dm_AT = (Dense_Matrix *) lnode->AT; int m = dm_A->base.m; int n = dm_A->base.n; - lnode->A->update_values(lnode->A, lnode->param_source->value); - /* Recompute AT data (transpose of row-major A) */ - Dense_Matrix *dm_AT = (Dense_Matrix *) lnode->AT; + const double *vals = lnode->param_source->value; + + /* Column-major A is row-major AT; copy directly into AT */ + memcpy(dm_AT->x, vals, m * n * sizeof(double)); + /* Transpose AT to get row-major A */ for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { - dm_AT->x[j * m + i] = dm_A->x[i * n + j]; + dm_A->x[i * n + j] = dm_AT->x[j * m + i]; } } } diff --git 
a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index 719c550..6283312 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -40,8 +40,19 @@ static void refresh_sparse_right(left_matmul_expr *lnode) { Sparse_Matrix *sm_AT_inner = (Sparse_Matrix *) lnode->A; Sparse_Matrix *sm_A_inner = (Sparse_Matrix *) lnode->AT; - /* lnode->AT holds the original A; update its values from param */ - lnode->AT->update_values(lnode->AT, lnode->param_source->value); + CSR_Matrix *csr_A = sm_A_inner->csr; + const double *vals = lnode->param_source->value; + int d1 = lnode->param_source->d1; + + /* Parameter values are column-major; extract into CSR data order */ + for (int row = 0; row < csr_A->m; row++) + { + for (int k = csr_A->p[row]; k < csr_A->p[row + 1]; k++) + { + int col = csr_A->i[k]; + csr_A->x[k] = vals[col * d1 + row]; + } + } /* Recompute A^T (lnode->A) from A (lnode->AT) */ AT_fill_values(sm_A_inner->csr, sm_AT_inner->csr, lnode->base.work->iwork); } @@ -50,16 +61,19 @@ static void refresh_dense_right(left_matmul_expr *lnode) { Dense_Matrix *dm_AT_inner = (Dense_Matrix *) lnode->A; Dense_Matrix *dm_A_inner = (Dense_Matrix *) lnode->AT; - int m_orig = dm_A_inner->base.m; /* original A is m x n */ + int m_orig = dm_A_inner->base.m; int n_orig = dm_A_inner->base.n; - /* Update original A (inner's AT) from param values */ - lnode->AT->update_values(lnode->AT, lnode->param_source->value); - /* Recompute A^T (inner's A) from A */ + const double *vals = lnode->param_source->value; + + /* Column-major A is row-major AT; copy directly into inner A (= A^T) */ + memcpy(dm_AT_inner->x, vals, m_orig * n_orig * sizeof(double)); + /* Transpose to get row-major A (inner's AT = original A) */ for (int i = 0; i < m_orig; i++) { for (int j = 0; j < n_orig; j++) { - dm_AT_inner->x[j * m_orig + i] = dm_A_inner->x[i * n_orig + j]; + dm_A_inner->x[i * n_orig + j] = + dm_AT_inner->x[j * m_orig + i]; } } } diff --git a/tests/all_tests.c 
b/tests/all_tests.c index 390c72c..91c82ed 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -354,6 +354,8 @@ int main(void) mu_run_test(test_param_vector_mult_problem, tests_run); mu_run_test(test_param_left_matmul_problem, tests_run); mu_run_test(test_param_right_matmul_problem, tests_run); + mu_run_test(test_param_left_matmul_rectangular, tests_run); + mu_run_test(test_param_left_matmul_sparse, tests_run); mu_run_test(test_param_fixed_skip_in_update, tests_run); #endif /* PROFILE_ONLY */ diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 7b72da7..1fe8fab 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -164,14 +164,14 @@ const char *test_param_vector_mult_problem(void) * Test 3: left_param_matmul in constraint * * Problem: minimize sum(x), subject to A @ x, x size 2, A is 2x2 - * A is a 2x2 matrix parameter (param_id=0, size=4, CSR data order) - * A = [[1,2],[3,4]] → CSR data order theta = [1,2,3,4] + * A is a 2x2 matrix parameter (param_id=0, size=4, column-major order) + * A = [[1,2],[3,4]] → column-major theta = [1,3,2,4] * * At x=[1,2]: * constraint_values = [1*1+2*2, 3*1+4*2] = [5, 11] * jacobian = [[1,2],[3,4]] * - * After update A = [[5,6],[7,8]] → theta = [5,6,7,8]: + * After update A = [[5,6],[7,8]] → column-major theta = [5,7,6,8]: * constraint_values = [5*1+6*2, 7*1+8*2] = [17, 23] * jacobian = [[5,6],[7,8]] */ @@ -208,8 +208,8 @@ const char *test_param_left_matmul_problem(void) problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set A = [[1,2],[3,4]], CSR data order: [1,2,3,4] */ - double theta[4] = {1.0, 2.0, 3.0, 4.0}; + /* Set A = [[1,2],[3,4]], column-major: [1,3,2,4] */ + double theta[4] = {1.0, 3.0, 2.0, 4.0}; problem_update_params(prob, theta); double u[2] = {1.0, 2.0}; @@ -233,8 +233,8 @@ const char *test_param_left_matmul_problem(void) double expected_x[4] = {1.0, 2.0, 3.0, 4.0}; mu_assert("jac->x wrong (A1)", 
cmp_double_array(jac->x, expected_x, 4)); - /* Update A = [[5,6],[7,8]], CSR data order: [5,6,7,8] */ - double theta2[4] = {5.0, 6.0, 7.0, 8.0}; + /* Update A = [[5,6],[7,8]], column-major: [5,7,6,8] */ + double theta2[4] = {5.0, 7.0, 6.0, 8.0}; problem_update_params(prob, theta2); problem_constraint_forward(prob, u); @@ -256,14 +256,14 @@ const char *test_param_left_matmul_problem(void) * Test 4: right_param_matmul in constraint * * Problem: minimize sum(x), subject to x @ A, x size 1x2, A is 2x2 - * A is a 2x2 matrix parameter (param_id=0, size=4, CSR data order) - * A = [[1,2],[3,4]] → CSR data order theta = [1,2,3,4] + * A is a 2x2 matrix parameter (param_id=0, size=4, column-major order) + * A = [[1,2],[3,4]] → column-major theta = [1,3,2,4] * * At x=[1,2]: * constraint_values = [1*1+2*3, 1*2+2*4] = [7, 10] * jacobian = [[1,3],[2,4]] = A^T * - * After update A = [[5,6],[7,8]] → theta = [5,6,7,8]: + * After update A = [[5,6],[7,8]] → column-major theta = [5,7,6,8]: * constraint_values = [1*5+2*7, 1*6+2*8] = [19, 22] * jacobian = [[5,7],[6,8]] = A^T */ @@ -300,8 +300,8 @@ const char *test_param_right_matmul_problem(void) problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set A = [[1,2],[3,4]], CSR data order: [1,2,3,4] */ - double theta[4] = {1.0, 2.0, 3.0, 4.0}; + /* Set A = [[1,2],[3,4]], column-major: [1,3,2,4] */ + double theta[4] = {1.0, 3.0, 2.0, 4.0}; problem_update_params(prob, theta); double u[2] = {1.0, 2.0}; @@ -325,8 +325,8 @@ const char *test_param_right_matmul_problem(void) double expected_x[4] = {1.0, 3.0, 2.0, 4.0}; mu_assert("jac->x wrong (A1)", cmp_double_array(jac->x, expected_x, 4)); - /* Update A = [[5,6],[7,8]], CSR data order: [5,6,7,8] */ - double theta2[4] = {5.0, 6.0, 7.0, 8.0}; + /* Update A = [[5,6],[7,8]], column-major: [5,7,6,8] */ + double theta2[4] = {5.0, 7.0, 6.0, 8.0}; problem_update_params(prob, theta2); problem_constraint_forward(prob, u); @@ -425,4 +425,137 @@ const char 
*test_param_fixed_skip_in_update(void) return 0; } +/* + * Test 6: left_param_matmul with rectangular 3x2 matrix + * + * Problem: minimize sum(x), subject to A @ x, x size 2, A is 3x2 + * A = [[1,2],[3,4],[5,6]] → column-major theta = [1,3,5,2,4,6] + * + * At x=[1,2]: + * constraint_values = [1+4, 3+8, 5+12] = [5, 11, 17] + * jacobian = [[1,2],[3,4],[5,6]] + */ +const char *test_param_left_matmul_rectangular(void) +{ + int n_vars = 2; + + /* Objective: sum(x) */ + expr *x_obj = new_variable(2, 1, 0, n_vars); + expr *objective = new_sum(x_obj, -1); + + /* Constraint: A @ x, A is 3x2 */ + expr *x_con = new_variable(2, 1, 0, n_vars); + expr *A_param = new_parameter(3, 2, 0, n_vars, NULL); + + /* Dense 3x2 CSR */ + CSR_Matrix *A = new_csr_matrix(3, 2, 6); + int Ap[4] = {0, 2, 4, 6}; + int Ai[6] = {0, 1, 0, 1, 0, 1}; + double Ax[6] = {0, 0, 0, 0, 0, 0}; + memcpy(A->p, Ap, 4 * sizeof(int)); + memcpy(A->i, Ai, 6 * sizeof(int)); + memcpy(A->x, Ax, 6 * sizeof(double)); + + expr *constraint = new_left_matmul(A_param, x_con, A); + free_csr_matrix(A); + + expr *constraints[1] = {constraint}; + + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {A_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* Set A = [[1,2],[3,4],[5,6]], column-major: [1,3,5,2,4,6] */ + double theta[6] = {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}; + problem_update_params(prob, theta); + + double u[2] = {1.0, 2.0}; + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv[3] = {5.0, 11.0, 17.0}; + mu_assert("rect constraint values wrong", + cmp_double_array(prob->constraint_values, expected_cv, 3)); + + CSR_Matrix *jac = prob->jacobian; + double expected_x[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + mu_assert("rect jac->x wrong", cmp_double_array(jac->x, expected_x, 6)); + + free_problem(prob); + + return 0; +} + +/* + * Test 7: left_param_matmul with sparse matrix (structural zeros) + * + * Problem: 
minimize sum(x), subject to A @ x, x size 3, A is 3x3 sparse + * A = [[1, 0, 2], + * [0, 3, 0], + * [4, 0, 5]] + * CSR: p=[0,2,3,5], i=[0,2, 1, 0,2], nnz=5 + * Column-major theta (full 9 values): [1,0,4, 0,3,0, 2,0,5] + * + * At x=[1,2,3]: + * constraint_values = [1+6, 6, 4+15] = [7, 6, 19] + * jacobian = A + */ +const char *test_param_left_matmul_sparse(void) +{ + int n_vars = 3; + + /* Objective: sum(x) */ + expr *x_obj = new_variable(3, 1, 0, n_vars); + expr *objective = new_sum(x_obj, -1); + + /* Constraint: A @ x, A is 3x3 sparse */ + expr *x_con = new_variable(3, 1, 0, n_vars); + expr *A_param = new_parameter(3, 3, 0, n_vars, NULL); + + CSR_Matrix *A = new_csr_matrix(3, 3, 5); + int Ap[4] = {0, 2, 3, 5}; + int Ai[5] = {0, 2, 1, 0, 2}; + double Ax[5] = {0, 0, 0, 0, 0}; + memcpy(A->p, Ap, 4 * sizeof(int)); + memcpy(A->i, Ai, 5 * sizeof(int)); + memcpy(A->x, Ax, 5 * sizeof(double)); + + expr *constraint = new_left_matmul(A_param, x_con, A); + free_csr_matrix(A); + + expr *constraints[1] = {constraint}; + + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {A_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* A = [[1,0,2],[0,3,0],[4,0,5]], column-major: [1,0,4, 0,3,0, 2,0,5] */ + double theta[9] = {1.0, 0.0, 4.0, 0.0, 3.0, 0.0, 2.0, 0.0, 5.0}; + problem_update_params(prob, theta); + + double u[3] = {1.0, 2.0, 3.0}; + problem_constraint_forward(prob, u); + problem_jacobian(prob); + + double expected_cv[3] = {7.0, 6.0, 19.0}; + mu_assert("sparse constraint values wrong", + cmp_double_array(prob->constraint_values, expected_cv, 3)); + + CSR_Matrix *jac = prob->jacobian; + mu_assert("sparse jac nnz wrong", jac->nnz == 5); + + /* CSR data order: row0=[1,2], row1=[3], row2=[4,5] */ + double expected_x[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; + mu_assert("sparse jac->x wrong", + cmp_double_array(jac->x, expected_x, 5)); + + free_problem(prob); + + return 0; +} + #endif /* TEST_PARAM_PROB_H */ 
From 85f456aff2dab5bfdeed1cf636052d227c331018 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Mon, 6 Apr 2026 16:51:02 -0400 Subject: [PATCH 10/14] Run clang-format on changed files Co-Authored-By: Claude Opus 4.6 (1M context) --- src/atoms/affine/right_matmul.c | 3 +-- tests/problem/test_param_prob.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index 6283312..c2f6beb 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -72,8 +72,7 @@ static void refresh_dense_right(left_matmul_expr *lnode) { for (int j = 0; j < n_orig; j++) { - dm_A_inner->x[i * n_orig + j] = - dm_AT_inner->x[j * m_orig + i]; + dm_A_inner->x[i * n_orig + j] = dm_AT_inner->x[j * m_orig + i]; } } } diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 1fe8fab..10a6437 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -550,8 +550,7 @@ const char *test_param_left_matmul_sparse(void) /* CSR data order: row0=[1,2], row1=[3], row2=[4,5] */ double expected_x[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; - mu_assert("sparse jac->x wrong", - cmp_double_array(jac->x, expected_x, 5)); + mu_assert("sparse jac->x wrong", cmp_double_array(jac->x, expected_x, 5)); free_problem(prob); From 04c9a8cd9285e0d562190f28d84895a3be524771 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 7 Apr 2026 07:07:11 -0700 Subject: [PATCH 11/14] introduce explicit transpose function for dense matrix --- include/utils/dense_matrix.h | 2 ++ src/atoms/affine/left_matmul.c | 16 +++++----------- src/utils/dense_matrix.c | 15 ++++++++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/utils/dense_matrix.h b/include/utils/dense_matrix.h index 8121020..edcea66 100644 --- a/include/utils/dense_matrix.h +++ b/include/utils/dense_matrix.h @@ -17,4 +17,6 @@ Matrix *new_dense_matrix(int m, int n, const double *data); /* Transpose 
helper */ Matrix *dense_matrix_trans(const Dense_Matrix *self); +void A_transpose(double *AT, const double *A, int m, int n); + #endif /* DENSE_MATRIX_H */ diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index a79161e..224b990 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -193,18 +193,12 @@ static void refresh_dense_left(left_matmul_expr *lnode) Dense_Matrix *dm_AT = (Dense_Matrix *) lnode->AT; int m = dm_A->base.m; int n = dm_A->base.n; - const double *vals = lnode->param_source->value; - /* Column-major A is row-major AT; copy directly into AT */ - memcpy(dm_AT->x, vals, m * n * sizeof(double)); - /* Transpose AT to get row-major A */ - for (int i = 0; i < m; i++) - { - for (int j = 0; j < n; j++) - { - dm_A->x[i * n + j] = dm_AT->x[j * m + i]; - } - } + /* The parameter represents the A in left_matmul_dense(A, x) in column-major. + In this diffengine, we store A in row-major order. Hence, param->vals + actually corresponds to the transpose of A, and we transpose AT to get A. 
*/ + memcpy(dm_AT->x, lnode->param_source->value, m * n * sizeof(double)); + A_transpose(dm_A->x, dm_AT->x, m, n); } expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c index 3d1e52d..445d36e 100644 --- a/src/utils/dense_matrix.c +++ b/src/utils/dense_matrix.c @@ -78,15 +78,20 @@ Matrix *dense_matrix_trans(const Dense_Matrix *A) int n = A->base.n; double *AT_x = (double *) SP_MALLOC(m * n * sizeof(double)); + A_transpose(AT_x, A->x, m, n); + + Matrix *result = new_dense_matrix(n, m, AT_x); + free(AT_x); + return result; +} + +void A_transpose(double *AT, const double *A, int m, int n) +{ for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { - AT_x[j * m + i] = A->x[i * n + j]; + AT[j * m + i] = A[i * n + j]; } } - - Matrix *result = new_dense_matrix(n, m, AT_x); - free(AT_x); - return result; } From 23e7cb4a5834cc339e04f33c9c744a1f77a29022 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 7 Apr 2026 07:30:39 -0700 Subject: [PATCH 12/14] clean up refresh dense right --- src/atoms/affine/left_matmul.c | 12 +++++----- src/atoms/affine/right_matmul.c | 39 ++++++++++++--------------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index 224b990..2098265 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -170,17 +170,17 @@ static void refresh_sparse_left(left_matmul_expr *lnode) { Sparse_Matrix *sm_A = (Sparse_Matrix *) lnode->A; Sparse_Matrix *sm_AT = (Sparse_Matrix *) lnode->AT; - CSR_Matrix *csr = sm_A->csr; - const double *vals = lnode->param_source->value; + CSR_Matrix *A = sm_A->csr; + const double *param_vals = lnode->param_source->value; int d1 = lnode->param_source->d1; /* Parameter values are column-major; extract into CSR data order */ - for (int row = 0; row < csr->m; row++) + for (int i = 0; i < A->m; i++) { - for (int k = csr->p[row]; k < csr->p[row + 
1]; k++) + for (int jj = A->p[i]; jj < A->p[i + 1]; jj++) { - int col = csr->i[k]; - csr->x[k] = vals[col * d1 + row]; + int j = A->i[jj]; + A->x[jj] = param_vals[j * d1 + i]; } } /* Recompute AT values from A */ diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index c2f6beb..5440116 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -59,22 +59,18 @@ static void refresh_sparse_right(left_matmul_expr *lnode) static void refresh_dense_right(left_matmul_expr *lnode) { - Dense_Matrix *dm_AT_inner = (Dense_Matrix *) lnode->A; - Dense_Matrix *dm_A_inner = (Dense_Matrix *) lnode->AT; - int m_orig = dm_A_inner->base.m; - int n_orig = dm_A_inner->base.n; - const double *vals = lnode->param_source->value; - - /* Column-major A is row-major AT; copy directly into inner A (= A^T) */ - memcpy(dm_AT_inner->x, vals, m_orig * n_orig * sizeof(double)); - /* Transpose to get row-major A (inner's AT = original A) */ - for (int i = 0; i < m_orig; i++) - { - for (int j = 0; j < n_orig; j++) - { - dm_A_inner->x[i * n_orig + j] = dm_AT_inner->x[j * m_orig + i]; - } - } + /* This left_matmul_expr node corresponds to left multiplication with B = AT, + where A is the original (m x n) matrix given to the right_matmul function. + Furthermore, lnode->param_source->value corresponds to the column-major + version of A, which is BT (an m x n matrix) */ + + Dense_Matrix *B = (Dense_Matrix *) lnode->AT; + Dense_Matrix *BT = (Dense_Matrix *) lnode->A; + int m = B->base.n; + int n = B->base.m; + + memcpy(BT->x, lnode->param_source->value, m * n * sizeof(double)); + A_transpose(B->x, BT->x, m, n); } expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) @@ -107,16 +103,9 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n, const double *data) { - /* We express: u @ A = (A^T @ u^T)^T - A is m x n, so A^T is n x m. 
*/ + /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */ double *AT = (double *) SP_MALLOC(n * m * sizeof(double)); - for (int i = 0; i < m; i++) - { - for (int j = 0; j < n; j++) - { - AT[j * m + i] = data[i * n + j]; - } - } + A_transpose(AT, data, m, n); expr *u_transpose = new_transpose(u); expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); From ecab1d56cb0fd1ad82873f3d5bc02b70869569e9 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 7 Apr 2026 10:59:14 -0700 Subject: [PATCH 13/14] clean up tests... --- include/subexpr.h | 4 +- src/atoms/affine/left_matmul.c | 33 +- src/atoms/affine/parameter.c | 2 +- src/atoms/affine/right_matmul.c | 28 +- src/problem.c | 2 +- tests/all_tests.c | 1 - tests/problem/test_param_prob.h | 587 ++++++++++---------------------- 7 files changed, 214 insertions(+), 443 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index 2ffd891..2b638da 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -37,7 +37,9 @@ typedef struct parameter_expr { expr base; int param_id; - bool has_been_refreshed; + /* Set to true by problem_update_params(), cleared by + refresh_param_values() after propagating new values. 
*/ + bool needs_refresh; } parameter_expr; /* Linear operator: y = A * x + b diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index 2098265..bce4201 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -56,11 +56,11 @@ static void refresh_param_values(left_matmul_expr *lnode) return; } parameter_expr *param = (parameter_expr *) lnode->param_source; - if (param->has_been_refreshed) + if (!param->needs_refresh) { return; } - param->has_been_refreshed = true; + param->needs_refresh = false; lnode->refresh_param_values(lnode); } @@ -168,23 +168,11 @@ static void eval_wsum_hess(expr *node, const double *w) static void refresh_sparse_left(left_matmul_expr *lnode) { - Sparse_Matrix *sm_A = (Sparse_Matrix *) lnode->A; - Sparse_Matrix *sm_AT = (Sparse_Matrix *) lnode->AT; - CSR_Matrix *A = sm_A->csr; - const double *param_vals = lnode->param_source->value; - int d1 = lnode->param_source->d1; - - /* Parameter values are column-major; extract into CSR data order */ - for (int i = 0; i < A->m; i++) - { - for (int jj = A->p[i]; jj < A->p[i + 1]; jj++) - { - int j = A->i[jj]; - A->x[jj] = param_vals[j * d1 + i]; - } - } - /* Recompute AT values from A */ - AT_fill_values(sm_A->csr, sm_AT->csr, lnode->base.work->iwork); + (void) lnode; + fprintf(stderr, + "Error in refresh_sparse_left: parameter for a sparse matrix not " + "supported \n"); + exit(1); } static void refresh_dense_left(left_matmul_expr *lnode) @@ -198,7 +186,7 @@ static void refresh_dense_left(left_matmul_expr *lnode) In this diffengine, we store A in row-major order. Hence, param->vals actually corresponds to the transpose of A, and we transpose AT to get A. 
*/ memcpy(dm_AT->x, lnode->param_source->value, m * n * sizeof(double)); - A_transpose(dm_A->x, dm_AT->x, m, n); + A_transpose(dm_A->x, dm_AT->x, n, m); } expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) @@ -252,6 +240,11 @@ expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) lnode->param_source = param_node; if (param_node != NULL) { + + fprintf(stderr, "Error in new_left_matmul: parameter for a sparse matrix " + "not supported \n"); + exit(1); + expr_retain(param_node); lnode->refresh_param_values = refresh_sparse_left; } diff --git a/src/atoms/affine/parameter.c b/src/atoms/affine/parameter.c index d8d00f6..7cd2909 100644 --- a/src/atoms/affine/parameter.c +++ b/src/atoms/affine/parameter.c @@ -65,7 +65,7 @@ expr *new_parameter(int d1, int d2, int param_id, int n_vars, const double *valu is_affine, wsum_hess_init_impl, eval_wsum_hess, NULL); pnode->param_id = param_id; - pnode->has_been_refreshed = false; + pnode->needs_refresh = false; if (values != NULL) { diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index 5440116..d0f9082 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -20,6 +20,7 @@ #include "utils/CSR_Matrix.h" #include "utils/dense_matrix.h" #include "utils/tracked_alloc.h" +#include #include /* This file implements the atom 'right_matmul' corresponding to the operation y = @@ -38,23 +39,11 @@ So: update lnode->AT from param values, then recompute lnode->A. 
*/ static void refresh_sparse_right(left_matmul_expr *lnode) { - Sparse_Matrix *sm_AT_inner = (Sparse_Matrix *) lnode->A; - Sparse_Matrix *sm_A_inner = (Sparse_Matrix *) lnode->AT; - CSR_Matrix *csr_A = sm_A_inner->csr; - const double *vals = lnode->param_source->value; - int d1 = lnode->param_source->d1; - - /* Parameter values are column-major; extract into CSR data order */ - for (int row = 0; row < csr_A->m; row++) - { - for (int k = csr_A->p[row]; k < csr_A->p[row + 1]; k++) - { - int col = csr_A->i[k]; - csr_A->x[k] = vals[col * d1 + row]; - } - } - /* Recompute A^T (lnode->A) from A (lnode->AT) */ - AT_fill_values(sm_A_inner->csr, sm_AT_inner->csr, lnode->base.work->iwork); + (void) lnode; + fprintf(stderr, + "Error in refresh_sparse_right: parameter for a sparse matrix not " + "supported \n"); + exit(1); } static void refresh_dense_right(left_matmul_expr *lnode) @@ -87,6 +76,11 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) left_matmul */ if (param_node != NULL) { + + fprintf(stderr, "Error in new_right_matmul: parameter for a sparse matrix " + "not supported \n"); + exit(1); + left_matmul_expr *lnode = (left_matmul_expr *) left_matmul; lnode->param_source = param_node; expr_retain(param_node); diff --git a/src/problem.c b/src/problem.c index 1dff40c..e436e82 100644 --- a/src/problem.c +++ b/src/problem.c @@ -382,7 +382,7 @@ void problem_update_params(problem *prob, const double *theta) if (param->param_id == PARAM_FIXED) continue; int offset = param->param_id; memcpy(pnode->value, theta + offset, pnode->size * sizeof(double)); - param->has_been_refreshed = false; + param->needs_refresh = true; } /* Force re-evaluation of affine Jacobians on next call */ diff --git a/tests/all_tests.c b/tests/all_tests.c index 91c82ed..145712f 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -355,7 +355,6 @@ int main(void) mu_run_test(test_param_left_matmul_problem, tests_run); mu_run_test(test_param_right_matmul_problem, tests_run); 
mu_run_test(test_param_left_matmul_rectangular, tests_run); - mu_run_test(test_param_left_matmul_sparse, tests_run); mu_run_test(test_param_fixed_skip_in_update, tests_run); #endif /* PROFILE_ONLY */ diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 10a6437..8d008a6 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -13,544 +13,327 @@ #include "subexpr.h" #include "test_helpers.h" -/* - * Test 1: param_scalar_mult in objective - * - * Problem: minimize a * sum(log(x)), no constraints, x size 2 - * a is a scalar parameter (param_id=0) - * - * At x=[1,2], a=3: - * obj = 3*(log(1)+log(2)) = 3*log(2) - * gradient = [3/1, 3/2] = [3.0, 1.5] - * - * After update a=5: - * obj = 5*log(2) - * gradient = [5.0, 2.5] - */ const char *test_param_scalar_mult_problem(void) { - int n_vars = 2; + int n = 2; - /* Build tree: sum(a * log(x)) */ - expr *x = new_variable(2, 1, 0, n_vars); + /* minimize a * sum(log(x)), with a parameter */ + expr *x = new_variable(2, 1, 0, n); expr *log_x = new_log(x); - expr *a_param = new_parameter(1, 1, 0, n_vars, NULL); + expr *a_param = new_parameter(1, 1, 0, n, NULL); expr *scaled = new_scalar_mult(a_param, log_x); expr *objective = new_sum(scaled, -1); - - /* Create problem (no constraints) */ problem *prob = new_problem(objective, NULL, 0, false); - /* Register parameter */ + /* register parameters and fill sparsity patterns */ expr *param_nodes[1] = {a_param}; problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set a=3 and evaluate at x=[1,2] */ + /* point for evaluating */ + double x_vals[2] = {1.0, 2.0}; + + /* test 1: a=3 */ double theta[1] = {3.0}; problem_update_params(prob, theta); - - double u[2] = {1.0, 2.0}; - double obj_val = problem_objective_forward(prob, u); + double obj_val = problem_objective_forward(prob, x_vals); problem_gradient(prob); - double expected_obj = 3.0 * log(2.0); - mu_assert("obj wrong (a=3)", fabs(obj_val - 
expected_obj) < 1e-10); + mu_assert("vals fail", fabs(obj_val - expected_obj) < 1e-10); + double grad[2] = {3.0, 1.5}; + mu_assert("vals fail", cmp_double_array(prob->gradient_values, grad, 2)); - double expected_grad[2] = {3.0, 1.5}; - mu_assert("gradient wrong (a=3)", - cmp_double_array(prob->gradient_values, expected_grad, 2)); - - /* Update a=5 and re-evaluate */ + /* test 2: a=5 */ theta[0] = 5.0; problem_update_params(prob, theta); - - obj_val = problem_objective_forward(prob, u); + obj_val = problem_objective_forward(prob, x_vals); problem_gradient(prob); - expected_obj = 5.0 * log(2.0); - mu_assert("obj wrong (a=5)", fabs(obj_val - expected_obj) < 1e-10); - - double expected_grad2[2] = {5.0, 2.5}; - mu_assert("gradient wrong (a=5)", - cmp_double_array(prob->gradient_values, expected_grad2, 2)); + mu_assert("vals fail", fabs(obj_val - expected_obj) < 1e-10); + grad[0] = 5.0; + grad[1] = 2.5; + mu_assert("vals fail", cmp_double_array(prob->gradient_values, grad, 2)); free_problem(prob); return 0; } -/* - * Test 2: param_vector_mult in constraint - * - * Problem: minimize sum(x), subject to p ∘ x, x size 2 - * p is a vector parameter of size 2 (param_id=0) - * - * At x=[1,2], p=[3,4]: - * constraint_values = [3, 8] - * jacobian = diag([3, 4]) - * - * After update p=[5,6]: - * constraint_values = [5, 12] - * jacobian = diag([5, 6]) - */ const char *test_param_vector_mult_problem(void) { - int n_vars = 2; - - /* Objective: sum(x) */ - expr *x_obj = new_variable(2, 1, 0, n_vars); - expr *objective = new_sum(x_obj, -1); - - /* Constraint: p ∘ x */ - expr *x_con = new_variable(2, 1, 0, n_vars); - expr *p_param = new_parameter(2, 1, 0, n_vars, NULL); - expr *constraint = new_vector_mult(p_param, x_con); + int n = 2; + /* minimize sum(x) subject to p ∘ x, with p parameter */ + expr *x = new_variable(2, 1, 0, n); + expr *objective = new_sum(x, -1); + expr *p_param = new_parameter(2, 1, 0, n, NULL); + expr *constraint = new_vector_mult(p_param, x); expr *constraints[1] 
= {constraint}; - - /* Create problem */ problem *prob = new_problem(objective, constraints, 1, false); + /* register parameters and fill sparsity patterns */ expr *param_nodes[1] = {p_param}; problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set p=[3,4] and evaluate at x=[1,2] */ + /* point for evaluating and utilities for test */ + double x_vals[2] = {1.0, 2.0}; + int Ap[3] = {0, 1, 2}; + int Ai[2] = {0, 1}; + double Ax[2] = {3.0, 4.0}; + + /* test 1: p=[3,4] */ double theta[2] = {3.0, 4.0}; problem_update_params(prob, theta); - - double u[2] = {1.0, 2.0}; - problem_constraint_forward(prob, u); + problem_constraint_forward(prob, x_vals); + double constrs[2] = {3.0, 8.0}; problem_jacobian(prob); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 2)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 2)); - double expected_cv[2] = {3.0, 8.0}; - mu_assert("constraint values wrong (p=[3,4])", - cmp_double_array(prob->constraint_values, expected_cv, 2)); - - CSR_Matrix *jac = prob->jacobian; - mu_assert("jac rows wrong", jac->m == 2); - mu_assert("jac cols wrong", jac->n == 2); - - int expected_p[3] = {0, 1, 2}; - mu_assert("jac->p wrong (p=[3,4])", cmp_int_array(jac->p, expected_p, 3)); - - int expected_i[2] = {0, 1}; - mu_assert("jac->i wrong (p=[3,4])", cmp_int_array(jac->i, expected_i, 2)); - - double expected_x[2] = {3.0, 4.0}; - mu_assert("jac->x wrong (p=[3,4])", cmp_double_array(jac->x, expected_x, 2)); - - /* Update p=[5,6] and re-evaluate */ - double theta2[2] = {5.0, 6.0}; - problem_update_params(prob, theta2); - - problem_constraint_forward(prob, u); + /* test 2: p=[5,6] */ + theta[0] = 5.0; + theta[1] = 6.0; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); problem_jacobian(prob); - - double expected_cv2[2] = {5.0, 
12.0}; - mu_assert("constraint values wrong (p=[5,6])", - cmp_double_array(prob->constraint_values, expected_cv2, 2)); - - double expected_x2[2] = {5.0, 6.0}; - mu_assert("jac->x wrong (p=[5,6])", cmp_double_array(jac->x, expected_x2, 2)); + constrs[0] = 5.0; + constrs[1] = 12.0; + Ax[0] = 5.0; + Ax[1] = 6.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 2)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 2)); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); free_problem(prob); return 0; } -/* - * Test 3: left_param_matmul in constraint - * - * Problem: minimize sum(x), subject to A @ x, x size 2, A is 2x2 - * A is a 2x2 matrix parameter (param_id=0, size=4, column-major order) - * A = [[1,2],[3,4]] → column-major theta = [1,3,2,4] - * - * At x=[1,2]: - * constraint_values = [1*1+2*2, 3*1+4*2] = [5, 11] - * jacobian = [[1,2],[3,4]] - * - * After update A = [[5,6],[7,8]] → column-major theta = [5,7,6,8]: - * constraint_values = [5*1+6*2, 7*1+8*2] = [17, 23] - * jacobian = [[5,6],[7,8]] - */ const char *test_param_left_matmul_problem(void) { - int n_vars = 2; - - /* Objective: sum(x) */ - expr *x_obj = new_variable(2, 1, 0, n_vars); - expr *objective = new_sum(x_obj, -1); - - /* Constraint: A @ x */ - expr *x_con = new_variable(2, 1, 0, n_vars); - expr *A_param = new_parameter(2, 2, 0, n_vars, NULL); - - /* Dense 2x2 CSR with placeholder zeros */ - CSR_Matrix *A = new_csr_matrix(2, 2, 4); - int Ap[3] = {0, 2, 4}; - int Ai[4] = {0, 1, 0, 1}; - double Ax[4] = {0.0, 0.0, 0.0, 0.0}; - memcpy(A->p, Ap, 3 * sizeof(int)); - memcpy(A->i, Ai, 4 * sizeof(int)); - memcpy(A->x, Ax, 4 * sizeof(double)); + int n = 2; - expr *constraint = new_left_matmul(A_param, x_con, A); - free_csr_matrix(A); + /* minimize sum(x) subject to Ax = ?, with A parameter */ + expr *x = 
new_variable(2, 1, 0, n); + expr *objective = new_sum(x, -1); + expr *A_param = new_parameter(2, 2, 0, n, NULL); + /* dense 2x2 matrix */ + double Ax[4] = {2.0, 0.0, 0.0, 1.0}; + expr *constraint = new_left_matmul_dense(A_param, x, 2, 2, Ax); expr *constraints[1] = {constraint}; - - /* Create problem */ problem *prob = new_problem(objective, constraints, 1, false); + /* register parameters and fill sparsity patterns*/ expr *param_nodes[1] = {A_param}; problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set A = [[1,2],[3,4]], column-major: [1,3,2,4] */ - double theta[4] = {1.0, 3.0, 2.0, 4.0}; - problem_update_params(prob, theta); + /* point for evaluating and utilities for test */ + double x_vals[2] = {1.0, 2.0}; + int Ap[3] = {0, 2, 4}; + int Ai[4] = {0, 1, 0, 1}; - double u[2] = {1.0, 2.0}; - problem_constraint_forward(prob, u); + /* test 1: initial jacobian */ + problem_constraint_forward(prob, x_vals); + double constrs[2] = {2.0, 2.0}; problem_jacobian(prob); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 4)); - double expected_cv[2] = {5.0, 11.0}; - mu_assert("constraint values wrong (A1)", - cmp_double_array(prob->constraint_values, expected_cv, 2)); - - CSR_Matrix *jac = prob->jacobian; - mu_assert("jac rows wrong", jac->m == 2); - mu_assert("jac cols wrong", jac->n == 2); - - int expected_p[3] = {0, 2, 4}; - mu_assert("jac->p wrong (A1)", cmp_int_array(jac->p, expected_p, 3)); - - int expected_i[4] = {0, 1, 0, 1}; - mu_assert("jac->i wrong (A1)", cmp_int_array(jac->i, expected_i, 4)); - - double expected_x[4] = {1.0, 2.0, 3.0, 4.0}; - mu_assert("jac->x wrong (A1)", cmp_double_array(jac->x, expected_x, 4)); - - /* Update A = [[5,6],[7,8]], column-major: [5,7,6,8] */ - double theta2[4] = {5.0, 7.0, 
6.0, 8.0}; - problem_update_params(prob, theta2); - - problem_constraint_forward(prob, u); + /* test 2: A = [[1,2],[3,4]] (column-major [1,3,2,4]) */ + double theta[4] = {1.0, 3.0, 2.0, 4.0}; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); problem_jacobian(prob); - - double expected_cv2[2] = {17.0, 23.0}; - mu_assert("constraint values wrong (A2)", - cmp_double_array(prob->constraint_values, expected_cv2, 2)); - - double expected_x2[4] = {5.0, 6.0, 7.0, 8.0}; - mu_assert("jac->x wrong (A2)", cmp_double_array(jac->x, expected_x2, 4)); + constrs[0] = 5; + constrs[1] = 11; + Ax[0] = 1.0; + Ax[1] = 2.0; + Ax[2] = 3.0; + Ax[3] = 4.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 4)); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); free_problem(prob); return 0; } -/* - * Test 4: right_param_matmul in constraint - * - * Problem: minimize sum(x), subject to x @ A, x size 1x2, A is 2x2 - * A is a 2x2 matrix parameter (param_id=0, size=4, column-major order) - * A = [[1,2],[3,4]] → column-major theta = [1,3,2,4] - * - * At x=[1,2]: - * constraint_values = [1*1+2*3, 1*2+2*4] = [7, 10] - * jacobian = [[1,3],[2,4]] = A^T - * - * After update A = [[5,6],[7,8]] → column-major theta = [5,7,6,8]: - * constraint_values = [1*5+2*7, 1*6+2*8] = [19, 22] - * jacobian = [[5,7],[6,8]] = A^T - */ const char *test_param_right_matmul_problem(void) { - int n_vars = 2; - - /* Objective: sum(x) */ - expr *x_obj = new_variable(1, 2, 0, n_vars); - expr *objective = new_sum(x_obj, -1); - - /* Constraint: x @ A */ - expr *x_con = new_variable(1, 2, 0, n_vars); - expr *A_param = new_parameter(2, 2, 0, n_vars, NULL); - - /* Dense 2x2 CSR with placeholder zeros */ - CSR_Matrix *A = new_csr_matrix(2, 2, 4); 
- int Ap[3] = {0, 2, 4}; - int Ai[4] = {0, 1, 0, 1}; - double Ax[4] = {0.0, 0.0, 0.0, 0.0}; - memcpy(A->p, Ap, 3 * sizeof(int)); - memcpy(A->i, Ai, 4 * sizeof(int)); - memcpy(A->x, Ax, 4 * sizeof(double)); + int n = 2; - expr *constraint = new_right_matmul(A_param, x_con, A); - free_csr_matrix(A); + /* minimize sum(x) subject to xA = ?, with A parameter */ + expr *x = new_variable(1, 2, 0, n); + expr *objective = new_sum(x, -1); + expr *A_param = new_parameter(2, 2, 0, n, NULL); + /* dense 2x2 matrix */ + double Ax[4] = {2.0, 0.0, 0.0, 1.0}; + expr *constraint = new_right_matmul_dense(A_param, x, 2, 2, Ax); expr *constraints[1] = {constraint}; - - /* Create problem */ problem *prob = new_problem(objective, constraints, 1, false); + /* register parameters and fill sparsity patterns */ expr *param_nodes[1] = {A_param}; problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set A = [[1,2],[3,4]], column-major: [1,3,2,4] */ - double theta[4] = {1.0, 3.0, 2.0, 4.0}; - problem_update_params(prob, theta); + /* point for evaluating and utilities for test */ + double x_vals[2] = {1.0, 2.0}; + int Ap[3] = {0, 2, 4}; + int Ai[4] = {0, 1, 0, 1}; - double u[2] = {1.0, 2.0}; - problem_constraint_forward(prob, u); + /* test 1: initial jacobian */ + problem_constraint_forward(prob, x_vals); + double constrs[2] = {2.0, 2.0}; problem_jacobian(prob); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 4)); - double expected_cv[2] = {7.0, 10.0}; - mu_assert("constraint values wrong (A1)", - cmp_double_array(prob->constraint_values, expected_cv, 2)); - - CSR_Matrix *jac = prob->jacobian; - mu_assert("jac rows wrong", jac->m == 2); - mu_assert("jac cols wrong", jac->n == 2); - - int expected_p[3] = {0, 2, 4}; - mu_assert("jac->p wrong 
(A1)", cmp_int_array(jac->p, expected_p, 3)); - - int expected_i[4] = {0, 1, 0, 1}; - mu_assert("jac->i wrong (A1)", cmp_int_array(jac->i, expected_i, 4)); - - double expected_x[4] = {1.0, 3.0, 2.0, 4.0}; - mu_assert("jac->x wrong (A1)", cmp_double_array(jac->x, expected_x, 4)); - - /* Update A = [[5,6],[7,8]], column-major: [5,7,6,8] */ - double theta2[4] = {5.0, 7.0, 6.0, 8.0}; - problem_update_params(prob, theta2); - - problem_constraint_forward(prob, u); + /* test 2: A = [[1,2],[3,4]] (column-major [1,3,2,4]) */ + double theta[4] = {1.0, 3.0, 2.0, 4.0}; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); problem_jacobian(prob); - - double expected_cv2[2] = {19.0, 22.0}; - mu_assert("constraint values wrong (A2)", - cmp_double_array(prob->constraint_values, expected_cv2, 2)); - - double expected_x2[4] = {5.0, 7.0, 6.0, 8.0}; - mu_assert("jac->x wrong (A2)", cmp_double_array(jac->x, expected_x2, 4)); + constrs[0] = 7; + constrs[1] = 10; + Ax[0] = 1.0; + Ax[1] = 3.0; + Ax[2] = 2.0; + Ax[3] = 4.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 3)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 4)); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); free_problem(prob); return 0; } -/* - * Test 5: PARAM_FIXED params are skipped by problem_update_params - * - * Problem: minimize a * sum(log(x)) + b * sum(x), no constraints, x size 2 - * a is a FIXED scalar parameter (param_id=PARAM_FIXED, value=2.0) - * b is an updatable scalar parameter (param_id=0) - * - * At x=[1,2], a=2, b=3: - * obj = 2*(log(1)+log(2)) + 3*(1+2) = 2*log(2) + 9 - * gradient = [2/1 + 3, 2/2 + 3] = [5.0, 4.0] - * - * After update theta={5.0} (only b changes to 5, a stays 2): - * obj = 2*log(2) + 5*3 = 2*log(2) + 15 - * gradient = [2/1 + 5, 2/2 + 5] = [7.0, 6.0] 
- */ const char *test_param_fixed_skip_in_update(void) { - int n_vars = 2; + int n = 2; - /* Build tree: a * sum(log(x)) + b * sum(x) */ - expr *x1 = new_variable(2, 1, 0, n_vars); - expr *log_x = new_log(x1); + /* minimize a * sum(log(x)) + b * sum(x), a fixed, b updatable */ + expr *x = new_variable(2, 1, 0, n); + expr *log_x = new_log(x); double a_val = 2.0; - expr *a_param = new_parameter(1, 1, PARAM_FIXED, n_vars, &a_val); + expr *a_param = new_parameter(1, 1, PARAM_FIXED, n, &a_val); expr *a_log = new_scalar_mult(a_param, log_x); expr *sum_a_log = new_sum(a_log, -1); - expr *x2 = new_variable(2, 1, 0, n_vars); - expr *b_param = new_parameter(1, 1, 0, n_vars, NULL); - expr *b_x = new_scalar_mult(b_param, x2); + expr *b_param = new_parameter(1, 1, 0, n, NULL); + expr *b_x = new_scalar_mult(b_param, x); expr *sum_b_x = new_sum(b_x, -1); expr *objective = new_add(sum_a_log, sum_b_x); - - /* Create problem and register BOTH params */ problem *prob = new_problem(objective, NULL, 0, false); + /* register parameters and fill sparsity patterns */ expr *param_nodes[2] = {a_param, b_param}; problem_register_params(prob, param_nodes, 2); problem_init_derivatives(prob); - /* Set b=3 and evaluate at x=[1,2] */ + /* point for evaluating */ + double x_vals[2] = {1.0, 2.0}; + + /* test 1: b=3, a stays 2 */ double theta[1] = {3.0}; problem_update_params(prob, theta); - - /* Verify a is still 2.0 (not overwritten) */ - mu_assert("a_param changed after update", fabs(a_param->value[0] - 2.0) < 1e-10); - - double u[2] = {1.0, 2.0}; - double obj_val = problem_objective_forward(prob, u); + mu_assert("vals fail", fabs(a_param->value[0] - 2.0) < 1e-10); + double obj_val = problem_objective_forward(prob, x_vals); problem_gradient(prob); - double expected_obj = 2.0 * log(2.0) + 9.0; - mu_assert("obj wrong (b=3)", fabs(obj_val - expected_obj) < 1e-10); - - double expected_grad[2] = {5.0, 4.0}; - mu_assert("gradient wrong (b=3)", - cmp_double_array(prob->gradient_values, expected_grad, 
2)); + mu_assert("vals fail", fabs(obj_val - expected_obj) < 1e-10); + double grad[2] = {5.0, 4.0}; + mu_assert("vals fail", cmp_double_array(prob->gradient_values, grad, 2)); - /* Update b=5, a should stay 2 */ + /* test 2: b=5, a stays 2 */ theta[0] = 5.0; problem_update_params(prob, theta); - - mu_assert("a_param changed after second update", - fabs(a_param->value[0] - 2.0) < 1e-10); - - obj_val = problem_objective_forward(prob, u); + mu_assert("vals fail", fabs(a_param->value[0] - 2.0) < 1e-10); + obj_val = problem_objective_forward(prob, x_vals); problem_gradient(prob); - - double expected_obj2 = 2.0 * log(2.0) + 15.0; - mu_assert("obj wrong (b=5)", fabs(obj_val - expected_obj2) < 1e-10); - - double expected_grad2[2] = {7.0, 6.0}; - mu_assert("gradient wrong (b=5)", - cmp_double_array(prob->gradient_values, expected_grad2, 2)); + expected_obj = 2.0 * log(2.0) + 15.0; + mu_assert("vals fail", fabs(obj_val - expected_obj) < 1e-10); + grad[0] = 7.0; + grad[1] = 6.0; + mu_assert("vals fail", cmp_double_array(prob->gradient_values, grad, 2)); free_problem(prob); return 0; } -/* - * Test 6: left_param_matmul with rectangular 3x2 matrix - * - * Problem: minimize sum(x), subject to A @ x, x size 2, A is 3x2 - * A = [[1,2],[3,4],[5,6]] → column-major theta = [1,3,5,2,4,6] - * - * At x=[1,2]: - * constraint_values = [1+4, 3+8, 5+12] = [5, 11, 17] - * jacobian = [[1,2],[3,4],[5,6]] - */ const char *test_param_left_matmul_rectangular(void) { - int n_vars = 2; - - /* Objective: sum(x) */ - expr *x_obj = new_variable(2, 1, 0, n_vars); - expr *objective = new_sum(x_obj, -1); + int n = 2; - /* Constraint: A @ x, A is 3x2 */ - expr *x_con = new_variable(2, 1, 0, n_vars); - expr *A_param = new_parameter(3, 2, 0, n_vars, NULL); - - /* Dense 3x2 CSR */ - CSR_Matrix *A = new_csr_matrix(3, 2, 6); - int Ap[4] = {0, 2, 4, 6}; - int Ai[6] = {0, 1, 0, 1, 0, 1}; - double Ax[6] = {0, 0, 0, 0, 0, 0}; - memcpy(A->p, Ap, 4 * sizeof(int)); - memcpy(A->i, Ai, 6 * sizeof(int)); - memcpy(A->x, 
Ax, 6 * sizeof(double)); - - expr *constraint = new_left_matmul(A_param, x_con, A); - free_csr_matrix(A); + /* minimize sum(x) subject to Ax = ?, with A parameter (3x2) */ + expr *x = new_variable(2, 1, 0, n); + expr *objective = new_sum(x, -1); + expr *A_param = new_parameter(3, 2, 0, n, NULL); + /* dense 3x2 matrix */ + double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + expr *constraint = new_left_matmul_dense(A_param, x, 3, 2, Ax); expr *constraints[1] = {constraint}; - problem *prob = new_problem(objective, constraints, 1, false); + /* register parameters and fill sparsity patterns */ expr *param_nodes[1] = {A_param}; problem_register_params(prob, param_nodes, 1); problem_init_derivatives(prob); - /* Set A = [[1,2],[3,4],[5,6]], column-major: [1,3,5,2,4,6] */ - double theta[6] = {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}; - problem_update_params(prob, theta); + /* point for evaluating and utilities for test */ + double x_vals[2] = {1.0, 2.0}; + int Ap[4] = {0, 2, 4, 6}; + int Ai[6] = {0, 1, 0, 1, 0, 1}; - double u[2] = {1.0, 2.0}; - problem_constraint_forward(prob, u); + /* test 1: initial jacobian */ + problem_constraint_forward(prob, x_vals); + double constrs[3] = {5.0, 11.0, 17.0}; problem_jacobian(prob); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 4)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 6)); - double expected_cv[3] = {5.0, 11.0, 17.0}; - mu_assert("rect constraint values wrong", - cmp_double_array(prob->constraint_values, expected_cv, 3)); - - CSR_Matrix *jac = prob->jacobian; - double expected_x[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - mu_assert("rect jac->x wrong", cmp_double_array(jac->x, expected_x, 6)); - - free_problem(prob); - - return 0; -} - -/* - * Test 7: left_param_matmul with sparse matrix (structural zeros) - * - * Problem: minimize sum(x), subject to A @ x, x size 
3, A is 3x3 sparse - * A = [[1, 0, 2], - * [0, 3, 0], - * [4, 0, 5]] - * CSR: p=[0,2,3,5], i=[0,2, 1, 0,2], nnz=5 - * Column-major theta (full 9 values): [1,0,4, 0,3,0, 2,0,5] - * - * At x=[1,2,3]: - * constraint_values = [1+6, 6, 4+15] = [7, 6, 19] - * jacobian = A - */ -const char *test_param_left_matmul_sparse(void) -{ - int n_vars = 3; - - /* Objective: sum(x) */ - expr *x_obj = new_variable(3, 1, 0, n_vars); - expr *objective = new_sum(x_obj, -1); - - /* Constraint: A @ x, A is 3x3 sparse */ - expr *x_con = new_variable(3, 1, 0, n_vars); - expr *A_param = new_parameter(3, 3, 0, n_vars, NULL); - - CSR_Matrix *A = new_csr_matrix(3, 3, 5); - int Ap[4] = {0, 2, 3, 5}; - int Ai[5] = {0, 2, 1, 0, 2}; - double Ax[5] = {0, 0, 0, 0, 0}; - memcpy(A->p, Ap, 4 * sizeof(int)); - memcpy(A->i, Ai, 5 * sizeof(int)); - memcpy(A->x, Ax, 5 * sizeof(double)); - - expr *constraint = new_left_matmul(A_param, x_con, A); - free_csr_matrix(A); - - expr *constraints[1] = {constraint}; - - problem *prob = new_problem(objective, constraints, 1, false); - - expr *param_nodes[1] = {A_param}; - problem_register_params(prob, param_nodes, 1); - problem_init_derivatives(prob); - - /* A = [[1,0,2],[0,3,0],[4,0,5]], column-major: [1,0,4, 0,3,0, 2,0,5] */ - double theta[9] = {1.0, 0.0, 4.0, 0.0, 3.0, 0.0, 2.0, 0.0, 5.0}; + /* test 2: A = [[7,8],[9,10],[11,12]] (column-major [7,9,11,8,10,12]) */ + double theta[6] = {7.0, 9.0, 11.0, 8.0, 10.0, 12.0}; problem_update_params(prob, theta); - - double u[3] = {1.0, 2.0, 3.0}; - problem_constraint_forward(prob, u); + problem_constraint_forward(prob, x_vals); problem_jacobian(prob); - - double expected_cv[3] = {7.0, 6.0, 19.0}; - mu_assert("sparse constraint values wrong", - cmp_double_array(prob->constraint_values, expected_cv, 3)); - - CSR_Matrix *jac = prob->jacobian; - mu_assert("sparse jac nnz wrong", jac->nnz == 5); - - /* CSR data order: row0=[1,2], row1=[3], row2=[4,5] */ - double expected_x[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; - mu_assert("sparse 
jac->x wrong", cmp_double_array(jac->x, expected_x, 5)); + constrs[0] = 23.0; + constrs[1] = 29.0; + constrs[2] = 35.0; + Ax[0] = 7.0; + Ax[1] = 8.0; + Ax[2] = 9.0; + Ax[3] = 10.0; + Ax[4] = 11.0; + Ax[5] = 12.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 4)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 6)); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); free_problem(prob); From ff777fa72d44defd325d3d2ec4e4fc36a3ab38e7 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 7 Apr 2026 11:02:32 -0700 Subject: [PATCH 14/14] one more test --- tests/all_tests.c | 1 + tests/problem/test_param_prob.h | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/tests/all_tests.c b/tests/all_tests.c index 145712f..c0bb1b9 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -355,6 +355,7 @@ int main(void) mu_run_test(test_param_left_matmul_problem, tests_run); mu_run_test(test_param_right_matmul_problem, tests_run); mu_run_test(test_param_left_matmul_rectangular, tests_run); + mu_run_test(test_param_right_matmul_rectangular, tests_run); mu_run_test(test_param_fixed_skip_in_update, tests_run); #endif /* PROFILE_ONLY */ diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 8d008a6..76b24e3 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -340,4 +340,64 @@ const char *test_param_left_matmul_rectangular(void) return 0; } +const char *test_param_right_matmul_rectangular(void) +{ + int n = 2; + + /* minimize sum(x) subject to xA = ?, with A parameter (2x3) */ + expr *x = new_variable(1, 2, 0, n); + expr *objective = new_sum(x, -1); + expr *A_param = new_parameter(2, 3, 0, n, NULL); + + /* dense 2x3 matrix */ + double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + expr 
*constraint = new_right_matmul_dense(A_param, x, 2, 3, Ax); + expr *constraints[1] = {constraint}; + problem *prob = new_problem(objective, constraints, 1, false); + + /* register parameters and fill sparsity patterns */ + expr *param_nodes[1] = {A_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* point for evaluating and utilities for test */ + double x_vals[2] = {1.0, 2.0}; + int Ap[4] = {0, 2, 4, 6}; + int Ai[6] = {0, 1, 0, 1, 0, 1}; + + /* test 1: initial jacobian */ + problem_constraint_forward(prob, x_vals); + double constrs[3] = {9.0, 12.0, 15.0}; + problem_jacobian(prob); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 4)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 6)); + double jac_x[6] = {1.0, 4.0, 2.0, 5.0, 3.0, 6.0}; + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, jac_x, 6)); + + /* test 2: A = [[7,8,9],[10,11,12]] (column-major [7,10,8,11,9,12]) */ + double theta[6] = {7.0, 10.0, 8.0, 11.0, 9.0, 12.0}; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + constrs[0] = 27.0; + constrs[1] = 30.0; + constrs[2] = 33.0; + jac_x[0] = 7.0; + jac_x[1] = 10.0; + jac_x[2] = 8.0; + jac_x[3] = 11.0; + jac_x[4] = 9.0; + jac_x[5] = 12.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, jac_x, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 4)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 6)); + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); + + free_problem(prob); + + return 0; +} + #endif /* TEST_PARAM_PROB_H */