first draft of hessian chain rule

dance858 · dance858 · commit a7d8dfad7320 · 2026-03-29T10:09:18.000-07:00
diff --git a/include/subexpr.h b/include/subexpr.h
@@ -48,6 +48,7 @@ typedef struct quad_form_expr
 {
     expr base;
     CSR_Matrix *Q;
+    CSC_Matrix *QJf; /* Q * J_f in CSC (for chain rule hessian) */
 } quad_form_expr;
 
 /* Sum reduction along an axis */
diff --git a/include/utils/CSC_Matrix.h b/include/utils/CSC_Matrix.h
@@ -37,10 +37,12 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A);
 /* Allocate sparsity pattern for C = B^T D A for diagonal D */
 CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B);
 
-/* Compute values for C = A^T D A. C must have precomputed sparsity pattern  */
+/* Compute values for C = A^T D A. C must have precomputed sparsity pattern.
+ * If d is NULL, D is treated as the identity (computes A^T A). */
 void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C);
 
-/* Compute values for C = B^T D A. C must have precomputed sparsity pattern  */
+/* Compute values for C = B^T D A. C must have precomputed sparsity pattern.
+ * If d is NULL, D is treated as the identity (computes B^T A). */
 void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
                       CSR_Matrix *C);
 
@@ -49,6 +51,13 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
  */
 void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C);
 
+/* Allocate B = Q * A (sparsity only). Q is CSR, A is CSC, B is CSC. */
+CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix *A);
+
+/* Fill values of B = Q * A. B must have sparsity from above. */
+void csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
+                                  CSC_Matrix *B);
+
 /* Count nonzero columns of a CSC matrix */
 int count_nonzero_cols_csc(const CSC_Matrix *A);
 
diff --git a/src/other/quad_form.c b/src/other/quad_form.c
@@ -1,6 +1,8 @@
 #include "other.h"
 #include "subexpr.h"
 #include "utils/CSC_Matrix.h"
+#include "utils/CSR_sum.h"
+#include "utils/cblas_wrapper.h"
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
@@ -115,33 +117,106 @@ static void wsum_hess_init_impl(expr *node)
 {
     CSR_Matrix *Q = ((quad_form_expr *) node)->Q;
     expr *x = node->left;
-    CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz);
 
-    /* set global row pointers */
-    memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int));
-    for (int i = x->var_id + x->size + 1; i <= node->n_vars; i++)
+    if (x->var_id != NOT_A_VARIABLE)
     {
+        CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz);
 
-        H->p[i] = Q->nnz;
-    }
+        /* set global row pointers */
+        memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int));
+        for (int i = x->var_id + x->size + 1; i <= node->n_vars; i++)
+        {
+            H->p[i] = Q->nnz;
+        }
 
-    /* set global column indices */
-    for (int i = 0; i < Q->nnz; i++)
-    {
-        H->i[i] = Q->i[i] + x->var_id;
+        /* set global column indices */
+        for (int i = 0; i < Q->nnz; i++)
+        {
+            H->i[i] = Q->i[i] + x->var_id;
+        }
+
+        node->wsum_hess = H;
     }
+    else
+    {
+        /* The hessian of h(x) = f(x)^T Q f(x) is 2 * (term1 + term2) where
 
-    node->wsum_hess = H;
+            * term1 = J_f^T Q J_f
+            * term2 = sum_i (Qf(x))_i nabla^2 f_i.
+
+            To compute term1, we first compute B = Q J_f and then compute term1
+            = J_f^T B.
+        */
+
+        /* jacobian_csc_init(x) already called in jacobian_init */
+        quad_form_expr *qnode = (quad_form_expr *) node;
+        CSC_Matrix *Jf = x->work->jacobian_csc;
+
+        /* term1 = Jf^T Q Jf = Jf^T B */
+        CSC_Matrix *B = csr_csc_multiply_fill_sparsity(Q, Jf);
+        qnode->QJf = B;
+        node->work->hess_term1 = BTA_alloc(Jf, B);
+
+        /* term2 = sum_i (Qf(x))_i nabla^2 f_i */
+        wsum_hess_init(x);
+        node->work->hess_term2 = new_csr_copy_sparsity(x->wsum_hess);
+
+        /* hess = term1 + term2 */
+        int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz;
+        node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz);
+        sum_csr_matrices_fill_sparsity(node->work->hess_term1,
+                                       node->work->hess_term2, node->wsum_hess);
+    }
 }
 
 static void eval_wsum_hess(expr *node, const double *w)
 {
     CSR_Matrix *Q = ((quad_form_expr *) node)->Q;
-    double *H = node->wsum_hess->x;
+    expr *x = node->left;
     double two_w = 2.0 * w[0];
-    for (int i = 0; i < Q->nnz; i++)
+
+    if (x->var_id != NOT_A_VARIABLE)
     {
-        H[i] = two_w * Q->x[i];
+        /* TODO: do we want to compute this hessian only once (up to a scaling)?
+         * Maybe unnecessary optimization. */
+        double *H = node->wsum_hess->x;
+        for (int i = 0; i < Q->nnz; i++)
+        {
+            H[i] = two_w * Q->x[i];
+        }
+    }
+    else
+    {
+        /* fill the CSC representation of the Jacobian of the child */
+        CSC_Matrix *Jf = x->work->jacobian_csc;
+        if (!x->work->jacobian_csc_filled)
+        {
+            csr_to_csc_fill_values(x->jacobian, Jf, x->work->csc_work);
+
+            if (x->is_affine(x))
+            {
+                x->work->jacobian_csc_filled = true;
+            }
+        }
+
+        CSC_Matrix *QJf = ((quad_form_expr *) node)->QJf;
+        CSR_Matrix *term1 = node->work->hess_term1;
+        CSR_Matrix *term2 = node->work->hess_term2;
+
+        /* term1 = J_f^T Q J_f = J_f^T B  */
+        csr_csc_multiply_fill_values(Q, Jf, QJf);
+        BTDA_fill_values(Jf, QJf, NULL, term1);
+
+        /* term2 */
+        x->eval_wsum_hess(x, node->work->dwork);
+        memcpy(term2->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double));
+
+        /* scale both terms by 2w */
+        cblas_dscal(term1->nnz, two_w, term1->x, 1);
+        cblas_dscal(term2->nnz, two_w, term2->x, 1);
+
+        /* sum the two terms */
+        sum_csr_matrices_fill_values(term1, term2, node->wsum_hess);
     }
 }
 
@@ -150,12 +225,17 @@ static void free_type_data(expr *node)
     quad_form_expr *qnode = (quad_form_expr *) node;
     free_csr_matrix(qnode->Q);
     qnode->Q = NULL;
+    if (qnode->QJf != NULL)
+    {
+        free_csc_matrix(qnode->QJf);
+        qnode->QJf = NULL;
+    }
 }
 
 static bool is_affine(const expr *node)
 {
     (void) node;
-    /* TODO: it is affine if both children are constant */
+    /* TODO: it is affine (constant) if both children are constant */
     return false;
 }
 
diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c
@@ -111,6 +111,34 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A)
     return C;
 }
 
+static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz,
+                                const double *b_x, const int *b_i, int b_nnz)
+{
+    int ii = 0;
+    int jj = 0;
+    double sum = 0.0;
+
+    while (ii < a_nnz && jj < b_nnz)
+    {
+        if (a_i[ii] == b_i[jj])
+        {
+            sum += a_x[ii] * b_x[jj];
+            ii++;
+            jj++;
+        }
+        else if (a_i[ii] < b_i[jj])
+        {
+            ii++;
+        }
+        else
+        {
+            jj++;
+        }
+    }
+
+    return sum;
+}
+
 static inline double sparse_wdot(const double *a_x, const int *a_i, int a_nnz,
                                  const double *b_x, const int *b_i, int b_nnz,
                                  const double *d)
@@ -158,9 +186,17 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
                 int nnz_ai = A->p[ii + 1] - A->p[ii];
                 int nnz_aj = A->p[j + 1] - A->p[j];
 
-                /* compute Cij = weighted inner product of column i and column j */
-                double sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
-                                         A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
+                double sum;
+                if (d != NULL)
+                {
+                    sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
+                                      A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
+                }
+                else
+                {
+                    sum = sparse_dot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
+                                     A->x + A->p[j], A->i + A->p[j], nnz_aj);
+                }
 
                 C->x[jj] = sum;
             }
@@ -443,15 +479,115 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
             int nnz_bi = B->p[i + 1] - B->p[i];
             int nnz_aj = A->p[j + 1] - A->p[j];
 
-            /* compute Cij = weighted inner product of col i of B and col j of A */
-            double sum = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
-                                     A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
+            double sum;
+            if (d != NULL)
+            {
+                sum = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
+                                  A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
+            }
+            else
+            {
+                sum = sparse_dot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
+                                 A->x + A->p[j], A->i + A->p[j], nnz_aj);
+            }
 
             C->x[jj] = sum;
         }
     }
 }
 
+CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix *A)
+{
+    /* Allocate B = Q * A (sparsity only). Q is CSR (m x m), A and B are CSC (m x n).
+     * Reads row k of Q as column k (needs symmetric Q); B's rows may be unsorted. */
+
+    int m = Q->m;
+    int n = A->n;
+
+    int *marker = (int *) malloc(m * sizeof(int));
+    memset(marker, -1, m * sizeof(int));
+
+    int *Bp = (int *) malloc((n + 1) * sizeof(int));
+    iVec *Bi = iVec_new(A->nnz);
+    Bp[0] = 0;
+
+    for (int j = 0; j < n; j++)
+    {
+        int col_nnz = 0;
+
+        for (int t = A->p[j]; t < A->p[j + 1]; t++)
+        {
+            int k = A->i[t];
+
+            for (int s = Q->p[k]; s < Q->p[k + 1]; s++)
+            {
+                int row = Q->i[s];
+                if (marker[row] != j)
+                {
+                    marker[row] = j;
+                    iVec_append(Bi, row);
+                    col_nnz++;
+                }
+            }
+        }
+
+        Bp[j + 1] = Bp[j] + col_nnz;
+    }
+
+    int total_nnz = Bp[n];
+    CSC_Matrix *B = new_csc_matrix(m, n, total_nnz);
+    memcpy(B->p, Bp, (n + 1) * sizeof(int));
+    memcpy(B->i, Bi->data, total_nnz * sizeof(int));
+
+    free(marker);
+    free(Bp);
+    iVec_free(Bi);
+
+    return B;
+}
+
+void csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
+                                  CSC_Matrix *B)
+{
+    /* Fill values of B = Q * A. B must have sparsity from
+     * csr_csc_multiply_fill_sparsity. */
+
+    int m = Q->m;
+
+    int *marker = (int *) malloc(m * sizeof(int));
+    memset(marker, -1, m * sizeof(int));
+    memset(B->x, 0, B->nnz * sizeof(double));
+
+    for (int j = 0; j < B->n; j++)
+    {
+        /* map row index -> position in column j of B */
+        for (int t = B->p[j]; t < B->p[j + 1]; t++)
+        {
+            marker[B->i[t]] = t;
+        }
+
+        /* accumulate A_{k,j} * Q[k, :] */
+        for (int t = A->p[j]; t < A->p[j + 1]; t++)
+        {
+            int k = A->i[t];
+            double a_kj = A->x[t];
+
+            for (int s = Q->p[k]; s < Q->p[k + 1]; s++)
+            {
+                B->x[marker[Q->i[s]]] += a_kj * Q->x[s];
+            }
+        }
+
+        /* reset marker */
+        for (int t = B->p[j]; t < B->p[j + 1]; t++)
+        {
+            marker[B->i[t]] = -1;
+        }
+    }
+
+    free(marker);
+}
+
 int count_nonzero_cols_csc(const CSC_Matrix *A)
 {
     int count = 0;
diff --git a/tests/all_tests.c b/tests/all_tests.c
@@ -276,6 +276,9 @@ int main(void)
     mu_run_test(test_wsum_hess_x_x_multiply, tests_run);
     mu_run_test(test_wsum_hess_AX_BX_multiply, tests_run);
     mu_run_test(test_wsum_hess_multiply_deep_composite, tests_run);
+    mu_run_test(test_wsum_hess_quad_form_Ax, tests_run);
+    mu_run_test(test_wsum_hess_quad_form_sin_Ax, tests_run);
+    mu_run_test(test_wsum_hess_quad_form_exp, tests_run);
 
     printf("\n--- Utility Tests ---\n");
     mu_run_test(test_cblas_ddot, tests_run);
diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@ typedef struct quad_form_expr`
`48`	`48`	`{`
`49`	`49`	`expr base;`
`50`	`50`	`CSR_Matrix *Q;`
	`51`	`+ CSC_Matrix *QJf; /* Q * J_f in CSC (for chain rule hessian) */`
`51`	`52`	`} quad_form_expr;`
`52`	`53`
`53`	`54`	`/* Sum reduction along an axis */`