dense matrix class first draft

dance858 · dance858 · commit 5280ff872978 · 2026-03-07T15:27:34.000-08:00
diff --git a/include/utils/cblas_wrapper.h b/include/utils/cblas_wrapper.h
@@ -0,0 +1,11 @@
+#ifndef CBLAS_WRAPPER_H
+#define CBLAS_WRAPPER_H
+
+/* Single include point for the CBLAS API. On Apple platforms the interface
+ * comes from the Accelerate framework; everywhere else from <cblas.h>. */
+#ifdef __APPLE__
+/* NOTE(review): presumably selects Accelerate's newer BLAS/LAPACK headers and
+ * therefore has to precede the include below -- confirm against Apple's docs. */
+#define ACCELERATE_NEW_LAPACK
+#include <Accelerate/Accelerate.h>
+#else
+#include <cblas.h>
+#endif
+
+#endif /* CBLAS_WRAPPER_H */
diff --git a/include/utils/matrix.h b/include/utils/matrix.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the DNLP-differentiation-engine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include "CSC_Matrix.h"
+#include "CSR_Matrix.h"
+
+/* Base matrix type with function pointers for polymorphic dispatch.
+ * Sparse_Matrix and Dense_Matrix embed it as their first member, and their
+ * constructors fill in every pointer below. */
+typedef struct Matrix
+{
+    int m, n; /* number of rows, number of columns */
+    /* Block product with a vector. The dense implementation computes
+     * y = kron(I_p, A) @ x, reading x as p consecutive blocks of length n and
+     * writing y as p consecutive blocks of length m. */
+    void (*block_left_mult_vec)(const struct Matrix *self, const double *x,
+                                double *y, int p);
+    /* Symbolic phase of the block product with a CSC matrix J: returns a newly
+     * created CSC matrix (the dense implementation gives it m * p rows and
+     * J->n columns and fills in its sparsity pattern). */
+    CSC_Matrix *(*block_left_mult_sparsity)(const struct Matrix *self,
+                                            const CSC_Matrix *J, int p);
+    /* Numeric phase: writes the values of C. The dense implementation reads
+     * the pattern already stored in C, so C is expected to come from
+     * block_left_mult_sparsity called with the same J. */
+    void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J,
+                                   CSC_Matrix *C);
+    /* Destructor of the concrete type; both implementations free `self`
+     * together with the storage it points to. */
+    void (*free_fn)(struct Matrix *self);
+} Matrix;
+
+/* Sparse matrix wrapping CSR */
+typedef struct Sparse_Matrix
+{
+    Matrix base;     /* kept first: the code casts Matrix * to Sparse_Matrix * */
+    CSR_Matrix *csr; /* released with free_csr_matrix() by the free_fn */
+} Sparse_Matrix;
+
+/* Dense matrix (row-major) */
+typedef struct Dense_Matrix
+{
+    Matrix base;  /* kept first: the code casts Matrix * to Dense_Matrix * */
+    double *x;    /* m * n values; entry (i, j) is stored at x[i * n + j] */
+    double *work; /* scratch buffer, length n */
+} Dense_Matrix;
+
+/* Constructors. Each returns a pointer to the `base` member of a newly
+ * allocated object; release it with free_matrix().
+ *
+ * new_sparse_matrix: stores the result of new_csr(A).
+ *   NOTE(review): presumably a copy, so A stays with the caller -- confirm in
+ *   new_csr.
+ * new_dense_matrix: copies m * n values from `data`, read as row-major. */
+Matrix *new_sparse_matrix(const CSR_Matrix *A);
+Matrix *new_dense_matrix(int m, int n, const double *data);
+
+/* Transpose helpers. Each returns a new matrix of the same concrete kind;
+ * release it with free_matrix().
+ *
+ * sparse_matrix_trans: iwork is passed on to transpose() as integer workspace.
+ *   NOTE(review): its required length is not visible here -- confirm. */
+Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork);
+Matrix *dense_matrix_trans(const Dense_Matrix *self);
+
+/* Release a matrix through the free_fn stored in it.
+ * Passing NULL is allowed and does nothing. */
+static inline void free_matrix(Matrix *m)
+{
+    if (!m)
+    {
+        return;
+    }
+    m->free_fn(m);
+}
+
+#endif /* MATRIX_H */
diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the DNLP-differentiation-engine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "utils/cblas_wrapper.h"
+#include "utils/iVec.h"
+#include "utils/matrix.h"
+#include <stdlib.h>
+#include <string.h>
+
+static void dense_block_left_mult_vec(const Matrix *A, const double *x, double *y,
+                                      int p)
+{
+    /* Computes y = kron(I_p, A) @ x with one dgemm call.
+       x holds p consecutive blocks of length n and y holds p consecutive
+       blocks of length m, which is exactly the memory layout of row-major
+       matrices X (p x n) and Y (p x m). Block k of y is A times block k of
+       x, i.e. row k of Y is row k of X times A^T, hence Y = X * A^T.
+       In dgemm terms (RowMajor, C = alpha * op(L) * op(R) + beta * C):
+       L = X untransposed, R = the stored m x n array taken transposed,
+       C = Y, with alpha = 1 and beta = 0 so that y is overwritten. */
+    const Dense_Matrix *dense = (const Dense_Matrix *) A;
+    const int rows = A->m;
+    const int cols = A->n;
+
+    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, p, rows, cols, 1.0, x,
+                cols, dense->x, cols, 0.0, y, rows);
+}
+
+/* Symbolic phase of C = kron(I_p, A) @ J for a dense A: creates C and fills
+ * its column pointers and row indices; C->x is not written here.
+ *
+ * Because A is dense, every row block of a column of J (rows
+ * [block * n, block * n + n)) that holds at least one stored entry yields all
+ * m rows [block * m, block * m + m) in the same column of C.
+ *
+ * A: only its dimensions m and n are used.
+ * J: CSC matrix. NOTE(review): the single forward cursor used below only
+ *    finds every block if the row indices inside each column are sorted
+ *    ascending -- confirm that this is an invariant of CSC_Matrix.
+ * p: number of diagonal blocks; C gets m * p rows and J->n columns.
+ * Returns the matrix obtained from new_csc_matrix. */
+static CSC_Matrix *dense_block_left_mult_sparsity(const Matrix *A,
+                                                  const CSC_Matrix *J, int p)
+{
+    int m = A->m;
+    int n = A->n;
+    int i, j, jj, block, block_start, block_end, block_jj_start, row_offset;
+
+    /* Cp: column pointers of C; Ci: growing list of the row indices of C.
+       NOTE(review): J->n * m is presumably an initial capacity for the
+       vector -- confirm in iVec_new. */
+    int *Cp = (int *) malloc((J->n + 1) * sizeof(int));
+    iVec *Ci = iVec_new(J->n * m);
+    Cp[0] = 0;
+
+    /* for each column of J */
+    for (j = 0; j < J->n; j++)
+    {
+        /* an empty column of J gives an empty column of C */
+        if (J->p[j] == J->p[j + 1])
+        {
+            Cp[j + 1] = Cp[j];
+            continue;
+        }
+
+        /* process each of p blocks of rows in this column of J; jj is a
+           cursor into the column that only ever moves forward */
+        jj = J->p[j];
+        for (block = 0; block < p; block++)
+        {
+            /* find the entries of this column whose row index lies in
+               [block_start, block_end): first skip what is below the block,
+               then advance past what is inside it */
+            block_start = block * n;
+            block_end = block_start + n;
+            while (jj < J->p[j + 1] && J->i[jj] < block_start)
+            {
+                jj++;
+            }
+
+            block_jj_start = jj;
+            while (jj < J->p[j + 1] && J->i[jj] < block_end)
+            {
+                jj++;
+            }
+
+            /* if no entries in this block, continue */
+            if (jj == block_jj_start)
+            {
+                continue;
+            }
+
+            /* dense A: all m rows contribute */
+            row_offset = block * m;
+            for (i = 0; i < m; i++)
+            {
+                iVec_append(Ci, row_offset + i);
+            }
+        }
+        /* column j of C ends where the index list currently ends */
+        Cp[j + 1] = Ci->len;
+    }
+
+    /* copy the collected pattern into the result and drop the temporaries */
+    CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len);
+    memcpy(C->p, Cp, (J->n + 1) * sizeof(int));
+    memcpy(C->i, Ci->data, Ci->len * sizeof(int));
+    free(Cp);
+    iVec_free(Ci);
+
+    return C;
+}
+
+/* Numeric phase of the block product for a dense A: fills C->x.
+ *
+ * The stored entries of each column of C are walked in runs of m, so C is
+ * expected to have the pattern built by dense_block_left_mult_sparsity (one
+ * run of m consecutive rows per touched block). For every run, the part of
+ * column j of J that falls into the matching row block is expanded into a
+ * dense vector of length n and multiplied by A with dgemv; the m results go
+ * straight into C->x.
+ *
+ * A: dense matrix; its scratch buffer dm->work (length n) is overwritten.
+ * J: CSC matrix whose pattern was used to build C.
+ * C: result matrix; C->p and C->i are read, C->x is written. */
+static void dense_block_left_mult_values(const Matrix *A, const CSC_Matrix *J,
+                                         CSC_Matrix *C)
+{
+    const Dense_Matrix *dm = (const Dense_Matrix *) A;
+    int m = dm->base.m;
+    int n = dm->base.n;
+    int k = J->n;
+
+    int i, j, s, block, block_start, block_end, start, end;
+
+    double *j_dense = dm->work;
+
+    /* for each column of J (and C) */
+    for (j = 0; j < k; j++)
+    {
+        /* i steps over the runs of m stored entries in column j of C */
+        for (i = C->p[j]; i < C->p[j + 1]; i += m)
+        {
+            /* the first row index of the run identifies the block; the block
+               covers rows [block_start, block_end) of J */
+            block = C->i[i] / m;
+            block_start = block * n;
+            block_end = block_start + n;
+
+            /* shrink [start, end) from both sides until it holds only the
+               entries of column j of J that lie inside the block */
+            start = J->p[j];
+            end = J->p[j + 1];
+
+            while (start < J->p[j + 1] && J->i[start] < block_start)
+            {
+                start++;
+            }
+
+            while (end > start && J->i[end - 1] >= block_end)
+            {
+                end--;
+            }
+
+            /* scatter sparse J column into dense vector and then compute
+               A @ j_dense */
+            memset(j_dense, 0, n * sizeof(double));
+            for (s = start; s < end; s++)
+            {
+                j_dense[J->i[s] - block_start] = J->x[s];
+            }
+
+            /* C->x[i .. i + m) = A @ j_dense (beta = 0.0: plain overwrite) */
+            cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, dm->x, n, j_dense, 1,
+                        0.0, C->x + i, 1);
+        }
+    }
+}
+
+/* free_fn of Dense_Matrix: releases the two buffers, then the object itself */
+static void dense_free(Matrix *A)
+{
+    Dense_Matrix *dense = (Dense_Matrix *) A;
+
+    free(dense->work);
+    free(dense->x);
+    free(dense);
+}
+
+/* Create a dense m x n matrix that owns a private copy of `data`.
+ *
+ * m, n: number of rows and columns; negative values are rejected.
+ * data: m * n values in row-major order. It is only read, and not at all
+ *       when the matrix has no entries.
+ *
+ * Returns a pointer to the embedded base object (release it with
+ * free_matrix), or NULL if the dimensions are invalid, the storage size does
+ * not fit in size_t, or an allocation fails. */
+Matrix *new_dense_matrix(int m, int n, const double *data)
+{
+    /* largest entry count whose byte size is still representable */
+    const size_t max_entries = (size_t) -1 / sizeof(double);
+    if (m < 0 || n < 0 || (n > 0 && (size_t) m > max_entries / (size_t) n))
+    {
+        return NULL;
+    }
+
+    /* widen before multiplying: m * n evaluated in int can overflow */
+    size_t n_entries = (size_t) m * (size_t) n;
+
+    Dense_Matrix *dm = (Dense_Matrix *) calloc(1, sizeof(Dense_Matrix));
+    if (!dm)
+    {
+        return NULL;
+    }
+
+    /* always request at least one element: malloc(0) may legitimately return
+       NULL, which would be indistinguishable from a failure */
+    dm->x = (double *) malloc((n_entries > 0 ? n_entries : 1) * sizeof(double));
+    dm->work = (double *) malloc((n > 0 ? (size_t) n : 1) * sizeof(double));
+    if (!dm->x || !dm->work)
+    {
+        free(dm->x);
+        free(dm->work);
+        free(dm);
+        return NULL;
+    }
+
+    dm->base.m = m;
+    dm->base.n = n;
+    dm->base.block_left_mult_vec = dense_block_left_mult_vec;
+    dm->base.block_left_mult_sparsity = dense_block_left_mult_sparsity;
+    dm->base.block_left_mult_values = dense_block_left_mult_values;
+    dm->base.free_fn = dense_free;
+
+    /* skip the copy for an empty matrix so that `data` may then be NULL */
+    if (n_entries > 0)
+    {
+        memcpy(dm->x, data, n_entries * sizeof(double));
+    }
+    return &dm->base;
+}
+
+/* Build the transpose of a dense matrix as a new dense matrix.
+ *
+ * A: m x n row-major matrix; it is only read.
+ *
+ * The transposed values are assembled in a temporary buffer, handed to
+ * new_dense_matrix(n, m, ...) (which stores its own copy), and the temporary
+ * is released again. Returns the result of new_dense_matrix, or NULL if the
+ * temporary buffer cannot be allocated. Release the result with
+ * free_matrix. */
+Matrix *dense_matrix_trans(const Dense_Matrix *A)
+{
+    int m = A->base.m;
+    int n = A->base.n;
+
+    /* sizes and offsets are computed in size_t: m * n can overflow int */
+    size_t n_entries = (size_t) m * (size_t) n;
+
+    /* at least one element, so that NULL always signals a failed allocation */
+    double *AT_x = (double *) malloc((n_entries > 0 ? n_entries : 1) * sizeof(double));
+    if (!AT_x)
+    {
+        return NULL;
+    }
+
+    for (int i = 0; i < m; i++)
+    {
+        for (int j = 0; j < n; j++)
+        {
+            /* entry (i, j) of A becomes entry (j, i) of the n x m result */
+            AT_x[(size_t) j * (size_t) m + (size_t) i] =
+                A->x[(size_t) i * (size_t) n + (size_t) j];
+        }
+    }
+
+    Matrix *result = new_dense_matrix(n, m, AT_x);
+    free(AT_x);
+    return result;
+}
diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the DNLP-differentiation-engine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "utils/linalg_sparse_matmuls.h"
+#include "utils/matrix.h"
+#include <stdlib.h>
+
+/* block_left_mult_vec of Sparse_Matrix: hands the wrapped CSR matrix and the
+ * remaining arguments unchanged to block_left_multiply_vec. */
+static void sparse_block_left_mult_vec(const Matrix *self, const double *x,
+                                       double *y, int p)
+{
+    block_left_multiply_vec(((const Sparse_Matrix *) self)->csr, x, y, p);
+}
+
+/* block_left_mult_sparsity of Sparse_Matrix: returns whatever
+ * block_left_multiply_fill_sparsity produces for the wrapped CSR matrix. */
+static CSC_Matrix *sparse_block_left_mult_sparsity(const Matrix *self,
+                                                   const CSC_Matrix *J, int p)
+{
+    const CSR_Matrix *wrapped = ((const Sparse_Matrix *) self)->csr;
+
+    return block_left_multiply_fill_sparsity(wrapped, J, p);
+}
+
+/* block_left_mult_values of Sparse_Matrix: hands the wrapped CSR matrix, J
+ * and C unchanged to block_left_multiply_fill_values. */
+static void sparse_block_left_mult_values(const Matrix *self, const CSC_Matrix *J,
+                                          CSC_Matrix *C)
+{
+    block_left_multiply_fill_values(((const Sparse_Matrix *) self)->csr, J, C);
+}
+
+/* free_fn of Sparse_Matrix: releases the wrapped CSR matrix, then the
+ * wrapper object itself. */
+static void sparse_free(Matrix *self)
+{
+    free_csr_matrix(((Sparse_Matrix *) self)->csr);
+    free(self);
+}
+
+/* Wrap `csr` in a newly allocated Sparse_Matrix and install the sparse
+ * dispatch functions; the dimensions are taken from `csr`.
+ *
+ * The wrapper takes over `csr`: it is released by the wrapper's free_fn, or
+ * right here if the wrapper cannot be allocated. Returns NULL when `csr` is
+ * NULL or the allocation fails. */
+static Matrix *sparse_matrix_wrap(CSR_Matrix *csr)
+{
+    if (!csr)
+    {
+        return NULL;
+    }
+
+    Sparse_Matrix *sm = (Sparse_Matrix *) calloc(1, sizeof(Sparse_Matrix));
+    if (!sm)
+    {
+        /* nobody else holds csr at this point, so do not leak it */
+        free_csr_matrix(csr);
+        return NULL;
+    }
+
+    sm->base.m = csr->m;
+    sm->base.n = csr->n;
+    sm->base.block_left_mult_vec = sparse_block_left_mult_vec;
+    sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity;
+    sm->base.block_left_mult_values = sparse_block_left_mult_values;
+    sm->base.free_fn = sparse_free;
+    sm->csr = csr;
+    return &sm->base;
+}
+
+/* Create a sparse matrix around new_csr(A).
+ * NOTE(review): new_csr presumably returns a copy of A with the same
+ * dimensions, leaving A with the caller -- confirm in new_csr.
+ * Returns NULL if new_csr or the wrapper allocation fails; release the
+ * result with free_matrix. */
+Matrix *new_sparse_matrix(const CSR_Matrix *A)
+{
+    return sparse_matrix_wrap(new_csr(A));
+}
+
+/* Create a sparse matrix around transpose(self->csr, iwork).
+ * iwork is passed through to transpose() as its integer workspace.
+ * Returns NULL if transpose or the wrapper allocation fails; release the
+ * result with free_matrix. */
+Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork)
+{
+    return sparse_matrix_wrap(transpose(self->csr, iwork));
+}
diff --git a/tests/utils/test_cblas.h b/tests/utils/test_cblas.h
@@ -0,0 +1,18 @@
+#ifndef TEST_CBLAS_H
+#define TEST_CBLAS_H
+
+#include "minunit.h"
+#include "utils/cblas_wrapper.h"
+#include <math.h>
+
+/* Checks that the CBLAS library is usable: cblas_ddot on two length-4
+ * vectors must match a dot product accumulated by hand. */
+static char *test_cblas_ddot(void)
+{
+    double a[] = {1.0, 2.0, 3.0, 4.0};
+    double b[] = {5.0, 6.0, 7.0, 8.0};
+    const int len = (int) (sizeof(a) / sizeof(a[0]));
+
+    /* reference value, summed term by term in index order */
+    double reference = 0.0;
+    for (int k = 0; k < len; k++)
+    {
+        reference += a[k] * b[k];
+    }
+
+    double got = cblas_ddot(len, a, 1, b, 1);
+    mu_assert("test_cblas_ddot: wrong dot product", fabs(got - reference) < 1e-12);
+    return 0;
+}
+
+#endif /* TEST_CBLAS_H */
diff --git a/tests/utils/test_matrix.h b/tests/utils/test_matrix.h