Skip to content

Commit 9480159

Browse files
authored
Merge pull request #49 from SparseDifferentiation/matrix-abstraction
[WIP] Matrix abstraction
2 parents 7c84b13 + 71e3d71 commit 9480159

83 files changed

Lines changed: 871 additions & 218 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/cmake.yml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,24 @@ jobs:
1313
matrix:
1414
os: [ubuntu-latest, macos-latest, windows-latest]
1515
build_type: [Release, Debug]
16-
16+
1717
steps:
1818
- uses: actions/checkout@v3
1919

20+
- name: Install BLAS (Ubuntu)
21+
if: runner.os == 'Linux'
22+
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev
23+
24+
- name: Install OpenBLAS (Windows)
25+
if: runner.os == 'Windows'
26+
run: vcpkg install openblas:x64-windows
27+
2028
- name: Configure CMake
21-
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_VERBOSE_MAKEFILE=ON
29+
run: >
30+
cmake -B build -S .
31+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
32+
-DCMAKE_VERBOSE_MAKEFILE=ON
33+
${{ runner.os == 'Windows' && '-DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }}
2234
2335
- name: Build
2436
run: cmake --build build --config ${{ matrix.build_type }}

.github/workflows/sanitizer.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v5
1818

19+
- name: Install BLAS (Ubuntu)
20+
if: runner.os == 'Linux'
21+
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev
22+
1923
# Configure CMake with ASan + UBSan
2024
- name: Configure with sanitizers
2125
run: cmake -B build -S . \

.github/workflows/valgrind.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
- name: Install dependencies
1212
run: |
1313
sudo apt-get update
14-
sudo apt-get install -y valgrind
14+
sudo apt-get install -y valgrind libopenblas-dev
1515
1616
- name: Configure Debug build
1717
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
cmake_minimum_required(VERSION 3.15)
22
project(DNLP_Diff_Engine C)
33
set(CMAKE_C_STANDARD 99)
4+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
45

56
set(DIFF_ENGINE_VERSION_MAJOR 0)
67
set(DIFF_ENGINE_VERSION_MINOR 1)
@@ -53,6 +54,18 @@ if(NOT MSVC)
5354
target_link_libraries(dnlp_diff m)
5455
endif()
5556

57+
# Find and link BLAS
58+
if(APPLE)
59+
find_library(ACCELERATE_FRAMEWORK Accelerate)
60+
target_link_libraries(dnlp_diff ${ACCELERATE_FRAMEWORK})
61+
elseif(MSVC)
62+
find_package(OpenBLAS CONFIG REQUIRED)
63+
target_link_libraries(dnlp_diff OpenBLAS::OpenBLAS)
64+
else()
65+
find_package(BLAS REQUIRED)
66+
target_link_libraries(dnlp_diff ${BLAS_LIBRARIES})
67+
endif()
68+
5669
# Config-specific compile options (compiler-specific)
5770
if(MSVC)
5871
target_compile_options(dnlp_diff PRIVATE

include/bivariate.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,18 @@ expr *new_rel_entr_second_arg_scalar(expr *left, expr *right);
3030
/* Matrix multiplication: Z = X @ Y */
3131
expr *new_matmul(expr *x, expr *y);
3232

33-
/* Left matrix multiplication: A @ f(x) where A is a constant matrix */
33+
/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix */
3434
expr *new_left_matmul(expr *u, const CSR_Matrix *A);
3535

36+
/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix
37+
* (row-major, m x n). Uses CBLAS for efficient computation. */
38+
expr *new_left_matmul_dense(expr *u, int m, int n, const double *data);
39+
3640
/* Right matrix multiplication: f(x) @ A where A is a constant matrix */
3741
expr *new_right_matmul(expr *u, const CSR_Matrix *A);
3842

43+
expr *new_right_matmul_dense(expr *u, int m, int n, const double *data);
44+
3945
/* Constant scalar multiplication: a * f(x) where a is a constant double */
4046
expr *new_const_scalar_mult(double a, expr *child);
4147

include/subexpr.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "expr.h"
2222
#include "utils/CSC_Matrix.h"
2323
#include "utils/CSR_Matrix.h"
24+
#include "utils/matrix.h"
2425

2526
/* Forward declaration */
2627
struct int_double_pair;
@@ -107,12 +108,12 @@ important distinction compared to linear_op_expr. */
107108
typedef struct left_matmul_expr
108109
{
109110
expr base;
110-
CSR_Matrix *A;
111-
CSR_Matrix *AT;
111+
Matrix *A;
112+
Matrix *AT;
112113
int n_blocks;
113114
CSC_Matrix *Jchild_CSC;
114115
CSC_Matrix *J_CSC;
115-
int *csc_to_csr_workspace;
116+
int *csc_to_csr_work;
116117
} left_matmul_expr;
117118

118119
/* Right matrix multiplication: y = f(x) * A where f(x) is an expression.

include/utils/cblas_wrapper.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef CBLAS_WRAPPER_H
2+
#define CBLAS_WRAPPER_H
3+
4+
#ifdef __APPLE__
5+
#define ACCELERATE_NEW_LAPACK
6+
#include <Accelerate/Accelerate.h>
7+
#else
8+
#include <cblas.h>
9+
#endif
10+
11+
#endif /* CBLAS_WRAPPER_H */

include/utils/matrix.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright 2026 Daniel Cederberg and William Zhang
3+
*
4+
* This file is part of the DNLP-differentiation-engine project.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
#ifndef MATRIX_H
19+
#define MATRIX_H
20+
21+
#include "CSC_Matrix.h"
22+
#include "CSR_Matrix.h"
23+
24+
/* Base matrix type with function pointers for polymorphic dispatch */
25+
typedef struct Matrix
26+
{
27+
int m, n;
28+
void (*block_left_mult_vec)(const struct Matrix *self, const double *x,
29+
double *y, int p);
30+
CSC_Matrix *(*block_left_mult_sparsity)(const struct Matrix *self,
31+
const CSC_Matrix *J, int p);
32+
void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J,
33+
CSC_Matrix *C);
34+
void (*free_fn)(struct Matrix *self);
35+
} Matrix;
36+
37+
/* Sparse matrix wrapping CSR */
38+
typedef struct Sparse_Matrix
39+
{
40+
Matrix base;
41+
CSR_Matrix *csr;
42+
} Sparse_Matrix;
43+
44+
/* Dense matrix (row-major) */
45+
typedef struct Dense_Matrix
46+
{
47+
Matrix base;
48+
double *x;
49+
double *work; /* scratch buffer, length n */
50+
} Dense_Matrix;
51+
52+
/* Constructors */
53+
Matrix *new_sparse_matrix(const CSR_Matrix *A);
54+
Matrix *new_dense_matrix(int m, int n, const double *data);
55+
56+
/* Transpose helpers */
57+
Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork);
58+
Matrix *dense_matrix_trans(const Dense_Matrix *self);
59+
60+
/* Free helper */
61+
static inline void free_matrix(Matrix *m)
62+
{
63+
if (m)
64+
{
65+
m->free_fn(m);
66+
}
67+
}
68+
69+
#endif /* MATRIX_H */

src/bivariate/left_matmul.c

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717
*/
1818
#include "bivariate.h"
1919
#include "subexpr.h"
20-
#include "utils/Timer.h"
21-
#include "utils/linalg_sparse_matmuls.h"
20+
#include "utils/matrix.h"
2221
#include <assert.h>
2322
#include <stdio.h>
2423
#include <stdlib.h>
@@ -45,9 +44,6 @@
4544
4645
Working in terms of A_kron unifies the implementation of f(x) being
4746
vector-valued or matrix-valued.
48-
49-
I (dance858) think we can get additional big speedups when A is dense by
50-
introducing a dense matrix class.
5147
*/
5248

5349
#include "utils/utils.h"
@@ -60,9 +56,9 @@ static void forward(expr *node, const double *u)
6056
node->left->forward(node->left, u);
6157

6258
/* y = A_kron @ vec(f(x)) */
63-
CSR_Matrix *A = ((left_matmul_expr *) node)->A;
59+
Matrix *A = ((left_matmul_expr *) node)->A;
6460
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
65-
block_left_multiply_vec(A, x->value, node->value, n_blocks);
61+
A->block_left_mult_vec(A, x->value, node->value, n_blocks);
6662
}
6763

6864
static bool is_affine(const expr *node)
@@ -72,33 +68,32 @@ static bool is_affine(const expr *node)
7268

7369
static void free_type_data(expr *node)
7470
{
75-
left_matmul_expr *lin_node = (left_matmul_expr *) node;
76-
free_csr_matrix(lin_node->A);
77-
free_csr_matrix(lin_node->AT);
78-
free_csc_matrix(lin_node->Jchild_CSC);
79-
free_csc_matrix(lin_node->J_CSC);
80-
free(lin_node->csc_to_csr_workspace);
81-
lin_node->A = NULL;
82-
lin_node->AT = NULL;
83-
lin_node->Jchild_CSC = NULL;
84-
lin_node->J_CSC = NULL;
85-
lin_node->csc_to_csr_workspace = NULL;
71+
left_matmul_expr *lnode = (left_matmul_expr *) node;
72+
free_matrix(lnode->A);
73+
free_matrix(lnode->AT);
74+
free_csc_matrix(lnode->Jchild_CSC);
75+
free_csc_matrix(lnode->J_CSC);
76+
free(lnode->csc_to_csr_work);
77+
lnode->A = NULL;
78+
lnode->AT = NULL;
79+
lnode->Jchild_CSC = NULL;
80+
lnode->J_CSC = NULL;
81+
lnode->csc_to_csr_work = NULL;
8682
}
8783

8884
static void jacobian_init(expr *node)
8985
{
9086
expr *x = node->left;
91-
left_matmul_expr *lin_node = (left_matmul_expr *) node;
87+
left_matmul_expr *lnode = (left_matmul_expr *) node;
9288

9389
/* initialize child's jacobian and precompute sparsity of its CSC */
9490
x->jacobian_init(x);
95-
lin_node->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->iwork);
91+
lnode->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->iwork);
9692

9793
/* precompute sparsity of this node's jacobian in CSC and CSR */
98-
lin_node->J_CSC = block_left_multiply_fill_sparsity(
99-
lin_node->A, lin_node->Jchild_CSC, lin_node->n_blocks);
100-
node->jacobian =
101-
csc_to_csr_fill_sparsity(lin_node->J_CSC, lin_node->csc_to_csr_workspace);
94+
lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC,
95+
lnode->n_blocks);
96+
node->jacobian = csc_to_csr_fill_sparsity(lnode->J_CSC, lnode->csc_to_csr_work);
10297
}
10398

10499
static void eval_jacobian(expr *node)
@@ -114,8 +109,8 @@ static void eval_jacobian(expr *node)
114109
csr_to_csc_fill_values(x->jacobian, Jchild_CSC, node->iwork);
115110

116111
/* compute this node's jacobian: */
117-
block_left_multiply_fill_values(lnode->A, Jchild_CSC, J_CSC);
118-
csc_to_csr_fill_values(J_CSC, node->jacobian, lnode->csc_to_csr_workspace);
112+
lnode->A->block_left_mult_values(lnode->A, Jchild_CSC, J_CSC);
113+
csc_to_csr_fill_values(J_CSC, node->jacobian, lnode->csc_to_csr_work);
119114
}
120115

121116
static void wsum_hess_init(expr *node)
@@ -131,16 +126,16 @@ static void wsum_hess_init(expr *node)
131126

132127
/* work for computing A^T w*/
133128
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
134-
int dim = ((left_matmul_expr *) node)->A->n * n_blocks;
129+
int dim = ((left_matmul_expr *) node)->AT->m * n_blocks;
135130
node->dwork = (double *) malloc(dim * sizeof(double));
136131
}
137132

138133
static void eval_wsum_hess(expr *node, const double *w)
139134
{
140135
/* compute A^T w*/
141-
CSR_Matrix *AT = ((left_matmul_expr *) node)->AT;
136+
Matrix *AT = ((left_matmul_expr *) node)->AT;
142137
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
143-
block_left_multiply_vec(AT, w, node->dwork, n_blocks);
138+
AT->block_left_mult_vec(AT, w, node->dwork, n_blocks);
144139

145140
node->left->eval_wsum_hess(node->left, node->dwork);
146141
memcpy(node->wsum_hess->x, node->left->wsum_hess->x,
@@ -173,25 +168,64 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A)
173168
}
174169

175170
/* Allocate the type-specific struct */
176-
left_matmul_expr *lin_node =
171+
left_matmul_expr *lnode =
177172
(left_matmul_expr *) calloc(1, sizeof(left_matmul_expr));
178-
expr *node = &lin_node->base;
173+
expr *node = &lnode->base;
179174
init_expr(node, d1, d2, u->n_vars, forward, jacobian_init, eval_jacobian,
180175
is_affine, wsum_hess_init, eval_wsum_hess, free_type_data);
181176
node->left = u;
182177
expr_retain(u);
183178

184-
/* allocate workspace. iwork is used for transposing A (requiring size A->n)
185-
and for converting J_child csr to csc (requring size node->n_vars).
186-
csc_to_csr_workspace is used for converting J_CSC to CSR (requring node->size)
187-
*/
179+
/* allocate workspace. iwork is used for converting J_child csr to csc
180+
(requiring size node->n_vars) and for transposing A (requiring size A->n).
181+
csc_to_csr_work is used for converting J_CSC to CSR (requiring
182+
node->size) */
188183
node->iwork = (int *) malloc(MAX(A->n, node->n_vars) * sizeof(int));
189-
lin_node->csc_to_csr_workspace = (int *) malloc(node->size * sizeof(int));
190-
lin_node->n_blocks = n_blocks;
184+
lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
185+
lnode->n_blocks = n_blocks;
191186

192187
/* store A and AT */
193-
lin_node->A = new_csr(A);
194-
lin_node->AT = transpose(lin_node->A, node->iwork);
188+
lnode->A = new_sparse_matrix(A);
189+
lnode->AT = sparse_matrix_trans((const Sparse_Matrix *) lnode->A, node->iwork);
190+
191+
return node;
192+
}
193+
194+
expr *new_left_matmul_dense(expr *u, int m, int n, const double *data)
195+
{
196+
int d1, d2, n_blocks;
197+
if (u->d1 == n)
198+
{
199+
d1 = m;
200+
d2 = u->d2;
201+
n_blocks = u->d2;
202+
}
203+
else if (u->d2 == n && u->d1 == 1)
204+
{
205+
d1 = 1;
206+
d2 = m;
207+
n_blocks = 1;
208+
}
209+
else
210+
{
211+
fprintf(stderr, "Error in new_left_matmul_dense: dimension mismatch\n");
212+
exit(1);
213+
}
214+
215+
left_matmul_expr *lnode =
216+
(left_matmul_expr *) calloc(1, sizeof(left_matmul_expr));
217+
expr *node = &lnode->base;
218+
init_expr(node, d1, d2, u->n_vars, forward, jacobian_init, eval_jacobian,
219+
is_affine, wsum_hess_init, eval_wsum_hess, free_type_data);
220+
node->left = u;
221+
expr_retain(u);
222+
223+
node->iwork = (int *) malloc(MAX(n, node->n_vars) * sizeof(int));
224+
lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
225+
lnode->n_blocks = n_blocks;
226+
227+
lnode->A = new_dense_matrix(m, n, data);
228+
lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
195229

196230
return node;
197231
}

0 commit comments

Comments
 (0)