Skip to content

Commit cd6c1ff

Browse files
committed
matrix abstraction first commit
1 parent 7c84b13 commit cd6c1ff

75 files changed

Lines changed: 274 additions & 215 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/cmake.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,17 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [ubuntu-latest, macos-latest, windows-latest]
14+
os: [ubuntu-latest, macos-latest]
15+
# os: [ubuntu-latest, macos-latest, windows-latest]
1516
build_type: [Release, Debug]
1617

1718
steps:
1819
- uses: actions/checkout@v3
1920

21+
- name: Install BLAS (Ubuntu)
22+
if: runner.os == 'Linux'
23+
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev
24+
2025
- name: Configure CMake
2126
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_VERBOSE_MAKEFILE=ON
2227

.github/workflows/sanitizer.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v5
1818

19+
- name: Install BLAS (Ubuntu)
20+
if: runner.os == 'Linux'
21+
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev
22+
1923
# Configure CMake with ASan + UBSan
2024
- name: Configure with sanitizers
2125
run: cmake -B build -S . \

.github/workflows/valgrind.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
- name: Install dependencies
1212
run: |
1313
sudo apt-get update
14-
sudo apt-get install -y valgrind
14+
sudo apt-get install -y valgrind libopenblas-dev
1515
1616
- name: Configure Debug build
1717
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ if(NOT MSVC)
5353
target_link_libraries(dnlp_diff m)
5454
endif()
5555

56+
# Find and link BLAS
57+
find_package(BLAS REQUIRED)
58+
if(APPLE)
59+
find_library(ACCELERATE_FRAMEWORK Accelerate)
60+
target_link_libraries(dnlp_diff ${ACCELERATE_FRAMEWORK})
61+
else()
62+
target_link_libraries(dnlp_diff ${BLAS_LIBRARIES})
63+
endif()
64+
5665
# Config-specific compile options (compiler-specific)
5766
if(MSVC)
5867
target_compile_options(dnlp_diff PRIVATE

include/bivariate.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,13 @@ expr *new_rel_entr_second_arg_scalar(expr *left, expr *right);
3030
/* Matrix multiplication: Z = X @ Y */
3131
expr *new_matmul(expr *x, expr *y);
3232

33-
/* Left matrix multiplication: A @ f(x) where A is a constant matrix */
33+
/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix */
3434
expr *new_left_matmul(expr *u, const CSR_Matrix *A);
3535

36+
/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix
37+
* (row-major, m x n). Uses CBLAS for efficient computation. */
38+
expr *new_left_matmul_dense(expr *u, int m, int n, const double *data);
39+
3640
/* Right matrix multiplication: f(x) @ A where A is a constant matrix */
3741
expr *new_right_matmul(expr *u, const CSR_Matrix *A);
3842

include/subexpr.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "expr.h"
2222
#include "utils/CSC_Matrix.h"
2323
#include "utils/CSR_Matrix.h"
24+
#include "utils/matrix.h"
2425

2526
/* Forward declaration */
2627
struct int_double_pair;
@@ -107,12 +108,12 @@ important distinction compared to linear_op_expr. */
107108
typedef struct left_matmul_expr
108109
{
109110
expr base;
110-
CSR_Matrix *A;
111-
CSR_Matrix *AT;
111+
Matrix *A;
112+
Matrix *AT;
112113
int n_blocks;
113114
CSC_Matrix *Jchild_CSC;
114115
CSC_Matrix *J_CSC;
115-
int *csc_to_csr_workspace;
116+
int *csc_to_csr_work;
116117
} left_matmul_expr;
117118

118119
/* Right matrix multiplication: y = f(x) * A where f(x) is an expression.

src/bivariate/left_matmul.c

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717
*/
1818
#include "bivariate.h"
1919
#include "subexpr.h"
20-
#include "utils/Timer.h"
21-
#include "utils/linalg_sparse_matmuls.h"
20+
#include "utils/matrix.h"
2221
#include <assert.h>
2322
#include <stdio.h>
2423
#include <stdlib.h>
@@ -45,9 +44,6 @@
4544
4645
Working in terms of A_kron unifies the implementation of f(x) being
4746
vector-valued or matrix-valued.
48-
49-
I (dance858) think we can get additional big speedups when A is dense by
50-
introducing a dense matrix class.
5147
*/
5248

5349
#include "utils/utils.h"
@@ -60,9 +56,9 @@ static void forward(expr *node, const double *u)
6056
node->left->forward(node->left, u);
6157

6258
/* y = A_kron @ vec(f(x)) */
63-
CSR_Matrix *A = ((left_matmul_expr *) node)->A;
59+
Matrix *A = ((left_matmul_expr *) node)->A;
6460
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
65-
block_left_multiply_vec(A, x->value, node->value, n_blocks);
61+
A->block_left_mult_vec(A, x->value, node->value, n_blocks);
6662
}
6763

6864
static bool is_affine(const expr *node)
@@ -72,33 +68,32 @@ static bool is_affine(const expr *node)
7268

7369
static void free_type_data(expr *node)
7470
{
75-
left_matmul_expr *lin_node = (left_matmul_expr *) node;
76-
free_csr_matrix(lin_node->A);
77-
free_csr_matrix(lin_node->AT);
78-
free_csc_matrix(lin_node->Jchild_CSC);
79-
free_csc_matrix(lin_node->J_CSC);
80-
free(lin_node->csc_to_csr_workspace);
81-
lin_node->A = NULL;
82-
lin_node->AT = NULL;
83-
lin_node->Jchild_CSC = NULL;
84-
lin_node->J_CSC = NULL;
85-
lin_node->csc_to_csr_workspace = NULL;
71+
left_matmul_expr *lnode = (left_matmul_expr *) node;
72+
free_matrix(lnode->A);
73+
free_matrix(lnode->AT);
74+
free_csc_matrix(lnode->Jchild_CSC);
75+
free_csc_matrix(lnode->J_CSC);
76+
free(lnode->csc_to_csr_work);
77+
lnode->A = NULL;
78+
lnode->AT = NULL;
79+
lnode->Jchild_CSC = NULL;
80+
lnode->J_CSC = NULL;
81+
lnode->csc_to_csr_work = NULL;
8682
}
8783

8884
static void jacobian_init(expr *node)
8985
{
9086
expr *x = node->left;
91-
left_matmul_expr *lin_node = (left_matmul_expr *) node;
87+
left_matmul_expr *lnode = (left_matmul_expr *) node;
9288

9389
/* initialize child's jacobian and precompute sparsity of its CSC */
9490
x->jacobian_init(x);
95-
lin_node->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->iwork);
91+
lnode->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->iwork);
9692

9793
/* precompute sparsity of this node's jacobian in CSC and CSR */
98-
lin_node->J_CSC = block_left_multiply_fill_sparsity(
99-
lin_node->A, lin_node->Jchild_CSC, lin_node->n_blocks);
100-
node->jacobian =
101-
csc_to_csr_fill_sparsity(lin_node->J_CSC, lin_node->csc_to_csr_workspace);
94+
lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC,
95+
lnode->n_blocks);
96+
node->jacobian = csc_to_csr_fill_sparsity(lnode->J_CSC, lnode->csc_to_csr_work);
10297
}
10398

10499
static void eval_jacobian(expr *node)
@@ -114,8 +109,8 @@ static void eval_jacobian(expr *node)
114109
csr_to_csc_fill_values(x->jacobian, Jchild_CSC, node->iwork);
115110

116111
/* compute this node's jacobian: */
117-
block_left_multiply_fill_values(lnode->A, Jchild_CSC, J_CSC);
118-
csc_to_csr_fill_values(J_CSC, node->jacobian, lnode->csc_to_csr_workspace);
112+
lnode->A->block_left_mult_values(lnode->A, Jchild_CSC, J_CSC);
113+
csc_to_csr_fill_values(J_CSC, node->jacobian, lnode->csc_to_csr_work);
119114
}
120115

121116
static void wsum_hess_init(expr *node)
@@ -131,16 +126,16 @@ static void wsum_hess_init(expr *node)
131126

132127
/* work for computing A^T w*/
133128
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
134-
int dim = ((left_matmul_expr *) node)->A->n * n_blocks;
129+
int dim = ((left_matmul_expr *) node)->AT->m * n_blocks;
135130
node->dwork = (double *) malloc(dim * sizeof(double));
136131
}
137132

138133
static void eval_wsum_hess(expr *node, const double *w)
139134
{
140135
/* compute A^T w*/
141-
CSR_Matrix *AT = ((left_matmul_expr *) node)->AT;
136+
Matrix *AT = ((left_matmul_expr *) node)->AT;
142137
int n_blocks = ((left_matmul_expr *) node)->n_blocks;
143-
block_left_multiply_vec(AT, w, node->dwork, n_blocks);
138+
AT->block_left_mult_vec(AT, w, node->dwork, n_blocks);
144139

145140
node->left->eval_wsum_hess(node->left, node->dwork);
146141
memcpy(node->wsum_hess->x, node->left->wsum_hess->x,
@@ -173,25 +168,64 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A)
173168
}
174169

175170
/* Allocate the type-specific struct */
176-
left_matmul_expr *lin_node =
171+
left_matmul_expr *lnode =
177172
(left_matmul_expr *) calloc(1, sizeof(left_matmul_expr));
178-
expr *node = &lin_node->base;
173+
expr *node = &lnode->base;
179174
init_expr(node, d1, d2, u->n_vars, forward, jacobian_init, eval_jacobian,
180175
is_affine, wsum_hess_init, eval_wsum_hess, free_type_data);
181176
node->left = u;
182177
expr_retain(u);
183178

184-
/* allocate workspace. iwork is used for transposing A (requiring size A->n)
185-
and for converting J_child csr to csc (requring size node->n_vars).
186-
csc_to_csr_workspace is used for converting J_CSC to CSR (requring node->size)
187-
*/
179+
/* allocate workspace. iwork is used for converting J_child csr to csc
180+
(requiring size node->n_vars) and for transposing A (requiring size A->n).
181+
csc_to_csr_work is used for converting J_CSC to CSR (requiring
182+
node->size) */
188183
node->iwork = (int *) malloc(MAX(A->n, node->n_vars) * sizeof(int));
189-
lin_node->csc_to_csr_workspace = (int *) malloc(node->size * sizeof(int));
190-
lin_node->n_blocks = n_blocks;
184+
lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
185+
lnode->n_blocks = n_blocks;
191186

192187
/* store A and AT */
193-
lin_node->A = new_csr(A);
194-
lin_node->AT = transpose(lin_node->A, node->iwork);
188+
lnode->A = new_sparse_matrix(A);
189+
lnode->AT = sparse_matrix_trans((const Sparse_Matrix *) lnode->A, node->iwork);
190+
191+
return node;
192+
}
193+
194+
expr *new_left_matmul_dense(expr *u, int m, int n, const double *data)
195+
{
196+
int d1, d2, n_blocks;
197+
if (u->d1 == n)
198+
{
199+
d1 = m;
200+
d2 = u->d2;
201+
n_blocks = u->d2;
202+
}
203+
else if (u->d2 == n && u->d1 == 1)
204+
{
205+
d1 = 1;
206+
d2 = m;
207+
n_blocks = 1;
208+
}
209+
else
210+
{
211+
fprintf(stderr, "Error in new_left_matmul_dense: dimension mismatch\n");
212+
exit(1);
213+
}
214+
215+
left_matmul_expr *lnode =
216+
(left_matmul_expr *) calloc(1, sizeof(left_matmul_expr));
217+
expr *node = &lnode->base;
218+
init_expr(node, d1, d2, u->n_vars, forward, jacobian_init, eval_jacobian,
219+
is_affine, wsum_hess_init, eval_wsum_hess, free_type_data);
220+
node->left = u;
221+
expr_retain(u);
222+
223+
node->iwork = (int *) malloc(MAX(n, node->n_vars) * sizeof(int));
224+
lnode->csc_to_csr_work = (int *) malloc(node->size * sizeof(int));
225+
lnode->n_blocks = n_blocks;
226+
227+
lnode->A = new_dense_matrix(m, n, data);
228+
lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
195229

196230
return node;
197231
}

src/utils/linalg_sparse_matmuls.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -109,16 +109,15 @@ CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A,
109109
/* A is m x n, J is (n*p) x k, C is (m*p) x k */
110110
int m = A->m;
111111
int n = A->n;
112-
int k = J->n;
113112
int j, jj, block, block_start, block_end, block_jj_start, block_jj_end,
114113
row_offset;
115114

116115
/* allocate column pointers and an estimate of row indices */
117-
int *Cp = (int *) malloc((k + 1) * sizeof(int));
118-
iVec *Ci = iVec_new(k * m);
116+
int *Cp = (int *) malloc((J->n + 1) * sizeof(int));
117+
iVec *Ci = iVec_new(J->n * m);
119118
Cp[0] = 0;
120119

121-
/* for each column of j */
120+
/* for each column of J */
122121
for (j = 0; j < J->n; j++)
123122
{
124123
/* if empty we continue */
@@ -175,14 +174,9 @@ CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A,
175174
Cp[j + 1] = Ci->len;
176175
}
177176

178-
/* Allocate result matrix C in CSC format */
179-
CSC_Matrix *C = new_csc_matrix(m * p, k, Ci->len);
180-
181-
/* Copy column pointers and row indices */
182-
memcpy(C->p, Cp, (k + 1) * sizeof(int));
177+
CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len);
178+
memcpy(C->p, Cp, (J->n + 1) * sizeof(int));
183179
memcpy(C->i, Ci->data, Ci->len * sizeof(int));
184-
185-
/* Clean up workspace */
186180
free(Cp);
187181
iVec_free(Ci);
188182

tests/all_tests.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@
4343
#include "jacobian_tests/test_trace.h"
4444
#include "jacobian_tests/test_transpose.h"
4545
#include "problem/test_problem.h"
46+
#include "utils/test_cblas.h"
4647
#include "utils/test_coo_matrix.h"
4748
#include "utils/test_csc_matrix.h"
4849
#include "utils/test_csr_csc_conversion.h"
4950
#include "utils/test_csr_matrix.h"
5051
#include "utils/test_linalg_sparse_matmuls.h"
52+
#include "utils/test_matrix.h"
5153
#include "wsum_hess/elementwise/test_entr.h"
5254
#include "wsum_hess/elementwise/test_exp.h"
5355
#include "wsum_hess/elementwise/test_hyperbolic.h"
@@ -242,6 +244,7 @@ int main(void)
242244
mu_run_test(test_wsum_hess_transpose, tests_run);
243245

244246
printf("\n--- Utility Tests ---\n");
247+
mu_run_test(test_cblas_ddot, tests_run);
245248
mu_run_test(test_diag_csr_mult, tests_run);
246249
mu_run_test(test_csr_sum, tests_run);
247250
mu_run_test(test_csr_sum2, tests_run);
@@ -275,6 +278,11 @@ int main(void)
275278
mu_run_test(test_csr_to_coo, tests_run);
276279
mu_run_test(test_csr_to_coo_lower_triangular, tests_run);
277280
mu_run_test(test_refresh_lower_triangular_coo, tests_run);
281+
mu_run_test(test_dense_matrix_mult_vec, tests_run);
282+
mu_run_test(test_dense_matrix_mult_vec_blocks, tests_run);
283+
mu_run_test(test_sparse_vs_dense_mult_vec, tests_run);
284+
mu_run_test(test_dense_matrix_trans, tests_run);
285+
mu_run_test(test_sparse_vs_dense_mult_vec_blocks, tests_run);
278286

279287
printf("\n--- Problem Struct Tests ---\n");
280288
mu_run_test(test_problem_new_free, tests_run);

tests/forward_pass/affine/test_add.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "minunit.h"
88
#include "test_helpers.h"
99

10-
const char *test_addition()
10+
const char *test_addition(void)
1111
{
1212
double u[2] = {3.0, 4.0};
1313
double c[2] = {1.0, 2.0};

0 commit comments

Comments
 (0)