Skip to content

Commit 5280ff8

Browse files
committed
dense matrix class first draft
1 parent cd6c1ff commit 5280ff8

6 files changed

Lines changed: 508 additions & 0 deletions

File tree

include/utils/cblas_wrapper.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
 * Single include point for the CBLAS interface.
 *
 * On Apple platforms the CBLAS declarations are taken from the Accelerate
 * framework, with ACCELERATE_NEW_LAPACK defined before the include. Every
 * other platform includes a standalone <cblas.h>.
 */
#if !defined(CBLAS_WRAPPER_H)
#define CBLAS_WRAPPER_H

#if defined(__APPLE__)
#define ACCELERATE_NEW_LAPACK
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
#endif

#endif /* CBLAS_WRAPPER_H */

include/utils/matrix.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright 2026 Daniel Cederberg and William Zhang
3+
*
4+
* This file is part of the DNLP-differentiation-engine project.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
#ifndef MATRIX_H
19+
#define MATRIX_H
20+
21+
#include "CSC_Matrix.h"
22+
#include "CSR_Matrix.h"
23+
24+
/* Base matrix type with function pointers for polymorphic dispatch */
25+
typedef struct Matrix
26+
{
27+
int m, n;
28+
void (*block_left_mult_vec)(const struct Matrix *self, const double *x,
29+
double *y, int p);
30+
CSC_Matrix *(*block_left_mult_sparsity)(const struct Matrix *self,
31+
const CSC_Matrix *J, int p);
32+
void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J,
33+
CSC_Matrix *C);
34+
void (*free_fn)(struct Matrix *self);
35+
} Matrix;
36+
37+
/* Sparse matrix wrapping CSR */
38+
typedef struct Sparse_Matrix
39+
{
40+
Matrix base;
41+
CSR_Matrix *csr;
42+
} Sparse_Matrix;
43+
44+
/* Dense matrix (row-major) */
45+
typedef struct Dense_Matrix
46+
{
47+
Matrix base;
48+
double *x;
49+
double *work; /* scratch buffer, length n */
50+
} Dense_Matrix;
51+
52+
/* Constructors */
53+
Matrix *new_sparse_matrix(const CSR_Matrix *A);
54+
Matrix *new_dense_matrix(int m, int n, const double *data);
55+
56+
/* Transpose helpers */
57+
Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork);
58+
Matrix *dense_matrix_trans(const Dense_Matrix *self);
59+
60+
/* Free helper */
61+
static inline void free_matrix(Matrix *m)
62+
{
63+
if (m)
64+
{
65+
m->free_fn(m);
66+
}
67+
}
68+
69+
#endif /* MATRIX_H */

src/utils/dense_matrix.c

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/*
2+
* Copyright 2026 Daniel Cederberg and William Zhang
3+
*
4+
* This file is part of the DNLP-differentiation-engine project.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
#include "utils/cblas_wrapper.h"
19+
#include "utils/iVec.h"
20+
#include "utils/matrix.h"
21+
#include <stdlib.h>
22+
#include <string.h>
23+
24+
static void dense_block_left_mult_vec(const Matrix *A, const double *x, double *y,
25+
int p)
26+
{
27+
const Dense_Matrix *dm = (const Dense_Matrix *) A;
28+
int m = dm->base.m;
29+
int n = dm->base.n;
30+
31+
/* y = kron(I_p, A) @ x via a single dgemm call:
32+
Treat x as n x p (column-major blocks) and y as m x p.
33+
But x and y are stored as p blocks of length n and m respectively
34+
(i.e. block-interleaved). This is the same as treating them as
35+
row-major matrices of shape p x n and p x m, so:
36+
y (p x m) = x (p x n) * A^T (n x m), all row-major.
37+
cblas with RowMajor: C = alpha * A * B + beta * C
38+
where A = x (p x n), B = A^T (n x m), C = y (p x m). */
39+
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, p, m, n, 1.0, x, n, dm->x,
40+
n, 0.0, y, m);
41+
}
42+
43+
static CSC_Matrix *dense_block_left_mult_sparsity(const Matrix *A,
44+
const CSC_Matrix *J, int p)
45+
{
46+
int m = A->m;
47+
int n = A->n;
48+
int i, j, jj, block, block_start, block_end, block_jj_start, row_offset;
49+
50+
int *Cp = (int *) malloc((J->n + 1) * sizeof(int));
51+
iVec *Ci = iVec_new(J->n * m);
52+
Cp[0] = 0;
53+
54+
/* for each column of J */
55+
for (j = 0; j < J->n; j++)
56+
{
57+
/* if empty we continue */
58+
if (J->p[j] == J->p[j + 1])
59+
{
60+
Cp[j + 1] = Cp[j];
61+
continue;
62+
}
63+
64+
/* process each of p blocks of rows in this column of J */
65+
jj = J->p[j];
66+
for (block = 0; block < p; block++)
67+
{
68+
// -----------------------------------------------------------------
69+
// find start and end indices of rows of J in this block
70+
// -----------------------------------------------------------------
71+
block_start = block * n;
72+
block_end = block_start + n;
73+
while (jj < J->p[j + 1] && J->i[jj] < block_start)
74+
{
75+
jj++;
76+
}
77+
78+
block_jj_start = jj;
79+
while (jj < J->p[j + 1] && J->i[jj] < block_end)
80+
{
81+
jj++;
82+
}
83+
84+
/* if no entries in this block, continue */
85+
if (jj == block_jj_start)
86+
{
87+
continue;
88+
}
89+
90+
/* dense A: all m rows contribute */
91+
row_offset = block * m;
92+
for (i = 0; i < m; i++)
93+
{
94+
iVec_append(Ci, row_offset + i);
95+
}
96+
}
97+
Cp[j + 1] = Ci->len;
98+
}
99+
100+
CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len);
101+
memcpy(C->p, Cp, (J->n + 1) * sizeof(int));
102+
memcpy(C->i, Ci->data, Ci->len * sizeof(int));
103+
free(Cp);
104+
iVec_free(Ci);
105+
106+
return C;
107+
}
108+
109+
/*
 * Fill the values of C = kron(I_p, A) @ J for a dense m x n matrix A.
 *
 * C must carry the pattern produced by dense_block_left_mult_sparsity for the
 * same J: every column of C is a sequence of runs of m consecutive rows, one
 * run per row block of J that has entries in that column. For each run the
 * matching slice of J's column is scattered into the length-n scratch buffer
 * and multiplied by A with dgemv, writing the m results straight into C->x.
 *
 * Row indices inside each column of J must be sorted in increasing order.
 */
static void dense_block_left_mult_values(const Matrix *A, const CSC_Matrix *J,
                                         CSC_Matrix *C)
{
    const Dense_Matrix *dm = (const Dense_Matrix *) A;
    int m = dm->base.m;
    int n = dm->base.n;
    int k = J->n;
    double *j_dense = dm->work;

    /* for each column of J (and C) */
    for (int j = 0; j < k; j++)
    {
        const int col_begin = J->p[j];
        const int col_end = J->p[j + 1];

        /* Position in J's column. It is carried from one block to the next so
           the column is swept once instead of being rescanned per block. */
        int s = col_begin;

        for (int i = C->p[j]; i < C->p[j + 1]; i += m)
        {
            const int block = C->i[i] / m;
            const int block_start = block * n;
            const int block_end = block_start + n;

            /* Blocks normally arrive in increasing order. If C lists them in
               another order, restart the sweep so the result stays correct. */
            if (s > col_begin && J->i[s - 1] >= block_start)
            {
                s = col_begin;
            }

            /* skip entries that lie before this block */
            while (s < col_end && J->i[s] < block_start)
            {
                s++;
            }

            /* scatter this block's slice of the sparse column into a dense
               vector of length n */
            memset(j_dense, 0, (size_t) n * sizeof(double));
            for (; s < col_end && J->i[s] < block_end; s++)
            {
                j_dense[J->i[s] - block_start] = J->x[s];
            }

            /* C[block rows, j] = A @ j_dense */
            cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, dm->x, n, j_dense, 1,
                        0.0, C->x + i, 1);
        }
    }
}
156+
157+
/* free_fn for dense matrices: releases both buffers, then the object itself.
   A must be a non-null pointer obtained from new_dense_matrix. */
static void dense_free(Matrix *A)
{
    Dense_Matrix *dense = (Dense_Matrix *) A;

    free(dense->work);
    free(dense->x);
    free(dense);
}
164+
165+
Matrix *new_dense_matrix(int m, int n, const double *data)
166+
{
167+
Dense_Matrix *dm = (Dense_Matrix *) calloc(1, sizeof(Dense_Matrix));
168+
dm->base.m = m;
169+
dm->base.n = n;
170+
dm->base.block_left_mult_vec = dense_block_left_mult_vec;
171+
dm->base.block_left_mult_sparsity = dense_block_left_mult_sparsity;
172+
dm->base.block_left_mult_values = dense_block_left_mult_values;
173+
dm->base.free_fn = dense_free;
174+
dm->x = (double *) malloc(m * n * sizeof(double));
175+
memcpy(dm->x, data, m * n * sizeof(double));
176+
dm->work = (double *) malloc(n * sizeof(double));
177+
return &dm->base;
178+
}
179+
180+
Matrix *dense_matrix_trans(const Dense_Matrix *A)
181+
{
182+
int m = A->base.m;
183+
int n = A->base.n;
184+
double *AT_x = (double *) malloc(m * n * sizeof(double));
185+
186+
for (int i = 0; i < m; i++)
187+
{
188+
for (int j = 0; j < n; j++)
189+
{
190+
AT_x[j * m + i] = A->x[i * n + j];
191+
}
192+
}
193+
194+
Matrix *result = new_dense_matrix(n, m, AT_x);
195+
free(AT_x);
196+
return result;
197+
}

src/utils/sparse_matrix.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright 2026 Daniel Cederberg and William Zhang
3+
*
4+
* This file is part of the DNLP-differentiation-engine project.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
#include "utils/linalg_sparse_matmuls.h"
19+
#include "utils/matrix.h"
20+
#include <stdlib.h>
21+
22+
static void sparse_block_left_mult_vec(const Matrix *self, const double *x,
23+
double *y, int p)
24+
{
25+
const Sparse_Matrix *sm = (const Sparse_Matrix *) self;
26+
block_left_multiply_vec(sm->csr, x, y, p);
27+
}
28+
29+
static CSC_Matrix *sparse_block_left_mult_sparsity(const Matrix *self,
30+
const CSC_Matrix *J, int p)
31+
{
32+
const Sparse_Matrix *sm = (const Sparse_Matrix *) self;
33+
return block_left_multiply_fill_sparsity(sm->csr, J, p);
34+
}
35+
36+
/* block_left_mult_values for sparse matrices: forwards to the CSR kernel
   block_left_multiply_fill_values with the wrapped matrix. */
static void sparse_block_left_mult_values(const Matrix *self, const CSC_Matrix *J,
                                          CSC_Matrix *C)
{
    const CSR_Matrix *csr = ((const Sparse_Matrix *) self)->csr;

    block_left_multiply_fill_values(csr, J, C);
}
42+
43+
/* free_fn for sparse matrices: releases the wrapped CSR matrix, then the
   wrapper itself. self must be non-null. */
static void sparse_free(Matrix *self)
{
    Sparse_Matrix *sparse = (Sparse_Matrix *) self;
    CSR_Matrix *csr = sparse->csr;

    free_csr_matrix(csr);
    free(sparse);
}
49+
50+
Matrix *new_sparse_matrix(const CSR_Matrix *A)
51+
{
52+
Sparse_Matrix *sm = (Sparse_Matrix *) calloc(1, sizeof(Sparse_Matrix));
53+
sm->base.m = A->m;
54+
sm->base.n = A->n;
55+
sm->base.block_left_mult_vec = sparse_block_left_mult_vec;
56+
sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity;
57+
sm->base.block_left_mult_values = sparse_block_left_mult_values;
58+
sm->base.free_fn = sparse_free;
59+
sm->csr = new_csr(A);
60+
return &sm->base;
61+
}
62+
63+
Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork)
64+
{
65+
CSR_Matrix *AT = transpose(self->csr, iwork);
66+
Sparse_Matrix *sm = (Sparse_Matrix *) calloc(1, sizeof(Sparse_Matrix));
67+
sm->base.m = AT->m;
68+
sm->base.n = AT->n;
69+
sm->base.block_left_mult_vec = sparse_block_left_mult_vec;
70+
sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity;
71+
sm->base.block_left_mult_values = sparse_block_left_mult_values;
72+
sm->base.free_fn = sparse_free;
73+
sm->csr = AT;
74+
return &sm->base;
75+
}

tests/utils/test_cblas.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef TEST_CBLAS_H
2+
#define TEST_CBLAS_H
3+
4+
#include "minunit.h"
5+
#include "utils/cblas_wrapper.h"
6+
#include <math.h>
7+
8+
/* Smoke test for the CBLAS link: dot product of two length-4 vectors with
   unit stride, compared against the hand-computed value. */
static char *test_cblas_ddot(void)
{
    const double x[] = {1.0, 2.0, 3.0, 4.0};
    const double y[] = {5.0, 6.0, 7.0, 8.0};

    /* 1*5 + 2*6 + 3*7 + 4*8 */
    const double expected = 5.0 + 12.0 + 21.0 + 32.0;
    const double error = fabs(cblas_ddot(4, x, 1, y, 1) - expected);

    mu_assert("test_cblas_ddot: wrong dot product", error < 1e-12);
    return 0;
}
17+
18+
#endif /* TEST_CBLAS_H */

0 commit comments

Comments
 (0)