Skip to content

Commit 0727796

Browse files
committed
clean up CSC_Matrix class
1 parent c2276d3 commit 0727796

9 files changed

Lines changed: 88 additions & 240 deletions

File tree

include/utils/CSC_Matrix.h

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,46 +29,39 @@ CSC_Matrix *new_csc_matrix(int m, int n, int nnz);
2929
/* Free a CSC matrix */
3030
void free_csc_matrix(CSC_Matrix *matrix);
3131

32-
CSC_Matrix *csr_to_csc(const CSR_Matrix *A);
33-
34-
/* Allocate sparsity pattern for C = A^T D A for diagonal D */
32+
/* Fill sparsity of C = A^T D A for diagonal D */
3533
CSR_Matrix *ATA_alloc(const CSC_Matrix *A);
3634

37-
/* Allocate sparsity pattern for C = B^T D A for diagonal D */
35+
/* Fill sparsity of C = B^T D A for diagonal D */
3836
CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B);
3937

40-
/* Compute values for C = A^T D A. C must have precomputed sparsity pattern.
41-
* If d is NULL, D is treated as the identity (computes A^T A). */
38+
/* Fill sparsity of C = BA, where B is symmetric. */
39+
CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A);
40+
41+
/* Compute values for C = A^T D A (null d corresponds to D as identity) */
4242
void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C);
4343

44-
/* Compute values for C = B^T D A. C must have precomputed sparsity pattern.
45-
* If d is NULL, D is treated as the identity (computes B^T A). */
44+
/* Compute values for C = B^T D A (null d corresonds to D as identity) */
4645
void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
4746
CSR_Matrix *C);
4847

49-
/* C = z^T A where A is in CSC format and C is assumed to have one row.
50-
* C must have column indices pre-computed. Fills in values of C only.
51-
*/
52-
void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C);
53-
54-
/* Allocate B = Q * A (sparsity only). Q must be symmetric.
55-
* Q is CSR, A is CSC, B is CSC. */
56-
CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
57-
const CSC_Matrix *A);
48+
/* Fill values of C = BA. The matrix B does not have to be symmetric */
49+
void BA_fill_values(const CSR_Matrix *B, const CSC_Matrix *A, CSC_Matrix *C);
5850

59-
/* Fill values of B = Q * A. Q must be symmetric.
60-
* B must have sparsity from sym_csr_csc_multiply_fill_sparsity. */
61-
void sym_csr_csc_multiply_fill_values(const CSR_Matrix *Q,
62-
const CSC_Matrix *A,
63-
CSC_Matrix *B);
51+
/* Fill values of C = x^T A. The matrix C must have filled sparsity. */
52+
void yTA_fill_values(const CSC_Matrix *A, const double *x, CSR_Matrix *C);
6453

6554
/* Count nonzero columns of a CSC matrix */
6655
int count_nonzero_cols_csc(const CSC_Matrix *A);
6756

68-
CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork);
57+
/* convert from CSR to CSC format */
58+
CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork);
6959
void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork);
7060

71-
CSR_Matrix *csc_to_csr_fill_sparsity(const CSC_Matrix *A, int *iwork);
61+
62+
/* convert from CSC to CSR format */
63+
CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork);
7264
void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork);
7365

66+
7467
#endif /* CSC_MATRIX_H */

src/affine/left_matmul.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ static void jacobian_init_impl(expr *node)
8888

8989
/* initialize child's jacobian and precompute sparsity of its CSC */
9090
jacobian_init(x);
91-
lnode->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->work->iwork);
91+
lnode->Jchild_CSC = csr_to_csc_alloc(x->jacobian, node->work->iwork);
9292

9393
/* precompute sparsity of this node's jacobian in CSC and CSR */
9494
lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC,
9595
lnode->n_blocks);
96-
node->jacobian = csc_to_csr_fill_sparsity(lnode->J_CSC, lnode->csc_to_csr_work);
96+
node->jacobian = csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work);
9797
}
9898

9999
static void eval_jacobian(expr *node)

src/bivariate_restricted_dom/quad_over_lin.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,7 @@ static void eval_jacobian(expr *node)
164164
}
165165

166166
/* chain rule (no derivative wrt y) using CSC format */
167-
csc_matvec_fill_values(x->work->jacobian_csc, node->work->dwork,
168-
node->jacobian);
167+
yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian);
169168

170169
/* insert derivative wrt y at right place (for correctness this assumes
171170
that y does not appear in the numerator, but this will always be

src/expr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void jacobian_csc_init(expr *node)
5151
}
5252
node->work->csc_work = (int *) malloc(node->n_vars * sizeof(int));
5353
node->work->jacobian_csc =
54-
csr_to_csc_fill_sparsity(node->jacobian, node->work->csc_work);
54+
csr_to_csc_alloc(node->jacobian, node->work->csc_work);
5555
}
5656

5757
void free_expr(expr *node)

src/other/quad_form.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,7 @@ static void eval_jacobian(expr *node)
9999

100100
/* The jacobian has same values as the gradient, which is
101101
J_f^T (Q @ f(x)). Here, dwork stores Q @ f(x) from forward */
102-
csc_matvec_fill_values(x->work->jacobian_csc, node->work->dwork,
103-
node->jacobian);
102+
yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian);
104103

105104
cblas_dscal(node->jacobian->nnz, 2.0, node->jacobian->x, 1);
106105
}
@@ -146,7 +145,7 @@ static void wsum_hess_init_impl(expr *node)
146145
CSC_Matrix *Jf = x->work->jacobian_csc;
147146

148147
/* term1 = Jf^T W Jf = Jf^T B*/
149-
CSC_Matrix *B = sym_csr_csc_multiply_fill_sparsity(Q, Jf);
148+
CSC_Matrix *B = symBA_alloc(Q, Jf);
150149
qnode->QJf = B;
151150
node->work->hess_term1 = BTA_alloc(Jf, B);
152151

@@ -194,7 +193,7 @@ static void eval_wsum_hess(expr *node, const double *w)
194193
CSR_Matrix *term2 = node->work->hess_term2;
195194

196195
/* term1 = J_f^T Q J_f = J_f^T B */
197-
sym_csr_csc_multiply_fill_values(Q, Jf, QJf);
196+
BA_fill_values(Q, Jf, QJf);
198197
BTDA_fill_values(Jf, QJf, NULL, term1);
199198

200199
/* term2 */

src/utils/CSC_Matrix.c

Lines changed: 59 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -202,54 +202,7 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
202202
}
203203
}
204204

205-
CSC_Matrix *csr_to_csc(const CSR_Matrix *A)
206-
{
207-
CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz);
208-
209-
int i, j;
210-
int *count = malloc(A->n * sizeof(int));
211-
212-
memset(count, 0, A->n * sizeof(int));
213-
214-
// -------------------------------------------------------------------
215-
// compute nnz in each column of A
216-
// -------------------------------------------------------------------
217-
for (i = 0; i < A->m; ++i)
218-
{
219-
for (j = A->p[i]; j < A->p[i + 1]; ++j)
220-
{
221-
count[A->i[j]]++;
222-
}
223-
}
224-
225-
// ------------------------------------------------------------------
226-
// compute column pointers
227-
// ------------------------------------------------------------------
228-
C->p[0] = 0;
229-
for (i = 0; i < A->n; ++i)
230-
{
231-
C->p[i + 1] = C->p[i] + count[i];
232-
count[i] = C->p[i];
233-
}
234-
235-
// ------------------------------------------------------------------
236-
// fill matrix
237-
// ------------------------------------------------------------------
238-
for (i = 0; i < A->m; ++i)
239-
{
240-
for (j = A->p[i]; j < A->p[i + 1]; ++j)
241-
{
242-
C->x[count[A->i[j]]] = A->x[j];
243-
C->i[count[A->i[j]]] = i;
244-
count[A->i[j]]++;
245-
}
246-
}
247-
248-
free(count);
249-
return C;
250-
}
251-
252-
CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork)
205+
CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork)
253206
{
254207
CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz);
255208

@@ -312,7 +265,7 @@ void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork)
312265
}
313266
}
314267

315-
CSR_Matrix *csc_to_csr_fill_sparsity(const CSC_Matrix *A, int *iwork)
268+
CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork)
316269
{
317270
CSR_Matrix *C = new_csr_matrix(A->m, A->n, A->nnz);
318271

@@ -435,19 +388,14 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B)
435388
return C;
436389
}
437390

438-
void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C)
391+
void yTA_fill_values(const CSC_Matrix *A, const double *y, CSR_Matrix *C)
439392
{
440-
/* Compute C = z^T * A where A is in CSC format
441-
* C is a single-row CSR matrix with column indices pre-computed
442-
* This fills in the values of C only
443-
*/
444-
445393
for (int col = 0; col < A->n; col++)
446394
{
447395
double val = 0;
448396
for (int j = A->p[col]; j < A->p[col + 1]; j++)
449397
{
450-
val += z[A->i[j]] * A->x[j];
398+
val += y[A->i[j]] * A->x[j];
451399
}
452400

453401
if (A->p[col + 1] - A->p[col] == 0) continue;
@@ -464,6 +412,7 @@ void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C)
464412
}
465413
}
466414

415+
/* computes C = B^T * D * A in CSR */
467416
void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
468417
CSR_Matrix *C)
469418
{
@@ -477,40 +426,62 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
477426
int nnz_bi = B->p[i + 1] - B->p[i];
478427
int nnz_aj = A->p[j + 1] - A->p[j];
479428

480-
double sum;
481429
if (d != NULL)
482430
{
483-
sum = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
484-
A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
431+
C->x[jj] = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
432+
A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
485433
}
486434
else
487435
{
488-
sum = sparse_dot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
489-
A->x + A->p[j], A->i + A->p[j], nnz_aj);
436+
C->x[jj] = sparse_dot(B->x + B->p[i], B->i + B->p[i], nnz_bi,
437+
A->x + A->p[j], A->i + A->p[j], nnz_aj);
490438
}
439+
}
440+
}
441+
}
442+
443+
/* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
444+
* into column j of C using a marker array for position lookup) may be
445+
* faster when Q is dense, since it touches each Q entry exactly once.
446+
* The sparse_dot approach below is simpler but redundantly scans
447+
* column j of A for each nonzero row of C. */
448+
void BA_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A, CSC_Matrix *C)
449+
{
450+
/* fill values of C = Q * A, given the sparsity pattern of C. */
451+
int i, j, ii;
491452

492-
C->x[jj] = sum;
453+
/* for each column j of C */
454+
for (j = 0; j < C->n; j++)
455+
{
456+
for (ii = C->p[j]; ii < C->p[j + 1]; ii++)
457+
{
458+
i = C->i[ii];
459+
int nnz_q = Q->p[i + 1] - Q->p[i];
460+
int nnz_a = A->p[j + 1] - A->p[j];
461+
462+
/* inner product between row i of Q and column j of A */
463+
C->x[ii] = sparse_dot(Q->x + Q->p[i], Q->i + Q->p[i], nnz_q,
464+
A->x + A->p[j], A->i + A->p[j], nnz_a);
493465
}
494466
}
495467
}
496468

497-
CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
498-
const CSC_Matrix *A)
469+
CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A)
499470
{
500-
/* Allocate B = Q * A (sparsity only). Q must be symmetric.
501-
* Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n).
471+
/* Allocate C = B * A (sparsity only). B must be symmetric.
472+
* B is CSR (m x m), A is CSC (m x n), C is CSC (m x n).
502473
*
503-
* Column j of B is Q * a_j = sum_k A_{k,j} Q[:, k], so the nonzero
504-
* rows of column j of B are the union of the nonzero rows of Q[:, k].
474+
* Column j of C is B * a_j = sum_k A_{k,j} B[:, k], so the nonzero
475+
* rows of column j of C are the union of the nonzero rows of B[:, k].
505476
*
506-
* Since Q is symmetric, we can find the nonzero rows of Q[:, k] by
507-
* finding the nonzero columns of Q in row k.
477+
* Since B is symmetric, we can find the nonzero rows of B[:, k] by
478+
* finding the nonzero columns of B in row k.
508479
*
509480
* We use a marker array to avoid duplicates: marker[l] stores the
510481
* last column j that registered l as nonzero, so checking
511482
* marker[l] != j avoids duplicates. */
512483

513-
int m = Q->m;
484+
int m = B->m;
514485
int n = A->n;
515486
int i, j, k, jj, ii, ell;
516487

@@ -521,11 +492,11 @@ CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
521492
marker[i] = -1;
522493
}
523494

524-
int *Bp = (int *) malloc((n + 1) * sizeof(int));
525-
iVec *Bi = iVec_new(A->nnz);
526-
Bp[0] = 0;
495+
int *Cp = (int *) malloc((n + 1) * sizeof(int));
496+
iVec *Ci = iVec_new(A->nnz);
497+
Cp[0] = 0;
527498

528-
/* for each column j of B */
499+
/* for each column j of C */
529500
for (j = 0; j < n; j++)
530501
{
531502
int col_nnz = 0;
@@ -535,66 +506,33 @@ CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
535506
{
536507
k = A->i[ii];
537508

538-
/* find nonzero rows ell of column k of Q */
539-
for (jj = Q->p[k]; jj < Q->p[k + 1]; jj++)
509+
/* find nonzero rows ell of column k of B */
510+
for (jj = B->p[k]; jj < B->p[k + 1]; jj++)
540511
{
541-
ell = Q->i[jj];
512+
ell = B->i[jj];
542513
if (marker[ell] != j)
543514
{
544515
marker[ell] = j;
545-
iVec_append(Bi, ell);
516+
iVec_append(Ci, ell);
546517
col_nnz++;
547518
}
548519
}
549520
}
550521

551-
Bp[j + 1] = Bp[j] + col_nnz;
522+
Cp[j + 1] = Cp[j] + col_nnz;
552523
}
553524

554-
/* allocate B and copy the computed structure */
555-
int total_nnz = Bp[n];
556-
CSC_Matrix *B = new_csc_matrix(m, n, total_nnz);
557-
memcpy(B->p, Bp, (n + 1) * sizeof(int));
558-
memcpy(B->i, Bi->data, total_nnz * sizeof(int));
525+
/* allocate C and copy the computed structure */
526+
int total_nnz = Cp[n];
527+
CSC_Matrix *C = new_csc_matrix(m, n, total_nnz);
528+
memcpy(C->p, Cp, (n + 1) * sizeof(int));
529+
memcpy(C->i, Ci->data, total_nnz * sizeof(int));
559530

560531
free(marker);
561-
free(Bp);
562-
iVec_free(Bi);
563-
564-
return B;
565-
}
566-
567-
/* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
568-
* into column j of B using a marker array for position lookup) may be
569-
* faster when Q is dense, since it touches each Q entry exactly once.
570-
* The sparse_dot approach below is simpler but redundantly scans
571-
* column j of A for each nonzero row of B. */
572-
void sym_csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
573-
CSC_Matrix *B)
574-
{
575-
/* Fill values of B = Q * A. Q must be symmetric.
576-
* B must have sparsity from sym_csr_csc_multiply_fill_sparsity.
577-
*
578-
* B_{l,j} = sum_k Q_{l,k} * A_{k,j} = dot(Q[l,:], A[:,j]).
579-
* Since Q is symmetric, row l of Q has the same entries as
580-
* column l, so we iterate over row l of Q in CSR format. */
581-
582-
int i, j, ii;
583-
584-
/* for each column j of B */
585-
for (j = 0; j < B->n; j++)
586-
{
587-
for (ii = B->p[j]; ii < B->p[j + 1]; ii++)
588-
{
589-
i = B->i[ii];
590-
int nnz_q = Q->p[i + 1] - Q->p[i];
591-
int nnz_a = A->p[j + 1] - A->p[j];
532+
free(Cp);
533+
iVec_free(Ci);
592534

593-
/* inner product between row i of Q and column j of A */
594-
B->x[ii] = sparse_dot(Q->x + Q->p[i], Q->i + Q->p[i], nnz_q,
595-
A->x + A->p[j], A->i + A->p[j], nnz_a);
596-
}
597-
}
535+
return C;
598536
}
599537

600538
int count_nonzero_cols_csc(const CSC_Matrix *A)

0 commit comments

Comments
 (0)