Skip to content

Commit c2276d3

Browse files
committed
clean up some infrastructure
1 parent ce5fa77 commit c2276d3

3 files changed

Lines changed: 72 additions & 59 deletions

File tree

include/utils/CSC_Matrix.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,16 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
5151
*/
5252
void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C);
5353

54-
/* Allocate B = Q * A (sparsity only). Q is CSR, A is CSC, B is CSC. */
55-
CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix *A);
56-
57-
/* Fill values of B = Q * A. B must have sparsity from above. */
58-
void csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
59-
CSC_Matrix *B);
54+
/* Allocate B = Q * A (sparsity only). Q must be symmetric.
55+
* Q is CSR, A is CSC, B is CSC. */
56+
CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
57+
const CSC_Matrix *A);
58+
59+
/* Fill values of B = Q * A. Q must be symmetric.
60+
* B must have sparsity from sym_csr_csc_multiply_fill_sparsity. */
61+
void sym_csr_csc_multiply_fill_values(const CSR_Matrix *Q,
62+
const CSC_Matrix *A,
63+
CSC_Matrix *B);
6064

6165
/* Count nonzero columns of a CSC matrix */
6266
int count_nonzero_cols_csc(const CSC_Matrix *A);

src/other/quad_form.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ static void wsum_hess_init_impl(expr *node)
146146
CSC_Matrix *Jf = x->work->jacobian_csc;
147147

148148
/* term1 = Jf^T W Jf = Jf^T B*/
149-
CSC_Matrix *B = csr_csc_multiply_fill_sparsity(Q, Jf);
149+
CSC_Matrix *B = sym_csr_csc_multiply_fill_sparsity(Q, Jf);
150150
qnode->QJf = B;
151151
node->work->hess_term1 = BTA_alloc(Jf, B);
152152

@@ -194,7 +194,7 @@ static void eval_wsum_hess(expr *node, const double *w)
194194
CSR_Matrix *term2 = node->work->hess_term2;
195195

196196
/* term1 = J_f^T Q J_f = J_f^T B */
197-
csr_csc_multiply_fill_values(Q, Jf, QJf);
197+
sym_csr_csc_multiply_fill_values(Q, Jf, QJf);
198198
BTDA_fill_values(Jf, QJf, NULL, term1);
199199

200200
/* term2 */

src/utils/CSC_Matrix.c

Lines changed: 60 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -186,19 +186,17 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
186186
int nnz_ai = A->p[ii + 1] - A->p[ii];
187187
int nnz_aj = A->p[j + 1] - A->p[j];
188188

189-
double sum;
190189
if (d != NULL)
191190
{
192-
sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
193-
A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
191+
C->x[jj] =
192+
sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
193+
A->x + A->p[j], A->i + A->p[j], nnz_aj, d);
194194
}
195195
else
196196
{
197-
sum = sparse_dot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
198-
A->x + A->p[j], A->i + A->p[j], nnz_aj);
197+
C->x[jj] = sparse_dot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai,
198+
A->x + A->p[j], A->i + A->p[j], nnz_aj);
199199
}
200-
201-
C->x[jj] = sum;
202200
}
203201
}
204202
}
@@ -496,36 +494,55 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
496494
}
497495
}
498496

499-
CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix *A)
497+
CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
498+
const CSC_Matrix *A)
500499
{
501-
/* Allocate B = Q * A (sparsity only).
502-
* Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n). */
500+
/* Allocate B = Q * A (sparsity only). Q must be symmetric.
501+
* Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n).
502+
*
503+
* Column j of B is Q * a_j = sum_k A_{k,j} Q[:, k], so the nonzero
504+
* rows of column j of B are the union of the nonzero rows of Q[:, k].
505+
*
506+
* Since Q is symmetric, we can find the nonzero rows of Q[:, k] by
507+
* finding the nonzero columns of Q in row k.
508+
*
509+
* We use a marker array to avoid duplicates: marker[l] stores the
510+
* last column j that registered l as nonzero, so checking
511+
* marker[l] != j avoids duplicates. */
503512

504513
int m = Q->m;
505514
int n = A->n;
515+
int i, j, k, jj, ii, ell;
506516

517+
/* marker[row] = last column j that registered row as nonzero */
507518
int *marker = (int *) malloc(m * sizeof(int));
508-
memset(marker, -1, m * sizeof(int));
519+
for (i = 0; i < m; i++)
520+
{
521+
marker[i] = -1;
522+
}
509523

510524
int *Bp = (int *) malloc((n + 1) * sizeof(int));
511525
iVec *Bi = iVec_new(A->nnz);
512526
Bp[0] = 0;
513527

514-
for (int j = 0; j < n; j++)
528+
/* for each column j of B */
529+
for (j = 0; j < n; j++)
515530
{
516531
int col_nnz = 0;
517532

518-
for (int t = A->p[j]; t < A->p[j + 1]; t++)
533+
/* iterate over nonzero rows k in column j of A */
534+
for (ii = A->p[j]; ii < A->p[j + 1]; ii++)
519535
{
520-
int k = A->i[t];
536+
k = A->i[ii];
521537

522-
for (int s = Q->p[k]; s < Q->p[k + 1]; s++)
538+
/* find nonzero rows ell of column k of Q */
539+
for (jj = Q->p[k]; jj < Q->p[k + 1]; jj++)
523540
{
524-
int row = Q->i[s];
525-
if (marker[row] != j)
541+
ell = Q->i[jj];
542+
if (marker[ell] != j)
526543
{
527-
marker[row] = j;
528-
iVec_append(Bi, row);
544+
marker[ell] = j;
545+
iVec_append(Bi, ell);
529546
col_nnz++;
530547
}
531548
}
@@ -534,6 +551,7 @@ CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix
534551
Bp[j + 1] = Bp[j] + col_nnz;
535552
}
536553

554+
/* allocate B and copy the computed structure */
537555
int total_nnz = Bp[n];
538556
CSC_Matrix *B = new_csc_matrix(m, n, total_nnz);
539557
memcpy(B->p, Bp, (n + 1) * sizeof(int));
@@ -546,46 +564,37 @@ CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix
546564
return B;
547565
}
548566

549-
void csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
550-
CSC_Matrix *B)
567+
/* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
568+
* into column j of B using a marker array for position lookup) may be
569+
* faster when Q is dense, since it touches each Q entry exactly once.
570+
* The sparse_dot approach below is simpler but redundantly scans
571+
* column j of A for each nonzero row of B. */
572+
void sym_csr_csc_multiply_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A,
573+
CSC_Matrix *B)
551574
{
552-
/* Fill values of B = Q * A. B must have sparsity from
553-
* csr_csc_multiply_fill_sparsity. */
554-
555-
int m = Q->m;
575+
/* Fill values of B = Q * A. Q must be symmetric.
576+
* B must have sparsity from sym_csr_csc_multiply_fill_sparsity.
577+
*
578+
* B_{l,j} = sum_k Q_{l,k} * A_{k,j} = dot(Q[l,:], A[:,j]).
579+
* Since Q is symmetric, row l of Q has the same entries as
580+
* column l, so we iterate over row l of Q in CSR format. */
556581

557-
int *marker = (int *) malloc(m * sizeof(int));
558-
memset(marker, -1, m * sizeof(int));
559-
memset(B->x, 0, B->nnz * sizeof(double));
582+
int i, j, ii;
560583

561-
for (int j = 0; j < B->n; j++)
584+
/* for each column j of B */
585+
for (j = 0; j < B->n; j++)
562586
{
563-
/* map row index -> position in column j of B */
564-
for (int t = B->p[j]; t < B->p[j + 1]; t++)
565-
{
566-
marker[B->i[t]] = t;
567-
}
568-
569-
/* accumulate A_{k,j} * Q[k, :] */
570-
for (int t = A->p[j]; t < A->p[j + 1]; t++)
587+
for (ii = B->p[j]; ii < B->p[j + 1]; ii++)
571588
{
572-
int k = A->i[t];
573-
double a_kj = A->x[t];
574-
575-
for (int s = Q->p[k]; s < Q->p[k + 1]; s++)
576-
{
577-
B->x[marker[Q->i[s]]] += a_kj * Q->x[s];
578-
}
579-
}
589+
i = B->i[ii];
590+
int nnz_q = Q->p[i + 1] - Q->p[i];
591+
int nnz_a = A->p[j + 1] - A->p[j];
580592

581-
/* reset marker */
582-
for (int t = B->p[j]; t < B->p[j + 1]; t++)
583-
{
584-
marker[B->i[t]] = -1;
593+
/* inner product between row i of Q and column j of A */
594+
B->x[ii] = sparse_dot(Q->x + Q->p[i], Q->i + Q->p[i], nnz_q,
595+
A->x + A->p[j], A->i + A->p[j], nnz_a);
585596
}
586597
}
587-
588-
free(marker);
589598
}
590599

591600
int count_nonzero_cols_csc(const CSC_Matrix *A)

0 commit comments

Comments
 (0)