@@ -186,19 +186,17 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
186186 int nnz_ai = A -> p [ii + 1 ] - A -> p [ii ];
187187 int nnz_aj = A -> p [j + 1 ] - A -> p [j ];
188188
189- double sum ;
190189 if (d != NULL )
191190 {
192- sum = sparse_wdot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
193- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
191+ C -> x [jj ] =
192+ sparse_wdot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
193+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
194194 }
195195 else
196196 {
197- sum = sparse_dot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
198- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
197+ C -> x [ jj ] = sparse_dot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
198+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
199199 }
200-
201- C -> x [jj ] = sum ;
202200 }
203201 }
204202 }
@@ -496,36 +494,55 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
496494 }
497495}
498496
499- CSC_Matrix * csr_csc_multiply_fill_sparsity (const CSR_Matrix * Q , const CSC_Matrix * A )
497+ CSC_Matrix * sym_csr_csc_multiply_fill_sparsity (const CSR_Matrix * Q ,
498+ const CSC_Matrix * A )
500499{
501- /* Allocate B = Q * A (sparsity only).
502- * Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n). */
500+ /* Allocate B = Q * A (sparsity only). Q must be symmetric.
501+ * Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n).
502+ *
503+ * Column j of B is Q * a_j = sum_k A_{k,j} Q[:, k], so the nonzero
504+ * rows of column j of B are the union of the nonzero rows of Q[:, k].
505+ *
506+ * Since Q is symmetric, we can find the nonzero rows of Q[:, k] by
507+ * finding the nonzero columns of Q in row k.
508+ *
509+ * We use a marker array to avoid duplicates: marker[l] stores the
510+ * last column j that registered l as nonzero, so checking
511+ * marker[l] != j avoids duplicates. */
503512
504513 int m = Q -> m ;
505514 int n = A -> n ;
515+ int i , j , k , jj , ii , ell ;
506516
517+ /* marker[row] = last column j that registered row as nonzero */
507518 int * marker = (int * ) malloc (m * sizeof (int ));
508- memset (marker , -1 , m * sizeof (int ));
519+ for (i = 0 ; i < m ; i ++ )
520+ {
521+ marker [i ] = -1 ;
522+ }
509523
510524 int * Bp = (int * ) malloc ((n + 1 ) * sizeof (int ));
511525 iVec * Bi = iVec_new (A -> nnz );
512526 Bp [0 ] = 0 ;
513527
514- for (int j = 0 ; j < n ; j ++ )
528+ /* for each column j of B */
529+ for (j = 0 ; j < n ; j ++ )
515530 {
516531 int col_nnz = 0 ;
517532
518- for (int t = A -> p [j ]; t < A -> p [j + 1 ]; t ++ )
533+ /* iterate over nonzero rows k in column j of A */
534+ for (ii = A -> p [j ]; ii < A -> p [j + 1 ]; ii ++ )
519535 {
520- int k = A -> i [t ];
536+ k = A -> i [ii ];
521537
522- for (int s = Q -> p [k ]; s < Q -> p [k + 1 ]; s ++ )
538+ /* find nonzero rows ell of column k of Q */
539+ for (jj = Q -> p [k ]; jj < Q -> p [k + 1 ]; jj ++ )
523540 {
524- int row = Q -> i [s ];
525- if (marker [row ] != j )
541+ ell = Q -> i [jj ];
542+ if (marker [ell ] != j )
526543 {
527- marker [row ] = j ;
528- iVec_append (Bi , row );
544+ marker [ell ] = j ;
545+ iVec_append (Bi , ell );
529546 col_nnz ++ ;
530547 }
531548 }
@@ -534,6 +551,7 @@ CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix
534551 Bp [j + 1 ] = Bp [j ] + col_nnz ;
535552 }
536553
554+ /* allocate B and copy the computed structure */
537555 int total_nnz = Bp [n ];
538556 CSC_Matrix * B = new_csc_matrix (m , n , total_nnz );
539557 memcpy (B -> p , Bp , (n + 1 ) * sizeof (int ));
@@ -546,46 +564,37 @@ CSC_Matrix *csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q, const CSC_Matrix
546564 return B ;
547565}
548566
549- void csr_csc_multiply_fill_values (const CSR_Matrix * Q , const CSC_Matrix * A ,
550- CSC_Matrix * B )
567+ /* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
568+ * into column j of B using a marker array for position lookup) may be
569+ * faster when Q is dense, since it touches each Q entry exactly once.
570+ * The sparse_dot approach below is simpler but redundantly scans
571+ * column j of A for each nonzero row of B. */
572+ void sym_csr_csc_multiply_fill_values (const CSR_Matrix * Q , const CSC_Matrix * A ,
573+ CSC_Matrix * B )
551574{
552- /* Fill values of B = Q * A. B must have sparsity from
553- * csr_csc_multiply_fill_sparsity. */
554-
555- int m = Q -> m ;
575+ /* Fill values of B = Q * A. Q must be symmetric.
576+ * B must have sparsity from sym_csr_csc_multiply_fill_sparsity.
577+ *
578+ * B_{l,j} = sum_k Q_{l,k} * A_{k,j} = dot(Q[l,:], A[:,j]).
579+ * Since Q is symmetric, row l of Q has the same entries as
580+ * column l, so we iterate over row l of Q in CSR format. */
556581
557- int * marker = (int * ) malloc (m * sizeof (int ));
558- memset (marker , -1 , m * sizeof (int ));
559- memset (B -> x , 0 , B -> nnz * sizeof (double ));
582+ int i , j , ii ;
560583
561- for (int j = 0 ; j < B -> n ; j ++ )
584+ /* for each column j of B */
585+ for (j = 0 ; j < B -> n ; j ++ )
562586 {
563- /* map row index -> position in column j of B */
564- for (int t = B -> p [j ]; t < B -> p [j + 1 ]; t ++ )
565- {
566- marker [B -> i [t ]] = t ;
567- }
568-
569- /* accumulate A_{k,j} * Q[k, :] */
570- for (int t = A -> p [j ]; t < A -> p [j + 1 ]; t ++ )
587+ for (ii = B -> p [j ]; ii < B -> p [j + 1 ]; ii ++ )
571588 {
572- int k = A -> i [t ];
573- double a_kj = A -> x [t ];
574-
575- for (int s = Q -> p [k ]; s < Q -> p [k + 1 ]; s ++ )
576- {
577- B -> x [marker [Q -> i [s ]]] += a_kj * Q -> x [s ];
578- }
579- }
589+ i = B -> i [ii ];
590+ int nnz_q = Q -> p [i + 1 ] - Q -> p [i ];
591+ int nnz_a = A -> p [j + 1 ] - A -> p [j ];
580592
581- /* reset marker */
582- for (int t = B -> p [j ]; t < B -> p [j + 1 ]; t ++ )
583- {
584- marker [B -> i [t ]] = -1 ;
593+ /* inner product between row i of Q and column j of A */
594+ B -> x [ii ] = sparse_dot (Q -> x + Q -> p [i ], Q -> i + Q -> p [i ], nnz_q ,
595+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_a );
585596 }
586597 }
587-
588- free (marker );
589598}
590599
591600int count_nonzero_cols_csc (const CSC_Matrix * A )
0 commit comments