@@ -202,54 +202,7 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
202202 }
203203}
204204
205- CSC_Matrix * csr_to_csc (const CSR_Matrix * A )
206- {
207- CSC_Matrix * C = new_csc_matrix (A -> m , A -> n , A -> nnz );
208-
209- int i , j ;
210- int * count = malloc (A -> n * sizeof (int ));
211-
212- memset (count , 0 , A -> n * sizeof (int ));
213-
214- // -------------------------------------------------------------------
215- // compute nnz in each column of A
216- // -------------------------------------------------------------------
217- for (i = 0 ; i < A -> m ; ++ i )
218- {
219- for (j = A -> p [i ]; j < A -> p [i + 1 ]; ++ j )
220- {
221- count [A -> i [j ]]++ ;
222- }
223- }
224-
225- // ------------------------------------------------------------------
226- // compute column pointers
227- // ------------------------------------------------------------------
228- C -> p [0 ] = 0 ;
229- for (i = 0 ; i < A -> n ; ++ i )
230- {
231- C -> p [i + 1 ] = C -> p [i ] + count [i ];
232- count [i ] = C -> p [i ];
233- }
234-
235- // ------------------------------------------------------------------
236- // fill matrix
237- // ------------------------------------------------------------------
238- for (i = 0 ; i < A -> m ; ++ i )
239- {
240- for (j = A -> p [i ]; j < A -> p [i + 1 ]; ++ j )
241- {
242- C -> x [count [A -> i [j ]]] = A -> x [j ];
243- C -> i [count [A -> i [j ]]] = i ;
244- count [A -> i [j ]]++ ;
245- }
246- }
247-
248- free (count );
249- return C ;
250- }
251-
252- CSC_Matrix * csr_to_csc_fill_sparsity (const CSR_Matrix * A , int * iwork )
205+ CSC_Matrix * csr_to_csc_alloc (const CSR_Matrix * A , int * iwork )
253206{
254207 CSC_Matrix * C = new_csc_matrix (A -> m , A -> n , A -> nnz );
255208
@@ -312,7 +265,7 @@ void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork)
312265 }
313266}
314267
315- CSR_Matrix * csc_to_csr_fill_sparsity (const CSC_Matrix * A , int * iwork )
268+ CSR_Matrix * csc_to_csr_alloc (const CSC_Matrix * A , int * iwork )
316269{
317270 CSR_Matrix * C = new_csr_matrix (A -> m , A -> n , A -> nnz );
318271
@@ -435,19 +388,14 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B)
435388 return C ;
436389}
437390
438- void csc_matvec_fill_values (const CSC_Matrix * A , const double * z , CSR_Matrix * C )
391+ void yTA_fill_values (const CSC_Matrix * A , const double * y , CSR_Matrix * C )
439392{
440- /* Compute C = z^T * A where A is in CSC format
441- * C is a single-row CSR matrix with column indices pre-computed
442- * This fills in the values of C only
443- */
444-
445393 for (int col = 0 ; col < A -> n ; col ++ )
446394 {
447395 double val = 0 ;
448396 for (int j = A -> p [col ]; j < A -> p [col + 1 ]; j ++ )
449397 {
450- val += z [A -> i [j ]] * A -> x [j ];
398+ val += y [A -> i [j ]] * A -> x [j ];
451399 }
452400
453401 if (A -> p [col + 1 ] - A -> p [col ] == 0 ) continue ;
@@ -464,6 +412,7 @@ void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C)
464412 }
465413}
466414
415+ /* computes C = B^T * D * A in CSR */
467416void BTDA_fill_values (const CSC_Matrix * A , const CSC_Matrix * B , const double * d ,
468417 CSR_Matrix * C )
469418{
@@ -477,40 +426,62 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
477426 int nnz_bi = B -> p [i + 1 ] - B -> p [i ];
478427 int nnz_aj = A -> p [j + 1 ] - A -> p [j ];
479428
480- double sum ;
481429 if (d != NULL )
482430 {
483- sum = sparse_wdot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
484- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
431+ C -> x [ jj ] = sparse_wdot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
432+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
485433 }
486434 else
487435 {
488- sum = sparse_dot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
489- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
436+ C -> x [ jj ] = sparse_dot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
437+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
490438 }
439+ }
440+ }
441+ }
442+
443+ /* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
444+ * into column j of C using a marker array for position lookup) may be
445+ * faster when Q is dense, since it touches each Q entry exactly once.
446+ * The sparse_dot approach below is simpler but redundantly scans
447+ * column j of A for each nonzero row of C. */
448+ void BA_fill_values (const CSR_Matrix * Q , const CSC_Matrix * A , CSC_Matrix * C )
449+ {
450+ /* fill values of C = Q * A, given the sparsity pattern of C. */
451+ int i , j , ii ;
491452
492- C -> x [jj ] = sum ;
453+ /* for each column j of C */
454+ for (j = 0 ; j < C -> n ; j ++ )
455+ {
456+ for (ii = C -> p [j ]; ii < C -> p [j + 1 ]; ii ++ )
457+ {
458+ i = C -> i [ii ];
459+ int nnz_q = Q -> p [i + 1 ] - Q -> p [i ];
460+ int nnz_a = A -> p [j + 1 ] - A -> p [j ];
461+
462+ /* inner product between row i of Q and column j of A */
463+ C -> x [ii ] = sparse_dot (Q -> x + Q -> p [i ], Q -> i + Q -> p [i ], nnz_q ,
464+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_a );
493465 }
494466 }
495467}
496468
497- CSC_Matrix * sym_csr_csc_multiply_fill_sparsity (const CSR_Matrix * Q ,
498- const CSC_Matrix * A )
469+ CSC_Matrix * symBA_alloc (const CSR_Matrix * B , const CSC_Matrix * A )
499470{
500- /* Allocate B = Q * A (sparsity only). Q must be symmetric.
501- * Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n).
471+ /* Allocate C = B * A (sparsity only). B must be symmetric.
472+ * B is CSR (m x m), A is CSC (m x n), C is CSC (m x n).
502473 *
503- * Column j of B is Q * a_j = sum_k A_{k,j} Q [:, k], so the nonzero
504- * rows of column j of B are the union of the nonzero rows of Q [:, k].
474+ * Column j of C is B * a_j = sum_k A_{k,j} B [:, k], so the nonzero
475+ * rows of column j of C are the union of the nonzero rows of B [:, k].
505476 *
506- * Since Q is symmetric, we can find the nonzero rows of Q [:, k] by
507- * finding the nonzero columns of Q in row k.
477+ * Since B is symmetric, we can find the nonzero rows of B [:, k] by
478+ * finding the nonzero columns of B in row k.
508479 *
509480 * We use a marker array to avoid duplicates: marker[l] stores the
510481 * last column j that registered l as nonzero, so checking
511482 * marker[l] != j avoids duplicates. */
512483
513- int m = Q -> m ;
484+ int m = B -> m ;
514485 int n = A -> n ;
515486 int i , j , k , jj , ii , ell ;
516487
@@ -521,11 +492,11 @@ CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
521492 marker [i ] = -1 ;
522493 }
523494
524- int * Bp = (int * ) malloc ((n + 1 ) * sizeof (int ));
525- iVec * Bi = iVec_new (A -> nnz );
526- Bp [0 ] = 0 ;
495+ int * Cp = (int * ) malloc ((n + 1 ) * sizeof (int ));
496+ iVec * Ci = iVec_new (A -> nnz );
497+ Cp [0 ] = 0 ;
527498
528- /* for each column j of B */
499+ /* for each column j of C */
529500 for (j = 0 ; j < n ; j ++ )
530501 {
531502 int col_nnz = 0 ;
@@ -535,66 +506,33 @@ CSC_Matrix *sym_csr_csc_multiply_fill_sparsity(const CSR_Matrix *Q,
535506 {
536507 k = A -> i [ii ];
537508
538- /* find nonzero rows ell of column k of Q */
539- for (jj = Q -> p [k ]; jj < Q -> p [k + 1 ]; jj ++ )
509+ /* find nonzero rows ell of column k of B */
510+ for (jj = B -> p [k ]; jj < B -> p [k + 1 ]; jj ++ )
540511 {
541- ell = Q -> i [jj ];
512+ ell = B -> i [jj ];
542513 if (marker [ell ] != j )
543514 {
544515 marker [ell ] = j ;
545- iVec_append (Bi , ell );
516+ iVec_append (Ci , ell );
546517 col_nnz ++ ;
547518 }
548519 }
549520 }
550521
551- Bp [j + 1 ] = Bp [j ] + col_nnz ;
522+ Cp [j + 1 ] = Cp [j ] + col_nnz ;
552523 }
553524
554- /* allocate B and copy the computed structure */
555- int total_nnz = Bp [n ];
556- CSC_Matrix * B = new_csc_matrix (m , n , total_nnz );
557- memcpy (B -> p , Bp , (n + 1 ) * sizeof (int ));
558- memcpy (B -> i , Bi -> data , total_nnz * sizeof (int ));
525+ /* allocate C and copy the computed structure */
526+ int total_nnz = Cp [n ];
527+ CSC_Matrix * C = new_csc_matrix (m , n , total_nnz );
528+ memcpy (C -> p , Cp , (n + 1 ) * sizeof (int ));
529+ memcpy (C -> i , Ci -> data , total_nnz * sizeof (int ));
559530
560531 free (marker );
561- free (Bp );
562- iVec_free (Bi );
563-
564- return B ;
565- }
566-
567- /* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:]
568- * into column j of B using a marker array for position lookup) may be
569- * faster when Q is dense, since it touches each Q entry exactly once.
570- * The sparse_dot approach below is simpler but redundantly scans
571- * column j of A for each nonzero row of B. */
572- void sym_csr_csc_multiply_fill_values (const CSR_Matrix * Q , const CSC_Matrix * A ,
573- CSC_Matrix * B )
574- {
575- /* Fill values of B = Q * A. Q must be symmetric.
576- * B must have sparsity from sym_csr_csc_multiply_fill_sparsity.
577- *
578- * B_{l,j} = sum_k Q_{l,k} * A_{k,j} = dot(Q[l,:], A[:,j]).
579- * Since Q is symmetric, row l of Q has the same entries as
580- * column l, so we iterate over row l of Q in CSR format. */
581-
582- int i , j , ii ;
583-
584- /* for each column j of B */
585- for (j = 0 ; j < B -> n ; j ++ )
586- {
587- for (ii = B -> p [j ]; ii < B -> p [j + 1 ]; ii ++ )
588- {
589- i = B -> i [ii ];
590- int nnz_q = Q -> p [i + 1 ] - Q -> p [i ];
591- int nnz_a = A -> p [j + 1 ] - A -> p [j ];
532+ free (Cp );
533+ iVec_free (Ci );
592534
593- /* inner product between row i of Q and column j of A */
594- B -> x [ii ] = sparse_dot (Q -> x + Q -> p [i ], Q -> i + Q -> p [i ], nnz_q ,
595- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_a );
596- }
597- }
535+ return C ;
598536}
599537
600538int count_nonzero_cols_csc (const CSC_Matrix * A )
0 commit comments