@@ -111,6 +111,34 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A)
111111 return C ;
112112}
113113
/*
 * Unweighted inner product of two sparse vectors.
 *
 * Each vector is given as parallel arrays: values (a_x / b_x), their
 * indices (a_i / b_i) and the number of stored entries (a_nnz / b_nnz).
 *
 * The walk below always advances the side holding the smaller index, so it
 * pairs up every common index only when both index arrays are in ascending
 * order. Returns 0.0 when either vector has no stored entries.
 */
static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz,
                                const double *b_x, const int *b_i, int b_nnz)
{
    double acc = 0.0;
    int pa = 0;
    int pb = 0;

    while (pa < a_nnz && pb < b_nnz)
    {
        const int ia = a_i[pa];
        const int ib = b_i[pb];

        /* index present only in a: skip it */
        if (ia < ib)
        {
            pa++;
            continue;
        }

        /* index present only in b: skip it */
        if (ib < ia)
        {
            pb++;
            continue;
        }

        /* shared index: contributes to the product */
        acc += a_x[pa] * b_x[pb];
        pa++;
        pb++;
    }

    return acc;
}
141+
114142static inline double sparse_wdot (const double * a_x , const int * a_i , int a_nnz ,
115143 const double * b_x , const int * b_i , int b_nnz ,
116144 const double * d )
@@ -158,9 +186,17 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C)
158186 int nnz_ai = A -> p [ii + 1 ] - A -> p [ii ];
159187 int nnz_aj = A -> p [j + 1 ] - A -> p [j ];
160188
161- /* compute Cij = weighted inner product of column i and column j */
162- double sum = sparse_wdot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
163- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
189+ double sum ;
190+ if (d != NULL )
191+ {
192+ sum = sparse_wdot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
193+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
194+ }
195+ else
196+ {
197+ sum = sparse_dot (A -> x + A -> p [ii ], A -> i + A -> p [ii ], nnz_ai ,
198+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
199+ }
164200
165201 C -> x [jj ] = sum ;
166202 }
@@ -443,15 +479,115 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d,
443479 int nnz_bi = B -> p [i + 1 ] - B -> p [i ];
444480 int nnz_aj = A -> p [j + 1 ] - A -> p [j ];
445481
446- /* compute Cij = weighted inner product of col i of B and col j of A */
447- double sum = sparse_wdot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
448- A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
482+ double sum ;
483+ if (d != NULL )
484+ {
485+ sum = sparse_wdot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
486+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj , d );
487+ }
488+ else
489+ {
490+ sum = sparse_dot (B -> x + B -> p [i ], B -> i + B -> p [i ], nnz_bi ,
491+ A -> x + A -> p [j ], A -> i + A -> p [j ], nnz_aj );
492+ }
449493
450494 C -> x [jj ] = sum ;
451495 }
452496 }
453497}
454498
499+ CSC_Matrix * csr_csc_multiply_fill_sparsity (const CSR_Matrix * Q , const CSC_Matrix * A )
500+ {
501+ /* Allocate B = Q * A (sparsity only).
502+ * Q is CSR (m x m), A is CSC (m x n), B is CSC (m x n). */
503+
504+ int m = Q -> m ;
505+ int n = A -> n ;
506+
507+ int * marker = (int * ) malloc (m * sizeof (int ));
508+ memset (marker , -1 , m * sizeof (int ));
509+
510+ int * Bp = (int * ) malloc ((n + 1 ) * sizeof (int ));
511+ iVec * Bi = iVec_new (A -> nnz );
512+ Bp [0 ] = 0 ;
513+
514+ for (int j = 0 ; j < n ; j ++ )
515+ {
516+ int col_nnz = 0 ;
517+
518+ for (int t = A -> p [j ]; t < A -> p [j + 1 ]; t ++ )
519+ {
520+ int k = A -> i [t ];
521+
522+ for (int s = Q -> p [k ]; s < Q -> p [k + 1 ]; s ++ )
523+ {
524+ int row = Q -> i [s ];
525+ if (marker [row ] != j )
526+ {
527+ marker [row ] = j ;
528+ iVec_append (Bi , row );
529+ col_nnz ++ ;
530+ }
531+ }
532+ }
533+
534+ Bp [j + 1 ] = Bp [j ] + col_nnz ;
535+ }
536+
537+ int total_nnz = Bp [n ];
538+ CSC_Matrix * B = new_csc_matrix (m , n , total_nnz );
539+ memcpy (B -> p , Bp , (n + 1 ) * sizeof (int ));
540+ memcpy (B -> i , Bi -> data , total_nnz * sizeof (int ));
541+
542+ free (marker );
543+ free (Bp );
544+ iVec_free (Bi );
545+
546+ return B ;
547+ }
548+
549+ void csr_csc_multiply_fill_values (const CSR_Matrix * Q , const CSC_Matrix * A ,
550+ CSC_Matrix * B )
551+ {
552+ /* Fill values of B = Q * A. B must have sparsity from
553+ * csr_csc_multiply_fill_sparsity. */
554+
555+ int m = Q -> m ;
556+
557+ int * marker = (int * ) malloc (m * sizeof (int ));
558+ memset (marker , -1 , m * sizeof (int ));
559+ memset (B -> x , 0 , B -> nnz * sizeof (double ));
560+
561+ for (int j = 0 ; j < B -> n ; j ++ )
562+ {
563+ /* map row index -> position in column j of B */
564+ for (int t = B -> p [j ]; t < B -> p [j + 1 ]; t ++ )
565+ {
566+ marker [B -> i [t ]] = t ;
567+ }
568+
569+ /* accumulate A_{k,j} * Q[k, :] */
570+ for (int t = A -> p [j ]; t < A -> p [j + 1 ]; t ++ )
571+ {
572+ int k = A -> i [t ];
573+ double a_kj = A -> x [t ];
574+
575+ for (int s = Q -> p [k ]; s < Q -> p [k + 1 ]; s ++ )
576+ {
577+ B -> x [marker [Q -> i [s ]]] += a_kj * Q -> x [s ];
578+ }
579+ }
580+
581+ /* reset marker */
582+ for (int t = B -> p [j ]; t < B -> p [j + 1 ]; t ++ )
583+ {
584+ marker [B -> i [t ]] = -1 ;
585+ }
586+ }
587+
588+ free (marker );
589+ }
590+
455591int count_nonzero_cols_csc (const CSC_Matrix * A )
456592{
457593 int count = 0 ;
0 commit comments