Skip to content

Commit d3a5747

Browse files
authored
adds fast blas copy for sparse vectors (#51)
1 parent 7e7875d commit d3a5747

1 file changed

Lines changed: 23 additions & 7 deletions

File tree

src/utils/dense_matrix.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,31 @@ static void dense_block_left_mult_values(const Matrix *A, const CSC_Matrix *J,
141141
end--;
142142
}
143143

144-
/* scatter sparse J col into dense vector and then compute A @ j_dense */
145-
memset(j_dense, 0, n * sizeof(double));
146-
for (s = start; s < end; s++)
144+
int count = end - start;
145+
146+
if (count == 1)
147147
{
148-
j_dense[J->i[s] - block_start] = J->x[s];
148+
/* Fast path: C column segment = val * A[:, row_in_block] */
149+
int row_in_block = J->i[start] - block_start;
150+
double val = J->x[start];
151+
cblas_dcopy(m, dm->x + row_in_block, n, C->x + i, 1);
152+
if (val != 1.0)
153+
{
154+
cblas_dscal(m, val, C->x + i, 1);
155+
}
156+
}
157+
else
158+
{
159+
/* scatter sparse J col into dense vector and then compute A @ j_dense */
160+
memset(j_dense, 0, n * sizeof(double));
161+
for (s = start; s < end; s++)
162+
{
163+
j_dense[J->i[s] - block_start] = J->x[s];
164+
}
165+
166+
cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, dm->x, n, j_dense, 1,
167+
0.0, C->x + i, 1);
149168
}
150-
151-
cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, dm->x, n, j_dense, 1,
152-
0.0, C->x + i, 1);
153169
}
154170
}
155171
}

0 commit comments

Comments
 (0)