Skip to content

Commit 7fa7ea3

Browse files
committed
updated haswell optimized sgemv_n kernel
1 parent 3fbc13e commit 7fa7ea3

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

kernel/x86_64/sgemv_n_microk_haswell-2.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
3535

3636
__asm__ __volatile__
3737
(
38+
"vzeroupper \n\t"
3839
"vbroadcastss (%2), %%ymm12 \n\t" // x0
3940
"vbroadcastss 4(%2), %%ymm13 \n\t" // x1
4041
"vbroadcastss 8(%2), %%ymm14 \n\t" // x2
@@ -64,6 +65,7 @@ static void sgemv_kernel_16x4( long n, float **ap, float *x, float *y)
6465
"addq $16, %0 \n\t"
6566
"subq $16, %1 \n\t"
6667
"jnz .L01LOOP%= \n\t"
68+
"vzeroupper \n\t"
6769

6870
:
6971
:

0 commit comments

Comments
 (0)