Skip to content

Commit 328c957

Browse files
committed
Add a bunch of local_barrier() calls.
They are overkill, but they seem to fix the problems with the test cases, at least so far.
1 parent 4a17f48 commit 328c957

2 files changed

Lines changed: 49 additions & 46 deletions

File tree

src/gpuarray_reduction.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2609,6 +2609,7 @@ static void reduxGenSrcAppendDecode (GpuReduction* gr){
26092609
" TK1* restrict const SHMEMK1 = (TK1*)(SHMEM + SHMEMK1Off);\n");
26102610
}
26112611
srcbAppends(&gr->srcGen,
2612+
" local_barrier();\n"
26122613
" INITREDUXSTATE(SHMEMK0[LID_0], SHMEMK1[LID_0]);\n"
26132614
" if(D<LDIM_0 && LID_0+LDIM_0<H){\n"
26142615
" INITREDUXSTATE(SHMEMK0[LID_0+LDIM_0], SHMEMK1[LID_0+LDIM_0]);\n"
@@ -2745,6 +2746,7 @@ static void reduxGenSrcAppendIncrement (GpuReduction* gr,
27452746
static void reduxGenSrcAppendDstWrite (GpuReduction* gr,
27462747
uint32_t selector,
27472748
int initial){
2749+
srcbAppends(&gr->srcGen, " local_barrier();\n");
27482750
if (initial){
27492751
srcbAppends(&gr->srcGen, " if(LID_0 < D){\n"
27502752
" SETREDUXSTATE(W0R[GID_0*D + LID_0],\n"
@@ -2771,6 +2773,7 @@ static void reduxGenSrcAppendDstWrite (GpuReduction* gr,
27712773
" }\n");
27722774
}
27732775
}
2776+
srcbAppends(&gr->srcGen, " local_barrier();\n");
27742777
}
27752778
static void reduxGenSrcAppendPhase1 (GpuReduction* gr){
27762779
/**

0 commit comments

Comments
 (0)