File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -2608,6 +2608,12 @@ static void reduxGenSrcAppendDecode (GpuReduction* gr){
26082608 " TK1* restrict const W1R = &W1[GDIM_0*D];\n"
26092609 " TK1* restrict const SHMEMK1 = (TK1*)(SHMEM + SHMEMK1Off);\n" );
26102610 }
2611+ srcbAppends (& gr -> srcGen ,
2612+ " INITREDUXSTATE(SHMEMK0[LID_0], SHMEMK1[LID_0]);\n"
2613+ " if(D<LDIM_0 && LID_0+LDIM_0<H){\n"
2614+ " INITREDUXSTATE(SHMEMK0[LID_0+LDIM_0], SHMEMK1[LID_0+LDIM_0]);\n"
2615+ " }\n"
2616+ " local_barrier();\n" );
26112617 }
26122618
26132619
@@ -3511,9 +3517,7 @@ static int reduxInvInferProperties (redux_ctx* ctx){
35113517 }
35123518
35133519
3514- return ctx -> flags & 0 ? //FIXME: Delete this hack after debugging.
3515- reduxInvFlattenSource (ctx ):
3516- reduxInvComputeKernelArgs (ctx );
3520+ return reduxInvFlattenSource (ctx );
35173521}
35183522
35193523/**
You can’t perform that action at this time.
0 commit comments