@@ -364,24 +364,26 @@ static void maxandargmaxAppendIndexDeclarations(maxandargmax_ctx* ctx){
364364 strb_appends (& ctx -> s , "\tX bd0 = LDIM_0, bd1 = LDIM_1, bd2 = LDIM_2;\n" );
365365 strb_appends (& ctx -> s , "\tX ti0 = LID_0, ti1 = LID_1, ti2 = LID_2;\n" );
366366 strb_appends (& ctx -> s , "\tX gi0 = bi0*bd0+ti0, gi1 = bi1*bd1+ti1, gi2 = bi2*bd2+ti2;\n" );
367- strb_appends (& ctx -> s , "\tX " );
368- for (i = 0 ;i < ctx -> ndh ;i ++ ){
369- strb_appendf (& ctx -> s , "ci%u = chunkSize[%u]%s" ,
370- i , i , (i == ctx -> ndh - 1 ) ? ";\n" : ", " );
367+ if (ctx -> ndh > 0 ){
368+ strb_appends (& ctx -> s , "\tX " );
369+ for (i = 0 ;i < ctx -> ndh ;i ++ ){
370+ strb_appendf (& ctx -> s , "ci%u = chunkSize[%u]%s" ,
371+ i , i , (i == ctx -> ndh - 1 ) ? ";\n" : ", " );
372+ }
371373 }
372374
373375 strb_appends (& ctx -> s , "\t\n" );
374376 strb_appends (& ctx -> s , "\t\n" );
375377 strb_appends (& ctx -> s , "\t/* Free indices & Reduction indices */\n" );
376378
377- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "" , ";\n" );
378- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Dim" , ";\n" );
379- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Start" , ";\n" );
380- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "End" , ";\n" );
381- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "SStep" , ";\n" );
382- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "MStep" , ";\n" );
383- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "AStep" , ";\n" );
384- appendIdxes (& ctx -> s , "\tX " , "i" , ctx -> ndd , ctx -> nds , "PDim" , ";\n" );
379+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "" , ";\n" );}
380+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Dim" , ";\n" );}
381+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Start" , ";\n" );}
382+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "End" , ";\n" );}
383+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "SStep" , ";\n" );}
384+ if ( ctx -> ndd > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "MStep" , ";\n" );}
385+ if ( ctx -> ndd > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "AStep" , ";\n" );}
386+ if ( ctx -> nds > ctx -> ndd ){ appendIdxes (& ctx -> s , "\tX " , "i" , ctx -> ndd , ctx -> nds , "PDim" , ";\n" );}
385387
386388 strb_appends (& ctx -> s , "\t\n" );
387389 strb_appends (& ctx -> s , "\t\n" );
@@ -725,8 +727,10 @@ static int maxandargmaxSchedule (maxandargmax_ctx* ctx){
725727 }
726728 }
727729
728- dims [bestWarpAxis ] = (dims [bestWarpAxis ] + warpSize - 1 )/warpSize ;
729- gaIFactorize (warpSize , 0 , 0 , & factBS [bestWarpAxis ]);
730+ if (ctx -> ndh > 0 ){
731+ dims [bestWarpAxis ] = (dims [bestWarpAxis ] + warpSize - 1 )/warpSize ;
732+ gaIFactorize (warpSize , 0 , 0 , & factBS [bestWarpAxis ]);
733+ }
730734
731735 /**
732736 * Factorization job. We'll steadily increase the slack in case of failure
@@ -806,7 +810,7 @@ static int maxandargmaxInvoke (maxandargmax_ctx* ctx){
806810 ctx -> dstMaxStepsGD &&
807811 ctx -> dstArgmaxStepsGD ){
808812 ctx -> ret = GpuKernel_call (& ctx -> kernel ,
809- ctx -> ndh ,
813+ ctx -> ndh > 0 ? ctx -> ndh : 1 ,
810814 ctx -> blockSize ,
811815 ctx -> gridSize ,
812816 0 ,
0 commit comments