@@ -27,27 +27,27 @@ struct maxandargmax_ctx{
2727 GpuArray * dstMax ;
2828 GpuArray * dstArgmax ;
2929 const GpuArray * src ;
30- unsigned reduxLen ;
31- const unsigned * reduxList ;
30+ int reduxLen ;
31+ const int * reduxList ;
3232
3333 /* General. */
3434 int ret ;
35- unsigned * axisList ;
35+ int * axisList ;
3636 gpucontext * gpuCtx ;
3737
3838 /* Source code Generator. */
3939 const char * dstMaxType ;
4040 const char * dstArgmaxType ;
41- unsigned ndd ;
42- unsigned ndr ;
43- unsigned nds ;
44- unsigned ndh ;
41+ int ndd ;
42+ int ndr ;
43+ int nds ;
44+ int ndh ;
4545 strb s ;
4646 char * sourceCode ;
4747 GpuKernel kernel ;
4848
4949 /* Scheduler */
50- unsigned hwAxisList [3 ];
50+ int hwAxisList [3 ];
5151 size_t blockSize [3 ];
5252 size_t gridSize [3 ];
5353 size_t chunkSize [3 ];
@@ -64,8 +64,8 @@ typedef struct maxandargmax_ctx maxandargmax_ctx;
6464
6565
6666/* Function prototypes */
67- static int axisInSet (unsigned v ,
68- const unsigned * set ,
67+ static int axisInSet (int v ,
68+ const int * set ,
6969 size_t setLen ,
7070 size_t * where );
7171static void appendIdxes (strb * s ,
@@ -102,7 +102,8 @@ GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray* dstMax,
102102 const GpuArray * src ,
103103 unsigned reduxLen ,
104104 const unsigned * reduxList ){
105- maxandargmax_ctx ctxSTACK = {dstMax , dstArgmax , src , reduxLen , reduxList },
105+ maxandargmax_ctx ctxSTACK = {dstMax , dstArgmax , src ,
106+ (int )reduxLen , (const int * )reduxList },
106107 * ctx = & ctxSTACK ;
107108
108109 if (maxandargmaxCheckargs (ctx ) == GA_NO_ERROR &&
@@ -127,8 +128,8 @@ GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray* dstMax,
127128 * @return Non-zero if the set is non-empty and v is in it; Zero otherwise.
128129 */
129130
130- static int axisInSet (unsigned v ,
131- const unsigned * set ,
131+ static int axisInSet (int v ,
132+ const int * set ,
132133 size_t setLen ,
133134 size_t * where ){
134135 size_t i ;
@@ -190,7 +191,7 @@ static void appendIdxes (strb* s,
190191 */
191192
192193static int maxandargmaxCheckargs (maxandargmax_ctx * ctx ){
193- unsigned i ;
194+ int i ;
194195
195196 /**
196197 * We initialize certain parts of the context.
@@ -216,13 +217,14 @@ static int maxandargmaxCheckargs (maxandargmax_ctx* ctx){
216217
217218 /* Insane src or reduxLen? */
218219 if (!ctx -> dstMax || !ctx -> dstArgmax || !ctx -> src || ctx -> src -> nd == 0 ||
219- ctx -> reduxLen == 0 || ctx -> reduxLen >= ctx -> src -> nd ){
220+ ctx -> reduxLen == 0 || ctx -> reduxLen > ( int ) ctx -> src -> nd ){
220221 return ctx -> ret = GA_INVALID_ERROR ;
221222 }
222223
223224 /* Insane or duplicate list entry? */
224225 for (i = 0 ;i < ctx -> reduxLen ;i ++ ){
225- if (ctx -> reduxList [i ] >= ctx -> src -> nd ||
226+ if (ctx -> reduxList [i ] < 0 ||
227+ ctx -> reduxList [i ] >= (int )ctx -> src -> nd ||
226228 axisInSet (ctx -> reduxList [i ], ctx -> reduxList , i , 0 )){
227229 return ctx -> ret = GA_INVALID_ERROR ;
228230 }
@@ -260,8 +262,8 @@ static int maxandargmaxCheckargs (maxandargmax_ctx* ctx){
260262 */
261263
262264static int maxandargmaxSelectHwAxes (maxandargmax_ctx * ctx ){
263- unsigned i , j , maxI = 0 ;
264- size_t maxV ;
265+ int i , j , maxI = 0 ;
266+ size_t maxV ;
265267
266268 ctx -> ndh = ctx -> ndd < 3 ? ctx -> ndd : 3 ;
267269
@@ -355,31 +357,33 @@ static void maxandargmaxAppendOffsets (maxandargmax_ctx* ctx){
355357 strb_appends (& ctx -> s , "\t\n" );
356358}
357359static void maxandargmaxAppendIndexDeclarations (maxandargmax_ctx * ctx ){
358- unsigned i ;
360+ int i ;
359361 strb_appends (& ctx -> s , "\t/* GPU kernel coordinates. Always 3D. */\n" );
360362
361363 strb_appends (& ctx -> s , "\tX bi0 = GID_0, bi1 = GID_1, bi2 = GID_2;\n" );
362364 strb_appends (& ctx -> s , "\tX bd0 = LDIM_0, bd1 = LDIM_1, bd2 = LDIM_2;\n" );
363365 strb_appends (& ctx -> s , "\tX ti0 = LID_0, ti1 = LID_1, ti2 = LID_2;\n" );
364366 strb_appends (& ctx -> s , "\tX gi0 = bi0*bd0+ti0, gi1 = bi1*bd1+ti1, gi2 = bi2*bd2+ti2;\n" );
365- strb_appends (& ctx -> s , "\tX " );
366- for (i = 0 ;i < ctx -> ndh ;i ++ ){
367- strb_appendf (& ctx -> s , "ci%u = chunkSize[%u]%s" ,
368- i , i , (i == ctx -> ndh - 1 ) ? ";\n" : ", " );
367+ if (ctx -> ndh > 0 ){
368+ strb_appends (& ctx -> s , "\tX " );
369+ for (i = 0 ;i < ctx -> ndh ;i ++ ){
370+ strb_appendf (& ctx -> s , "ci%u = chunkSize[%u]%s" ,
371+ i , i , (i == ctx -> ndh - 1 ) ? ";\n" : ", " );
372+ }
369373 }
370374
371375 strb_appends (& ctx -> s , "\t\n" );
372376 strb_appends (& ctx -> s , "\t\n" );
373377 strb_appends (& ctx -> s , "\t/* Free indices & Reduction indices */\n" );
374378
375- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "" , ";\n" );
376- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Dim" , ";\n" );
377- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Start" , ";\n" );
378- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "End" , ";\n" );
379- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "SStep" , ";\n" );
380- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "MStep" , ";\n" );
381- appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "AStep" , ";\n" );
382- appendIdxes (& ctx -> s , "\tX " , "i" , ctx -> ndd , ctx -> nds , "PDim" , ";\n" );
379+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "" , ";\n" );}
380+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Dim" , ";\n" );}
381+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "Start" , ";\n" );}
382+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "End" , ";\n" );}
383+ if ( ctx -> nds > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> nds , "SStep" , ";\n" );}
384+ if ( ctx -> ndd > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "MStep" , ";\n" );}
385+ if ( ctx -> ndd > 0 ){ appendIdxes (& ctx -> s , "\tX " , "i" , 0 , ctx -> ndd , "AStep" , ";\n" );}
386+ if ( ctx -> nds > ctx -> ndd ){ appendIdxes (& ctx -> s , "\tX " , "i" , ctx -> ndd , ctx -> nds , "PDim" , ";\n" );}
383387
384388 strb_appends (& ctx -> s , "\t\n" );
385389 strb_appends (& ctx -> s , "\t\n" );
@@ -605,7 +609,7 @@ static void maxandargmaxAppendLoopMacroUndefs (maxandargmax_ctx* ctx){
605609 strb_appends (& ctx -> s , "#undef DSTAINDEXER\n" );
606610}
607611static void maxandargmaxComputeAxisList (maxandargmax_ctx * ctx ){
608- unsigned i , f = 0 ;
612+ int i , f = 0 ;
609613
610614 for (i = 0 ;i < ctx -> nds ;i ++ ){
611615 if (axisInSet (i , ctx -> reduxList , ctx -> ndr , 0 )){
@@ -723,8 +727,10 @@ static int maxandargmaxSchedule (maxandargmax_ctx* ctx){
723727 }
724728 }
725729
726- dims [bestWarpAxis ] = (dims [bestWarpAxis ] + warpSize - 1 )/warpSize ;
727- gaIFactorize (warpSize , 0 , 0 , & factBS [bestWarpAxis ]);
730+ if (ctx -> ndh > 0 ){
731+ dims [bestWarpAxis ] = (dims [bestWarpAxis ] + warpSize - 1 )/warpSize ;
732+ gaIFactorize (warpSize , 0 , 0 , & factBS [bestWarpAxis ]);
733+ }
728734
729735 /**
730736 * Factorization job. We'll steadily increase the slack in case of failure
@@ -804,7 +810,7 @@ static int maxandargmaxInvoke (maxandargmax_ctx* ctx){
804810 ctx -> dstMaxStepsGD &&
805811 ctx -> dstArgmaxStepsGD ){
806812 ctx -> ret = GpuKernel_call (& ctx -> kernel ,
807- ctx -> ndh ,
813+ ctx -> ndh > 0 ? ctx -> ndh : 1 ,
808814 ctx -> blockSize ,
809815 ctx -> gridSize ,
810816 0 ,
0 commit comments