@@ -124,6 +124,27 @@ typedef enum _ga_order {
124124 GA_F_ORDER = 1
125125} ga_order ;
126126
127+ /**
128+ * Supported array reduction operations. A value of this type is passed as
 * the `op` argument of GpuArray_reduction().
 *
 * NOTE(review): the enumerator list ends with a trailing comma, which is
 * only valid from C99 onward (strict C89 compilers reject it) -- confirm
 * the project's language baseline.
129+ */
130+
131+ typedef enum _ga_reduce_op {
132+ GA_REDUCE_SUM , /* sum: + */
133+ GA_REDUCE_PROD , /* product: * */
134+ GA_REDUCE_PRODNZ , /* non-zero product: * (!= 0) */
135+ GA_REDUCE_MIN , /* minimum: min() */
136+ GA_REDUCE_MAX , /* maximum: max() */
137+ GA_REDUCE_ARGMIN , /* argument of the minimum: argmin() */
138+ GA_REDUCE_ARGMAX , /* argument of the maximum: argmax() */
139+ GA_REDUCE_MINANDARGMIN , /* minimum and its argument: min(), argmin() */
140+ GA_REDUCE_MAXANDARGMAX , /* maximum and its argument: max(), argmax() */
141+ GA_REDUCE_AND , /* and: & */
142+ GA_REDUCE_OR , /* or: | */
143+ GA_REDUCE_XOR , /* xor: ^ */
144+ GA_REDUCE_ALL , /* all: && / all() */
145+ GA_REDUCE_ANY , /* any: || / any() */
146+ } ga_reduce_op ;
147+
127148/**
128149 * Checks if all the specified flags are set.
129150 *
@@ -615,26 +636,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
615636
616637GPUARRAY_PUBLIC int GpuArray_fdump (FILE * fd , const GpuArray * a );
617638
639+
618640/**
619- * @brief Computes simultaneously the maxima and the arguments of maxima over
620- * specified axes of the tensor.
641+ * @brief Compute a reduction over a list of axes. The supported reductions are
642+ * sum (+), product (*), non-zero product (* != 0), min, max, argmin, argmax,
643+ * min-and-argmin, max-and-argmax, and (&), or (|), xor (^), all (&&) and any (||).
621644 *
622- * Returns two tensors of identical shape. Both tensors' axes are a subset of
623- * the axes of the original tensor. The axes to be reduced are specified by
624- * the caller, and the maxima and arguments of maxima are computed over them.
645+ * Produces one destination tensor (two in the case of min-and-argmin and
646+ * max-and-argmax). The destination tensor(s)' axes are a strict subset of the axes of the
647+ * source tensor. The axes to be reduced are specified by the caller, and the
648+ * reduction is performed over these axes, which are then removed in the
649+ * destination.
625650 *
626- * @param [out] dstMax The resulting tensor of maxima
627- * @param [out] dstArgmax the resulting tensor of arguments at maxima
651+ * @param [out] dst The destination tensor. Has the same type as the source.
652+ * @param [out] dstArg The destination tensor for the arguments of minima/maxima. Has type int64.
628653 * @param [in] src The source tensor.
629654 * @param [in] reduxLen The number of axes reduced. Must be >= 1 and
630655 * <= src->nd.
631656 * @param [in] reduxList A list of integers of length reduxLen, indicating
632657 * the axes to be reduced. The order of the axes
633- * matters for dstArgmax index calculations. All
634- * entries in the list must be unique, >= 0 and
635- * < src->nd.
658+ * matters for dstArg index calculations (GpuArray_argmin,
659+ * GpuArray_argmax, GpuArray_minandargmin,
660+ * GpuArray_maxandargmax). All entries in the list must be
661+ * unique, >= 0 and < src->nd.
636662 *
637- * For example, if a 5D-tensor is reduced with an axis
663+ * For example, if a 5D-tensor is max-reduced with an axis
638664 * list of [3,4,1], then reduxLen shall be 3, and the
639665 * index calculation in every point shall take the form
640666 *
@@ -648,11 +674,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
648674 * code otherwise.
649675 */
650676
651- GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dstMax ,
652- GpuArray * dstArgmax ,
/* Sum (+) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
677+ GPUARRAY_PUBLIC int GpuArray_sum (GpuArray * dst ,
653678 const GpuArray * src ,
654679 unsigned reduxLen ,
655680 const unsigned * reduxList );
/* Product (*) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
681+ GPUARRAY_PUBLIC int GpuArray_prod (GpuArray * dst ,
682+ const GpuArray * src ,
683+ unsigned reduxLen ,
684+ const unsigned * reduxList );
/* Non-zero product (* != 0) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
685+ GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray * dst ,
686+ const GpuArray * src ,
687+ unsigned reduxLen ,
688+ const unsigned * reduxList );
/* Min reduction of src over the reduxLen axes listed in reduxList; the minima are written to dst. */
689+ GPUARRAY_PUBLIC int GpuArray_min (GpuArray * dst ,
690+ const GpuArray * src ,
691+ unsigned reduxLen ,
692+ const unsigned * reduxList );
/* Max reduction of src over the reduxLen axes listed in reduxList; the maxima are written to dst. */
693+ GPUARRAY_PUBLIC int GpuArray_max (GpuArray * dst ,
694+ const GpuArray * src ,
695+ unsigned reduxLen ,
696+ const unsigned * reduxList );
/* Argmin reduction of src over the reduxLen axes listed in reduxList; the arguments of the minima are written to dstArg (no value tensor is taken). */
697+ GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray * dstArg ,
698+ const GpuArray * src ,
699+ unsigned reduxLen ,
700+ const unsigned * reduxList );
/* Argmax reduction of src over the reduxLen axes listed in reduxList; the arguments of the maxima are written to dstArg (no value tensor is taken). */
701+ GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray * dstArg ,
702+ const GpuArray * src ,
703+ unsigned reduxLen ,
704+ const unsigned * reduxList );
/* Combined min and argmin reduction of src over the listed axes: the minima are written to dst and their arguments to dstArg. */
705+ GPUARRAY_PUBLIC int GpuArray_minandargmin (GpuArray * dst ,
706+ GpuArray * dstArg ,
707+ const GpuArray * src ,
708+ unsigned reduxLen ,
709+ const unsigned * reduxList );
/* Combined max and argmax reduction of src over the listed axes: the maxima are written to dst and their arguments to dstArg. */
710+ GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dst ,
711+ GpuArray * dstArg ,
712+ const GpuArray * src ,
713+ unsigned reduxLen ,
714+ const unsigned * reduxList );
/* And (&) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
715+ GPUARRAY_PUBLIC int GpuArray_and (GpuArray * dst ,
716+ const GpuArray * src ,
717+ unsigned reduxLen ,
718+ const unsigned * reduxList );
/* Or (|) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
719+ GPUARRAY_PUBLIC int GpuArray_or (GpuArray * dst ,
720+ const GpuArray * src ,
721+ unsigned reduxLen ,
722+ const unsigned * reduxList );
/* Xor (^) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
723+ GPUARRAY_PUBLIC int GpuArray_xor (GpuArray * dst ,
724+ const GpuArray * src ,
725+ unsigned reduxLen ,
726+ const unsigned * reduxList );
/* All (&&) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
727+ GPUARRAY_PUBLIC int GpuArray_all (GpuArray * dst ,
728+ const GpuArray * src ,
729+ unsigned reduxLen ,
730+ const unsigned * reduxList );
/* Any (||) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
731+ GPUARRAY_PUBLIC int GpuArray_any (GpuArray * dst ,
732+ const GpuArray * src ,
733+ unsigned reduxLen ,
734+ const unsigned * reduxList );
/*
 * Generic form of the reductions declared above: the operation is passed
 * explicitly as `op` (a ga_reduce_op value) instead of being implied by the
 * function name. The remaining parameters have the same names and types as
 * in the per-operation functions.
 *
 * NOTE(review): presumably only the destination(s) relevant to `op` are
 * used (dst for value reductions, dstArg for argmin/argmax, both for the
 * combined operations) -- confirm against the implementation, including
 * whether the unused one may be NULL.
 */
735+ GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op ,
736+ GpuArray * dst ,
737+ GpuArray * dstArg ,
738+ const GpuArray * src ,
739+ unsigned reduxLen ,
740+ const unsigned * reduxList );
741+
742+
743+
744+
656745
657746#ifdef __cplusplus
658747}
0 commit comments