Skip to content

Commit 71c5b6b

Browse files
committed
Current status of reduction generalization and small-destination
support.
1 parent d641fb5 commit 71c5b6b

3 files changed

Lines changed: 1542 additions & 350 deletions

File tree

src/gpuarray/array.h

Lines changed: 102 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,27 @@ typedef enum _ga_order {
124124
GA_F_ORDER=1
125125
} ga_order;
126126

127+
/**
128+
* Supported array reduction operations.
*
* This is the type of the `op` parameter of GpuArray_reduction(). No explicit
* values are assigned, so the enumerators are numbered consecutively from
* GA_REDUCE_SUM == 0 in the order listed.
129+
*/
130+
131+
typedef enum _ga_reduce_op {
132+
GA_REDUCE_SUM, /* sum: + */
133+
GA_REDUCE_PROD, /* product: * */
134+
GA_REDUCE_PRODNZ, /* non-zero product: * (!=0) */
135+
GA_REDUCE_MIN, /* minimum: min() */
136+
GA_REDUCE_MAX, /* maximum: max() */
137+
GA_REDUCE_ARGMIN, /* argument of minimum: argmin() */
138+
GA_REDUCE_ARGMAX, /* argument of maximum: argmax() */
139+
GA_REDUCE_MINANDARGMIN, /* both at once: min(), argmin() */
140+
GA_REDUCE_MAXANDARGMAX, /* both at once: max(), argmax() */
141+
GA_REDUCE_AND, /* and: & */
142+
GA_REDUCE_OR, /* or: | */
143+
GA_REDUCE_XOR, /* xor: ^ */
144+
GA_REDUCE_ALL, /* all: &&/all() */
145+
GA_REDUCE_ANY, /* any: ||/any() */
146+
} ga_reduce_op;
147+
127148
/**
128149
* Checks if all the specified flags are set.
129150
*
@@ -615,26 +636,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
615636

616637
GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
617638

639+
618640
/**
619-
* @brief Computes simultaneously the maxima and the arguments of maxima over
620-
* specified axes of the tensor.
641+
* @brief Compute a reduction -- sum (+), product (*), non-zero product (* != 0),
642+
* min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
643+
* or (|), xor (^), all (&&) or any (||) over a list of axes to reduce.
621644
*
622-
* Returns two tensors of identical shape. Both tensors' axes are a subset of
623-
* the axes of the original tensor. The axes to be reduced are specified by
624-
* the caller, and the maxima and arguments of maxima are computed over them.
645+
* Returns one (in the case of min-and-argmin/max-and-argmax, two) destination
646+
* tensors. The destination tensor(s)' axes are a strict subset of the axes of the
647+
* source tensor. The axes to be reduced are specified by the caller, and the
648+
* reduction is performed over these axes, which are then removed in the
649+
* destination.
625650
*
626-
* @param [out] dstMax The resulting tensor of maxima
627-
* @param [out] dstArgmax the resulting tensor of arguments at maxima
651+
* @param [out] dst The destination tensor. Has the same type as the source.
652+
* @param [out] dstArg The destination tensor of argmin/argmax indices, for the operations that produce them. Has type int64.
628653
* @param [in] src The source tensor.
629654
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
630655
* <= src->nd.
631656
* @param [in] reduxList A list of integers of length reduxLen, indicating
632657
* the axes to be reduced. The order of the axes
633-
* matters for dstArgmax index calculations. All
634-
* entries in the list must be unique, >= 0 and
635-
* < src->nd.
658+
* matters for dstArg index calculations (GpuArray_argmin,
659+
* GpuArray_argmax, GpuArray_minandargmin,
660+
* GpuArray_maxandargmax). All entries in the list must be
661+
* unique, >= 0 and < src->nd.
636662
*
637-
* For example, if a 5D-tensor is reduced with an axis
663+
* For example, if a 5D-tensor is max-reduced with an axis
638664
* list of [3,4,1], then reduxLen shall be 3, and the
639665
* index calculation in every point shall take the form
640666
*
@@ -648,11 +674,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
648674
* code otherwise.
649675
*/
650676

651-
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dstMax,
652-
GpuArray* dstArgmax,
677+
/* Sum (+) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
653678
const GpuArray* src,
654679
unsigned reduxLen,
655680
const unsigned* reduxList);
681+
/* Product (*) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
682+
const GpuArray* src,
683+
unsigned reduxLen,
684+
const unsigned* reduxList);
685+
/* Non-zero product (* != 0) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
686+
const GpuArray* src,
687+
unsigned reduxLen,
688+
const unsigned* reduxList);
689+
/* Minimum reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
690+
const GpuArray* src,
691+
unsigned reduxLen,
692+
const unsigned* reduxList);
693+
/* Maximum reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
694+
const GpuArray* src,
695+
unsigned reduxLen,
696+
const unsigned* reduxList);
697+
/*
 * Argmin reduction of src over the reduxLen axes listed in reduxList; the
 * indices are written to dstArg. The order of the axes in reduxList matters
 * for the index calculation.
 */
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
698+
const GpuArray* src,
699+
unsigned reduxLen,
700+
const unsigned* reduxList);
701+
/*
 * Argmax reduction of src over the reduxLen axes listed in reduxList; the
 * indices are written to dstArg. The order of the axes in reduxList matters
 * for the index calculation.
 */
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
702+
const GpuArray* src,
703+
unsigned reduxLen,
704+
const unsigned* reduxList);
705+
/*
 * Combined min and argmin reduction of src over the reduxLen axes listed in
 * reduxList. Takes two destination tensors: dst and dstArg. The order of the
 * axes in reduxList matters for the dstArg index calculation.
 */
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
706+
GpuArray* dstArg,
707+
const GpuArray* src,
708+
unsigned reduxLen,
709+
const unsigned* reduxList);
710+
/*
 * Combined max and argmax reduction of src over the reduxLen axes listed in
 * reduxList. Takes two destination tensors: dst and dstArg. The order of the
 * axes in reduxList matters for the dstArg index calculation.
 */
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
711+
GpuArray* dstArg,
712+
const GpuArray* src,
713+
unsigned reduxLen,
714+
const unsigned* reduxList);
715+
/* AND (&) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
716+
const GpuArray* src,
717+
unsigned reduxLen,
718+
const unsigned* reduxList);
719+
/* OR (|) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
720+
const GpuArray* src,
721+
unsigned reduxLen,
722+
const unsigned* reduxList);
723+
/* XOR (^) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
724+
const GpuArray* src,
725+
unsigned reduxLen,
726+
const unsigned* reduxList);
727+
/* "All" (&&) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
728+
const GpuArray* src,
729+
unsigned reduxLen,
730+
const unsigned* reduxList);
731+
/* "Any" (||) reduction of src over the reduxLen axes listed in reduxList; the result is written to dst. */
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
732+
const GpuArray* src,
733+
unsigned reduxLen,
734+
const unsigned* reduxList);
735+
/*
 * Reduction entry point that takes the operation as a ga_reduce_op value in
 * `op`, together with both destination tensors (dst and dstArg).
 * NOTE(review): presumably this is the generic routine behind the
 * per-operation functions, and a destination not produced by `op` may be
 * unused -- confirm against the implementation, which is not visible here.
 */
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
736+
GpuArray* dst,
737+
GpuArray* dstArg,
738+
const GpuArray* src,
739+
unsigned reduxLen,
740+
const unsigned* reduxList);
741+
742+
743+
744+
656745

657746
#ifdef __cplusplus
658747
}

0 commit comments

Comments
 (0)