Skip to content

Commit 0b1643f

Browse files
committed
Moved the reduction API to reduction.h.
1 parent 9464f60 commit 0b1643f

5 files changed

Lines changed: 284 additions & 151 deletions

File tree

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ set(headers
124124
gpuarray/extension.h
125125
gpuarray/ext_cuda.h
126126
gpuarray/kernel.h
127+
gpuarray/reduction.h
127128
gpuarray/types.h
128129
gpuarray/util.h
129130
)

src/gpuarray/array.h

Lines changed: 0 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -124,27 +124,6 @@ typedef enum _ga_order {
124124
GA_F_ORDER=1
125125
} ga_order;
126126

127-
/**
128-
* Supported array reduction operations.
129-
*/
130-
131-
typedef enum _ga_reduce_op {
132-
GA_REDUCE_SUM, /* + */
133-
GA_REDUCE_PROD, /* * */
134-
GA_REDUCE_PRODNZ, /* * (!=0) */
135-
GA_REDUCE_MIN, /* min() */
136-
GA_REDUCE_MAX, /* max() */
137-
GA_REDUCE_ARGMIN, /* argmin() */
138-
GA_REDUCE_ARGMAX, /* argmax() */
139-
GA_REDUCE_MINANDARGMIN, /* min(), argmin() */
140-
GA_REDUCE_MAXANDARGMAX, /* max(), argmax() */
141-
GA_REDUCE_AND, /* & */
142-
GA_REDUCE_OR, /* | */
143-
GA_REDUCE_XOR, /* ^ */
144-
GA_REDUCE_ALL, /* &&/all() */
145-
GA_REDUCE_ANY, /* ||/any() */
146-
} ga_reduce_op;
147-
148127
/**
149128
* Checks if all the specified flags are set.
150129
*
@@ -637,110 +616,6 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
637616
GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
638617

639618

640-
/**
641-
* @brief Compute a reduction sum (+), product (*), non-zero product (* != 0),
642-
* min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
643-
* or (|), xor (^), all (&&) or any (||) over a list of axes to reduce.
644-
*
645-
* Returns one (in the case of min-and-argmin/max-and-argmax, two) destination
646-
* tensors. The destination tensor(s)' axes are a strict subset of the axes of the
647-
* source tensor. The axes to be reduced are specified by the caller, and the
648-
* reduction is performed over these axes, which are then removed in the
649-
* destination.
650-
*
651-
* @param [out] dst The destination tensor. Has the same type as the source.
652-
* @param [out] dstArg For argument of minima/maxima operations. Has type int64.
653-
* @param [in] src The source tensor.
654-
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
655-
* <= src->nd.
656-
* @param [in] reduxList A list of integers of length reduxLen, indicating
657-
* the axes to be reduced. The order of the axes
658-
* matters for dstArg index calculations (GpuArray_argmin,
659-
* GpuArray_argmax, GpuArray_minandargmin,
660-
* GpuArray_maxandargmax). All entries in the list must be
661-
* unique, >= 0 and < src->nd.
662-
*
663-
* For example, if a 5D-tensor is max-reduced with an axis
664-
* list of [3,4,1], then reduxLen shall be 3, and the
665-
* index calculation in every point shall take the form
666-
*
667-
* dstArgmax[i0,i2] = i3 * src.shape[4] * src.shape[1] +
668-
* i4 * src.shape[1] +
669-
* i1
670-
*
671-
* where (i3,i4,i1) are the coordinates of the maximum-
672-
* valued element within subtensor [i0,:,i2,:,:] of src.
673-
* @return GA_NO_ERROR if the operation was successful, or a non-zero error
674-
* code otherwise.
675-
*/
676-
677-
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
678-
const GpuArray* src,
679-
unsigned reduxLen,
680-
const unsigned* reduxList);
681-
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
682-
const GpuArray* src,
683-
unsigned reduxLen,
684-
const unsigned* reduxList);
685-
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
686-
const GpuArray* src,
687-
unsigned reduxLen,
688-
const unsigned* reduxList);
689-
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
690-
const GpuArray* src,
691-
unsigned reduxLen,
692-
const unsigned* reduxList);
693-
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
694-
const GpuArray* src,
695-
unsigned reduxLen,
696-
const unsigned* reduxList);
697-
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
698-
const GpuArray* src,
699-
unsigned reduxLen,
700-
const unsigned* reduxList);
701-
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
702-
const GpuArray* src,
703-
unsigned reduxLen,
704-
const unsigned* reduxList);
705-
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
706-
GpuArray* dstArg,
707-
const GpuArray* src,
708-
unsigned reduxLen,
709-
const unsigned* reduxList);
710-
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
711-
GpuArray* dstArg,
712-
const GpuArray* src,
713-
unsigned reduxLen,
714-
const unsigned* reduxList);
715-
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
716-
const GpuArray* src,
717-
unsigned reduxLen,
718-
const unsigned* reduxList);
719-
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
720-
const GpuArray* src,
721-
unsigned reduxLen,
722-
const unsigned* reduxList);
723-
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
724-
const GpuArray* src,
725-
unsigned reduxLen,
726-
const unsigned* reduxList);
727-
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
728-
const GpuArray* src,
729-
unsigned reduxLen,
730-
const unsigned* reduxList);
731-
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
732-
const GpuArray* src,
733-
unsigned reduxLen,
734-
const unsigned* reduxList);
735-
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
736-
GpuArray* dst,
737-
GpuArray* dstArg,
738-
const GpuArray* src,
739-
unsigned reduxLen,
740-
const unsigned* reduxList);
741-
742-
743-
744619

745620

746621
#ifdef __cplusplus

src/gpuarray/reduction.h

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#ifndef GPUARRAY_REDUCTION_H
2+
#define GPUARRAY_REDUCTION_H
3+
/**
4+
* \file reduction.h
5+
* \brief Reduction functions.
6+
*/
7+
8+
#include <gpuarray/array.h>
9+
10+
#ifdef _MSC_VER
11+
#ifndef inline
12+
#define inline __inline
13+
#endif
14+
#endif
15+
16+
#ifdef __cplusplus
17+
extern "C" {
18+
#endif
19+
#ifdef CONFUSE_EMACS
20+
}
21+
#endif
22+
23+
24+
/**
25+
* Supported array reduction operations.
26+
*/
27+
28+
typedef enum _ga_reduce_op {
29+
GA_REDUCE_SUM, /* + */
30+
GA_REDUCE_PROD, /* * */
31+
GA_REDUCE_PRODNZ, /* * (!=0) */
32+
GA_REDUCE_MIN, /* min() */
33+
GA_REDUCE_MAX, /* max() */
34+
GA_REDUCE_ARGMIN, /* argmin() */
35+
GA_REDUCE_ARGMAX, /* argmax() */
36+
GA_REDUCE_MINANDARGMIN, /* min(), argmin() */
37+
GA_REDUCE_MAXANDARGMAX, /* max(), argmax() */
38+
GA_REDUCE_AND, /* & */
39+
GA_REDUCE_OR, /* | */
40+
GA_REDUCE_XOR, /* ^ */
41+
GA_REDUCE_ALL, /* &&/all() */
42+
GA_REDUCE_ANY, /* ||/any() */
43+
} ga_reduce_op;
44+
45+
46+
47+
/**
48+
* @brief Compute a reduction sum (+), product (*), non-zero product (* != 0),
49+
* min, max, argmin, argmax, min-and-argmin, max-and-argmax, bitwise and (&),
50+
* bitwise or (|), bitwise xor (^), all (&&) or any (||) over a list of axes to reduce.
51+
*
52+
* Returns one (in the case of min-and-argmin/max-and-argmax, two) destination
53+
* tensors. The destination tensor(s)' axes are a strict subset of the axes of the
54+
* source tensor. The axes to be reduced are specified by the caller, and the
55+
* reduction is performed over these axes, which are then removed in the
56+
* destination.
57+
*
58+
* @param [out] dst The destination tensor. Has the same type as the source.
59+
* @param [out] dstArg Receives the indices of the minima/maxima (argmin/argmax-style operations). Has type int64.
60+
* @param [in] src The source tensor.
61+
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
62+
* <= src->nd.
63+
* @param [in] reduxList A list of integers of length reduxLen, indicating
64+
* the axes to be reduced. The order of the axes
65+
* matters for dstArg index calculations (GpuArray_argmin,
66+
* GpuArray_argmax, GpuArray_minandargmin,
67+
* GpuArray_maxandargmax). All entries in the list must be
68+
* unique, >= 0 and < src->nd.
69+
*
70+
* For example, if a 5D-tensor is max-reduced with an axis
71+
* list of [3,4,1], then reduxLen shall be 3, and the
72+
* index calculation at every point shall take the form
73+
*
74+
* dstArgmax[i0,i2] = i3 * src.shape[4] * src.shape[1] +
75+
* i4 * src.shape[1] +
76+
* i1
77+
*
78+
* where (i3,i4,i1) are the coordinates of the maximum-
79+
* valued element within subtensor [i0,:,i2,:,:] of src.
80+
* @return GA_NO_ERROR if the operation was successful, or a non-zero error
81+
* code otherwise.
82+
*/
83+
84+
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
85+
const GpuArray* src,
86+
unsigned reduxLen,
87+
const unsigned* reduxList);
88+
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
89+
const GpuArray* src,
90+
unsigned reduxLen,
91+
const unsigned* reduxList);
92+
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
93+
const GpuArray* src,
94+
unsigned reduxLen,
95+
const unsigned* reduxList);
96+
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
97+
const GpuArray* src,
98+
unsigned reduxLen,
99+
const unsigned* reduxList);
100+
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
101+
const GpuArray* src,
102+
unsigned reduxLen,
103+
const unsigned* reduxList);
104+
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
105+
const GpuArray* src,
106+
unsigned reduxLen,
107+
const unsigned* reduxList);
108+
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
109+
const GpuArray* src,
110+
unsigned reduxLen,
111+
const unsigned* reduxList);
112+
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
113+
GpuArray* dstArg,
114+
const GpuArray* src,
115+
unsigned reduxLen,
116+
const unsigned* reduxList);
117+
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
118+
GpuArray* dstArg,
119+
const GpuArray* src,
120+
unsigned reduxLen,
121+
const unsigned* reduxList);
122+
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
123+
const GpuArray* src,
124+
unsigned reduxLen,
125+
const unsigned* reduxList);
126+
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
127+
const GpuArray* src,
128+
unsigned reduxLen,
129+
const unsigned* reduxList);
130+
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
131+
const GpuArray* src,
132+
unsigned reduxLen,
133+
const unsigned* reduxList);
134+
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
135+
const GpuArray* src,
136+
unsigned reduxLen,
137+
const unsigned* reduxList);
138+
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
139+
const GpuArray* src,
140+
unsigned reduxLen,
141+
const unsigned* reduxList);
142+
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
143+
GpuArray* dst,
144+
GpuArray* dstArg,
145+
const GpuArray* src,
146+
unsigned reduxLen,
147+
const unsigned* reduxList);
148+
149+
150+
151+
152+
153+
#ifdef __cplusplus
154+
}
155+
#endif
156+
157+
#endif

0 commit comments

Comments
 (0)