Skip to content

Commit fc649da

Browse files
authored
Merge pull request #291 from abergeron/for_release
Small changes for the release
2 parents 6d0d1b5 + cda0d46 commit fc649da

9 files changed

Lines changed: 105 additions & 57 deletions

File tree

pygpu/gpuarray.pxd

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ cdef extern from "gpuarray/types.h":
4949
GA_NBASE
5050

5151
cdef extern from "gpuarray/util.h":
52-
const int gpuarray_api_major
53-
const int gpuarray_api_minor
5452
int gpuarray_register_type(gpuarray_type *t, int *ret)
5553
size_t gpuarray_get_elsize(int typecode)
5654
gpuarray_type *gpuarray_get_type(int typecode)
@@ -75,6 +73,7 @@ cdef extern from "gpuarray/buffer.h":
7573
gpucontext *gpucontext_init(const char *name, int devno, int flags, int *ret)
7674
void gpucontext_deref(gpucontext *ctx)
7775
char *gpucontext_error(gpucontext *ctx, int err)
76+
int gpudata_property(gpudata *ctx, int prop_id, void *res)
7877
int gpucontext_property(gpucontext *ctx, int prop_id, void *res)
7978
int gpukernel_property(gpukernel *k, int prop_id, void *res)
8079
gpucontext *gpudata_context(gpudata *)
@@ -101,6 +100,9 @@ cdef extern from "gpuarray/buffer.h":
101100
int GA_CTX_PROP_MAXGSIZE0
102101
int GA_CTX_PROP_MAXGSIZE1
103102
int GA_CTX_PROP_MAXGSIZE2
103+
104+
int GA_BUFFER_PROP_SIZE
105+
104106
int GA_KERNEL_PROP_MAXLSIZE
105107
int GA_KERNEL_PROP_PREFLSIZE
106108
int GA_KERNEL_PROP_NUMARGS
@@ -151,6 +153,7 @@ cdef extern from "gpuarray/array.h":
151153
ctypedef enum ga_order:
152154
GA_ANY_ORDER, GA_C_ORDER, GA_F_ORDER
153155

156+
void GpuArray_fix_flags(_GpuArray *a)
154157
int GpuArray_empty(_GpuArray *a, gpucontext *ctx,
155158
int typecode, int nd, const size_t *dims, ga_order ord)
156159
int GpuArray_fromdata(_GpuArray *a,

pygpu/gpuarray.pyx

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ from cpython cimport Py_INCREF, PyNumber_Index
1010
from cpython.object cimport Py_EQ, Py_NE
1111

1212
def api_version():
13-
# major, minor, py
14-
return (gpuarray_api_major, gpuarray_api_minor, 0)
13+
# Those were the last defined numbers.
14+
return (-9997, 1, 0)
1515

1616
np.import_array()
1717

@@ -212,6 +212,35 @@ cdef ga_order to_ga_order(ord) except <ga_order>-2:
212212
else:
213213
raise ValueError, "Valid orders are: 'A' (any), 'C' (C), 'F' (Fortran)"
214214

215+
cdef int strides_ok(GpuArray a, strides):
216+
# Check that the passed in strides will not go outside of the
217+
# memory of the array. It is assumed that the strides are of the
218+
# proper length.
219+
cdef ssize_t max_axis_offset
220+
cdef size_t lower = a.ga.offset
221+
cdef size_t upper = a.ga.offset
222+
cdef size_t itemsize = gpuarray_get_elsize(a.ga.typecode)
223+
cdef size_t size
224+
cdef unsigned int i
225+
226+
gpudata_property(a.ga.data, GA_BUFFER_PROP_SIZE, &size)
227+
228+
for i in range(a.ga.nd):
229+
if a.ga.dimensions[i] == 0:
230+
return 1
231+
232+
max_axis_offset = strides[i] * (a.ga.dimensions[i] - 1)
233+
if max_axis_offset > 0:
234+
if upper + max_axis_offset > size:
235+
return 0
236+
upper += max_axis_offset
237+
else:
238+
if lower < -max_axis_offset:
239+
return 0
240+
lower += max_axis_offset
241+
return (upper + itemsize) <= size
242+
243+
215244
class GpuArrayException(Exception):
216245
"""
217246
Exception used for most errors related to libgpuarray.
@@ -234,6 +263,9 @@ cdef bint py_CHKFLAGS(GpuArray a, int flags):
234263
cdef bint py_ISONESEGMENT(GpuArray a):
235264
return GpuArray_ISONESEGMENT(&a.ga)
236265

266+
cdef void array_fix_flags(GpuArray a):
267+
GpuArray_fix_flags(&a.ga)
268+
237269
cdef int array_empty(GpuArray a, gpucontext *ctx,
238270
int typecode, unsigned int nd, const size_t *dims,
239271
ga_order ord) except -1:
@@ -564,12 +596,6 @@ def init(dev, sched='default', disable_alloc_cache=False, single_stream=False):
564596
are no gaps in the valid numbers.
565597
"""
566598
cdef int flags = 0
567-
expected_version = -9997
568-
if gpuarray_api_major != expected_version or gpuarray_api_minor < 0:
569-
raise RuntimeError(
570-
"Pygpu was expecting libgpuarray version %d, but %d is available. "
571-
"Recompile it to avoid problems.",
572-
expected_version, gpuarray_api_major)
573599
if sched == 'single':
574600
flags |= GA_CTX_SINGLE_THREAD
575601
elif sched == 'multi':
@@ -1943,6 +1969,16 @@ cdef class GpuArray:
19431969
res[i] = self.ga.strides[i]
19441970
return tuple(res)
19451971

1972+
def __set__(self, newstrides):
    # Replace the strides of this array in place.
    #
    # `newstrides` must have one entry per dimension and must keep
    # every element inside the allocated buffer, otherwise ValueError
    # is raised.  Flags are recomputed once the new strides are stored.
    cdef unsigned int i
    cdef ssize_t checked
    if len(newstrides) != self.ga.nd:
        raise ValueError("new strides are the wrong length")
    if not strides_ok(self, newstrides):
        raise ValueError("new strides go outside of allocated memory")
    # Convert every entry once before touching the array: a failing
    # conversion then raises while the old strides are still intact,
    # instead of leaving a half-updated array with stale flags.
    for i in range(self.ga.nd):
        checked = newstrides[i]
    for i in range(self.ga.nd):
        self.ga.strides[i] = newstrides[i]
    array_fix_flags(self)
1981+
19461982
property ndim:
19471983
"The number of dimensions in this object"
19481984
def __get__(self):

pygpu/tests/test_gpu_ndarray.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import numpy
1010

11+
from nose.tools import assert_raises
1112
import pygpu
1213
from pygpu.gpuarray import GpuArray, GpuContext, GpuKernel
1314

@@ -448,6 +449,32 @@ def reshape(shps, offseted, order1, order2):
448449
assert numpy.allclose(outc, numpy.asarray(outg))
449450

450451

452+
def test_strides():
    """Generate the stride-assignment cases for a (4, 4) C-ordered array.

    Each yielded tuple is ``(strides_, shape, order, sliced, newstr)``.
    """
    # (sliced, new strides) pairs, in the order they are reported.
    cases = [
        (1, (4, 4)),
        (1, (4, 16)),
        (1, (16, 4)),
        (1, (16, 8)),
        (1, (16, 0)),
        (-1, (-20, 4)),
        (-1, (-12, 4)),
    ]
    for sliced, newstr in cases:
        yield strides_, (4, 4), 'c', sliced, newstr
460+
461+
462+
def set_strides(a, newstr):
    """Assign ``newstr`` to ``a.strides``.

    Exists as a function so the assignment can be handed to
    ``assert_raises`` as a callable.
    """
    setattr(a, 'strides', newstr)
464+
465+
466+
def strides_(shp, order, sliced, newstr):
    """Check that setting strides on a GPU array mirrors the host array.

    If assigning ``newstr`` to the host array raises ValueError, the GPU
    array must raise ValueError too.  Otherwise both arrays get the new
    strides and their flags and contents are compared.
    """
    host, dev = gen_gpuarray(shp, 'float32', sliced=sliced, order=order,
                             ctx=ctx)
    try:
        host.strides = newstr
    except ValueError:
        # The host side rejected the strides; the device side must as well.
        assert_raises(ValueError, set_strides, dev, newstr)
    else:
        dev.strides = newstr
        check_flags(dev, host)
        assert numpy.allclose(host, numpy.asarray(dev))
476+
477+
451478
def test_transpose():
452479
for shp in [(2, 3), (4, 8, 9), (1, 2, 3, 4)]:
453480
for offseted in [True, False]:

src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ set_target_properties(gpuarray PROPERTIES
8787
COMPILE_FLAGS "-DGPUARRAY_BUILDING_DLL -DGPUARRAY_SHARED"
8888
INSTALL_NAME_DIR ${CMAKE_INSTALL_PREFIX}/lib
8989
MACOSX_RPATH OFF
90+
# This is the shared library version
91+
VERSION 0.0
9092
)
9193

9294
add_library(gpuarray-static STATIC ${GPUARRAY_SRC})

src/gpuarray/array.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ static inline int GpuArray_CHKFLAGS(const GpuArray *a, int flags) {
196196
*/
197197
#define GpuArray_ITEMSIZE(a) gpuarray_get_elsize((a)->typecode)
198198

199+
/**
200+
* Fix the flags of an array using the current strides and shape.
201+
*
202+
* \param a GpuArray to fix flags for
203+
*/
204+
GPUARRAY_PUBLIC void GpuArray_fix_flags(GpuArray *a);
205+
199206
/**
200207
* Initialize and allocate a new empty (uninitialized data) array.
201208
*

src/gpuarray/util.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@ extern "C" {
1515
#include <gpuarray/elemwise.h>
1616
#include <gpuarray/types.h>
1717

18-
extern GPUARRAY_PUBLIC const int gpuarray_api_major;
19-
extern GPUARRAY_PUBLIC const int gpuarray_api_minor;
20-
2118
/**
2219
* Registers a type with the kernel machinery.
2320
*

src/gpuarray_array.c

Lines changed: 17 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,17 @@ static int ga_extcopy(GpuArray *dst, const GpuArray *src) {
8585
/* Value below which a size_t multiplication will never overflow. */
8686
#define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4))
8787

88-
int GpuArray_empty(GpuArray *a, gpucontext *ctx,
89-
int typecode, unsigned int nd, const size_t *dims,
90-
ga_order ord) {
88+
/*
 * Recompute the flags of `a` from its current strides and shape.
 * The writable bit is carried over; the contiguity and alignment bits
 * are set again only when the corresponding check succeeds.
 */
void GpuArray_fix_flags(GpuArray *a) {
  /* Clear every flag except GA_WRITEABLE before re-deriving the rest. */
  a->flags &= GA_WRITEABLE;

  if (GpuArray_is_c_contiguous(a)) {
    a->flags |= GA_C_CONTIGUOUS;
  }
  if (GpuArray_is_f_contiguous(a)) {
    a->flags |= GA_F_CONTIGUOUS;
  }
  if (GpuArray_is_aligned(a)) {
    a->flags |= GA_ALIGNED;
  }
}
96+
97+
int GpuArray_empty(GpuArray *a, gpucontext *ctx, int typecode,
98+
unsigned int nd, const size_t *dims, ga_order ord) {
9199
size_t size = gpuarray_get_elsize(typecode);
92100
unsigned int i;
93101
int res = GA_NO_ERROR;
@@ -185,9 +193,7 @@ int GpuArray_fromdata(GpuArray *a, gpudata *data, size_t offset, int typecode,
185193
memcpy(a->dimensions, dims, nd*sizeof(size_t));
186194
memcpy(a->strides, strides, nd*sizeof(ssize_t));
187195

188-
if (GpuArray_is_c_contiguous(a)) a->flags |= GA_C_CONTIGUOUS;
189-
if (GpuArray_is_f_contiguous(a)) a->flags |= GA_F_CONTIGUOUS;
190-
if (GpuArray_is_aligned(a)) a->flags |= GA_ALIGNED;
196+
GpuArray_fix_flags(a);
191197

192198
return GA_NO_ERROR;
193199
}
@@ -304,18 +310,7 @@ int GpuArray_index_inplace(GpuArray *a, const ssize_t *starts,
304310
a->dimensions = newdims;
305311
free(a->strides);
306312
a->strides = newstrs;
307-
if (GpuArray_is_c_contiguous(a))
308-
a->flags |= GA_C_CONTIGUOUS;
309-
else
310-
a->flags &= ~GA_C_CONTIGUOUS;
311-
if (GpuArray_is_f_contiguous(a))
312-
a->flags |= GA_F_CONTIGUOUS;
313-
else
314-
a->flags &= ~GA_F_CONTIGUOUS;
315-
if (GpuArray_is_aligned(a))
316-
a->flags |= GA_ALIGNED;
317-
else
318-
a->flags &= ~GA_ALIGNED;
313+
GpuArray_fix_flags(a);
319314

320315
return GA_NO_ERROR;
321316
}
@@ -582,9 +577,8 @@ int GpuArray_setarray(GpuArray *a, const GpuArray *v) {
582577
tv.nd = a->nd;
583578
tv.dimensions = a->dimensions;
584579
tv.strides = strs;
585-
/* This could be optiomized by setting the right flags */
586580
if (tv.nd != 0)
587-
tv.flags &= ~(GA_C_CONTIGUOUS|GA_F_CONTIGUOUS);
581+
GpuArray_fix_flags(&tv);
588582
err = ga_extcopy(a, &tv);
589583
free(strs);
590584
return err;
@@ -745,18 +739,7 @@ int GpuArray_reshape_inplace(GpuArray *a, unsigned int nd,
745739
a->strides = newstrides;
746740

747741
fix_flags:
748-
if (GpuArray_is_c_contiguous(a))
749-
a->flags |= GA_C_CONTIGUOUS;
750-
else
751-
a->flags &= ~GA_C_CONTIGUOUS;
752-
if (GpuArray_is_f_contiguous(a))
753-
a->flags |= GA_F_CONTIGUOUS;
754-
else
755-
a->flags &= ~GA_F_CONTIGUOUS;
756-
if (GpuArray_is_aligned(a))
757-
a->flags |= GA_ALIGNED;
758-
else
759-
a->flags &= ~GA_ALIGNED;
742+
GpuArray_fix_flags(a);
760743
return GA_NO_ERROR;
761744
}
762745

@@ -808,11 +791,7 @@ int GpuArray_transpose_inplace(GpuArray *a, const unsigned int *new_axes) {
808791
a->dimensions = newdims;
809792
a->strides = newstrs;
810793

811-
a->flags &= ~(GA_C_CONTIGUOUS|GA_F_CONTIGUOUS);
812-
if (GpuArray_is_c_contiguous(a))
813-
a->flags |= GA_C_CONTIGUOUS;
814-
if (GpuArray_is_f_contiguous(a))
815-
a->flags |= GA_F_CONTIGUOUS;
794+
GpuArray_fix_flags(a);
816795

817796
return GA_NO_ERROR;
818797
}
@@ -1016,10 +995,9 @@ int GpuArray_concatenate(GpuArray *r, const GpuArray **as, size_t n,
1016995
res_off = r->offset;
1017996
res_dims = r->dimensions;
1018997
res_flags = r->flags;
1019-
/* This could be optimized by setting the right flags */
1020-
r->flags &= ~(GA_C_CONTIGUOUS|GA_F_CONTIGUOUS);
1021998
for (i = 0; i < n; i++) {
1022999
r->dimensions = as[i]->dimensions;
1000+
GpuArray_fix_flags(r);
10231001
err = ga_extcopy(r, as[i]);
10241002
if (err != GA_NO_ERROR) {
10251003
r->dimensions = res_dims;

src/gpuarray_buffer_cuda.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,9 @@ static gpucontext *cuda_init(int ord, int flags, int *ret) {
399399
int r;
400400

401401
r = setup_lib();
402-
if (r != GA_NO_ERROR)
403-
return NULL;
402+
if (r != GA_NO_ERROR) {
403+
FAIL(NULL, r);
404+
}
404405

405406
if (ord == -1) {
406407
int i, c;

src/gpuarray_util.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
* phase. Once we go stable, this will move to 0 and go up from
1414
* there.
1515
*/
16-
const int gpuarray_api_major = -9997;
17-
const int gpuarray_api_minor = 1;
18-
1916
static gpuarray_type **custom_types = NULL;
2017
static int n_types = 0;
2118
static gpuarray_type no_type = {NULL, 0, 0, -1};

0 commit comments

Comments
 (0)