Skip to content

Commit a4c7381

Browse files
authored
Merge pull request #446 from abergeron/check_offset
Add offset in DEBUG
2 parents 1137c81 + c068f74 commit a4c7381

3 files changed

Lines changed: 44 additions & 6 deletions

File tree

pygpu/basic.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from string import Template
2-
from .gpuarray import GpuArray, GpuKernel
2+
from .gpuarray import GpuArray, GpuKernel, SIZE
33

44

55
def _generate_kernel(ctx, cols, upper=True):
66
tmpl = Template("""
7-
KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_uint N) {
7+
KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_size a_off, ga_uint N) {
8+
a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
89
unsigned int idx = GID_1 * LDIM_0 * GDIM_0 +
910
GID_0 * LDIM_0 + LID_0;
1011
unsigned int ix = idx/${cols};
@@ -20,7 +21,7 @@ def _generate_kernel(ctx, cols, upper=True):
2021
else:
2122
le = '<'
2223
src = tmpl.substitute(cols=cols, le=le)
23-
spec = [GpuArray, 'uint32']
24+
spec = [GpuArray, SIZE, 'uint32']
2425
k = GpuKernel(src, "extract_tri", spec, context=ctx)
2526
return k
2627

@@ -40,7 +41,7 @@ def triu(A, inplace=True):
4041
upper = True
4142
cols = A.shape[1]
4243
k = _generate_kernel(A.context, cols, upper)
43-
k(A, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1])
44+
k(A, A.offset, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1])
4445
return A
4546

4647

@@ -59,5 +60,5 @@ def tril(A, inplace=True):
5960
upper = False
6061
cols = A.shape[1]
6162
k = _generate_kernel(A.context, cols, upper)
62-
k(A, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1])
63+
k(A, A.offset, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1])
6364
return A

pygpu/gpuarray.pyx

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2223,13 +2223,40 @@ cdef class GpuArray:
22232223
def __get__(self):
22242224
return self.ga.offset
22252225

2226+
property data:
2227+
"""Return a pointer to the raw OpenCL buffer object.
2228+
2229+
This will fail for arrays that have an offset.
2230+
"""
2231+
def __get__(self):
2232+
if self.context.kind != b"opencl":
2233+
raise TypeError("This is for OpenCL arrays.")
2234+
if self.offset != 0:
2235+
raise ValueError("This array has an offset.")
2236+
# This wizardry grabs the actual backend pointer since it's
2237+
# guaranteed to be the first element of the gpudata
2238+
# structure.
2239+
return <size_t>((<void **>self.ga.data)[0])
2240+
2241+
property base_data:
2242+
"Return a pointer to the backing OpenCL object."
2243+
def __get__(self):
2244+
if self.context.kind != b"opencl":
2245+
raise TypeError("This is for OpenCL arrays.")
2246+
# This wizardry grabs the actual backend pointer since it's
2247+
# guaranteed to be the first element of the gpudata
2248+
# structure.
2249+
return <size_t>((<void **>self.ga.data)[0])
2250+
22262251
property gpudata:
22272252
"Return a pointer to the raw backend object."
22282253
def __get__(self):
2254+
if self.context.kind != b"cuda":
2255+
raise TypeError("This is for CUDA arrays.")
22292256
# This wizardry grabs the actual backend pointer since it's
22302257
# guaranteed to be the first element of the gpudata
22312258
# structure.
2232-
return <size_t>((<void **>self.ga.data)[0])
2259+
return <size_t>((<void **>self.ga.data)[0]) + self.offset
22332260

22342261
def __str__(self):
22352262
return str(numpy.asarray(self))

src/gpuarray_array.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,20 @@ int GpuArray_empty(GpuArray *a, gpucontext *ctx, int typecode,
118118
size *= d;
119119
}
120120

121+
/* We add an offset of 64 to all arrays in DEBUG to help catch errors. */
122+
#ifdef DEBUG
123+
assert(SIZE_MAX - size > 64);
124+
size += 64;
125+
#endif
126+
121127
a->data = gpudata_alloc(ctx, size, NULL, 0, &res);
122128
if (a->data == NULL) return ctx->err->code;
123129
a->nd = nd;
130+
#ifdef DEBUG
131+
a->offset = 64;
132+
#else
124133
a->offset = 0;
134+
#endif
125135
a->typecode = typecode;
126136
a->dimensions = calloc(nd, sizeof(size_t));
127137
a->strides = calloc(nd, sizeof(ssize_t));

0 commit comments

Comments
 (0)