Skip to content

Commit 27e0aff

Browse files
authored
Merge pull request #442 from abergeron/fixes
Mixed fixes
2 parents f526272 + 4ac185c commit 27e0aff

5 files changed

Lines changed: 23 additions & 12 deletions

File tree

pygpu/_elemwise.pyx

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,9 @@ cdef class arg:
4949
memset(&self.a, 0, sizeof(gpuelemwise_arg))
5050

5151
def __init__(self, name, type, read=False, write=False, scalar=False):
52+
# Make sure to clear previous storage
53+
# __init__ may be called more than once
54+
free(self.a.name)
5255
self.a.name = strdup(to_bytes(name))
5356
if self.a.name is NULL:
5457
raise MemoryError
@@ -63,6 +66,9 @@ cdef class arg:
6366
if self.a.flags == 0:
6467
raise ValueError('no flags specified for arg %s' % (name,))
6568

69+
def __dealloc__(self):
70+
free(self.a.name)
71+
6672
property name:
6773
def __get__(self):
6874
return self.a.name.decode('ascii')

pygpu/elemwise.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
from . import gpuarray
55
from ._elemwise import GpuElemwise, arg
66

7-
__all__ = ['GpuElemwise', 'elemwise1', 'elemwise2', 'ielemwise2', 'compare']
7+
__all__ = ['GpuElemwise', 'arg', 'as_argument',
8+
'elemwise1', 'elemwise2', 'ielemwise2', 'compare']
89

910

1011
def _dtype(o):

pygpu/gpuarray.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2455,7 +2455,7 @@ cdef class GpuKernel:
24552455
raise ValueError, "nd mismatch for gs (int)"
24562456
gs[0] = py_gs
24572457
elif isinstance(py_gs, (list, tuple)):
2458-
if len(py_gs) < 3:
2458+
if len(py_gs) > 3:
24592459
raise ValueError, "gs is not of length 3 or less"
24602460
if len(py_ls) != nd:
24612461
raise ValueError, "nd mismatch for gs (tuple)"

src/gpuarray_buffer_opencl.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ static int cl_get_device_count(unsigned int platform, unsigned int* devcount) {
5858
cl_platform_id p;
5959
cl_uint numd;
6060
cl_int err;
61-
unsigned int platcount;
61+
unsigned int platcount = 0;
6262

6363
/* This will load the library if needed */
6464
GA_CHECK(cl_get_platform_count(&platcount));

src/gpuarray_collectives_cuda_nccl.c

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,6 @@ static inline ncclDataType_t convert_data_type(int typecode) {
180180
case GA_LONG: return ncclInt64;
181181
case GA_ULONG: return ncclUint64;
182182
case GA_HALF: return ncclHalf;
183-
case GA_FLOAT16: return ncclHalf;
184183
}
185184
return nccl_NUM_TYPES;
186185
}
@@ -237,8 +236,9 @@ static inline int check_restrictions(gpudata *src, size_t offsrc,
237236
static int reduce(gpudata *src, size_t offsrc, gpudata *dest, size_t offdest,
238237
size_t count, int typecode, int opcode, int root,
239238
gpucomm *comm) {
240-
ncclRedOp_t op;
241-
ncclDataType_t datatype;
239+
// need dummy init so that compiler shuts up
240+
ncclRedOp_t op = nccl_NUM_OPS;
241+
ncclDataType_t datatype = nccl_NUM_TYPES;
242242
gpudata *dst = NULL;
243243
int rank = 0;
244244
cuda_context *ctx;
@@ -286,8 +286,9 @@ static int reduce(gpudata *src, size_t offsrc, gpudata *dest, size_t offdest,
286286
static int all_reduce(gpudata *src, size_t offsrc, gpudata *dest,
287287
size_t offdest, size_t count, int typecode, int opcode,
288288
gpucomm *comm) {
289-
ncclRedOp_t op;
290-
ncclDataType_t datatype;
289+
// need dummy init so that compiler shuts up
290+
ncclRedOp_t op = nccl_NUM_OPS;
291+
ncclDataType_t datatype = nccl_NUM_TYPES;
291292
cuda_context *ctx;
292293

293294
ASSERT_BUF(src);
@@ -323,8 +324,9 @@ static int all_reduce(gpudata *src, size_t offsrc, gpudata *dest,
323324
static int reduce_scatter(gpudata *src, size_t offsrc, gpudata *dest,
324325
size_t offdest, size_t count, int typecode,
325326
int opcode, gpucomm *comm) {
326-
ncclRedOp_t op;
327-
ncclDataType_t datatype;
327+
// need dummy init so that compiler shuts up
328+
ncclRedOp_t op = nccl_NUM_OPS;
329+
ncclDataType_t datatype = nccl_NUM_TYPES;
328330
int ndev = 0;
329331
size_t resc_size;
330332
cuda_context *ctx;
@@ -368,7 +370,8 @@ static int reduce_scatter(gpudata *src, size_t offsrc, gpudata *dest,
368370
*/
369371
static int broadcast(gpudata *array, size_t offset, size_t count, int typecode,
370372
int root, gpucomm *comm) {
371-
ncclDataType_t datatype;
373+
// need dummy init so that compiler shuts up
374+
ncclDataType_t datatype = nccl_NUM_TYPES;
372375
int rank = 0;
373376
cuda_context *ctx;
374377

@@ -407,7 +410,8 @@ static int broadcast(gpudata *array, size_t offset, size_t count, int typecode,
407410
static int all_gather(gpudata *src, size_t offsrc, gpudata *dest,
408411
size_t offdest, size_t count, int typecode,
409412
gpucomm *comm) {
410-
ncclDataType_t datatype;
413+
// need dummy init so that compiler shuts up
414+
ncclDataType_t datatype = nccl_NUM_TYPES;
411415
int ndev = 0;
412416
size_t resc_size;
413417
cuda_context *ctx;

0 commit comments

Comments
 (0)