Skip to content

Commit 6779942

Browse files
authored
Merge pull request #298 from abergeron/offset_take1
Add offset handling for indices and result in take1()
2 parents 6628674 + 45fd909 commit 6779942

2 files changed

Lines changed: 53 additions & 11 deletions

File tree

src/gpuarray_array.c

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,7 @@ static int gen_take1_kernel(GpuKernel *k, gpucontext *ctx, char **err_str,
336336
int flags = GA_USE_CLUDA;
337337
int res;
338338

339-
nargs = 7 + 2 * v->nd;
339+
nargs = 9 + 2 * v->nd;
340340

341341
atypes = calloc(nargs, sizeof(int));
342342
if (atypes == NULL)
@@ -351,34 +351,41 @@ static int gen_take1_kernel(GpuKernel *k, gpucontext *ctx, char **err_str,
351351
}
352352

353353
apos = 0;
354-
strb_appendf(&sb, "KERNEL void take1(GLOBAL_MEM %s *r, "
355-
"GLOBAL_MEM const %s *v, ga_size off,",
354+
strb_appendf(&sb, "KERNEL void take1(GLOBAL_MEM %s *r, ga_size r_off, "
355+
"GLOBAL_MEM const %s *v, ga_size v_off,",
356356
gpuarray_get_type(a->typecode)->cluda_name,
357357
gpuarray_get_type(v->typecode)->cluda_name);
358358
atypes[apos++] = GA_BUFFER;
359+
atypes[apos++] = GA_SIZE;
359360
atypes[apos++] = GA_BUFFER;
360361
atypes[apos++] = GA_SIZE;
361362
for (i = 0; i < v->nd; i++) {
362363
strb_appendf(&sb, " ga_ssize s%u, ga_size d%u,", i, i);
363364
atypes[apos++] = GA_SSIZE;
364365
atypes[apos++] = GA_SIZE;
365366
}
366-
strb_appendf(&sb, " GLOBAL_MEM const %s *ind, ga_size n0, ga_size n1,"
367-
" GLOBAL_MEM int* err) {\n",
367+
strb_appendf(&sb, " GLOBAL_MEM const %s *ind, ga_size i_off, "
368+
"ga_size n0, ga_size n1, GLOBAL_MEM int* err) {\n",
368369
gpuarray_get_type(ind->typecode)->cluda_name);
369370
atypes[apos++] = GA_BUFFER;
370371
atypes[apos++] = GA_SIZE;
371372
atypes[apos++] = GA_SIZE;
373+
atypes[apos++] = GA_SIZE;
372374
atypes[apos++] = GA_BUFFER;
373375
assert(apos == nargs);
374376
strb_appendf(&sb, " const %s idx0 = LDIM_0 * GID_0 + LID_0;\n"
375377
" const %s numThreads0 = LDIM_0 * GDIM_0;\n"
376378
" const %s idx1 = LDIM_1 * GID_1 + LID_1;\n"
377379
" const %s numThreads1 = LDIM_1 * GDIM_1;\n"
378380
" %s i0, i1;\n", sz, sz, sz, sz, sz);
381+
strb_appends(&sb, " if (idx0 >= n0 || idx1 >= n1) return;\n");
382+
strb_appendf(&sb, " r = (GLOBAL_MEM %s *)(((char *)r) + r_off);\n"
383+
" ind = (GLOBAL_MEM %s *)(((char *)ind) + i_off);\n",
384+
gpuarray_get_type(a->typecode)->cluda_name,
385+
gpuarray_get_type(ind->typecode)->cluda_name);
379386
strb_appendf(&sb, " for (i0 = idx0; i0 < n0; i0 += numThreads0) {\n"
380387
" %s ii0 = ind[i0];\n"
381-
" %s pos0 = off;\n"
388+
" %s pos0 = v_off;\n"
382389
" if (ii0 < 0) ii0 += d0;\n"
383390
" if ((ii0 < 0) || (ii0 >= d0)) {\n"
384391
" *err = -1;\n"
@@ -500,13 +507,16 @@ int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i,
500507

501508
argp = 0;
502509
GpuKernel_setarg(&k, argp++, a->data);
510+
GpuKernel_setarg(&k, argp++, (void *)&a->offset);
503511
GpuKernel_setarg(&k, argp++, v->data);
512+
/* The cast is to avoid a warning about const */
504513
GpuKernel_setarg(&k, argp++, (void *)&v->offset);
505514
for (j = 0; j < v->nd; j++) {
506515
GpuKernel_setarg(&k, argp++, &v->strides[j]);
507516
GpuKernel_setarg(&k, argp++, &v->dimensions[j]);
508517
}
509518
GpuKernel_setarg(&k, argp++, i->data);
519+
GpuKernel_setarg(&k, argp++, (void *)&i->offset);
510520
GpuKernel_setarg(&k, argp++, &n[0]);
511521
GpuKernel_setarg(&k, argp++, &n[1]);
512522
GpuKernel_setarg(&k, argp++, errbuf);
@@ -1083,11 +1093,15 @@ int GpuArray_fdump(FILE *fd, const GpuArray *a) {
10831093
case GA_UINT:
10841094
fprintf(fd, "%u", *(unsigned int *)p);
10851095
break;
1096+
case GA_LONG:
1097+
fprintf(fd, "%lld", (long long)*(int64_t *)p);
1098+
break;
10861099
case GA_SSIZE:
10871100
fprintf(fd, "%" SPREFIX "d", *(ssize_t *)p);
10881101
break;
10891102
default:
10901103
free(buf);
1104+
fprintf(fd, "<unsupported data type %d>\n", a->typecode);
10911105
return GA_UNSUPPORTED_ERROR;
10921106
}
10931107
s -= gpuarray_get_elsize(a->typecode);

tests/check_array.c

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,12 @@ START_TEST(test_take1_ok) {
2121
GpuArray v;
2222
GpuArray vidx;
2323
GpuArray vres;
24-
static const uint32_t data[24] = { 0, 1, 2, 3, 4, 5,
25-
6, 7, 8, 9, 10, 11,
26-
12, 13, 14, 15, 16, 17,
27-
18, 19, 20, 21, 22, 23};
24+
const uint32_t data[24] = { 0, 1, 2, 3, 4, 5,
25+
6, 7, 8, 9, 10, 11,
26+
12, 13, 14, 15, 16, 17,
27+
18, 19, 20, 21, 22, 23};
2828
uint32_t buf[12 * 24];
29-
static const size_t data_dims[1] = {24};
29+
const size_t data_dims[1] = {24};
3030
ssize_t indexes[12];
3131
size_t dims[3];
3232

@@ -243,12 +243,40 @@ START_TEST(test_take1_ok) {
243243
}
244244
END_TEST
245245

246+
START_TEST(test_take1_offset) {
247+
const uint32_t data[24] = {0, 1, 2, 3};
248+
const size_t data_dims[1] = {4};
249+
const size_t out_dims[1] = {2};
250+
const uint32_t idx[4] = {20, 3, 3, 2};
251+
GpuArray v;
252+
GpuArray i;
253+
GpuArray r;
254+
255+
ga_assert_ok(GpuArray_empty(&v, ctx, GA_UINT, 1, data_dims, GA_C_ORDER));
256+
ga_assert_ok(GpuArray_write(&v, data, sizeof(data)));
257+
258+
ga_assert_ok(GpuArray_empty(&i, ctx, GA_UINT, 1, data_dims, GA_C_ORDER));
259+
ga_assert_ok(GpuArray_write(&i, idx, sizeof(idx)));
260+
261+
ga_assert_ok(GpuArray_empty(&r, ctx, GA_UINT, 1, out_dims, GA_C_ORDER));
262+
263+
/* Fake subtensor for offset */
264+
i.offset = 8;
265+
i.dimensions[0] = 2;
266+
267+
ga_assert_ok(GpuArray_take1(&r, &v, &i, 1));
268+
/* The actual results are not important, this is just to check that
269+
we don't trigger the out of bounds check */
270+
}
271+
END_TEST
272+
246273
Suite *get_suite(void) {
247274
Suite *s = suite_create("array");
248275
TCase *tc = tcase_create("take1");
249276
tcase_add_checked_fixture(tc, setup, teardown);
250277
tcase_set_timeout(tc, 8.0);
251278
tcase_add_test(tc, test_take1_ok);
279+
tcase_add_test(tc, test_take1_offset);
252280
suite_add_tcase(s, tc);
253281
return s;
254282
}

0 commit comments

Comments
 (0)