Skip to content

Commit 14cae83

Browse files
committed
Report errors in collectives through error_set with descriptive messages.
1 parent 6ba0ef5 commit 14cae83

2 files changed

Lines changed: 16 additions & 15 deletions

File tree

src/gpuarray_array_collectives.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ static inline size_t find_total_elems(const GpuArray* array) {
2929
static inline int check_gpuarrays(int times_src, const GpuArray* src,
3030
int times_dest, const GpuArray* dest,
3131
size_t* count) {
32+
gpucontext *ctx = gpudata_context(src->data);
3233
size_t count_src, count_dest;
3334
count_src = find_total_elems(src);
3435
count_dest = find_total_elems(dest);
3536
if (times_src * count_src != times_dest * count_dest)
36-
return GA_VALUE_ERROR;
37+
return error_set(ctx->err, GA_VALUE_ERROR, "Size mismatch for transfer");
3738
if (src->typecode != dest->typecode)
38-
return GA_VALUE_ERROR;
39+
return error_set(ctx->err, GA_VALUE_ERROR, "Type mismatch");
3940
if (!GpuArray_ISALIGNED(src) || !GpuArray_CHKFLAGS(dest, GA_BEHAVED))
40-
return GA_UNALIGNED_ERROR;
41+
return error_set(ctx->err, GA_UNALIGNED_ERROR, "Misbehaved arrays");
4142

4243
if (times_src >= times_dest)
4344
*count = count_src;
@@ -50,7 +51,7 @@ int GpuArray_reduce_from(const GpuArray* src, int opcode, int root,
5051
gpucomm* comm) {
5152
size_t total_elems;
5253
if (!GpuArray_ISALIGNED(src))
53-
return GA_UNALIGNED_ERROR;
54+
return error_set(ctx->err, GA_UNALIGNED_ERROR, "Unaligned input");
5455
total_elems = find_total_elems(src);
5556
return gpucomm_reduce(src->data, src->offset, NULL, 0, total_elems,
5657
src->typecode, opcode, root, comm);
@@ -95,10 +96,10 @@ int GpuArray_broadcast(GpuArray* array, int root, gpucomm* comm) {
9596
GA_CHECK(gpucomm_get_rank(comm, &rank));
9697
if (rank == root) {
9798
if (!GpuArray_CHKFLAGS(array, GA_BEHAVED))
98-
return GA_UNALIGNED_ERROR;
99+
return error_set(ctx->err, GA_UNALIGNED_ERROR, "Unaligned input");
99100
} else {
100101
if (!GpuArray_ISALIGNED(array))
101-
return GA_UNALIGNED_ERROR;
102+
return error_set(ctx->err, GA_UNALIGNED_ERROR, "Unaligned input");
102103
}
103104

104105
total_elems = find_total_elems(array);

src/gpuarray_buffer_collectives.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ int gpucomm_new(gpucomm** comm, gpucontext* ctx, gpucommCliqueId comm_id,
88
int ndev, int rank) {
99
if (ctx->comm_ops == NULL) {
1010
*comm = NULL;
11-
return GA_UNSUPPORTED_ERROR;
11+
return error_set(ctx->err, GA_UNSUPPORTED_ERROR, "Collectives unavailable");
1212
}
1313
return ctx->comm_ops->comm_new(comm, ctx, comm_id, ndev, rank);
1414
}
@@ -30,21 +30,21 @@ gpucontext* gpucomm_context(gpucomm* comm) {
3030
}
3131
int gpucomm_gen_clique_id(gpucontext* ctx, gpucommCliqueId* comm_id) {
3232
if (ctx->comm_ops == NULL)
33-
return GA_COMM_ERROR;
33+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
3434
return ctx->comm_ops->generate_clique_id(ctx, comm_id);
3535
}
3636

3737
int gpucomm_get_count(gpucomm* comm, int* gpucount) {
3838
gpucontext* ctx = gpucomm_context(comm);
3939
if (ctx->comm_ops == NULL)
40-
return GA_COMM_ERROR;
40+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
4141
return ctx->comm_ops->get_count(comm, gpucount);
4242
}
4343

4444
int gpucomm_get_rank(gpucomm* comm, int* rank) {
4545
gpucontext* ctx = gpucomm_context(comm);
4646
if (ctx->comm_ops == NULL)
47-
return GA_COMM_ERROR;
47+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
4848
return ctx->comm_ops->get_rank(comm, rank);
4949
}
5050

@@ -53,7 +53,7 @@ int gpucomm_reduce(gpudata* src, size_t offsrc, gpudata* dest, size_t offdest,
5353
gpucomm* comm) {
5454
gpucontext* ctx = gpucomm_context(comm);
5555
if (ctx->comm_ops == NULL)
56-
return GA_COMM_ERROR;
56+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
5757
return ctx->comm_ops->reduce(src, offsrc, dest, offdest, count, typecode,
5858
opcode, root, comm);
5959
}
@@ -63,7 +63,7 @@ int gpucomm_all_reduce(gpudata* src, size_t offsrc, gpudata* dest,
6363
gpucomm* comm) {
6464
gpucontext* ctx = gpucomm_context(comm);
6565
if (ctx->comm_ops == NULL)
66-
return GA_COMM_ERROR;
66+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
6767
return ctx->comm_ops->all_reduce(src, offsrc, dest, offdest, count, typecode,
6868
opcode, comm);
6969
}
@@ -73,7 +73,7 @@ int gpucomm_reduce_scatter(gpudata* src, size_t offsrc, gpudata* dest,
7373
int opcode, gpucomm* comm) {
7474
gpucontext* ctx = gpucomm_context(comm);
7575
if (ctx->comm_ops == NULL)
76-
return GA_COMM_ERROR;
76+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
7777
return ctx->comm_ops->reduce_scatter(src, offsrc, dest, offdest, count,
7878
typecode, opcode, comm);
7979
}
@@ -82,7 +82,7 @@ int gpucomm_broadcast(gpudata* array, size_t offset, size_t count, int typecode,
8282
int root, gpucomm* comm) {
8383
gpucontext* ctx = gpucomm_context(comm);
8484
if (ctx->comm_ops == NULL)
85-
return GA_COMM_ERROR;
85+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
8686
return ctx->comm_ops->broadcast(array, offset, count, typecode, root, comm);
8787
}
8888

@@ -91,7 +91,7 @@ int gpucomm_all_gather(gpudata* src, size_t offsrc, gpudata* dest,
9191
gpucomm* comm) {
9292
gpucontext* ctx = gpucomm_context(comm);
9393
if (ctx->comm_ops == NULL)
94-
return GA_COMM_ERROR;
94+
return error_set(ctx->err, GA_DEVSUP_ERROR, "Collectives unavailable");
9595
return ctx->comm_ops->all_gather(src, offsrc, dest, offdest, count, typecode,
9696
comm);
9797
}

0 commit comments

Comments
 (0)