Skip to content

Commit 5761af1

Browse files
abergeron authored
Merge pull request #570 from jvesely/half_rte
cluda_opencl: Use round to nearest even to match cpu version
2 parents 04c2892 + ac58a79 commit 5761af1

2 files changed

Lines changed: 2 additions & 2 deletions

File tree

src/cluda_opencl.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ typedef struct _ga_half {
50 50   #define ga_half2float(p) vload_half(0, &((p).data))
51 51   static inline ga_half ga_float2half(ga_float f) {
52 52     ga_half r;
53    -   vstore_half_rtn(f, 0, &r.data);
   53 +   vstore_half_rte(f, 0, &r.data);
54 54     return r;
55 55   }
56 56

src/cluda_opencl.h.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ static const char cluda_opencl_h[] = {
122 122   0x6f, 0x61, 0x74, 0x20, 0x66, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
123 123   0x67, 0x61, 0x5f, 0x68, 0x61, 0x6c, 0x66, 0x20, 0x72, 0x3b, 0x0a,
124 124   0x20, 0x20, 0x76, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x68, 0x61,
125     - 0x6c, 0x66, 0x5f, 0x72, 0x74, 0x6e, 0x28, 0x66, 0x2c, 0x20, 0x30,
    125 + 0x6c, 0x66, 0x5f, 0x72, 0x74, 0x65, 0x28, 0x66, 0x2c, 0x20, 0x30,
126 126   0x2c, 0x20, 0x26, 0x72, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x3b,
127 127   0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x72,
128 128   0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x23, 0x70, 0x72, 0x61, 0x67, 0x6d,

0 commit comments

Comments (0)