Skip to content

Commit 0c6228e

Browse files
committed
GPU mode is fully working now!
1 parent 60a4cb1 commit 0c6228e

2 files changed

Lines changed: 17 additions & 13 deletions

File tree

src/model/tensor_gpu.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ __global__ void reluKernel(const ValueType* input, ValueType* output, std::size_
187187

188188
__global__ void reluDerivativeKernel(const ValueType* input, ValueType* output, std::size_t count) {
189189
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
190-
if (idx < count) output[idx] = input[idx] > 0.0f ? 1.0f : 0.0f;
190+
if (idx < count) {
191+
ValueType derivative = (input[idx] > 0.0f) ? 1.0f : 0.0f;
192+
output[idx] *= derivative; // FIX: Changed = to *=
193+
}
191194
}
192195

193196
void relu(const ValueType* input, ValueType* output, std::size_t count) {
@@ -219,7 +222,8 @@ __global__ void sigmoidDerivativeKernel(const ValueType* input, ValueType* outpu
219222
if (idx < count) {
220223
ValueType x = input[idx];
221224
ValueType s = 1.0f / (1.0f + expf(-x));
222-
output[idx] = s * (1.0f - s);
225+
ValueType derivative = s * (1.0f - s);
226+
output[idx] *= derivative;
223227
}
224228
}
225229

@@ -248,7 +252,8 @@ __global__ void tanhDerivativeKernel(const ValueType* input, ValueType* output,
248252
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
249253
if (idx < count) {
250254
ValueType t = tanhf(input[idx]);
251-
output[idx] = 1.0f - t * t;
255+
ValueType derivative = 1.0f - t * t;
256+
output[idx] *= derivative;
252257
}
253258
}
254259

@@ -275,7 +280,10 @@ __global__ void leakyReluKernel(const ValueType* input, ValueType* output, std::
275280

276281
__global__ void leakyReluDerivativeKernel(const ValueType* input, ValueType* output, std::size_t count, ValueType alpha) {
277282
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
278-
if (idx < count) output[idx] = (input[idx] > 0.0f) ? 1.0f : alpha;
283+
if (idx < count) {
284+
ValueType derivative = (input[idx] > 0.0f) ? 1.0f : alpha;
285+
output[idx] *= derivative; // FIX: Changed = to *=
286+
}
279287
}
280288

281289
void leaky_relu(const ValueType* input, ValueType* output, std::size_t count, ValueType alpha) {
@@ -384,7 +392,7 @@ __global__ void outerKernel(const ValueType* a, const ValueType* b, ValueType* r
384392
if (idx < total) {
385393
size_t i = idx / n;
386394
size_t j = idx % n;
387-
result[i * n + j] = a[i] * b[j];
395+
result[i * n + j] += a[i] * b[j];
388396
}
389397
}
390398

tests/data/config-binary_test.json

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
"enableNetwrokVisual": true,
55
"modes": [
66
{ "state": "pause", "mode": true },
7-
{ "state": "precise mode", "mode": false },
7+
{ "state": "precise mode", "mode": true },
88
{ "state": "auto pause", "mode": false }
99
]
1010
},
1111
"training config": {
12-
"batch size": 64,
13-
"batch count": 1000,
12+
"batch size": 8,
13+
"batch count": 10000,
1414
"optimizer": {
1515
"type": "const",
1616
"lr": 0.5
@@ -23,11 +23,7 @@
2323
"output size": 16,
2424
"output activation": 4,
2525
"layers": [
26-
{ "size": 100, "activationType": 1 },
27-
{ "size": 100, "activationType": 1 },
28-
{ "size": 100, "activationType": 1 },
29-
{ "size": 100, "activationType": 1 },
30-
{ "size": 300, "activationType": 1 }
26+
{ "size": 50, "activationType": 1 }
3127
]
3228
}
3329
]

0 commit comments

Comments
 (0)