Skip to content

Commit 0c6228e

Browse files
committed
GPU mode is fully working now!
1 parent 60a4cb1 commit 0c6228e

2 files changed

Lines changed: 17 additions & 13 deletions

File tree

src/model/tensor_gpu.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ __global__ void reluKernel(const ValueType* input, ValueType* output, std::size_
187187

188188
__global__ void reluDerivativeKernel(const ValueType* input, ValueType* output, std::size_t count) {
189189
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
190-
if (idx < count) output[idx] = input[idx] > 0.0f ? 1.0f : 0.0f;
190+
if (idx < count) {
191+
ValueType derivative = (input[idx] > 0.0f) ? 1.0f : 0.0f;
192+
output[idx] *= derivative; // FIX: Changed = to *=
193+
}
191194
}
192195

193196
void relu(const ValueType* input, ValueType* output, std::size_t count) {
@@ -219,7 +222,8 @@ __global__ void sigmoidDerivativeKernel(const ValueType* input, ValueType* outpu
219222
if (idx < count) {
220223
ValueType x = input[idx];
221224
ValueType s = 1.0f / (1.0f + expf(-x));
222-
output[idx] = s * (1.0f - s);
225+
ValueType derivative = s * (1.0f - s);
226+
output[idx] *= derivative;
223227
}
224228
}
225229

@@ -248,7 +252,8 @@ __global__ void tanhDerivativeKernel(const ValueType* input, ValueType* output,
248252
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
249253
if (idx < count) {
250254
ValueType t = tanhf(input[idx]);
251-
output[idx] = 1.0f - t * t;
255+
ValueType derivative = 1.0f - t * t;
256+
output[idx] *= derivative;
252257
}
253258
}
254259

@@ -275,7 +280,10 @@ __global__ void leakyReluKernel(const ValueType* input, ValueType* output, std::
275280

276281
__global__ void leakyReluDerivativeKernel(const ValueType* input, ValueType* output, std::size_t count, ValueType alpha) {
277282
std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
278-
if (idx < count) output[idx] = (input[idx] > 0.0f) ? 1.0f : alpha;
283+
if (idx < count) {
284+
ValueType derivative = (input[idx] > 0.0f) ? 1.0f : alpha;
285+
output[idx] *= derivative; // FIX: Changed = to *=
286+
}
279287
}
280288

281289
void leaky_relu(const ValueType* input, ValueType* output, std::size_t count, ValueType alpha) {
@@ -384,7 +392,7 @@ __global__ void outerKernel(const ValueType* a, const ValueType* b, ValueType* r
384392
if (idx < total) {
385393
size_t i = idx / n;
386394
size_t j = idx % n;
387-
result[i * n + j] = a[i] * b[j];
395+
result[i * n + j] += a[i] * b[j];
388396
}
389397
}
390398

tests/data/config-binary_test.json

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
"enableNetwrokVisual": true,
55
"modes": [
66
{ "state": "pause", "mode": true },
7-
{ "state": "precise mode", "mode": false },
7+
{ "state": "precise mode", "mode": true },
88
{ "state": "auto pause", "mode": false }
99
]
1010
},
1111
"training config": {
12-
"batch size": 64,
13-
"batch count": 1000,
12+
"batch size": 8,
13+
"batch count": 10000,
1414
"optimizer": {
1515
"type": "const",
1616
"lr": 0.5
@@ -23,11 +23,7 @@
2323
"output size": 16,
2424
"output activation": 4,
2525
"layers": [
26-
{ "size": 100, "activationType": 1 },
27-
{ "size": 100, "activationType": 1 },
28-
{ "size": 100, "activationType": 1 },
29-
{ "size": 100, "activationType": 1 },
30-
{ "size": 300, "activationType": 1 }
26+
{ "size": 50, "activationType": 1 }
3127
]
3228
}
3329
]

0 commit comments

Comments
 (0)