Skip to content

Commit 8f7c251

Browse files
committed
addColToCols
1 parent 74155b0 commit 8f7c251

File tree

4 files changed

+208
-4
lines changed

4 files changed

+208
-4
lines changed

src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,25 @@ public Matrix addRowToRows(Matrix row) throws DimensionsMismatchException {
8181
return result;
8282
}
8383

84+
public Matrix addColToCols(Matrix col) throws DimensionsMismatchException {
85+
if(rows != col.rows) {
86+
final int[] dimensionsA = {rows, cols};
87+
final int[] dimensionsB = {col.rows, col.cols};
88+
throw new DimensionsMismatchException(dimensionsA, dimensionsB);
89+
}
90+
91+
Matrix result = new Matrix(rows, cols);
92+
93+
for(int row = 0; row < rows; row++) {
94+
for(int currentCol = 0; currentCol < cols; currentCol++) {
95+
int index = row * cols + currentCol;
96+
result.data[index] = data[index] + col.data[row];
97+
}
98+
}
99+
100+
return result;
101+
}
102+
84103
public Matrix relu() {
85104
Matrix result = new Matrix(rows, cols);
86105

@@ -91,6 +110,18 @@ public Matrix relu() {
91110
return result;
92111
}
93112

113+
public Matrix vectorizedReluDerivative() {
114+
Matrix result = new Matrix(rows, cols);
115+
116+
for(int i = 0; i < data.length; i++) {
117+
if(data[i] > 0) {
118+
result.data[i] = 1;
119+
}
120+
}
121+
122+
return result;
123+
}
124+
94125
public Matrix softmax() {
95126
Matrix result = new Matrix(rows, cols);
96127

@@ -123,6 +154,35 @@ public Matrix softmax() {
123154
return result;
124155
}
125156

157+
// public Matrix fastBatchSoftmaxDerivative(Matrix output) {
158+
// Matrix partialDerivatives = new Matrix(cols, cols);
159+
//
160+
// // for each set of output features
161+
// for(int outputRow = 0; outputRow < output.rows; outputRow++) {
162+
// int offset = outputRow * cols;
163+
// // for each output feature in the set
164+
// for(int i = 0; i < cols; i++) {
165+
// float valueI = output.data[offset + i];
166+
// // for each input feature in the set
167+
// for(int j = 0; j < cols; j++) {
168+
// float valueJ = output.data[offset + j];
169+
// if(i == j) {
170+
// partialDerivatives.data[i * cols + j] += valueI * (1 - valueI);
171+
// }
172+
// else {
173+
// partialDerivatives.data[i * cols + j] += -valueI * valueJ;
174+
// }
175+
// }
176+
// }
177+
// }
178+
//
179+
// for(int i = 0; i < partialDerivatives.data.length; i++) {
180+
// partialDerivatives.data[i] /= output.rows;
181+
// }
182+
//
183+
// return partialDerivatives;
184+
// }
185+
126186
public static boolean isCompatibleWithGPU(GPU gpu) {
127187
return gpu.isInitialized() &&
128188
gpu.getKernel("Matrices::matrixMultiply") != null &&
@@ -189,15 +249,17 @@ public Matrix multiply(GPU gpu, Matrix other) {
189249

190250
public Matrix addRowToRows(GPU gpu, Matrix row) {
191251
if(cols != row.cols) {
192-
return null;
252+
final int[] dimensionsA = {rows, cols};
253+
final int[] dimensionsB = {row.rows, row.cols};
254+
throw new DimensionsMismatchException(dimensionsA, dimensionsB);
193255
}
194256

195257
cl_context context = gpu.getContext();
196258
cl_command_queue commandQueue = gpu.getCommandQueue();
197259
cl_kernel kernel = gpu.getKernel("Matrices::addRowToRows");
198260

199261
if(kernel == null) {
200-
return null;
262+
throw new NullPointerException("Matrices::addRowToRows not found to be loaded in GPU");
201263
}
202264

203265
Matrix result = new Matrix(rows, cols);
@@ -243,6 +305,64 @@ public Matrix addRowToRows(GPU gpu, Matrix row) {
243305
return result;
244306
}
245307

308+
public Matrix addColToCols(GPU gpu, Matrix col) {
309+
if(rows != col.rows) {
310+
final int[] dimensionsA = {rows, cols};
311+
final int[] dimensionsB = {col.rows, col.cols};
312+
throw new DimensionsMismatchException(dimensionsA, dimensionsB);
313+
}
314+
315+
cl_context context = gpu.getContext();
316+
cl_command_queue commandQueue = gpu.getCommandQueue();
317+
cl_kernel kernel = gpu.getKernel("Matrices::addColToCols");
318+
319+
if(kernel == null) {
320+
throw new NullPointerException("Matrices::addColToCols not found to be loaded in GPU");
321+
}
322+
323+
Matrix result = new Matrix(rows, cols);
324+
325+
Pointer pointerA = Pointer.to(data);
326+
Pointer pointerB = Pointer.to(col.data);
327+
Pointer pointerOut = Pointer.to(result.data);
328+
329+
// Allocate the memory objects for the input- and output data
330+
cl_mem memoryA = clCreateBuffer(context,
331+
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
332+
Sizeof.cl_float * data.length, pointerA, null);
333+
cl_mem memoryB = clCreateBuffer(context,
334+
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
335+
Sizeof.cl_float * col.data.length, pointerB, null);
336+
cl_mem memoryOut = clCreateBuffer(context,
337+
CL_MEM_READ_WRITE,
338+
Sizeof.cl_float * result.data.length, null, null);
339+
340+
// Set the arguments for the kernel
341+
int argNum = 0;
342+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryOut));
343+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryA));
344+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryB));
345+
clSetKernelArg(kernel, argNum++, Sizeof.cl_uint, Pointer.to(new int[]{cols}));
346+
347+
// Set the work-item dimensions
348+
long local_work_sizes[] = new long[]{1};
349+
long global_work_sizes[] = new long[]{rows};
350+
351+
// Execute the kernel
352+
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
353+
global_work_sizes, local_work_sizes, 0, null, null);
354+
355+
// Read the output data
356+
clEnqueueReadBuffer(commandQueue, memoryOut, CL_TRUE, 0,
357+
result.data.length * Sizeof.cl_float, pointerOut, 0, null, null);
358+
359+
clReleaseMemObject(memoryA);
360+
clReleaseMemObject(memoryB);
361+
clReleaseMemObject(memoryOut);
362+
363+
return result;
364+
}
365+
246366
public Matrix relu(GPU gpu) {
247367
cl_context context = gpu.getContext();
248368
cl_command_queue commandQueue = gpu.getCommandQueue();

src/main/resources/kernels/Matrices.cl

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,34 @@ addRowToRows(__global float* C,
5555
{
5656
int globalRow = get_global_id(0);
5757

58+
int offset = globalRow * rowSize;
59+
5860
// one thread per row
5961
for (int i = 0; i < rowSize; i++)
6062
{
61-
C[globalRow * rowSize + i] = A[globalRow * rowSize + i] + B[i];
63+
C[offset + i] = A[offset + i] + B[i];
64+
}
65+
}
66+
67+
// Add col to cols: C[i][j] = A[i][j] + B[i], for every column j of row i.
// A and C are addressed row-major with rowSize elements per row; B is indexed by row.
// Each work-item processes the single row given by its global id in dimension 0.
__kernel void
addColToCols(__global float* C,
             __global float* A,
             __global float* B,
             const int rowSize)
{
    const int row = get_global_id(0);

    // start of this work-item's row in A and C
    __global float* src = A + row * rowSize;
    __global float* dst = C + row * rowSize;

    // one thread per row of A
    for (int remaining = rowSize; remaining > 0; remaining--)
    {
        *dst = *src + B[row];
        dst++;
        src++;
    }
}
6484

65-
// Add row to rows: output = ReLu(A).
85+
// Relu: output = ReLu(A).
6686
__kernel void
6787
relu(__global float* output,
6888
__global float* input,

src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ public class CpuTest {
77
public static void testAll() {
88
testMultiply();
99
testAddRowToRows();
10+
testAddColToCols();
1011
testRelu();
12+
testVectorizedReluDerivative();
1113
testSoftmax();
1214
}
1315

@@ -53,6 +55,27 @@ private static void testAddRowToRows() {
5355
}
5456
}
5557

58+
private static void testAddColToCols() {
59+
float[] aData = {1,0,2,0,3,0};
60+
float[] bData = {3,2,1};
61+
float[] expected = {4,3,4,2,4,1};
62+
63+
Matrix a = new Matrix(3,2, aData);
64+
Matrix b = new Matrix(3,1, bData);
65+
66+
Matrix result = a.addColToCols(b);
67+
68+
if(result.rows != a.rows || result.cols != a.cols) {
69+
throw new TestFailedException();
70+
}
71+
72+
for(int i = 0; i < result.data.length; i++) {
73+
if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
74+
throw new TestFailedException();
75+
}
76+
}
77+
}
78+
5679
private static void testRelu() {
5780
float[] data = {-1,2,-3,0};
5881
float[] expected = {0,2,0,0};
@@ -72,6 +95,25 @@ private static void testRelu() {
7295
}
7396
}
7497

98+
private static void testVectorizedReluDerivative() {
99+
float[] data = {-1,2,-3,0};
100+
float[] expected = {0,1,0,0};
101+
102+
Matrix m = new Matrix(2, 2, data);
103+
104+
Matrix result = m.vectorizedReluDerivative();
105+
106+
if(result.rows != m.rows || result.cols != m.cols) {
107+
throw new TestFailedException();
108+
}
109+
110+
for(int i = 0; i < result.data.length; i++) {
111+
if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
112+
throw new TestFailedException();
113+
}
114+
}
115+
}
116+
75117
private static void testSoftmax() {
76118
float[] data = {1.1f,2.2f,0.2f,-1.7f};
77119
float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};

src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ public static void testAll() throws IOException {
1717

1818
testMultiply();
1919
testAddRowToRows();
20+
testAddColToCols();
2021
testRelu();
2122
testSoftmax();
2223
}
@@ -84,6 +85,27 @@ private static void testAddRowToRows() {
8485
}
8586
}
8687

88+
private static void testAddColToCols() {
89+
float[] aData = {1,0,2,0,3,0};
90+
float[] bData = {3,2,1};
91+
float[] expected = {4,3,4,2,4,1};
92+
93+
Matrix a = new Matrix(3,2, aData);
94+
Matrix b = new Matrix(3,1, bData);
95+
96+
Matrix result = a.addColToCols(b);
97+
98+
if(result.rows != a.rows || result.cols != a.cols) {
99+
throw new TestFailedException();
100+
}
101+
102+
for(int i = 0; i < result.data.length; i++) {
103+
if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
104+
throw new TestFailedException();
105+
}
106+
}
107+
}
108+
87109
private static void testRelu() {
88110
float[] data = {-1,2,-3,0};
89111
float[] expected = {0,2,0,0};

0 commit comments

Comments
 (0)