Skip to content

Commit d687734

Browse files
committed
testing simd macos
1 parent 95ddc65 commit d687734

1 file changed

Lines changed: 53 additions & 58 deletions

File tree

inc/vector_math/matrix4f.hpp

Lines changed: 53 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
#pragma once
22

3-
#include <intrin.h>
3+
#if defined(__x86__) || defined(__x86_64__)
4+
// do x64 stuff
5+
#include <intrin.h>
6+
#include <immintrin.h>
7+
#elif defined(__arm__) || defined(__arm64__)
8+
// do arm stuff
9+
#endif
10+
411
#include <vector_math/matrix4.hpp>
512
#include <vector_math/vector4f.hpp>
613

@@ -29,75 +36,63 @@ namespace systems::leal::vector_math
2936
};
3037

3138
Matrix4f Matrix4f::operator*(const Matrix4f &rhs) const {
32-
//auto toReturn = ((Mat<float,4,4> *)this)->operator*(rhs);
39+
#if defined(__x86__) || defined(__x86_64__)
40+
Matrix4f toReturn;
41+
auto transposed = rhs.transpose();
42+
alignas(float) float result[4];
43+
44+
const float *plhs = this->data;
45+
for (int c=0; c<4; c++) {
46+
const float *prhs = transposed.data;
47+
__m128 x = _mm_load_ps(plhs);
48+
for (int r=0; r<4; r++) {
49+
__m128 y = _mm_load_ps(prhs);
50+
__m128 z =_mm_mul_ps(x,y);
51+
_mm_store_ps(result, z);
52+
53+
float value=0;
54+
for (int e=0; e<4; e++) {
55+
value += result[e];
56+
}
57+
toReturn.data[4*r + c] = value;
58+
59+
prhs += 4;
60+
}
61+
plhs += 4;
62+
}
63+
return toReturn;
64+
#elif defined(__arm__) || defined(__arm64__)
65+
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
66+
return *(Matrix4f *)&toReturn;
67+
#endif
68+
}
3369

34-
Matrix4f toReturn;
35-
auto transposed = rhs.transpose();
36-
alignas(float) float result[4];
70+
Vector4f Matrix4f::operator*(const Vector4f &rhs) const {
71+
#if defined(__x86__) || defined(__x86_64__)
72+
Vector4f toReturn;
73+
alignas(float) float result[4];
3774

38-
const float *plhs = this->data;
39-
for (int c=0; c<4; c++) {
40-
const float *prhs = transposed.data;
41-
__m128 x = _mm_load_ps(plhs);
75+
const float *plhs = this->data;
76+
const float *prhs = rhs.data;
77+
__m128 y = _mm_load_ps(prhs);
4278
for (int r=0; r<4; r++) {
43-
__m128 y = _mm_load_ps(prhs);
79+
__m128 x = _mm_load_ps(plhs);
4480
__m128 z =_mm_mul_ps(x,y);
4581
_mm_store_ps(result, z);
4682

4783
float value=0;
4884
for (int e=0; e<4; e++) {
4985
value += result[e];
5086
}
51-
toReturn.data[4*r + c] = value;
52-
53-
prhs += 4;
54-
}
55-
plhs += 4;
56-
}
57-
58-
//__m128 y = _mm_load_ps(rhs.data);
59-
//__m128 z =_mm_mul_ps(x,y);
60-
//__m256 z = _mm256_add_ps(x,y);
61-
62-
//alignas(float) float result[8];
63-
//_mm256_store_ps(result, z);
87+
toReturn.data[r] = value;
6488

65-
66-
return toReturn;
67-
}
68-
69-
Vector4f Matrix4f::operator*(const Vector4f &rhs) const {
70-
//auto toReturn = ((Mat<float,4,4> *)this)->operator*(rhs);
71-
72-
Vector4f toReturn;
73-
alignas(float) float result[4];
74-
75-
const float *plhs = this->data;
76-
const float *prhs = rhs.data;
77-
__m128 y = _mm_load_ps(prhs);
78-
for (int r=0; r<4; r++) {
79-
__m128 x = _mm_load_ps(plhs);
80-
__m128 z =_mm_mul_ps(x,y);
81-
_mm_store_ps(result, z);
82-
83-
float value=0;
84-
for (int e=0; e<4; e++) {
85-
value += result[e];
89+
plhs += 4;
8690
}
87-
toReturn.data[r] = value;
88-
89-
plhs += 4;
90-
}
91-
92-
//__m128 y = _mm_load_ps(rhs.data);
93-
//__m128 z =_mm_mul_ps(x,y);
94-
//__m256 z = _mm256_add_ps(x,y);
95-
96-
//alignas(float) float result[8];
97-
//_mm256_store_ps(result, z);
98-
99-
100-
return toReturn;
91+
return toReturn;
92+
#elif defined(__arm__) || defined(__arm64__)
93+
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
94+
return *(Vector4f *)&toReturn;
95+
#endif
10196
}
10297

10398
Matrix4f Matrix4f::identity()

0 commit comments

Comments
 (0)