|
1 | 1 | #pragma once |
2 | 2 |
|
3 | | -#include <intrin.h> |
| 3 | +#if defined(__x86__) || defined(__x86_64__) |
| 4 | + // do x64 stuff |
| 5 | + #include <intrin.h> |
| 6 | + #include <immintrin.h> |
| 7 | +#elif defined(__arm__) || defined(__arm64__) |
| 8 | + // do arm stuff |
| 9 | +#endif |
| 10 | + |
4 | 11 | #include <vector_math/matrix4.hpp> |
5 | 12 | #include <vector_math/vector4f.hpp> |
6 | 13 |
|
@@ -29,75 +36,63 @@ namespace systems::leal::vector_math |
29 | 36 | }; |
30 | 37 |
|
31 | 38 | Matrix4f Matrix4f::operator*(const Matrix4f &rhs) const { |
32 | | - //auto toReturn = ((Mat<float,4,4> *)this)->operator*(rhs); |
| 39 | + #if defined(__x86__) || defined(__x86_64__) |
| 40 | + Matrix4f toReturn; |
| 41 | + auto transposed = rhs.transpose(); |
| 42 | + alignas(float) float result[4]; |
| 43 | + |
| 44 | + const float *plhs = this->data; |
| 45 | + for (int c=0; c<4; c++) { |
| 46 | + const float *prhs = transposed.data; |
| 47 | + __m128 x = _mm_load_ps(plhs); |
| 48 | + for (int r=0; r<4; r++) { |
| 49 | + __m128 y = _mm_load_ps(prhs); |
| 50 | + __m128 z =_mm_mul_ps(x,y); |
| 51 | + _mm_store_ps(result, z); |
| 52 | + |
| 53 | + float value=0; |
| 54 | + for (int e=0; e<4; e++) { |
| 55 | + value += result[e]; |
| 56 | + } |
| 57 | + toReturn.data[4*r + c] = value; |
| 58 | + |
| 59 | + prhs += 4; |
| 60 | + } |
| 61 | + plhs += 4; |
| 62 | + } |
| 63 | + return toReturn; |
| 64 | + #elif defined(__arm__) || defined(__arm64__) |
| 65 | + auto toReturn = ((Matrix4<float> *)this)->operator*(rhs); |
| 66 | + return *(Matrix4f *)&toReturn; |
| 67 | + #endif |
| 68 | + } |
33 | 69 |
|
34 | | - Matrix4f toReturn; |
35 | | - auto transposed = rhs.transpose(); |
36 | | - alignas(float) float result[4]; |
| 70 | + Vector4f Matrix4f::operator*(const Vector4f &rhs) const { |
| 71 | + #if defined(__x86__) || defined(__x86_64__) |
| 72 | + Vector4f toReturn; |
| 73 | + alignas(float) float result[4]; |
37 | 74 |
|
38 | | - const float *plhs = this->data; |
39 | | - for (int c=0; c<4; c++) { |
40 | | - const float *prhs = transposed.data; |
41 | | - __m128 x = _mm_load_ps(plhs); |
| 75 | + const float *plhs = this->data; |
| 76 | + const float *prhs = rhs.data; |
| 77 | + __m128 y = _mm_load_ps(prhs); |
42 | 78 | for (int r=0; r<4; r++) { |
43 | | - __m128 y = _mm_load_ps(prhs); |
| 79 | + __m128 x = _mm_load_ps(plhs); |
44 | 80 | __m128 z =_mm_mul_ps(x,y); |
45 | 81 | _mm_store_ps(result, z); |
46 | 82 |
|
47 | 83 | float value=0; |
48 | 84 | for (int e=0; e<4; e++) { |
49 | 85 | value += result[e]; |
50 | 86 | } |
51 | | - toReturn.data[4*r + c] = value; |
52 | | - |
53 | | - prhs += 4; |
54 | | - } |
55 | | - plhs += 4; |
56 | | - } |
57 | | - |
58 | | - //__m128 y = _mm_load_ps(rhs.data); |
59 | | - //__m128 z =_mm_mul_ps(x,y); |
60 | | - //__m256 z = _mm256_add_ps(x,y); |
61 | | - |
62 | | - //alignas(float) float result[8]; |
63 | | - //_mm256_store_ps(result, z); |
| 87 | + toReturn.data[r] = value; |
64 | 88 |
|
65 | | - |
66 | | - return toReturn; |
67 | | - } |
68 | | - |
69 | | - Vector4f Matrix4f::operator*(const Vector4f &rhs) const { |
70 | | - //auto toReturn = ((Mat<float,4,4> *)this)->operator*(rhs); |
71 | | - |
72 | | - Vector4f toReturn; |
73 | | - alignas(float) float result[4]; |
74 | | - |
75 | | - const float *plhs = this->data; |
76 | | - const float *prhs = rhs.data; |
77 | | - __m128 y = _mm_load_ps(prhs); |
78 | | - for (int r=0; r<4; r++) { |
79 | | - __m128 x = _mm_load_ps(plhs); |
80 | | - __m128 z =_mm_mul_ps(x,y); |
81 | | - _mm_store_ps(result, z); |
82 | | - |
83 | | - float value=0; |
84 | | - for (int e=0; e<4; e++) { |
85 | | - value += result[e]; |
| 89 | + plhs += 4; |
86 | 90 | } |
87 | | - toReturn.data[r] = value; |
88 | | - |
89 | | - plhs += 4; |
90 | | - } |
91 | | - |
92 | | - //__m128 y = _mm_load_ps(rhs.data); |
93 | | - //__m128 z =_mm_mul_ps(x,y); |
94 | | - //__m256 z = _mm256_add_ps(x,y); |
95 | | - |
96 | | - //alignas(float) float result[8]; |
97 | | - //_mm256_store_ps(result, z); |
98 | | - |
99 | | - |
100 | | - return toReturn; |
| 91 | + return toReturn; |
| 92 | + #elif defined(__arm__) || defined(__arm64__) |
| 93 | + auto toReturn = ((Matrix4<float> *)this)->operator*(rhs); |
| 94 | + return *(Vector4f *)&toReturn; |
| 95 | + #endif |
101 | 96 | } |
102 | 97 |
|
103 | 98 | Matrix4f Matrix4f::identity() |
|
0 commit comments