11#pragma once
22
3- #if defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
4- #define __VECTOR_MATH_ARCH_X86_X64
5- #undef __VECTOR_MATH_ARCH_ARM
6- #endif
7-
8- #if defined(__arm__) || defined(__arm64__)
9- #undef __VECTOR_MATH_ARCH_X86_X64
10- #define __VECTOR_MATH_ARCH_ARM
11- #endif
3+ #include < vector_math/common.hpp>
4+ #include < vector_math/matrix4.hpp>
5+ #include < vector_math/vector4f.hpp>
126
137#ifdef __VECTOR_MATH_ARCH_X86_X64
14- // do x64 stuff
15- // #include <intrin.h>
168 #include < immintrin.h>
179#elif defined(__VECTOR_MATH_ARCH_ARM)
18- // do arm stuff
1910#endif
2011
21- #include < vector_math/matrix4.hpp>
22- #include < vector_math/vector4f.hpp>
2312
2413namespace systems ::leal::vector_math
2514{
@@ -28,9 +17,9 @@ namespace systems::leal::vector_math
2817 // ////////////////
2918 // constructors //
3019 // ////////////////
31- Matrix4f ():Matrix4< float >() {}
32- Matrix4f (float value):Matrix4< float >(value) {}
33- Matrix4f (float buffer[16 ]):Matrix4< float >(buffer) {}
20+ Matrix4f ();
21+ Matrix4f (float value);
22+ Matrix4f (float buffer[16 ]);
3423
3524 // /////////////
3625 // operators //
@@ -44,130 +33,4 @@ namespace systems::leal::vector_math
4433 static Matrix4f identity ();
4534
4635 };
47-
48- Matrix4f Matrix4f::operator *(const Matrix4f &rhs) const {
49- #ifdef __VECTOR_MATH_ARCH_X86_X64
50- // printf("1\n");
51- Matrix4f toReturn;
52- __m128 row1 = _mm_load_ps (&rhs.data [0 ]);
53- __m128 row2 = _mm_load_ps (&rhs.data [4 ]);
54- __m128 row3 = _mm_load_ps (&rhs.data [8 ]);
55- __m128 row4 = _mm_load_ps (&rhs.data [12 ]);
56- for (int i=0 ; i<4 ; i++) {
57- __m128 brod1 = _mm_set1_ps (this ->data [4 *i + 0 ]);
58- __m128 brod2 = _mm_set1_ps (this ->data [4 *i + 1 ]);
59- __m128 brod3 = _mm_set1_ps (this ->data [4 *i + 2 ]);
60- __m128 brod4 = _mm_set1_ps (this ->data [4 *i + 3 ]);
61- __m128 row = _mm_add_ps (
62- _mm_add_ps (
63- _mm_mul_ps (brod1, row1),
64- _mm_mul_ps (brod2, row2)),
65- _mm_add_ps (
66- _mm_mul_ps (brod3, row3),
67- _mm_mul_ps (brod4, row4)));
68- _mm_store_ps (&toReturn.data [4 *i], row);
69- }
70- return toReturn;
71- /* Matrix4f toReturn;
72- auto transposed = rhs.transpose();
73- alignas(float) float result[4];
74-
75- const float *plhs = this->data;
76- for (int c=0; c<4; c++) {
77- const float *prhs = transposed.data;
78- __m128 x = _mm_load_ps(plhs);
79- for (int r=0; r<4; r++) {
80- __m128 y = _mm_load_ps(prhs);
81- __m128 z =_mm_mul_ps(x,y);
82- _mm_store_ps(result, z);
83-
84- float value=0;
85- for (int e=0; e<4; e++) {
86- value += result[e];
87- }
88- toReturn.data[4*r + c] = value;
89-
90- prhs += 4;
91- }
92- plhs += 4;
93- }
94- return toReturn;*/
95-
96- #elif defined(__VECTOR_MATH_ARCH_ARM)
97- auto toReturn = ((Matrix4<float > *)this )->operator *(rhs);
98- return *(Matrix4f *)&toReturn;
99- #endif
100- }
101-
102- Vector4f Matrix4f::operator *(const Vector4f &rhs) const {
103- #ifdef __VECTOR_MATH_ARCH_X86_X64
104- Vector4f toReturn;
105- __m128 row1 = _mm_load_ps (&this ->data [0 ]);
106- __m128 row2 = _mm_load_ps (&this ->data [4 ]);
107- __m128 row3 = _mm_load_ps (&this ->data [8 ]);
108- __m128 row4 = _mm_load_ps (&this ->data [12 ]);
109- __m128 vector = _mm_load_ps (rhs.data );
110- __m128 r1 = _mm_mul_ps (row1, vector);
111- __m128 r2 = _mm_mul_ps (row2, vector);
112- __m128 r3 = _mm_mul_ps (row3, vector);
113- __m128 r4 = _mm_mul_ps (row4, vector);
114- __m128 result = _mm_hadd_ps (
115- _mm_hadd_ps (r1,r2),
116- _mm_hadd_ps (r3,r4)
117- );
118- _mm_store_ps (toReturn.data , result);
119-
120- /* __m128 brod1 = _mm_set1_ps(rhs.data[0]);
121- __m128 brod2 = _mm_set1_ps(rhs.data[1]);
122- __m128 brod3 = _mm_set1_ps(rhs.data[2]);
123- __m128 brod4 = _mm_set1_ps(rhs.data[3]);
124- __m128 row = _mm_add_ps(
125- _mm_add_ps(
126- _mm_mul_ps(brod1, row1),
127- _mm_mul_ps(brod2, row2)),
128- _mm_add_ps(
129- _mm_mul_ps(brod3, row3),
130- _mm_mul_ps(brod4, row4)));
131- _mm_store_ps(toReturn.data, row);
132- printf("vector: %f %f %f %f\n", toReturn.data[0], toReturn.data[1], toReturn.data[2], toReturn.data[3]);
133- */
134- return toReturn;
135-
136- /* Vector4f toReturn;
137- alignas(float) float result[4];
138-
139- const float *plhs = this->data;
140- const float *prhs = rhs.data;
141- __m128 y = _mm_load_ps(prhs);
142- for (int r=0; r<4; r++) {
143- __m128 x = _mm_load_ps(plhs);
144- __m128 z =_mm_mul_ps(x,y);
145- _mm_store_ps(result, z);
146-
147- float value=0;
148- for (int e=0; e<4; e++) {
149- value += result[e];
150- }
151- toReturn.data[r] = value;
152-
153- plhs += 4;
154- }
155- return toReturn;*/
156- #elif defined(__VECTOR_MATH_ARCH_ARM)
157- auto toReturn = ((Matrix4<float > *)this )->operator *(rhs);
158- return *(Vector4f *)&toReturn;
159- #endif
160- }
161-
162- Matrix4f Matrix4f::identity ()
163- {
164- float data[] = {
165- 1.0 ,0.0 ,0.0 ,0.0 ,
166- 0.0 ,1.0 ,0.0 ,0.0 ,
167- 0.0 ,0.0 ,1.0 ,0.0 ,
168- 0.0 ,0.0 ,0.0 ,1.0 ,
169- };
170- return Matrix4f (data);
171- }
172-
17336}
0 commit comments