Skip to content

Commit 9b34eef

Browse files
committed
Matrix4f and Vector4f multiplication: x86 SIMD implementation working
1 parent 15baa0b commit 9b34eef

3 files changed

Lines changed: 66 additions & 3 deletions

File tree

inc/vector_math/matrix4f.hpp

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,26 @@ namespace systems::leal::vector_math
4848
Matrix4f Matrix4f::operator*(const Matrix4f &rhs) const {
4949
#ifdef __VECTOR_MATH_ARCH_X86_X64
5050
Matrix4f toReturn;
51+
__m128 row1 = _mm_load_ps(&rhs.data[0]);
52+
__m128 row2 = _mm_load_ps(&rhs.data[4]);
53+
__m128 row3 = _mm_load_ps(&rhs.data[8]);
54+
__m128 row4 = _mm_load_ps(&rhs.data[12]);
55+
for(int i=0; i<4; i++) {
56+
__m128 brod1 = _mm_set1_ps(this->data[4*i + 0]);
57+
__m128 brod2 = _mm_set1_ps(this->data[4*i + 1]);
58+
__m128 brod3 = _mm_set1_ps(this->data[4*i + 2]);
59+
__m128 brod4 = _mm_set1_ps(this->data[4*i + 3]);
60+
__m128 row = _mm_add_ps(
61+
_mm_add_ps(
62+
_mm_mul_ps(brod1, row1),
63+
_mm_mul_ps(brod2, row2)),
64+
_mm_add_ps(
65+
_mm_mul_ps(brod3, row3),
66+
_mm_mul_ps(brod4, row4)));
67+
_mm_store_ps(&toReturn.data[4*i], row);
68+
}
69+
return toReturn;
70+
/*Matrix4f toReturn;
5171
auto transposed = rhs.transpose();
5272
alignas(float) float result[4];
5373
@@ -70,7 +90,8 @@ namespace systems::leal::vector_math
7090
}
7191
plhs += 4;
7292
}
73-
return toReturn;
93+
return toReturn;*/
94+
7495
#elif defined(__VECTOR_MATH_ARCH_ARM)
7596
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
7697
return *(Matrix4f *)&toReturn;
@@ -80,6 +101,38 @@ namespace systems::leal::vector_math
80101
Vector4f Matrix4f::operator*(const Vector4f &rhs) const {
81102
#ifdef __VECTOR_MATH_ARCH_X86_X64
82103
Vector4f toReturn;
104+
__m128 row1 = _mm_load_ps(&this->data[0]);
105+
__m128 row2 = _mm_load_ps(&this->data[4]);
106+
__m128 row3 = _mm_load_ps(&this->data[8]);
107+
__m128 row4 = _mm_load_ps(&this->data[12]);
108+
__m128 vector = _mm_load_ps(rhs.data);
109+
__m128 r1 = _mm_mul_ps(row1, vector);
110+
__m128 r2 = _mm_mul_ps(row2, vector);
111+
__m128 r3 = _mm_mul_ps(row3, vector);
112+
__m128 r4 = _mm_mul_ps(row4, vector);
113+
__m128 result = _mm_hadd_ps(
114+
_mm_hadd_ps(r1,r2),
115+
_mm_hadd_ps(r3,r4)
116+
);
117+
_mm_store_ps(toReturn.data, result);
118+
119+
/*__m128 brod1 = _mm_set1_ps(rhs.data[0]);
120+
__m128 brod2 = _mm_set1_ps(rhs.data[1]);
121+
__m128 brod3 = _mm_set1_ps(rhs.data[2]);
122+
__m128 brod4 = _mm_set1_ps(rhs.data[3]);
123+
__m128 row = _mm_add_ps(
124+
_mm_add_ps(
125+
_mm_mul_ps(brod1, row1),
126+
_mm_mul_ps(brod2, row2)),
127+
_mm_add_ps(
128+
_mm_mul_ps(brod3, row3),
129+
_mm_mul_ps(brod4, row4)));
130+
_mm_store_ps(toReturn.data, row);
131+
printf("vector: %f %f %f %f\n", toReturn.data[0], toReturn.data[1], toReturn.data[2], toReturn.data[3]);
132+
*/
133+
return toReturn;
134+
135+
/*Vector4f toReturn;
83136
alignas(float) float result[4];
84137
85138
const float *plhs = this->data;
@@ -98,7 +151,7 @@ namespace systems::leal::vector_math
98151
99152
plhs += 4;
100153
}
101-
return toReturn;
154+
return toReturn;*/
102155
#elif defined(__VECTOR_MATH_ARCH_ARM)
103156
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
104157
return *(Vector4f *)&toReturn;

inc/vector_math/vector_math.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <string>
44
#include <vector_math/matrix4.hpp>
5+
#include <vector_math/matrix4f.hpp>
56

67
namespace systems::leal::vector_math {
78

test/main.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,16 @@ TEST(Matrix4Float, sum) {
4040
}
4141

4242
TEST(Matrix4Float, mul) { // Multiplication checks: the same fixtures are run through Matrix4<float> and Matrix4f.
43-
EXPECT_EQ(Matrix4<float>::identity()*Matrix4<float>(1), Matrix4<float>(1));
43+
float m1[16] = {1,2,3,4,0,1,2,3,5,6,7,8,10,11,12,13}; // left operand; 16 floats read as four consecutive rows of four
44+
float m2[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; // right operand, same layout
45+
float m3[16] = {80,90,100,110,56,62,68,74,176,202,228,254,296,342,388,434}; // expected m1*m2, e.g. first entry 1*0 + 2*4 + 3*8 + 4*12 = 80
46+
47+
EXPECT_EQ(Matrix4<float>(m1)*Matrix4<float>(m2), Matrix4<float>(m3)); // matrix * matrix through the Matrix4<float> template
48+
EXPECT_EQ(Matrix4f(m1)*Matrix4f(m2), Matrix4f(m3)); // same product through Matrix4f (intrinsics branch when __VECTOR_MATH_ARCH_X86_X64 is defined)
49+
50+
EXPECT_EQ(Matrix4<float>(m1)*Vector4<float>(2,6,10,14), Vector4<float>(100,68,228,388)); // (2,6,10,14) is column index 2 of m2, so the expected vector is column index 2 of m3
51+
EXPECT_EQ(Matrix4f(m1)*Vector4f(2,6,10,14), Vector4f(100,68,228,388)); // same matrix * vector check through Matrix4f / Vector4f
52+
4453
}
4554

4655
TEST(Matrix4Float, transpose) {

0 commit comments

Comments
 (0)