@@ -48,6 +48,26 @@ namespace systems::leal::vector_math
4848 Matrix4f Matrix4f::operator *(const Matrix4f &rhs) const {
4949 #ifdef __VECTOR_MATH_ARCH_X86_X64
5050 Matrix4f toReturn;
51+ __m128 row1 = _mm_load_ps (&rhs.data [0 ]);
52+ __m128 row2 = _mm_load_ps (&rhs.data [4 ]);
53+ __m128 row3 = _mm_load_ps (&rhs.data [8 ]);
54+ __m128 row4 = _mm_load_ps (&rhs.data [12 ]);
55+ for (int i=0 ; i<4 ; i++) {
56+ __m128 brod1 = _mm_set1_ps (this ->data [4 *i + 0 ]);
57+ __m128 brod2 = _mm_set1_ps (this ->data [4 *i + 1 ]);
58+ __m128 brod3 = _mm_set1_ps (this ->data [4 *i + 2 ]);
59+ __m128 brod4 = _mm_set1_ps (this ->data [4 *i + 3 ]);
60+ __m128 row = _mm_add_ps (
61+ _mm_add_ps (
62+ _mm_mul_ps (brod1, row1),
63+ _mm_mul_ps (brod2, row2)),
64+ _mm_add_ps (
65+ _mm_mul_ps (brod3, row3),
66+ _mm_mul_ps (brod4, row4)));
67+ _mm_store_ps (&toReturn.data [4 *i], row);
68+ }
69+ return toReturn;
70+ /* Matrix4f toReturn;
5171 auto transposed = rhs.transpose();
5272 alignas(float) float result[4];
5373
@@ -70,7 +90,8 @@ namespace systems::leal::vector_math
7090 }
7191 plhs += 4;
7292 }
73- return toReturn;
93+ return toReturn;*/
94+
7495 #elif defined(__VECTOR_MATH_ARCH_ARM)
7596 auto toReturn = ((Matrix4<float > *)this )->operator *(rhs);
7697 return *(Matrix4f *)&toReturn;
@@ -80,6 +101,38 @@ namespace systems::leal::vector_math
80101 Vector4f Matrix4f::operator *(const Vector4f &rhs) const {
81102 #ifdef __VECTOR_MATH_ARCH_X86_X64
82103 Vector4f toReturn;
104+ __m128 row1 = _mm_load_ps (&this ->data [0 ]);
105+ __m128 row2 = _mm_load_ps (&this ->data [4 ]);
106+ __m128 row3 = _mm_load_ps (&this ->data [8 ]);
107+ __m128 row4 = _mm_load_ps (&this ->data [12 ]);
108+ __m128 vector = _mm_load_ps (rhs.data );
109+ __m128 r1 = _mm_mul_ps (row1, vector);
110+ __m128 r2 = _mm_mul_ps (row2, vector);
111+ __m128 r3 = _mm_mul_ps (row3, vector);
112+ __m128 r4 = _mm_mul_ps (row4, vector);
113+ __m128 result = _mm_hadd_ps (
114+ _mm_hadd_ps (r1,r2),
115+ _mm_hadd_ps (r3,r4)
116+ );
117+ _mm_store_ps (toReturn.data , result);
118+
119+ /* __m128 brod1 = _mm_set1_ps(rhs.data[0]);
120+ __m128 brod2 = _mm_set1_ps(rhs.data[1]);
121+ __m128 brod3 = _mm_set1_ps(rhs.data[2]);
122+ __m128 brod4 = _mm_set1_ps(rhs.data[3]);
123+ __m128 row = _mm_add_ps(
124+ _mm_add_ps(
125+ _mm_mul_ps(brod1, row1),
126+ _mm_mul_ps(brod2, row2)),
127+ _mm_add_ps(
128+ _mm_mul_ps(brod3, row3),
129+ _mm_mul_ps(brod4, row4)));
130+ _mm_store_ps(toReturn.data, row);
131+ printf("vector: %f %f %f %f\n", toReturn.data[0], toReturn.data[1], toReturn.data[2], toReturn.data[3]);
132+ */
133+ return toReturn;
134+
135+ /* Vector4f toReturn;
83136 alignas(float) float result[4];
84137
85138 const float *plhs = this->data;
@@ -98,7 +151,7 @@ namespace systems::leal::vector_math
98151
99152 plhs += 4;
100153 }
101- return toReturn;
154+ return toReturn;*/
102155 #elif defined(__VECTOR_MATH_ARCH_ARM)
103156 auto toReturn = ((Matrix4<float > *)this )->operator *(rhs);
104157 return *(Vector4f *)&toReturn;
0 commit comments