Matrix4d intrinsics fail

rusoleal · rusoleal · commit f7bf612866bf · 2025-07-05T08:52:14.000+02:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,16 +6,33 @@ configure_file(src/vector_math_config.h.in vector_math_config.h)
 
 set(CMAKE_CXX_STANDARD 17)
 
+message(STATUS "${CMAKE_SYSTEM_PROCESSOR}")
+
+# Enable the AVX SIMD extension for the x86 processor family. The flag is
+# appended so that CMAKE_CXX_FLAGS supplied by the user or a toolchain file
+# (e.g. -DCMAKE_CXX_FLAGS=...) are preserved instead of being overwritten.
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)|(x86_64)")
+    message(STATUS "Enabling AVX support")
+    if(MSVC AND NOT MSVC_VERSION LESS 1600)
+        message(STATUS "Enabling AVX support for MSVC")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
+    else()
+        message(STATUS "Enabling AVX support for gcc/clang")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+    endif()
+endif ()
+
 option(VECTOR_MATH_BUILD_TEST "Enable building tests" OFF)
 
 option(VECTOR_MATH_BUILD_BENCHMARK "Enable building benchmark" OFF)
 
 include_directories(inc)
 
-message(STATUS ${PROJECT_NAME})
+message(STATUS "Building ${PROJECT_NAME}...")
 
 add_library(${PROJECT_NAME}
     src/vector_math.cpp
+    src/matrix4f.cpp
+    src/matrix4d.cpp
 )
 
 target_include_directories (vector_math PUBLIC 
diff --git a/benchmark/main.cpp b/benchmark/main.cpp
@@ -1,6 +1,7 @@
 #include <benchmark/benchmark.h>
 #include <vector_math/matrix4.hpp>
 #include <vector_math/matrix4f.hpp>
+#include <vector_math/matrix4d.hpp>
 
 #include <glm/vec3.hpp> // glm::vec3
 #include <glm/vec4.hpp> // glm::vec4
@@ -108,6 +109,51 @@ static void BM_Matrix4fLookAt(benchmark::State& state) {
 }
 BENCHMARK(BM_Matrix4fLookAt);
 
+////////////////////////
+// Matrix4d benchmark //
+////////////////////////
+static void BM_Matrix4dCreation(benchmark::State& state) {
+    namespace vm = systems::leal::vector_math;  // local shorthand for the library namespace
+    // Every iteration default-constructs one Matrix4d temporary.
+    for (auto _ : state) { benchmark::DoNotOptimize(vm::Matrix4d()); }
+}
+BENCHMARK(BM_Matrix4dCreation);
+
+static void BM_Matrix4dIdentity(benchmark::State& state) {
+    namespace vm = systems::leal::vector_math;  // local shorthand for the library namespace
+    // Every iteration calls the static Matrix4d::identity() factory once.
+    for (auto _ : state) { benchmark::DoNotOptimize(vm::Matrix4d::identity()); }
+}
+BENCHMARK(BM_Matrix4dIdentity);
+
+static void BM_Matrix4dMultiply(benchmark::State& state) {
+    // Both operands are built once, outside the timed loop.
+    using systems::leal::vector_math::Matrix4d;
+    Matrix4d lhs = Matrix4d::identity();
+    Matrix4d rhs = Matrix4d(1);
+    for (auto _ : state) { benchmark::DoNotOptimize(lhs * rhs); }
+}
+BENCHMARK(BM_Matrix4dMultiply);
+
+static void BM_Matrix4dByVector(benchmark::State& state) {
+    // Types are fully qualified, like the sibling Matrix4d benchmarks, so this
+    // function does not depend on a using-directive elsewhere in the file.
+    systems::leal::vector_math::Matrix4d m1 = systems::leal::vector_math::Matrix4d::identity();
+    systems::leal::vector_math::Vector4d v1 = systems::leal::vector_math::Vector4d(1,2,3,4);
+    for (auto _ : state) { benchmark::DoNotOptimize(m1 * v1); }  // matrix * vector product per iteration
+}
+BENCHMARK(BM_Matrix4dByVector);
+
+static void BM_Matrix4dLookAt(benchmark::State& state) {
+    namespace vm = systems::leal::vector_math;  // local shorthand for the library namespace
+    // The three input vectors are fixed up front; only lookAt() runs inside the loop.
+    auto origin = vm::Vector3<double>(0,0,0);
+    auto focus = vm::Vector3<double>(0,0,1000);
+    auto upAxis = vm::Vector3<double>(0,1,0);
+    for (auto _ : state) { benchmark::DoNotOptimize(vm::Matrix4d::lookAt(origin, focus, upAxis)); }
+}
+BENCHMARK(BM_Matrix4dLookAt);
+
 ///////////////////////////
 // GLM Matrix4 benchmark //
 ///////////////////////////
diff --git a/inc/vector_math/common.hpp b/inc/vector_math/common.hpp
@@ -1,6 +1,17 @@
 #pragma once
 
 #include <limits>
+#include <cmath>
+// Architecture detection: defines at most one of the __VECTOR_MATH_ARCH_* macros.
+#if defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+    #define __VECTOR_MATH_ARCH_X86_X64
+    #undef __VECTOR_MATH_ARCH_ARM
+#endif // NOTE(review): __x86__ is not a GCC/Clang predefined macro (32-bit x86 defines __i386__) - confirm intent
+
+#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
+    #undef __VECTOR_MATH_ARCH_X86_X64
+    #define __VECTOR_MATH_ARCH_ARM
+#endif // ARM: __aarch64__ covers GCC/Clang AArch64 targets, _M_ARM/_M_ARM64 cover MSVC
 
 namespace systems::leal::vector_math {
 
diff --git a/inc/vector_math/matrix4d.hpp b/inc/vector_math/matrix4d.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <vector_math/common.hpp>
+#include <vector_math/matrix4.hpp>
+#include <vector_math/vector4d.hpp>
+
+#ifdef __VECTOR_MATH_ARCH_X86_X64
+    #include <immintrin.h>
+#elif defined(__VECTOR_MATH_ARCH_ARM)
+#endif  
+
+
+namespace systems::leal::vector_math
+{
+    class alignas(32) Matrix4d : public Matrix4<double> { // 32-byte aligned subclass of Matrix4<double>
+    public:
+        // Constructors: default, from a single double, and from a 16-element buffer.
+        // Declared here only; the definitions are out of line.
+        // NOTE(review): presumably defined in src/matrix4d.cpp and forwarding to Matrix4<double> - confirm.
+        Matrix4d();
+        Matrix4d(double value);
+        Matrix4d(double buffer[16]);
+
+        // Multiplication operators: Matrix4d * Matrix4d yields a Matrix4d,
+        // Matrix4d * Vector4d yields a Vector4d. Neither modifies *this (const).
+        // NOTE(review): presumably SIMD-accelerated on x86, as immintrin.h is included above - confirm.
+        Matrix4d operator*(const Matrix4d &rhs) const;
+        Vector4d operator*(const Vector4d &rhs) const;
+
+        // Static factory returning a Matrix4d by value.
+        // NOTE(review): by its name this should be the 4x4 identity matrix;
+        // the definition is not visible in this header - confirm.
+        static Matrix4d identity();
+
+    };
+}
diff --git a/inc/vector_math/matrix4f.hpp b/inc/vector_math/matrix4f.hpp
@@ -1,25 +1,14 @@
 #pragma once
 
-#if defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
-    #define __VECTOR_MATH_ARCH_X86_X64
-    #undef __VECTOR_MATH_ARCH_ARM
-#endif
-
-#if defined(__arm__) || defined(__arm64__)
-    #undef __VECTOR_MATH_ARCH_X86_X64
-    #define __VECTOR_MATH_ARCH_ARM
-#endif
+#include <vector_math/common.hpp>
+#include <vector_math/matrix4.hpp>
+#include <vector_math/vector4f.hpp>
 
 #ifdef __VECTOR_MATH_ARCH_X86_X64
-    // do x64 stuff   
-    //#include <intrin.h>
     #include <immintrin.h>
 #elif defined(__VECTOR_MATH_ARCH_ARM)
-    // do arm stuff
 #endif  
 
-#include <vector_math/matrix4.hpp>
-#include <vector_math/vector4f.hpp>
 
 namespace systems::leal::vector_math
 {
@@ -28,9 +17,9 @@ namespace systems::leal::vector_math
         //////////////////
         // constructors //
         //////////////////
-        Matrix4f():Matrix4<float>() {}
-        Matrix4f(float value):Matrix4<float>(value) {}
-        Matrix4f(float buffer[16]):Matrix4<float>(buffer) {}
+        Matrix4f();
+        Matrix4f(float value);
+        Matrix4f(float buffer[16]);
 
         ///////////////
         // operators //
@@ -44,130 +33,4 @@ namespace systems::leal::vector_math
         static Matrix4f identity();
 
     };
-
-    Matrix4f Matrix4f::operator*(const Matrix4f &rhs) const {
-        #ifdef __VECTOR_MATH_ARCH_X86_X64
-            //printf("1\n");
-            Matrix4f toReturn;
-            __m128 row1 = _mm_load_ps(&rhs.data[0]);
-            __m128 row2 = _mm_load_ps(&rhs.data[4]);
-            __m128 row3 = _mm_load_ps(&rhs.data[8]);
-            __m128 row4 = _mm_load_ps(&rhs.data[12]);
-            for(int i=0; i<4; i++) {
-                __m128 brod1 = _mm_set1_ps(this->data[4*i + 0]);
-                __m128 brod2 = _mm_set1_ps(this->data[4*i + 1]);
-                __m128 brod3 = _mm_set1_ps(this->data[4*i + 2]);
-                __m128 brod4 = _mm_set1_ps(this->data[4*i + 3]);
-                __m128 row = _mm_add_ps(
-                            _mm_add_ps(
-                                _mm_mul_ps(brod1, row1),
-                                _mm_mul_ps(brod2, row2)),
-                            _mm_add_ps(
-                                _mm_mul_ps(brod3, row3),
-                                _mm_mul_ps(brod4, row4)));
-                _mm_store_ps(&toReturn.data[4*i], row);
-            }
-            return toReturn;        
-            /*Matrix4f toReturn;
-            auto transposed = rhs.transpose();
-            alignas(float) float result[4];
-
-            const float *plhs = this->data;
-            for (int c=0; c<4; c++) {
-                const float *prhs = transposed.data;
-                __m128 x = _mm_load_ps(plhs);
-                for (int r=0; r<4; r++) {
-                    __m128 y = _mm_load_ps(prhs);
-                    __m128 z =_mm_mul_ps(x,y);
-                    _mm_store_ps(result, z);
-
-                    float value=0;
-                    for (int e=0; e<4; e++) {
-                        value += result[e];
-                    }
-                    toReturn.data[4*r + c] = value;
-
-                    prhs += 4;
-                }
-                plhs += 4;
-            }
-            return toReturn;*/
-
-        #elif defined(__VECTOR_MATH_ARCH_ARM)
-            auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
-            return *(Matrix4f *)&toReturn;
-        #endif  
-    }
-
-    Vector4f Matrix4f::operator*(const Vector4f &rhs) const {
-        #ifdef __VECTOR_MATH_ARCH_X86_X64
-            Vector4f toReturn;
-            __m128 row1 = _mm_load_ps(&this->data[0]);
-            __m128 row2 = _mm_load_ps(&this->data[4]);
-            __m128 row3 = _mm_load_ps(&this->data[8]);
-            __m128 row4 = _mm_load_ps(&this->data[12]);
-            __m128 vector = _mm_load_ps(rhs.data);
-            __m128 r1 = _mm_mul_ps(row1, vector);
-            __m128 r2 = _mm_mul_ps(row2, vector);
-            __m128 r3 = _mm_mul_ps(row3, vector);
-            __m128 r4 = _mm_mul_ps(row4, vector);
-            __m128 result = _mm_hadd_ps(
-                _mm_hadd_ps(r1,r2),
-                _mm_hadd_ps(r3,r4)
-            );
-            _mm_store_ps(toReturn.data, result);
-
-            /*__m128 brod1 = _mm_set1_ps(rhs.data[0]);
-            __m128 brod2 = _mm_set1_ps(rhs.data[1]);
-            __m128 brod3 = _mm_set1_ps(rhs.data[2]);
-            __m128 brod4 = _mm_set1_ps(rhs.data[3]);
-            __m128 row = _mm_add_ps(
-                        _mm_add_ps(
-                            _mm_mul_ps(brod1, row1),
-                            _mm_mul_ps(brod2, row2)),
-                        _mm_add_ps(
-                            _mm_mul_ps(brod3, row3),
-                            _mm_mul_ps(brod4, row4)));
-            _mm_store_ps(toReturn.data, row);
-            printf("vector: %f %f %f %f\n", toReturn.data[0], toReturn.data[1], toReturn.data[2], toReturn.data[3]);
-            */
-            return toReturn;        
-
-            /*Vector4f toReturn;
-            alignas(float) float result[4];
-
-            const float *plhs = this->data;
-            const float *prhs = rhs.data;
-            __m128 y = _mm_load_ps(prhs);
-            for (int r=0; r<4; r++) {
-                __m128 x = _mm_load_ps(plhs);
-                __m128 z =_mm_mul_ps(x,y);
-                _mm_store_ps(result, z);
-
-                float value=0;
-                for (int e=0; e<4; e++) {
-                    value += result[e];
-                }
-                toReturn.data[r] = value;
-
-                plhs += 4;
-            }
-            return toReturn;*/
-        #elif defined(__VECTOR_MATH_ARCH_ARM)
-            auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
-            return *(Vector4f *)&toReturn;
-        #endif
-    }
-
-    Matrix4f Matrix4f::identity()
-    {
-        float data[] = {
-            1.0,0.0,0.0,0.0,
-            0.0,1.0,0.0,0.0,
-            0.0,0.0,1.0,0.0,
-            0.0,0.0,0.0,1.0,
-        };
-        return Matrix4f(data);
-    }
-
 }
diff --git a/inc/vector_math/vector4d.hpp b/inc/vector_math/vector4d.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <vector_math/vector4.hpp>
+
+namespace systems::leal::vector_math {
+
+    /// 4-component double vector: a 32-byte aligned subclass of Vector4<double>.
+    class alignas(32) Vector4d : public Vector4<double> {
+    public:
+        Vector4d() = default;
+
+        /// Stores x, y, z and w into data[0], data[1], data[2] and data[3] respectively.
+        Vector4d(double x, double y, double z, double w) {
+            const double components[4] = {x, y, z, w};
+            for (int i = 0; i < 4; ++i) { this->data[i] = components[i]; }
+        }
+    };
+
+}
diff --git a/inc/vector_math/vector_math.hpp b/inc/vector_math/vector_math.hpp
@@ -3,6 +3,7 @@
 #include <string>
 #include <vector_math/matrix4.hpp>
 #include <vector_math/matrix4f.hpp>
+#include <vector_math/matrix4d.hpp>
 
 namespace systems::leal::vector_math {
 
diff --git a/launch_benchmark.sh b/launch_benchmark.sh
@@ -1,3 +1,3 @@
-cmake -B build -S . -DVECTOR_MATH_BUILD_BENCHMARK=ON -DCMAKE_BUILD_TYPE=Release
-make -C build
-build/vector_math_benchmark
+cmake -B build/benchmark -S . -DVECTOR_MATH_BUILD_BENCHMARK=ON -DCMAKE_BUILD_TYPE=Release &&
+cmake --build build/benchmark &&
+build/benchmark/vector_math_benchmark  # reached only when configure and build both succeed
diff --git a/launch_test.sh b/launch_test.sh
@@ -1,3 +1,3 @@
-cmake -B build -S . -DVECTOR_MATH_BUILD_TEST=ON
-make -C build
-build/vector_math_test
+cmake -B build/test -S . -DVECTOR_MATH_BUILD_TEST=ON &&
+cmake --build build/test &&
+build/test/vector_math_test  # reached only when configure and build both succeed
diff --git a/src/matrix4d.cpp b/src/matrix4d.cpp
diff --git a/src/matrix4f.cpp b/src/matrix4f.cpp
diff --git a/test/main.cpp b/test/main.cpp