Skip to content

Commit f7bf612

Browse files
committed
matrix4d intrinsics fails
1 parent abe441c commit f7bf612

12 files changed

Lines changed: 306 additions & 152 deletions

File tree

CMakeLists.txt

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,33 @@ configure_file(src/vector_math_config.h.in vector_math_config.h)
66

77
set(CMAKE_CXX_STANDARD 17)
88

9+
message(STATUS ${CMAKE_SYSTEM_PROCESSOR})
10+
11+
# enable avx simd extension for x86 processor family
12+
# Enable the AVX instruction set when targeting an x86-family processor.
# The flag is appended to CMAKE_CXX_FLAGS rather than assigned, so flags
# supplied by the user or a toolchain file (e.g. -DCMAKE_CXX_FLAGS=...) are
# preserved instead of being silently discarded.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)|(x86_64)")
    message(STATUS "Enabling AVX support")
    if(MSVC AND NOT MSVC_VERSION LESS 1600)
        message(STATUS "Enabling AVX support for MSVC")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
    else()
        message(STATUS "Enabling AVX support for gcc/clang")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
    endif()
endif ()
23+
924
option(VECTOR_MATH_BUILD_TEST "Enable building tests" OFF)
1025

1126
option(VECTOR_MATH_BUILD_BENCHMARK "Enable building benchmark" OFF)
1227

1328
include_directories(inc)
1429

15-
message(STATUS ${PROJECT_NAME})
30+
message(STATUS "Building ${PROJECT_NAME}...")
1631

1732
add_library(${PROJECT_NAME}
1833
src/vector_math.cpp
34+
src/matrix4f.cpp
35+
src/matrix4d.cpp
1936
)
2037

2138
target_include_directories (vector_math PUBLIC

benchmark/main.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <benchmark/benchmark.h>
22
#include <vector_math/matrix4.hpp>
33
#include <vector_math/matrix4f.hpp>
4+
#include <vector_math/matrix4d.hpp>
45

56
#include <glm/vec3.hpp> // glm::vec3
67
#include <glm/vec4.hpp> // glm::vec4
@@ -108,6 +109,51 @@ static void BM_Matrix4fLookAt(benchmark::State& state) {
108109
}
109110
BENCHMARK(BM_Matrix4fLookAt);
110111

112+
////////////////////////
113+
// Matrix4d benchmark //
114+
////////////////////////
115+
/// Benchmarks default construction of a Matrix4d.
static void BM_Matrix4dCreation(benchmark::State& state) {
    namespace vm = systems::leal::vector_math;
    for (auto _ : state) {
        // DoNotOptimize keeps the otherwise unused temporary from being
        // removed by dead-code elimination.
        benchmark::DoNotOptimize(vm::Matrix4d());
    }
}
120+
BENCHMARK(BM_Matrix4dCreation);
121+
122+
/// Benchmarks Matrix4d::identity().
static void BM_Matrix4dIdentity(benchmark::State& state) {
    using systems::leal::vector_math::Matrix4d;
    for (auto _ : state) {
        // The result is passed to DoNotOptimize so the call is not eliminated.
        benchmark::DoNotOptimize(Matrix4d::identity());
    }
}
127+
BENCHMARK(BM_Matrix4dIdentity);
128+
129+
/// Benchmarks Matrix4d * Matrix4d.
/// Both operands are built once, outside the timed loop, so only the
/// multiplication itself is measured.
static void BM_Matrix4dMultiply(benchmark::State& state) {
    using systems::leal::vector_math::Matrix4d;
    auto identityOperand = Matrix4d::identity();
    Matrix4d scalarOperand(1);
    for (auto _ : state) {
        benchmark::DoNotOptimize(identityOperand * scalarOperand);
    }
}
136+
BENCHMARK(BM_Matrix4dMultiply);
137+
138+
/// Benchmarks Matrix4d * Vector4d.
/// The matrix and the vector are built once, outside the timed loop, so only
/// the product is measured.
static void BM_Matrix4dByVector(benchmark::State& state) {
    // Fully qualified like the other Matrix4d benchmarks, so the function does
    // not depend on a using-directive being in effect at this point of the file.
    systems::leal::vector_math::Matrix4d m1 = systems::leal::vector_math::Matrix4d::identity();
    systems::leal::vector_math::Vector4d v1 = systems::leal::vector_math::Vector4d(1, 2, 3, 4);
    for (auto _ : state) {
        benchmark::DoNotOptimize(m1 * v1);
    }
}
145+
BENCHMARK(BM_Matrix4dByVector);
146+
147+
/// Benchmarks Matrix4d::lookAt() with a fixed eye, target and up vector.
/// The three input vectors are built once, outside the timed loop.
static void BM_Matrix4dLookAt(benchmark::State& state) {
    namespace vm = systems::leal::vector_math;
    vm::Vector3<double> eye(0, 0, 0);
    vm::Vector3<double> target(0, 0, 1000);
    vm::Vector3<double> up(0, 1, 0);
    for (auto _ : state) {
        benchmark::DoNotOptimize(vm::Matrix4d::lookAt(eye, target, up));
    }
}
155+
BENCHMARK(BM_Matrix4dLookAt);
156+
111157
///////////////////////////
112158
// GLM Matrix4 benchmark //
113159
///////////////////////////

inc/vector_math/common.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
#pragma once
22

33
#include <limits>
4+
#include <cmath>
5+
6+
// Target-architecture detection.
// When a supported architecture is recognised, exactly one of
// __VECTOR_MATH_ARCH_X86_X64 / __VECTOR_MATH_ARCH_ARM is defined; on any other
// target neither macro is defined.

// x86 family: __i386__ / __x86_64__ (GCC, Clang), _M_IX86 / _M_X64 (MSVC).
// __x86__ is kept only for backward compatibility with the previous check.
#if defined(__x86__) || defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define __VECTOR_MATH_ARCH_X86_X64
#undef __VECTOR_MATH_ARCH_ARM
#endif

// ARM family: __arm__ / __aarch64__ (GCC, Clang), __arm64__ (Apple Clang),
// _M_ARM / _M_ARM64 (MSVC).
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
#undef __VECTOR_MATH_ARCH_X86_X64
#define __VECTOR_MATH_ARCH_ARM
#endif
415

516
namespace systems::leal::vector_math {
617

inc/vector_math/matrix4d.hpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#pragma once
2+
3+
#include <vector_math/common.hpp>
4+
#include <vector_math/matrix4.hpp>
5+
#include <vector_math/vector4d.hpp>
6+
7+
#ifdef __VECTOR_MATH_ARCH_X86_X64
8+
#include <immintrin.h>
9+
#elif defined(__VECTOR_MATH_ARCH_ARM)
10+
#endif
11+
12+
13+
namespace systems::leal::vector_math
14+
{
15+
/// Double-precision 4x4 matrix built on top of Matrix4<double>.
///
/// The type is over-aligned to 32 bytes.
/// NOTE(review): presumably so the storage can be used with aligned 256-bit
/// SIMD loads/stores; confirm against the implementation.
/// Every member is only declared here and is defined out of line.
class alignas(32) Matrix4d : public Matrix4<double> {
public:
    // ---- constructors ----

    /// Default constructor.
    Matrix4d();

    /// Constructs a matrix from a single scalar.
    /// NOTE(review): how `value` is distributed over the elements is decided by
    /// the out-of-line definition; confirm there.
    Matrix4d(double value);

    /// Constructs a matrix from a caller-supplied buffer of 16 doubles.
    Matrix4d(double buffer[16]);

    // ---- operators ----

    /// Matrix-by-matrix multiplication; returns the result as a new Matrix4d.
    Matrix4d operator*(const Matrix4d &rhs) const;

    /// Matrix-by-vector multiplication; returns the result as a new Vector4d.
    Vector4d operator*(const Vector4d &rhs) const;

    // ---- statics ----

    /// Returns an identity matrix.
    static Matrix4d identity();
};
36+
}

inc/vector_math/matrix4f.hpp

Lines changed: 6 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,14 @@
11
#pragma once
22

3-
#if defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
4-
#define __VECTOR_MATH_ARCH_X86_X64
5-
#undef __VECTOR_MATH_ARCH_ARM
6-
#endif
7-
8-
#if defined(__arm__) || defined(__arm64__)
9-
#undef __VECTOR_MATH_ARCH_X86_X64
10-
#define __VECTOR_MATH_ARCH_ARM
11-
#endif
3+
#include <vector_math/common.hpp>
4+
#include <vector_math/matrix4.hpp>
5+
#include <vector_math/vector4f.hpp>
126

137
#ifdef __VECTOR_MATH_ARCH_X86_X64
14-
// do x64 stuff
15-
//#include <intrin.h>
168
#include <immintrin.h>
179
#elif defined(__VECTOR_MATH_ARCH_ARM)
18-
// do arm stuff
1910
#endif
2011

21-
#include <vector_math/matrix4.hpp>
22-
#include <vector_math/vector4f.hpp>
2312

2413
namespace systems::leal::vector_math
2514
{
@@ -28,9 +17,9 @@ namespace systems::leal::vector_math
2817
//////////////////
2918
// constructors //
3019
//////////////////
31-
Matrix4f():Matrix4<float>() {}
32-
Matrix4f(float value):Matrix4<float>(value) {}
33-
Matrix4f(float buffer[16]):Matrix4<float>(buffer) {}
20+
Matrix4f();
21+
Matrix4f(float value);
22+
Matrix4f(float buffer[16]);
3423

3524
///////////////
3625
// operators //
@@ -44,130 +33,4 @@ namespace systems::leal::vector_math
4433
static Matrix4f identity();
4534

4635
};
47-
48-
Matrix4f Matrix4f::operator*(const Matrix4f &rhs) const {
49-
#ifdef __VECTOR_MATH_ARCH_X86_X64
50-
//printf("1\n");
51-
Matrix4f toReturn;
52-
__m128 row1 = _mm_load_ps(&rhs.data[0]);
53-
__m128 row2 = _mm_load_ps(&rhs.data[4]);
54-
__m128 row3 = _mm_load_ps(&rhs.data[8]);
55-
__m128 row4 = _mm_load_ps(&rhs.data[12]);
56-
for(int i=0; i<4; i++) {
57-
__m128 brod1 = _mm_set1_ps(this->data[4*i + 0]);
58-
__m128 brod2 = _mm_set1_ps(this->data[4*i + 1]);
59-
__m128 brod3 = _mm_set1_ps(this->data[4*i + 2]);
60-
__m128 brod4 = _mm_set1_ps(this->data[4*i + 3]);
61-
__m128 row = _mm_add_ps(
62-
_mm_add_ps(
63-
_mm_mul_ps(brod1, row1),
64-
_mm_mul_ps(brod2, row2)),
65-
_mm_add_ps(
66-
_mm_mul_ps(brod3, row3),
67-
_mm_mul_ps(brod4, row4)));
68-
_mm_store_ps(&toReturn.data[4*i], row);
69-
}
70-
return toReturn;
71-
/*Matrix4f toReturn;
72-
auto transposed = rhs.transpose();
73-
alignas(float) float result[4];
74-
75-
const float *plhs = this->data;
76-
for (int c=0; c<4; c++) {
77-
const float *prhs = transposed.data;
78-
__m128 x = _mm_load_ps(plhs);
79-
for (int r=0; r<4; r++) {
80-
__m128 y = _mm_load_ps(prhs);
81-
__m128 z =_mm_mul_ps(x,y);
82-
_mm_store_ps(result, z);
83-
84-
float value=0;
85-
for (int e=0; e<4; e++) {
86-
value += result[e];
87-
}
88-
toReturn.data[4*r + c] = value;
89-
90-
prhs += 4;
91-
}
92-
plhs += 4;
93-
}
94-
return toReturn;*/
95-
96-
#elif defined(__VECTOR_MATH_ARCH_ARM)
97-
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
98-
return *(Matrix4f *)&toReturn;
99-
#endif
100-
}
101-
102-
Vector4f Matrix4f::operator*(const Vector4f &rhs) const {
103-
#ifdef __VECTOR_MATH_ARCH_X86_X64
104-
Vector4f toReturn;
105-
__m128 row1 = _mm_load_ps(&this->data[0]);
106-
__m128 row2 = _mm_load_ps(&this->data[4]);
107-
__m128 row3 = _mm_load_ps(&this->data[8]);
108-
__m128 row4 = _mm_load_ps(&this->data[12]);
109-
__m128 vector = _mm_load_ps(rhs.data);
110-
__m128 r1 = _mm_mul_ps(row1, vector);
111-
__m128 r2 = _mm_mul_ps(row2, vector);
112-
__m128 r3 = _mm_mul_ps(row3, vector);
113-
__m128 r4 = _mm_mul_ps(row4, vector);
114-
__m128 result = _mm_hadd_ps(
115-
_mm_hadd_ps(r1,r2),
116-
_mm_hadd_ps(r3,r4)
117-
);
118-
_mm_store_ps(toReturn.data, result);
119-
120-
/*__m128 brod1 = _mm_set1_ps(rhs.data[0]);
121-
__m128 brod2 = _mm_set1_ps(rhs.data[1]);
122-
__m128 brod3 = _mm_set1_ps(rhs.data[2]);
123-
__m128 brod4 = _mm_set1_ps(rhs.data[3]);
124-
__m128 row = _mm_add_ps(
125-
_mm_add_ps(
126-
_mm_mul_ps(brod1, row1),
127-
_mm_mul_ps(brod2, row2)),
128-
_mm_add_ps(
129-
_mm_mul_ps(brod3, row3),
130-
_mm_mul_ps(brod4, row4)));
131-
_mm_store_ps(toReturn.data, row);
132-
printf("vector: %f %f %f %f\n", toReturn.data[0], toReturn.data[1], toReturn.data[2], toReturn.data[3]);
133-
*/
134-
return toReturn;
135-
136-
/*Vector4f toReturn;
137-
alignas(float) float result[4];
138-
139-
const float *plhs = this->data;
140-
const float *prhs = rhs.data;
141-
__m128 y = _mm_load_ps(prhs);
142-
for (int r=0; r<4; r++) {
143-
__m128 x = _mm_load_ps(plhs);
144-
__m128 z =_mm_mul_ps(x,y);
145-
_mm_store_ps(result, z);
146-
147-
float value=0;
148-
for (int e=0; e<4; e++) {
149-
value += result[e];
150-
}
151-
toReturn.data[r] = value;
152-
153-
plhs += 4;
154-
}
155-
return toReturn;*/
156-
#elif defined(__VECTOR_MATH_ARCH_ARM)
157-
auto toReturn = ((Matrix4<float> *)this)->operator*(rhs);
158-
return *(Vector4f *)&toReturn;
159-
#endif
160-
}
161-
162-
Matrix4f Matrix4f::identity()
163-
{
164-
float data[] = {
165-
1.0,0.0,0.0,0.0,
166-
0.0,1.0,0.0,0.0,
167-
0.0,0.0,1.0,0.0,
168-
0.0,0.0,0.0,1.0,
169-
};
170-
return Matrix4f(data);
171-
}
172-
17336
}

inc/vector_math/vector4d.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once
2+
3+
#include <vector_math/vector4.hpp>
4+
5+
namespace systems::leal::vector_math {
6+
7+
/// Double-precision 4-component vector built on top of Vector4<double>,
/// over-aligned to 32 bytes.
class alignas(32) Vector4d : public Vector4<double> {
public:
    /// Default constructor; initialisation is left to the base class.
    Vector4d() = default;

    /// Stores x, y, z and w in data[0]..data[3], in that order.
    Vector4d(double x, double y, double z, double w) {
        const double components[4] = {x, y, z, w};
        for (int i = 0; i < 4; ++i) {
            this->data[i] = components[i];
        }
    }
};
18+
19+
}

inc/vector_math/vector_math.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <string>
44
#include <vector_math/matrix4.hpp>
55
#include <vector_math/matrix4f.hpp>
6+
#include <vector_math/matrix4d.hpp>
67

78
namespace systems::leal::vector_math {
89

launch_benchmark.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
cmake -B build -S . -DVECTOR_MATH_BUILD_BENCHMARK=ON -DCMAKE_BUILD_TYPE=Release
2-
make -C build
3-
build/vector_math_benchmark
1+
# Configure, build and run the benchmark suite in its own build tree.
# Abort at the first failing step so a stale binary is never run after a
# broken configure or build.
set -e
cmake -B build/benchmark -S . -DVECTOR_MATH_BUILD_BENCHMARK=ON -DCMAKE_BUILD_TYPE=Release
# cmake --build drives whichever generator was configured (make, ninja, ...).
cmake --build build/benchmark
build/benchmark/vector_math_benchmark

launch_test.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
cmake -B build -S . -DVECTOR_MATH_BUILD_TEST=ON
2-
make -C build
3-
build/vector_math_test
1+
# Configure, build and run the test suite in its own build tree.
# Abort at the first failing step so a stale binary is never run after a
# broken configure or build.
set -e
cmake -B build/test -S . -DVECTOR_MATH_BUILD_TEST=ON
# cmake --build drives whichever generator was configured (make, ninja, ...).
cmake --build build/test
build/test/vector_math_test

0 commit comments

Comments
 (0)