Skip to content

Commit 9890bb1

Browse files
valassiQubitol
authored and committed
[fix-arm-support] CODEGEN: use builtin __ARM_NEON for aarch64 simd (with DanieleM)
Remove the custom __ARM_NEON__ (with two extra underscores). Use 'g++ -march=armv8.2-a+simd -E -dM - < /dev/null | grep ARM' to check.
1 parent 42140fc commit 9890bb1

4 files changed

Lines changed: 8 additions & 8 deletions

File tree

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,8 @@ namespace mg5amcCpu
260260
bool ok = true; // this is just an assumption!
261261
const std::string tag = "simd arch not defined";
262262
#endif
263-
#elif defined __ARM_NEON__ // consider using __BUILTIN_CPU_SUPPORTS__
263+
#elif defined __ARM_NEON // consider using __BUILTIN_CPU_SUPPORTS__
264264
bool known = false; // __builtin_cpu_supports is not supported
265-
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
266265
// See https://stackoverflow.com/q/62783908
267266
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
268267
bool ok = true; // this is just an assumption!

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,7 @@ main( int argc, char** argv )
915915
#else
916916
wrkflwtxt += "/sse4";
917917
#endif
918-
#elif defined __ARM_NEON__
918+
#elif defined __ARM_NEON
919919
wrkflwtxt += "/neon";
920920
#else
921921
wrkflwtxt += "/????"; // no path to this statement
@@ -1031,7 +1031,7 @@ main( int argc, char** argv )
10311031
#else
10321032
<< "] ('sse4': SSE4.2, 128bit)" << cxtref << std::endl
10331033
#endif
1034-
#elif defined __ARM_NEON__
1034+
#elif defined __ARM_NEON
10351035
<< "Internal loops fptype_sv = VECTOR[" << neppV
10361036
<< "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl
10371037
#else

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,7 @@ CXXFLAGS += $(OMPFLAGS)
516516
# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
517517
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
518518
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
519+
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
519520
ifeq ($(UNAME_P),ppc64le)
520521
ifeq ($(BACKEND),cppsse4)
521522
override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers)
@@ -537,10 +538,10 @@ else ifeq ($(UNAME_P),arm)
537538
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
538539
endif
539540
else ifeq ($(UNAME_P),aarch64)
540-
ifeq ($(BACKEND),cppnone)
541+
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
541542
override AVXFLAGS = -march=armv8-a+nosimd
542-
else ifeq ($(BACKEND),cppsse4)
543-
override AVXFLAGS = -march=armv8-a+simd -D__ARM_NEON__
543+
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
544+
override AVXFLAGS = -march=armv8-a+simd
544545
else ifeq ($(BACKEND),cppavx2)
545546
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
546547
else ifeq ($(BACKEND),cpp512y)

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ using mgOnGpu::fptype2;
241241
#else
242242
#define MGONGPU_CPPSIMD 4
243243
#endif
244-
#elif defined __ARM_NEON__ // C++ "sse4" ARM NEON (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [ARM default]
244+
#elif defined __ARM_NEON // C++ "sse4" ARM NEON (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [ARM default]
245245
#ifdef MGONGPU_FPTYPE_DOUBLE
246246
#define MGONGPU_CPPSIMD 2
247247
#else

0 commit comments

Comments
 (0)