Skip to content

Commit d427fcf

Browse files
committed
[fix-arm-support] gg_tt.mad: undefine __ARM_NEON for cppnone on arm/apple (with DanieleM)
Results on an Apple M1 (thanks Olivier!):
for avx in none sse4; do ./build.${avx}_m_inl0_hrd0/check_cpp.exe -p 1024 256 1 | \egrep '(EvtsPerSec\[MECalcOnly\]|MeanMatrixElemValue|fptype_sv)'; done
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
EvtsPerSec[MECalcOnly] (3a) = ( 5.535410e+05 ) sec^-1
MeanMatrixElemValue = ( 2.080788e+00 +- 6.803789e-03 ) GeV^0
Internal loops fptype_sv = VECTOR[2] ('sse4': ARM NEON, 128bit) [cxtype_ref=NO]
EvtsPerSec[MECalcOnly] (3a) = ( 8.327442e+05 ) sec^-1
MeanMatrixElemValue = ( 2.080788e+00 +- 6.803789e-03 ) GeV^0
1 parent fff5062 commit d427fcf

2 files changed

Lines changed: 11 additions & 4 deletions

File tree

epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -527,17 +527,19 @@ ifeq ($(UNAME_P),ppc64le)
527527
else ifeq ($(BACKEND),cpp512z)
528528
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
529529
endif
530-
else ifeq ($(UNAME_P),arm)
531-
ifeq ($(BACKEND),cppsse4)
532-
override AVXFLAGS = -D__ARM_NEON__ # ARM NEON with 128 width (Q/quadword registers)
530+
else ifeq ($(UNAME_P),arm) # ARM on Apple silicon
531+
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
532+
override AVXFLAGS = -DMGONGPU_NOARMNEON
533+
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
534+
override AVXFLAGS =
533535
else ifeq ($(BACKEND),cppavx2)
534536
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
535537
else ifeq ($(BACKEND),cpp512y)
536538
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
537539
else ifeq ($(BACKEND),cpp512z)
538540
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
539541
endif
540-
else ifeq ($(UNAME_P),aarch64)
542+
else ifeq ($(UNAME_P),aarch64) # ARM on Linux
541543
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
542544
override AVXFLAGS = -march=armv8-a+nosimd
543545
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)

epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ namespace mgOnGpu
214214
using mgOnGpu::fptype;
215215
using mgOnGpu::fptype2;
216216

217+
// Undefine __ARM_NEON (hack for cppnone on Apple silicon ARM)
218+
#ifdef MGONGPU_NOARMNEON
219+
#undef __ARM_NEON
220+
#endif
221+
217222
// C++ SIMD vectorization width (this will be used to set neppV)
218223
#ifdef MGONGPUCPP_GPUIMPL // CUDA and HIP implementations have no SIMD
219224
#undef MGONGPU_CPPSIMD

0 commit comments

Comments
 (0)