Skip to content

Commit 5ef6258

Browse files
committed
Merge remote-tracking branch 'ghdm/fix-arm-support' into armdmav
2 parents 9f1799e + c7296ac commit 5ef6258

226 files changed

Lines changed: 2012 additions & 1403 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MadtRex/makefiles/cudacpp_driver.mk

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ CXXFLAGS += $(OMPFLAGS)
415415

416416
# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
417417
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
418+
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
418419
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
419420
ifeq ($(UNAME_P),ppc64le)
420421
ifeq ($(BACKEND),cppsse4)
@@ -426,21 +427,23 @@ ifeq ($(UNAME_P),ppc64le)
426427
else ifeq ($(BACKEND),cpp512z)
427428
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
428429
endif
429-
else ifeq ($(UNAME_P),arm)
430-
ifeq ($(BACKEND),cppsse4)
431-
override AVXFLAGS = -D__ARM_NEON__ # ARM NEON with 128 width (Q/quadword registers)
430+
else ifeq ($(UNAME_P),arm) # ARM on Apple silicon
431+
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
432+
override AVXFLAGS = -DMGONGPU_NOARMNEON
433+
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
434+
override AVXFLAGS =
432435
else ifeq ($(BACKEND),cppavx2)
433436
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
434437
else ifeq ($(BACKEND),cpp512y)
435438
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
436439
else ifeq ($(BACKEND),cpp512z)
437440
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
438441
endif
439-
else ifeq ($(UNAME_P),aarch64)
440-
ifeq ($(BACKEND),cppnone)
442+
else ifeq ($(UNAME_P),aarch64) # ARM on Linux
443+
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
441444
override AVXFLAGS = -march=armv8-a+nosimd
442-
else ifeq ($(BACKEND),cppsse4)
443-
override AVXFLAGS = -march=armv8-a+simd -D__ARM_NEON__
445+
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
446+
override AVXFLAGS = -march=armv8-a+simd
444447
else ifeq ($(BACKEND),cppavx2)
445448
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
446449
else ifeq ($(BACKEND),cpp512y)

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MadtRex/makefiles/cudacpp_runner.mk

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ CXXFLAGS += $(OMPFLAGS)
259259

260260
# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
261261
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
262+
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
262263
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
263264
ifeq ($(UNAME_P),ppc64le)
264265
ifeq ($(BACKEND),cppsse4)
@@ -270,21 +271,23 @@ ifeq ($(UNAME_P),ppc64le)
270271
else ifeq ($(BACKEND),cpp512z)
271272
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
272273
endif
273-
else ifeq ($(UNAME_P),arm)
274-
ifeq ($(BACKEND),cppsse4)
275-
override AVXFLAGS = -D__ARM_NEON__ # ARM NEON with 128 width (Q/quadword registers)
274+
else ifeq ($(UNAME_P),arm) # ARM on Apple silicon
275+
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
276+
override AVXFLAGS = -DMGONGPU_NOARMNEON
277+
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
278+
override AVXFLAGS =
276279
else ifeq ($(BACKEND),cppavx2)
277280
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
278281
else ifeq ($(BACKEND),cpp512y)
279282
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
280283
else ifeq ($(BACKEND),cpp512z)
281284
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
282285
endif
283-
else ifeq ($(UNAME_P),aarch64)
284-
ifeq ($(BACKEND),cppnone)
286+
else ifeq ($(UNAME_P),aarch64) # ARM on Linux
287+
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
285288
override AVXFLAGS = -march=armv8-a+nosimd
286-
else ifeq ($(BACKEND),cppsse4)
287-
override AVXFLAGS = -march=armv8-a+simd -D__ARM_NEON__
289+
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
290+
override AVXFLAGS = -march=armv8-a+simd
288291
else ifeq ($(BACKEND),cppavx2)
289292
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
290293
else ifeq ($(BACKEND),cpp512y)

epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs
4848

4949
Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt
5050
Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt
51-
import /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg
51+
import /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg
5252
The import format was not given, so we guess it as command
5353
set stdout_level DEBUG
5454
set output information to level: 10
@@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.004445075988769531 
60+
DEBUG: model prefixing takes 0.010645151138305664 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes.
149149
INFO: Please specify coupling orders to bypass this step.
150150
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
151151
INFO: Process has 2 diagrams
152-
1 processes with 2 diagrams generated in 0.003 s
152+
1 processes with 2 diagrams generated in 0.007 s
153153
Total: 1 processes with 2 diagrams
154154
output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
155155
Output will be done with PLUGIN: CUDACPP_OUTPUT
@@ -160,10 +160,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vecto
160160
INFO: initialize a new directory: CODEGEN_mad_ee_mumu
161161
INFO: remove old information in CODEGEN_mad_ee_mumu
162162
DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180] 
163-
WARNING: File exists /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu 
164-
INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu
165-
WARNING: File exists /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards 
166-
WARNING: File exists /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses 
163+
WARNING: File exists /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu 
164+
INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu
165+
WARNING: File exists /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards 
166+
WARNING: File exists /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses 
167167
INFO: Organizing processes into subprocess groups
168168
INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1
169169
INFO: Processing color information for process: e+ e- > mu+ mu- @1
@@ -178,19 +178,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
178178
DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1552] 
179179
DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1576] 
180180
DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1577] 
181-
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
182-
Wrote files for 8 helas calls in 0.060 s
181+
Generated helas calls for 1 subprocesses (2 diagrams) in 0.008 s
182+
Wrote files for 8 helas calls in 0.158 s
183183
ALOHA: aloha starts to compute helicity amplitudes
184184
ALOHA: aloha creates FFV1 routines
185185
ALOHA: aloha creates FFV2 routines
186186
ALOHA: aloha creates FFV4 routines
187-
ALOHA: aloha creates 3 routines in 0.170 s
187+
ALOHA: aloha creates 3 routines in 0.362 s
188188
ALOHA: aloha starts to compute helicity amplitudes
189189
ALOHA: aloha creates FFV1 routines
190190
ALOHA: aloha creates FFV2 routines
191191
ALOHA: aloha creates FFV4 routines
192192
ALOHA: aloha creates FFV2_4 routines
193-
ALOHA: aloha creates 7 routines in 0.184 s
193+
ALOHA: aloha creates 7 routines in 0.534 s
194194
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
195195
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
196196
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -199,32 +199,32 @@ ALOHA: aloha creates 7 routines in 0.184 s
199199
<class 'aloha.create_aloha.AbstractRoutine'> FFV4
200200
<class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
201201
<class 'aloha.create_aloha.AbstractRoutine'> FFV2_4
202-
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
203-
INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/.
202+
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h
203+
INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/.
204204
super_write_set_parameters_onlyfixMajorana (hardcoded=False)
205205
super_write_set_parameters_onlyfixMajorana (hardcoded=True)
206-
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
207-
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc
206+
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h
207+
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc
208208
INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
209-
INFO: /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/.
209+
INFO: /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/.
210210
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files.
211211
If you want to make this value the default for future session, you can run 'save options --all'
212-
save configuration file to /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
212+
save configuration file to /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
213213
INFO: Use Fortran compiler gfortran
214214
INFO: Use c++ compiler g++
215215
INFO: Generate jpeg diagrams
216216
INFO: Generate web pages
217217
DEBUG: result.returncode =  0 [output.py at line 273] 
218-
Output to directory /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done.
218+
Output to directory /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done.
219219
Type "launch" to generate events from this process, or see
220-
/home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README
220+
/home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README
221221
Run "open index.html" to see more information about this process.
222222
quit
223223

224-
real 0m2.396s
225-
user 0m1.798s
226-
sys 0m0.425s
227-
Code generation completed in 2 seconds
224+
real 0m5.254s
225+
user 0m3.920s
226+
sys 0m1.041s
227+
Code generation completed in 5 seconds
228228
************************************************************
229229
* *
230230
* W E L C O M E to *
@@ -245,9 +245,9 @@ Code generation completed in 2 seconds
245245
* Type 'help' for in-line help. *
246246
* *
247247
************************************************************
248-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
249-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
250-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
248+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
249+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
250+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
251251
Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt
252252
Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt
253253
treatcards run
@@ -274,9 +274,9 @@ launch in debug mode
274274
* Type 'help' for in-line help. *
275275
* *
276276
************************************************************
277-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
278-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
279-
INFO: load configuration from /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
277+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
278+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/mg5amcnlo/input/mg5_configuration.txt
279+
INFO: load configuration from /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt
280280
Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt
281281
Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt
282282
treatcards param

epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@
235235
# pineappl = pineappl
236236

237237

238-
#mg5_path = /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/mg5amcnlo
238+
#mg5_path = /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/mg5amcnlo
239239

240240
# MG5 MAIN DIRECTORY
241-
#mg5_path = /home/dmass/Development/madgraph4gpu/release-v1.01.01/MG5aMC/mg5amcnlo
241+
#mg5_path = /home/dmass/Development/madgraph4gpu/fix-arm-support/MG5aMC/mg5amcnlo

epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -250,25 +250,22 @@ namespace mg5amcCpu
250250
bool known = true;
251251
bool ok = __builtin_cpu_supports( "vsx" );
252252
const std::string tag = "powerpc vsx (128bit as in SSE4.2)";
253-
#elif defined __ARM_NEON__ // consider using __BUILTIN_CPU_SUPPORTS__
254-
bool known = false; // __builtin_cpu_supports is not supported
255-
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
256-
// See https://stackoverflow.com/q/62783908
257-
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
258-
bool ok = true; // this is just an assumption!
259-
const std::string tag = "arm neon (128bit as in SSE4.2)";
260253
#elif defined( __x86_64__ ) || defined( __i386__ )
261254
bool known = true;
262255
bool ok = __builtin_cpu_supports( "sse4.2" );
263256
const std::string tag = "nehalem (SSE4.2)";
264257
#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted
258+
// DM now we have an explicit NEON target for ARM
259+
bool known = false; // __builtin_cpu_supports is not supported
260+
bool ok = true; // this is just an assumption!
261+
const std::string tag = "simd arch not defined";
262+
#endif
263+
#elif defined __ARM_NEON // consider using __BUILTIN_CPU_SUPPORTS__
265264
bool known = false; // __builtin_cpu_supports is not supported
266-
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
267265
// See https://stackoverflow.com/q/62783908
268266
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
269267
bool ok = true; // this is just an assumption!
270268
const std::string tag = "arm neon (128bit as in SSE4.2)";
271-
#endif
272269
#else
273270
bool known = true;
274271
bool ok = true;

epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -912,13 +912,13 @@ main( int argc, char** argv )
912912
#elif defined __SSE4_2__
913913
#ifdef __PPC__
914914
wrkflwtxt += "/ppcv";
915-
#elif defined __ARM_NEON__
916-
wrkflwtxt += "/neon";
917915
#else
918916
wrkflwtxt += "/sse4";
919917
#endif
918+
#elif defined __ARM_NEON
919+
wrkflwtxt += "/neon";
920920
#else
921-
wrkflwtxt += "/????"; // no path to this statement
921+
wrkflwtxt += "/????"; // no path to this statement
922922
#endif
923923
// -- Has cxtype_v::operator[] bracket with non-const reference?
924924
#if defined MGONGPU_CPPSIMD
@@ -1028,11 +1028,12 @@ main( int argc, char** argv )
10281028
<< "Internal loops fptype_sv = VECTOR[" << neppV
10291029
#ifdef __PPC__
10301030
<< "] ('sse4': PPC VSX, 128bit)" << cxtref << std::endl
1031-
#elif defined __ARM_NEON__
1032-
<< "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl
10331031
#else
10341032
<< "] ('sse4': SSE4.2, 128bit)" << cxtref << std::endl
10351033
#endif
1034+
#elif defined __ARM_NEON
1035+
<< "Internal loops fptype_sv = VECTOR[" << neppV
1036+
<< "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl
10361037
#else
10371038
#error Internal error: unknown SIMD build configuration
10381039
#endif

0 commit comments

Comments
 (0)