Skip to content

Commit 83f37b7

Browse files
committed
Fix OPTFLAGS in case of HIP builds
See madgraph5/madgraph4gpu#806, which explains that the gq_ttxq build with HIP fails when compiled with -O3. Replace -O3 with -O2 (no performance degradation according to #806) in the HIP case only.
1 parent ecd9929 commit 83f37b7

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

  • madgraph/iolibs/template_files/madmatrix

madgraph/iolibs/template_files/madmatrix/cudacpp.mk

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,7 @@ else ifeq ($(BACKEND),hip)
329329
GPULANGUAGE = hip
330330
GPUSUFFIX = hip
331331

332-
# Optimization flags
333-
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
332+
# Optimization flags (the HIP -O2 workaround is applied further below, right after the default OPTFLAGS = -O3 is set; see #806)
334333
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
335334

336335
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
@@ -402,11 +401,17 @@ endif
402401
#=== Configure common compiler flags for C++ and CUDA/HIP
403402

404403
INCFLAGS = -I.
405-
OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here
404+
OPTFLAGS = -O3
405+
406+
# HIP requires -O2 to avoid "Memory access fault" in gq_ttq (#806)
407+
ifeq ($(BACKEND),hip)
408+
override OPTFLAGS = -O2
409+
endif
406410

407411
# PROFILE=1: reduced optimisation + symbols suitable for profilers (perf, gprof, valgrind...)
408412
# DEBUG=1 : no optimisation + full debug symbols
409413
# Both flags propagate automatically to src/ sub-makes via MAKEFLAGS.
414+
# These override the HIP workaround above intentionally: DEBUG in particular must always win.
410415
ifeq ($(PROFILE),1)
411416
override OPTFLAGS = -O2
412417
else ifeq ($(DEBUG),1)

0 commit comments

Comments
 (0)