@@ -10,6 +10,15 @@ MPI_PATH ?= /usr/local/openmpi
# Default compiler drivers. Both use ?= so a value supplied by the caller
# (environment or command line) takes precedence over these defaults.
# ROCM_PATH and CUDA_PATH are defined outside this section.
HIPCC ?= $(ROCM_PATH)/bin/amdclang++
NVCC ?= $(CUDA_PATH)/bin/nvcc
1212
# ROCm device libraries can live in different locations depending on packaging.
# hipcc/clang needs to find the amdgcn bitcode directory at link time.
#
# ROCM_DEVICE_LIB_PATH may be preset by the caller (environment or command
# line). Auto-detection only runs when it is empty, so a caller-supplied value
# is never overwritten. If neither known location exists the variable stays
# empty.
ROCM_DEVICE_LIB_PATH ?=
ifeq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
  # Candidates are probed in priority order; firstword keeps the first one that exists.
  ROCM_DEVICE_LIB_PATH := $(firstword $(wildcard $(ROCM_PATH)/amdgcn/bitcode $(ROCM_PATH)/lib/llvm/amdgcn/bitcode))
endif
21+
# Option to compile with single GFX kernel to drop compilation time.
# Defaults to 0; the build checks this value against 1.
SINGLE_KERNEL ?= 0
1524
@@ -40,6 +49,9 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
# Header search paths for the ROCm, HIP and HSA includes under $(ROCM_PATH).
CXXFLAGS = -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
# Libraries for the HIP build: libnuma plus the HSA and HIP runtimes from $(ROCM_PATH)/lib.
HIPLDFLAGS = -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64
# Treat the input as HIP source for the AMD platform. GPU_TARGETS_FLAGS is
# defined outside this section, so these stay recursively expanded (=).
HIPFLAGS = -Wall -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)
# Only pass the device library directory when one is set; an empty
# --rocm-device-lib-path= argument would otherwise reach the compiler.
ifneq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
  HIPFLAGS += --rocm-device-lib-path=$(ROCM_DEVICE_LIB_PATH)
endif
# Flags for the nvcc build: treat the input as CUDA source, link libnuma,
# and target the native GPU architecture.
NVFLAGS = -x cu -lnuma -arch=native
4456
4557 ifeq ($(SINGLE_KERNEL), 1)
@@ -111,7 +123,7 @@ endif
# Default aggregate target: builds whatever executable(s) $(EXE) names.
# Declared phony so a stray file called "all" cannot mask it.
.PHONY: all
all: $(EXE)
112124
# Build the HIP executable from the single client translation unit.
# Every *.hpp found under the current directory is listed as a prerequisite so
# that editing any header triggers a rebuild; only the first prerequisite ($<)
# is passed to the compiler.
# NOTE(review): HIPFLAGS contains "-x hip", so CXX is presumably pointed at a
# HIP-capable compiler elsewhere in this file -- confirm.
TransferBench: ./src/client/Client.cpp $(shell find . -name '*.hpp')
	$(CXX) $(CXXFLAGS) $(HIPFLAGS) $(COMMON_FLAGS) $< -o $@ $(HIPLDFLAGS) $(LDFLAGS)
115127
# Build the CUDA executable from the same client translation unit using nvcc.
# Every *.hpp found under the current directory is listed as a prerequisite so
# that editing any header triggers a rebuild; only the first prerequisite ($<)
# is passed to the compiler.
TransferBenchCuda: ./src/client/Client.cpp $(shell find . -name '*.hpp')
	$(NVCC) $(NVFLAGS) $(COMMON_FLAGS) $< -o $@ $(LDFLAGS)
0 commit comments