Skip to content

Commit 023ce41

Browse files
TransferBench v1.63 (#193)
* Fixing issue with P memory type and use of DMA subexecutor * NIC executor support in CMake builds requires explicit opt-in by setting ENABLE_NIC_EXEC=1 * Removing self-GPU check for DMA engine copies * [BUILD] Add new GPU targets and switch to amdclang++ (#187) * [BUILD] Add gfx950, gfx1150, and gfx1151 targets * [BUILD] Modify CMake to use amdclang++ * [BUILD] Modify Makefile to use amdclang++ * [GIT] Updated CHANGELOG and .gitignore * Adding HBM testing to healthcheck preset * Tweaking HBM tests to occur first, and provide more info during VERBOSE=1 * Fixing timing reporting issues with NUM_SUBITERATIONS * [BUILD] Simplify Makefile (#190) * Combines steps for compilation and linking * Does not rebuild if no change to source code * Updating CHANGELOG --------- Co-authored-by: Nilesh M Negi <Nilesh.Negi@amd.com>
1 parent a4709f4 commit 023ce41

9 files changed

Lines changed: 706 additions & 259 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ _templates/
77
_toc.yml
88
docBin/
99
TransferBench
10+
*.o

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33
Documentation for TransferBench is available at
44
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
55

6+
## v1.63.00
7+
### Added
8+
- Added `gfx950`, `gfx1150`, and `gfx1151` to default GPU targets list in CMake builds
9+
10+
### Modified
11+
- Removed self-GPU check for DMA engine copies
12+
- Switched to amdclang++ as primary compiler
13+
- The healthcheck preset now adds HBM testing and support for more MI3XX variants
14+
15+
### Fixed
16+
- Fixed issue when using "P" memory type and specific DMA subengines
17+
- Fixed issue with subiteration timing reports
18+
619
## v1.62.00
720
### Added
821
- Adding GFX_TEMPORAL to allow for use of non-temporal loads/stores

CMakeLists.txt

Lines changed: 138 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,29 @@
11
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
22

3-
if (DEFINED ENV{ROCM_PATH})
4-
set(ROCM_PATH "$ENV{ROCM_PATH}" CACHE STRING "ROCm install directory")
5-
else()
6-
set(ROCM_PATH "/opt/rocm" CACHE STRING "ROCm install directory")
3+
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
4+
5+
# CMake Toolchain file to define compilers and path to ROCm
6+
#==================================================================================================
7+
if (NOT CMAKE_TOOLCHAIN_FILE)
8+
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/toolchain-linux.cmake")
9+
message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
710
endif()
8-
cmake_minimum_required(VERSION 3.5)
911

10-
project(TransferBench VERSION 1.62.00 LANGUAGES CXX)
12+
set(VERSION_STRING "1.63.00")
13+
project(TransferBench VERSION ${VERSION_STRING} LANGUAGES CXX)
14+
15+
## Load CMake modules
16+
#==================================================================================================
17+
include(CheckIncludeFiles)
18+
include(CheckSymbolExists)
19+
include(cmake/Dependencies.cmake) # rocm-cmake, rocm_local_targets
20+
21+
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
22+
23+
# Build options
24+
#==================================================================================================
25+
option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on this machine" OFF)
26+
option(ENABLE_NIC_EXEC "Enable RDMA NIC Executor in TransferBench" OFF)
1127

1228
# Default GPU architectures to build
1329
#==================================================================================================
@@ -16,15 +32,18 @@ set(DEFAULT_GPUS
1632
gfx908
1733
gfx90a
1834
gfx942
35+
gfx950
1936
gfx1030
2037
gfx1100
2138
gfx1101
2239
gfx1102
40+
gfx1150
41+
gfx1151
2342
gfx1200
2443
gfx1201)
2544

26-
# Build only for local GPU architecture
27-
if (BUILD_LOCAL_GPU_TARGET_ONLY)
45+
## Build only for local GPU architecture
46+
if(BUILD_LOCAL_GPU_TARGET_ONLY)
2847
message(STATUS "Building only for local GPU target")
2948
if (COMMAND rocm_local_targets)
3049
rocm_local_targets(DEFAULT_GPUS)
@@ -33,10 +52,10 @@ if (BUILD_LOCAL_GPU_TARGET_ONLY)
3352
endif()
3453
endif()
3554

36-
# Determine which GPU architectures to build for
55+
## Determine which GPU architectures to build for
3756
set(GPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if GPU_TARGETS is not defined.")
3857

39-
# Check if clang compiler can offload to GPU_TARGETS
58+
## Check if clang compiler can offload to GPU_TARGETS
4059
if (COMMAND rocm_check_target_ids)
4160
message(STATUS "Checking for ROCm support for GPU targets: " "${GPU_TARGETS}")
4261
rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${GPU_TARGETS})
@@ -45,53 +64,127 @@ else()
4564
set(SUPPORTED_GPUS ${DEFAULT_GPUS})
4665
endif()
4766

48-
set(COMPILING_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
49-
message(STATUS "Compiling for ${COMPILING_TARGETS}")
67+
set(GPU_TARGETS "${SUPPORTED_GPUS}")
68+
message(STATUS "Compiling for ${GPU_TARGETS}")
5069

51-
foreach(target ${COMPILING_TARGETS})
52-
list(APPEND static_link_flags --offload-arch=${target})
53-
endforeach()
54-
list(JOIN static_link_flags " " flags_str)
55-
set( CMAKE_CXX_FLAGS "${flags_str} ${CMAKE_CXX_FLAGS}")
70+
## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
71+
include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults
5672

57-
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib")
58-
include_directories(${ROCM_PATH}/include)
59-
find_library(IBVERBS_LIBRARY ibverbs)
60-
find_path(IBVERBS_INCLUDE_DIR infiniband/verbs.h)
61-
if (DEFINED ENV{DISABLE_NIC_EXEC})
62-
message(STATUS "Disabling NIC Executor support")
63-
elseif(IBVERBS_LIBRARY AND IBVERBS_INCLUDE_DIR)
64-
message(STATUS "Found ibverbs: ${IBVERBS_LIBRARY}. Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable")
65-
add_definitions(-DNIC_EXEC_ENABLED)
66-
link_libraries(ibverbs)
73+
# Check for required dependencies
74+
#==================================================================================================
75+
## Try to establish ROCM_PATH (for find_package)
76+
if(NOT DEFINED ROCM_PATH)
77+
# Guess default location
78+
set(ROCM_PATH "/opt/rocm")
79+
message(WARNING "Unable to find ROCM_PATH: Falling back to ${ROCM_PATH}")
6780
else()
68-
if (NOT IBVERBS_LIBRARY)
69-
message(WARNING "IBVerbs library not found")
70-
elseif (NOT IBVERBS_INCLUDE_DIR)
71-
message(WARNING "infiniband/verbs.h not found")
81+
message(STATUS "ROCM_PATH found: ${ROCM_PATH}")
82+
endif()
83+
set(ENV{ROCM_PATH} ${ROCM_PATH})
84+
85+
## Set CMAKE flags
86+
if (NOT DEFINED CMAKE_CXX_STANDARD)
87+
set(CMAKE_CXX_STANDARD 17)
88+
endif()
89+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
90+
set(CMAKE_CXX_EXTENSIONS OFF)
91+
list(APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths for finding HIP / HSA
92+
${ROCM_PATH}
93+
${ROCM_PATH}/llvm
94+
${ROCM_PATH}/hip
95+
/opt/rocm
96+
/opt/rocm/llvm
97+
/opt/rocm/hip)
98+
99+
## Check for HIP
100+
find_package(hip REQUIRED CONFIG PATHS ${CMAKE_PREFIX_PATH})
101+
message(STATUS "HIP compiler: ${HIP_COMPILER}")
102+
103+
## Ensuring that CXX compiler meets expectations
104+
if(NOT (("${CMAKE_CXX_COMPILER}" MATCHES ".*hipcc") OR ("${CMAKE_CXX_COMPILER}" MATCHES ".*clang\\+\\+")))
105+
message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.")
106+
endif()
107+
108+
## Check for Threads
109+
find_package(Threads REQUIRED)
110+
set(THREADS_PREFER_PTHREAD_FLAG ON)
111+
112+
## Check for numa support
113+
find_library(NUMA_LIBRARY numa)
114+
find_path(NUMA_INCLUDE_DIR numa.h)
115+
if(NUMA_LIBRARY AND NUMA_INCLUDE_DIR)
116+
add_library(numa SHARED IMPORTED)
117+
set_target_properties(numa PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIR}" IMPORTED_LOCATION "${NUMA_LIBRARY}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIR}")
118+
endif()
119+
120+
## Check for hsa support
121+
find_library(HSA_LIBRARY hsa-runtime64 PATHS ${ROCM_PATH} ${ROCM_PATH}/lib)
122+
find_path(HSA_INCLUDE_DIR hsa.h PATHS ${ROCM_PATH}/include ${ROCM_PATH}/include/hsa)
123+
if(HSA_LIBRARY AND HSA_INCLUDE_DIR)
124+
add_library(hsa-runtime64 SHARED IMPORTED)
125+
set_target_properties(hsa-runtime64 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HSA_INCLUDE_DIR}" IMPORTED_LOCATION "${HSA_LIBRARY}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HSA_INCLUDE_DIR}")
126+
endif()
127+
128+
## Check for infiniband verbs support
129+
if(DEFINED ENV{ENABLE_NIC_EXEC} OR DEFINED ENABLE_NIC_EXEC)
130+
message(STATUS "For CMake builds, NIC executor also requires explicit opt-in by setting CMake flag -DENABLE_NIC_EXEC=1 or environment flag ENABLE_NIC_EXEC=1")
131+
find_library(IBVERBS_LIBRARY ibverbs)
132+
find_path(IBVERBS_INCLUDE_DIR infiniband/verbs.h)
133+
if(IBVERBS_LIBRARY AND IBVERBS_INCLUDE_DIR)
134+
add_library(ibverbs SHARED IMPORTED)
135+
set_target_properties(ibverbs PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}" IMPORTED_LOCATION "${IBVERBS_LIBRARY}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}")
136+
set(IBVERBS_FOUND 1)
137+
message(STATUS "Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable")
138+
else()
139+
if(NOT IBVERBS_LIBRARY)
140+
message(WARNING "IBVerbs library not found")
141+
elseif(NOT IBVERBS_INCLUDE_DIR)
142+
message(WARNING "infiniband/verbs.h not found")
143+
endif()
144+
message(WARNING "Building without NIC executor support. To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed")
72145
endif()
73-
message(WARNING "Building without NIC executor support. To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed")
146+
else()
147+
message(STATUS "Disabling NIC Executor support")
148+
message(WARNING "For CMake builds, NIC executor requires explicit opt-in by setting ENABLE_NIC_EXEC=1")
74149
endif()
75150

76-
link_libraries(numa hsa-runtime64 pthread)
77-
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY .)
151+
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY .)
152+
78153
add_executable(TransferBench src/client/Client.cpp)
79-
target_include_directories(TransferBench PRIVATE src/header src/client src/client/Presets)
80154

81-
find_package(ROCM 0.8 REQUIRED PATHS ${ROCM_PATH})
82-
include(ROCMInstallTargets)
83-
include(ROCMCreatePackage)
84-
set(ROCMCHECKS_WARN_TOOLCHAIN_VAR OFF)
155+
target_include_directories(TransferBench PRIVATE src/header)
156+
target_include_directories(TransferBench PRIVATE src/client)
157+
target_include_directories(TransferBench PRIVATE src/client/Presets)
158+
target_include_directories(TransferBench PRIVATE ${NUMA_INCLUDE_DIR})
159+
target_include_directories(TransferBench PRIVATE ${HSA_INCLUDE_DIR})
160+
if(IBVERBS_FOUND)
161+
target_include_directories(TransferBench PRIVATE ${IBVERBS_INCLUDE_DIR})
162+
target_link_libraries(TransferBench PRIVATE ${IBVERBS_LIBRARY})
163+
target_compile_definitions(TransferBench PRIVATE NIC_EXEC_ENABLED)
164+
endif()
85165

86-
set(PACKAGE_NAME TB)
87-
set(LIBRARY_NAME TransferBench)
166+
target_link_libraries(TransferBench PRIVATE -fgpu-rdc) # Required when linking relocatable device code
167+
target_link_libraries(TransferBench PRIVATE Threads::Threads)
168+
target_link_libraries(TransferBench INTERFACE hip::host)
169+
target_link_libraries(TransferBench PRIVATE hip::device)
170+
target_link_libraries(TransferBench PRIVATE dl)
171+
target_link_libraries(TransferBench PRIVATE ${NUMA_LIBRARY})
172+
target_link_libraries(TransferBench PRIVATE ${HSA_LIBRARY})
88173

89174
rocm_install(TARGETS TransferBench COMPONENT devel)
175+
rocm_setup_version(VERSION ${VERSION_STRING})
90176

91-
rocm_package_add_dependencies(DEPENDS numactl hsa-rocr)
177+
# Package specific CPACK vars
178+
rocm_package_add_dependencies(DEPENDS "numactl" "hsa-rocr")
179+
180+
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md")
181+
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
182+
183+
set(PACKAGE_NAME TB)
184+
set(LIBRARY_NAME TransferBench)
92185

93186
rocm_create_package(
94-
NAME ${LIBRARY_NAME}
95-
DESCRIPTION "TransferBench package"
96-
MAINTAINER "RCCL Team <gilbert.lee@amd.com>"
187+
NAME ${LIBRARY_NAME}
188+
DESCRIPTION "TransferBench package"
189+
MAINTAINER "RCCL Team <gilbert.lee@amd.com>"
97190
)

Makefile

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -6,57 +6,81 @@
66
ROCM_PATH ?= /opt/rocm
77
CUDA_PATH ?= /usr/local/cuda
88

9-
HIPCC=$(ROCM_PATH)/bin/hipcc
10-
NVCC=$(CUDA_PATH)/bin/nvcc
11-
12-
# Compile TransferBenchCuda if nvcc detected
13-
ifeq ("$(shell test -e $(NVCC) && echo found)", "found")
14-
EXE=TransferBenchCuda
15-
CXX=$(NVCC)
16-
else
17-
EXE=TransferBench
18-
CXX=$(HIPCC)
19-
endif
9+
HIPCC ?= $(ROCM_PATH)/bin/amdclang++
10+
NVCC ?= $(CUDA_PATH)/bin/nvcc
11+
12+
# This can be a space separated string of multiple GPU targets
13+
# Default is the native GPU target
14+
GPU_TARGETS ?= native
15+
16+
DEBUG ?= 0
17+
18+
ifeq ($(filter clean,$(MAKECMDGOALS)),)
19+
# Compile TransferBenchCuda if nvcc detected
20+
ifeq ("$(shell test -e $(NVCC) && echo found)", "found")
21+
EXE=TransferBenchCuda
22+
CXX=$(NVCC)
23+
else
24+
EXE=TransferBench
25+
ifeq ("$(shell test -e $(HIPCC) && echo found)", "found")
26+
CXX=$(HIPCC)
27+
else ifeq ("$(shell test -e $(ROCM_PATH)/bin/hipcc && echo found)", "found")
28+
CXX=$(ROCM_PATH)/bin/hipcc
29+
$(warning "Could not find $(HIPCC). Using fallback to $(CXX)")
30+
else
31+
$(error "Could not find $(HIPCC) or $(ROCM_PATH)/bin/hipcc. Check if the path is correct if you want to build $(EXE)")
32+
endif
33+
GPU_TARGETS_FLAGS = $(foreach target,$(GPU_TARGETS),"--offload-arch=$(target)")
34+
endif
2035

21-
CXXFLAGS = -I$(ROCM_PATH)/include -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64
22-
NVFLAGS = -x cu -lnuma -arch=native
23-
COMMON_FLAGS = -O3 -I./src/header -I./src/client -I./src/client/Presets
24-
LDFLAGS += -lpthread
25-
26-
# Compile RDMA executor if
27-
# 1) DISABLE_NIC_EXEC is not set to 1
28-
# 2) IBVerbs is found in the Dynamic Linker cache
29-
# 3) infiniband/verbs.h is found in the default include path
30-
NIC_ENABLED = 0
31-
ifneq ($(DISABLE_NIC_EXEC),1)
32-
ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
33-
$(info lib IBVerbs not found)
34-
else ifeq ("$(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
35-
$(info infiniband/verbs.h not found)
36+
CXXFLAGS = -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
37+
HIPLDFLAGS= -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64
38+
HIPFLAGS = -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)
39+
NVFLAGS = -x cu -lnuma -arch=native
40+
41+
ifeq ($(DEBUG), 0)
42+
COMMON_FLAGS += -O3
3643
else
37-
LDFLAGS += -libverbs -DNIC_EXEC_ENABLED
38-
NVFLAGS += -libverbs -DNIC_EXEC_ENABLED
39-
NIC_ENABLED = 1
44+
COMMON_FLAGS += -O0 -g -ggdb3
4045
endif
41-
ifeq ($(NIC_ENABLED), 0)
42-
$(info To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed)
46+
COMMON_FLAGS += -I./src/header -I./src/client -I./src/client/Presets
47+
48+
LDFLAGS += -lpthread
49+
50+
# Compile RDMA executor if
51+
# 1) DISABLE_NIC_EXEC is not set to 1
52+
# 2) IBVerbs is found in the Dynamic Linker cache
53+
# 3) infiniband/verbs.h is found in the default include path
54+
DISABLE_NIC_EXEC ?= 0
55+
ifneq ($(DISABLE_NIC_EXEC),1)
56+
ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
57+
$(info lib IBVerbs not found)
58+
else ifeq ("$(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
59+
$(info infiniband/verbs.h not found)
60+
else
61+
CXXFLAGS += -DNIC_EXEC_ENABLED
62+
LDFLAGS += -libverbs
63+
NIC_ENABLED = 1
64+
endif
65+
ifeq ($(NIC_ENABLED), 0)
66+
$(info Building without NIC executor support)
67+
$(info To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed)
68+
else
69+
$(info Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable)
70+
endif
4371
endif
4472
endif
4573

74+
.PHONY : all clean
75+
4676
all: $(EXE)
4777

48-
TransferBench: ./src/client/Client.cpp $(shell find -regex ".*\.\hpp") NicStatus
49-
$(HIPCC) $(CXXFLAGS) $(COMMON_FLAGS) $< -o $@ $(LDFLAGS)
78+
TransferBench: ./src/client/Client.cpp $(shell find -regex ".*\.\hpp")
79+
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) $(COMMON_FLAGS) $< -o $@ $(HIPLDFLAGS) $(LDFLAGS)
5080

51-
TransferBenchCuda: ./src/client/Client.cpp $(shell find -regex ".*\.\hpp") NicStatus
81+
TransferBenchCuda: ./src/client/Client.cpp $(shell find -regex ".*\.\hpp")
5282
$(NVCC) $(NVFLAGS) $(COMMON_FLAGS) $< -o $@ $(LDFLAGS)
5383

5484
clean:
55-
rm -f *.o ./TransferBench ./TransferBenchCuda
85+
rm -f ./TransferBench ./TransferBenchCuda
5686

57-
NicStatus:
58-
ifeq ($(NIC_ENABLED), 1)
59-
$(info Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable)
60-
else
61-
$(info Building without NIC executor support)
62-
endif

0 commit comments

Comments
 (0)