-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathMakefile
More file actions
311 lines (278 loc) · 12.2 KB
/
Makefile
File metadata and controls
311 lines (278 loc) · 12.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
#
# ---- Toolkit install locations -------------------------------------------
# Each is assigned with ?= so an existing value is left untouched.
ROCM_PATH ?= /opt/rocm
CUDA_PATH ?= /usr/local/cuda
MPI_PATH  ?= /usr/local/openmpi

# ---- Compilers (derived from the install locations above) ----------------
HIPCC ?= $(ROCM_PATH)/bin/amdclang++
NVCC  ?= $(CUDA_PATH)/bin/nvcc

# ---- Feature switches -----------------------------------------------------
# Every switch is named DISABLE_<feature>: 1 turns the feature off,
# 0 leaves it on (subject to the auto-detection done further down).
#   DISABLE_NIC_EXEC    RDMA/NIC executor support                              (default: 0)
#   DISABLE_IBV_DIRECT  with NIC support on, use dlsym for libibverbs instead
#                       of direct linkage                                      (default: 0)
#   DISABLE_MPI_COMM    MPI communicator support                               (default: 0)
#   DISABLE_DMA_BUF     DMA-BUF support for GPU Direct RDMA                    (default: 1)
#   DISABLE_AMD_SMI     AMD-SMI pod membership checking support                (default: 0)
#   DISABLE_NVML        NVML pod membership detection for CUDA builds          (default: 0)
#   DISABLE_POD_COMM    pod communication support                              (default: 0)
# ROCm device libraries can live in different locations depending on packaging.
# hipcc/clang needs to find the amdgcn bitcode directory at link time.
#
# A non-empty ROCM_DEVICE_LIB_PATH supplied by the user (command line or
# environment) is kept as-is. Auto-detection runs only when the variable is
# empty, probing these directories in order:
#   1) $(ROCM_PATH)/amdgcn/bitcode
#   2) $(ROCM_PATH)/lib/llvm/amdgcn/bitcode
# If neither exists the variable stays empty.
ROCM_DEVICE_LIB_PATH ?=
ifeq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
  ifneq ($(wildcard $(ROCM_PATH)/amdgcn/bitcode),)
    ROCM_DEVICE_LIB_PATH := $(ROCM_PATH)/amdgcn/bitcode
  else ifneq ($(wildcard $(ROCM_PATH)/lib/llvm/amdgcn/bitcode),)
    ROCM_DEVICE_LIB_PATH := $(ROCM_PATH)/lib/llvm/amdgcn/bitcode
  endif
endif
# ---- Build knobs ----------------------------------------------------------
# Name of the HIP executable (used in diagnostics).
EXE = TransferBench

# GPU architecture(s) to compile for: a space-separated list of targets.
# The default, "native", selects the native GPU target.
GPU_TARGETS ?= native

# Set to 1 to compile with a single GFX kernel, which cuts compilation time.
SINGLE_KERNEL ?= 0

# 0 builds with -O3; any other value builds with -O0 -g -ggdb3.
DEBUG ?= 0
# Only perform this check if 'make clean' is not the target
ifeq ($(filter clean,$(MAKECMDGOALS)),)
ifeq ($(MAKECMDGOALS),TransferBenchCuda)
  # ---- NVIDIA path: taken when the goal list is exactly TransferBenchCuda ----
  $(info Building TransferBenchCuda)
  # nvcc has to exist at the configured location; otherwise parsing stops here.
  ifeq ($(shell test -e $(NVCC) && echo found), found)
    $(info Compiling TransferBenchCuda using $(NVCC))
  else
    $(error "Could not find $(NVCC). Please set CUDA_PATH appropriately")
  endif
  NVFLAGS = -x cu -lnuma -arch=native
else
  # ---- AMD path: choose a HIP-capable compiler ----
  # First choice is $(HIPCC). If that file does not exist, fall back to
  # $(ROCM_PATH)/bin/hipcc. If neither exists, stop with an error.
  ifeq ("$(shell test -e $(HIPCC) && echo found)", "found")
    CXX=$(HIPCC)
  else ifeq ("$(shell test -e $(ROCM_PATH)/bin/hipcc && echo found)", "found")
    CXX=$(ROCM_PATH)/bin/hipcc
    $(info "Could not find $(HIPCC). Using fallback to $(CXX)")
  else
    $(error "Could not find $(HIPCC) or $(ROCM_PATH)/bin/hipcc. Check if the path is correct if you want to build $(EXE)")
  endif

  # Header search paths and runtime libraries for the HIP build.
  CXXFLAGS = -I. -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
  HIPLDFLAGS= -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64

  # One quoted --offload-arch=<target> flag per word in GPU_TARGETS.
  GPU_TARGETS_FLAGS = $(foreach arch,$(GPU_TARGETS),"--offload-arch=$(arch)")
  $(info Compiling for $(GPU_TARGETS) architecture(s). Can modify this by setting GPU_TARGETS)

  HIPFLAGS = -Wall -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)
  # Pass the device bitcode directory explicitly only when one is known.
  ifneq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
    HIPFLAGS += --rocm-device-lib-path=$(ROCM_DEVICE_LIB_PATH)
  endif
endif
# ---- Flags shared by the HIP and CUDA builds ------------------------------
# Define SINGLE_KERNEL for the sources when requested.
ifeq ($(SINGLE_KERNEL), 1)
  COMMON_FLAGS += -DSINGLE_KERNEL
endif

# Optimisation level: any DEBUG value other than 0 selects an unoptimised
# build with debug information; DEBUG=0 selects -O3.
ifneq ($(DEBUG), 0)
  COMMON_FLAGS += -O0 -g -ggdb3
else
  COMMON_FLAGS += -O3
endif

# Project include directories.
COMMON_FLAGS += -I./src/header -I./src/client -I./src/client/Presets

# Always link against pthread.
LDFLAGS += -lpthread
NIC_ENABLED = 0
# The RDMA (NIC) executor is compiled in only when all of these hold:
#   1) DISABLE_NIC_EXEC is not set to 1
#   2) the dynamic linker cache (ldconfig -p) lists an ibverbs library
#   3) the preprocessor run via $(CXX) can resolve <infiniband/verbs.h>
# With the executor on, -DIBV_DIRECT=1 is also defined unless
# DISABLE_IBV_DIRECT=1 (verbs via direct link + constexpr pfn_*).
DISABLE_NIC_EXEC ?= 0
DISABLE_IBV_DIRECT ?= 0
ifneq ($(DISABLE_NIC_EXEC),1)
  $(info Attempting to build with NIC executor support)
  ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
    $(info - ibverbs library not found)
  else ifeq ("$(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
    $(info - infiniband/verbs.h not found)
  else
    # Library and header are both present: switch the executor on.
    COMMON_FLAGS += -DNIC_EXEC_ENABLED
    LDFLAGS += -libverbs
    NIC_ENABLED = 1
    ifneq ($(DISABLE_IBV_DIRECT),1)
      COMMON_FLAGS += -DIBV_DIRECT=1
    endif

    # DMA-BUF is opt-in: it stays off unless DISABLE_DMA_BUF=0 is given.
    DISABLE_DMA_BUF ?= 1
    ifneq ($(DISABLE_DMA_BUF), 0)
      $(info Building with DMA-BUF support disabled (DISABLE_DMA_BUF=1))
    else
      # Count occurrences of the required symbols in the preprocessed headers:
      # ibv_reg_dmabuf_mr on the verbs side and
      # hsa_amd_portable_export_dmabuf on the ROCm side.
      HAVE_IBV_DMABUF := $(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'ibv_reg_dmabuf_mr')
      HAVE_ROCM_DMABUF := $(shell echo '#include <hsa/hsa_ext_amd.h>' | $(CXX) -I$(ROCM_PATH)/include -E - 2>/dev/null | grep -c 'hsa_amd_portable_export_dmabuf')
      # Both counts must be non-zero for HAVE_DMABUF_SUPPORT to be defined.
      ifeq ($(HAVE_IBV_DMABUF):$(HAVE_ROCM_DMABUF), 0:0)
        $(info Building without DMA-BUF support: missing both ibv_reg_dmabuf_mr and ROCm DMA-BUF export)
      else ifeq ($(HAVE_IBV_DMABUF), 0)
        $(info Building without DMA-BUF support: missing ibv_reg_dmabuf_mr)
      else ifeq ($(HAVE_ROCM_DMABUF), 0)
        $(info Building without DMA-BUF support: missing ROCm DMA-BUF export)
      else
        COMMON_FLAGS += -DHAVE_DMABUF_SUPPORT
        $(info Building with DMA-BUF support)
      endif
    endif
  endif

  # Report the outcome of the detection above.
  ifneq ($(NIC_ENABLED), 0)
    $(info - Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable)
    ifeq ($(DISABLE_IBV_DIRECT),1)
      $(info - IBV_DIRECT disabled: libibverbs via dlsym, DISABLE_IBV_DIRECT=1)
    endif
  else
    $(info - Building without NIC executor support)
    $(info - To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed)
  endif
endif
MPI_ENABLED = 0
# Compile with MPI communicator support if
#   1) DISABLE_MPI_COMM is not set to 1
#   2) mpi.h is found in $(MPI_PATH)/include
# When enabled, MPI_COMM_ENABLED is defined, $(MPI_PATH)/include is added to
# the include path, and $(MPI_PATH)/lib is added to the library search path.
DISABLE_MPI_COMM ?= 0
ifneq ($(DISABLE_MPI_COMM), 1)
  $(info Attempting to build with MPI communicator support)
  ifeq ($(wildcard $(MPI_PATH)/include/mpi.h),)
    $(info - Unable to find mpi.h at $(MPI_PATH)/include. Please specify appropriate MPI_PATH)
  else
    MPI_ENABLED = 1
    COMMON_FLAGS += -DMPI_COMM_ENABLED -I$(MPI_PATH)/include
    # Use MPI_PATH verbatim for the library directory (no extra leading '/'),
    # so that relative MPI_PATH values resolve the same way as the -I flag.
    LDFLAGS += -L$(MPI_PATH)/lib -lmpi
    # NOTE(review): libmpi_cxx is linked only when DEBUG=1 - confirm this is intentional.
    ifeq ($(DEBUG), 1)
      LDFLAGS += -lmpi_cxx
    endif
  endif

  # Report the outcome of the detection above.
  ifeq ($(MPI_ENABLED), 0)
    $(info - Building without MPI communicator support)
    $(info - To use TransferBench with MPI support, install MPI libraries and specify appropriate MPI_PATH)
  else
    $(info - Building with MPI communicator support. Can set DISABLE_MPI_COMM=1 to disable)
  endif
endif
AMD_SMI_ENABLED = 0
# Enable AMD-SMI support for pod membership detection
# Compile with AMD-SMI support if
#   1) DISABLE_AMD_SMI is not set to 1
#   2) the goal is not TransferBenchCuda
#   3) the $(AMD_SMI) tool is on PATH and reports a "Library version" of at
#      least AMD_SMI_MIN_MAJOR.AMD_SMI_MIN_MINOR (26.4)
# NOTE(review): only major.minor is compared; a patch-level requirement
# (e.g. 26.4.1) is not enforced - confirm whether that is needed.
DISABLE_AMD_SMI ?= 0
ifneq ($(DISABLE_AMD_SMI), 1)
  ifneq ($(MAKECMDGOALS),TransferBenchCuda)
    $(info Attempting to build with amd-smi support)
    # Check for appropriate AMD SMI version (for querying pod membership)
    AMD_SMI_MIN_MAJOR := 26
    AMD_SMI_MIN_MINOR := 4
    AMD_SMI ?= amd-smi
    AMD_SMI_EXISTS := $(shell command -v $(AMD_SMI) >/dev/null 2>&1 && echo yes || echo no)
    ifeq ($(AMD_SMI_EXISTS),no)
      $(info - $(AMD_SMI) not found. Disabling pod communication support)
    else
      # Extract "<major> <minor>" from the "Library version: X.Y..." line.
      # The result is empty when no such line is printed.
      AMD_SMI_VERSION_STR := $(shell $(AMD_SMI) version | sed -n -E 's/.*Library version: ([0-9]+)\.([0-9]+).*/\1 \2/p')
      AMD_SMI_MAJOR := $(word 1,$(AMD_SMI_VERSION_STR))
      AMD_SMI_MINOR := $(word 2,$(AMD_SMI_VERSION_STR))
      ifeq ($(strip $(AMD_SMI_MINOR)),)
        # No version could be parsed: skip the numeric comparison, which would
        # otherwise be fed empty operands, and build without amd-smi.
        AMD_SMI_VERSION_OK := no
        $(info - Unable to parse a library version from '$(AMD_SMI) version'. Building without amd-smi support)
        $(info - Pod membership may be forced in TransferBench by setting TB_FORCE_SINGLE_POD=1)
      else
        AMD_SMI_VERSION_OK := $(shell \
          if [ $(AMD_SMI_MAJOR) -gt $(AMD_SMI_MIN_MAJOR) ] || \
             [ $(AMD_SMI_MAJOR) -eq $(AMD_SMI_MIN_MAJOR) -a $(AMD_SMI_MINOR) -ge $(AMD_SMI_MIN_MINOR) ]; then \
            echo yes; \
          else \
            echo no; \
          fi)
        ifeq ($(AMD_SMI_VERSION_OK),yes)
          $(info - Detected amd-smi version $(AMD_SMI_MAJOR).$(AMD_SMI_MINOR) which has pod support)
          COMMON_FLAGS += -DAMD_SMI_ENABLED
          LDFLAGS += -lamd_smi
          AMD_SMI_ENABLED = 1
        else
          $(info - Detected amd-smi version $(AMD_SMI_MAJOR).$(AMD_SMI_MINOR) which does not have pod support)
          $(info - Pod membership querying requires amd-smi version of at least $(AMD_SMI_MIN_MAJOR).$(AMD_SMI_MIN_MINOR))
          $(info - Pod membership may be forced in TransferBench by setting TB_FORCE_SINGLE_POD=1)
        endif
      endif
    endif
  endif
endif
NVML_ENABLED = 0
# NVML provides pod membership detection on NVIDIA platforms.
# It is compiled in only when all of these hold:
#   1) DISABLE_NVML is not set to 1
#   2) the goal is TransferBenchCuda
#   3) nvml.h exists under $(CUDA_PATH)/include
DISABLE_NVML ?= 0
ifneq ($(DISABLE_NVML), 1)
  ifeq ($(MAKECMDGOALS),TransferBenchCuda)
    $(info Attempting to build with NVML support)
    ifeq ($(wildcard $(CUDA_PATH)/include/nvml.h),)
      # Header missing: leave NVML off and point at the manual override.
      $(info - nvml.h not found at $(CUDA_PATH)/include. Building without NVML support)
      $(info - Pod membership may be forced by setting TB_FORCE_SINGLE_POD=1)
    else
      # Header present: define NVML_ENABLED and link libnvidia-ml.
      COMMON_FLAGS += -DNVML_ENABLED
      LDFLAGS += -lnvidia-ml
      NVML_ENABLED = 1
      $(info - Building with NVML support for pod membership detection)
    endif
  endif
endif
POD_ENABLED = 0
# Compile with pod support if
#   1) DISABLE_POD_COMM is not set to 1
#   2) For HIP:  version reported by $(HIPCONFIG) >= HIP_MIN_MAJOR.HIP_MIN_MINOR   (8.0)
#      For CUDA: release reported by $(NVCC)      >= CUDA_MIN_MAJOR.CUDA_MIN_MINOR (12.2)
# NOTE(review): the minimum CUDA version enforced below is 12.2; confirm
# whether a newer release (e.g. 12.8.1) is actually required.
# On success POD_COMM_ENABLED is defined and POD_ENABLED is set to 1 on both
# the CUDA and the HIP path.
DISABLE_POD_COMM ?= 0
ifneq ($(DISABLE_POD_COMM), 1)
  $(info Attempting to build with pod communication support)
  ifeq ($(MAKECMDGOALS),TransferBenchCuda)
    # Check for appropriate CUDA support for MNNVL
    CUDA_MIN_MAJOR := 12
    CUDA_MIN_MINOR := 2
    # Extract "<major> <minor>" from the "release X.Y" line of nvcc --version.
    CUDA_VERSION_STR := $(shell $(NVCC) --version | grep release | sed -E 's/.*release ([0-9]+)\.([0-9]+).*/\1 \2/')
    CUDA_MAJOR := $(word 1,$(CUDA_VERSION_STR))
    CUDA_MINOR := $(word 2,$(CUDA_VERSION_STR))
    ifeq ($(strip $(CUDA_MINOR)),)
      # No version could be parsed: skip the numeric comparison, which would
      # otherwise be fed empty operands.
      CUDA_VERSION_OK := no
      $(info - Unable to parse CUDA version from '$(NVCC) --version'. Disabling pod communication support)
    else
      CUDA_VERSION_OK := $(shell \
        if [ $(CUDA_MAJOR) -gt $(CUDA_MIN_MAJOR) ] || \
           [ $(CUDA_MAJOR) -eq $(CUDA_MIN_MAJOR) -a $(CUDA_MINOR) -ge $(CUDA_MIN_MINOR) ]; then \
          echo yes; \
        else \
          echo no; \
        fi)
      ifeq ($(CUDA_VERSION_OK),yes)
        $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which has MNNVL support)
        COMMON_FLAGS += -DPOD_COMM_ENABLED
        LDFLAGS += -lcuda
        POD_ENABLED = 1
      else
        $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which does not have MNNVL support)
        $(info - Pod support will require CUDA version of at least $(CUDA_MIN_MAJOR).$(CUDA_MIN_MINOR))
      endif
    endif
  else
    # Check for appropriate HIP version (for exchanging pod memory handles)
    HIP_MIN_MAJOR := 8
    HIP_MIN_MINOR := 0
    # Check for hipconfig
    HIPCONFIG ?= hipconfig
    HIP_EXISTS := $(shell command -v $(HIPCONFIG) >/dev/null 2>&1 && echo yes || echo no)
    ifeq ($(HIP_EXISTS),yes)
      # Reduce the leading "X.Y" of the hipconfig output to "<major> <minor>".
      HIP_VERSION_STR := $(shell $(HIPCONFIG) --version | sed -E 's/([0-9]+)\.([0-9]+).*/\1 \2/')
      HIP_MAJOR := $(word 1,$(HIP_VERSION_STR))
      HIP_MINOR := $(word 2,$(HIP_VERSION_STR))
      ifeq ($(strip $(HIP_MINOR)),)
        # No version could be parsed: skip the numeric comparison, which would
        # otherwise be fed empty operands.
        HIP_VERSION_OK := no
        $(info - Unable to parse HIP version from '$(HIPCONFIG) --version'. Disabling pod communication support)
      else
        HIP_VERSION_OK := $(shell \
          if [ $(HIP_MAJOR) -gt $(HIP_MIN_MAJOR) ] || \
             [ $(HIP_MAJOR) -eq $(HIP_MIN_MAJOR) -a $(HIP_MINOR) -ge $(HIP_MIN_MINOR) ]; then \
            echo yes; \
          else \
            echo no; \
          fi)
        ifeq ($(HIP_VERSION_OK),yes)
          $(info - Detected HIP version $(HIP_MAJOR).$(HIP_MINOR) which has pod support)
          COMMON_FLAGS += -DPOD_COMM_ENABLED
          # Keep the status variable in step with the CUDA branch.
          POD_ENABLED = 1
        else
          $(info - Detected HIP version $(HIP_MAJOR).$(HIP_MINOR) which does not have pod support)
          $(info - Pod support requires HIP version of at least $(HIP_MIN_MAJOR).$(HIP_MIN_MINOR))
        endif
      endif
    else
      $(info - Unable to determine HIP version via $(HIPCONFIG). Try specifying path to hipconfig in HIPCONFIG)
      $(info - Disabling pod communication support)
    endif
  endif
endif
endif
# 'all' and 'clean' are commands, not files.
.PHONY: all clean

# Default goal: the HIP/ROCm executable.
all: TransferBench

# Every .hpp file below the current directory, collected once at parse time.
# Listed as prerequisites so that editing any header triggers a rebuild.
TB_HEADERS := $(shell find . -name '*.hpp')

# HIP/ROCm build: compiles the single translation unit Client.cpp ($<) into
# the target ($@) with the compiler chosen in CXX.
TransferBench: ./src/client/Client.cpp $(TB_HEADERS)
	$(CXX) $(CXXFLAGS) $(HIPFLAGS) $(COMMON_FLAGS) $< -o $@ $(HIPLDFLAGS) $(LDFLAGS)

# CUDA build: same source, compiled with nvcc. Must be requested explicitly
# (make TransferBenchCuda) so that the CUDA-specific configuration is selected.
TransferBenchCuda: ./src/client/Client.cpp $(TB_HEADERS)
	$(NVCC) $(NVFLAGS) $(COMMON_FLAGS) $< -o $@ $(LDFLAGS)

# Remove both executables; -f keeps this quiet when they do not exist.
clean:
	rm -f ./TransferBench ./TransferBenchCuda