Skip to content

Commit f8255f9

Browse files
authored
Add support for ROCm 6.1.2 (#4)
Extraneous libraries were included for various backends; this commit removes them. It also adds support for PyTorch and ONNX Runtime with ROCm 6.1.2.
1 parent 2028721 commit f8255f9

7 files changed

Lines changed: 125 additions & 7 deletions

File tree

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ build_torch: $(TORCH_ARCHIVE)
7878

7979
.PHONY: clean_torch
8080
clean_torch:
81-
rm -rf $(TORCH_BUILD_DIR) $(TORCH_ARCHIVE) $(TORCH_INSTALL_DIR)
8281
cd pytorch && git clean -fdx && git restore .
8382
cd pytorch/third_party/kineto && git restore .
8483

@@ -94,7 +93,8 @@ clean_tensorflow:
9493
rm -rf $(TF_INSTALL_DIR)
9594
cd tensorflow && \
9695
bazel clean --expunge_async && \
97-
git restore .
96+
git restore . && \
97+
git reset --hard
9898

9999
.PHONY: clean_onnxruntime
100100
clean_onnxruntime:

architectures/linux-rocm-6.1.2.mk

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
2+
# BSD 2-Clause License
3+
#
4+
# Copyright (c) 2024, Hewlett Packard Enterprise
5+
# All rights reserved.
6+
#
7+
# Redistribution and use in source and binary forms, with or without
8+
# modification, are permitted provided that the following conditions are met:
9+
#
10+
# 1. Redistributions of source code must retain the above copyright notice, this
11+
# list of conditions and the following disclaimer.
12+
#
13+
# 2. Redistributions in binary form must reproduce the above copyright notice,
14+
# this list of conditions and the following disclaimer in the documentation
15+
# and/or other materials provided with the distribution.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
OS = linux
29+
ARCHITECTURE = x64
30+
ROCM_VERSION = 6.1.2
31+
STACK=rocm-$(ROCM_VERSION)
32+
33+
# pyTorch options
34+
PYTORCH_VERSION = 2.4.0
35+
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF
36+
TORCH_CMAKE_OPTIONS += -DUSE_ROCM=ON -DUSE_RCCL=ON -DROCM_SOURCE_DIR=${ROCM_PATH}
37+
TORCH_CMAKE_OPTIONS += -DUSE_NCCL=OFF -DUSE_CUDA=OFF -DUSE_STATIC_MKL=ON
38+
PYTORCH_PREBUILD_TARGETS = pytorch_rocm_checkout pytorch_rocm_prebuild
39+
40+
41+
# Tensorflow options
42+
TF_VERSION = 2.15
43+
TF_TAG = r$(TF_VERSION)-rocm-enhanced
44+
TF_REMOTE = https://github.com/ROCm/tensorflow-upstream.git
45+
TF_PREBUILD_TARGETS = tf_rocm_checkout tf_rocm_prebuild
46+
TF_BAZEL_OPTS = --config=opt --verbose_failures
47+
48+
ONNXRT_VERSION = 1.17.3
49+
ONNXRT_OPTIONS = --use_rocm --rocm_home $(ROCM_PATH)
50+
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout
51+
# No prebuild steps for ONNX Runtime beyond the checkout
52+
53+
# From PyTorch for ROCm instructions
54+
# https://github.com/pytorch/pytorch/blob/v2.3.1/README.md?plain=1#L241-L245
55+
# For ROCm 5.5.0 and later, one of the ATen files also needs to be patched
56+
pytorch_rocm_checkout:
57+
cd pytorch && \
58+
git checkout v${PYTORCH_VERSION} && \
59+
git submodule update --init --recursive && \
60+
git reset --hard
61+
62+
pytorch_rocm_prebuild:
63+
# Run the AMD build script from inside the pytorch checkout; '&&' stops the
# recipe if the directory is missing instead of running python in the wrong place.
cd pytorch && python tools/amd_build/build_amd.py
64+
sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
65+
sed -i 's,/opt/rocm,${ROCM_PATH},g' pytorch/third_party/kineto/libkineto/CMakeLists.txt
66+
sed -i 's,\.,\\.,g' pytorch/cmake/public/LoadHIP.cmake
67+
68+
# (1) Restore .bazelrc to its checked-in state (the patch that avoids hard-coded paths to Clang is currently disabled)
69+
# (2) Run the bazel configure script
70+
tf_rocm_prebuild:
71+
# Use '&&' so the restore only runs if the tensorflow checkout exists.
cd tensorflow && \
72+
git restore .bazelrc
73+
# git apply ../patches/tensorflow/bazelrc.rocm.patch
74+
# Use '&&' so configure.py is never launched from outside the tensorflow checkout.
cd tensorflow && \
75+
USE_DEFAULT_PYTHON_LIB_PATH=1 \
76+
PYTHON_BIN_PATH=$$(which python) \
77+
TF_NEED_CLANG=0 \
78+
TF_NEED_ROCM=1 \
79+
TF_NEED_CUDA=0 \
80+
CC_OPT_FLAGS="-Wno-sign-compare -B/usr/bin" \
81+
TF_SET_ANDROID_WORKSPACE=0 \
82+
python configure.py
83+
84+
tf_rocm_checkout:
85+
# Use '&&' so the fetch/checkout cannot act on the enclosing repository if cd fails.
cd tensorflow && \
86+
git fetch $(TF_REMOTE) $(TF_TAG) && \
87+
git checkout FETCH_HEAD
88+
89+
onnxrt_checkout:
90+
cd onnxruntime && \
91+
git checkout v$(ONNXRT_VERSION) && \
92+
git reset --hard && \
93+
git clean -xdf && \
94+
git submodule update --init --recursive
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
module purge
2+
3+
export ROCM_VERSION=6.1.2
4+
5+
module load PrgEnv-gnu rocm/$ROCM_VERSION libffi libsqlite3 cmake cray-python
6+
7+
# Not all variables are set by the ROCm module; ROCM_PATH is set explicitly below (value as reported by hipconfig)
8+
export ROCM_PATH=/global/opt/rocm-$ROCM_VERSION
9+
10+
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
module purge
22

3-
module load PrgEnv-gnu cudatoolkit/11.8.0 cudnn/8.9.7.29 gcc/11.2.0 ninja libffi libsqlite3 cray-python
4-
export CC=gcc CXX=g++ FC=gfortran
3+
module load PrgEnv-gnu cudatoolkit/11.8.0 cudnn/8.9.7.29 gcc/11.2.0 ninja cray-python
4+
export CC=gcc CXX=g++ FC=gfortran
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module purge
22

3-
module load PrgEnv-gnu cudatoolkit/12.5.0 cudnn/cuda-12/9.3.0.75 gcc/11.2.0 ninja libffi libsqlite3 cray-python
3+
module load PrgEnv-gnu cudatoolkit/12.5.0 cudnn/cuda-12/9.3.0.75 gcc/11.2.0 ninja cray-python
44
export TORCH_CUDA_ARCH_LIST="5.0 5.1 5.3 6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0"
5-
export CC=gcc CXX=g++ FC=gfortran
5+
export CC=gcc CXX=g++ FC=gfortran

environments/pytorch/pinoak-rocm-5.7.0

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module purge
22

33
export ROCM_VERSION=5.7.0
4-
module load PrgEnv-gnu rocm/5.7.0 ninja libffi libsqlite3 cray-python
4+
module load PrgEnv-gnu rocm/$ROCM_VERSION ninja cray-python
55
export CC=gcc CXX=g++ FC=gfortran
66

77
# The following come from hipconfig; not all variables are set by the ROCm module
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
module purge
2+
3+
export ROCM_VERSION=6.1.2
4+
module load PrgEnv-gnu rocm/$ROCM_VERSION ninja cray-python
5+
export CC=gcc CXX=g++ FC=gfortran
6+
7+
# The following come from hipconfig; not all variables are set by the ROCm module
8+
export ROCM_PATH=$(hipconfig --rocmpath)
9+
export ROCM_SOURCE_PATH=$ROCM_PATH
10+
export HIP_PATH=$(hipconfig --path)
11+
export HIP_LIB_PATH=$ROCM_PATH/lib
12+
13+
# Build only for the gfx90a ROCm architecture
14+
export PYTORCH_ROCM_ARCH="gfx90a"

0 commit comments

Comments
 (0)