Skip to content

Commit 2028721

Browse files
authored
Expand support for CUDA 11, 12, MacOS, and ROCm (#3)
The ML Lib Builder now compiles libtensorflow, libtorch, and onnxruntime for CUDA 11, CUDA 12, MacOS, and ROCm 5.7. These have been tested on an internal HPE machine and are intended for use with SmartSim.
1 parent 2ac10fa commit 2028721

28 files changed

Lines changed: 1571 additions & 36 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ install
33
*.tgz
44
*.zip
55
*.tar.gz
6+
slurm*.out

.gitmodules

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
[submodule "pytorch"]
22
path = pytorch
33
url = https://github.com/pytorch/pytorch.git
4+
shallow = true
5+
[submodule "tensorflow"]
6+
path = tensorflow
7+
url = https://github.com/tensorflow/tensorflow.git
8+
shallow = true
9+
[submodule "onnxruntime"]
10+
path = onnxruntime
11+
url = https://github.com/microsoft/onnxruntime.git

Makefile

Lines changed: 89 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,32 @@
2424
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2525
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626

27-
PYTORCH_VERSION=2.0.1
28-
OSX_ARCHITECTURE=arm64
2927

30-
TORCH_TARGET = libtorch-macos-$(OSX_ARCHITECTURE)-$(PYTORCH_VERSION).zip
31-
TORCH_BUILD = $(PWD)/build/libtorch
32-
TORCH_INSTALL = $(PWD)/install/libtorch
28+
# The per-platform settings (OS, ARCHITECTURE, STACK, library versions and
# the prebuild targets) come from an architecture file named by ARCH_FILE.
# `make help` only greps this Makefile, so it may run without one; every
# other invocation (including a bare `make`) still requires ARCH_FILE.
ifeq ($(strip $(ARCH_FILE)),)
ifneq ($(MAKECMDGOALS),help)
$(error Must specify ARCH_FILE)
endif
else
include $(ARCH_FILE)
endif
33+
34+
# Root directories for installed artifacts and for out-of-tree builds.
# $(CURDIR) is maintained by make itself (and follows `make -C <dir>`),
# whereas $(PWD) is only whatever the invoking shell happened to export.
INSTALL_DIR = $(CURDIR)/install
BUILD_DIR = $(CURDIR)/build
36+
37+
TORCH_ARCHIVE = $(INSTALL_DIR)/libtorch-$(PYTORCH_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
38+
TORCH_BUILD_DIR = $(BUILD_DIR)/libtorch
39+
TORCH_INSTALL_DIR = $(INSTALL_DIR)/libtorch
3340

34-
TORCH_CMAKE_OPTIONS =
35-
TORCH_CMAKE_OPTIONS += -DCMAKE_OSX_ARCHITECTURES=$(OSX_ARCHITECTURE)
36-
TORCH_CMAKE_OPTIONS += -DUSE_MKL=OFF -DUSE_MKLDNN=OFF -DUSE_ITT=OFF
37-
TORCH_CMAKE_OPTIONS += -DUSE_QNNPACK=OFF -DUSE_KINETO=OFF
41+
TF_ARCHIVE = $(INSTALL_DIR)/libtensorflow-$(TF_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
42+
TF_INSTALL_DIR = $(INSTALL_DIR)/libtensorflow
43+
# Note: TF uses its own build system; cannot specify a build directory
44+
45+
ONNXRT_ARCHIVE = $(INSTALL_DIR)/onnxruntime-$(ONNXRT_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
46+
ONNXRT_BUILD_DIR = $(BUILD_DIR)/onnxruntime
47+
ONNXRT_INSTALL_DIR = $(INSTALL_DIR)/onnxruntime
3848

3949
# Print the "# help:" comment lines of this Makefile with the marker stripped
.PHONY: help
help:
	@grep "^# help\:" Makefile | grep -v grep | sed -e 's/\# help\: //' -e 's/\# help\://'
4252

43-
ifneq ($(shell uname), Darwin)
44-
$(error This tool requires Mac OSX)
45-
endif
46-
4753
# help:
4854
# help: ----Overview----
4955
# help: This makefile can be used to build ML backends for use on arm64. Generally
@@ -55,35 +61,83 @@ endif
5561
# help: ----Meta targets----
5662
# help: clean -- Cleans all build and install directories
5763
.PHONY: clean
58-
clean: clean_torch
64+
clean: clean_torch clean_tensorflow clean_onnxruntime
5965

60-
# help:
6166
# help: ----Build Targets----
62-
# help: torch -- Builds libtorch
63-
# help:
64-
.PHONY: torch
65-
torch: $(TORCH_TARGET)
6667

67-
# Checkout a specific version of Torch and update all of the torch submodules
68-
.PHONY: checkout_torch
69-
checkout_torch:
70-
cd pytorch && git checkout v$(PYTORCH_VERSION) && \
71-
git submodule foreach --recursive git reset --hard && \
72-
git submodule update --init --recursive
73-
74-
$(TORCH_BUILD) $(TORCH_INSTALL):
68+
## Torch section
69+
$(TORCH_BUILD_DIR):
7570
mkdir -p $@
7671

77-
.PHONY: build_torch
78-
build_torch: $(TORCH_BUILD) $(TORCH_INSTALL) checkout_torch
79-
cd $< && \
80-
cmake -DCMAKE_INSTALL_PREFIX=$(TORCH_INSTALL) $(TORCH_CMAKE_OPTIONS) ../../pytorch && \
81-
make install -j 6
72+
# Bundle the installed libtorch tree into the versioned archive.
# compile_torch is declared phony, so requesting the archive always
# re-runs the compile step first.
$(TORCH_ARCHIVE): $(TORCH_ARCHIVE_MODS) compile_torch
	tar -C $(INSTALL_DIR) -czf $@ libtorch/
8274

83-
$(TORCH_TARGET): build_torch
84-
cd install && zip -r ../$@ libtorch
75+
# help: build_torch -- Builds libtorch
76+
.PHONY: build_torch
77+
build_torch: $(TORCH_ARCHIVE)
8578

8679
.PHONY: clean_torch
8780
clean_torch:
88-
rm -rf $(TORCH_BUILD) $(TORCH_TARGET) $(TORCH_INSTALL)
81+
rm -rf $(TORCH_BUILD_DIR) $(TORCH_ARCHIVE) $(TORCH_INSTALL_DIR)
82+
cd pytorch && git clean -fdx && git restore .
83+
cd pytorch/third_party/kineto && git restore .
84+
85+
# Configure and build libtorch out-of-tree with CMake + Ninja, installing
# into $(TORCH_INSTALL_DIR). Architecture-specific preparation runs first
# through $(PYTORCH_PREBUILD_TARGETS).
# The source tree is addressed as $(CURDIR)/pytorch rather than a path
# relative to the build directory, so the build still configures correctly
# if TORCH_BUILD_DIR is not exactly two levels below the repository root.
.PHONY: compile_torch
compile_torch: $(TORCH_BUILD_DIR) $(PYTORCH_PREBUILD_TARGETS)
	cd $(TORCH_BUILD_DIR) && \
	cmake -GNinja -DCMAKE_INSTALL_PREFIX=$(TORCH_INSTALL_DIR) -DPYTHON_EXECUTABLE=$$(which python) \
		$(TORCH_CMAKE_OPTIONS) $(CURDIR)/pytorch && \
	ninja install
91+
92+
# Remove the TensorFlow install tree and its archive (mirroring what
# clean_torch does for libtorch), then expunge Bazel's outputs and discard
# modifications to tracked files in the tensorflow checkout.
.PHONY: clean_tensorflow
clean_tensorflow:
	rm -rf $(TF_INSTALL_DIR) $(TF_ARCHIVE)
	cd tensorflow && \
	bazel clean --expunge_async && \
	git restore .
98+
99+
# Remove the ONNX Runtime install tree, build tree and archive (mirroring
# what clean_torch does for libtorch), then return the onnxruntime checkout
# to a pristine state. `git reset --hard` already discards every
# tracked-file modification, so no separate `git restore` is needed.
.PHONY: clean_onnxruntime
clean_onnxruntime:
	rm -rf $(ONNXRT_INSTALL_DIR) $(ONNXRT_BUILD_DIR) $(ONNXRT_ARCHIVE)
	cd onnxruntime && \
	git reset --hard && \
	git clean -fdx
106+
107+
## Tensorflow section
108+
$(TF_INSTALL_DIR):
109+
mkdir -p $@
110+
111+
# Build libtensorflow with Bazel, unpack the resulting package into
# $(TF_INSTALL_DIR), and re-bundle it as the versioned archive.
# The install directory is an order-only prerequisite (after the `|`): it
# must exist before the recipe runs, but its timestamp, which changes
# whenever files are unpacked into it, must not decide whether the archive
# is out of date.
$(TF_ARCHIVE): $(TF_PREBUILD_TARGETS) | $(TF_INSTALL_DIR)
	cd tensorflow && \
	bazel build $(TF_BAZEL_OPTS) //tensorflow/tools/lib_package:libtensorflow
	cp tensorflow/bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz $(TF_INSTALL_DIR)
	cd $(TF_INSTALL_DIR) && tar -xzf libtensorflow.tar.gz && rm -f libtensorflow.tar.gz
	cd $(INSTALL_DIR) && tar -czf $@ libtensorflow
117+
118+
# help: build_tensorflow -- Builds Tensorflow
119+
.PHONY: build_tensorflow
120+
build_tensorflow: $(TF_ARCHIVE)
121+
122+
## ONNX Runtime
123+
# Patch and build ONNX Runtime through its own build driver
# (tools/ci_build/build.py), after $(ONNXRT_PREBUILD_TARGETS) have run.
# The install patch is applied only when it is not already present: a
# successful `git apply --reverse --check` means the tree is already
# patched, so re-running this target does not fail at `git apply`.
.PHONY: compile_onnxruntime
compile_onnxruntime: $(ONNXRT_PREBUILD_TARGETS)
	cd onnxruntime && \
	{ git apply --reverse --check ../patches/onnxruntime/build.install.patch 2>/dev/null || \
	  git apply ../patches/onnxruntime/build.install.patch; }
	cd onnxruntime && python tools/ci_build/build.py \
		--config Release \
		--build_dir=$(ONNXRT_BUILD_DIR) \
		--compile_no_warning_as_error \
		--parallel \
		--skip_tests \
		--install_dir=$(ONNXRT_INSTALL_DIR) \
		--build_shared_lib \
		$(ONNXRT_OPTIONS)
135+
136+
# Install the compiled ONNX Runtime, flatten its layout to include/ and
# lib/, and bundle it as the versioned archive.
#  - $(MAKE) (not a literal `make`) lets the sub-make share the parent's
#    jobserver and flags.
#  - lib64 is renamed only when it exists; installs that already use lib/
#    would otherwise fail on the `mv`.
$(ONNXRT_ARCHIVE): compile_onnxruntime
	cd $(ONNXRT_BUILD_DIR)/Release && $(MAKE) install
	cd $(ONNXRT_INSTALL_DIR) && mv include/onnxruntime/* include && rm -rf include/onnxruntime && \
		if [ -d lib64 ]; then mv lib64 lib; fi
	cd $(INSTALL_DIR) && tar -czf $@ onnxruntime/
89140

141+
# help: build_onnxruntime -- Builds ONNX Runtime
142+
.PHONY: build_onnxruntime
143+
build_onnxruntime: $(ONNXRT_ARCHIVE)

architectures/linux-cuda-11.8.0

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
# BSD 2-Clause License
3+
#
4+
# Copyright (c) 2024, Hewlett Packard Enterprise
5+
# All rights reserved.
6+
#
7+
# Redistribution and use in source and binary forms, with or without
8+
# modification, are permitted provided that the following conditions are met:
9+
#
10+
# 1. Redistributions of source code must retain the above copyright notice, this
11+
# list of conditions and the following disclaimer.
12+
#
13+
# 2. Redistributions in binary form must reproduce the above copyright notice,
14+
# this list of conditions and the following disclaimer in the documentation
15+
# and/or other materials provided with the distribution.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
OS = linux
29+
ARCHITECTURE = x64
30+
CUDA_VERSION = 11.8.0
31+
STACK=cuda-$(CUDA_VERSION)
32+
33+
# pyTorch options
34+
PYTORCH_VERSION = 2.4.0
35+
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF
36+
TORCH_CMAKE_OPTIONS += -DUSE_NCCL=ON -DUSE_CUDA=ON -DUSE_STATIC_MKL=ON -DUSE_ROCM=OFF -DUSE_CUDNN=ON
37+
TORCH_CMAKE_OPTIONS += -DTORCH_CUDA_ARCH_LIST="All" -DCUDNN_LIBRARY_PATH=${CUDNN_LIBRARY} -DCUDNN_INCLUDE_PATH=${CUDNN_INCLUDE_DIR}
38+
PYTORCH_PREBUILD_TARGETS = pytorch_checkout
39+
40+
# Tensorflow options
41+
TF_VERSION = 2.14.1
42+
TF_TAG = v$(TF_VERSION)
43+
TF_REMOTE = https://github.com/tensorflow/tensorflow.git
44+
TF_PREBUILD_TARGETS = tf_checkout tf_prebuild
45+
TF_BAZEL_OPTS = --jobs 192
46+
47+
48+
ONNXRT_VERSION = 1.17.3
49+
ONNXRT_OPTIONS = --use_cuda --cudnn_home=${CUDNN_INSTALL_PATH}
50+
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout onnxrt_patch
51+
52+
# Check out the requested PyTorch release tag in the pytorch submodule and
# bring its nested submodules in line with that tag.
# Declared phony: this is a command, not a file to be produced.
.PHONY: pytorch_checkout
pytorch_checkout:
	cd pytorch && \
	git checkout v${PYTORCH_VERSION} && \
	git submodule update --init --recursive
56+
57+
# Preparation steps for a ROCm build of PyTorch (per the target name): run
# the AMD build script, rename attr.memoryType to attr.type in
# HIPHooks.cpp, and apply the caffe2 ROCm path patch.
# Not listed in PYTORCH_PREBUILD_TARGETS in this file.
# `cd ... &&` (rather than `cd ...;`) keeps the following command from
# running in the wrong directory if the cd fails.
.PHONY: pytorch_rocm_prebuild
pytorch_rocm_prebuild:
	cd pytorch && python tools/amd_build/build_amd.py
	sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
	cd pytorch && git apply ../patches/pytorch/caffe2_rocm_path.patch
61+
62+
# Run TensorFlow's configure.py with the build choices preset through
# environment variables (CUDA enabled; ROCm and TensorRT disabled).
# `cd tensorflow &&` ensures configure.py is never attempted from the
# wrong directory if the cd fails.
.PHONY: tf_prebuild
tf_prebuild:
	cd tensorflow && \
	USE_DEFAULT_PYTHON_LIB_PATH=1 \
	PYTHON_BIN_PATH=$$(which python) \
	TF_NEED_CLANG=1 \
	TF_NEED_ROCM=0 \
	TF_NEED_CUDA=1 \
	TF_NEED_TENSORRT=0 \
	TF_CUDA_CLANG=0 \
	CC_OPT_FLAGS="-Wno-sign-compare" \
	TF_SET_ANDROID_WORKSPACE=0 \
	python configure.py
74+
75+
# Fetch the requested TensorFlow tag from $(TF_REMOTE) and check it out in
# the tensorflow submodule.
# `cd tensorflow &&` matters here: with `cd tensorflow;` a failed cd would
# let the fetch and checkout run against the enclosing repository.
.PHONY: tf_checkout
tf_checkout:
	cd tensorflow && \
	git fetch $(TF_REMOTE) $(TF_TAG) && \
	git checkout FETCH_HEAD
79+
80+
# Put the onnxruntime submodule on the requested release tag in a pristine
# state. Local modifications to tracked files (e.g. patches applied by an
# earlier build) are discarded *before* the checkout, so they cannot block
# a switch to a different version; untracked files are removed afterwards.
.PHONY: onnxrt_checkout
onnxrt_checkout:
	cd onnxruntime && \
	git reset --hard && \
	git checkout v$(ONNXRT_VERSION) && \
	git clean -xdf && \
	git submodule update --init --recursive
86+
87+
# Apply the cfloat patch to the onnxruntime tree unless it is already
# applied (a successful `git apply --reverse --check` means it is), so the
# target can be re-run without failing.
.PHONY: onnxrt_patch
onnxrt_patch:
	cd onnxruntime && \
	{ git apply --reverse --check ../patches/onnxruntime/cfloat.patch 2>/dev/null || \
	  git apply ../patches/onnxruntime/cfloat.patch; }

architectures/linux-cuda-12.5.0

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
# BSD 2-Clause License
3+
#
4+
# Copyright (c) 2024, Hewlett Packard Enterprise
5+
# All rights reserved.
6+
#
7+
# Redistribution and use in source and binary forms, with or without
8+
# modification, are permitted provided that the following conditions are met:
9+
#
10+
# 1. Redistributions of source code must retain the above copyright notice, this
11+
# list of conditions and the following disclaimer.
12+
#
13+
# 2. Redistributions in binary form must reproduce the above copyright notice,
14+
# this list of conditions and the following disclaimer in the documentation
15+
# and/or other materials provided with the distribution.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
OS = linux
29+
ARCHITECTURE = x64
30+
CUDA_VERSION = 12.5.0
31+
STACK=cuda-$(CUDA_VERSION)
32+
33+
# pyTorch options
34+
PYTORCH_VERSION = 2.4.0
35+
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF
36+
TORCH_CMAKE_OPTIONS += -DUSE_NCCL=ON -DUSE_CUDA=ON -DUSE_STATIC_MKL=ON -DUSE_ROCM=OFF -DUSE_CUDNN=ON
37+
TORCH_CMAKE_OPTIONS += -DTORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} -DCUDNN_LIBRARY_PATH=${CUDNN_LIBRARY} -DCUDNN_INCLUDE_PATH=${CUDNN_INCLUDE_DIR}
38+
PYTORCH_PREBUILD_TARGETS = pytorch_checkout
39+
40+
# Tensorflow options
41+
TF_VERSION = 2.17.0
42+
TF_TAG = v$(TF_VERSION)
43+
TF_REMOTE = https://github.com/tensorflow/tensorflow.git
44+
TF_PREBUILD_TARGETS = tf_checkout tf_prebuild
45+
TF_BAZEL_OPTS = --jobs 192
46+
47+
48+
ONNXRT_VERSION = 1.17.3
49+
ONNXRT_OPTIONS = --use_cuda --cudnn_home=${CUDNN_INSTALL_PATH}
50+
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout onnxrt_patch
51+
52+
# Check out the requested PyTorch release tag in the pytorch submodule and
# bring its nested submodules in line with that tag.
# Declared phony: this is a command, not a file to be produced.
.PHONY: pytorch_checkout
pytorch_checkout:
	cd pytorch && \
	git checkout v${PYTORCH_VERSION} && \
	git submodule update --init --recursive
56+
57+
# Preparation steps for a ROCm build of PyTorch (per the target name): run
# the AMD build script, rename attr.memoryType to attr.type in
# HIPHooks.cpp, and apply the caffe2 ROCm path patch.
# Not listed in PYTORCH_PREBUILD_TARGETS in this file.
# `cd ... &&` (rather than `cd ...;`) keeps the following command from
# running in the wrong directory if the cd fails.
.PHONY: pytorch_rocm_prebuild
pytorch_rocm_prebuild:
	cd pytorch && python tools/amd_build/build_amd.py
	sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
	cd pytorch && git apply ../patches/pytorch/caffe2_rocm_path.patch
61+
62+
# Run TensorFlow's configure.py with the build choices preset through
# environment variables (CUDA enabled; ROCm and TensorRT disabled).
# `cd tensorflow &&` ensures configure.py is never attempted from the
# wrong directory if the cd fails.
.PHONY: tf_prebuild
tf_prebuild:
	cd tensorflow && \
	USE_DEFAULT_PYTHON_LIB_PATH=1 \
	PYTHON_BIN_PATH=$$(which python) \
	TF_NEED_CLANG=1 \
	TF_NEED_ROCM=0 \
	TF_NEED_CUDA=1 \
	TF_NEED_TENSORRT=0 \
	TF_CUDA_CLANG=0 \
	CC_OPT_FLAGS="-Wno-sign-compare" \
	TF_SET_ANDROID_WORKSPACE=0 \
	python configure.py
74+
75+
# Fetch the requested TensorFlow tag from $(TF_REMOTE) and check it out in
# the tensorflow submodule.
# `cd tensorflow &&` matters here: with `cd tensorflow;` a failed cd would
# let the fetch and checkout run against the enclosing repository.
.PHONY: tf_checkout
tf_checkout:
	cd tensorflow && \
	git fetch $(TF_REMOTE) $(TF_TAG) && \
	git checkout FETCH_HEAD
79+
80+
# Put the onnxruntime submodule on the requested release tag in a pristine
# state. Local modifications to tracked files (e.g. patches applied by an
# earlier build) are discarded *before* the checkout, so they cannot block
# a switch to a different version; untracked files are removed afterwards.
.PHONY: onnxrt_checkout
onnxrt_checkout:
	cd onnxruntime && \
	git reset --hard && \
	git checkout v$(ONNXRT_VERSION) && \
	git clean -xdf && \
	git submodule update --init --recursive
86+
87+
# Apply the cfloat patch to the onnxruntime tree unless it is already
# applied (a successful `git apply --reverse --check` means it is), so the
# target can be re-run without failing.
.PHONY: onnxrt_patch
onnxrt_patch:
	cd onnxruntime && \
	{ git apply --reverse --check ../patches/onnxruntime/cfloat.patch 2>/dev/null || \
	  git apply ../patches/onnxruntime/cfloat.patch; }

0 commit comments

Comments
 (0)