forked from InfiniTensor/InfiniTrain
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
202 lines (166 loc) · 6.19 KB
/
CMakeLists.txt
File metadata and controls
202 lines (166 loc) · 6.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
cmake_minimum_required(VERSION 3.28)

project(infini_train
  VERSION 0.5.0
  LANGUAGES CXX
)

# User-facing build switches. Each one is consulted further down in this file.
option(USE_CUDA     "Support NVIDIA CUDA"                     OFF)
option(USE_NCCL     "Build project for distributed running"   ON)
option(USE_OMP      "Use OpenMP as backend for Eigen"         ON)
option(PROFILE_MODE "ENABLE PROFILE MODE"                     OFF)

# Require C++20 with compiler-specific extensions turned off.
set(CMAKE_CXX_STANDARD          20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS        OFF)

# Emit compile_commands.json for editors and tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# ------------------------------------------------------------------------------
# Third-party deps (all vendored under third_party/)
# ------------------------------------------------------------------------------

# gflags: add the vendored project and expose its include directory
# to every target declared in this directory.
add_subdirectory(third_party/gflags)
include_directories("${gflags_SOURCE_DIR}/include")

# glog: WITH_GFLAGS / WITH_GTEST are forced OFF in the cache *before* the
# subdirectory is added, so glog's own configuration sees them disabled.
set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE)
set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE)
add_subdirectory(third_party/glog)
include_directories("${glog_SOURCE_DIR}/src")

# eigen: OpenMP is located first (only when USE_OMP is enabled) and is
# mandatory in that case.
if(USE_OMP)
  find_package(OpenMP REQUIRED)
endif()
add_subdirectory(third_party/eigen)
include_directories("${PROJECT_SOURCE_DIR}/third_party/eigen")
# Put the repository root on the include path for every target in this directory.
include_directories("${PROJECT_SOURCE_DIR}")

# PROFILE_MODE=ON defines the PROFILE_MODE=1 preprocessor macro directory-wide.
if(PROFILE_MODE)
  add_compile_definitions(PROFILE_MODE=1)
endif()
# ------------------------------------------------------------------------------
# Sources
# ------------------------------------------------------------------------------
# Both source lists are produced by globbing. CONFIGURE_DEPENDS makes the build
# tool re-check the globs on every build, so files that are added or removed are
# picked up without a manual re-run of CMake.

# Framework core sources (*.cc), excluding cpu kernels (they are built separately)
file(GLOB_RECURSE SRC CONFIGURE_DEPENDS
  "${PROJECT_SOURCE_DIR}/infini_train/src/*.cc"
)
list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*")

# Without NCCL, drop the sources under core/ccl/cuda from the core library.
# NOTE(review): the filter keys on USE_NCCL only; with USE_CUDA=OFF and
# USE_NCCL=ON (the defaults) these files are still compiled into the core
# library - confirm they are guarded by the preprocessor.
if(NOT USE_NCCL)
  list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*")
endif()

# CPU kernels (*.cc)
file(GLOB_RECURSE CPU_KERNELS CONFIGURE_DEPENDS
  "${PROJECT_SOURCE_DIR}/infini_train/src/kernels/cpu/*.cc"
)
# ------------------------------------------------------------------------------
# CPU kernels library
# ------------------------------------------------------------------------------
# Static archive built from the CPU_KERNELS source list; glog and Eigen are
# propagated to consumers through PUBLIC linkage.
add_library(infini_train_cpu_kernels STATIC ${CPU_KERNELS})
target_link_libraries(infini_train_cpu_kernels
  PUBLIC
    glog
    Eigen3::Eigen
)

if(USE_OMP)
  # The USE_OMP=1 macro is defined directory-wide, not just for this target.
  add_compile_definitions(USE_OMP=1)
  target_link_libraries(infini_train_cpu_kernels
    PUBLIC
      OpenMP::OpenMP_CXX
  )
endif()
# ------------------------------------------------------------------------------
# CUDA kernels library (optional)
# ------------------------------------------------------------------------------
# Built only when USE_CUDA is ON. Defines USE_CUDA=1 directory-wide, enables the
# CUDA language, and compiles every *.cu file under infini_train/src into the
# static library infini_train_cuda_kernels.
if(USE_CUDA)
  add_compile_definitions(USE_CUDA=1)
  enable_language(CUDA)
  find_package(CUDAToolkit REQUIRED)
  include_directories(${CUDAToolkit_INCLUDE_DIRS})

  # CUDA compilation options
  string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda --expt-relaxed-constexpr")

  # GPU architectures to compile the kernels for. The default matches the
  # previously hard-coded list; override at configure time with e.g.
  #   -DINFINI_TRAIN_CUDA_ARCHITECTURES=89
  set(INFINI_TRAIN_CUDA_ARCHITECTURES "75;80;90" CACHE STRING
    "CUDA architectures to build infini_train_cuda_kernels for")

  # All CUDA sources (*.cu) under infini_train/src. CONFIGURE_DEPENDS makes the
  # build re-check the glob so new or removed files are noticed automatically.
  file(GLOB_RECURSE CUDA_KERNELS CONFIGURE_DEPENDS
    "${PROJECT_SOURCE_DIR}/infini_train/src/*.cu"
  )

  add_library(infini_train_cuda_kernels STATIC ${CUDA_KERNELS})
  set_target_properties(infini_train_cuda_kernels PROPERTIES
    CUDA_ARCHITECTURES "${INFINI_TRAIN_CUDA_ARCHITECTURES}"
  )

  target_link_libraries(infini_train_cuda_kernels
    PUBLIC
      glog
      CUDA::cudart
      CUDA::cublas
      CUDA::cuda_driver
  )

  if(USE_NCCL)
    message(STATUS "Add USE_NCCL, use NCCL with CUDA")
    # Make the project's cmake/ directory visible to find_package().
    list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
    find_package(NCCL REQUIRED)
    add_compile_definitions(USE_NCCL=1)
    # NOTE(review): this links the bare library name `nccl` rather than
    # anything reported by find_package(NCCL) - confirm what the find module
    # exports and whether it should be used here instead.
    target_link_libraries(infini_train_cuda_kernels PUBLIC nccl)
  endif()
endif()
# ------------------------------------------------------------------------------
# Main framework library
# ------------------------------------------------------------------------------
# Static core library built from the SRC list. Its dependencies are PUBLIC so
# that anything linking infini_train inherits them.
add_library(infini_train STATIC ${SRC})
target_link_libraries(infini_train PUBLIC glog gflags infini_train_cpu_kernels)

# The core library contains CUDA runtime wrappers written as *.cc files
# (e.g. cuda_blas_handle.cc / cuda_guard.cc). They may need CUDA
# runtime/driver/cuBLAS symbols at final link, so those are attached here as
# well as on the kernels library.
if(USE_CUDA)
  target_link_libraries(infini_train PUBLIC
    infini_train_cuda_kernels
    CUDA::cudart
    CUDA::cublas
    CUDA::cuda_driver
  )
endif()

# NCCL is attached to the core library too, covering the case where core code
# (not only the kernels) references NCCL symbols directly.
if(USE_CUDA AND USE_NCCL)
  target_link_libraries(infini_train PUBLIC nccl)
endif()
# ------------------------------------------------------------------------------
# Helper: link_infini_train_exe(<target_name>)
#
# Links <target_name> (PRIVATE) against the framework's static libraries,
# wrapped in a linker group and in whole-archive mode:
#   infini_train, infini_train_cpu_kernels
#   + infini_train_cuda_kernels when USE_CUDA is ON
#
# The group works around one-pass static-library symbol resolution, which is
# what otherwise yields "undefined reference" errors for symbols the CUDA
# kernels library needs from the core library.
#
# Example:
#   add_executable(my_app main.cc)
#   link_infini_train_exe(my_app)
# ------------------------------------------------------------------------------
function(link_infini_train_exe target_name)
  # Archives to place inside the group, in link order.
  set(framework_archives infini_train infini_train_cpu_kernels)
  if(USE_CUDA)
    list(APPEND framework_archives infini_train_cuda_kernels)
  endif()

  target_link_libraries(${target_name} PRIVATE
    "-Wl,--start-group"
    "-Wl,--whole-archive"
    ${framework_archives}
    "-Wl,--no-whole-archive"
    "-Wl,--end-group"
  )
endfunction()
# ------------------------------------------------------------------------------
# Examples
# ------------------------------------------------------------------------------
# Every example executable is linked through link_infini_train_exe().

# mnist has its own dataset implementation.
add_executable(mnist
  example/mnist/main.cc
  example/mnist/dataset.cc
  example/mnist/net.cc
)
link_infini_train_exe(mnist)

# gpt2 and llama3 have the same layout: a model-specific main.cc and net.cc
# plus the shared dataset / utils / tokenizer sources from example/common.
foreach(model_name IN ITEMS gpt2 llama3)
  add_executable(${model_name}
    example/${model_name}/main.cc
    example/common/tiny_shakespeare_dataset.cc
    example/common/utils.cc
    example/${model_name}/net.cc
    example/common/tokenizer.cc
  )
  link_infini_train_exe(${model_name})
endforeach()
# Tools
# The infini_run target is expected to come from tools/infini_run; its binary is
# redirected to the top of the build tree.
add_subdirectory(tools/infini_run)
set_property(TARGET infini_run
  PROPERTY RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
)
# Tests
# Linked through link_infini_train_exe(), exactly like the example executables,
# so the tests get the same grouped / whole-archive linking of the framework
# static libraries instead of a plain, keyword-less link against infini_train.
add_executable(test_hook test/hook/test_hook.cc)
link_infini_train_exe(test_hook)

add_executable(test_precision_check test/hook/test_precision_check.cc)
link_infini_train_exe(test_precision_check)