|
cmake_minimum_required(VERSION 3.25.0)

# Select the CUDA architectures to build for, and keep the two spellings
# of that choice in sync:
#   CMAKE_CUDA_ARCHITECTURES  - CMake form, e.g. "80" or "100"
#   TORCH_CUDA_ARCH_LIST      - "major.minor" form, e.g. "8.0" or "10.0"
#
# This is a bit messy, where it comes before project(), but it needs to
# come before CUDA is enabled (that could also be moved out of project()
# into an explicit enable_language(), but this doesn't require the CXX
# compiler, so it _can_ go here).
#
# Cases handled:
#   neither variable set          -> ask nvidia-smi, else fall back to 70/80/90
#   only TORCH_CUDA_ARCH_LIST set -> derive CMAKE_CUDA_ARCHITECTURES from it
#   only CMAKE_CUDA_ARCHITECTURES -> derive TORCH_CUDA_ARCH_LIST from it
#   both set                      -> leave both untouched
if (NOT CMAKE_CUDA_ARCHITECTURES AND NOT TORCH_CUDA_ARCH_LIST)
  execute_process(
    COMMAND nvidia-smi --query-gpu "compute_cap" --format=csv,noheader
    RESULT_VARIABLE _nvidia_smi_retcode
    OUTPUT_VARIABLE _nvidia_smi_output
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
  )

  set(_nvidia_compute_caps "")
  if (_nvidia_smi_retcode EQUAL 0 AND _nvidia_smi_output)
    # Split on any whitespace (this also copes with CRLF line endings),
    # then keep only entries that really look like "major.minor" so that
    # unexpected rows cannot leak into the architecture lists.
    string(REGEX REPLACE "[ \t\r\n]+" ";"
      _nvidia_compute_caps "${_nvidia_smi_output}")
    list(FILTER _nvidia_compute_caps INCLUDE REGEX "^[0-9]+\\.[0-9]+$")
    list(SORT _nvidia_compute_caps COMPARE NATURAL)
    list(REMOVE_DUPLICATES _nvidia_compute_caps)
  endif ()

  if (NOT "${_nvidia_compute_caps}" STREQUAL "")
    string(REPLACE "." "" _nvidia_archs "${_nvidia_compute_caps}")

    # TRB: I don't _think_ these need to go in the cache... But I'm
    # not opposed to it.
    set(CMAKE_CUDA_ARCHITECTURES ${_nvidia_archs})
    set(TORCH_CUDA_ARCH_LIST ${_nvidia_compute_caps})
  else ()
    # nvidia-smi is missing, failed, or printed nothing usable.
    set(CMAKE_CUDA_ARCHITECTURES 70 80 90)
    set(TORCH_CUDA_ARCH_LIST 7.0 8.0 9.0)
  endif ()
elseif (NOT CMAKE_CUDA_ARCHITECTURES)
  # TORCH_CUDA_ARCH_LIST is commonly written space-separated and entries
  # may carry a "+PTX" suffix (e.g. "8.0 9.0+PTX"). Normalise to a CMake
  # list of bare numbers; a bare CMAKE_CUDA_ARCHITECTURES entry already
  # builds both real and virtual code for that architecture.
  # NOTE(review): named entries such as "Ampere" are not translated here.
  string(REGEX REPLACE "[ \t;]+" ";" _nvidia_archs "${TORCH_CUDA_ARCH_LIST}")
  list(REMOVE_ITEM _nvidia_archs "")
  list(TRANSFORM _nvidia_archs REPLACE "\\+PTX$" "")
  list(TRANSFORM _nvidia_archs REPLACE "\\." "")
  list(REMOVE_DUPLICATES _nvidia_archs)
  set(CMAKE_CUDA_ARCHITECTURES ${_nvidia_archs})
elseif (NOT TORCH_CUDA_ARCH_LIST)
  # The last leading digit is the minor version and everything before it
  # is the major version: 80 -> 8.0, 90a -> 9.0a, 100 -> 10.0. The regex is
  # anchored and greedy so three-digit architectures split correctly.
  # Entries that do not start with two digits (e.g. "native") are left
  # unchanged.
  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES
    REPLACE "^([0-9]+)([0-9])" "\\1.\\2"
    OUTPUT_VARIABLE TORCH_CUDA_ARCH_LIST
  )
endif ()

message(STATUS "Detected CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "Detected TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}")
| 43 | + |
# Declare the project. Listing CUDA in LANGUAGES enables the CUDA
# toolchain as part of this call.
project(DGraph
  VERSION 0.0.1
  DESCRIPTION "A deep learning library for training graph neural networks at scale"
  HOMEPAGE_URL "https://github.com/LBANN/DGraph"
  LANGUAGES CXX CUDA)

# User-facing switch for the NVSHMEM-backed parts of the build (default ON).
option(DGRAPH_ENABLE_NVSHMEM "Use NVSHMEM in the build." ON)

# Dependencies
# Let find_package(... MODULE) also search the project's own cmake/ directory.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
| 62 | + |
find_package(MPI 3.0 REQUIRED COMPONENTS CXX)
find_package(Torch 2.6 REQUIRED CONFIG)

# Python has to be located before searching for torch_python: the
# Python_SITEARCH / Python_SITELIB variables used as search hints below
# are only defined once FindPython has run.
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)

# Also, torch_python!
# We also don't care about the limited API nonsense, so we can use
# libtorch. Let's find it.
#
# Hint order: the directory holding the torch library that the Torch
# package reported (when TORCH_LIBRARY is set), then torch/lib64 and
# torch/lib under the interpreter's site-packages directories.
set(_torch_python_hints "")
if (TORCH_LIBRARY)
  get_filename_component(TORCH_LIB_DIR "${TORCH_LIBRARY}" DIRECTORY)
  list(APPEND _torch_python_hints "${TORCH_LIB_DIR}")
endif ()
foreach (_site_dir IN ITEMS "${Python_SITEARCH}" "${Python_SITELIB}")
  # Skip undefined/empty entries so we never probe "/torch/lib".
  if (_site_dir)
    list(APPEND _torch_python_hints
      "${_site_dir}/torch/lib64"
      "${_site_dir}/torch/lib")
  endif ()
endforeach ()

# First pass: look only in the hinted locations.
find_library(TORCH_PYTHON_LIBRARY
  torch_python
  HINTS
  ${_torch_python_hints}
  NO_DEFAULT_PATH)
# Second pass: a no-op if the first pass succeeded (the result is cached);
# otherwise search the default locations and fail the configure if the
# library still cannot be found.
find_library(TORCH_PYTHON_LIBRARY torch_python REQUIRED)

if (DGRAPH_ENABLE_NVSHMEM)
  find_package(NVSHMEM 2.5 REQUIRED MODULE)
endif ()

find_package(pybind11 CONFIG REQUIRED)
| 87 | + |
# torch_local: Python extension module built from one C++ bindings file
# and one CUDA kernel file.
python_add_library(torch_local MODULE WITH_SOABI
  DGraph/distributed/csrc/torch_local_bindings.cpp
  DGraph/distributed/csrc/torch_local_kernels.cu)

# MPI, torch and torch_python are linked PUBLIC; the pybind11 headers are
# only needed to compile the module itself.
target_link_libraries(torch_local
  PUBLIC MPI::MPI_CXX torch ${TORCH_PYTHON_LIBRARY}
  PRIVATE pybind11::headers)

# Headers that belong to the module, registered as a HEADERS file set.
target_sources(torch_local PUBLIC
  FILE_SET HEADERS
  BASE_DIRS
    DGraph/distributed/csrc
    DGraph/distributed/include
  FILES
    DGraph/distributed/include/macros.hpp
    DGraph/distributed/include/torch_local.hpp
    DGraph/distributed/csrc/local_data_kernels.cuh)

# Strict (no compiler extensions) C++17 for both host and device code.
foreach (_lang IN ITEMS CXX CUDA)
  set_target_properties(torch_local PROPERTIES
    ${_lang}_STANDARD 17
    ${_lang}_STANDARD_REQUIRED ON
    ${_lang}_EXTENSIONS OFF)
endforeach ()

# Carry the directories of the linked libraries into the installed RPATH.
set_property(TARGET torch_local PROPERTY INSTALL_RPATH_USE_LINK_PATH ON)

install(TARGETS torch_local LIBRARY DESTINATION .)
| 130 | + |
# torch_nvshmem_p2p: Python extension module that links against NVSHMEM.
# Only configured when DGRAPH_ENABLE_NVSHMEM is on.
if (DGRAPH_ENABLE_NVSHMEM)
  python_add_library(torch_nvshmem_p2p MODULE WITH_SOABI
    DGraph/distributed/csrc/torch_nvshmem_p2p.cu
    DGraph/distributed/csrc/torch_nvshmem_p2p_bindings.cpp)

  # Headers that belong to the module, registered as a HEADERS file set.
  target_sources(torch_nvshmem_p2p PUBLIC
    FILE_SET HEADERS
    BASE_DIRS
      DGraph/distributed/csrc
      DGraph/distributed/include
    FILES
      DGraph/distributed/include/torch_nvshmem_p2p.hpp
      DGraph/distributed/csrc/local_data_kernels.cuh
      DGraph/distributed/csrc/nvshmem_comm_kernels.cuh)

  target_link_libraries(torch_nvshmem_p2p
    PUBLIC NVSHMEM::NVSHMEM MPI::MPI_CXX torch ${TORCH_PYTHON_LIBRARY}
    PRIVATE pybind11::headers)

  # Build the install RPATH: whatever the target already carries (if
  # anything), followed by the directory containing NVSHMEM_LIBRARY.
  get_filename_component(_nvshmem_lib_dir "${NVSHMEM_LIBRARY}" DIRECTORY)
  get_target_property(_nvshmem_install_rpath torch_nvshmem_p2p INSTALL_RPATH)
  if (NOT _nvshmem_install_rpath)
    # Property unset (-NOTFOUND) or empty: start from an empty list.
    set(_nvshmem_install_rpath "")
  endif ()
  list(APPEND _nvshmem_install_rpath "${_nvshmem_lib_dir}")

  # Strict (no compiler extensions) C++17 for both host and device code.
  foreach (_lang IN ITEMS CXX CUDA)
    set_target_properties(torch_nvshmem_p2p PROPERTIES
      ${_lang}_STANDARD 17
      ${_lang}_STANDARD_REQUIRED ON
      ${_lang}_EXTENSIONS OFF)
  endforeach ()

  # Compile CUDA sources as relocatable device code.
  set_property(TARGET torch_nvshmem_p2p
    PROPERTY CUDA_SEPARABLE_COMPILATION ON)

  # Installed RPATH: linked-library directories plus the list built above.
  set_property(TARGET torch_nvshmem_p2p
    PROPERTY INSTALL_RPATH_USE_LINK_PATH ON)
  set_property(TARGET torch_nvshmem_p2p
    PROPERTY INSTALL_RPATH "${_nvshmem_install_rpath}")

  install(TARGETS torch_nvshmem_p2p LIBRARY DESTINATION .)
endif ()
0 commit comments