|
105 | 105 |
|
106 | 106 | end |
107 | 107 |
|
108 | | -
|
109 | | -local function eessi_cuda_enabled_load_hook(t) |
| 108 | +local function eessi_cuda_and_libraries_enabled_load_hook(t) |
110 | 109 | local frameStk = require("FrameStk"):singleton() |
111 | 110 | local mt = frameStk:mt() |
112 | 111 | local simpleName = string.match(t.modFullName, "(.-)/") |
113 | | - -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. |
114 | | - -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse |
115 | | - -- to load the CUDA module and print an informative message on how to set up GPU support for NESSI |
| 112 | + local packagesList = { ["CUDA"] = true, ["cuDNN"] = true, ["cuTENSOR"] = true } |
| 113 | + -- If we try to load any of the modules in packagesList, we check if the |
| 114 | + -- full package was installed on the host in host_injections. |
| 115 | + -- This is required for end users to build additional software that depends |
| 116 | + -- on the package. If the full package isn't present, refuse |
| 117 | + -- to load the module and print an informative message on how to set up GPU support for NESSI |
116 | 118 | local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" |
117 | | - if simpleName == 'CUDA' then |
| 119 | + if packagesList[simpleName] then |
| 120 | + -- simpleName is a module in packagesList |
118 | 121 | -- get the full host_injections path |
119 | 122 | local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') |
120 | | - -- build final path where the CUDA software should be installed |
121 | | - local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" |
122 | | - local cudaDirExists = isDir(cudaEasyBuildDir) |
123 | | - if not cudaDirExists then |
| 123 | + -- build final path where the software should be installed |
| 124 | + local packageEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" |
| 125 | + local packageDirExists = isDir(packageEasyBuildDir) |
| 126 | + if not packageDirExists then |
124 | 127 | local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " |
125 | | - advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where NESSI " |
| 128 | + advice = advice .. "due to licencing. You will need to install a full copy of the " .. simpleName .. " package where NESSI " |
126 | 129 | advice = advice .. "can find it.\\n" |
127 | 130 | advice = advice .. refer_to_docs |
128 | 131 | LmodError("\\nYou requested to load ", simpleName, " ", advice) |
129 | 132 | end |
130 | 133 | end |
131 | | - -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker, |
| 134 | + -- when loading CUDA (and cu*) enabled modules check if the necessary driver libraries are accessible to the NESSI linker, |
132 | 135 | -- otherwise, refuse to load the requested module and print error message |
133 | | - local checkGpu = mt:haveProperty(simpleName,"arch","gpu") |
134 | | - local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK") |
135 | | - if checkGpu and (overrideGpuCheck == nil) then |
| 136 | + local haveGpu = mt:haveProperty(simpleName,"arch","gpu") |
| 137 | + if haveGpu then |
136 | 138 | local arch = os.getenv("EESSI_CPU_FAMILY") or "" |
137 | | - local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" |
138 | | - local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" |
| 139 | + local cvmfs_repo = os.getenv("EESSI_CVMFS_REPO") or "" |
| 140 | + local cudaVersionFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" |
| 141 | + local cudaDriverFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" |
139 | 142 | local cudaDriverExists = isFile(cudaDriverFile) |
140 | 143 | local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") |
141 | 144 | if not (cudaDriverExists or singularityCudaExists) then |
142 | 145 | local advice = "which relies on the CUDA runtime environment and driver libraries. " |
143 | 146 | advice = advice .. "In order to be able to use the module, you will need " |
144 | | - advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can " |
145 | | - advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but " |
146 | | - advice = advice .. "the loaded application will not be able to execute on your system.\\n" |
| 147 | + advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" |
147 | 148 | advice = advice .. refer_to_docs |
148 | 149 | LmodError("\\nYou requested to load ", simpleName, " ", advice) |
149 | 150 | else |
|
174 | 175 | end |
175 | 176 | end |
176 | 177 |
|
177 | | -local function eessi_cudnn_enabled_load_hook(t) |
178 | | - local frameStk = require("FrameStk"):singleton() |
179 | | - local mt = frameStk:mt() |
180 | | - local simpleName = string.match(t.modFullName, "(.-)/") |
181 | | - -- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections. |
182 | | - -- This is required for end users to build additional cuDNN dependent software. If the full SDK isn't present, refuse |
183 | | - -- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI |
184 | | - local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" |
185 | | - if simpleName == 'cuDNN' then |
186 | | - -- get the full host_injections path |
187 | | - local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') |
188 | | - -- build final path where the cuDNN software should be installed |
189 | | - local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" |
190 | | - local cudnnDirExists = isDir(cudnnEasyBuildDir) |
191 | | - if not cudnnDirExists then |
192 | | - local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " |
193 | | - advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI " |
194 | | - advice = advice .. "can find it.\\n" |
195 | | - advice = advice .. refer_to_docs |
196 | | - LmodError("\\nYou requested to load ", simpleName, " ", advice) |
197 | | - end |
198 | | - end |
199 | | -end |
200 | | -
|
201 | 178 | -- Wrap the hook function(s) in a single function, as we can only register one function as load hook in Lmod |
202 | 179 | -- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed |
203 | 180 | function eessi_load_hook(t) |
204 | | - -- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix |
205 | | - -- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack |
| 181 | + -- Only apply the CUDA and libraries hook if the loaded module is in the NESSI prefix |
| 182 | + -- This avoids getting an Lmod Error when trying to load a CUDA or CUDA-related library module from a local software stack |
206 | 183 | if from_eessi_prefix(t) then |
207 | | - eessi_cuda_enabled_load_hook(t) |
208 | | - eessi_cudnn_enabled_load_hook(t) |
| 184 | + eessi_cuda_and_libraries_enabled_load_hook(t) |
209 | 185 | end |
210 | 186 | end |
211 | 187 |
|
|
0 commit comments