Skip to content

Commit da4971d

Browse files
authored
Merge pull request #367 from trz42/nessi-2023.06-cuTENSOR-2.0.1.2-CUDA-12.1.1
{2023.06}[system] cuTENSOR v2.0.1.2 with CUDA/12.1.1
2 parents 80b4e63 + b3a6215 commit da4971d

8 files changed

Lines changed: 366 additions & 141 deletions

EESSI-install-software.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,10 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
204204
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
205205
# Allow skipping CUDA SDK install in e.g. CI environments
206206
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
207-
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula
208-
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26
207+
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \
208+
-e ${EESSI_PREFIX}/scripts/gpu_support/nvidia/eessi-2023.06-cuda-and-libraries.yml \
209+
-t /tmp/temp \
210+
--accept-cuda-eula
209211
else
210212
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
211213
fi

create_lmodsitepackage.py

Lines changed: 24 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -105,45 +105,46 @@
105105
106106
end
107107
108-
109-
local function eessi_cuda_enabled_load_hook(t)
108+
local function eessi_cuda_and_libraries_enabled_load_hook(t)
110109
local frameStk = require("FrameStk"):singleton()
111110
local mt = frameStk:mt()
112111
local simpleName = string.match(t.modFullName, "(.-)/")
113-
-- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
114-
-- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
115-
-- to load the CUDA module and print an informative message on how to set up GPU support for NESSI
112+
local packagesList = { ["CUDA"] = true, ["cuDNN"] = true, ["cuTENSOR"] = true }
113+
-- If we try to load any of the modules in packagesList, we check if the
114+
-- full package was installed on the host in host_injections.
115+
-- This is required for end users to build additional software that depends
116+
-- on the package. If the full package isn't present, refuse
117+
-- to load the module and print an informative message on how to set up GPU support for NESSI
116118
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
117-
if simpleName == 'CUDA' then
119+
if packagesList[simpleName] then
120+
-- simpleName is a module in packagesList
118121
-- get the full host_injections path
119122
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
120-
-- build final path where the CUDA software should be installed
121-
local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
122-
local cudaDirExists = isDir(cudaEasyBuildDir)
123-
if not cudaDirExists then
123+
-- build final path where the software should be installed
124+
local packageEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
125+
local packageDirExists = isDir(packageEasyBuildDir)
126+
if not packageDirExists then
124127
local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI "
125-
advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where NESSI "
128+
advice = advice .. "due to licencing. You will need to install a full copy of the " .. simpleName .. " package where NESSI "
126129
advice = advice .. "can find it.\\n"
127130
advice = advice .. refer_to_docs
128131
LmodError("\\nYou requested to load ", simpleName, " ", advice)
129132
end
130133
end
131-
-- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker,
134+
-- when loading CUDA (and cu*) enabled modules check if the necessary driver libraries are accessible to the NESSI linker,
132135
-- otherwise, refuse to load the requested module and print error message
133-
local checkGpu = mt:haveProperty(simpleName,"arch","gpu")
134-
local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK")
135-
if checkGpu and (overrideGpuCheck == nil) then
136+
local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
137+
if haveGpu then
136138
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
137-
local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
138-
local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
139+
local cvmfs_repo = os.getenv("EESSI_CVMFS_REPO") or ""
140+
local cudaVersionFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
141+
local cudaDriverFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
139142
local cudaDriverExists = isFile(cudaDriverFile)
140143
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
141144
if not (cudaDriverExists or singularityCudaExists) then
142145
local advice = "which relies on the CUDA runtime environment and driver libraries. "
143146
advice = advice .. "In order to be able to use the module, you will need "
144-
advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system. You can "
145-
advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but "
146-
advice = advice .. "the loaded application will not be able to execute on your system.\\n"
147+
advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n"
147148
advice = advice .. refer_to_docs
148149
LmodError("\\nYou requested to load ", simpleName, " ", advice)
149150
else
@@ -174,38 +175,13 @@
174175
end
175176
end
176177
177-
local function eessi_cudnn_enabled_load_hook(t)
178-
local frameStk = require("FrameStk"):singleton()
179-
local mt = frameStk:mt()
180-
local simpleName = string.match(t.modFullName, "(.-)/")
181-
-- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections.
182-
-- This is required for end users to build additional cuDNN dependent software. If the full SDK isn't present, refuse
183-
-- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI
184-
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
185-
if simpleName == 'cuDNN' then
186-
-- get the full host_injections path
187-
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
188-
-- build final path where the cuDNN software should be installed
189-
local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
190-
local cudnnDirExists = isDir(cudnnEasyBuildDir)
191-
if not cudnnDirExists then
192-
local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI "
193-
advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI "
194-
advice = advice .. "can find it.\\n"
195-
advice = advice .. refer_to_docs
196-
LmodError("\\nYou requested to load ", simpleName, " ", advice)
197-
end
198-
end
199-
end
200-
201178
-- Wrap all load-time checks in a single function, as we can only register one function as load hook in lmod
202179
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
203180
function eessi_load_hook(t)
204-
-- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix
205-
-- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack
181+
-- Only apply CUDA and libraries hook if the loaded module is in the NESSI prefix
182+
-- This avoids getting an Lmod Error when trying to load a CUDA or library module from a local software stack
206183
if from_eessi_prefix(t) then
207-
eessi_cuda_enabled_load_hook(t)
208-
eessi_cudnn_enabled_load_hook(t)
184+
eessi_cuda_and_libraries_enabled_load_hook(t)
209185
end
210186
end
211187

easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-001-system.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ easyconfigs:
33
options:
44
from-pr: 20299
55
- EESSI-extend-2023.06-easybuild.eb
6-
# comment to trigger rebuild
6+
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
7+
- cuTENSOR-2.0.1.2-CUDA-12.1.1.eb

easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,5 @@ easyconfigs:
3434
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451;
3535
options:
3636
from-pr: 19451
37-
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
3837
- OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb
3938
- ABySS-2.3.7-foss-2023a.eb

0 commit comments

Comments
 (0)