Skip to content

Commit 7bef856

Browse files
author
Richard Top
committed
Sync local branch with remote
2 parents 4c9b5d0 + 34a29a4 commit 7bef856

10 files changed

Lines changed: 341 additions & 7 deletions

EESSI-extend-2023.06-easybuild.eb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,19 @@ elseif (os.getenv("NESSI_SITE_INSTALL") ~= nil) then
9595
if ((os.getenv("NESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("NESSI_USER_INSTALL") ~= nil)) then
9696
LmodError("You cannot use NESSI_SITE_INSTALL in combination with any other NESSI_*_INSTALL environment variables")
9797
end
98-
easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections')
98+
site_install = os.getenv("NESSI_SITE_INSTALL")
99+
site_modulepath = nil
100+
if (site_install ~= nil) then
101+
-- Check the folder exists
102+
if not isDir(site_install) then
103+
LmodError("The location of NESSI_SITE_INSTALL (" .. site_install .. ") does not exist or is not a folder")
104+
end
105+
if (mode() == "load") then
106+
LmodMessage("Configuring for use of NESSI_SITE_INSTALL under " .. site_install)
107+
end
108+
easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), os.getenv("EESSI_CVMFS_REPO"), site_install)
109+
site_modulepath = pathJoin(easybuild_installpath, 'modules', 'all')
110+
end
99111
else
100112
-- Deal with user and project installs
101113
project_install = os.getenv("NESSI_PROJECT_INSTALL")

EESSI-install-software.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,15 @@ pr_diff=$(ls [0-9]*.diff | head -1)
199199
# for now, this just reinstalls all scripts. Not the most elegant, but it works
200200
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
201201

202-
# Install full CUDA SDK in host_injections
202+
# Install full CUDA SDK and cu* libraries in host_injections
203203
# Hardcode this for now, see if it works
204204
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
205205
# Allow skipping CUDA SDK install in e.g. CI environments
206206
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
207207
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula
208+
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuDNN_host_injections.sh -c 12.1.1 -d 8.9.2.26
208209
else
209-
echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed"
210+
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
210211
fi
211212

212213
# Install drivers in host_injections

create_lmodsitepackage.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,38 @@
174174
end
175175
end
176176
177+
local function eessi_cudnn_enabled_load_hook(t)
178+
local frameStk = require("FrameStk"):singleton()
179+
local mt = frameStk:mt()
180+
local simpleName = string.match(t.modFullName, "(.-)/")
181+
-- If we try to load cuDNN itself, check if the full cuDNN package was installed on the host in host_injections.
182+
-- This is required for end users to build additional cuDNN-dependent software. If the full package isn't present, refuse
183+
-- to load the cuDNN module and print an informative message on how to set up GPU support for NESSI
184+
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
185+
if simpleName == 'cuDNN' then
186+
-- get the full host_injections path
187+
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
188+
-- build final path where the cuDNN software should be installed
189+
local cudnnEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
190+
local cudnnDirExists = isDir(cudnnEasyBuildDir)
191+
if not cudnnDirExists then
192+
local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI "
193+
advice = advice .. "due to licencing. You will need to install a full copy of the cuDNN package where NESSI "
194+
advice = advice .. "can find it.\\n"
195+
advice = advice .. refer_to_docs
196+
LmodError("\\nYou requested to load ", simpleName, " ", advice)
197+
end
198+
end
199+
end
200+
177201
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
178202
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
179203
function eessi_load_hook(t)
180-
-- Only apply CUDA hooks if the loaded module is in the NESSI prefix
181-
-- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack
204+
-- Only apply CUDA and cuDNN hooks if the loaded module is in the NESSI prefix
205+
-- This avoids getting an Lmod Error when trying to load a CUDA or cuDNN module from a local software stack
182206
if from_eessi_prefix(t) then
183207
eessi_cuda_enabled_load_hook(t)
208+
eessi_cudnn_enabled_load_hook(t)
184209
end
185210
end
186211

easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@ easyconfigs:
3434
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451;
3535
options:
3636
from-pr: 19451
37+
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
3738
- OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# 2024-05-19
2+
# Rebuild NESSI-extend/2023.06-easybuild
3+
# The current version does not handle NESSI_SITE_INSTALL correctly.
4+
easyconfigs:
5+
- EESSI-extend-2023.06-easybuild.eb
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# 2024-05-20
2+
# Rebuild NESSI-extend/2023.06-easybuild
3+
# Need to revert to the original version.
4+
easyconfigs:
5+
- EESSI-extend-2023.06-easybuild.eb

eb_hooks.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,62 @@ def post_sanitycheck_cuda(self, *args, **kwargs):
688688
raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!")
689689

690690

691+
def post_sanitycheck_cuDNN(self, *args, **kwargs):
692+
"""
693+
Remove files from cuDNN installation that we are not allowed to ship,
694+
and replace them with a symlink to a corresponding installation under host_injections.
695+
"""
696+
if self.name == 'cuDNN':
697+
print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...")
698+
699+
allowlist = ['LICENSE']
700+
701+
# read cuDNN LICENSE, construct allowlist based on the "2. Distribution" clause that lists the files that can be shipped
702+
license_path = os.path.join(self.installdir, 'LICENSE')
703+
search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:"
704+
with open(license_path) as infile:
705+
for line in infile:
706+
if line.strip().startswith(search_string):
707+
# remove search string, split into words, remove trailing
708+
# dots '.' and only retain words starting with a dot '.'
709+
distributable = line[len(search_string):]
710+
for word in distributable.split():
711+
if word[0] == '.':
712+
allowlist.append(word.rstrip('.'))
713+
714+
allowlist = sorted(set(allowlist))
715+
self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist))
716+
717+
# iterate over all files in the cuDNN installation directory
718+
for dir_path, _, files in os.walk(self.installdir):
719+
for filename in files:
720+
full_path = os.path.join(dir_path, filename)
721+
# we only really care about real files, i.e. not symlinks
722+
if not os.path.islink(full_path):
723+
# check if the current file is part of the allowlist
724+
basename = filename.split('.')[0]
725+
if '.' in filename:
726+
extension = '.' + filename.split('.')[1]
727+
if basename in allowlist:
728+
self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path)
729+
elif '.' in filename and extension in allowlist:
730+
self.log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path)
731+
else:
732+
self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s",
733+
filename, full_path)
734+
# if it is not in the allowlist, delete the file and create a symlink to host_injections
735+
host_inj_path = full_path.replace('versions', 'host_injections')
736+
# make sure source and target of symlink are not the same
737+
if full_path == host_inj_path:
738+
raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you "
739+
"are using this hook for a NESSI installation?",
740+
full_path, host_inj_path)
741+
remove_file(full_path)
742+
symlink(host_inj_path, full_path)
743+
else:
744+
raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!")
745+
746+
691747
def inject_gpu_property(ec):
692748
"""
693749
Add 'gpu' property, via modluafooter easyconfig parameter
@@ -712,6 +768,25 @@ def inject_gpu_property(ec):
712768
ec[key] = '\n'.join([ec_dict[key], value])
713769
else:
714770
ec[key] = value
771+
# Check if cuDNN is in the dependencies, if so add the 'gpu' Lmod property
772+
if ('cuDNN' in [dep[0] for dep in iter(ec_dict['dependencies'])]):
773+
ec.log.info("Injecting gpu as Lmod arch property and envvar with cuDNN version")
774+
key = 'modluafooter'
775+
value = 'add_property("arch","gpu")'
776+
cudnn_version = 0
777+
for dep in iter(ec_dict['dependencies']):
778+
# Make cuDNN a build dependency only (rpathing saves us from link errors)
779+
if 'cuDNN' in dep[0]:
780+
cudnn_version = dep[1]
781+
ec_dict['dependencies'].remove(dep)
782+
if dep not in ec_dict['builddependencies']:
783+
ec_dict['builddependencies'].append(dep)
784+
value = '\n'.join([value, 'setenv("EESSICUDNNVERSION","%s")' % cudnn_version])
785+
if key in ec_dict:
786+
if not value in ec_dict[key]:
787+
ec[key] = '\n'.join([ec_dict[key], value])
788+
else:
789+
ec[key] = value
715790
return ec
716791

717792

@@ -768,4 +843,5 @@ def inject_gpu_property(ec):
768843

769844
POST_SANITYCHECK_HOOKS = {
770845
'CUDA': post_sanitycheck_cuda,
846+
'cuDNN': post_sanitycheck_cuDNN,
771847
}

eessi_container.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,6 @@ if [[ "${ACCESS}" == "rw" ]]; then
625625
EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}")
626626

627627
EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs"
628-
EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${repo_name}"
629628
if [[ ! -z ${LOWER_DIRS} ]]; then
630629
# need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as
631630
# separator while the lowerdir overlayfs option uses ':'

install_scripts.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@
110110

111111
# Copy files for the scripts/gpu_support/nvidia directory
112112
nvidia_files=(
113-
install_cuda_host_injections.sh link_nvidia_host_libraries.sh
113+
install_cuda_host_injections.sh install_cuDNN_host_injections.sh link_nvidia_host_libraries.sh
114114
)
115115
copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}"
116116

0 commit comments

Comments
 (0)