Skip to content
7 changes: 7 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ jobs:
# Merge all these results together
python scripts/merge_data_files.py out.yaml eessi*.yaml
mv out.yaml docs/data/eessi_software_metadata.yaml
# Also do RISC-V repository
rm eessi*.yaml
export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="riscv64/generic"
export EESSI_ACCELERATOR_TARGET_OVERRIDE="doesnotexist"
( module load EESSI/2025.06 && module load EasyBuild/5 && python generate_data_files.py --eessi-version 2025.06-001 --repository "dev.eessi.io/riscv" )
python scripts/merge_data_files.py out_riscv.yaml eessi*.yaml
mv out_riscv.yaml docs/data/eessi_software_metadata_riscv.yaml
Comment thread
ocaisa marked this conversation as resolved.
Outdated
# Generate json data files and markdown index/description for them
cd docs/data
python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata
Expand Down
30 changes: 25 additions & 5 deletions scripts/generate_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@
from easybuild.tools.include import include_easyblocks
from contextlib import contextmanager

VALID_EESSI_VERSIONS = ["2025.06", "2023.06"]
# EESSI versions available in each CVMFS repository that this script can scan
SUPPORTED_REPOSITORIES = {
    "software.eessi.io": ["2025.06", "2023.06"],
    "dev.eessi.io/riscv": ["2025.06-001"],
}

# Flat list of every version offered by any repository, in declaration order.
# It only says a version exists somewhere; it does not say which repository has it.
VALID_EESSI_VERSIONS = []
for _repository_versions in SUPPORTED_REPOSITORIES.values():
    VALID_EESSI_VERSIONS.extend(_repository_versions)

# Give order to my toolchains so I can easily figure out what "latest" means
EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS = OrderedDict(
Expand Down Expand Up @@ -205,20 +214,31 @@ def merge_dicts(d1, d2):
"-e",
required=True,
choices=VALID_EESSI_VERSIONS,
help=f"Allowed versions: {', '.join(VALID_EESSI_VERSIONS)}",
help=f"Allowed versions (also dependent on repository): {', '.join(VALID_EESSI_VERSIONS)}",
)
parser.add_argument(
"--repository",
"-r",
default="software.eessi.io",
choices=SUPPORTED_REPOSITORIES.keys(),
help=f"Repository to scan: {', '.join(SUPPORTED_REPOSITORIES)} (default: %(default)s)",
)

args = parser.parse_args()
eessi_version = args.eessi_version
repository = args.repository

if eessi_version not in SUPPORTED_REPOSITORIES[repository]:
raise ValueError(f"You must choose an EESSI version supported by the repository: {SUPPORTED_REPOSITORIES[repository]}")

print(f"Using EESSI version: {eessi_version}")

# We use a single architecture path to gather information about the software versions
eessi_reference_architecture = os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE", False)
if not eessi_reference_architecture:
print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE")
print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE environment variable")
exit()
base_path = f"/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{eessi_reference_architecture}"
base_path = f"/cvmfs/{repository}/versions/{eessi_version}/software/linux/{eessi_reference_architecture}"
cpu_easyconfig_files_dict = collect_eb_files(os.path.join(base_path, "software"))
# We also gather all the accelerator installations for NVIDIA-enabled packages
# We're not typically running this script on a node with a GPU so an override must have been set
Expand All @@ -243,7 +263,7 @@ def merge_dicts(d1, d2):

# Store the toolchain hierarchies supported by the EESSI version
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {}
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version.split("-")[0]]:
# versions are typically 2024a/2024b etc. for top level toolchains
# so let's use that to make sorting easy
toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}"
Expand Down
14 changes: 11 additions & 3 deletions scripts/process_eessi_software_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
"x86_64/intel/cascadelake",
]

# CPU architecture targets used instead of ARCHITECTURES when the path being
# processed contains "/riscv64/"
RISCV_ARCHITECTURES = [
"riscv64/generic",
]

NVIDIA_ARCHITECTURES = [
"accel/nvidia/cc70",
"accel/nvidia/cc80",
Expand Down Expand Up @@ -63,7 +67,11 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
# 1) Detect the architecture substring inside the path
base_version_dict["cpu_arch"] = []
detected_arch = None
for arch in ARCHITECTURES:
if '/riscv64/' in original_path:
architecture_group = RISCV_ARCHITECTURES
else:
architecture_group = ARCHITECTURES
for arch in architecture_group:
if f"/{arch}/" in original_path:
detected_arch = arch
break
Expand All @@ -89,7 +97,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"
Comment thread
ocaisa marked this conversation as resolved.

# 3) Substitute each architecture and test module file existence in spider cache
for arch in ARCHITECTURES:
for arch in architecture_group:
substituted_modulefile = modulefile.replace(detected_arch, arch)
substituted_spider_cache = spider_cache.replace(detected_arch, arch)
# os.path.exists is very expensive for CVMFS so we just look for the file in the spider cache
Expand Down Expand Up @@ -368,7 +376,7 @@ def main():
base_json_metadata["architectures_map"] = {}
for eessi_version in eessi_versions:
base_json_metadata["architectures_map"][eessi_version] = {}
for architecture in ARCHITECTURES:
for architecture in ARCHITECTURES + RISCV_ARCHITECTURES:
base_json_metadata["architectures_map"][eessi_version][architecture] = architecture
base_json_metadata["gpu_architectures_map"] = {}
base_json_metadata["category_details"] = {}
Expand Down
Loading