From ef3c7a5f851e7fc24fcf7c0d674cb1aedd1a7349 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Tue, 24 Jun 2025 09:47:49 +0200 Subject: [PATCH 01/14] accept EULA for NVHPC --- bin/submit_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/submit_build.py b/bin/submit_build.py index 2430339..c6d4bf5 100755 --- a/bin/submit_build.py +++ b/bin/submit_build.py @@ -77,7 +77,7 @@ def main(): # Easybuild default paths # start using environment from local machine, job scripts get custom paths ebconf = { - 'accept-eula-for': 'Intel-oneAPI,CUDA,cuDNN', + 'accept-eula-for': 'Intel-oneAPI,CUDA,cuDNN,NVHPC', 'buildpath': os.path.join(job['tmp'], 'eb-submit-build-fetch'), 'hooks': hooks_hydra.__file__, 'include-easyblocks': os.path.join(VSCSOFTSTACK_ROOT, EASYBLOCK_REPO), From 3d441a574bd00c85bc2aac8093b403e78988174e Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Tue, 24 Jun 2025 09:52:49 +0200 Subject: [PATCH 02/14] add nvidia-compilers and NVHPC v25.1 as valid toolchains --- src/build_tools/hooks_hydra.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index c96af7b..ee5bc6e 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -67,9 +67,9 @@ LOCAL_ARCH_SUFFIX = os.getenv('VSC_ARCH_SUFFIX') LOCAL_ARCH_FULL = f'{LOCAL_ARCH}{LOCAL_ARCH_SUFFIX}' -VALID_TCGENS = ['2024a'] +VALID_TCGENS = ['2024a', '25.1'] VALID_MODULES_SUBDIRS = VALID_TCGENS + ['system'] -VALID_TCS = ['foss', 'intel', 'gomkl', 'gimkl', 'gimpi'] +VALID_TCS = ['foss', 'intel', 'gomkl', 'gimkl', 'gimpi', 'nvidia-compilers', 'NVHPC'] SUBDIR_MODULES_BWRAP = '.modules_bwrap' SUFFIX_MODULES_PATH = 'collection' From 595b610cac9838ba64280e32d3806875fa18a90f Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Tue, 24 Jun 2025 09:54:51 +0200 Subject: [PATCH 03/14] add support for nvidia-compilers and NVHPC as GPU modules --- src/build_tools/hooks_hydra.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index ee5bc6e..d34ab9b 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -312,7 +312,10 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument # skip installation of CUDA software in non-GPU architectures, only create module file is_cuda_software = 'CUDA' in ec.name or 'CUDA' in ec['versionsuffix'] + cuda_tcs = ['CUDA', 'nvidia-compilers', 'NVHPC'] + is_cuda_software = ec.name in cuda_tcs or ec.toolchain.name in cuda_tcs or 'CUDA' in ec['versionsuffix'] if is_cuda_software and LOCAL_ARCH_FULL not in GPU_ARCHS: + # only install the module file in non-GPU nodes # module_only steps: [MODULE_STEP, PREPARE_STEP, READY_STEP, POSTITER_STEP, SANITYCHECK_STEP] ec['module_only'] = True ec.log.info(f"[parse hook] Set parameter module_only: {ec['module_only']}") @@ -321,6 +324,7 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument # set cuda compute capabilities elif is_cuda_software: + # on GPU nodes set cuda compute capabilities ec['cuda_compute_capabilities'] = ARCHS[LOCAL_ARCH_FULL]['cuda_cc'] ec.log.info(f"[parse hook] Set parameter cuda_compute_capabilities: {ec['cuda_compute_capabilities']}") From 6e6672b1ab06e19041c09312e8dad72d0d90b907 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Tue, 24 Jun 2025 09:55:23 +0200 Subject: [PATCH 04/14] version bump to v4.3.4 --- src/build_tools/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build_tools/package.py b/src/build_tools/package.py index 69be34e..37732c9 100644 --- a/src/build_tools/package.py +++ b/src/build_tools/package.py @@ -16,7 +16,7 @@ @author: Alex Domingo (Vrije Universiteit Brussel) """ -VERSION = '4.3.3' +VERSION = '4.3.4' AUTHOR = { 'wp': 'Ward Poelmans', From 637d87c14c40b32b124a0b6de3df1ff305da7e0e Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Mon, 30 Jun 2025 14:35:13 +0200 Subject: [PATCH 05/14] set SLURM_MPI_TYPE in NVHPC --- src/build_tools/hooks_hydra.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index d34ab9b..d211b96 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -453,6 +453,11 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument self.cfg['modextravars'].update({'SLURM_ENABLED': "1"}) self.cfg['modextravars'].update({'SCHEDULER_TIGHT_COUPLING': "1"}) + if self.name == 'NVHPC': + slurm_mpi_type = 'pmix' + self.log.info("[pre-module hook] Set Slurm MPI type to: %s", slurm_mpi_type) + self.cfg['modextravars'].update({'SLURM_MPI_TYPE': slurm_mpi_type}) + ########################## # ------ TUNING -------- # ########################## From d58e67715b7b31ad765f5859256c26c6e174ff7f Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Fri, 25 Jul 2025 17:37:21 +0200 Subject: [PATCH 06/14] disable MPI binding in NVHPC --- src/build_tools/hooks_hydra.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index d211b96..23f37cb 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -457,6 +457,12 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument slurm_mpi_type = 'pmix' self.log.info("[pre-module hook] Set Slurm MPI type to: %s", slurm_mpi_type) self.cfg['modextravars'].update({'SLURM_MPI_TYPE': slurm_mpi_type}) + # NVHPC ships with OpenMPI v4 which has an issue between its hwloc + # and Slurm cgroups2 that results in mpirun trying to use unallocated + # cores to the job (see https://github.com/open-mpi/ompi/issues/12470) + # Only mpirun is affected, workaround is to set '--bind-to=none': + self.log.info("[pre-module hook] Disable mpirun process binding in NVHPC") + self.cfg['modextravars'].update({'OMPI_MCA_hwloc_base_binding_policy': 'none'}) ########################## # ------ TUNING -------- # From 0b16e7a8219cadb53da38a8ea955fc368fcfdb8e Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Fri, 25 Jul 2025 18:17:46 +0200 Subject: [PATCH 07/14] manually define subdir per toolchain generation --- src/build_tools/hooks_hydra.py | 49 ++++++++++++++++++++++------------ tests/test_hooks_hydra.py | 6 ++--- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 23f37cb..cf38df1 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -67,9 +67,17 @@ LOCAL_ARCH_SUFFIX = os.getenv('VSC_ARCH_SUFFIX') LOCAL_ARCH_FULL = f'{LOCAL_ARCH}{LOCAL_ARCH_SUFFIX}' -VALID_TCGENS = ['2024a', '25.1'] -VALID_MODULES_SUBDIRS = VALID_TCGENS + ['system'] -VALID_TCS = ['foss', 'intel', 'gomkl', 'gimkl', 'gimpi', 'nvidia-compilers', 'NVHPC'] +VALID_TOOLCHAINS = { + '2024a': { + 'toolchains': ['foss', 'intel', 'gomkl', 'gimkl', 'gimpi'], + 'subdir': '2024a', + }, + '25.1': { + 'toolchains': ['nvidia-compilers', 'NVHPC'], + 'subdir': '2024a', + }, +} +VALID_MODULES_SUBDIRS = ['system', '2024a'] SUBDIR_MODULES_BWRAP = '.modules_bwrap' SUFFIX_MODULES_PATH = 'collection' @@ -118,22 +126,26 @@ def get_tc_versions(): update_build_option('hooks', None) tc_versions = {} - for toolcgen in VALID_TCGENS: - tc_versions[toolcgen] = [] - for toolc in VALID_TCS: + for tcgen, tcgen_spec in VALID_TOOLCHAINS.items(): + tcgen_versions = [] + for tc_name in tcgen_spec['toolchains']: try: - tc_versions[toolcgen].extend(get_toolchain_hierarchy({'name': toolc, 'version': toolcgen})) + tcgen_versions.extend(get_toolchain_hierarchy({'name': tc_name, 'version': tcgen})) except EasyBuildError: # skip if no easyconfig found for toolchain-version pass + tc_versions[tcgen] = { + 'toolchains': tcgen_versions, + 'subdir': tcgen_spec['subdir'], + } update_build_option('hooks', hooks) return tc_versions -def calc_tc_gen(name, version, tcname, tcversion, easyblock): +def calc_tc_gen_subdir(name, version, tcname, tcversion, easyblock): """ - calculate the toolchain generation + calculate the toolchain generation subdir return False if not valid """ name_version = {'name': name, 'version': version} @@ -143,10 +155,11 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): tc_versions = get_tc_versions() # (software with) valid (sub)toolchain-version combination - for toolcgen in VALID_TCGENS: - if toolchain in tc_versions[toolcgen] or name_version in tc_versions[toolcgen]: - log_msg = f"Determined toolchain generation {toolcgen} for {software}" - return toolcgen, log_msg + for tcgen, tcgen_spec in tc_versions.items(): + if toolchain in tcgen_spec['toolchains'] or name_version in tcgen_spec['toolchains']: + tcgen_subdir = tcgen_spec['subdir'] + log_msg = f"Determined toolchain generation subdir '{tcgen_subdir}' for {software}" + return tcgen_subdir, log_msg # invalid toolchains # all toolchains have 'system' toolchain, so we need to handle the invalid toolchains separately @@ -157,8 +170,9 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): # software with 'system' toolchain: return 'system' if tcname == 'system': - log_msg = f"Determined toolchain {tcname} for {software}" - return tcname, log_msg + tcgen_subdir = 'system' + log_msg = f"Determined toolchain '{tcgen_subdir}' for {software}" + return tcgen_subdir, log_msg log_msg = f"Invalid toolchain {tcname} and/or toolchain version {tcversion} for {software}" return False, log_msg @@ -166,8 +180,9 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): def update_moduleclass(ec): "update the moduleclass of an easyconfig to /all" - tc_gen, log_msg = calc_tc_gen( - ec.name, ec.version, ec.toolchain.name, ec.toolchain.version, ec.easyblock) + tc_gen, log_msg = calc_tc_gen_subdir( + ec.name, ec.version, ec.toolchain.name, ec.toolchain.version, ec.easyblock + ) if not tc_gen: raise EasyBuildError("[parse hook] " + log_msg) diff --git a/tests/test_hooks_hydra.py b/tests/test_hooks_hydra.py index 882627d..969e514 100644 --- a/tests/test_hooks_hydra.py +++ b/tests/test_hooks_hydra.py @@ -44,8 +44,8 @@ ('fosscuda', '2023a', 'system', 'system', 'Toolchain', False), ], ) -def test_calc_tc_gen(toolchain, set_up_config): +def test_calc_tc_gen_subdir(toolchain, set_up_config): name, version, tcname, tcversion, easyblock, expected_generation = toolchain - generation, _ = hooks_hydra.calc_tc_gen(name, version, tcname, tcversion, easyblock) + generation, _ = hooks_hydra.calc_tc_gen_subdir(name, version, tcname, tcversion, easyblock) - assert generation == expected_generation + assert generation['toolchains'] == expected_generation From 3181680e6f5c2138fa14dac091b53663fa0dba62 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 10:39:05 +0200 Subject: [PATCH 08/14] add single method to determine gpu software --- src/build_tools/hooks_hydra.py | 48 ++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index cf38df1..ff827c6 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -83,6 +83,10 @@ SUFFIX_MODULES_PATH = 'collection' SUFFIX_MODULES_SYMLINK = 'all' +################## +# MODULE FOOTERS # +################## + INTEL_MPI_MOD_FOOTER = """ if ( os.getenv("SLURM_JOB_ID") ) then setenv("I_MPI_HYDRA_BOOTSTRAP", "slurm") @@ -97,6 +101,17 @@ setenv("JAVA_TOOL_OPTIONS", "-Xmx" .. math.floor(mem*0.8)) end """ +GPU_DUMMY_MOD_FOOTER = """ +if mode() == "load" and not os.getenv("BUILD_TOOLS_LOAD_DUMMY_MODULES") then + LmodError([[ +This module is only available on nodes with a GPU. +Jobs can request GPUs with the command 'srun --gpus-per-node=1' or 'sbatch --gpus-per-node=1'. + +More information in the VUB-HPC docs: +https://hpc.vub.be/docs/job-submission/gpu-job-types/#gpu-jobs + ]]) +end +""" def get_group(name, version): @@ -178,6 +193,18 @@ def calc_tc_gen_subdir(name, version, tcname, tcversion, easyblock): return False, log_msg +def is_gpu_software(ec): + "determine if it is a GPU-only installation" + gpu_components = ['CUDA'] + gpu_toolchains = ['nvidia-compilers', 'NVHPC'] + + is_gpu_package = ec.name in gpu_components or ec.name in gpu_toolchains + needs_gpu_toolchain = ec.toolchain.name in gpu_toolchains + needs_gpu_component = any([x in ec['versionsuffix'] for x in gpu_components]) + + return is_gpu_package or needs_gpu_toolchain or needs_gpu_component + + def update_moduleclass(ec): "update the moduleclass of an easyconfig to /all" tc_gen, log_msg = calc_tc_gen_subdir( @@ -326,10 +353,7 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec.log.info(f"[parse hook] Set optarch in parameter toolchainopts: {ec.toolchain.options['optarch']}") # skip installation of CUDA software in non-GPU architectures, only create module file - is_cuda_software = 'CUDA' in ec.name or 'CUDA' in ec['versionsuffix'] - cuda_tcs = ['CUDA', 'nvidia-compilers', 'NVHPC'] - is_cuda_software = ec.name in cuda_tcs or ec.toolchain.name in cuda_tcs or 'CUDA' in ec['versionsuffix'] - if is_cuda_software and LOCAL_ARCH_FULL not in GPU_ARCHS: + if is_gpu_software(ec) and LOCAL_ARCH_FULL not in GPU_ARCHS: # only install the module file in non-GPU nodes # module_only steps: [MODULE_STEP, PREPARE_STEP, READY_STEP, POSTITER_STEP, SANITYCHECK_STEP] ec['module_only'] = True @@ -338,7 +362,7 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec.log.info(f"[parse hook] Set parameter skipsteps: {ec['skipsteps']}") # set cuda compute capabilities - elif is_cuda_software: + elif is_gpu_software(ec): # on GPU nodes set cuda compute capabilities ec['cuda_compute_capabilities'] = ARCHS[LOCAL_ARCH_FULL]['cuda_cc'] ec.log.info(f"[parse hook] Set parameter cuda_compute_capabilities: {ec['cuda_compute_capabilities']}") @@ -579,19 +603,9 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument # ------ DUMMY MODULES -------- # ################################# - is_cuda_software = 'CUDA' in self.name or 'CUDA' in self.cfg['versionsuffix'] - if is_cuda_software and LOCAL_ARCH_FULL not in GPU_ARCHS: + if is_gpu_software(self.cfg) and LOCAL_ARCH_FULL not in GPU_ARCHS: self.log.info("[pre-module hook] Creating dummy module for CUDA modules on non-GPU nodes") - self.cfg['modluafooter'] = """ -if mode() == "load" and not os.getenv("BUILD_TOOLS_LOAD_DUMMY_MODULES") then - LmodError([[ -This module is only available on nodes with a GPU. -Jobs can request GPUs with the command 'srun --gpus-per-node=1' or 'sbatch --gpus-per-node=1'. - -More information in the VUB-HPC docs: -https://hpc.vub.be/docs/job-submission/gpu-job-types/#gpu-jobs - ]]) -end""" + self.cfg['modluafooter'] = GPU_DUMMY_MOD_FOOTER def post_build_and_install_loop_hook(ecs_with_res): From ea24c77a3da8be796e00380244d00202edacf6f9 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 11:07:14 +0200 Subject: [PATCH 09/14] avoid any module loads on the GPU dummy modules --- src/build_tools/hooks_hydra.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index ff827c6..4ceab66 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -360,6 +360,8 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec.log.info(f"[parse hook] Set parameter module_only: {ec['module_only']}") ec['skipsteps'] = [SANITYCHECK_STEP] ec.log.info(f"[parse hook] Set parameter skipsteps: {ec['skipsteps']}") + # remove all dependencies to avoid unnecessary module loads on the dummy module + ec['dependencies'] = [] # set cuda compute capabilities elif is_gpu_software(ec): From b0476b98bd8f8050b2de50335742fdfb11163c60 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 11:45:08 +0200 Subject: [PATCH 10/14] move NVHPC mpirun fix to parse hook --- src/build_tools/hooks_hydra.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 4ceab66..171a8d0 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -318,6 +318,14 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec['dependencies'] = [d for d in ec['dependencies'] if 'libfabric' not in d] ec.log.info("[parse hook] Removed libfabric from dependency list") + if ec.name == 'NVHPC': + # NVHPC ships with OpenMPI v4 which has an issue between its hwloc + # and Slurm cgroups2 that results in mpirun trying to use unallocated + # cores to the job (see https://github.com/open-mpi/ompi/issues/12470) + # Only mpirun is affected, workaround is to set '--bind-to=none': + ec.log.info("[parse hook] Disable mpirun process binding in NVHPC") + ec['modextravars'].update({'OMPI_MCA_hwloc_base_binding_policy': 'none'}) + if ec.name == 'Gurobi': # use centrally installed Gurobi license file, and don't copy to installdir ec['license_file'] = '/apps/brussel/licenses/gurobi/gurobi.lic' @@ -435,7 +443,10 @@ def pre_configure_hook(self, *args, **kwargs): # pylint: disable=unused-argumen def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument - """Hook at pre-module level to alter module files""" + """ + Hook at pre-module level to alter module files + WARNING: this hooks triggers *after* sanity checks + """ # Must be done this way, updating self.cfg['modextravars'] # directly doesn't work due to templating. @@ -498,12 +509,6 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument slurm_mpi_type = 'pmix' self.log.info("[pre-module hook] Set Slurm MPI type to: %s", slurm_mpi_type) self.cfg['modextravars'].update({'SLURM_MPI_TYPE': slurm_mpi_type}) - # NVHPC ships with OpenMPI v4 which has an issue between its hwloc - # and Slurm cgroups2 that results in mpirun trying to use unallocated - # cores to the job (see https://github.com/open-mpi/ompi/issues/12470) - # Only mpirun is affected, workaround is to set '--bind-to=none': - self.log.info("[pre-module hook] Disable mpirun process binding in NVHPC") - self.cfg['modextravars'].update({'OMPI_MCA_hwloc_base_binding_policy': 'none'}) ########################## # ------ TUNING -------- # From 55c9a374b76cacc7082aad8f00d837fbfda57520 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 12:01:26 +0200 Subject: [PATCH 11/14] gather all hooks about GPU modules in the parse hook --- src/build_tools/hooks_hydra.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 171a8d0..5116dd3 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -360,16 +360,22 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec.toolchain.options['optarch'] = optarchs_intel[LOCAL_ARCH] ec.log.info(f"[parse hook] Set optarch in parameter toolchainopts: {ec.toolchain.options['optarch']}") - # skip installation of CUDA software in non-GPU architectures, only create module file + ############################### + # ------ GPU MODULES -------- # + ############################### + + # skip installation of CUDA software in non-GPU architectures, only create a dummy module file if is_gpu_software(ec) and LOCAL_ARCH_FULL not in GPU_ARCHS: - # only install the module file in non-GPU nodes + ec.log.info("[parse hook] Generating dummy GPU module on non-GPU node") + # remove all dependencies to avoid unnecessary module loads on the dummy module + ec['dependencies'] = [] + # inject error message in module file + ec['modluafooter'] = GPU_DUMMY_MOD_FOOTER # module_only steps: [MODULE_STEP, PREPARE_STEP, READY_STEP, POSTITER_STEP, SANITYCHECK_STEP] ec['module_only'] = True ec.log.info(f"[parse hook] Set parameter module_only: {ec['module_only']}") ec['skipsteps'] = [SANITYCHECK_STEP] ec.log.info(f"[parse hook] Set parameter skipsteps: {ec['skipsteps']}") - # remove all dependencies to avoid unnecessary module loads on the dummy module - ec['dependencies'] = [] # set cuda compute capabilities elif is_gpu_software(ec): @@ -606,14 +612,6 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument else: self.cfg['docurls'] = [usage_info['link']] - ################################# - # ------ DUMMY MODULES -------- # - ################################# - - if is_gpu_software(self.cfg) and LOCAL_ARCH_FULL not in GPU_ARCHS: - self.log.info("[pre-module hook] Creating dummy module for CUDA modules on non-GPU nodes") - self.cfg['modluafooter'] = GPU_DUMMY_MOD_FOOTER - def post_build_and_install_loop_hook(ecs_with_res): """ From 0515c22769dc2dc2ad973d61b10320faa75037ab Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 12:02:26 +0200 Subject: [PATCH 12/14] fix dummy module for NVHPC --- src/build_tools/hooks_hydra.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 5116dd3..6886828 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -371,6 +371,9 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument ec['dependencies'] = [] # inject error message in module file ec['modluafooter'] = GPU_DUMMY_MOD_FOOTER + # workaround for NVHPC + if ec.name == 'NVHPC': + ec['default_cuda_version'] = '0' # module_only steps: [MODULE_STEP, PREPARE_STEP, READY_STEP, POSTITER_STEP, SANITYCHECK_STEP] ec['module_only'] = True ec.log.info(f"[parse hook] Set parameter module_only: {ec['module_only']}") From c138388ffb22006938793773ab2064e58c5ba8e1 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Sat, 26 Jul 2025 11:14:29 +0200 Subject: [PATCH 13/14] version bump to v4.4.0 --- src/build_tools/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build_tools/package.py b/src/build_tools/package.py index 37732c9..ed4a7fa 100644 --- a/src/build_tools/package.py +++ b/src/build_tools/package.py @@ -16,7 +16,7 @@ @author: Alex Domingo (Vrije Universiteit Brussel) """ -VERSION = '4.3.4' +VERSION = '4.4.0' AUTHOR = { 'wp': 'Ward Poelmans', From c5ea13443c474d3832b175873251908371b9e326 Mon Sep 17 00:00:00 2001 From: Alex Domingo Date: Mon, 28 Jul 2025 13:25:02 +0200 Subject: [PATCH 14/14] revert removal of dependencies in dummy modules, EB needs those in the module to pass checks --- src/build_tools/hooks_hydra.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 6886828..81c234d 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -367,8 +367,6 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument # skip installation of CUDA software in non-GPU architectures, only create a dummy module file if is_gpu_software(ec) and LOCAL_ARCH_FULL not in GPU_ARCHS: ec.log.info("[parse hook] Generating dummy GPU module on non-GPU node") - # remove all dependencies to avoid unnecessary module loads on the dummy module - ec['dependencies'] = [] # inject error message in module file ec['modluafooter'] = GPU_DUMMY_MOD_FOOTER # workaround for NVHPC