Patch: install CUDA and cuDNN into host_injections from easystacks that are
shipped under scripts/gpu_support/nvidia/easystacks.

install_cuda_and_libraries.sh no longer takes -e/--easystack. It loops over
${TOPDIR}/easystacks/eessi-*CUDA*.yml, loads the EasyBuild version encoded in
each file name, and passes the EasyBuild option accept-eula-for only for the
EULAs that were accepted on the command line (CUDA and/or cuDNN).

Note: indentation and column alignment inside the hunks were reconstructed and
the post-image blob ids on the 'index' lines were not recomputed. If context
lines do not match, apply with `git apply --ignore-whitespace`.

diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh
index b6797188be..f20b1b6c64 100755
--- a/EESSI-install-software.sh
+++ b/EESSI-install-software.sh
@@ -260,16 +260,11 @@ fi
 temp_install_storage=${TMPDIR}/temp_install_storage
 mkdir -p ${temp_install_storage}
 
-# Note the eessi...CUDA.yml file(s) is(are) copied by 'install_scripts.sh' from
-# the EESSI/software-layer easystacks/software.eessi.io/2023.06/accel/nvidia
-# directory to /cvmfs to avoid keeping them in sync manually. If more than one
-# such file is used (e.g., because different EasyBuild versions were used), the
-# install script 'install_cuda_and_libraries.sh' has to be run multiple times.
 if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
     ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \
-        -e ${EESSI_PREFIX}/scripts/gpu_support/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml \
         -t ${temp_install_storage} \
-        --accept-cuda-eula
+        --accept-cuda-eula \
+        --accept-cudnn-eula
 else
     echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
 fi
diff --git a/install_scripts.sh b/install_scripts.sh
index ad73e769dd..b6b5ac92b0 100755
--- a/install_scripts.sh
+++ b/install_scripts.sh
@@ -128,16 +128,12 @@ nvidia_files=(
 )
 copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}"
 
-# special treatment for the easystack file(s) that lists CUDA and cu* libraries
-# To be picked up by a build job they have to be stored under
-# easystacks/software.eessi.io/2023.06/accel/nvidia/ on GitHub.
-# To avoid keeping that file and the one that we distribute via CernVM-FS so
-# users/sites can install the full CUDA SDK and cu* libraries under
-# 'host_injections' we copy the above file to the right location under /cvmfs.
-nvidia_host_injections_files=(
-    eessi-2023.06-eb-4.9.4-2023a-CUDA.yml
+# Easystacks to be used to install software in host injections
+host_injections_easystacks=(
+    eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml
 )
-copy_files_by_list ${TOPDIR}/easystacks/software.eessi.io/2023.06/accel/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_host_injections_files[@]}"
+copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia/easystacks \
+${INSTALL_PREFIX}/scripts/gpu_support/nvidia/easystacks "${host_injections_easystacks[@]}"
 
 # Copy over EasyBuild hooks file used for installations
 hook_files=(
diff --git a/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml
new file mode 100644
index 0000000000..4e3fffacca
--- /dev/null
+++ b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml
@@ -0,0 +1,8 @@
+# This EasyStack provides a list of all the EasyConfigs that should be installed in host_injections
+# for NVIDIA GPU support, because they cannot (fully) be shipped as part of EESSI due to license constraints
+easyconfigs:
+  - CUDA-12.1.1.eb
+  - cuDNN-8.9.2.26-CUDA-12.1.1.eb:
+      options:
+        # Needed for support for the --accept-eula-for option
+        include-easyblocks-from-commit: 11afb88ec55e0ca431cbe823696aa43e2a9bfca8
diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
index 40bff34bb7..3d273f0fbd 100755
--- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
+++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
@@ -27,8 +27,9 @@ show_help() {
     echo "  --accept-cuda-eula               You _must_ accept the CUDA EULA to install"
     echo "                                   CUDA, see the EULA at"
     echo "                                   https://docs.nvidia.com/cuda/eula/index.html"
-    echo "  -e, --easystack EASYSTACK_FILE   Path to easystack file that defines which"
-    echo "                                   packages shall be installed"
+    echo "  --accept-cudnn-eula              You _must_ accept the cuDNN EULA to install"
+    echo "                                   cuDNN, see the EULA at"
+    echo "                                   https://docs.nvidia.com/deeplearning/cudnn/latest/reference/eula.html"
     echo "  -t, --temp-dir /path/to/tmpdir   Specify a location to use for temporary"
     echo "                                   storage during the installation of CUDA"
     echo "                                   and/or other libraries (must have"
@@ -36,7 +37,8 @@ show_help() {
 }
 
 # Initialize variables
-eula_accepted=0
+cuda_eula_accepted=0
+cudnn_eula_accepted=0
 EASYSTACK_FILE=
 TEMP_DIR=
 
@@ -48,18 +50,12 @@ while [[ $# -gt 0 ]]; do
             exit 0
             ;;
         --accept-cuda-eula)
-            eula_accepted=1
+            cuda_eula_accepted=1
             shift 1
             ;;
-        -e|--easystack)
-            if [ -n "$2" ]; then
-                EASYSTACK_FILE="$2"
-                shift 2
-            else
-                echo "Error: Argument required for $1"
-                show_help
-                exit 1
-            fi
+        --accept-cudnn-eula)
+            cudnn_eula_accepted=1
+            shift 1
             ;;
         -t|--temp-dir)
             if [ -n "$2" ]; then
@@ -78,16 +74,11 @@ while [[ $# -gt 0 ]]; do
     esac
 done
 
-if [[ -z "${EASYSTACK_FILE}" ]]; then
-    fatal_error "Need the name/path to an easystack file. See command line options\n"
-fi
-
 # Make sure EESSI is initialised
 check_eessi_initialised
 
-# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections`
-# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup)
-export EESSI_SITE_INSTALL=${EESSI_SOFTWARE_PATH/versions/host_injections}
+# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH
+export EESSI_SITE_INSTALL=1
 
 # we need a directory we can use for temporary storage
 if [[ -z "${TEMP_DIR}" ]]; then
@@ -101,119 +92,141 @@ else
 fi
 echo "Created temporary directory '${tmpdir}'"
 
-echo "MODULEPATH=${MODULEPATH}"
-echo "List available *CUDA* modules before loading EESSI-extend/${EESSI_VERSION}-easybuild"
-module avail CUDA
+# use install_path/modules/all as MODULEPATH
+SAVE_MODULEPATH=${MODULEPATH}
 
-# load EESSI-extend/2023.06-easybuild module && verify that it is loaded
-EESSI_EXTEND_MODULE="EESSI-extend/${EESSI_VERSION}-easybuild"
-module load ${EESSI_EXTEND_MODULE}
-ret=$?
-if [ "${ret}" -ne 0 ]; then
-    fatal_error "An error occured while trying to load ${EESSI_EXTEND_MODULE}\n"
-fi
+for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
+    echo -e "Processing easystack file ${EASYSTACK_FILE}...\n\n"
+
+    # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
+    eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g')
+
+    # Load EasyBuild version for this easystack file _before_ loading EESSI-extend
+    module load EasyBuild/${eb_version}
+    module load EESSI-extend/${EESSI_VERSION}-easybuild
+
+    # Install modules in hidden .modules dir to keep track of what was installed before
+    MODULEPATH=${EASYBUILD_INSTALLPATH}/.modules/all
+    echo "set MODULEPATH=${MODULEPATH}"
+
+    # show EasyBuild configuration
+    echo "Show EasyBuild configuration"
+    eb --show-config
+
+    # do a 'eb --dry-run-short' with the EASYSTACK_FILE and determine list of packages
+    # to be installed
+    echo ">> Determining if packages specified in ${EASYSTACK_FILE} are missing under ${EESSI_SITE_SOFTWARE_PATH}"
+    eb_dry_run_short_out=${tmpdir}/eb_dry_run_short.out
+    eb --dry-run-short --easystack ${EASYSTACK_FILE} 2>&1 | tee ${eb_dry_run_short_out}
+    ret=$?
+
+    # Check if CUDA shall be installed
+    cuda_install_needed=0
+    cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | grep "module: CUDA/" > /dev/null
+    ret=$?
+    if [ "${ret}" -eq 0 ]; then
+        cuda_install_needed=1
+    fi
 
-echo "MODULEPATH=${MODULEPATH}"
-echo "List available *CUDA* modules after loading EESSI-extend/${EESSI_VERSION}-easybuild"
-module avail CUDA
+    # Make sure the CUDA EULA is accepted if it shall be installed
+    if [ "${cuda_install_needed}" -eq 1 ] && [ "${cuda_eula_accepted}" -ne 1 ]; then
+        show_help
+        error="\nCUDA shall be installed. However, the CUDA EULA has not been accepted\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n"
+        fatal_error "${error}"
+    fi
 
-# use install_path/modules/all as MODULEPATH
-SAVE_MODULEPATH=${MODULEPATH}
-MODULEPATH=${EASYBUILD_INSTALLPATH}/.modules/all
-echo "set MODULEPATH=${MODULEPATH}"
-
-# show EasyBuild configuration
-echo "Show EasyBuild configuration"
-eb --show-config
-
-# do a 'eb --dry-run-short' with the EASYSTACK_FILE and determine list of packages
-# to be installed
-echo ">> Determining if packages specified in ${EASYSTACK_FILE} are missing under ${EESSI_SITE_INSTALL}"
-eb_dry_run_short_out=${tmpdir}/eb_dry_run_short.out
-eb --dry-run-short --rebuild --easystack ${EASYSTACK_FILE} 2>&1 | tee ${eb_dry_run_short_out}
-ret=$?
-
-# Check if CUDA shall be installed
-cuda_install_needed=0
-cat ${eb_dry_run_short_out} | grep "^ \* \[[xR]\]" | grep "module: CUDA/"
-ret=$?
-if [ "${ret}" -eq 0 ]; then
-    cuda_install_needed=1
-fi
+    # Check if cuDNN shall be installed
+    cudnn_install_needed=0
+    cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | grep "module: cuDNN/" > /dev/null
+    ret=$?
+    if [ "${ret}" -eq 0 ]; then
+        cudnn_install_needed=1
+    fi
 
-# Make sure the CUDA EULA is accepted if it shall be installed
-if [ "${cuda_install_needed}" -eq 1 ] && [ "${eula_accepted}" -ne 1 ]; then
-    show_help
-    error="\nCUDA shall be installed. However, the CUDA EULA has not been accepted\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n"
-    fatal_error "${error}"
-fi
+    # Make sure the cuDNN EULA is accepted if it shall be installed
+    if [ "${cudnn_install_needed}" -eq 1 ] && [ "${cudnn_eula_accepted}" -ne 1 ]; then
+        show_help
+        error="\ncuDNN shall be installed. However, the cuDNN EULA has not been accepted\nYou _must_ accept the cuDNN EULA via the appropriate command line option.\n"
+        fatal_error "${error}"
+    fi
 
-# determine the number of packages to be installed (assume 5 GB + num_packages *
-# 3GB space needed)
-number_of_packages=$(cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | sed -e 's/^.*module: //' | sort -u | wc -l)
-echo "number of packages to be (re-)installed: '${number_of_packages}'"
-base_storage_space=$((5000000 + ${number_of_packages} * 3000000))
-
-required_space_in_tmpdir=${base_storage_space}
-# Let's see if we have sources and build locations defined if not, we use the temporary space
-if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then
-    export EASYBUILD_BUILDPATH=${tmpdir}/build
-    required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space}))
-fi
-if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then
-    export EASYBUILD_SOURCEPATH=${tmpdir}/sources
-    required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space}))
-fi
+    # determine the number of packages to be installed (assume 5 GB + num_packages *
+    # 3GB space needed). Both CUDA and cuDNN are about this size
+    number_of_packages=$(cat ${eb_dry_run_short_out} | grep "^ \* \[[ ]\]" | sed -e 's/^.*module: //' | sort -u | wc -l)
+    echo "number of packages to be (re-)installed: '${number_of_packages}'"
+    base_storage_space=$((5000000 + ${number_of_packages} * 3000000))
+
+    required_space_in_tmpdir=${base_storage_space}
+    # Let's see if we have sources and build locations defined if not, we use the temporary space
+    if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then
+        export EASYBUILD_BUILDPATH=${tmpdir}/build
+        required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space}))
+    fi
+    if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then
+        export EASYBUILD_SOURCEPATH=${tmpdir}/sources
+        required_space_in_tmpdir=$((required_space_in_tmpdir + ${base_storage_space}))
+    fi
+
+    # The install is pretty fat, you need lots of space for download/unpack/install
+    # (~3*${base_storage_space}*1000 Bytes),
+    # need to do a space check before we proceed
+    avail_space=$(df --output=avail "${EESSI_SITE_SOFTWARE_PATH}"/ | tail -n 1 | awk '{print $1}')
+    min_disk_storage=$((3 * ${base_storage_space}))
+    if (( avail_space < ${min_disk_storage} )); then
+        fatal_error "Need at least $(echo "${min_disk_storage} / 1000000" | bc) GB disk space to install CUDA and other libraries under ${EESSI_SITE_SOFTWARE_PATH}, exiting now..."
+    fi
+    avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}')
+    if (( avail_space < required_space_in_tmpdir )); then
+        error="Need at least $(echo "${required_space_in_tmpdir} / 1000000" | bc) temporary disk space under ${tmpdir}.\n"
+        error="${error}Set the environment variable TEMP_DIR to a location with adequate space to pass this check."
+        error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH"
+        error="${error}to reduce this requirement. Exiting now..."
+        fatal_error "${error}"
+    fi
 
-# The install is pretty fat, you need lots of space for download/unpack/install
-# (~3*${base_storage_space}*1000 Bytes),
-# need to do a space check before we proceed
-avail_space=$(df --output=avail "${EESSI_SITE_INSTALL}"/ | tail -n 1 | awk '{print $1}')
-min_disk_storage=$((3 * ${base_storage_space}))
-if (( avail_space < ${min_disk_storage} )); then
-    fatal_error "Need at least $(echo "${min_disk_storage} / 1000000" | bc) GB disk space to install CUDA and other libraries under ${EESSI_SITE_INSTALL}, exiting now..."
-fi
-avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}')
-if (( avail_space < required_space_in_tmpdir )); then
-    error="Need at least $(echo "${required_space_in_tmpdir} / 1000000" | bc) temporary disk space under ${tmpdir}.\n"
-    error="${error}Set the environment variable TEMP_DIR to a location with adequate space to pass this check."
-    error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH"
-    error="${error}to reduce this requirement. Exiting now..."
-    fatal_error "${error}"
-fi
+    # Brief explanation of parameters:
+    #  - prefix: using $tmpdir as default base directory for several EB settings
+    #  - installpath-modules: We install the module in a hidden .modules, so that next time this script
+    #          is run, it is not reinstalled.
+    #  - ${accept_eula_opt}: We only set the --accept-eula-for=CUDA option if CUDA will be installed and if
+    #          this script was called with the argument --accept-cuda-eula.
+    #  - hooks: We don't want hooks used in this install, we need vanilla
+    #          installations of CUDA and/or other libraries
+    #  - easystack: Path to easystack file that defines which packages shall be
+    #          installed
+    accept_eula_opt=
+    if [[ ${cuda_eula_accepted} -eq 1 ]]; then
+        accept_eula_opt="CUDA"
+    fi
+    if [[ ${cudnn_eula_accepted} -eq 1 ]]; then
+        if [[ -z ${accept_eula_opt} ]]; then
+            accept_eula_opt="cuDNN"
+        else
+            accept_eula_opt="${accept_eula_opt},cuDNN"
+        fi
+    fi
+    touch "$tmpdir"/none.py
+    eb_args="--prefix=$tmpdir"
+    eb_args="$eb_args --installpath-modules=${EASYBUILD_INSTALLPATH}/.modules"
+    eb_args="$eb_args --hooks="$tmpdir"/none.py"
+    eb_args="$eb_args --easystack ${EASYSTACK_FILE}"
+    if [[ ! -z ${accept_eula_opt} ]]; then
+        eb_args="$eb_args --accept-eula-for=$accept_eula_opt"
+    fi
+    echo "Running eb $eb_args"
+    eb $eb_args
+    ret=$?
+    if [ $ret -ne 0 ]; then
+        eb_last_log=$(unset EB_VERBOSE; eb --last-log)
+        cp -a ${eb_last_log} .
+        fatal_error "some installation failed, please check EasyBuild logs ${PWD}/$(basename ${eb_last_log})..."
+    else
+        echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!"
+    fi
 
-# Brief explanation of parameters:
-#  - prefix: using $tmpdir as default base directory for several EB settings
-#  - rebuild: we need the --rebuild option, as the CUDA module may or may not be on the
-#          `MODULEPATH` yet. Even if it is, we still want to redo this installation
-#          since it will provide the symlinked targets for the parts of the CUDA
-#          and/or other installation in the `.../versions/...` prefix
-#  - installpath-modules: We install the module in our `tmpdir` since we do not need the modulefile,
-#          we only care about providing the targets for the symlinks.
-#  - ${accept_eula_opt}: We only set the --accept-eula-for=CUDA option if CUDA will be installed and if
-#          this script was called with the argument --accept-cuda-eula.
-#  - hooks: We don't want hooks used in this install, we need vanilla
-#          installations of CUDA and/or other libraries
-#  - easystack: Path to easystack file that defines which packages shall be
-#          installed
-accept_eula_opt=
-if [[ ${eula_accepted} -eq 1 ]]; then
-    accept_eula_opt="--accept-eula-for=CUDA"
-fi
-touch "$tmpdir"/none.py
-eb --prefix="$tmpdir" \
-    --rebuild \
-    --installpath-modules=${EASYBUILD_INSTALLPATH}/.modules \
-    "${accept_eula_opt}" \
-    --hooks="$tmpdir"/none.py \
-    --easystack ${EASYSTACK_FILE}
-ret=$?
-if [ $ret -ne 0 ]; then
-    eb_last_log=$(unset EB_VERBOSE; eb --last-log)
-    cp -a ${eb_last_log} .
-    fatal_error "some installation failed, please check EasyBuild logs ${PWD}/$(basename ${eb_last_log})..."
-else
-    echo_green "all installations at ${EESSI_SITE_INSTALL}/software/... succeeded!"
-fi
-# clean up tmpdir
-rm -rf "${tmpdir}"
+    # Restore MODULEPATH for next loop iteration
+    MODULEPATH=${SAVE_MODULEPATH}
+done
+
+# clean up tmpdir (only once all easystack files are processed, later iterations still need it)
+rm -rf "${tmpdir}"