diff --git a/configs/sites/aws-pcluster/README.md b/configs/sites/aws-pcluster/README.md index 8190068fb..99f5cd50e 100644 --- a/configs/sites/aws-pcluster/README.md +++ b/configs/sites/aws-pcluster/README.md @@ -4,9 +4,9 @@ ### Base instance Choose a basic AMI from the Community AMIs tab that matches your desired OS and parallelcluster version. Select an instance type of the same family that you are planning to use for the head and the compute nodes, and enough storage for a swap file and a spack-stack installation. For example: -- AMI ID: ami-07410779598773e7d (aws-parallelcluster-3.8.0-ubuntu-2204-lts-hvm-x86_64-202312160956 2023-12-16T10-00-45.861Z) -- Instance hpc7a.96xlarge -- Use 500GB of gp3 storage as / +- AMI ID: ami-093dab62f7840644b +- Instance hpc6a.48xlarge +- Use 350GB of gp3 storage as / ### Prerequisites 1. As `root`: @@ -47,7 +47,8 @@ tar -xvf Lmod-8.7.tar.bz2 cd Lmod-8.7 # Note the weird prefix, lmod installs in PREFIX/lmod/X.Y automatically ./configure --prefix=/opt/ \ ---with-lmodConfigDir=/opt/lmod/8.7/config 2>&1 | tee log.config +--with-lmodConfigDir=/opt/lmod/8.7/config \ +2>&1 | tee log.config make install 2>&1 | tee log.install ln -sf /opt/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh ln -sf /opt/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh @@ -56,19 +57,19 @@ ln -sf /opt/lmod/lmod/init/profile.fish /etc/profile.d/z00_lmod.fish # Add custom module locations and fix existing modules # # intelmpi -echo "conflict openmpi" >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi -echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.19.0amzn4.0 ] } {' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi -echo ' module load libfabric-aws/1.19.0amzn4.0' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi -echo '}' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi +echo "conflict openmpi" >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi +echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.16.0~amzn4.0 ] } {' >> 
/opt/intel/mpi/2021.6.0/modulefiles/intelmpi +echo ' module load libfabric-aws/1.16.0~amzn4.0' >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi +echo '}' >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi # openmpi -echo "conflict intelmpi" >> /usr/share/modules/modulefiles/openmpi/4.1.6 -echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.19.0amzn4.0 ] } {' >> /usr/share/modules/modulefiles/openmpi/4.1.6 -echo ' module load libfabric-aws/1.19.0amzn4.0' >> /usr/share/modules/modulefiles/openmpi/4.1.6 -echo '}' >> /usr/share/modules/modulefiles/openmpi/4.1.6 +echo "conflict intelmpi" >> /usr/share/modules/modulefiles/openmpi/4.1.4 +echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.16.0~amzn4.0 ] } {' >> /usr/share/modules/modulefiles/openmpi/4.1.4 +echo ' module load libfabric-aws/1.16.0~amzn4.0' >> /usr/share/modules/modulefiles/openmpi/4.1.4 +echo '}' >> /usr/share/modules/modulefiles/openmpi/4.1.4 # echo "module use /usr/share/modules/modulefiles" >> /etc/profile.d/z01_lmod.sh -### NO NOT ANY MORE ### echo "module use /opt/intel/mpi/2021.9.0/modulefiles" >> /etc/profile.d/z01_lmod.sh -### NO NOT ANY MORE ### echo "module use /home/ubuntu/jedi/modulefiles" >> /etc/profile.d/z01_lmod.sh +echo "module use /opt/intel/mpi/2021.6.0/modulefiles" >> /etc/profile.d/z01_lmod.sh +echo "module use /home/ubuntu/jedi/modulefiles" >> /etc/profile.d/z01_lmod.sh # # Log out completely, ssh back into the instance and check if lua modules work exit @@ -77,10 +78,10 @@ exit ssh ... 
# Now user ubuntu module av -module load libfabric-aws/1.19.0amzn4.0 -module load openmpi/4.1.6 +module load libfabric-aws/1.16.0~amzn4.0 +module load openmpi/4.1.4 module list -module unload openmpi/4.1.6 +module unload openmpi/4.1.4 module load intelmpi module list module purge @@ -102,18 +103,29 @@ apt install -y unzip apt install -y automake apt install -y xterm apt install -y texlive -apt install -y cmake # This is for ecflow -apt install -y qtcreator qtbase5-dev qt5-qmake -apt install -y libqt5widgets5 +apt install -y qt5-default apt install -y libqt5svg5-dev apt install -y qt5dxcb-plugin -# For mysql -apt install -y mysql-server -# Test -mysql -u root +### # Remove AWS openmpi +### apt remove -y openmpi40-aws + +# This is because boost doesn't work with the Intel compiler +apt install -y libboost1.71-dev +apt install -y libboost-chrono1.71-dev +apt install -y libboost-date-time1.71-dev +apt install -y libboost-exception1.71-dev +apt install -y libboost-filesystem1.71-dev +apt install -y libboost-program-options1.71-dev +apt install -y libboost-python1.71-dev +apt install -y libboost-regex1.71-dev +apt install -y libboost-serialization1.71-dev +apt install -y libboost-system1.71-dev +apt install -y libboost-test1.71-dev +apt install -y libboost-thread1.71-dev +apt install -y libboost-timer1.71-dev # Python apt install -y python3-dev python3-pip @@ -122,16 +134,7 @@ apt install -y python3-dev python3-pip wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list apt-get update -apt-get install -y intel-hpckit-2024.0/all -exit - -# As ubuntu -/opt/intel/modulefiles-setup.sh -# Back to root -sudo su -mv /home/ubuntu/modulefiles /opt/intel/modulefiles -echo "module unuse 
/opt/intel/mpi/2021.9.0/modulefiles" >> /etc/profile.d/z01_lmod.sh -echo "module use /opt/intel/modulefiles" >> /etc/profile.d/z01_lmod.sh +apt-get install -y intel-hpckit-2022.2.0/all # Docker # See https://docs.docker.com/engine/install/ubuntu/ @@ -155,7 +158,15 @@ service sshd restart cd /usr/lib64/ ln -sf /usr/lib/x86_64-linux-gnu/libcrypt.so . cd /usr/include -ln -sf python3.10/pyconfig.h . +ln -sf python3.8/pyconfig.h . + +# Create swapfile - 100GB +dd if=/dev/zero of=/swapfile bs=128M count=800 +chmod 600 /swapfile +mkswap /swapfile +swapon /swapfile +swapon -s +echo "/swapfile swap swap defaults 0 0" >> /etc/fstab # Exit root session exit @@ -166,12 +177,73 @@ git config --global credential.helper cache 2. Log out and back in to enable x11 forwarding -3. Create directory for spack-stack external packages +3. Build ecflow outside of spack to be able to link against OS boost ``` -mkdir -p /home/ubuntu/spack-stack/external +mkdir -p /home/ubuntu/jedi/ecflow-5.8.4/src +cd /home/ubuntu/jedi/ecflow-5.8.4/src +wget https://confluence.ecmwf.int/download/attachments/8650755/ecFlow-5.8.4-Source.tar.gz?api=v2 +mv ecFlow-5.8.4-Source.tar.gz\?api\=v2 ecFlow-5.8.4-Source.tar.gz +tar -xvzf ecFlow-5.8.4-Source.tar.gz +export WK=/home/ubuntu/jedi/ecflow-5.8.4/src/ecFlow-5.8.4-Source +export BOOST_ROOT=/usr + +# Build ecFlow +cd $WK +mkdir build +cd build +cmake .. -DPython3_EXECUTABLE=/usr/bin/python3 -DENABLE_STATIC_BOOST_LIBS=OFF -DCMAKE_INSTALL_PREFIX=/home/ubuntu/jedi/ecflow-5.8.4 2>&1 | tee log.cmake +make -j4 2>&1 | tee log.make +make install 2>&1 | tee log.install + +# Create a modulefiles directory with the following ecflow/5.8.4 module in it (w/o the '%%%%...' lines): +mkdir -p /home/ubuntu/jedi/modulefiles/ecflow +vi /home/ubuntu/jedi/modulefiles/ecflow/5.8.4 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#%Module1.0 + +module-whatis "Provides an ecflow-5.8.4 server+ui installation for use with spack." 
+ +conflict ecflow + +proc ModulesHelp { } { +puts stderr "Provides an ecflow-5.8.4 server+ui installation for use with spack." +} + +# Set this value +set ECFLOW_PATH "/home/ubuntu/jedi/ecflow-5.8.4" + +prepend-path PATH "${ECFLOW_PATH}/bin" +prepend-path LD_LIBRARY_PATH "${ECFLOW_PATH}/lib" +prepend-path LIBRARY_PATH "${ECFLOW_PATH}/lib" +prepend-path CPATH "${ECFLOW_PATH}/include" +prepend-path CMAKE_PREFIX_PATH "${ECFLOW_PATH}" +prepend-path PYTHONPATH "${ECFLOW_PATH}/lib/python3.8/site-packages" +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +``` + +4. Install MySQL community server +``` +cd /home/ubuntu/jedi +mkdir -p mysql-8.0.31/src +cd mysql-8.0.31/src +wget https://dev.mysql.com/get/Downloads/MySQL-8.0/mysql-server_8.0.32-1ubuntu20.04_amd64.deb-bundle.tar +tar -xvf mysql-server_8.0.32-1ubuntu20.04_amd64.deb-bundle.tar +# Switch to root +sudo su +dpkg -i *.deb +apt --fix-broken install +dpkg -i *.deb +# Use an empty password for root, choose legacy authentication method; test connection +mysql -u root +show databases; +# exit mysql +exit +# exit root session +exit +rm *.deb ``` -4. Option 1: Testing existing site config in spack-stack (skip steps 5-7 afterwards) +5. Option 1: Testing existing site config in spack-stack (skip steps 5-7 afterwards) ``` mkdir -p /home/ubuntu/sandpit cd /home/ubuntu/sandpit @@ -188,7 +260,7 @@ spack module lmod refresh spack stack setup-meta-modules ``` -5. Option 2: Test configuring site from scratch +6. 
Option 2: Test configuring site from scratch ``` mkdir /home/ubuntu/jedi && cd /home/ubuntu/jedi git clone -b develop --recursive https://github.com/jcsda/spack-stack spack-stack @@ -199,41 +271,39 @@ spack env activate -p envs/unified-env export SPACK_SYSTEM_CONFIG_PATH=/home/ubuntu/jedi/spack-stack/envs/unified-env/site -spack external find --scope system \ - --exclude bison --exclude cmake \ - --exclude curl --exclude openssl \ - --exclude openssh +spack external find --scope system +spack external find --scope system perl +spack external find --scope system python spack external find --scope system wget -spack external find --scope system mysql spack external find --scope system texlive -spack external find --scope system sed +spack external find --scope system mysql # No external find for pre-installed intel-oneapi-mpi (from pcluster AMI), # and no way to add object entry to list using "spack config add". echo " intel-oneapi-mpi:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - spec: intel-oneapi-mpi@2021.9.0%intel@2022.1.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - spec: intel-oneapi-mpi@2021.6.0%intel@2022.1.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " prefix: /opt/intel" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " modules:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - libfabric-aws/1.19.0amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - libfabric-aws/1.16.0~amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " - intelmpi" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml # Add external openmpi echo " openmpi:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - spec: openmpi@4.1.6%gcc@9.4.0~cuda~cxx~cxx_exceptions~java~memchecker+pmi~static~wrapper-rpath" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - spec: 
openmpi@4.1.4%gcc@9.4.0~cuda~cxx~cxx_exceptions~java~memchecker+pmi~static~wrapper-rpath" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " fabrics=ofi schedulers=slurm" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " prefix: /opt/amazon/openmpi" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " modules:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - libfabric-aws/1.19.0amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - openmpi/4.1.6" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - libfabric-aws/1.16.0~amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - openmpi/4.1.4" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml # Can't find qt5 because qtpluginfo is broken, # and no way to add object entry to list using "spack config add". echo " qt:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " buildable: False" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml -echo " - spec: qt@5.15.3" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml +echo " - spec: qt@5.12.8" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml echo " prefix: /usr" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml # Add external boost @@ -255,7 +325,7 @@ spack compiler find --scope system export -n SPACK_SYSTEM_CONFIG_PATH spack config add "packages:mpi:buildable:False" -spack config add "packages:all:providers:mpi:[intel-oneapi-mpi@2021.9.0, openmpi@4.1.6]" +spack config add "packages:all:providers:mpi:[intel-oneapi-mpi@2021.6.0, openmpi@4.1.4]" spack config add "packages:all:compiler:[intel@2022.1.0, gcc@9.4.0]" # edit envs/unified-env/site/compilers.yaml and replace the following line in the **Intel** compiler section: @@ -263,7 +333,7 @@ spack config add "packages:all:compiler:[intel@2022.1.0, gcc@9.4.0]" # --> # environment: # prepend_path: -# LD_LIBRARY_PATH: 
'/opt/intel/oneapi/compiler/2021.6.0/linux/compiler/lib/intel64_lin' # set: # I_MPI_PMI_LIBRARY: '/opt/slurm/lib/libpmi.so' ``` diff --git a/spack b/spack index 413c0e956..d883a4c57 160000 --- a/spack +++ b/spack @@ -1 +1 @@ -Subproject commit 413c0e9564f10c4764f9eb93f835742c936bb402 +Subproject commit d883a4c57152a3298ad886ad9d7b94b9a35ddff7