diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index b9b8018..0748c0d 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -1,9 +1,9 @@ -name: Linux GPU NVHPC -# triggered events (push, pull_request) for the develop branch +name: Linux NVHPC GPU +# triggered events (push, pull_request) for the master branch on: pull_request: - branches: [ develop ] - types: [ labeled ] + branches: [ master ] + types: [ labeled, opened, synchronize, reopened ] workflow_dispatch: #defaults: @@ -13,29 +13,62 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - GPU_build: - if: ${{ github.event.label.name == 'GPU_Test' }} + ubuntu_build: + if: contains(github.event.pull_request.labels.*.name, 'GPU-CI') - name: GPU Build + name: Ubuntu NVHPC GPU Build # Run on self-hosted runs-on: self-hosted steps: - # Load NVHPC module - - name: Load NVHPC Module + ## Install Lmod + #- name: Install Lmod + # run: | + # sudo apt-get update -y + # sudo apt-get install -y lmod + # echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile + # source /usr/share/lmod/lmod/init/bash + # module list + # + ## Install NVIDIA HPC SDK + #- name: Install NVIDIA HPC SDK + # run: | + # curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg + # echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list + # sudo apt-get update -y + # sudo apt-get install -y nvhpc-24-7 + + # Remove label + #- name: Remove GPU-CI label + # - uses: actions-ecosystem/action-remove-labels@v1 + # with: + # labels: GPU-CI + + # Check location of installed NVHPC compilers + - name: Check compiler install run: | - pwd - ls -al - echo $SHELL + source /usr/share/lmod/lmod/init/bash module use 
/opt/nvidia/hpc_sdk/modulefiles module load nvhpc which nvc which nvfortran - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - name: Checkout repository - uses: actions/checkout@v2 + # Test debug mode + # Turn this off because the compiler hangs while building in debug mode + #- name: Build gf debug + # run: | + # source /usr/share/lmod/lmod/init/bash + # module use /opt/nvidia/hpc_sdk/modulefiles + # module load nvhpc + # cd ref + # rm -rf build + # mkdir build + # cd build + # #export OMP_NUM_THREADS=4 + # cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on .. + # make VERBOSE=1 + # ctest --output-on-failure # Test release mode - name: Build gf release @@ -47,7 +80,17 @@ jobs: rm -rf build mkdir build cd build - export OMP_NUM_THREADS=4 + #export OMP_NUM_THREADS=4 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. - make - ctest --output-on-failure -R gpu_kernel + make VERBOSE=1 + ulimit -s hard + ctest --output-on-failure + + # Debug session for failures + - + name: Debug session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 60 + with: + limit-access-to-actor: true diff --git a/.github/workflows/macos_gnu.yml b/.github/workflows/macos_gnu.yml index e0940c9..3546767 100644 --- a/.github/workflows/macos_gnu.yml +++ b/.github/workflows/macos_gnu.yml @@ -25,6 +25,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 + # Install gcc11 + - name: Install GCC + run: brew install gcc@11 + # Install OpenMPI - name: Install OpenMPI run: brew install open-mpi @@ -38,7 +42,7 @@ jobs: cd build #export OMP_NUM_THREADS=4 export CC=gcc-11 - export FC=gfortran-11 + export FC=gfortran-11 cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off .. make VERBOSE=1 ctest --output-on-failure @@ -52,7 +56,7 @@ jobs: cd build #export OMP_NUM_THREADS=4 export CC=gcc-11 - export FC=gfortran-11 + export FC=gfortran-11 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off .. 
make VERBOSE=1 ctest --output-on-failure diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt index 811f454..67f49a1 100644 --- a/ref/CMakeLists.txt +++ b/ref/CMakeLists.txt @@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran ) find_package( MPI COMPONENTS C Fortran ) if ( ENABLE_GPU ) - set( OpenACC_ACCEL_TARGET tesla ) find_package( OpenACC REQUIRED ) find_package( CUDAToolkit REQUIRED ) add_compile_definitions(ENABLE_GPU) @@ -36,7 +35,6 @@ if ( ENABLE_GPU ) string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}") message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" ) SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - add_compile_options("-Minfo=accel") endif() add_subdirectory(src) diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake index 1cad5f0..e10d034 100644 --- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake +++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -1,7 +1,7 @@ #################################################################### # COMMON FLAGS #################################################################### -set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma") +set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma -Kieee" ) #################################################################### # RELEASE FLAGS @@ -14,7 +14,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" ) # DEBUG FLAGS #################################################################### -set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk" ) +set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" ) #################################################################### # FLAGS FOR GPU diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index f7298b7..879dc4e 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -14,8 +14,8 @@ add_executable( gf_kernel_cpu 
${gf_kernel_common_files} ) if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) + target_compile_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_link_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) endif() if(OpenMP_FOUND) diff --git a/ref/test/tools/setup-gpu-runner.sh b/ref/test/tools/setup-gpu-runner.sh new file mode 100644 index 0000000..711a150 --- /dev/null +++ b/ref/test/tools/setup-gpu-runner.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# Install drivers +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt-get update +sudo apt-get -y install cuda-toolkit-12-6 +sudo apt-get install nvidia-gds +sudo apt-get install -y cuda-drivers + +# reboot + +# Install LMOD +sudo apt-get update -y +sudo apt-get install -y lmod +echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile +source /usr/share/lmod/lmod/init/bash +module list + +# Install NVIDIA HPC SDK +curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg +echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list +sudo apt-get update -y +sudo apt-get install -y nvhpc-24-7 + +# Install cmake +sudo apt-get install -y cmake + + +# Run persistence driver - not needed? 
+#sudo /usr/bin/nvidia-persistenced --verbose
+
+# Create a dedicated folder for the runner (-p keeps a re-run from failing) and work inside it
+mkdir -p actions-runner && cd actions-runner
+# Download the pinned runner package (v2.319.0)
+curl -o actions-runner-linux-x64-2.319.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.319.0/actions-runner-linux-x64-2.319.0.tar.gz
+# Validate the download against its SHA-256 (hash, two spaces, file name); stop on a mismatch
+echo "52b8f9c5abb1a47cc506185a1a20ecea19daf0d94bbf4ddde7e617e7be109b14  actions-runner-linux-x64-2.319.0.tar.gz" | shasum -a 256 -c || exit 1
+# Extract the installer
+tar xzf ./actions-runner-linux-x64-2.319.0.tar.gz
+
+# Register the runner; export RUNNER_TOKEN (the repository's runner registration token) beforehand
+./config.sh --url https://github.com/NOAA-GSL/SENA-gf --token "${RUNNER_TOKEN:?export RUNNER_TOKEN with the runner registration token first}"
+# Last step: start the runner
+./run.sh