From 424f8d057f2a052e9d5ff761a102c72c5b1fe9ef Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Mon, 12 Aug 2024 15:52:40 +0000 Subject: [PATCH 01/16] First pass to get GPU ci test working --- .github/workflows/gpu_nvhpc.yml | 76 +++++++++++++++++--- ref/cmake/compiler_flags_NVHPC_Fortran.cmake | 4 +- ref/src/cu_gf_deep.F90 | 2 +- 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index b9b8018..b0be77b 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -1,8 +1,8 @@ name: Linux GPU NVHPC -# triggered events (push, pull_request) for the develop branch +# triggered events (push, pull_request) for the master branch on: pull_request: - branches: [ develop ] + branches: [ master ] types: [ labeled ] workflow_dispatch: @@ -16,18 +16,33 @@ jobs: GPU_build: if: ${{ github.event.label.name == 'GPU_Test' }} - name: GPU Build + name: Ubuntu NVHPC GPU Build # Run on self-hosted runs-on: self-hosted steps: - # Load NVHPC module - - name: Load NVHPC Module + # Install Lmod + - name: Install Lmod run: | - pwd - ls -al - echo $SHELL + sudo apt-get update -y + sudo apt-get install lmod + echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile + source /usr/share/lmod/lmod/init/bash + module list + + # Install NVIDIA HPC SDK + - name: Install NVIDIA HPC SDK + run: | + curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg + echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list + sudo apt-get update -y + sudo apt-get install -y nvhpc-24-7 + + # Check location of installed NVHPC compilers + - name: Check compiler install + run: | + source /usr/share/lmod/lmod/init/bash module use /opt/nvidia/hpc_sdk/modulefiles module load nvhpc which nvc @@ -37,6 +52,21 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 + # Test debug mode + - name: Build gf debug + run: | + source /usr/share/lmod/lmod/init/bash + module use /opt/nvidia/hpc_sdk/modulefiles + module load nvhpc + cd ref + rm -rf build + mkdir build + cd build + #export OMP_NUM_THREADS=4 + cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on .. + make VERBOSE=1 + ctest --output-on-failure + # Test release mode - name: Build gf release run: | @@ -47,7 +77,31 @@ jobs: rm -rf build mkdir build cd build - export OMP_NUM_THREADS=4 + #export OMP_NUM_THREADS=4 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. - make - ctest --output-on-failure -R gpu_kernel + make VERBOSE=1 + ctest --output-on-failure + + # Debug session for failures + - + name: Debug session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 60 + with: + limit-access-to-actor: true + + ## Test release mode + #- name: Build gf release + # run: | + # source /usr/share/lmod/lmod/init/bash + # module use /opt/nvidia/hpc_sdk/modulefiles + # module load nvhpc + # cd ref + # rm -rf build + # mkdir build + # cd build + # export OMP_NUM_THREADS=4 + # cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. + # make + # ctest --output-on-failure -R gpu_kernel diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake index 1cad5f0..d02c0c0 100644 --- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake +++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -1,7 +1,7 @@ #################################################################### # COMMON FLAGS #################################################################### -set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma") +set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma" ) #################################################################### # RELEASE FLAGS @@ -14,7 +14,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" ) # DEBUG FLAGS #################################################################### -set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk" ) +set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" ) #################################################################### # FLAGS FOR GPU diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90 index ae11695..5cb354e 100644 --- a/ref/src/cu_gf_deep.F90 +++ b/ref/src/cu_gf_deep.F90 @@ -494,7 +494,7 @@ subroutine cu_gf_deep_run( & !- zws for shallow convection closure (grant 2001) !- height of the pbl zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i))) - zws(i) = 1.2*zws(i)**.3333 + zws(i) = 1.2*zws(i)**.3333_kind_phys zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct enddo !$acc end kernels From 5480a7686cd5ee48c10f27475ea2bd4bd9d6f467 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Mon, 12 Aug 2024 16:28:28 +0000 Subject: [PATCH 02/16] Turn off label requirement for GPU ci workflow --- .github/workflows/gpu_nvhpc.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index b0be77b..f7a3d9f 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -1,4 +1,4 @@ -name: Linux GPU NVHPC +name: Linux NVHPC # triggered events (push, pull_request) for the master branch on: pull_request: @@ -13,8 +13,8 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - GPU_build: - if: ${{ github.event.label.name == 'GPU_Test' }} + ubuntu_build: + #if: ${{ github.event.label.name == 'GPU_Test' }} name: Ubuntu NVHPC GPU Build # Run on self-hosted From 16207c5df32b30041489f81576c5ca7b0b915c4b Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Mon, 12 Aug 2024 16:30:26 +0000 Subject: [PATCH 03/16] Add -y flag for instaling lmod on self-hosted runner --- .github/workflows/gpu_nvhpc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index f7a3d9f..f928afa 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -26,7 +26,7 @@ jobs: - name: Install Lmod run: | sudo apt-get update -y - sudo apt-get install lmod + sudo apt-get install -y lmod echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile source /usr/share/lmod/lmod/init/bash module list From fcd0cfbae2e4f1f43ba29f561f1ff1e9575585f6 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Mon, 12 Aug 2024 17:26:34 +0000 Subject: [PATCH 04/16] NVHPC has been installed on the self-hosted runner, so we can skip that --- .github/workflows/gpu_nvhpc.yml | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index f928afa..ce8a094 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -1,4 +1,4 @@ -name: Linux NVHPC +name: Linux NVHPC GPU # triggered events (push, pull_request) for the master branch on: pull_request: @@ -22,22 +22,22 @@ jobs: steps: - # Install Lmod - - name: Install Lmod - run: | - sudo apt-get update -y - sudo apt-get install -y lmod - echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile - source /usr/share/lmod/lmod/init/bash - module list - - # Install NVIDIA HPC SDK - - name: Install NVIDIA HPC SDK - run: | - curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg - echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list - sudo apt-get update -y - sudo apt-get install -y nvhpc-24-7 + ## Install Lmod + #- name: Install Lmod + # run: | + # sudo apt-get update -y + # sudo apt-get install -y lmod + # echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile + # source /usr/share/lmod/lmod/init/bash + # module list + # + ## Install NVIDIA HPC SDK + #- name: Install NVIDIA HPC SDK + # run: | + # curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg + # echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list + # sudo apt-get update -y + # sudo apt-get install -y nvhpc-24-7 # Check location of installed NVHPC compilers - name: Check compiler install From 746371e4ff581e285e92014450d24bcea140cb89 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 21:03:40 +0000 Subject: [PATCH 05/16] Try using -Kieee to fix debug floating point error for nvfortran --- ref/cmake/compiler_flags_NVHPC_Fortran.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake index d02c0c0..f0e22b1 100644 --- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake +++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -1,7 +1,7 @@ #################################################################### # COMMON FLAGS #################################################################### -set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma" ) +set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma -Kieee" ) #################################################################### # RELEASE FLAGS @@ -15,6 +15,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" ) #################################################################### set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" ) +#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" ) #################################################################### # FLAGS FOR GPU From 73dac2ce37fa10df758ce414f2a09d837cf49b69 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 21:29:50 +0000 Subject: [PATCH 06/16] Simplify -ta option for nvfortran --- ref/src/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index f7298b7..d2b83cc 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -14,8 +14,10 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} ) if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) + #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) + target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}) + target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}) endif() if(OpenMP_FOUND) From 7301fe05850c9b7bf6f45f697947a6b682a41cec Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 21:32:49 +0000 Subject: [PATCH 07/16] Turn off debug build for gpu workflow because the build hangs --- .github/workflows/gpu_nvhpc.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index ce8a094..7a56c66 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -53,19 +53,19 @@ jobs: uses: actions/checkout@v2 # Test debug mode - - name: Build gf debug - run: | - source /usr/share/lmod/lmod/init/bash - module use /opt/nvidia/hpc_sdk/modulefiles - module load nvhpc - cd ref - rm -rf build - mkdir build - cd build - #export OMP_NUM_THREADS=4 - cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on .. - make VERBOSE=1 - ctest --output-on-failure + #- name: Build gf debug + # run: | + # source /usr/share/lmod/lmod/init/bash + # module use /opt/nvidia/hpc_sdk/modulefiles + # module load nvhpc + # cd ref + # rm -rf build + # mkdir build + # cd build + # #export OMP_NUM_THREADS=4 + # cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on .. + # make VERBOSE=1 + # ctest --output-on-failure # Test release mode - name: Build gf release From 6b61bf20cdd2ba7111383fbd296674cf26775be5 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 22:28:36 +0000 Subject: [PATCH 08/16] Change from old GPU options to new ones --- ref/CMakeLists.txt | 2 +- ref/src/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt index 811f454..a20b239 100644 --- a/ref/CMakeLists.txt +++ b/ref/CMakeLists.txt @@ -24,7 +24,7 @@ find_package( OpenMP COMPONENTS C Fortran ) find_package( MPI COMPONENTS C Fortran ) if ( ENABLE_GPU ) - set( OpenACC_ACCEL_TARGET tesla ) + #set( OpenACC_ACCEL_TARGET tesla ) find_package( OpenACC REQUIRED ) find_package( CUDAToolkit REQUIRED ) add_compile_definitions(ENABLE_GPU) diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index d2b83cc..4d2ee7b 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -16,8 +16,8 @@ if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}) + target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) endif() if(OpenMP_FOUND) From fea69c8363d3c2f8014f5a39f867ea1ae85a4428 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 22:34:56 +0000 Subject: [PATCH 09/16] Autodetect the compute capability when building --- ref/src/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index 4d2ee7b..b57a586 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -16,8 +16,8 @@ if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) endif() if(OpenMP_FOUND) From f29a67bcdff646ab8d455a9b175823b956cd68aa Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Tue, 13 Aug 2024 23:01:50 +0000 Subject: [PATCH 10/16] Set stacksize limit to max value --- .github/workflows/gpu_nvhpc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index 7a56c66..60b09c9 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -80,6 +80,7 @@ jobs: #export OMP_NUM_THREADS=4 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. make VERBOSE=1 + ulimit -s hard ctest --output-on-failure # Debug session for failures From 531af27c312a2e99ea5f39235e8ecc1ec2ce8204 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 17:41:48 +0000 Subject: [PATCH 11/16] Add script to capture commands needed to setup the GHA self-hosted EC2 instance --- ref/test/tools/setup-gpu-runner.sh | 45 ++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 ref/test/tools/setup-gpu-runner.sh diff --git a/ref/test/tools/setup-gpu-runner.sh b/ref/test/tools/setup-gpu-runner.sh new file mode 100644 index 0000000..711a150 --- /dev/null +++ b/ref/test/tools/setup-gpu-runner.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# Install drivers +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt-get update +sudo apt-get -y install cuda-toolkit-12-6 +sudo apt-get install nvidia-gds +sudo apt-get install -y cuda-drivers + +# reboot + +# Install LMOD +sudo apt-get update -y +sudo apt-get install -y lmod +echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile +source /usr/share/lmod/lmod/init/bash +module list + +# Install NVIDIA HPC SDK +curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg +echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list +sudo apt-get update -y +sudo apt-get install -y nvhpc-24-7 + +# Install cmake +sudo apt-get install -y cmake + + +# Run persistence driver - not needed? +#sudo /usr/bin/nvidia-persistenced --verbose + +# Create a folder +mkdir actions-runner && cd actions-runner +# Download the latest runner package +curl -o actions-runner-linux-x64-2.319.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.319.0/actions-runner-linux-x64-2.319.0.tar.gz +# Optional: Validate the hash +echo "52b8f9c5abb1a47cc506185a1a20ecea19daf0d94bbf4ddde7e617e7be109b14 actions-runner-linux-x64-2.319.0.tar.gz" | shasum -a 256 -c +# Extract the installer +tar xzf ./actions-runner-linux-x64-2.319.0.tar.gz + +# Create the runner and start the configuration experience +$ ./config.sh --url https://github.com/NOAA-GSL/SENA-gf --token +# Last step, run it! +$ ./run.sh From 889b1a8485bd590e72cd43d287e82993c45a95e4 Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 17:56:14 +0000 Subject: [PATCH 12/16] Cleanup GPU CI --- .github/workflows/gpu_nvhpc.yml | 18 ++---------------- ref/CMakeLists.txt | 2 -- ref/cmake/compiler_flags_NVHPC_Fortran.cmake | 1 - ref/src/CMakeLists.txt | 6 ++---- ref/src/cu_gf_deep.F90 | 2 +- 5 files changed, 5 insertions(+), 24 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index 60b09c9..1db5060 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -14,7 +14,7 @@ on: jobs: ubuntu_build: - #if: ${{ github.event.label.name == 'GPU_Test' }} + if: ${{ github.event.label.name == 'GPU-CI' }} name: Ubuntu NVHPC GPU Build # Run on self-hosted @@ -53,6 +53,7 @@ jobs: uses: actions/checkout@v2 # Test debug mode + # Turn this off because the compiler hangs while building in debug mode #- name: Build gf debug # run: | # source /usr/share/lmod/lmod/init/bash @@ -91,18 +92,3 @@ jobs: timeout-minutes: 60 with: limit-access-to-actor: true - - ## Test release mode - #- name: Build gf release - # run: | - # source /usr/share/lmod/lmod/init/bash - # module use /opt/nvidia/hpc_sdk/modulefiles - # module load nvhpc - # cd ref - # rm -rf build - # mkdir build - # cd build - # export OMP_NUM_THREADS=4 - # cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. - # make - # ctest --output-on-failure -R gpu_kernel diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt index a20b239..67f49a1 100644 --- a/ref/CMakeLists.txt +++ b/ref/CMakeLists.txt @@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran ) find_package( MPI COMPONENTS C Fortran ) if ( ENABLE_GPU ) - #set( OpenACC_ACCEL_TARGET tesla ) find_package( OpenACC REQUIRED ) find_package( CUDAToolkit REQUIRED ) add_compile_definitions(ENABLE_GPU) @@ -36,7 +35,6 @@ if ( ENABLE_GPU ) string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}") message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" ) SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - add_compile_options("-Minfo=accel") endif() add_subdirectory(src) diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake index f0e22b1..e10d034 100644 --- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake +++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -15,7 +15,6 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" ) #################################################################### set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" ) -#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" ) #################################################################### # FLAGS FOR GPU diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index b57a586..879dc4e 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -14,10 +14,8 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} ) if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) - #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_compile_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_link_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) endif() if(OpenMP_FOUND) diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90 index 5cb354e..ae11695 100644 --- a/ref/src/cu_gf_deep.F90 +++ b/ref/src/cu_gf_deep.F90 @@ -494,7 +494,7 @@ subroutine cu_gf_deep_run( & !- zws for shallow convection closure (grant 2001) !- height of the pbl zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i))) - zws(i) = 1.2*zws(i)**.3333_kind_phys + zws(i) = 1.2*zws(i)**.3333 zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct enddo !$acc end kernels From 9807ceadd4b7b20a3396a71d5a21fd3c7e88601a Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 18:24:08 +0000 Subject: [PATCH 13/16] Update gcc used for macos gnu ci --- .github/workflows/macos_gnu.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/macos_gnu.yml b/.github/workflows/macos_gnu.yml index e0940c9..999b9fa 100644 --- a/.github/workflows/macos_gnu.yml +++ b/.github/workflows/macos_gnu.yml @@ -37,8 +37,8 @@ jobs: mkdir build cd build #export OMP_NUM_THREADS=4 - export CC=gcc-11 - export FC=gfortran-11 + export CC=gcc-13 + export FC=gfortran-13 cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off .. make VERBOSE=1 ctest --output-on-failure @@ -51,8 +51,8 @@ jobs: mkdir build cd build #export OMP_NUM_THREADS=4 - export CC=gcc-11 - export FC=gfortran-11 + export CC=gcc-13 + export FC=gfortran-13 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off .. make VERBOSE=1 ctest --output-on-failure From c3e47dfac5081a18ec8cdd4bfcc687acc5ab39cd Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 18:49:50 +0000 Subject: [PATCH 14/16] Use gcc-11 for macos CI, turn off GPU label --- .github/workflows/gpu_nvhpc.yml | 10 ++++++---- .github/workflows/macos_gnu.yml | 12 ++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index 1db5060..f998091 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -39,6 +39,12 @@ jobs: # sudo apt-get update -y # sudo apt-get install -y nvhpc-24-7 + # Remove label + - name: Remove GPU-CI label + - uses: actions-ecosystem/action-remove-labels@v1 + with: + labels: GPU-CI + # Check location of installed NVHPC compilers - name: Check compiler install run: | @@ -48,10 +54,6 @@ jobs: which nvc which nvfortran - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - name: Checkout repository - uses: actions/checkout@v2 - # Test debug mode # Turn this off because the compiler hangs while building in debug mode #- name: Build gf debug diff --git a/.github/workflows/macos_gnu.yml b/.github/workflows/macos_gnu.yml index 999b9fa..3546767 100644 --- a/.github/workflows/macos_gnu.yml +++ b/.github/workflows/macos_gnu.yml @@ -25,6 +25,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 + # Install gcc11 + - name: Install GCC + run: brew install gcc@11 + # Install OpenMPI - name: Install OpenMPI run: brew install open-mpi @@ -37,8 +41,8 @@ jobs: mkdir build cd build #export OMP_NUM_THREADS=4 - export CC=gcc-13 - export FC=gfortran-13 + export CC=gcc-11 + export FC=gfortran-11 cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off .. make VERBOSE=1 ctest --output-on-failure @@ -51,8 +55,8 @@ jobs: mkdir build cd build #export OMP_NUM_THREADS=4 - export CC=gcc-13 - export FC=gfortran-13 + export CC=gcc-11 + export FC=gfortran-11 cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off .. make VERBOSE=1 ctest --output-on-failure From c40387f5f36e813182403373cfb00c13a027249a Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 19:05:19 +0000 Subject: [PATCH 15/16] Turn off label criteria for gpu ci --- .github/workflows/gpu_nvhpc.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index f998091..6924f69 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -14,7 +14,7 @@ on: jobs: ubuntu_build: - if: ${{ github.event.label.name == 'GPU-CI' }} + #if: ${{ github.event.label.name == 'GPU-CI' }} name: Ubuntu NVHPC GPU Build # Run on self-hosted @@ -40,10 +40,10 @@ jobs: # sudo apt-get install -y nvhpc-24-7 # Remove label - - name: Remove GPU-CI label - - uses: actions-ecosystem/action-remove-labels@v1 - with: - labels: GPU-CI + #- name: Remove GPU-CI label + # - uses: actions-ecosystem/action-remove-labels@v1 + # with: + # labels: GPU-CI # Check location of installed NVHPC compilers - name: Check compiler install From 28483c18519c6b057c77634c077fc49c247077ee Mon Sep 17 00:00:00 2001 From: christopherwharrop-noaa Date: Wed, 14 Aug 2024 19:25:35 +0000 Subject: [PATCH 16/16] Try a different label method --- .github/workflows/gpu_nvhpc.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index 6924f69..0748c0d 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -3,7 +3,7 @@ name: Linux NVHPC GPU on: pull_request: branches: [ master ] - types: [ labeled ] + types: [ labeled, opened, synchronize, reopened ] workflow_dispatch: #defaults: @@ -14,7 +14,7 @@ on: jobs: ubuntu_build: - #if: ${{ github.event.label.name == 'GPU-CI' }} + if: contains(github.event.pull_request.labels.*.name, 'GPU-CI') name: Ubuntu NVHPC GPU Build # Run on self-hosted