From 424f8d057f2a052e9d5ff761a102c72c5b1fe9ef Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Mon, 12 Aug 2024 15:52:40 +0000
Subject: [PATCH 01/16] First pass to get GPU ci test working

---
 .github/workflows/gpu_nvhpc.yml              | 76 +++++++++++++++++---
 ref/cmake/compiler_flags_NVHPC_Fortran.cmake |  4 +-
 ref/src/cu_gf_deep.F90                       |  2 +-
 3 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index b9b8018..b0be77b 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -1,8 +1,8 @@
 name: Linux GPU NVHPC
-# triggered events (push, pull_request) for the develop branch
+# triggered events (push, pull_request) for the master branch
 on:
   pull_request:
-    branches: [ develop ]
+    branches: [ master ]
     types: [ labeled ]
   workflow_dispatch:
 
@@ -16,18 +16,33 @@ jobs:
   GPU_build:
     if: ${{ github.event.label.name == 'GPU_Test' }}
 
-    name: GPU Build
+    name: Ubuntu NVHPC GPU Build
     # Run on self-hosted
     runs-on: self-hosted
 
     steps:
 
-      # Load NVHPC module
-      - name: Load NVHPC Module
+      # Install Lmod
+      - name: Install Lmod
         run: |
-          pwd
-          ls -al
-          echo $SHELL
+          sudo apt-get update -y
+          sudo apt-get install lmod
+          echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
+          source /usr/share/lmod/lmod/init/bash
+          module list
+
+      # Install NVIDIA HPC SDK
+      - name: Install NVIDIA HPC SDK
+        run: |
+          curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
+          echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
+          sudo apt-get update -y
+          sudo apt-get install -y nvhpc-24-7
+
+      # Check location of installed NVHPC compilers
+      - name: Check compiler install
+        run: |
+          source /usr/share/lmod/lmod/init/bash
           module use /opt/nvidia/hpc_sdk/modulefiles
           module load nvhpc
           which nvc
@@ -37,6 +52,21 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v2
 
+      # Test debug mode
+      - name: Build gf debug
+        run: |
+          source /usr/share/lmod/lmod/init/bash
+          module use /opt/nvidia/hpc_sdk/modulefiles
+          module load nvhpc
+          cd ref
+          rm -rf build
+          mkdir build
+          cd build
+          #export OMP_NUM_THREADS=4
+          cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on ..
+          make VERBOSE=1
+          ctest --output-on-failure
+
       # Test release mode
       - name: Build gf release
         run: |
@@ -47,7 +77,31 @@ jobs:
           rm -rf build
           mkdir build
           cd build
-          export OMP_NUM_THREADS=4
+          #export OMP_NUM_THREADS=4
           cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
-          make
-          ctest --output-on-failure -R gpu_kernel
+          make VERBOSE=1
+          ctest --output-on-failure
+
+      # Debug session for failures
+      -
+        name: Debug session
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 60
+        with:
+          limit-access-to-actor: true
+
+      ## Test release mode
+      #- name: Build gf release
+      #  run: |
+      #    source /usr/share/lmod/lmod/init/bash
+      #    module use /opt/nvidia/hpc_sdk/modulefiles
+      #    module load nvhpc
+      #    cd ref
+      #    rm -rf build
+      #    mkdir build
+      #    cd build
+      #    export OMP_NUM_THREADS=4
+      #    cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
+      #    make
+      #    ctest --output-on-failure -R gpu_kernel
diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
index 1cad5f0..d02c0c0 100644
--- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
+++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
@@ -1,7 +1,7 @@
 ####################################################################
 # COMMON FLAGS
 ####################################################################
-set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma")
+set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma" )
 
 ####################################################################
 # RELEASE FLAGS
@@ -14,7 +14,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" )
 # DEBUG FLAGS
 ####################################################################
 
-set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk" )
+set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" )
 
 ####################################################################
 # FLAGS FOR GPU
diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90
index ae11695..5cb354e 100644
--- a/ref/src/cu_gf_deep.F90
+++ b/ref/src/cu_gf_deep.F90
@@ -494,7 +494,7 @@ subroutine cu_gf_deep_run(        &
          !- zws for shallow convection closure (grant 2001)
          !- height of the pbl
          zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i)))
-         zws(i) = 1.2*zws(i)**.3333
+         zws(i) = 1.2*zws(i)**.3333_kind_phys
          zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct
       enddo
 !$acc end kernels

From 5480a7686cd5ee48c10f27475ea2bd4bd9d6f467 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Mon, 12 Aug 2024 16:28:28 +0000
Subject: [PATCH 02/16] Turn off label requirement for GPU ci workflow

---
 .github/workflows/gpu_nvhpc.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index b0be77b..f7a3d9f 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -1,4 +1,4 @@
-name: Linux GPU NVHPC
+name: Linux NVHPC
 # triggered events (push, pull_request) for the master branch
 on:
   pull_request:
@@ -13,8 +13,8 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
 
-  GPU_build:
-    if: ${{ github.event.label.name == 'GPU_Test' }}
+  ubuntu_build:
+    #if: ${{ github.event.label.name == 'GPU_Test' }}
 
     name: Ubuntu NVHPC GPU Build
     # Run on self-hosted

From 16207c5df32b30041489f81576c5ca7b0b915c4b Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Mon, 12 Aug 2024 16:30:26 +0000
Subject: [PATCH 03/16] Add -y flag for installing lmod on self-hosted runner

---
 .github/workflows/gpu_nvhpc.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index f7a3d9f..f928afa 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -26,7 +26,7 @@ jobs:
       - name: Install Lmod
         run: |
           sudo apt-get update -y
-          sudo apt-get install lmod
+          sudo apt-get install -y lmod
           echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
           source /usr/share/lmod/lmod/init/bash
           module list

From fcd0cfbae2e4f1f43ba29f561f1ff1e9575585f6 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Mon, 12 Aug 2024 17:26:34 +0000
Subject: [PATCH 04/16] NVHPC has been installed on the self-hosted runner, so
 we can skip that

---
 .github/workflows/gpu_nvhpc.yml | 34 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index f928afa..ce8a094 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -1,4 +1,4 @@
-name: Linux NVHPC
+name: Linux NVHPC GPU
 # triggered events (push, pull_request) for the master branch
 on:
   pull_request:
@@ -22,22 +22,22 @@ jobs:
 
     steps:
 
-      # Install Lmod
-      - name: Install Lmod
-        run: |
-          sudo apt-get update -y
-          sudo apt-get install -y lmod
-          echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
-          source /usr/share/lmod/lmod/init/bash
-          module list
-
-      # Install NVIDIA HPC SDK
-      - name: Install NVIDIA HPC SDK
-        run: |
-          curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
-          echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
-          sudo apt-get update -y
-          sudo apt-get install -y nvhpc-24-7
+      ## Install Lmod
+      #- name: Install Lmod
+      #  run: |
+      #    sudo apt-get update -y
+      #    sudo apt-get install -y lmod
+      #    echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
+      #    source /usr/share/lmod/lmod/init/bash
+      #    module list
+      #
+      ## Install NVIDIA HPC SDK
+      #- name: Install NVIDIA HPC SDK
+      #  run: |
+      #    curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
+      #    echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
+      #    sudo apt-get update -y
+      #    sudo apt-get install -y nvhpc-24-7
 
       # Check location of installed NVHPC compilers
       - name: Check compiler install

From 746371e4ff581e285e92014450d24bcea140cb89 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 21:03:40 +0000
Subject: [PATCH 05/16] Try using -Kieee to fix debug floating point error for
 nvfortran

---
 ref/cmake/compiler_flags_NVHPC_Fortran.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
index d02c0c0..f0e22b1 100644
--- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
+++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
@@ -1,7 +1,7 @@
 ####################################################################
 # COMMON FLAGS
 ####################################################################
-set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma" )
+set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma -Kieee" )
 
 ####################################################################
 # RELEASE FLAGS
@@ -15,6 +15,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" )
 ####################################################################
 
 set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" )
+#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" )
 
 ####################################################################
 # FLAGS FOR GPU

From 73dac2ce37fa10df758ce414f2a09d837cf49b69 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 21:29:50 +0000
Subject: [PATCH 06/16] Simplify -ta option for nvfortran

---
 ref/src/CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt
index f7298b7..d2b83cc 100644
--- a/ref/src/CMakeLists.txt
+++ b/ref/src/CMakeLists.txt
@@ -14,8 +14,10 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} )
 
 if(ENABLE_GPU)
   add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
-  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
+  #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
+  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS})
+  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS})
 endif()
 
 if(OpenMP_FOUND)

From 7301fe05850c9b7bf6f45f697947a6b682a41cec Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 21:32:49 +0000
Subject: [PATCH 07/16] Turn off debug build for gpu workflow because the build
 hangs

---
 .github/workflows/gpu_nvhpc.yml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index ce8a094..7a56c66 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -53,19 +53,19 @@ jobs:
         uses: actions/checkout@v2
 
       # Test debug mode
-      - name: Build gf debug
-        run: |
-          source /usr/share/lmod/lmod/init/bash
-          module use /opt/nvidia/hpc_sdk/modulefiles
-          module load nvhpc
-          cd ref
-          rm -rf build
-          mkdir build
-          cd build
-          #export OMP_NUM_THREADS=4
-          cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on ..
-          make VERBOSE=1
-          ctest --output-on-failure
+      #- name: Build gf debug
+      #  run: |
+      #    source /usr/share/lmod/lmod/init/bash
+      #    module use /opt/nvidia/hpc_sdk/modulefiles
+      #    module load nvhpc
+      #    cd ref
+      #    rm -rf build
+      #    mkdir build
+      #    cd build
+      #    #export OMP_NUM_THREADS=4
+      #    cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on ..
+      #    make VERBOSE=1
+      #    ctest --output-on-failure
 
       # Test release mode
       - name: Build gf release

From 6b61bf20cdd2ba7111383fbd296674cf26775be5 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 22:28:36 +0000
Subject: [PATCH 08/16] Change from old GPU options to new ones

---
 ref/CMakeLists.txt     | 2 +-
 ref/src/CMakeLists.txt | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt
index 811f454..a20b239 100644
--- a/ref/CMakeLists.txt
+++ b/ref/CMakeLists.txt
@@ -24,7 +24,7 @@ find_package( OpenMP COMPONENTS C Fortran )
 find_package( MPI COMPONENTS C Fortran )
 
 if ( ENABLE_GPU )
-  set( OpenACC_ACCEL_TARGET tesla )
+  #set( OpenACC_ACCEL_TARGET tesla )
   find_package( OpenACC REQUIRED )
   find_package( CUDAToolkit REQUIRED )
   add_compile_definitions(ENABLE_GPU)
diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt
index d2b83cc..4d2ee7b 100644
--- a/ref/src/CMakeLists.txt
+++ b/ref/src/CMakeLists.txt
@@ -16,8 +16,8 @@ if(ENABLE_GPU)
   add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
   #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
   #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
-  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS})
-  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS})
+  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
 endif()
 
 if(OpenMP_FOUND)

From fea69c8363d3c2f8014f5a39f867ea1ae85a4428 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 22:34:56 +0000
Subject: [PATCH 09/16] Autodetect the compute capability when building

---
 ref/src/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt
index 4d2ee7b..b57a586 100644
--- a/ref/src/CMakeLists.txt
+++ b/ref/src/CMakeLists.txt
@@ -16,8 +16,8 @@ if(ENABLE_GPU)
   add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
   #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
   #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
-  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=cc${CUDA_ARCH_LIST} -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
 endif()
 
 if(OpenMP_FOUND)

From f29a67bcdff646ab8d455a9b175823b956cd68aa Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Tue, 13 Aug 2024 23:01:50 +0000
Subject: [PATCH 10/16] Set stacksize limit to max value

---
 .github/workflows/gpu_nvhpc.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index 7a56c66..60b09c9 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -80,6 +80,7 @@ jobs:
           #export OMP_NUM_THREADS=4
           cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
           make VERBOSE=1
+          ulimit -s hard
           ctest --output-on-failure
 
       # Debug session for failures

From 531af27c312a2e99ea5f39235e8ecc1ec2ce8204 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 17:41:48 +0000
Subject: [PATCH 11/16] Add script to capture commands needed to set up the GHA
 self-hosted EC2 instance

---
 ref/test/tools/setup-gpu-runner.sh | 45 ++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 ref/test/tools/setup-gpu-runner.sh

diff --git a/ref/test/tools/setup-gpu-runner.sh b/ref/test/tools/setup-gpu-runner.sh
new file mode 100644
index 0000000..711a150
--- /dev/null
+++ b/ref/test/tools/setup-gpu-runner.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Commands used to set up the self-hosted GitHub Actions GPU runner (Ubuntu 24.04 package repos).
+
+# Install drivers
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+sudo apt-get -y install cuda-toolkit-12-6
+sudo apt-get install -y nvidia-gds
+sudo apt-get install -y cuda-drivers
+
+# reboot (manual step; presumably required so the newly installed driver is loaded)
+
+# Install LMOD
+sudo apt-get update -y
+sudo apt-get install -y lmod
+echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
+source /usr/share/lmod/lmod/init/bash
+module list
+
+# Install NVIDIA HPC SDK
+curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
+echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
+sudo apt-get update -y
+sudo apt-get install -y nvhpc-24-7
+
+# Install cmake
+sudo apt-get install -y cmake
+
+# Run persistence driver - not needed?
+#sudo /usr/bin/nvidia-persistenced --verbose
+
+# Create a folder
+mkdir actions-runner && cd actions-runner
+# Download the runner package (pinned to v2.319.0)
+curl -o actions-runner-linux-x64-2.319.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.319.0/actions-runner-linux-x64-2.319.0.tar.gz
+# Optional: Validate the hash
+echo "52b8f9c5abb1a47cc506185a1a20ecea19daf0d94bbf4ddde7e617e7be109b14  actions-runner-linux-x64-2.319.0.tar.gz" | shasum -a 256 -c
+# Extract the installer
+tar xzf ./actions-runner-linux-x64-2.319.0.tar.gz
+
+# Create the runner and start the configuration experience (RUNNER_TOKEN must hold the token given by the github interface)
+./config.sh --url https://github.com/NOAA-GSL/SENA-gf --token "${RUNNER_TOKEN:?set RUNNER_TOKEN to the token given by the github interface}"
+# Last step, run it!
+./run.sh

From 889b1a8485bd590e72cd43d287e82993c45a95e4 Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 17:56:14 +0000
Subject: [PATCH 12/16] Cleanup GPU CI

---
 .github/workflows/gpu_nvhpc.yml              | 18 ++----------------
 ref/CMakeLists.txt                           |  2 --
 ref/cmake/compiler_flags_NVHPC_Fortran.cmake |  1 -
 ref/src/CMakeLists.txt                       |  6 ++----
 ref/src/cu_gf_deep.F90                       |  2 +-
 5 files changed, 5 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index 60b09c9..1db5060 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -14,7 +14,7 @@ on:
 jobs:
 
   ubuntu_build:
-    #if: ${{ github.event.label.name == 'GPU_Test' }}
+    if: ${{ github.event.label.name == 'GPU-CI' }}
 
     name: Ubuntu NVHPC GPU Build
     # Run on self-hosted
@@ -53,6 +53,7 @@ jobs:
         uses: actions/checkout@v2
 
       # Test debug mode
+      # Turn this off because the compiler hangs while building in debug mode
       #- name: Build gf debug
       #  run: |
       #    source /usr/share/lmod/lmod/init/bash
@@ -91,18 +92,3 @@ jobs:
         timeout-minutes: 60
         with:
           limit-access-to-actor: true
-
-      ## Test release mode
-      #- name: Build gf release
-      #  run: |
-      #    source /usr/share/lmod/lmod/init/bash
-      #    module use /opt/nvidia/hpc_sdk/modulefiles
-      #    module load nvhpc
-      #    cd ref
-      #    rm -rf build
-      #    mkdir build
-      #    cd build
-      #    export OMP_NUM_THREADS=4
-      #    cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
-      #    make
-      #    ctest --output-on-failure -R gpu_kernel
diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt
index a20b239..67f49a1 100644
--- a/ref/CMakeLists.txt
+++ b/ref/CMakeLists.txt
@@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran )
 find_package( MPI COMPONENTS C Fortran )
 
 if ( ENABLE_GPU )
-  #set( OpenACC_ACCEL_TARGET tesla )
   find_package( OpenACC REQUIRED )
   find_package( CUDAToolkit REQUIRED )
   add_compile_definitions(ENABLE_GPU)
@@ -36,7 +35,6 @@ if ( ENABLE_GPU )
   string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}")
   message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" )
   SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})
-  add_compile_options("-Minfo=accel")
 endif()
 
 add_subdirectory(src)
diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
index f0e22b1..e10d034 100644
--- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
+++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
@@ -15,7 +15,6 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" )
 ####################################################################
 
 set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" )
-#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" )
 
 ####################################################################
 # FLAGS FOR GPU
diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt
index b57a586..879dc4e 100644
--- a/ref/src/CMakeLists.txt
+++ b/ref/src/CMakeLists.txt
@@ -14,10 +14,8 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} )
 
 if(ENABLE_GPU)
   add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
-  #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
-  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_compile_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_link_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
 endif()
 
 if(OpenMP_FOUND)
diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90
index 5cb354e..ae11695 100644
--- a/ref/src/cu_gf_deep.F90
+++ b/ref/src/cu_gf_deep.F90
@@ -494,7 +494,7 @@ subroutine cu_gf_deep_run(        &
          !- zws for shallow convection closure (grant 2001)
          !- height of the pbl
          zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i)))
-         zws(i) = 1.2*zws(i)**.3333_kind_phys
+         zws(i) = 1.2*zws(i)**.3333
          zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct
       enddo
 !$acc end kernels

From 9807ceadd4b7b20a3396a71d5a21fd3c7e88601a Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 18:24:08 +0000
Subject: [PATCH 13/16] Update gcc used for macos gnu ci

---
 .github/workflows/macos_gnu.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/macos_gnu.yml b/.github/workflows/macos_gnu.yml
index e0940c9..999b9fa 100644
--- a/.github/workflows/macos_gnu.yml
+++ b/.github/workflows/macos_gnu.yml
@@ -37,8 +37,8 @@ jobs:
           mkdir build
           cd build
           #export OMP_NUM_THREADS=4
-          export CC=gcc-11
-          export FC=gfortran-11          
+          export CC=gcc-13
+          export FC=gfortran-13
           cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off .. 
           make VERBOSE=1
           ctest --output-on-failure
@@ -51,8 +51,8 @@ jobs:
           mkdir build
           cd build
           #export OMP_NUM_THREADS=4
-          export CC=gcc-11
-          export FC=gfortran-11           
+          export CC=gcc-13
+          export FC=gfortran-13
           cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off ..
           make VERBOSE=1
           ctest --output-on-failure

From c3e47dfac5081a18ec8cdd4bfcc687acc5ab39cd Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 18:49:50 +0000
Subject: [PATCH 14/16] Use gcc-11 for macos CI, turn off GPU label

---
 .github/workflows/gpu_nvhpc.yml | 10 ++++++----
 .github/workflows/macos_gnu.yml | 12 ++++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index 1db5060..f998091 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -39,6 +39,12 @@ jobs:
       #    sudo apt-get update -y
       #    sudo apt-get install -y nvhpc-24-7
 
+      # Remove label
+      - name: Remove GPU-CI label
+        - uses: actions-ecosystem/action-remove-labels@v1
+        with:
+          labels: GPU-CI
+
       # Check location of installed NVHPC compilers
       - name: Check compiler install
         run: |
@@ -48,10 +54,6 @@ jobs:
           which nvc
           which nvfortran
 
-      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
-      - name: Checkout repository
-        uses: actions/checkout@v2
-
       # Test debug mode
       # Turn this off because the compiler hangs while building in debug mode
       #- name: Build gf debug
diff --git a/.github/workflows/macos_gnu.yml b/.github/workflows/macos_gnu.yml
index 999b9fa..3546767 100644
--- a/.github/workflows/macos_gnu.yml
+++ b/.github/workflows/macos_gnu.yml
@@ -25,6 +25,10 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v2
 
+      # Install gcc11
+      - name: Install GCC
+        run: brew install gcc@11
+
       # Install OpenMPI
       - name: Install OpenMPI
         run: brew install open-mpi
@@ -37,8 +41,8 @@ jobs:
           mkdir build
           cd build
           #export OMP_NUM_THREADS=4
-          export CC=gcc-13
-          export FC=gfortran-13
+          export CC=gcc-11
+          export FC=gfortran-11
           cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off .. 
           make VERBOSE=1
           ctest --output-on-failure
@@ -51,8 +55,8 @@ jobs:
           mkdir build
           cd build
           #export OMP_NUM_THREADS=4
-          export CC=gcc-13
-          export FC=gfortran-13
+          export CC=gcc-11
+          export FC=gfortran-11
           cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off ..
           make VERBOSE=1
           ctest --output-on-failure

From c40387f5f36e813182403373cfb00c13a027249a Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 19:05:19 +0000
Subject: [PATCH 15/16] Turn off label criteria for gpu ci

---
 .github/workflows/gpu_nvhpc.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index f998091..6924f69 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -14,7 +14,7 @@ on:
 jobs:
 
   ubuntu_build:
-    if: ${{ github.event.label.name == 'GPU-CI' }}
+    #if: ${{ github.event.label.name == 'GPU-CI' }}
 
     name: Ubuntu NVHPC GPU Build
     # Run on self-hosted
@@ -40,10 +40,10 @@ jobs:
       #    sudo apt-get install -y nvhpc-24-7
 
       # Remove label
-      - name: Remove GPU-CI label
-        - uses: actions-ecosystem/action-remove-labels@v1
-        with:
-          labels: GPU-CI
+      #- name: Remove GPU-CI label
+      #  - uses: actions-ecosystem/action-remove-labels@v1
+      #  with:
+      #    labels: GPU-CI
 
       # Check location of installed NVHPC compilers
       - name: Check compiler install

From 28483c18519c6b057c77634c077fc49c247077ee Mon Sep 17 00:00:00 2001
From: christopherwharrop-noaa <Christopher.W.Harrop@noaa.gov>
Date: Wed, 14 Aug 2024 19:25:35 +0000
Subject: [PATCH 16/16] Try a different label method

---
 .github/workflows/gpu_nvhpc.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index 6924f69..0748c0d 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -3,7 +3,7 @@ name: Linux NVHPC GPU
 on:
   pull_request:
     branches: [ master ]
-    types: [ labeled ]
+    types: [ labeled, opened, synchronize, reopened ]
   workflow_dispatch:
 
 #defaults:
@@ -14,7 +14,7 @@ on:
 jobs:
 
   ubuntu_build:
-    #if: ${{ github.event.label.name == 'GPU-CI' }}
+    if: github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'GPU-CI')  # manual dispatches carry no PR labels, so allow them explicitly
 
     name: Ubuntu NVHPC GPU Build
     # Run on self-hosted