diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml
index 60b09c9..1db5060 100644
--- a/.github/workflows/gpu_nvhpc.yml
+++ b/.github/workflows/gpu_nvhpc.yml
@@ -14,7 +14,7 @@ on:
 jobs:
 
   ubuntu_build:
-    #if: ${{ github.event.label.name == 'GPU_Test' }}
+    if: ${{ github.event.label.name == 'GPU-CI' }}
 
     name: Ubuntu NVHPC GPU Build
     # Run on self-hosted
@@ -53,6 +53,7 @@ jobs:
         uses: actions/checkout@v2
 
       # Test debug mode
+      # The debug build step below is disabled because the NVHPC compiler hangs when building in debug mode
       #- name: Build gf debug
       #  run: |
       #    source /usr/share/lmod/lmod/init/bash
@@ -91,18 +92,3 @@ jobs:
         timeout-minutes: 60
         with:
           limit-access-to-actor: true
-
-      ## Test release mode
-      #- name: Build gf release
-      #  run: |
-      #    source /usr/share/lmod/lmod/init/bash
-      #    module use /opt/nvidia/hpc_sdk/modulefiles
-      #    module load nvhpc
-      #    cd ref
-      #    rm -rf build
-      #    mkdir build
-      #    cd build
-      #    export OMP_NUM_THREADS=4
-      #    cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
-      #    make
-      #    ctest --output-on-failure -R gpu_kernel
diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt
index a20b239..67f49a1 100644
--- a/ref/CMakeLists.txt
+++ b/ref/CMakeLists.txt
@@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran )
 find_package( MPI COMPONENTS C Fortran )
 
 if ( ENABLE_GPU )
-  #set( OpenACC_ACCEL_TARGET tesla )
   find_package( OpenACC REQUIRED )
   find_package( CUDAToolkit REQUIRED )
   add_compile_definitions(ENABLE_GPU)
@@ -36,7 +35,6 @@ if ( ENABLE_GPU )
   string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}")
   message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" )
   SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})
-  add_compile_options("-Minfo=accel")
 endif()
 
 add_subdirectory(src)
diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
index f0e22b1..e10d034 100644
--- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
+++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake
@@ -15,7 +15,6 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" )
 ####################################################################
 
 set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" )
-#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" )
 
 ####################################################################
 # FLAGS FOR GPU
diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt
index b57a586..879dc4e 100644
--- a/ref/src/CMakeLists.txt
+++ b/ref/src/CMakeLists.txt
@@ -14,10 +14,9 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} )
 
 if(ENABLE_GPU)
   add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
-  #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
-  target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
-  target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  # OpenACC offload flags for the GPU executable only; -Minfo=accel is a compile-time diagnostic, so it is not passed at link time.
+  target_compile_options(gf_kernel_gpu PRIVATE -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
+  target_link_options(gf_kernel_gpu PRIVATE ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
 endif()
 
 if(OpenMP_FOUND)
diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90
index 5cb354e..ae11695 100644
--- a/ref/src/cu_gf_deep.F90
+++ b/ref/src/cu_gf_deep.F90
@@ -494,7 +494,7 @@ subroutine cu_gf_deep_run(        &
          !- zws for shallow convection closure (grant 2001)
          !- height of the pbl
          zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i)))
-         zws(i) = 1.2*zws(i)**.3333_kind_phys
+         zws(i) = 1.2*zws(i)**.3333
          zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct
       enddo
 !$acc end kernels