# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Files touched by this patch:
#   .github/workflows/gpu_nvhpc.yml
#       - Replaces the commented-out job condition (label 'GPU_Test') with an active
#         `if:` condition on the label name 'GPU-CI' for the ubuntu_build job.
#       - Adds a comment stating that the debug build step is turned off because the
#         compiler hangs while building in debug mode.
#       - Deletes the commented-out "Build gf release" step at the end of the file.
#   ref/CMakeLists.txt
#       - Deletes the commented-out `set( OpenACC_ACCEL_TARGET tesla )` line.
#       - Deletes the directory-scope `add_compile_options("-Minfo=accel")` call.
#   ref/cmake/compiler_flags_NVHPC_Fortran.cmake
#       - Deletes the commented-out alternative `CMAKE_Fortran_FLAGS_DEBUG "-O0"` line.
#   ref/src/CMakeLists.txt
#       - Deletes two commented-out target_compile_options/target_link_options lines.
#       - Adds `-Minfo=accel` to the compile and link options of the gf_kernel_gpu target,
#         replacing the directory-scope option removed from ref/CMakeLists.txt.
#   ref/src/cu_gf_deep.F90
#       - Changes the exponent literal in the zws(i) computation from `.3333_kind_phys`
#         to `.3333`.
#
# NOTE(review): this copy of the patch appears to have lost its original line breaks
#   (hunk headers and hunk bodies share the same physical line) - TODO confirm it still
#   applies, or regenerate it from the commit.
# NOTE(review): in the visible workflow hunks the debug build step is commented out and
#   the (already commented-out) release build step is deleted - confirm the workflow
#   still builds and tests something after this change.
# NOTE(review): dropping the `_kind_phys` suffix presumably makes `.3333` a default-kind
#   real literal - confirm the precision change in cu_gf_deep.F90 is intended.
#
diff --git a/.github/workflows/gpu_nvhpc.yml b/.github/workflows/gpu_nvhpc.yml index 60b09c9..1db5060 100644 --- a/.github/workflows/gpu_nvhpc.yml +++ b/.github/workflows/gpu_nvhpc.yml @@ -14,7 +14,7 @@ on: jobs: ubuntu_build: - #if: ${{ github.event.label.name == 'GPU_Test' }} + if: ${{ github.event.label.name == 'GPU-CI' }} name: Ubuntu NVHPC GPU Build # Run on self-hosted @@ -53,6 +53,7 @@ jobs: uses: actions/checkout@v2 # Test debug mode + # Turn this off because the compiler hangs while building in debug mode #- name: Build gf debug # run: | # source /usr/share/lmod/lmod/init/bash @@ -91,18 +92,3 @@ jobs: timeout-minutes: 60 with: limit-access-to-actor: true - - ## Test release mode - #- name: Build gf release - # run: | - # source /usr/share/lmod/lmod/init/bash - # module use /opt/nvidia/hpc_sdk/modulefiles - # module load nvhpc - # cd ref - # rm -rf build - # mkdir build - # cd build - # export OMP_NUM_THREADS=4 - # cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on .. - # make - # ctest --output-on-failure -R gpu_kernel diff --git a/ref/CMakeLists.txt b/ref/CMakeLists.txt index a20b239..67f49a1 100644 --- a/ref/CMakeLists.txt +++ b/ref/CMakeLists.txt @@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran ) find_package( MPI COMPONENTS C Fortran ) if ( ENABLE_GPU ) - #set( OpenACC_ACCEL_TARGET tesla ) find_package( OpenACC REQUIRED ) find_package( CUDAToolkit REQUIRED ) add_compile_definitions(ENABLE_GPU) @@ -36,7 +35,6 @@ if ( ENABLE_GPU ) string(REPLACE "." 
"" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}") message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" ) SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - add_compile_options("-Minfo=accel") endif() add_subdirectory(src) diff --git a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake index f0e22b1..e10d034 100644 --- a/ref/cmake/compiler_flags_NVHPC_Fortran.cmake +++ b/ref/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -15,7 +15,6 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" ) #################################################################### set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" ) -#set( CMAKE_Fortran_FLAGS_DEBUG "-O0" ) #################################################################### # FLAGS FOR GPU diff --git a/ref/src/CMakeLists.txt b/ref/src/CMakeLists.txt index b57a586..879dc4e 100644 --- a/ref/src/CMakeLists.txt +++ b/ref/src/CMakeLists.txt @@ -14,10 +14,8 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} ) if(ENABLE_GPU) add_executable( gf_kernel_gpu ${gf_kernel_common_files} ) - #target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - #target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST}) - target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) - target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_compile_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) + target_link_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative 
-gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}) endif() if(OpenMP_FOUND) diff --git a/ref/src/cu_gf_deep.F90 b/ref/src/cu_gf_deep.F90 index 5cb354e..ae11695 100644 --- a/ref/src/cu_gf_deep.F90 +++ b/ref/src/cu_gf_deep.F90 @@ -494,7 +494,7 @@ subroutine cu_gf_deep_run( & !- zws for shallow convection closure (grant 2001) !- height of the pbl zws(i) = max(0.,.001-flux_tun(i)*0.41*buo_flux*zo(i,kpbl(i))*g/t(i,kpbl(i))) - zws(i) = 1.2*zws(i)**.3333_kind_phys + zws(i) = 1.2*zws(i)**.3333 zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct enddo !$acc end kernels