Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CI workflow for GPU NVHPC build #7

Merged
merged 16 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 61 additions & 18 deletions .github/workflows/gpu_nvhpc.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: Linux GPU NVHPC
# triggered events (push, pull_request) for the develop branch
name: Linux NVHPC GPU
# triggered events (push, pull_request) for the master branch
on:
pull_request:
branches: [ develop ]
types: [ labeled ]
branches: [ master ]
types: [ labeled, opened, synchronize, reopened ]
workflow_dispatch:

#defaults:
Expand All @@ -13,29 +13,62 @@ on:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:

GPU_build:
if: ${{ github.event.label.name == 'GPU_Test' }}
ubuntu_build:
if: contains(github.event.pull_request.labels.*.name, 'GPU-CI')

name: GPU Build
name: Ubuntu NVHPC GPU Build
# Run on self-hosted
runs-on: self-hosted

steps:

# Load NVHPC module
- name: Load NVHPC Module
## Install Lmod
#- name: Install Lmod
# run: |
# sudo apt-get update -y
# sudo apt-get install -y lmod
# echo "source /usr/share/lmod/lmod/init/bash" >> ~/.bash_profile
# source /usr/share/lmod/lmod/init/bash
# module list
#
## Install NVIDIA HPC SDK
#- name: Install NVIDIA HPC SDK
# run: |
# curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
# echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
# sudo apt-get update -y
# sudo apt-get install -y nvhpc-24-7

# Remove label
#- name: Remove GPU-CI label
# - uses: actions-ecosystem/action-remove-labels@v1
# with:
# labels: GPU-CI

# Check location of installed NVHPC compilers
- name: Check compiler install
run: |
pwd
ls -al
echo $SHELL
source /usr/share/lmod/lmod/init/bash
module use /opt/nvidia/hpc_sdk/modulefiles
module load nvhpc
which nvc
which nvfortran

# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- name: Checkout repository
uses: actions/checkout@v2
# Test debug mode
# Turn this off because the compiler hangs while building in debug mode
#- name: Build gf debug
# run: |
# source /usr/share/lmod/lmod/init/bash
# module use /opt/nvidia/hpc_sdk/modulefiles
# module load nvhpc
# cd ref
# rm -rf build
# mkdir build
# cd build
# #export OMP_NUM_THREADS=4
# cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=on ..
# make VERBOSE=1
# ctest --output-on-failure

# Test release mode
- name: Build gf release
Expand All @@ -47,7 +80,17 @@ jobs:
rm -rf build
mkdir build
cd build
export OMP_NUM_THREADS=4
#export OMP_NUM_THREADS=4
cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=on ..
make
ctest --output-on-failure -R gpu_kernel
make VERBOSE=1
ulimit -s hard
ctest --output-on-failure

# Debug session for failures
-
name: Debug session
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 60
with:
limit-access-to-actor: true
8 changes: 6 additions & 2 deletions .github/workflows/macos_gnu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2

# Install gcc11
- name: Install GCC
run: brew install gcc@11

# Install OpenMPI
- name: Install OpenMPI
run: brew install open-mpi
Expand All @@ -38,7 +42,7 @@ jobs:
cd build
#export OMP_NUM_THREADS=4
export CC=gcc-11
export FC=gfortran-11
export FC=gfortran-11
cmake -DCMAKE_BUILD_TYPE=debug -DENABLE_GPU=off ..
make VERBOSE=1
ctest --output-on-failure
Expand All @@ -52,7 +56,7 @@ jobs:
cd build
#export OMP_NUM_THREADS=4
export CC=gcc-11
export FC=gfortran-11
export FC=gfortran-11
cmake -DCMAKE_BUILD_TYPE=release -DENABLE_GPU=off ..
make VERBOSE=1
ctest --output-on-failure
Expand Down
2 changes: 0 additions & 2 deletions ref/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ find_package( OpenMP COMPONENTS C Fortran )
find_package( MPI COMPONENTS C Fortran )

if ( ENABLE_GPU )
set( OpenACC_ACCEL_TARGET tesla )
find_package( OpenACC REQUIRED )
find_package( CUDAToolkit REQUIRED )
add_compile_definitions(ENABLE_GPU)
Expand All @@ -36,7 +35,6 @@ if ( ENABLE_GPU )
string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_4}")
message( STATUS "CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}" )
SET(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})
add_compile_options("-Minfo=accel")
endif()

add_subdirectory(src)
Expand Down
4 changes: 2 additions & 2 deletions ref/cmake/compiler_flags_NVHPC_Fortran.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
####################################################################
# COMMON FLAGS
####################################################################
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma")
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -Mnofma -Kieee" )

####################################################################
# RELEASE FLAGS
Expand All @@ -14,7 +14,7 @@ set( CMAKE_Fortran_FLAGS_RELEASE "-fast -mp -Mnovect" )
# DEBUG FLAGS
####################################################################

set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk" )
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -Mbounds -Mchkptr -Mchkstk -Ktrap=fp" )

####################################################################
# FLAGS FOR GPU
Expand Down
4 changes: 2 additions & 2 deletions ref/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ add_executable( gf_kernel_cpu ${gf_kernel_common_files} )

if(ENABLE_GPU)
add_executable( gf_kernel_gpu ${gf_kernel_common_files} )
target_compile_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST},cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
target_link_options(gf_kernel_gpu PUBLIC ${OpenACC_Fortran_OPTIONS}:cc${CUDA_ARCH_LIST})
target_compile_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
target_link_options(gf_kernel_gpu PUBLIC -Minfo=accel ${OpenACC_Fortran_OPTIONS}=gpu -gpu=ccnative -gpu=cuda${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR})
endif()

if(OpenMP_FOUND)
Expand Down
45 changes: 45 additions & 0 deletions ref/test/tools/setup-gpu-runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
#
# Provision a self-hosted GitHub Actions runner for the NVHPC GPU CI build.
#
# Steps performed, in order:
#   1. Install the CUDA toolkit, NVIDIA GDS and the CUDA drivers from NVIDIA's
#      Ubuntu 24.04 / x86_64 apt repository.
#   2. Install Lmod and make login shells source its init script.
#   3. Install the NVIDIA HPC SDK (nvhpc-24-7) and cmake.
#   4. Download, verify and unpack the GitHub Actions runner.
#   5. Register and start the runner, if a registration token was supplied.
#
# Usage:
#   ./setup-gpu-runner.sh [RUNNER_TOKEN]
#
# The registration token is shown by the GitHub "add self-hosted runner" page.
# It may be given as the first argument or through the RUNNER_TOKEN environment
# variable. Without a token the script stops after unpacking the runner and
# prints the commands to run by hand.

# Abort on the first failing command (or failing pipeline stage) so that, for
# example, a checksum mismatch is never followed by extracting the tarball.
set -eo pipefail

RUNNER_VERSION="2.319.0"
RUNNER_SHA256="52b8f9c5abb1a47cc506185a1a20ecea19daf0d94bbf4ddde7e617e7be109b14"
RUNNER_TARBALL="actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"
RUNNER_REPO_URL="https://github.com/NOAA-GSL/SENA-gf"
RUNNER_TOKEN="${1:-${RUNNER_TOKEN:-}}"

# Install drivers
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update -y
sudo apt-get install -y cuda-toolkit-12-6
# -y is required here as well; without it apt waits for interactive confirmation.
sudo apt-get install -y nvidia-gds
sudo apt-get install -y cuda-drivers

# NOTE(review): the original notes mark a "reboot" at this point. This script
# does not reboot the machine itself - confirm whether a reboot is required
# before the runner is used.

# Install LMOD
sudo apt-get update -y
sudo apt-get install -y lmod
# Append the init line only once so that re-running the script does not keep
# growing ~/.bash_profile.
lmod_init="source /usr/share/lmod/lmod/init/bash"
if ! grep -qxF "${lmod_init}" ~/.bash_profile 2>/dev/null; then
  echo "${lmod_init}" >> ~/.bash_profile
fi
# The Lmod init script and `module list` are only a sanity check; do not let a
# non-zero status from them abort the provisioning.
set +e
source /usr/share/lmod/lmod/init/bash
module list
set -e

# Install NVIDIA HPC SDK
# --batch --yes lets gpg overwrite an existing keyring without prompting.
curl -fsSL https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
sudo apt-get update -y
sudo apt-get install -y nvhpc-24-7

# Install cmake
sudo apt-get install -y cmake

# Run persistence driver - not needed?
#sudo /usr/bin/nvidia-persistenced --verbose

# Create a folder for the runner (-p: tolerate a folder left by an earlier run)
mkdir -p actions-runner
cd actions-runner
# Download the pinned runner package
curl -fL -o "${RUNNER_TARBALL}" "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/${RUNNER_TARBALL}"
# Validate the hash (checksum-file format: hash, two spaces, file name)
echo "${RUNNER_SHA256}  ${RUNNER_TARBALL}" | shasum -a 256 -c
# Extract the installer
tar xzf "./${RUNNER_TARBALL}"

# Without a registration token the runner cannot be configured; explain the
# remaining manual steps instead of failing.
if [[ -z "${RUNNER_TOKEN}" ]]; then
  echo "No runner token supplied; runner unpacked in $(pwd)."
  echo "To finish, run:"
  echo "  ./config.sh --url ${RUNNER_REPO_URL} --token TOKEN_FROM_GITHUB"
  echo "  ./run.sh"
  exit 0
fi

# Create the runner and start the configuration experience
./config.sh --url "${RUNNER_REPO_URL}" --token "${RUNNER_TOKEN}"
# Last step, run it!
./run.sh
Loading