Address CUDA MPI/IPC issue with Kokkos <=4.4.1 #1193

Workflow file for this run

.github/workflows/ci-extended.yml at 53ccbcf

	name: CI extended

	# Triggers for the extended CI run.
	on:
	  # Nightly run at 06:00 UTC.
	  schedule:
	    - cron: "0 6 * * *"
	  # Manual trigger.
	  workflow_dispatch:
	  # Enabling auto merge on a pull request (hack to make sure this workflow
	  # has run before the merge happens).
	  pull_request:
	    types:
	      - auto_merge_enabled

	# Runs sharing the same ref / head ref land in one concurrency group, and a
	# newer run cancels the one still in progress. This avoids "duplicated"
	# workflows triggered by pushes to internal branches with associated PRs.
	concurrency:
	  group: "${{ github.ref }}-${{ github.head_ref }}-CI-extended"
	  cancel-in-progress: true

	# Environment shared by every job in this workflow.
	env:
	  CTEST_OUTPUT_ON_FAILURE: "1"
	  # Number of threads used for the build.
	  CMAKE_BUILD_PARALLEL_LEVEL: "5"
	  MACHINE_CFG: cmake/machinecfg/CI.cmake
	  OMPI_MCA_mpi_common_cuda_event_max: "1000"
	  # Background for the setting below:
	  # https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
	  OMPI_MCA_btl_vader_single_copy_mechanism: none

	jobs:
	  # Performance and regression tests for every device x parallel combination
	  # on the self-hosted runner labeled A100, plus an Ascent integration test
	  # for the cuda-mpi combination.
	  perf-and-regression:
	    strategy:
	      matrix:
	        device: ['cuda', 'host']
	        parallel: ['serial', 'mpi']
	    runs-on: [self-hosted, A100]
	    container:
	      image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
	      # map to local user id on CI machine to allow writing to build cache
	      options: --user 1001
	    steps:
	      - uses: actions/checkout@v3
	        with:
	          submodules: 'true'

	      - name: Setup cache for gold standard
	        uses: actions/cache@v3
	        with:
	          path: tst/regression/gold_standard/
	          key: gold-standard

	      # The expression is substituted before the shell runs, so the script
	      # sees a literal `if true; then` or `if false; then`.
	      # Result: ASAN is ON for host builds and OFF for cuda builds.
	      - name: Set vars based on matrix
	        id: cmake-vars
	        run: |
	          if ${{ matrix.device == 'host' }}; then
	            echo "enable_asan=ON" >> $GITHUB_OUTPUT
	          else
	            echo "enable_asan=OFF" >> $GITHUB_OUTPUT
	          fi

	      - name: Configure
	        run: |
	          cmake -B build \
	            -DCMAKE_BUILD_TYPE=Release \
	            -DENABLE_ASAN=${{ steps.cmake-vars.outputs.enable_asan }} \
	            -DMACHINE_VARIANT=${{ matrix.device }}-${{ matrix.parallel }}

	      - name: Build
	        run: cmake --build build

	      # run performance "unit" tests (none use MPI)
	      - name: Performance tests
	        if: ${{ matrix.parallel == 'serial' }}
	        run: |
	          cd build
	          # Pick GPU with most available memory
	          export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }')
	          # Sanitizers options (leak detection is disabled)
	          export ASAN_OPTIONS=abort_on_error=1:fast_unwind_on_malloc=1
	          export UBSAN_OPTIONS=print_stacktrace=0
	          export LSAN_OPTIONS=detect_leaks=0
	          ctest -L performance -LE perf-reg

	      # run regression tests
	      - name: Regression tests
	        run: |
	          cd build
	          # Pick GPU with most available memory
	          export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }')
	          # Sanitizers options (leak detection is disabled for every matrix
	          # entry, not only MPI; the motivation is leaks reported from OpenMPI)
	          export ASAN_OPTIONS=abort_on_error=1:fast_unwind_on_malloc=1
	          export UBSAN_OPTIONS=print_stacktrace=0
	          export LSAN_OPTIONS=detect_leaks=0
	          ctest -L regression -L ${{ matrix.parallel }} -LE perf-reg --timeout 3600

	      # Test Ascent integration (only most complex setup with MPI and on device)
	      - name: Ascent tests
	        if: ${{ matrix.parallel == 'mpi' && matrix.device == 'cuda' }}
	        run: |
	          cmake -B build-ascent \
	            -DCMAKE_BUILD_TYPE=Release \
	            -DMACHINE_VARIANT=${{ matrix.device }}-${{ matrix.parallel }} \
	            -DPARTHENON_ENABLE_ASCENT=ON \
	            -DAscent_DIR=/usr/local/ascent-develop/lib/cmake/ascent
	          cmake --build build-ascent
	          cd example/advection/
	          # Pick GPU with most available memory
	          export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }')
	          mpirun -np 2 ../../build-ascent/example/advection/advection-example \
	            -i parthinput.advection \
	            parthenon/output5/dt=0.05 \
	            parthenon/time/tlim=0.1
	          # check if file exists
	          if [ ! -f "ascent_render_57.png" ]; then
	            echo "'ascent_render_57.png' does not exist."
	            exit 1
	          fi

	      # Upload logs and convergence results even when an earlier step failed
	      # (that is when they are needed for debugging), but not when the run
	      # was cancelled. upload-artifact v3 is retired on github.com; v4
	      # requires artifact names to be unique within a run, which the
	      # matrix-derived name below provides.
	      - uses: actions/upload-artifact@v4
	        if: ${{ !cancelled() }}
	        with:
	          name: log-and-convergence-${{ matrix.device }}-${{ matrix.parallel }}
	          path: |
	            build/CMakeFiles/CMakeOutput.log
	            build/tst/regression/outputs/advection_convergence*/advection-errors.dat
	            build/tst/regression/outputs/advection_convergence*/advection-errors.png
	            example/advection/ascent_render_57.png
	          retention-days: 3

	  # Performance and regression tests for the hip-serial and hip-mpi variants
	  # on the self-hosted runner labeled navi1030.
	  perf-and-regression-amdgpu:
	    strategy:
	      matrix:
	        parallel: ['serial', 'mpi']
	    runs-on: [self-hosted, navi1030]
	    container:
	      image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
	      # Map to local user id on CI machine to allow writing to build cache and
	      # forward device handles to access AMD GPU within container
	      options: --user 1000 -w /home/ci --device /dev/kfd --device /dev/dri --security-opt seccomp=unconfined
	    # Job-level values; CMAKE_BUILD_PARALLEL_LEVEL overrides the
	    # workflow-level setting for this job.
	    env:
	      CMAKE_GENERATOR: Ninja
	      CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build
	    steps:
	      - uses: actions/checkout@v3
	        with:
	          submodules: 'true'

	      - name: Setup cache for gold standard
	        uses: actions/cache@v3
	        with:
	          path: tst/regression/gold_standard/
	          key: gold-standard

	      # The machine config is passed explicitly on the command line here.
	      - name: Configure
	        run: |
	          cmake -B build \
	            -DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
	            -DCMAKE_BUILD_TYPE=Release \
	            -DMACHINE_VARIANT=hip-${{ matrix.parallel }} \
	            -DCMAKE_CXX_COMPILER=hipcc

	      - name: Build
	        run: cmake --build build

	      # run performance "unit" tests (none use MPI)
	      - name: Performance tests
	        if: ${{ matrix.parallel == 'serial' }}
	        run: |
	          cd build
	          ctest -L performance -LE perf-reg

	      # run regression tests
	      - name: Regression tests
	        run: |
	          cd build
	          ctest -L regression -L ${{ matrix.parallel }} -LE perf-reg --timeout 3600

	      # Upload logs and convergence results even when an earlier step failed,
	      # but not when the run was cancelled. upload-artifact v3 is retired on
	      # github.com; v4 requires artifact names to be unique within a run. The
	      # name below has no device component, so it does not collide with the
	      # artifacts of the perf-and-regression job.
	      - uses: actions/upload-artifact@v4
	        if: ${{ !cancelled() }}
	        with:
	          name: log-and-convergence-${{ matrix.parallel }}
	          path: |
	            build/CMakeFiles/CMakeOutput.log
	            build/tst/regression/outputs/advection_convergence*/advection-errors.dat
	            build/tst/regression/outputs/advection_convergence*/advection-errors.png
	          retention-days: 3

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Address CUDA MPI/IPC issue with Kokkos <=4.4.1 #1193

Workflow file

Address CUDA MPI/IPC issue with Kokkos <=4.4.1 #1193

Jobs

Run details

Workflow file for this run