From 643f42b6c39ca709dfcbc5e88f4810a0934922cb Mon Sep 17 00:00:00 2001 From: Aaron Black Date: Mon, 4 Dec 2023 11:02:04 -0800 Subject: [PATCH] Feature/aaroncblack/version update 5.3.0 (#18) * updates to 5.3.0. See README and CHANGELOG for more info. --- benchmarks/README | 4 - .../generate_strong_scaling_runs_spp1.py | 41 -- benchmarks/generate_weak_scaling_runs_spp1.py | 41 -- build_and_run_umt.sh | 45 +- host-configs/example.nvidia.cmake | 19 - src/CMakeLists.txt | 2 +- src/cmake/InitBuildTypeCompilerFlags.cmake | 8 +- src/teton/CMakeLists.txt | 5 +- src/teton/aux/AppendSourceToPsi.F90 | 136 ++++++ src/teton/aux/CMakeLists.txt | 1 + src/teton/aux/checkInputSanity.F90 | 8 +- src/teton/control/finalizeSets.F90 | 288 ++++++------- src/teton/control/initializeSets.F90 | 101 ++--- src/teton/driver/test_driver.cc | 237 +++++----- src/teton/gpu/CornerSweepUCBxyz_OMPOL.F90 | 30 +- src/teton/gpu/OMPWrappers.F90.templates | 69 +-- src/teton/gpu/finalizeGPUMemory.F90 | 38 +- src/teton/gpu/initializeGPUMemory.F90 | 56 +-- src/teton/include/TetonBlueprint.hh | 16 + src/teton/include/TetonConduitInterface.hh | 26 +- src/teton/include/TetonInterface.hh | 16 + src/teton/include/TetonSources.hh | 153 +++++++ src/teton/include/macros.h | 11 + src/teton/include/omp_wrappers.h | 12 +- src/teton/interface/CMakeLists.txt | 1 + src/teton/interface/TetonBlueprint.cc | 228 +++++++++- src/teton/interface/TetonConduitInterface.cc | 404 +++++++++++++----- src/teton/interface/TetonSources.cc | 207 +++++++++ src/teton/interface/TetonSurfaceTallies.cc | 6 +- src/teton/mods/MemoryAllocator_mod.F90 | 38 +- .../mods/MemoryAllocator_mod.F90.templates | 53 +-- src/teton/mods/Options_mod.F90 | 20 - src/teton/mods/Size_mod.F90 | 4 +- src/teton/rt/RecvFlux.F90 | 10 - src/teton/rt/SendFlux.F90 | 13 - src/teton/rt/initcomm.F90 | 51 +-- 36 files changed, 1459 insertions(+), 939 deletions(-) delete mode 100644 benchmarks/README delete mode 100644 benchmarks/generate_strong_scaling_runs_spp1.py delete mode 100644 benchmarks/generate_weak_scaling_runs_spp1.py delete mode 100644 host-configs/example.nvidia.cmake create mode 100644 src/teton/aux/AppendSourceToPsi.F90 create mode 100644 src/teton/include/TetonSources.hh create mode 100644 src/teton/interface/TetonSources.cc diff --git a/benchmarks/README b/benchmarks/README deleted file mode 100644 index d216070..0000000 --- a/benchmarks/README +++ /dev/null @@ -1,4 +0,0 @@ -Helper script for creating run lines for a weak and strong scaling study. -Example: -python3 generate_weak_scaling_runs_spp1.py >& run_weak_scaling_study_spp1.sh -python3 generate_strong_scaling_runs_spp1.py >& run_strong_scaling_study_spp1.sh diff --git a/benchmarks/generate_strong_scaling_runs_spp1.py b/benchmarks/generate_strong_scaling_runs_spp1.py deleted file mode 100644 index b5ca33a..0000000 --- a/benchmarks/generate_strong_scaling_runs_spp1.py +++ /dev/null @@ -1,41 +0,0 @@ -# Generate runs for a strong scaling study. -# -# This python script can be used to assist in generating a series of run configurations for the UMT unstructured box 3d mesh. -# The series of runs are designed to strong scale the problem from 1 to n processors. -# -# node_memory - total memory on a node (gigabytes) -# node_memory_target - % of memory to use on node at max number of MPI ranks. -# memory_tolerance - tolerance for problem memory usage. Will try to generate runs that use the same amount of memory per rank to within X%. - -# Node has 256gb of memory -node_memory = 256 -# Use 50% of node's memory, within +/- 5%. -node_memory_target = 0.50 -memory_tolerance = 0.05 -max_ranks = 112 - -# Number of polar by azimuthal angles to use in the product quadrature set -a = 3 -# Number of energy groups to use -g = 128 -# Additional memory required factor. Amount of additional memory needed, on top of memory to hold the radiation energy density field. -memFactor = 1.3 - -# upper and lower bound of memory usage to calculate series for, in bytes. -mem_lower_bound = node_memory * node_memory_target * (1 - memory_tolerance) * 1024 * 1024 * 1024 -mem_upper_bound = node_memory * node_memory_target * (1 + memory_tolerance) * 1024 * 1024 * 1024 - -print ("# Calculating size of mesh to use for your angle and group count...") -for r in range(1,1000): - # 3d mesh - zones = 12*(r**3) - angles = a * a * 8 - totalDOFs = zones * 8 * g * angles - mem = totalDOFs * 8 * memFactor - memGB = mem/1024/1024/1024 - if mem >= mem_lower_bound and mem <= mem_upper_bound: - print(f"#You will need to refine the mesh {r} times and generate a mesh of {zones} zones to use {memGB} GB of memory for your problem configuration.") - print(f"./makeUnstructuredBox -r {r} -o umt_spp1.mesh") - -for r in range(1,max_ranks+1): - print(f"srun -n {r} --exclusive ./test_driver -c 1 -b 1 -i ./umt_spp1.mesh >& run.spp1.{r}_ranks.{zones}_zones.{totalDOFs}_dofs.log") diff --git a/benchmarks/generate_weak_scaling_runs_spp1.py b/benchmarks/generate_weak_scaling_runs_spp1.py deleted file mode 100644 index bf7ab59..0000000 --- a/benchmarks/generate_weak_scaling_runs_spp1.py +++ /dev/null @@ -1,41 +0,0 @@ -# Generate runs for a weak scaling run. -# -# This python script can be used to assist in generating a series of run configurations for the UMT unstructured box 3d mesh. -# The series of runs are designed to weak scale the problem from 1 to n processors. -# -# node_memory - total memory on a node (gigabytes) -# node_memory_target - % of memory to use on node at max number of MPI ranks. -# memory_tolerance - tolerance for problem memory usage. Will try to generate runs that use the same amount of memory per rank to within X%. - -# Node has 128gb of memory -node_memory = 256 -# Max # mpi ranks to use ( typically equal to # cores on node ). -max_ranks = 112 -# Use 50% of node's memory, within +/- 5%. -node_memory_target = 0.50 -memory_tolerance = 0.05 - -# Number of polar x azimuthal angles to use in the product quadrature set -a = 3 -# Number of energy groups to use -g = 128 - -# upper and lower bound of memory usage to calculate series for, in bytes, per rank -mem_lower_bound = node_memory * node_memory_target * (1 - memory_tolerance) * 1024 * 1024 * 1024 / max_ranks -mem_upper_bound = node_memory * node_memory_target * (1 + memory_tolerance) * 1024 * 1024 * 1024 / max_ranks - -print ("#---------------------------") -print ("#Angles: 3x3 Groups: 32") -print ("#---------------------------") -for r in range(1,1000): - # 3d mesh - zones = 12*(r**3) - angles = a * a * 8 - totalDOFs = zones * 8 * g * angles - totalMem = totalDOFs * 8 - for p in range(1,max_ranks+1): - mem_per_rank = totalMem / p - if mem_per_rank >= mem_lower_bound and mem_per_rank <= mem_upper_bound: - DOFsPerRank = int(totalDOFs / p) - if 8*p < zones: - print(f"srun -N1-1 -n {p} --exclusive ./test_driver -c 1 -b 1 -r 1 -R {r} -o . -i unstructBox3D.mesh >& run.spp1.{p}_ranks.{DOFsPerRank}_dofs_per_rank.{zones}_zones.{totalDOFs}_total_dofs.log") diff --git a/build_and_run_umt.sh b/build_and_run_umt.sh index 3fd7476..6f0747d 100755 --- a/build_and_run_umt.sh +++ b/build_and_run_umt.sh @@ -1,8 +1,4 @@ #!/bin/sh -x -# This script will compile a basic RelWithDebInfo build of UMT. Additional CMake options can be added to the command line args of this script, and they will be picked up and added to the UMT CMake command at the bottom of this script. -# For a list of supported CMake options, run 'ccmake /path/to/umt/src'. -# Do not copy this script out of the UMT repo directory, it assumes it is located next to the UMT source files in order to work. - # If you have a UMT tarball, untar it. Otherwise, git clone it from github. # git clone https://github.com/LLNL/UMT.git @@ -11,31 +7,22 @@ # CXX = # FC = -# Alternatively, you can set the compilers to point to MPI compiler wrappers, if CMake has trouble finding your MPI installation location. +# Default to GNU +CC=gcc +CXX=g++ +FC=gfortran -# Example using GNU -#CC=gcc -#CXX=g++ -#FC=gfortran +FFLAGS=-fallow-argument-mismatch -# Example using Intel +# Intel example # CC=icx # CXX=icpx # FC=ifx -# Example using MPI compiler wrappers -CC=mpicc -CXX=mpicxx -FC=mpif90 - -#If using CUDA, set this to point to your CUDA installation -#export CUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-11.8.0 - -# Desired installation path -INSTALL_PATH=${PWD}/umt_workspace/install -# Set common CMAKE args for Conduit and UMT. -CMAKE_COMMON_ARGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_C_COMPILER=${CC} -DCMAKE_CXX_COMPILER=${CXX} -DCMAKE_Fortran_COMPILER=${FC}" +# This script will compile a basic Release build of UMT. Additional CMake options can be added to the command line args of this script, and they will be picked up and added to the UMT CMake command at the bottom of this script. +# For a list of supported CMake options, run 'ccmake /path/to/umt/src'. +# Do not copy this script out of the UMT repo directory, it assumes it is located next to the UMT source files in order to work. # Get directory this script is located in. This is assumed to be the UMT repo location. SOURCE="${BASH_SOURCE[0]}" @@ -46,28 +33,26 @@ while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symli done UMT_REPO_PATH="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" -# Create workspace for building umt and required library conduit +# Create workspace for building umt and required libraries conduit, metis, hypre, mfem. +INSTALL_PATH=${PWD}/umt_workspace/install mkdir -p ${INSTALL_PATH} echo Libraries will be installed to: ${INSTALL_PATH} cd umt_workspace -# Git latest conduit source from github git clone --recurse-submodules https://github.com/LLNL/conduit.git conduit mkdir build_conduit cd build_conduit -# Run CMake on Conduit, compile, and install. -cmake ${PWD}/../conduit/src ${CMAKE_COMMON_ARGS} -DBUILD_SHARED_LIBS=OFF -DENABLE_TESTS=OFF -DENABLE_EXAMPLES=OFF -DENABLE_DOCS=OFF -DENABLE_FORTRAN=ON -DENABLE_MPI=ON -DENABLE_PYTHON=OFF -DENABLE_UTILS=OFF -DENABLE_RELAY_WEBSERVER=OFF +cmake ${PWD}/../conduit/src -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_C_COMPILER=${CC} -DCMAKE_CXX_COMPILER=${CXX} -DCMAKE_Fortran_COMPILER=${FC} -DMPI_CXX_COMPILER=mpicxx -DMPI_Fortran_COMPILER=mpifort -DBUILD_SHARED_LIBS=OFF -DENABLE_TESTS=OFF -DENABLE_EXAMPLES=OFF -DENABLE_DOCS=OFF -DENABLE_FORTRAN=ON -DENABLE_MPI=ON -DENABLE_PYTHON=OFF gmake -j install cd .. -mkdir build_umt -cd build_umt # Run CMake on UMT, compile, and install. -cmake ${UMT_REPO_PATH}/src ${CMAKE_COMMON_ARGS} -DCONDUIT_ROOT=${INSTALL_PATH} +cmake ${UMT_REPO_PATH}/src -DCMAKE_Fortran_FLAGS=${FFLAGS} -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=${CXX} -DCMAKE_Fortran_COMPILER=${FC} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCONDUIT_ROOT=${INSTALL_PATH} $1 gmake -j install cd .. -# Test on 2D and 3D problem srun -n 8 ${INSTALL_PATH}/bin/test_driver -c 10 -B -d 8,8,0 --benchmark_problem 2 srun -n 8 ${INSTALL_PATH}/bin/test_driver -c 10 -B -d 4,4,4 --benchmark_problem 2 + +# Test UMT on SSP1 unstructured 3d mesh problem on two mpi ranks. Refine the mesh via -r and -R arguments. diff --git a/host-configs/example.nvidia.cmake b/host-configs/example.nvidia.cmake deleted file mode 100644 index 1ebcf4e..0000000 --- a/host-configs/example.nvidia.cmake +++ /dev/null @@ -1,19 +0,0 @@ -# Example cmake cache configuration for UMT. - -# Tested with gnu 10.2.1 -set(TPL_ROOT "/path/to/libraries" CACHE PATH "") - -set(CMAKE_C_COMPILER "nvcc" CACHE PATH "") -set(CMAKE_CXX_COMPILER "nvcc" CACHE PATH "") -set(CMAKE_Fortran_COMPILER "nvfortran" CACHE PATH "") - -set(CMAKE_CXX_FLAGS "" CACHE PATH "") -set(CMAKE_Fortran_FLAGS "" CACHE PATH "") - -set(ENABLE_OPENMP ON CACHE BOOL "") -set(ENABLE_OPENMP_OFFLOAD OFF CACHE BOOL "") - -set(CONDUIT_ROOT ${TPL_ROOT}/conduit/0.8.2 CACHE PATH "") -set(MFEM_ROOT ${TPL_ROOT}/mfem/4.4 CACHE PATH "") -set(HYPRE_ROOT ${TPL_ROOT}/hypre/2.24.0 CACHE PATH "") -set(METIS_ROOT ${TPL_ROOT}/metis/5.1.0 CACHE PATH "") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 24b5f3a..7040317 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,7 @@ set(PROJECT_NAME teton) # Update version information in this file. set(TETON_VERSION_MAJOR 5) -set(TETON_VERSION_MINOR 1) +set(TETON_VERSION_MINOR 3) set(TETON_VERSION_PATCH 0) include (cmake/Version.cmake) diff --git a/src/cmake/InitBuildTypeCompilerFlags.cmake b/src/cmake/InitBuildTypeCompilerFlags.cmake index 70bbcdc..651771e 100644 --- a/src/cmake/InitBuildTypeCompilerFlags.cmake +++ b/src/cmake/InitBuildTypeCompilerFlags.cmake @@ -34,8 +34,8 @@ if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -diag-enable=remark -traceback") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -diag-enable=remark -traceback") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -diag-enable=remark") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -diag-enable=remark") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") @@ -74,8 +74,8 @@ if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") elseif("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "IntelLLVM") set(CMAKE_Fortran_FLAGS_RELEASE "-O2 -DNDEBUG") - set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-O2 -g -warn all,noexternal,nointerfaces -diag-enable=remark -fpen=0-traceback") - set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -warn all,noexternal,nointerfaces -diag-enable=remark -check all -fpen=0 -traceback") + set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-O2 -g -warn all,noexternal,nointerfaces -diag-enable=remark -fpe-all=0 -fpe0 -traceback") + set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -warn all,noexternal,nointerfaces -diag-enable=remark -check all -fpe-all=0 -fpe0 -traceback") elseif("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "PGI") diff --git a/src/teton/CMakeLists.txt b/src/teton/CMakeLists.txt index acf6855..0ad90ce 100644 --- a/src/teton/CMakeLists.txt +++ b/src/teton/CMakeLists.txt @@ -58,6 +58,7 @@ if (ENABLE_CALIPER) endif() if (ENABLE_BLUEPRINT_INTERFACE) + target_sources( teton INTERFACE include/TetonSources.hh ) target_sources( teton INTERFACE include/TetonBlueprint.hh ) target_sources( teton INTERFACE include/TetonSurfaceTallies.hh ) target_sources( teton INTERFACE include/TetonConduitInterface.hh ) @@ -95,6 +96,7 @@ if (ENABLE_BLUEPRINT_INTERFACE) install( FILES "include/TetonConduitInterface.hh" DESTINATION include ) install( FILES "include/TetonBlueprint.hh" DESTINATION include ) install( FILES "include/TetonSurfaceTallies.hh" DESTINATION include ) + install( FILES "include/TetonSources.hh" DESTINATION include ) endif() install( FILES "${PROJECT_BINARY_DIR}/teton/include/TetonVersion.hh" DESTINATION include ) @@ -120,7 +122,6 @@ if(ENABLE_TESTS) endif() add_executable( test_driver - interface/TetonBlueprint.cc driver/test_driver.cc ) if (ENABLE_FIND_MPI) @@ -140,7 +141,7 @@ if(ENABLE_TESTS) # Generally preferred to link with the Fortran compiler. # Intel has trouble with this, fallback on using the C++ compiler to link. - if ( NOT CMAKE_Fortran_COMPILER_ID STREQUAL Intel AND NOT CMAKE_Fortran_COMPILER_ID STREQUAL IntelLLVM AND NOT CMAKE_Fortran_COMPILER_ID STREQUAL NVHPC) + if ( NOT CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_Fortran_COMPILER_ID STREQUAL IntelLLVM) set_target_properties(test_driver PROPERTIES LINKER_LANGUAGE Fortran) endif () diff --git a/src/teton/aux/AppendSourceToPsi.F90 b/src/teton/aux/AppendSourceToPsi.F90 new file mode 100644 index 0000000..e18791e --- /dev/null +++ b/src/teton/aux/AppendSourceToPsi.F90 @@ -0,0 +1,136 @@ +#include "macros.h" + +!*********************************************************************** +! Last Update: 2023/01/10 BCY * +! * +! AppendSourceToPsi - Updates Psi from the previous time step to * +! to account for external fixed radiation sources. * +! * +! * +! We are basically adding q*c*dt to \psi_old to piggy-back off * +! the existing \psi_old/(c*dt) term in the sweep * +! * +! * +!*********************************************************************** + + subroutine AppendSourceToPsi(numSourceZones, & + numSourceAngles, & + numSourceGroups, & + sourceZoneList, & + sourceValues) BIND(C,NAME="teton_appendsourcetopsi") + + USE ISO_C_BINDING + use kind_mod + use constant_mod + use radconstant_mod + use Size_mod + use QuadratureList_mod + use SetData_mod + use AngleSet_mod + use TimeStepControls_mod + use Geometry_mod + use QuadratureList_mod + + implicit none + +! Arguments + + integer(C_INT), intent(in) :: numSourceZones + integer(C_INT), intent(in) :: numSourceAngles + integer(C_INT), intent(in) :: numSourceGroups + + integer(C_INT), intent(in) :: sourceZoneList(numSourceZones) + ! This must be in units of energy per time + ! That is, sourceValues*c*dt/zoneVolume must have the same units as \psi + real(C_DOUBLE), intent(in) :: sourceValues(numSourceGroups, numSourceAngles, numSourceZones) + +! Local + + type(SetData), pointer :: Set + type(AngleSet), pointer :: ASet + + integer :: angle + integer :: angle0 + integer :: polarAngle + integer :: c + integer :: c0 + integer :: g0 + integer :: nCornerInZone + integer :: z + integer :: zone + integer :: NumGroupsInSet + integer :: NumAnglesInSet + integer :: setID + integer :: nSets + real(adqt) :: cdt + real(adqt) :: volumeZone + real(adqt) :: src_value(numSourceGroups) + + integer :: angleSetID + integer :: numAngleSets + integer :: numAnglesInternal + integer :: numPolarAnglesInternal + +! Constants + + nSets = getNumberOfSets(Quad) + numAngleSets = getNumberOfAngleSets(Quad) + cdt = speed_light*getRadTimeStep(DtControls) + +! Some checks on quadrature/angle sizes: + numAnglesInternal = 0 + do angleSetID = 1,numAngleSets + ASet => getAngleSetData(Quad,angleSetID) + numAnglesInternal = numAnglesInternal + ASet%NumAngles + enddo + TETON_VERIFY(Quad%QuadPtr(1)%TypeName /= 'lobatto', 'Lobatto quadratures not supported for general sources yet.') + if (Quad%QuadPtr(1)%TypeName == 'levelsym') then + TETON_VERIFY(numSourceAngles == 1 .or. numSourceAngles == numAnglesInternal, "For level symmetric, numSourceAngles must be 1 or # of total angles.") + elseif (Quad%QuadPtr(1)%TypeName == 'product') then + numPolarAnglesInternal = Quad%QuadPtr(1)%nPolarAngles + TETON_VERIFY(numSourceAngles == 1 .or. numSourceAngles == numAnglesInternal .or. numSourceAngles == numPolarAnglesInternal, "numSourceAngles must be 1, # of polar angles, or # of total angles.") + endif + + TETON_VERIFY(numSourceGroups == Size%ngr, "number of source groups must match ngr in Size") + + SetLoop: do setID=1,nSets + + Set => getSetData(Quad, setID) + + NumGroupsInSet = Set% Groups + NumAnglesInSet = Set% NumAngles + g0 = Set% g0 + angle0 = Set% angle0 + + AngleLoop: do angle=1,Set%NumAngles + + SourceZoneLoop: do z = 1,numSourceZones + + zone = sourceZoneList(z) + c0 = Geom%cOffset(zone) + nCornerInZone = Geom%numCorner(zone) + volumeZone = Geom%VolumeZone(zone)*getGeometryFactor(Size) + + if (numSourceAngles == 1) then + src_value(1:NumGroupsInSet) = sourceValues(g0+1:g0+NumGroupsInSet, 1, z)*Size%wtiso/volumeZone + else if (numSourceAngles == numPolarAnglesInternal) then + polarAngle = Set%polarAngle(angle) + if (polarAngle < 1) then ! skip starting/finishing directions + cycle + endif + src_value(1:NumGroupsInSet) = sourceValues(g0+1:g0+NumGroupsInSet, polarAngle, z)*Size%wtiso*two/volumeZone + else !if (numSourceAngles == numAnglesInternal) then + src_value(1:NumGroupsInSet) = sourceValues(g0+1:g0+NumGroupsInSet, angle0+angle, z)/volumeZone + endif + src_value = cdt*src_value + + do c=c0+1,c0+nCornerInZone + Set% Psi(:,c,angle) = Set% Psi(:,c,angle) + src_value(1:NumGroupsInSet) + enddo + + enddo SourceZoneLoop + enddo AngleLoop + enddo SetLoop + + return + end subroutine AppendSourceToPsi diff --git a/src/teton/aux/CMakeLists.txt b/src/teton/aux/CMakeLists.txt index 102b635..b047025 100644 --- a/src/teton/aux/CMakeLists.txt +++ b/src/teton/aux/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources( teton PRIVATE AdjustRadEnergyDensityRelTol.F90 AdjustTemperatureRelTol.F90 AdjustTemperatureMaxIts.F90 + AppendSourceToPsi.F90 ConstructBoundary.F90 ConstructDtControls.F90 ConstructEditor.F90 diff --git a/src/teton/aux/checkInputSanity.F90 b/src/teton/aux/checkInputSanity.F90 index ea3f513..dd26b13 100644 --- a/src/teton/aux/checkInputSanity.F90 +++ b/src/teton/aux/checkInputSanity.F90 @@ -75,8 +75,8 @@ subroutine checkInputSanity(killOnBad, & ! Category format string 100 FORMAT('Bad ',A20,' Data. ',I10, ' Entries out of' , I12, ' were bad') - ! sigma format string -200 FORMAT('Zone: ',I8, ' Group: ',I4,' Sigma',A20,' is bad: ',ES20.8) + ! Zone and group-dependent format string +200 FORMAT('Zone: ',I8, ' Group: ',I4, 1X, A20,' is bad: ',ES20.8) ! Zone material property 300 FORMAT('Zone: ' , I8, 1X , A20, ' is bad: ',ES20.8) @@ -90,7 +90,8 @@ subroutine checkInputSanity(killOnBad, & do cats=1,numCatsToCheck select case ( arrayOfCatsToCheck(cats) ) - case(1) +#if !defined(TETON_ENABLE_MINIAPP_BUILD) + case (1) ! ************************************** ! sigs ! ************************************** @@ -224,6 +225,7 @@ subroutine checkInputSanity(killOnBad, & WRITE( nout , 100) categoryName, numBadEntries, numEntries endif endif +#endif case (7) ! ************************************** ! Corner volumes diff --git a/src/teton/control/finalizeSets.F90 b/src/teton/control/finalizeSets.F90 index 0f5c221..9991895 100644 --- a/src/teton/control/finalizeSets.F90 +++ b/src/teton/control/finalizeSets.F90 @@ -24,6 +24,7 @@ subroutine finalizeSets use Material_mod #if !defined(TETON_ENABLE_MINIAPP_BUILD) use ComptonControl_mod + use flags_mod #endif use RadIntensity_mod use OMPWrappers_mod @@ -90,107 +91,64 @@ subroutine finalizeSets ! Unmap zone sets - TOMP(target exit data map(release: ZSet% nCornerSet)) - TOMP(target exit data map(release: ZSet% nCornerBatch)) - TOMP(target exit data map(release: ZSet% offset)) - TOMP(target exit data map(release: ZSet% cornerList)) - TOMP(target exit data map(release: ZSet% cornerMap)) - TOMP(target exit data map(release: ZSet% zoneList)) - TOMP(target exit data map(release: ZSet% cornerConverged)) - TOMP(target exit data map(release: ZSet% Te)) - TOMP(target exit data map(release: ZSet% TeOld)) - TOMP(target exit data map(release: ZSet% delta)) - TOMP(target exit data map(release: ZSet% sumT)) - TOMP(target exit data map(release: ZSet% netRate)) - TOMP(target exit data map(release: ZSet% dTCompton)) - TOMP(target exit data map(release: ZSet% B)) - TOMP(target exit data map(release: ZSet% dBdT)) - TOMP(target exit data map(release: ZSet% Snu0)) - TOMP(target exit data map(release: ZSet% dSnu0dT)) - TOMP(target exit data map(release: ZSet% AD)) - TOMP(target exit data map(release: ZSet% z)) - TOMP(target exit data map(release: ZSet% fk2)) - TOMP(target exit data map(release: ZSet% nI)) - TOMP(target exit data map(release: ZSet% nS)) - TOMP(target exit data map(release: ZSet% ex)) - TOMP(target exit data map(release: ZSet% expPH)) - TOMP(target exit data map(release: ZSet% comptonDeltaEr)) - TOMP(target exit data map(release: ZSet% dComptonDT)) - TOMP(target exit data map(release: ZSet% comptonSe)) - TOMP(target exit data map(release: ZSet% AU)) - TOMP(target exit data map(release: ZSet% AL)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% nCornerSet) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% nCornerBatch) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% offset) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% cornerList) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% cornerMap) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% zoneList) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% cornerConverged) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% Te) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% TeOld) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% delta) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% sumT) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% netRate) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% dTCompton) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% B) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% dBdT) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% Snu0) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% dSnu0dT) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% AD) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% z) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% fk2) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% nI) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% nS) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% ex) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% expPH) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% comptonDeltaEr) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% dComptonDT) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% comptonSe) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% AU) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(ZSet% AL) TOMP(target exit data map(release: ZSet)) ! Unmap group sets do gSetID=1,nGroupSets - TOMP(target exit data map(release:Quad% GrpSetPtr(gSetID)% STotal)) - TOMP(target exit data map(release:Quad% GrpSetPtr(gSetID)% Sigt)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% GrpSetPtr(gSetID)% STotal) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% GrpSetPtr(gSetID)% Sigt) enddo -! Leave as-of-yet unneeded maps in here as comments. -! I anticipate some of these will be needed as more code is pushed to the GPU for the MPI. -! -- Aaron - if (Options%getMPIUseDeviceAddresses()) then - ! Unmap comm sets - do cSetID=1,nCommSets+nGTASets - - ! Loop over communicator fluxes -! do commID=1,Size%ncomm -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% irequest)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% IncFlux)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% ExitFlux)) -! enddo - -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommFluxPtr)) - - ! Loop over communicators - do commID=1,Size%ncomm - do angle=1,Quad%CommSetPtr(cSetID)% NumAngles - if (Quad%CommSetPtr(cSetID)% CommPtr(commID, angle)%nSend > 0) then -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% ListSend)) - TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% psibsend)) - endif - if (Quad%CommSetPtr(cSetID)% CommPtr(commID, angle)%nRecv > 0) then -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% ListRecv)) - TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% psibrecv)) - endif -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% irequest)) - end do - end do - TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% CommPtr)) - -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% NangBinList)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% AngleToBin)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% AngleOrder)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% AngleOrder0)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% RecvOrder0)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% RecvOrder)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% request)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% IncFlux)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% IncFluxOld)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% NetFlux)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% relError)) -! TOMP(target exit data map(release:Quad% CommSetPtr(cSetID)% Converged)) - - TOMP(target exit data map(release:Quad% CommSetPtr)) - enddo - endif ! Options%getMPIUseDeviceAddresses() - do aSetID=1,nAngleSets+nGTASets - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% nextZ)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% nextC)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% Omega)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% Weight)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% numCycles)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% cycleOffSet)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% cycleList)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% nHyperPlanes)) - + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% nextZ) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% nextC) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% StartingDirection) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% FinishingDirection) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% Omega) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% Weight) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% numCycles) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% cycleOffSet) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% cycleList) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% nHyperPlanes) + + ! This loop unmaps internal components of HypPlanePtr and BdyExitPtr. + ! Delay unmapping these until this loop is done. do angle=1,Quad%AngSetPtr(aSetID)% numAngles + ! Unable to map this to UMPIRE device pool, causes segfault. TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% BdyExitPtr(angle)%bdyList)) if ( .not. Quad%AngSetPtr(aSetID)% FinishingDirection(angle) ) then + ! Unable to map these to UMPIRE device pool, causes segfault or wrong answers. TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% zonesInPlane)) TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane1)) TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane2)) @@ -198,24 +156,21 @@ subroutine finalizeSets endif enddo - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% StartingDirection)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% FinishingDirection)) - - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% HypPlanePtr)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% BdyExitPtr)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% HypPlanePtr) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% BdyExitPtr) if ( aSetID <= nAngleSets ) then - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% AfpNorm)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% AezNorm)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% ANormSum)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% AfpNorm) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% AezNorm) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% ANormSum) endif if (Size% ndim == 2) then - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% angDerivFac)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% quadTauW1)) - TOMP(target exit data map(release:Quad% AngSetPtr(aSetID)% quadTauW2)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% angDerivFac) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% quadTauW1) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% AngSetPtr(aSetID)% quadTauW2) endif @@ -223,99 +178,103 @@ subroutine finalizeSets ! Geometry - TOMP(target exit data map(release:Geom% Volume)) - TOMP(target exit data map(release:Geom% VolumeOld)) - TOMP(target exit data map(release:Geom% VolumeZone)) - TOMP(target exit data map(release:Geom% cOffSet)) - TOMP(target exit data map(release:Geom% numCorner)) - TOMP(target exit data map(release:Geom% CToZone)) - TOMP(target exit data map(release:Geom% corner1)) - TOMP(target exit data map(release:Geom% corner2)) - TOMP(target exit data map(release:Geom% zone1)) - TOMP(target exit data map(release:Geom% zone2)) - TOMP(target exit data map(release:Geom% cEZ)) - TOMP(target exit data map(release:Geom% cFP)) - TOMP(target exit data map(release:Geom% A_ez)) - TOMP(target exit data map(release:Geom% A_fp)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% Volume) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% VolumeOld) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% VolumeZone) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% cOffSet) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% numCorner) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% CToZone) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% corner1) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% corner2) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% zone1) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% zone2) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% cEZ) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% cFP) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% A_ez) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% A_fp) if (Size% ndim == 2) then - TOMP(target exit data map(release:Geom% Area)) - TOMP(target exit data map(release:Geom% RadiusEZ)) - TOMP(target exit data map(release:Geom% RadiusFP)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% Area) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% RadiusEZ) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% RadiusFP) elseif (Size% ndim == 3) then - TOMP(target exit data map(release:Geom% nCFacesArray)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Geom% nCFacesArray) endif TOMP(target exit data map(release:Geom)) ! Radiation Intensity - TOMP(target exit data map(release:Rad% PhiTotal)) - TOMP(target exit data map(release:Rad% radEnergy)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Rad% PhiTotal) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Rad% radEnergy) TOMP(target exit data map(release:Rad)) ! GTA if (Size%useNewGTASolver) then - TOMP(target exit data map(release:GTA% TT)) - TOMP(target exit data map(release:GTA% Pvv)) - TOMP(target exit data map(release:GTA% GreySigTotal)) - TOMP(target exit data map(release:GTA% GreySigScat)) - TOMP(target exit data map(release:GTA% GreySigScatVol)) - TOMP(target exit data map(release:GTA% GreySigtInv)) - TOMP(target exit data map(release:GTA% PhiInc)) - TOMP(target exit data map(release:GTA% Q)) - TOMP(target exit data map(release:GTA% TsaSource)) - TOMP(target exit data map(release:GTA% AfpNorm)) - TOMP(target exit data map(release:GTA% AezNorm)) - TOMP(target exit data map(release:GTA% ANormSum)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% TT) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% Pvv) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreySigTotal) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreySigScat) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreySigScatVol) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreySigtInv) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% PhiInc) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% Q) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% TsaSource) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% AfpNorm) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% AezNorm) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% ANormSum) if (Size% ndim == 2) then - TOMP(target exit data map(release:GTA% Tvv)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% Tvv) endif endif - TOMP(target exit data map(release:GTA% GreySource)) - TOMP(target exit data map(release:GTA% GreyCorrection)) - TOMP(target exit data map(release:GTA% Chi)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreySource) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% GreyCorrection) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(GTA% Chi) TOMP(target exit data map(release:GTA)) do setID=nSets+1,nSets+nGTASets - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% AngleOrder)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% tPsi)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% pInc)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% src)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% AngleOrder) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% tPsi) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% pInc) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% src) if (Size% ndim == 2) then - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% tPsiM)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% tInc)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% tPsiM) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% tInc) endif enddo ! Material - TOMP(target exit data map(release:Mat% Tec)) - TOMP(target exit data map(release:Mat% Tecn)) - TOMP(target exit data map(release:Mat% denec)) - TOMP(target exit data map(release:Mat% cve)) - TOMP(target exit data map(release:Mat% rho)) - TOMP(target exit data map(release:Mat% nez)) - TOMP(target exit data map(release:Mat% stimComptonMult)) - TOMP(target exit data map(release:Mat% Siga)) - TOMP(target exit data map(release:Mat% Sigs)) - TOMP(target exit data map(release:Mat% Eta)) - TOMP(target exit data map(release:Mat% EmissionRate)) - TOMP(target exit data map(release:Mat% SMatEff)) - TOMP(target exit data map(release:Mat% PowerEmitted)) - TOMP(target exit data map(release:Mat% PowerCompton)) - TOMP(target exit data map(release:Mat% nonLinearIterations)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% Tec) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% Tecn) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% denec) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% cve) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% rho) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% nez) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% stimComptonMult) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% Siga) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% Sigs) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% Eta) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% EmissionRate) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% SMatEff) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% PowerEmitted) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% PowerCompton) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Mat% nonLinearIterations) TOMP(target exit data map(release:Mat)) +! IF THESE ARE UNALLOCATED WILL CRASH? #if !defined(TETON_ENABLE_MINIAPP_BUILD) - TOMP(target exit data map(release:Compton% gamMean)) - TOMP(target exit data map(release:Compton% gamSqdDGam)) - TOMP(target exit data map(release:Compton% gamCubedDGam)) - TOMP(target exit data map(release:Compton% gamD)) + if (getComptonFlag(Compton) /= comptonType_None) then + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Compton% gamMean) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Compton% gamSqdDGam) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Compton% gamCubedDGam) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Compton% gamD) + endif + TOMP(target exit data map(release:Compton)) #endif @@ -336,6 +295,7 @@ subroutine finalizeSets if ( Size% useGPU ) then #if defined(TETON_ENABLE_OPENMP_OFFLOAD) call finalizeGPUMemory(setID) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% AngleOrder) #endif endif @@ -365,9 +325,9 @@ subroutine finalizeSets if ( Size% useGPU ) then - TOMP(target exit data map(release:Quad%AngSetPtr)) - TOMP(target exit data map(release:Quad%GrpSetPtr)) - TOMP(target exit data map(release:Quad%SetDataPtr)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad%AngSetPtr) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad%GrpSetPtr) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad%SetDataPtr) TOMP(target exit data map(release:Quad)) endif diff --git a/src/teton/control/initializeSets.F90 b/src/teton/control/initializeSets.F90 index 8c6f694..6d4333f 100644 --- a/src/teton/control/initializeSets.F90 +++ b/src/teton/control/initializeSets.F90 @@ -28,6 +28,7 @@ subroutine initializeSets use Material_mod #if !defined(TETON_ENABLE_MINIAPP_BUILD) use ComptonControl_mod + use flags_mod #endif use RadIntensity_mod use MemoryAllocator_mod @@ -125,7 +126,7 @@ subroutine initializeSets #endif ! Map Quadrature List - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad) + TOMP(target enter data map(to:Quad)) TOMP_TARGET_ENTER_DATA_MAP_TO(Quad%SetDataPtr) TOMP_TARGET_ENTER_DATA_MAP_TO(Quad%GrpSetPtr) TOMP_TARGET_ENTER_DATA_MAP_TO(Quad%AngSetPtr) @@ -138,7 +139,7 @@ subroutine initializeSets ! Map ZoneSets - TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet) + TOMP(target enter data map(to:ZSet)) TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet% AL) TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet% AU) TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet% nCornerSet) @@ -169,56 +170,6 @@ subroutine initializeSets TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet% dComptonDT) TOMP_TARGET_ENTER_DATA_MAP_TO(ZSet% comptonSe) - - if (Options%getMPIUseDeviceAddresses()) then - ! Map Comm Sets - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr) - - CommSetLoop1: do cSetID=1,nCommSets+nGTASets - -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% NangBinList) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% AngleToBin) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% AngleOrder) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% AngleOrder0) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% RecvOrder0) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% RecvOrder) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% request) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% IncFlux) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% IncFluxOld) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% NetFlux) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% relError) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% Converged) - - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommPtr) - - ! Loop over communicators - do commID=1,Size%ncomm - do angle=1,Quad%CommSetPtr(cSetID)%NumAngles - if (Quad%CommSetPtr(cSetID)% CommPtr(commID, angle)%nSend > 0) then - TOMP_TARGET_ENTER_DATA_MAP_ALLOC(Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% psibsend) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% ListSend) - endif - - if (Quad%CommSetPtr(cSetID)% CommPtr(commID, angle)%nRecv > 0) then -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% ListRecv) - TOMP_TARGET_ENTER_DATA_MAP_ALLOC(Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% psibrecv) - endif - -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommPtr(commID, angle)% irequest) - end do - end do - -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommFluxPtr) - ! Loops over communicator fluxes -! do commID=1,Size%ncomm -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% irequest) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% IncFlux) -! TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% CommSetPtr(cSetID)% CommFluxPtr(commID)% ExitFlux) -! enddo - enddo CommSetLoop1 - - endif ! Options%getMPIUseDeviceAddresses() - ! Map Angle Sets do aSetID=1,nAngleSets+nGTASets @@ -244,13 +195,15 @@ subroutine initializeSets do angle=1,Quad% AngSetPtr(aSetID)% numAngles - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% AngSetPtr(aSetID)% BdyExitPtr(angle)% bdyList) + ! Unable to map this to UMPIRE device pool, causes a segfault. + TOMP(target enter data map(to: Quad% AngSetPtr(aSetID)% BdyExitPtr(angle)% bdyList)) if ( .not. Quad% AngSetPtr(aSetID)% FinishingDirection(angle) ) then - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% zonesInPlane) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane1) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane2) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% ndone) + ! Unable to map these to UMPIRE device pool, causes a segfault or wrong answers. + TOMP(target enter data map(to:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% zonesInPlane)) + TOMP(target enter data map(to:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane1)) + TOMP(target enter data map(to:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% hplane2)) + TOMP(target enter data map(to:Quad% AngSetPtr(aSetID)% HypPlanePtr(angle)% ndone)) endif enddo @@ -265,7 +218,7 @@ subroutine initializeSets ! Geometry - TOMP_TARGET_ENTER_DATA_MAP_TO(Geom) + TOMP(target enter data map(to:Geom)) TOMP_TARGET_ENTER_DATA_MAP_TO(Geom% Volume) TOMP_TARGET_ENTER_DATA_MAP_TO(Geom% VolumeOld) TOMP_TARGET_ENTER_DATA_MAP_TO(Geom% VolumeZone) @@ -291,21 +244,24 @@ subroutine initializeSets ! Radiation Intensity - TOMP_TARGET_ENTER_DATA_MAP_TO(Rad) + TOMP(target enter data map(to:Rad)) TOMP_TARGET_ENTER_DATA_MAP_TO(Rad% PhiTotal) TOMP_TARGET_ENTER_DATA_MAP_TO(Rad% radEnergy) #if !defined(TETON_ENABLE_MINIAPP_BUILD) - TOMP_TARGET_ENTER_DATA_MAP_TO(Compton) - TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamMean) - TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamSqdDGam) - TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamCubedDGam) - TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamD) + TOMP(target enter data map(to:Compton)) + + if (getComptonFlag(Compton) /= comptonType_None) then + TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamMean) + TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamSqdDGam) + TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamCubedDGam) + TOMP_TARGET_ENTER_DATA_MAP_TO(Compton% gamD) + endif #endif ! GTA - TOMP_TARGET_ENTER_DATA_MAP_TO(GTA) + TOMP(target enter data map(to:GTA)) TOMP_TARGET_ENTER_DATA_MAP_TO(GTA% GreySource) TOMP_TARGET_ENTER_DATA_MAP_TO(GTA% GreyCorrection) TOMP_TARGET_ENTER_DATA_MAP_TO(GTA% Chi) @@ -332,16 +288,12 @@ subroutine initializeSets ! Initialize communication handles for persistent communicators -! Moved from findexit routine. This needs to be done after the comm data is -! mapped to the GPU, as these are set up with the device addresses of these buffers. -! -- Aaron ! QUESTION - We're passing in angle set IDs, but inside the initcomm the -! parameter is 'cSetID'. Should this be a loop over comm sets or angle sets?? +! parameter is 'cSetID'. Should this be a loop over comm sets or angle sets?? -black27 do aSetID=1,nAngleSets+nGTASets call initcomm(aSetID) enddo - ! Begin Initialize Phase !$omp parallel do schedule(static) default(none) & @@ -384,11 +336,8 @@ subroutine initializeSets !$omp end parallel do if (Size%useGPU) then - do cSetID=1,nCommSets - CSet => getCommSetData(Quad, cSetID) - do setID=CSet% set1,CSet% set2 - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% AngleOrder) - enddo + do setID=1,nSets + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% AngleOrder) enddo endif @@ -408,7 +357,7 @@ subroutine initializeSets ! Material if ( Size% useGPU ) then - TOMP_TARGET_ENTER_DATA_MAP_TO(Mat) + TOMP(target enter data map(to:Mat)) TOMP_TARGET_ENTER_DATA_MAP_TO(Mat% Tec) TOMP_TARGET_ENTER_DATA_MAP_TO(Mat% Tecn) TOMP_TARGET_ENTER_DATA_MAP_TO(Mat% denec) diff --git a/src/teton/driver/test_driver.cc b/src/teton/driver/test_driver.cc index 3207fd7..1720cec 100644 --- a/src/teton/driver/test_driver.cc +++ b/src/teton/driver/test_driver.cc @@ -153,7 +153,7 @@ void conduit_debug_err_handler(const std::string &s1, const std::string &s2, int class TetonDriver { public: - TetonDriver(); + TetonDriver() = default; ~TetonDriver(); void initialize(); @@ -188,110 +188,62 @@ class TetonDriver void release(); private: - int return_status; - int myRank; - int mySize; - unsigned int cycles; - int numPhaseAngleSets; - int useUmpire; - int numOmpMaxThreads; // Max number of CPU threads to use If -1, use value from omp_get_max_threads() - double fixedDT; - bool dumpViz; - double energy_check_tolerance; - - unsigned int benchmarkProblem; - int numPolarUser; - int numAzimuthalUser; - int numGroupsUser; + int return_status{0}; + int myRank{0}; + int mySize{0}; + unsigned int cycles{0}; + int numPhaseAngleSets{0}; + int useUmpire{2}; + int numOmpMaxThreads{-1}; // Max number of CPU threads to use If -1, use value from omp_get_max_threads() + double fixedDT{0.0}; + bool dumpViz{false}; + double energy_check_tolerance{1.0e-9}; + int input_sanitizer_level{1}; + + unsigned int benchmarkProblem{0}; + int numPolarUser{-1}; + int numAzimuthalUser{-1}; + int numGroupsUser{0}; #if defined(TETON_ENABLE_MFEM) - int numSerialRefinementFactor; - int numParallelRefinementFactor; - int numSerialRefinementLevels; - int numParallelRefinementLevels; - mfem::Mesh *mesh; - mfem::ParMesh *pmesh; - mfem::ConduitDataCollection *conduit_data_collec; + int numSerialRefinementFactor{2}; + int numParallelRefinementFactor{2}; + int numSerialRefinementLevels{0}; + int numParallelRefinementLevels{0}; + mfem::Mesh *mesh{nullptr}; + mfem::ParMesh *pmesh{nullptr}; + mfem::ConduitDataCollection *conduit_data_collec{nullptr}; #endif // MPI - MPI_Comm comm; - int verbose; - - bool useGPU; - bool useCUDASweep; - int gta_kernel; - int sweep_kernel; - std::string scattering_kernel; - bool useDeviceAwareMPI; // Pass device addresses to MPI ( device-aware MPI ). - - ::Teton::Teton myTetonObject; - std::string inputPath; - std::string outputPath; - std::string label; - std::string colorFile; - std::string caliper_config; - std::string meshOrdering; -#if defined(TETON_ENABLE_CALIPER) - cali::ConfigManager mgr; -#endif - bool blueprintMesh; - int dims[3]; //!< Number of cells in blueprint mesh. -}; - -//--------------------------------------------------------------------------- -TetonDriver::TetonDriver() - : return_status(0), - myRank(0), - mySize(0), - cycles(1), - numPhaseAngleSets(0), - useUmpire(2), - numOmpMaxThreads(-1), // Max number of CPU threads to use If -1, use value from omp_get_max_threads() - fixedDT(0.0), - dumpViz(false), - energy_check_tolerance(0.0), - benchmarkProblem(0), - numPolarUser(-1), - numAzimuthalUser(-1), - numGroupsUser(0), -#if defined(TETON_ENABLE_MFEM) - numSerialRefinementFactor(2), - numParallelRefinementFactor(2), - numSerialRefinementLevels(0), - numParallelRefinementLevels(0), - mesh(nullptr), - pmesh(nullptr), - conduit_data_collec(nullptr), -#endif - comm(MPI_COMM_WORLD), - verbose(1), - useGPU(false), - useCUDASweep(false), - gta_kernel(1), - sweep_kernel(-1), - scattering_kernel(""), - useDeviceAwareMPI(false), - myTetonObject(), - inputPath("."), - outputPath("."), - label(""), - colorFile(""), + MPI_Comm comm{MPI_COMM_WORLD}; + int verbose{1}; + + bool useGPU{false}; + bool useCUDASweep{false}; + int gta_kernel{1}; + int sweep_kernel{-1}; + std::string scattering_kernel{}; + + ::Teton::Teton myTetonObject{}; + std::string inputPath{"."}; + std::string outputPath{"."}; + std::string label{}; + std::string colorFile{}; + std::string caliper_config + { #if defined(TETON_ENABLE_CUDA) - caliper_config("runtime-report,nvprof") + "runtime-report,nvprof" #else - caliper_config("runtime-report") + "runtime-report" #endif - , - meshOrdering("kdtree") + }; + std::string meshOrdering{"kdtree"}; #if defined(TETON_ENABLE_CALIPER) - , - mgr() + cali::ConfigManager mgr{}; #endif - , - blueprintMesh(false) -{ - dims[2] = dims[1] = dims[0] = 10; -} + bool blueprintMesh{false}; + int dims[3]{10, 10, 10}; //!< Number of cells in blueprint mesh. +}; //--------------------------------------------------------------------------- TetonDriver::~TetonDriver() @@ -393,6 +345,7 @@ int TetonDriver::processArguments(int argc, char *argv[]) {"blueprint", no_argument, 0, 'B'}, {"dims", required_argument, 0, 'd'}, {"caliper", required_argument, 0, 'p'}, + {"input_sanitizer_level", required_argument, 0, 'y'}, {"handler", no_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"input_path", required_argument, 0, 'i'}, @@ -428,10 +381,10 @@ int TetonDriver::processArguments(int argc, char *argv[]) int option_index = 0; #if defined(TETON_ENABLE_MFEM) - auto optString = "A:Bb:c:D:d:eG:gHhi:k:l:M:mn:o:P:p:s:S:t:u:Vv:" // Base options - "C:R:r:z:Z:"; // MFEM-only options + auto optString = "A:Bb:c:D:d:eG:gHhi:k:l:M:mn:o:P:p:s:S:t:u:Vv:y:" // Base options + "C:R:r:z:Z:"; // MFEM-only options #else - auto optString = "A:Bb:c:D:d:eG:gHhi:k:l:M:mn:o:P:p:s:S:t:u:Vv:"; // Base options + auto optString = "A:Bb:c:D:d:eG:gHhi:k:l:M:mn:o:P:p:s:S:t:u:Vv:y:"; // Base options #endif int opt = getopt_long(argc, argv, optString, long_options, &option_index); @@ -529,9 +482,6 @@ int TetonDriver::processArguments(int argc, char *argv[]) throw std::runtime_error("Unsupported mesh ordering " + meshOrdering); } break; - case 'm': - useDeviceAwareMPI = true; - break; case 'n': gta_kernel = atoi(optarg); if (myRank == 0) @@ -675,6 +625,13 @@ int TetonDriver::processArguments(int argc, char *argv[]) std::cout << "Teton driver: setting verbosity to " << verbose << std::endl; } break; + case 'y': + input_sanitizer_level = atoi(optarg); + if (myRank == 0) + { + std::cout << "Teton driver: setting input_sanitizer_level to " << input_sanitizer_level << std::endl; + } + break; case '?': if (myRank == 0) { @@ -699,7 +656,7 @@ void TetonDriver::printUsage(const std::string &argv0) const std::cout << " -e, --use_cuda_sweep Use experimental CUDA sweep. Do not specify this option and -g at the same time." << std::endl; - std::cout << " -g, --use-gpu-kernels Run solvers on GPU and enable associated sub-options, where supported." + std::cout << " -g, --use_gpu_kernels Run solvers on GPU and enable associated sub-options, where supported." << std::endl; std::cout << " -H, --handler Install an alternate Conduit error handler for debugging." << std::endl; @@ -728,9 +685,12 @@ void TetonDriver::printUsage(const std::string &argv0) const std::cout << " -t, --num_threads Max number of threads for cpu OpenMP parallel regions." << std::endl; std::cout << " -u, --umpire_mode <0,1,2> 0 - Disable umpire. 1 - Use Umpire for CPU allocations." << " 2 - Use Umpire for CPU and GPU allocations." << std::endl; - std::cout << " -V, --write-viz-file Output blueprint mesh vizualization file each cycle" << std::endl; + std::cout << " -V, --write_viz_file Output blueprint mesh vizualization file each cycle" << std::endl; std::cout << " -v, --verbose [0,1,2] 0 - quite 1 - informational(default) 2 - really chatty and dump files" << std::endl; + std::cout + << " -y, --input_sanitizer_level 0 - don't check inputs\n 1 - print one message for each bad input category\n 2 - print one message for each bad value of each bad category" + << std::endl; #if defined(TETON_ENABLE_MFEM) std::cout << " -r, --serial_refinement_levels Number of times to halve each edge the MFEM mesh before " << "doing parallel decomposition. Applied after the refinement_factor. (factor of 2^((r*dim) new zones)" @@ -778,24 +738,32 @@ int TetonDriver::execute() { if (benchmarkProblem == 1) { - options["iteration/relativeTolerance"] = 1e-10; + double relTol = 1.0e-10; + options["iteration/relativeTolerance"] = relTol; fixedDT = 1e-3; - cycles = 5; + if (cycles == 0) + { + cycles = 2; + } numPolarUser = 3; numAzimuthalUser = 3; numGroupsUser = 128; - energy_check_tolerance = 1.0e-11; + energy_check_tolerance = 10 * relTol; label = "UMTSPP1"; } else if (benchmarkProblem == 2) { - options["iteration/relativeTolerance"] = 1e-10; + double relTol = 1.0e-10; + options["iteration/relativeTolerance"] = relTol; fixedDT = 1e-3; - cycles = 5; + if (cycles == 0) + { + cycles = 2; + } numPolarUser = 2; numAzimuthalUser = 2; numGroupsUser = 16; - energy_check_tolerance = 1.0e-11; + energy_check_tolerance = 10 * relTol; label = "UMTSPP2"; } else @@ -932,7 +900,7 @@ int TetonDriver::execute() writeEndSummary(end_time, start_time, num_unknowns, local_num_unknowns); } - return 0; + return return_status; } //--------------------------------------------------------------------------- @@ -1511,11 +1479,6 @@ void TetonDriver::setOptions() options["size/useCUDASweep"] = false; - if (useDeviceAwareMPI == true) - { - options["mpi/useDeviceAddresses"] = 0; - } - if (!scattering_kernel.empty()) { int tetonComptonFlag; @@ -1643,6 +1606,10 @@ void TetonDriver::setOptions() //Set verbosity level ( default is 1 ) options["verbose"] = verbose; + // Set sanitizer options: + options["iteration/sanitizer/level"] = input_sanitizer_level; + options["iteration/sanitizer/kill_if_bad"] = 1; // Have TetonConduitInterface kill the code + if (fixedDT > 0) { options["iteration/dtrad"] = fixedDT; @@ -1914,7 +1881,7 @@ void TetonDriver::writeStartSummary(unsigned int ndims, if (options.has_path("iteration/relativeTolerance")) { double relative_tol = options.fetch_existing("iteration/relativeTolerance").value(); - std::cout << "Relative tolerance set to " << relative_tol << "." << std::endl; + std::cout << "Iteration control: relative tolerance set to " << relative_tol << "." << std::endl; } std::cout << "=================================================================" << std::endl; std::cout << std::endl; @@ -1983,28 +1950,26 @@ void TetonDriver::writeEndSummary(double end_time, outfile.close(); -#if defined(TETON_ENABLE_MINIAPP_BUILD) - if (benchmarkProblem <= 2) + if (std::abs(energy_check) / (std::abs(energy_radiation) + 1.0e-50) <= energy_check_tolerance) { - if (energy_check <= energy_check_tolerance) - { - std::cout << "RESULT CHECK PASSED: Energy check " << energy_check << " exceeded tolerance of " - << energy_check_tolerance << std::endl; - } - else - { - std::cerr << "RESULT CHECK FAILED: Energy check " << energy_check << " exceeded tolerance of " - << energy_check_tolerance << std::endl; - return_status = 1; - } + std::cout << "RESULT CHECK PASSED: Energy check " << energy_check << " within tolerance of +/- " + << energy_check_tolerance << "; check '" << filePath << "' for tally details\n" + << std::endl; } else { - std::cerr << "Benchmark problem " << benchmarkProblem << " does not have reference data to compare against." + std::cerr << "RESULT CHECK FAILED: Energy check " << energy_check << " exceeded tolerance of +/- " + << energy_check_tolerance << "; check '" << filePath << "' for tally details\n" << std::endl; - } -#endif + std::cerr << "Energy radiation: " << energy_radiation << std::endl; + std::cerr << "Power incident: " << power_incident << std::endl; + std::cerr << "Power escaped: " << power_escape << std::endl; + std::cerr << "Power absorbed: " << power_absorbed << std::endl; + std::cerr << "Power emitted: " << power_emitted << std::endl << std::endl; + + return_status = 1; + } } } } @@ -2049,12 +2014,6 @@ void TetonDriver::finalize() } #endif -#if defined(TETON_ENABLE_OPENMP_OFFLOAD) - omp_pause_resource_all(omp_pause_hard); - if (myRank == 0) - print_gpu_mem("Teton driver: After omp_pause_resource_all at end of run."); -#endif - release(); #if defined(TETON_ENABLE_CALIPER) diff --git a/src/teton/gpu/CornerSweepUCBxyz_OMPOL.F90 b/src/teton/gpu/CornerSweepUCBxyz_OMPOL.F90 index 8ff8df7..904e17f 100644 --- a/src/teton/gpu/CornerSweepUCBxyz_OMPOL.F90 +++ b/src/teton/gpu/CornerSweepUCBxyz_OMPOL.F90 @@ -538,17 +538,20 @@ subroutine CornerSweepUCBxyz_GPU(nSets, sendIndex, savePsi) half*aez*gden*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) ) )/ & ( gnum + gden*sig) - Set% S(g,c,ii) = Set% S(g,c,ii) + sez - Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez - else - sez = half*aez*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) )/sig - Set% S(g,c,ii) = Set% S(g,c,ii) + sez - Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez + sez = half*aez*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) )/sig endif TestOppositeFace + ATOMIC_UPDATE + Set% S(g,c,ii) = Set% S(g,c,ii) + sez + ATOMIC_END + + ATOMIC_UPDATE + Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez + ATOMIC_END + endif enddo @@ -643,17 +646,20 @@ subroutine CornerSweepUCBxyz_GPU(nSets, sendIndex, savePsi) half*aez*gden*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) ) )/ & ( gnum + gden*sig) - Set% S(g,c,ii) = Set% S(g,c,ii) + sez - Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez - else - sez = half*aez*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) )/sig - Set% S(g,c,ii) = Set% S(g,c,ii) + sez - Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez + sez = half*aez*( Set% Q(g,c,ii) - Set% Q(g,cez,ii) )/sig endif TestOppositeFace1 + ATOMIC_UPDATE + Set% S(g,c,ii) = Set% S(g,c,ii) + sez + ATOMIC_END + + ATOMIC_UPDATE + Set% S(g,cez,ii) = Set% S(g,cez,ii) - sez + ATOMIC_END + endif enddo diff --git a/src/teton/gpu/OMPWrappers.F90.templates b/src/teton/gpu/OMPWrappers.F90.templates index ca6585d..230277a 100644 --- a/src/teton/gpu/OMPWrappers.F90.templates +++ b/src/teton/gpu/OMPWrappers.F90.templates @@ -42,7 +42,7 @@ !----------------------------------------------------------------------------- !----------------------------------------------------------------------------- - function FTM_CAT4(target_alloc_and_pair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) (h_ptr, label) result(err_code) + subroutine FTM_CAT4(target_alloc_and_pair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) (h_ptr, label) FTM_TYPE ( FTM_KIND ) , intent(in), target, dimension( FTM_RANK_STRING ) :: h_ptr character(len=*), intent(in), optional :: label @@ -50,84 +50,93 @@ integer(C_SIZE_T) :: num_bytes, offset type(C_PTR) :: d_ptr - err_code = 1 - num_bytes = storage_size(h_ptr,kind=C_SIZE_T)/8*SIZE(h_ptr) - offset = 0 - d_ptr = c_null_ptr + err_code = 0 + + if (Allocator%umpire_device_allocator_id >= 0) then + + num_bytes = storage_size(h_ptr,kind=C_SIZE_T)/8*SIZE(h_ptr) + + TETON_VERIFY( num_bytes > 0, "Unable to map umpire memory for " // label // " to device, size is not positive.") - ! Allocate device memory via umpire allocator + offset = 0 + d_ptr = c_null_ptr + + ! Allocate device memory via umpire allocator !$omp critical #if defined(TETON_ENABLE_UMPIRE) - d_ptr = Allocator%umpire_device_allocator%allocate_pointer(num_bytes) + d_ptr = Allocator%umpire_device_allocator%allocate_pointer(num_bytes) #endif !$omp end critical - ! Verify that pointer now points to something. - TETON_VERIFY( C_ASSOCIATED(d_ptr), "Failed to allocate " // label // " on device.") + ! Verify that pointer now points to something. + TETON_VERIFY( C_ASSOCIATED(d_ptr), "Failed to allocate " // label // " on device.") #if defined(TETON_ENABLE_OPENMP_OFFLOAD) - ! Associate host and device c pointers. - err_code = omp_target_associate_ptr( C_LOC(h_ptr), d_ptr, num_bytes, offset, omp_get_default_device() ) + ! Associate host and device c pointers. + err_code = omp_target_associate_ptr( C_LOC(h_ptr), d_ptr, num_bytes, offset, omp_get_default_device() ) #endif - TETON_VERIFY( err_code == 0, "Failed to associate host variable " // label // " with device pointer.") + TETON_VERIFY( err_code == 0, "Failed to associate host variable " // label // " with device pointer.") #if defined(TETON_ENABLE_OPENMP_OFFLOAD) - TETON_VERIFY( omp_target_is_present(C_LOC(h_ptr), omp_get_default_device()) /= 0, "Runtime failed to add " // label // " in host<->device table.") + TETON_VERIFY( omp_target_is_present(C_LOC(h_ptr), omp_get_default_device()) /= 0, "Runtime failed to add " // label // " in host<->device table.") #endif - - return + endif - end function FTM_CAT4(target_alloc_and_pair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) + end subroutine FTM_CAT4(target_alloc_and_pair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) !----------------------------------------------------------------------------- - function FTM_CAT4(target_free_and_unpair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) (h_ptr, label) result(err_code) + subroutine FTM_CAT4(target_free_and_unpair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) (h_ptr, label) FTM_TYPE ( FTM_KIND ) , intent(in), target, dimension( FTM_RANK_STRING ) :: h_ptr character(len=*), intent(in), optional :: label integer :: err_code type(C_PTR) :: d_ptr + err_code = 1 - d_ptr = c_null_ptr + if (Allocator%umpire_device_allocator_id >= 0) then + + d_ptr = c_null_ptr #if defined(TETON_OPENMP_HAS_USE_DEVICE_ADDR) - TOMP( target data use_device_addr(h_ptr)) + TOMP( target data use_device_addr(h_ptr)) #else - TOMP( target data use_device_ptr(h_ptr)) + TOMP( target data use_device_ptr(h_ptr)) #endif - d_ptr = C_LOC(h_ptr) - TOMP( end target data ) + d_ptr = C_LOC(h_ptr) + TOMP( end target data ) - TETON_VERIFY( c_associated(d_ptr), "Failed to retrieve device pointer for " // label ) + TETON_VERIFY( c_associated(d_ptr), "Failed to retrieve device pointer for " // label ) ! Disassociate the pointer #if defined(TETON_ENABLE_OPENMP_OFFLOAD) ! Workaround for El Capitan EA platform. Otherwise, the runtime complains about an invalid reference count in the pointer table. ! (Kostas has put in a bug ticket to compiler team already) - TOMP(target exit data map(release:h_ptr)) + TOMP(target exit data map(release:h_ptr)) - err_code = omp_target_disassociate_ptr( C_LOC(h_ptr), omp_get_default_device() ) + err_code = omp_target_disassociate_ptr( C_LOC(h_ptr), omp_get_default_device() ) #endif - TETON_VERIFY( err_code == 0, "Failed to disassociate host variable " // label ) + TETON_VERIFY( err_code == 0, "Failed to disassociate host variable " // label ) #if defined(TETON_ENABLE_OPENMP_OFFLOAD) - TETON_VERIFY( omp_target_is_present(C_LOC(h_ptr), omp_get_default_device()) == 0, "Runtime failed to remove variable " // label // " from host<->device table.") + TETON_VERIFY( omp_target_is_present(C_LOC(h_ptr), omp_get_default_device()) == 0, "Runtime failed to remove variable " // label // " from host<->device table.") #endif !$omp critical #if defined(TETON_ENABLE_UMPIRE) - call Allocator%umpire_device_allocator%deallocate_pointer(d_ptr) + call Allocator%umpire_device_allocator%deallocate_pointer(d_ptr) #endif !$omp end critical - return - end function FTM_CAT4(target_free_and_unpair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) + endif + + end subroutine FTM_CAT4(target_free_and_unpair_ptrs,FTM_TYPE,FTM_KIND,FTM_RANK) !----------------------------------------------------------------------------- diff --git a/src/teton/gpu/finalizeGPUMemory.F90 b/src/teton/gpu/finalizeGPUMemory.F90 index 4047f61..c31569d 100644 --- a/src/teton/gpu/finalizeGPUMemory.F90 +++ b/src/teton/gpu/finalizeGPUMemory.F90 @@ -30,42 +30,18 @@ subroutine finalizeGPUMemory(setID) ! Delete the arrays - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% AngleOrder)) - if (Size% ndim == 2) then - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% PsiM)) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% PsiM) endif - if (Allocator%umpire_device_allocator_id >= 0) then - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% Psi) - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% Psi1) - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% PsiB) - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% Q) - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% S) - err_code = target_free_and_unpair_ptrs(Quad% SetDataPtr(setID)% cyclePsi) - - ! I'm assuming these are necessary to force the runtime to perform the - ! pointer unattachments between the derived types and their pointer - ! members. --Aaron - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% Psi)) - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% Psi1)) - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% PsiB)) - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% Q)) - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% S)) - TOMP(target exit data map(always, release:Quad% SetDataPtr(setID)% cyclePsi)) - - else - - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% Psi)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% Psi1)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% PsiB)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% Q)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% S)) - TOMP(target exit data map(release:Quad% SetDataPtr(setID)% cyclePsi)) - - endif + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% Psi) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% Psi1) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% PsiB) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% Q) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% S) + TOMP_TARGET_EXIT_DATA_MAP_RELEASE(Quad% SetDataPtr(setID)% cyclePsi) return end subroutine finalizeGPUMemory diff --git a/src/teton/gpu/initializeGPUMemory.F90 b/src/teton/gpu/initializeGPUMemory.F90 index 8479486..368b8c4 100644 --- a/src/teton/gpu/initializeGPUMemory.F90 +++ b/src/teton/gpu/initializeGPUMemory.F90 @@ -35,56 +35,12 @@ subroutine initializeGPUMemory TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID) % PsiM) endif - if (Allocator%umpire_device_allocator_id >= 0) then - - ! TODO - Move this documentation to an appropriate place. - ! Maybe in the OpenMP wrapper module? Or a README? - ! - ! The target_alloc_and_pair_ptrs function allows OpenMP to make use - ! of memory from an umpire device memory pool. However, any data - ! mapped this way will not work with map to/from pragmas. They will - ! essentially be no-ops. - ! - ! This function only supports standalone pointers, on its own. - ! To work with nested data, a follow up map is required (see below). - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID) % Psi, "Psi") - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID)% Psi1, "Psi1") - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID)% PsiB, "PsiB") - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID)% Q, "Q") - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID)% S, "S") - err_code = target_alloc_and_pair_ptrs(Quad% SetDataPtr(setID)% cyclePsi, "cyclePsi") - - ! The maps below are essentially no-ops ( they do not actually allocate - ! data on the gpu ). However, when combined with the 'always' keyword - ! they force the runtime to perform the pointer attachment between - ! the derived type and its member pointers. - ! Accesses such as 'Set%Psi' on the gpu will then work. - ! - ! See OpenMP spec "2.19.7.1 map Clause" for details on this behavior. - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% Psi)) - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% Psi1)) - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% PsiB)) - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% Q)) - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% S)) - TOMP(target enter data map(always,alloc:Quad% SetDataPtr(setID)% cyclePsi)) - - TOMP(target update to(Quad% SetDataPtr(setID)% Psi)) - TOMP(target update to(Quad% SetDataPtr(setID)% Psi1)) - TOMP(target update to(Quad% SetDataPtr(setID)% PsiB)) - TOMP(target update to(Quad% SetDataPtr(setID)% Q)) - TOMP(target update to(Quad% SetDataPtr(setID)% S)) - TOMP(target update to(Quad% SetDataPtr(setID)% cyclePsi)) - - else - - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Psi) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Psi1) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% PsiB) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Q) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% S) - TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% cyclePsi) - - endif + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Psi) + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Psi1) + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% PsiB) + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% Q) + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% S) + TOMP_TARGET_ENTER_DATA_MAP_TO(Quad% SetDataPtr(setID)% cyclePsi) enddo diff --git a/src/teton/include/TetonBlueprint.hh b/src/teton/include/TetonBlueprint.hh index d68f5e4..3599fe4 100644 --- a/src/teton/include/TetonBlueprint.hh +++ b/src/teton/include/TetonBlueprint.hh @@ -67,6 +67,18 @@ class TetonBlueprint std::vector &face_to_bcid, int rank); + +/* ------------------------------------------------------------------------------------------------------------------ + This builds several arrays containing the boundary condition information for each boundary element in 1D. + The 'shared' boundary info is pulled from the blueprint face topology adjacency sets. + TODO: need to fix for MPI-decomposed meshes + ------------------------------------------------------------------------------------------------------------------ */ + void ComputeFaceIDs1D(int *boundary_connectivity, + int *boundary_attributes, + std::vector &boundaries_types, + int rank); + +/* ------------------------------------------------------------------------------------------------------------------ /* ------------------------------------------------------------------------------------------------------------------ Set of methods used to create the teton mesh connectivity array. This array consists of, for each zone,: zoneID, - id of zone @@ -167,6 +179,10 @@ class TetonBlueprint std::vector corner_to_node_z; std::vector m_face_to_bcid; + // For handling Teton's boundary conditions + enum BC_Type { bc_reflecting = 32, bc_vaccuum = 35, + bc_source_temp = 34, bc_source_fd = 36, bc_shared = 33 }; + public: TetonBlueprint(conduit::Node &blueprintNode, conduit::Node ¶metersNode) : mMeshNode(blueprintNode), diff --git a/src/teton/include/TetonConduitInterface.hh b/src/teton/include/TetonConduitInterface.hh index 313335c..754ac4e 100644 --- a/src/teton/include/TetonConduitInterface.hh +++ b/src/teton/include/TetonConduitInterface.hh @@ -13,6 +13,7 @@ #ifndef __TETON_CONDUIT_INTERFACE_HH__ #define __TETON_CONDUIT_INTERFACE_HH__ +#include "TetonSources.hh" #include "conduit/conduit.hpp" #include #include @@ -22,7 +23,7 @@ namespace Teton class Teton { public: - Teton() : areSourceProfilesSet(false) + Teton() : areSourceProfilesSet(false), mIsInitialized(false) { } @@ -36,6 +37,16 @@ class Teton // forces on the vertices void storeMeshData(); + // sanitizer_node must have `level`: + // 0 - no sanitizer (does nothing and returns + // 1 - quieter sanitizer + // 2 - noisy sanitizer + // Optional entries: + // -`cat_list` (defaults to all inputs except \sigma_s) + // -`kill_if_bad` (defaults to false) + // Returns the number of bad input categories + int checkInputSanity(const conduit::Node &sanitizer_node) const; + void constructBoundaries(); void constructComptonControl(); @@ -104,8 +115,9 @@ class Teton // changes to the mesh nodes (from hydro) void updateMeshPositions(); - // TODO: remove this once all host codes swich to getting the + // TODO: remove these once all host codes swich to getting the // force density fields from the conduit node + void getRadiationForceDensity1D(double *RadiationForceDensityX); void getRadiationForceDensity(double *RadiationForceDensityX, double *RadiationForceDensityY, double *RadiationForceDensityZ); @@ -173,6 +185,7 @@ class Teton private: bool areSourceProfilesSet; // Whether or not setSourceProfiles has been called + bool mIsInitialized; // Whether or not Teton::initialize has been called int mGTAorder; // quadrature order used for grey transport acceleration (def=2 for s2 acc) int mInternalComptonFlag; @@ -181,6 +194,10 @@ class Teton MPI_Comm mCommunicator; int mRank; + // list of sources to be appended to right hand side before each time step + // these could be point sources, MMS, etc. + TetonSourceManager mSourceManager; + // To compute radiation forces on the vertices, Teton // needs to hang on to this connectivity array // !!!! NOTE: THIS WILL NOT WORK FOR AMR MESHES, WHERE A @@ -190,11 +207,6 @@ class Teton std::vector mZoneToNCorners; std::vector mZoneToCorners; std::vector mCornerToZone; - // REMOVE // - std::vector mCornerToVertexCoordX; - std::vector mCornerToVertexCoordY; - std::vector mCornerToVertexCoordZ; - // REMOVE // }; } //end namespace Teton diff --git a/src/teton/include/TetonInterface.hh b/src/teton/include/TetonInterface.hh index 213c4b7..e8fe290 100644 --- a/src/teton/include/TetonInterface.hh +++ b/src/teton/include/TetonInterface.hh @@ -156,6 +156,22 @@ void teton_addprofile_internal(const int *NumTimes, const double *Values, int *TetonProfileID); +// append source terms to the right hand side for the next time step +// +// the units of sourceValues are such that sourceValues[angle][zone][group]*c*dt has the same units as psi +// group varies fastest, angle varies slowest. +// +// if numSourceAngles == 1, then all source values will be divided by wtiso (4pi) +// if numSourceAngles == numPolarAngles, then all source values will be divided by half of wtiso (2pi) +// +// Future TODO: variants of this function for isotropic sources? +void teton_appendsourcetopsi( + const int *numSourceZones, // scalar + const int *numSourceAngles, // scalar, must be 1, # of polar angles, or # of angles + const int *numSourceGroups, // scalar, must match # of groups in Teton + const int *sourceZoneList, // array of length numSourceZone + const double *sourceValues); // numAngles x numSourceZones x numGroups, same units as \psi/c/dt + // // teton_resetprofile_internal // diff --git a/src/teton/include/TetonSources.hh b/src/teton/include/TetonSources.hh new file mode 100644 index 0000000..c69db9e --- /dev/null +++ b/src/teton/include/TetonSources.hh @@ -0,0 +1,153 @@ +#ifndef TETON_SOURCES_HH__ +#define TETON_SOURCES_HH__ + +#include "TetonInterface.hh" +#include + +class TetonSource +{ + public: + TetonSource(int nangles, int nsrczones, int ngroups) + : m_num_angles(nangles), + m_num_srczones(nsrczones), + m_num_groups(ngroups), + m_src_values(), + m_zone_list(), + m_time(-1.) + { + if (m_num_srczones > 0) + { + m_src_values.resize(nangles * nsrczones * ngroups); + m_zone_list.resize(nsrczones); + } + } + + virtual ~TetonSource() = default; + + virtual void UpdateSourceValues(double time, double dtrad) = 0; + + const double *GetSourceValues() const + { + return m_src_values.data(); + } + const int *GetZoneList() const + { + return m_zone_list.data(); + } + + int GetNumAngles() const + { + return m_num_angles; + } + int GetNumZones() const + { + return m_num_srczones; + } + int GetNumGroups() const + { + return m_num_groups; + } + + protected: + const int m_num_angles; + const int m_num_srczones; + const int m_num_groups; + + // nangles x nsrczones x ngroups array with the source values with this time step + // groups varies fastest, angles varies slowest + // Units should be energy per time + std::vector m_src_values; + + // length nsrczones list of Fortran indices corresponding to the zones for m_src_values + // (indices should be between 1 and the number of zones in the local domain, inclusive) + std::vector m_zone_list; + + // Current value of the time + double m_time; +}; + +class TetonSourceManager +{ + public: + TetonSourceManager(int rank = -1) : mRank(rank), m_source_list() + { + } + + void AddSource(TetonSource *source) + { + m_source_list.push_back(source); + } + + void AddPointSourceFromTally(int teton_zone_index, + std::string tally_file_name, + std::string tally_name, + double multiplier = 1.0); + + void UpdateSources(double time, double dtrad) + { + for (auto source : m_source_list) + { + source->UpdateSourceValues(time, dtrad); + } + } + + void UpdatePsiWithSources() const + { + for (auto source : m_source_list) + { + const int num_zones = source->GetNumZones(); + if (num_zones < 1) + continue; // this rank has no source zones + const int num_angles = source->GetNumAngles(); + const int num_groups = source->GetNumGroups(); + // Some call to Teton: + teton_appendsourcetopsi(&num_zones, + &num_angles, + &num_groups, + source->GetZoneList(), + source->GetSourceValues()); + } + } + + void SetRank(int rank) + { + mRank = rank; + } + + ~TetonSourceManager() + { + for (auto source : m_source_list) + { + delete source; + } + m_source_list.clear(); + } + + protected: + int mRank; + std::vector m_source_list; +}; + +class PointSource : public TetonSource +{ + // Class for an extensive point source + public: + // timevals is of size ntimes + // source_profile is of size ntimes x ngroups x nangles + PointSource(int nangles, + int ngroups, + int zone_index, // Which zone is the point source in? TODO, convert from coordinate to zone index + int ntimebins, + const double *time_bin_bounds, + const double *source_profile); // units of energy + + virtual void UpdateSourceValues(double time, double dtrad); + + protected: + // TODO, do we want to make copies of these, or just let these be const pointers? + std::vector m_profile; // size m_num_times x m_num_groups x m_num_angles + std::vector m_time_bin_bounds; // length m_num_time_bins+1 + int m_num_time_bins; // Number of time bins +}; + +#endif // TETON_SOURCES_HH__ diff --git a/src/teton/include/macros.h b/src/teton/include/macros.h index a680425..264cdd2 100644 --- a/src/teton/include/macros.h +++ b/src/teton/include/macros.h @@ -85,4 +85,15 @@ # define TETON_OPENMP_HAS_USE_DEVICE_ADDR #endif +#ifdef TETON_ENABLE_OPENACC +# define ATOMIC_UPDATE !$acc atomic update +# define ATOMIC_END !$acc end atomic +#elif defined(TETON_ENABLE_OPENMP_OFFLOAD) +# define ATOMIC_UPDATE !$omp atomic update +# define ATOMIC_END !$omp end atomic +#else +# define ATOMIC_UPDATE +# define ATOMIC_END +#endif + #endif diff --git a/src/teton/include/omp_wrappers.h b/src/teton/include/omp_wrappers.h index 689864a..27f765c 100644 --- a/src/teton/include/omp_wrappers.h +++ b/src/teton/include/omp_wrappers.h @@ -14,7 +14,16 @@ #if defined(TETON_ENABLE_OPENMP_OFFLOAD) # define TOMP(source) !$omp source # define TOMPC(source) !$omp& source -# define TOMP_TARGET_ENTER_DATA_MAP_TO(source) !$omp target enter data map(to:source) + +#define TOMP_TARGET_ENTER_DATA_MAP_TO(source) \ +call target_alloc_and_pair_ptrs(source, "source");\ +!$omp target enter data map(always,alloc:source);\ +!$omp target update to(source) + +#define TOMP_TARGET_EXIT_DATA_MAP_RELEASE(source) \ +call target_free_and_unpair_ptrs(source, "source"); \ +!$omp target exit data map(always, release:source) + # define TOMP_TARGET_ENTER_DATA_MAP_ALLOC(source) !$omp target enter data map(alloc:source) # define TOMP_TARGET_ENTER_DATA_MAP_FROM(source) !$omp target exit data map(from:source) #else @@ -23,4 +32,5 @@ # define TOMP_TARGET_ENTER_DATA_MAP_TO(source) # define TOMP_TARGET_ENTER_DATA_MAP_ALLOC(source) # define TOMP_TARGET_ENTER_DATA_MAP_FROM(source) +# define TOMP_TARGET_EXIT_DATA_MAP_RELEASE(source) #endif diff --git a/src/teton/interface/CMakeLists.txt b/src/teton/interface/CMakeLists.txt index 01fafed..62b0a85 100644 --- a/src/teton/interface/CMakeLists.txt +++ b/src/teton/interface/CMakeLists.txt @@ -1,5 +1,6 @@ if (ENABLE_BLUEPRINT_INTERFACE) target_sources( teton PRIVATE + TetonSources.cc TetonBlueprint.cc TetonConduitInterface.cc TetonSurfaceTallies.cc diff --git a/src/teton/interface/TetonBlueprint.cc b/src/teton/interface/TetonBlueprint.cc index 98c90fd..57506ff 100644 --- a/src/teton/interface/TetonBlueprint.cc +++ b/src/teton/interface/TetonBlueprint.cc @@ -769,12 +769,6 @@ void TetonBlueprint::ComputeSharedFaces(int rank) } face_is_shared_bndry[face] = true; } - - // TODO: QUESTION: DO WE NEED TO DO THIS ? - if (nsfaces > 0) - { - mMeshNode["shared_boundaries/shared_faces"].set(shared_faces_array_ptr, sfaces_array_len); - } } void TetonBlueprint::ComputeTetonMeshConnectivityArray(int rank) @@ -1457,10 +1451,6 @@ void TetonBlueprint::ProcessSurfaceEdits(int rank) surf_edits_corners[surface_id].size()); surface_id += 1; } - - // REMOVE // - // conduit::relay::io::save(mMeshNode["teton/surface_edits"], "surface_edits.json", "json"); - // REMOVE // } void TetonBlueprint::ComputeFaceIDs(std::map> &boundaries, @@ -1739,20 +1729,232 @@ void TetonBlueprint::ComputeFaceIDs(std::map> &boundaries, mParametersNode["boundary_conditions/neighbor_ids"].set(neighbor_ids.data(), neighbor_ids.size()); } +// Need to populate +// BCTypeInt = {type1, type2}, where 31 <= type1 <= 37 is Teton's boundary ID type mapping, +// where type1 corresponds to the first boundary and type2 corresponds to the second boundary. +// Here BCCornerFaces = {1,1} since always 1 corner per boundary condition (is this ever not true?). +// BCNeighborID = {rank1, rank2}, where rank1 corresponds to the corresponding rank of a shared boundary. +// If not a shared boundary, then rank1 = -1 (same with rank2). +// teton_addboundary(&numBCTotal, &BCTypeInt[0], &BCCornerFaces[0], &BCNeighborID[0]); +// Also need +// teton_setzone1d(&zoneID, &numBCTotal, &BCZoneID[0]); +// Here numBCTotal = 2 (is this ever not true)? and +// Teton expects two boundary conditions, one for zone1D == 1 and one for zoneID == nzones +// Here BCZoneID[0] == 1, BCZoneID[1] == nzones or BCZoneID[0] == nzones and BCZoneID[1] == 1 + +void TetonBlueprint::ComputeFaceIDs1D(int *boundary_connectivity, // boundary to vertex mapping + int *boundary_attributes, // boundary to mesh attribute mapping + std::vector &boundaries_types, // boundary to Teton boundary type mapping + int rank) +{ + // need to fill these out and store them in mMeshNode + std::vector bc_type_int(2); + std::vector bc_corner_faces(2); + std::vector bc_nghbr_id(2); + std::vector bc_zone_id(2); + + // TODO: check if there is ever a case where this doesn't hold + bc_corner_faces[0] = 1; + bc_corner_faces[1] = 1; + + std::map boundary_id_to_type; + // map boundary ID to corresponding Teton boundary type (c.g. 34 for a vaccuum boundary condition) + if (mParametersNode.has_path("boundary_conditions/id_to_type_map")) + { + + conduit::int_accessor boundary_ids = mParametersNode.fetch_existing("boundary_conditions/id_to_type_map/ids").value(); + conduit::int_accessor teton_bc_types = mParametersNode.fetch_existing("boundary_conditions/id_to_type_map/types").value(); + + TETON_VERIFY_C(rank, boundary_ids.number_of_elements() == teton_bc_types.number_of_elements(), "boundary ids list length != boundary types list length"); + + for (int i = 0; i < boundary_ids.number_of_elements(); ++i) + { + boundary_id_to_type[boundary_ids[i]] = teton_bc_types[i]; + } + } + + std::map number_boundary_types; + for (int j = 31; j < 37; ++j) + number_boundary_types[j] = 0; + + // First tag each face with the original boundary condition ID from the mesh. + // This will later be changed to the re-enumerated boundary condition ID (so + // each ID is consecutive and reordered by boundary condition type). + //elem_to_bcid.resize(nelem); + //for (int face = 0; face < nfaces; ++face) + // elem_to_bcid[face] = 0; + + for (int j = 0; j < 2; ++j) + { + int bc_id = boundary_attributes[j]; + { + int bc_type = boundary_id_to_type.at(bc_id); + number_boundary_types[bc_type] += 1; + } + } + + int num_face_nbrs_for_teton = 0; + + if (mMeshNode.has_path("adjsets/main_face")) + { + num_face_nbrs_for_teton = mMeshNode.fetch_existing("adjsets/main_face/groups").number_of_children(); + } + + boundaries_types[0] = number_boundary_types.at(32); // number of reflecting + boundaries_types[1] = number_boundary_types.at(35); // number of vacuum + boundaries_types[2] = number_boundary_types.at(34) + number_boundary_types.at(36); // number of source + boundaries_types[3] = num_face_nbrs_for_teton; // number of shared + + // Order the boundaries by: reflecting, vacuum, source, shared. + + // First add the non-shared boundaries + int num_nonshared_bndrs = boundaries_types[0] + boundaries_types[1] + boundaries_types[2]; + int num_bndrs = num_nonshared_bndrs + boundaries_types[3]; + int nreflec, nvacuum; //, nsource; + nreflec = boundaries_types[0]; + nvacuum = boundaries_types[1]; +// nsource = boundaries_types[2]; +// We don't appear to use nsource for anything -- black27 + std::vector bc_ids_ordered(num_bndrs); + int jreflect = 0, jvaccuum = 0, jsource = 0; + for (int j = 0; j < 2; ++j) + { + int bc_id = boundary_attributes[j]; + int bc_type = boundary_id_to_type.at(bc_id); + if (bc_type == BC_Type::bc_reflecting) // reflecting + { + bc_ids_ordered[jreflect] = bc_id; + bc_type_int[jreflect] = bc_type; + int bc_vertex_id = boundary_connectivity[j]; + if (bc_vertex_id == 0) + { + bc_zone_id[jreflect] = 1; // vertexID 0 corresponds to zoneID = 1 in Teton + } + else + { + bc_zone_id[jreflect] = bc_vertex_id; // vertexID nzones corresponds to zoneID = nzones in Teton + } + } + else if (bc_type == BC_Type::bc_vaccuum) // vaccuum + { + int index = jvaccuum + nreflec; + bc_ids_ordered[index] = bc_id; + bc_type_int[index] = bc_type; + int bc_vertex_id = boundary_connectivity[j]; + if (bc_vertex_id == 0) + { + bc_zone_id[index] = 1; // vertexID 0 corresponds to zoneID = 1 in Teton + } + else + { + bc_zone_id[index] = bc_vertex_id; // vertexID nzones corresponds to zoneID = nzones in Teton + } + jvaccuum += 1; + } + else if (bc_type == BC_Type::bc_source_temp || bc_type == BC_Type::bc_source_fd) // source + { + int index = jsource + nreflec + nvacuum; + bc_ids_ordered[index] = bc_id; + bc_type_int[index] = bc_type; + int bc_vertex_id = boundary_connectivity[j]; + if (bc_vertex_id == 0) + { + bc_zone_id[index] = 1; // vertexID 0 corresponds to zoneID = 1 in Teton + } + else + { + bc_zone_id[index] = bc_vertex_id; // vertexID nzones corresponds to zoneID = nzones in Teton + } + jsource += 1; + } + } + + // Now compute the shared boundaries info + + if (mMeshNode.has_path("adjsets/mesh")) + { + const conduit::Node &face_adjset = mMeshNode.fetch_existing("adjsets/mesh"); + conduit::NodeConstIterator groups_it = face_adjset["groups"].children(); + int fn_counter_teton = 0; + while (groups_it.has_next()) + { + const conduit::Node &group = groups_it.next(); + int num_sfaces = group["values"].dtype().number_of_elements(); + int nbr_rank = group["neighbors"].to_int(); + int index = fn_counter_teton + num_nonshared_bndrs; + // We want to increment this only when num_sfaces > 0 + fn_counter_teton += 1; + + //int *szones_ptr = group["values"].value(); + const conduit::int32 *szones_ptr = group["values"].as_int32_ptr(); + bc_type_int[index] = BC_Type::bc_shared; // shared boundary + bc_zone_id[index] = szones_ptr[0]; // only one shared zone per face-neighbor in 1d + bc_nghbr_id[index] = nbr_rank; + bc_corner_faces[index] = 1; + } + } + + // Create boundary conditions array for Teton + + // CONDUIT OUTPUT + // Add to conduit parameters input file + mParametersNode["boundary_conditions/num_reflecting"] = boundaries_types[0]; + mParametersNode["boundary_conditions/num_vacuum"] = boundaries_types[1]; + mParametersNode["boundary_conditions/num_source"] = boundaries_types[2]; + mParametersNode["boundary_conditions/num_comm"] = boundaries_types[3]; + mParametersNode["boundary_conditions/num_total"] = num_bndrs; + + std::vector elem_ids(2); + int nelem = mMeshNode["topologies/mesh/elements/dims/i"].value(); + mParametersNode["boundary_conditions/type"].set(bc_type_int.data(), 2); + mParametersNode["boundary_conditions/zone_ids"].set(bc_zone_id.data(), 2); + mParametersNode["boundary_conditions/neighbor_ids"].set(bc_nghbr_id.data(), 2); + mParametersNode["boundary_conditions/bc_ncorner_faces"].set(bc_corner_faces.data(), 2); + +} + + void TetonBlueprint::OutputTetonMesh(int rank, MPI_Comm comm) { + const conduit::Node &coords = mMeshNode["coordsets/coords/values"]; + if (!coords.has_path("y") && !coords.has_path("z")) + { + int nelem = mMeshNode["topologies/mesh/elements/dims/i"].value(); + // TODO Move to mesh conduit Node + // TODO: fix rank + int rank = 0; + mParametersNode["size/ndim"] = 1; + mParametersNode["size/rank"] = rank; + mParametersNode["size/nzones"] = nelem; + mParametersNode["size/nverts"] = 2*nelem; + mParametersNode["size/ncornr"] = 2*nelem; + mParametersNode["size/nsides"] = 2*nelem; + mParametersNode["size/nbelem"] = 2; + mParametersNode["size/maxcf"] = 2; + mParametersNode["size/maxCorner"] = 2; + // TODO: enable slab geometry option as well + mParametersNode["size/geomType"] = 42; //spherical + //mParametersNode["size/geomType"] = 41; //slab + int num_face_nbrs_teton = mParametersNode["boundary_conditions/num_comm"].value(); + mParametersNode["size/ncomm"] = num_face_nbrs_teton; + mMeshNode["shared_boundaries/nsfaces"] = num_face_nbrs_teton; + mParametersNode["shared_boundaries/nsfaces"] = num_face_nbrs_teton; + + return; + } + CALI_CXX_MARK_FUNCTION; verifyInput(mMeshNode, comm); // face_to_corners2. CreateConnectivityArrays(mMeshNode, comm); - const conduit::Node &base_coordset = mMeshNode["coordsets/coords"]; const conduit::Node &base_topology = mMeshNode["topologies/main"]; + const conduit::Node &base_coordset = mMeshNode["coordsets/coords"]; const conduit::Node &corner_topology = mMeshNode["topologies/main_corner"]; + const int ndim = conduit::blueprint::mesh::utils::topology::dims(base_topology); const int nvert = conduit::blueprint::mesh::utils::coordset::length(base_coordset); const int nelem = conduit::blueprint::mesh::utils::topology::length(base_topology); - const int ndim = conduit::blueprint::mesh::utils::topology::dims(base_topology); const int ncorners_total = conduit::blueprint::mesh::utils::topology::length(corner_topology); // Divide up the mesh boundary faces in to groups with @@ -1969,3 +2171,5 @@ void TetonBlueprint::OutputTetonMesh(int rank, MPI_Comm comm) verifyInput(mMeshNode, comm); } + + diff --git a/src/teton/interface/TetonConduitInterface.cc b/src/teton/interface/TetonConduitInterface.cc index 82cf715..137e58d 100644 --- a/src/teton/interface/TetonConduitInterface.cc +++ b/src/teton/interface/TetonConduitInterface.cc @@ -48,16 +48,19 @@ namespace Teton Teton::~Teton() { - bool enableNLTE = false; - if (getDatastore().has_path("options")) + if (mIsInitialized) { - conduit::Node &options = getOptions(); - if (options.has_path("size/enableNLTE")) + bool enableNLTE = false; + if (getDatastore().has_path("options")) { - enableNLTE = options.fetch_existing("size/enableNLTE").as_int(); + conduit::Node &options = getOptions(); + if (options.has_path("size/enableNLTE")) + { + enableNLTE = options.fetch_existing("size/enableNLTE").as_int(); + } } + teton_destructmeshdata(&enableNLTE); } - teton_destructmeshdata(&enableNLTE); } void Teton::initialize(MPI_Comm communicator, bool fromRestart) @@ -68,6 +71,7 @@ void Teton::initialize(MPI_Comm communicator, bool fromRestart) MPI_Fint fcomm = MPI_Comm_c2f(communicator); MPI_Comm_rank(communicator, &mRank); + mSourceManager.SetRank(mRank); conduit::Node &datastore = getDatastore(); conduit::Node &options = getOptions(); @@ -116,7 +120,6 @@ void Teton::initialize(MPI_Comm communicator, bool fromRestart) // Set the Dt controls early, as tfloor is set here and needed by constructSize in the line below. constructDtControls(); - constructSize(); constructMemoryAllocator(); constructQuadrature(); @@ -127,17 +130,15 @@ void Teton::initialize(MPI_Comm communicator, bool fromRestart) MPI_Barrier(communicator); - teton_setoppositeface(); //Prerequisite for calling setMeshSizeAndPositions() + int ndim = options.fetch_existing("size/ndim").value(); + if (ndim > 1) teton_setoppositeface(); //Prerequisite for calling setMeshSizeAndPositions() in 2D/3D setCommunication(); //Prerequisite for calling setMeshSizeAndPositions() - setMeshSizeAndPositions(); // This is an awful hack to make sure Z%VolumeOld is initialized teton_getvolume(); teton_getvolume(); - teton_setcommunicationgroup(&fcomm); - teton_checksharedboundary(); bool enableNLTE = options.fetch_existing("size/enableNLTE").as_int(); @@ -190,6 +191,8 @@ void Teton::initialize(MPI_Comm communicator, bool fromRestart) // is stored, which associates the Teton corner id with the mesh vertex id storeMeshData(); + mIsInitialized = true; + CALI_MARK_END("Teton_Initialize"); } @@ -253,6 +256,39 @@ void Teton::storeMeshData() } } +int Teton::checkInputSanity(const conduit::Node &sanitizer_node) const +{ + // level = 0 --> Don't run it + // level = 1 --> print one complaint per problematic category + // level = 2 --> print one complaint per problematic zone/corner + const int sanitizer_level = sanitizer_node.fetch_existing("level").to_int(); + if (sanitizer_level < 1) + return 0; + + // We'll kill the code in the C++ rather than the Fortran + bool kill_if_bad = false; + + //Check all categories except scattering opacity: + int num_cats = 6; + const int default_cat_list[] = {2, 3, 4, 5, 6, 7}; + const int *cat_list_ptr = default_cat_list; + if (sanitizer_node.has_path("cat_list")) + { + num_cats = sanitizer_node.fetch_existing("cat_list").dtype().number_of_elements(); + cat_list_ptr = sanitizer_node.fetch_existing("cat_list").as_int_ptr(); + } + + int num_bad_cats = 0; + teton_checkinputsanity(&kill_if_bad, &sanitizer_level, &num_cats, cat_list_ptr, &num_bad_cats); + + if (sanitizer_node.has_path("kill_if_bad") && sanitizer_node.fetch_existing("kill_if_bad").to_int()) + { + TETON_VERIFY_C(mRank, num_bad_cats == 0, "Bad inputs detected!"); + } + + return num_bad_cats; +} + void Teton::constructBoundaries() { conduit::Node &options = getOptions(); @@ -264,19 +300,28 @@ void Teton::constructBoundaries() teton_constructboundary(&nrefl, &nvac, &nsrc, &num_comm); - int numBCTotal = options.fetch_existing("boundary_conditions/num_total").value(); - int *BCTypeInt64 = options.fetch_existing("boundary_conditions/type").value(); - int *BCCornerFaces64 = options.fetch_existing("boundary_conditions/corner_face_ids").value(); - int *BCNeighborID64 = options.fetch_existing("boundary_conditions/neighbor_ids").value(); + int ndim = options.fetch_existing("size/ndim").value(); + if (ndim > 1) + { + int numBCTotal = options.fetch_existing("boundary_conditions/num_total").value(); + int *BCTypeInt = options.fetch_existing("boundary_conditions/type").as_int_ptr(); + int *BCCornerFaces = options.fetch_existing("boundary_conditions/corner_face_ids").as_int_ptr(); + int *BCNeighborID = options.fetch_existing("boundary_conditions/neighbor_ids").as_int_ptr(); - // Ask conduit team how to pull out these items as integer pointers, instead of needing to do this. - std::vector BCTypeInt(BCTypeInt64, BCTypeInt64 + numBCTotal); - std::vector BCCornerFaces(BCCornerFaces64, BCCornerFaces64 + numBCTotal); - std::vector BCNeighborID(BCNeighborID64, BCNeighborID64 + numBCTotal); + TETON_VERIFY_C(mRank, (numBCTotal > 0), "No boundary conditions defined."); - TETON_VERIFY_C(mRank, (numBCTotal > 0), "No boundary conditions defined."); + teton_addboundary(&numBCTotal, &BCTypeInt[0], &BCCornerFaces[0], &BCNeighborID[0]); + } + else + { + int *BCTypeInt = options.fetch_existing("boundary_conditions/type").as_int_ptr(); + int *BCNeighborID = options.fetch_existing("boundary_conditions/neighbor_ids").as_int_ptr(); + int *BCCornerFaces = options.fetch_existing("boundary_conditions/bc_ncorner_faces").as_int_ptr(); + int numBCTotal = 2; + + teton_addboundary(&numBCTotal, &BCTypeInt[0], &BCCornerFaces[0], &BCNeighborID[0]); - teton_addboundary(&numBCTotal, &BCTypeInt[0], &BCCornerFaces[0], &BCNeighborID[0]); + } } #if !defined(TETON_ENABLE_MINIAPP_BUILD) @@ -589,6 +634,10 @@ double Teton::step(int cycle) double timerad = options.fetch_existing("iteration/timerad").value(); double tfloor = options.fetch_existing("iteration/tfloor").value(); + // Update volumetric source profiles: + mSourceManager.UpdateSources(timerad, dtrad); + mSourceManager.UpdatePsiWithSources(); + // ------------------------------------------------------------ // Run the step // ------------------------------------------------------------ @@ -598,9 +647,25 @@ double Teton::step(int cycle) // Update cycle number in mesh blueprint. This is used by conduit or Visit if we dump this mesh. blueprint["state/cycle"] = cycle; + // If sanitizer level is provided: + if (options.has_path("iteration/sanitizer/level")) + { + int num_bad_cats = checkInputSanity(options.fetch_existing("iteration/sanitizer")); + options["iteration/sanitizer/num_bad_cats"] = num_bad_cats; + + if (num_bad_cats > 0) + { + // If we reached this point, we're letting the host code decide whether or not to kill the code. + // We don't want to proceed with this time step because it'll most likely crash or hang, so we'll do nothing and return. + // Host code should check the value of options["iteration/sanitizer/num_bad_cats"] after each step call. + if (mRank == 0) + std::cout << "Teton: Bad inputs found! Skipping time step." << std::endl; + return mDTrad; + } + } + // Main function in Teton to take a radiation step teton_radtr(); - // Update the radiation force (if the field is present) #if !defined(TETON_ENABLE_MINIAPP_BUILD) // Note that, if the radiation force is present, there will always be @@ -645,13 +710,6 @@ double Teton::step(int cycle) #if !defined(TETON_ENABLE_MINIAPP_BUILD) computeGenericSurfaceFluxTally(); - - // REMOVE // - // int mRank; - // MPI_Comm_rank(MPI_COMM_WORLD, &mRank); - // conduit::relay::io::save(blueprint, - // "blueprint_mesh.rank" + std::to_string(mRank) + ".cycle" + std::to_string(cycle) + ".json", "json"); - // REMOVE // #endif // Compute the recommended time step @@ -968,9 +1026,9 @@ void Teton::setSourceProfiles() { conduit::Node &options = getOptions(); - int nsrc = options.fetch_existing("boundary_conditions/num_source").value(); + int nsrc_bdry = options.fetch_existing("boundary_conditions/num_source").value(); - for (int j = 0; j < nsrc; ++j) + for (int j = 0; j < nsrc_bdry; ++j) { std::string top = "sources/profile" + std::to_string(j + 1) + "/"; int NumTimes = options.fetch_existing(top + "NumTimes").value(); @@ -1024,6 +1082,55 @@ void Teton::setSourceProfiles() } areSourceProfilesSet = true; + + if (!options.has_path("sources/interior_sources")) + return; + + // TODO move this into TetonSources.cc? + // all interior (volumetric?) sources: + const conduit::Node &profiles_node = options["sources/profiles"]; + conduit::NodeConstIterator interior_sources_it = options["sources/interior_sources"].children(); + int isrc = 0; + while (interior_sources_it.has_next()) + { + const conduit::Node &src_node = interior_sources_it.next(); + std::string spatial_shape = src_node["spatial_shape"].as_string(); + std::string profile_str = src_node["profile"].as_string(); + if (spatial_shape == "point") // point source from tally + { + const conduit::Node &profile_node = profiles_node[profile_str]; + std::string profile_type = profile_node.fetch_existing("type").as_string(); + + // const double* location = src_node["location"].as_double_ptr(); // TODO convert coordinate to zone index + int source_rank = src_node.fetch_existing("rank").to_int(); // Rank that contains the point source + int teton_zone_index = (mRank == source_rank) ? src_node.fetch_existing("zone_index").to_int() : -1; + // int teton_zone_index = (mRank == 3) ? (isrc == 0 ? 4 : 8) : -1; + double multiplier = 1.0; + if (src_node.has_path("multiplier")) + multiplier = src_node.fetch_existing("multiplier").to_double(); + + if (profile_type == "tallyfile") + { + std::string tally_file_name = profile_node["filename"].as_string(); + std::string tally_name = profile_node["tallyname"].as_string(); + mSourceManager.AddPointSourceFromTally(teton_zone_index, tally_file_name, tally_name, multiplier); + } + // else if (profile_type == "isotropic") + // { // TODO generic isotropic group-dependent + // } + else + { + std::cerr << "Unsupported source profile type " << profile_type << std::endl; + exit(1); + } + } + else + { + std::cerr << "Unsupported source spatial shape " << spatial_shape << std::endl; + exit(1); + } + isrc++; + } } void Teton::setMeshSizeAndPositions() @@ -1031,29 +1138,49 @@ void Teton::setMeshSizeAndPositions() conduit::Node &options = getOptions(); conduit::Node &blueprint = getMeshBlueprint(); - int nzones = options.fetch_existing("size/nzones").value(); - double *zone_verts_ptr = blueprint.fetch_existing("arrays/zone_verts").value(); - //int *ncorners_ptr = blueprint.fetch_existing("fields/ncorners").value(); - int *ncorners_ptr = blueprint.fetch_existing("arrays/zone_to_ncorners").value(); int ndim = options.fetch_existing("size/ndim").value(); - int maxCorner = options.fetch_existing("size/maxCorner").value(); - - int off_set = 0; - std::vector zoneCoordinates(ndim * maxCorner); + int nzones = options.fetch_existing("size/nzones").value(); - for (int zone = 0; zone < nzones; ++zone) + if (ndim > 1) { - int zoneID = zone + 1; - int ncorners = ncorners_ptr[zone]; - for (int c = 0; c < ncorners; ++c) + double *zone_verts_ptr = blueprint.fetch_existing("arrays/zone_verts").value(); + //int *ncorners_ptr = blueprint.fetch_existing("fields/ncorners").value(); + int *ncorners_ptr = blueprint.fetch_existing("arrays/zone_to_ncorners").value(); + int ndim = options.fetch_existing("size/ndim").value(); + int maxCorner = options.fetch_existing("size/maxCorner").value(); + + int off_set = 0; + std::vector zoneCoordinates(ndim * maxCorner); + + for (int zone = 0; zone < nzones; ++zone) { - for (int i = 0; i < ndim; i++) + int zoneID = zone + 1; + int ncorners = ncorners_ptr[zone]; + for (int c = 0; c < ncorners; ++c) { - zoneCoordinates[ndim * c + i] = zone_verts_ptr[off_set]; - off_set += 1; + for (int i = 0; i < ndim; i++) + { + zoneCoordinates[ndim * c + i] = zone_verts_ptr[off_set]; + off_set += 1; + } } + teton_setnodeposition(&zoneID, &zoneCoordinates[0]); + } + } + else + { + int nvertices = blueprint["coordsets/coords/values/x"].dtype().number_of_elements(); + double *vertex_coords = blueprint["coordsets/coords/values/x"].value(); + int nzones = nvertices-1; + std::vector zoneCoordinates(2); + for (int zone = 0; zone < nzones; ++zone) + { + int zoneID = zone + 1; + zoneCoordinates[0] = vertex_coords[zone]; + zoneCoordinates[1] = vertex_coords[zone+1]; + // TODO: add asset that zoneCoordinates[0] < zoneCoordinates[1] + teton_setnodeposition(&zoneID, &zoneCoordinates[0]); } - teton_setnodeposition(&zoneID, &zoneCoordinates[0]); } return; @@ -1152,70 +1279,84 @@ void Teton::setMeshConnectivity() conduit::Node &options = getOptions(); conduit::Node &blueprint = getMeshBlueprint(); - int *connectivity_ptr = blueprint.fetch_existing("teton/arrays/corner_connectivity").value(); - int nzones = options.fetch_existing("size/nzones").value(); - // ADDED // - int connect_off_set = 0; - for (int zone = 0; zone < nzones; ++zone) + std::string coord_type = blueprint.fetch_existing("coordsets/coords/type").as_string(); + if (coord_type == "rectilinear") { - int zoneID = connectivity_ptr[connect_off_set]; - int corner0 = connectivity_ptr[connect_off_set + 1]; - int zoneFaces = connectivity_ptr[connect_off_set + 2]; - int cornerFaces = connectivity_ptr[connect_off_set + 3]; - int zoneNCorner = connectivity_ptr[connect_off_set + 4]; - connect_off_set += 5; - - std::vector zoneOpp(zoneFaces); - std::vector CornerID(cornerFaces); - std::vector CornerOpp(cornerFaces); - std::vector nCPerFace(zoneFaces); - std::vector FaceToBCList(zoneFaces); - - for (int j = 0; j < zoneFaces; ++j) + // Teton expects two boundary conditions, one for zone1D == 1 and one for zoneID == nzones + // Here BCZoneID[0] == 1, BCZoneID[1] == nzones or BCZoneID[0] == nzones and BCZoneID[1] == 1 + constexpr int numBCTotal = 2; + int *BCZoneID = options.fetch_existing("boundary_conditions/zone_ids").value(); + for (int zone = 0; zone < nzones; ++zone) { - zoneOpp[j] = connectivity_ptr[connect_off_set + j]; + int zoneID = zone+1; + teton_setzone1d(&zoneID, &numBCTotal, &BCZoneID[0]); } - connect_off_set += zoneFaces; - - for (int j = 0; j < cornerFaces; ++j) + } + else + { + int connect_off_set = 0; + int *connectivity_ptr = blueprint.fetch_existing("teton/arrays/corner_connectivity").value(); + for (int zone = 0; zone < nzones; ++zone) { - CornerID[j] = connectivity_ptr[connect_off_set + j]; - } - connect_off_set += cornerFaces; + int zoneID = connectivity_ptr[connect_off_set]; + int corner0 = connectivity_ptr[connect_off_set + 1]; + int zoneFaces = connectivity_ptr[connect_off_set + 2]; + int cornerFaces = connectivity_ptr[connect_off_set + 3]; + int zoneNCorner = connectivity_ptr[connect_off_set + 4]; + connect_off_set += 5; + + std::vector zoneOpp(zoneFaces); + std::vector CornerID(cornerFaces); + std::vector CornerOpp(cornerFaces); + std::vector nCPerFace(zoneFaces); + std::vector FaceToBCList(zoneFaces); + + for (int j = 0; j < zoneFaces; ++j) + { + zoneOpp[j] = connectivity_ptr[connect_off_set + j]; + } + connect_off_set += zoneFaces; - for (int j = 0; j < cornerFaces; ++j) - { - CornerOpp[j] = connectivity_ptr[connect_off_set + j]; - } - connect_off_set += cornerFaces; + for (int j = 0; j < cornerFaces; ++j) + { + CornerID[j] = connectivity_ptr[connect_off_set + j]; + } + connect_off_set += cornerFaces; - for (int j = 0; j < zoneFaces; ++j) - { - nCPerFace[j] = connectivity_ptr[connect_off_set + j]; - } - connect_off_set += zoneFaces; + for (int j = 0; j < cornerFaces; ++j) + { + CornerOpp[j] = connectivity_ptr[connect_off_set + j]; + } + connect_off_set += cornerFaces; - for (int j = 0; j < zoneFaces; ++j) - { - FaceToBCList[j] = connectivity_ptr[connect_off_set + j]; + for (int j = 0; j < zoneFaces; ++j) + { + nCPerFace[j] = connectivity_ptr[connect_off_set + j]; + } + connect_off_set += zoneFaces; + + for (int j = 0; j < zoneFaces; ++j) + { + FaceToBCList[j] = connectivity_ptr[connect_off_set + j]; + } + connect_off_set += zoneFaces; + + teton_setzone(&zoneID, + &corner0, + &zoneFaces, + &cornerFaces, + &zoneNCorner, + &zoneOpp[0], + &CornerID[0], + &CornerOpp[0], + &nCPerFace[0], + &FaceToBCList[0]); } - connect_off_set += zoneFaces; - teton_setzone(&zoneID, - &corner0, - &zoneFaces, - &cornerFaces, - &zoneNCorner, - &zoneOpp[0], - &CornerID[0], - &CornerOpp[0], - &nCPerFace[0], - &FaceToBCList[0]); + blueprint.remove("teton/arrays/corner_connectivity"); } - - blueprint.remove("teton/arrays/corner_connectivity"); } void Teton::setMaterials() @@ -1604,18 +1745,66 @@ void Teton::updateMeshPositions() return; } +// NOTE: the Vectors RadiationForceXTotal, ..., must +// already be sized to the number of mesh vertices +void Teton::getRadiationForceDensity1D(double *RadiationForceDensityX) +{ + // Compute the radiation force internally in Teton + // for each zone and corner + teton_setradiationforce(); + + conduit::Node &options = getOptions(); + int nzones = options.fetch_existing("size/nzones").value(); + int nverts = nzones + 1; + std::vector RadiationForce(2); + std::vector CornerVolumes(2); + std::vector CornerVolumeSumsAtVertex(nverts); + + for (int v = 0; v < nverts; ++v) + { + CornerVolumeSumsAtVertex[v] = 0.0; + RadiationForceDensityX[v] = 0.0; + } + + for (int zone = 0; zone < nzones; ++zone) + { + // Get the radiation force and volume on each corner of each zone + int zoneID = zone + 1; + teton_getradiationforce(&zoneID, &RadiationForce[0]); + teton_getcornervolumes(&zoneID, &CornerVolumes[0]); + + int v1 = zone; + int v2 = zone + 1; + RadiationForceDensityX[v1] += RadiationForce[0]; + RadiationForceDensityX[v2] += RadiationForce[1]; + CornerVolumeSumsAtVertex[v1] += CornerVolumes[0]; + CornerVolumeSumsAtVertex[v2] += CornerVolumes[1]; + } + + for (int v = 0; v < nzones+1; ++v) + { + RadiationForceDensityX[v] /= CornerVolumeSumsAtVertex[v]; + } +} + // NOTE: the Vectors RadiationForceXTotal, ..., must // already be sized to the number of mesh vertices void Teton::getRadiationForceDensity(double *RadiationForceDensityX, double *RadiationForceDensityY, double *RadiationForceDensityZ) { + conduit::Node &options = getOptions(); + int ndim = options.fetch_existing("size/ndim").value(); + if (ndim == 1) + { + getRadiationForceDensity1D(RadiationForceDensityX); + return; + } + // Compute the radiation force internally in Teton // for each zone and corner teton_setradiationforce(); - conduit::Node &options = getOptions(); - int ndim = options.fetch_existing("size/ndim").value(); int maxCorner = options.fetch_existing("size/maxCorner").value(); int nzones = options.fetch_existing("size/nzones").value(); int nverts = options.fetch_existing("size/nverts").value(); @@ -1624,13 +1813,6 @@ void Teton::getRadiationForceDensity(double *RadiationForceDensityX, std::vector CornerVolumeSumsAtVertex(nverts); int corner_counter = 0; - // TODO: finish this case - if (ndim == 1) - { - std::cerr << "1D getRadiationForceDensity not yet implemented! Teton is exiting . . ." << std::endl; - exit(1); - } - for (int v = 0; v < nverts; ++v) { CornerVolumeSumsAtVertex[v] = 0.0; @@ -1655,7 +1837,8 @@ void Teton::getRadiationForceDensity(double *RadiationForceDensityX, int vertexID = mCornerToVertex[cornerID]; corner_counter += 1; RadiationForceDensityX[vertexID] += RadiationForce[c * ndim + 0]; - RadiationForceDensityY[vertexID] += RadiationForce[c * ndim + 1]; + if (ndim > 1) + RadiationForceDensityY[vertexID] += RadiationForce[c * ndim + 1]; if (ndim == 3) RadiationForceDensityZ[vertexID] += RadiationForce[c * ndim + 2]; CornerVolumeSumsAtVertex[vertexID] += CornerVolumes[c]; @@ -1665,7 +1848,8 @@ void Teton::getRadiationForceDensity(double *RadiationForceDensityX, for (int v = 0; v < nverts; ++v) { RadiationForceDensityX[v] /= CornerVolumeSumsAtVertex[v]; - RadiationForceDensityY[v] /= CornerVolumeSumsAtVertex[v]; + if (ndim > 1) + RadiationForceDensityY[v] /= CornerVolumeSumsAtVertex[v]; if (ndim == 3) RadiationForceDensityZ[v] /= CornerVolumeSumsAtVertex[v]; } diff --git a/src/teton/interface/TetonSources.cc b/src/teton/interface/TetonSources.cc new file mode 100644 index 0000000..5cfcb89 --- /dev/null +++ b/src/teton/interface/TetonSources.cc @@ -0,0 +1,207 @@ +#include "TetonSources.hh" +#include "dbc_macros.h" + +#include "conduit/conduit_blueprint.hpp" +#include "conduit/conduit_relay.hpp" +#include "conduit/conduit_relay_config.h" +#include "conduit/conduit_relay_mpi_io_blueprint.hpp" + +void TetonSourceManager::AddPointSourceFromTally(int zone_index, + std::string tally_file_name, + std::string tally_name, + double multiplier) +{ + conduit::Node source_node, source_node_full; + conduit::relay::io::load_merged(tally_file_name, "json", source_node_full); + source_node.set_external_node( + source_node_full.fetch_existing("xray/surfaceTallies/" + tally_name + "/instantaneousEnergy")); + TETON_VERIFY_C(mRank, source_node["numDimension"].as_int64() == 3, "Replayed point source must have 3D data."); + + const long *shape = source_node["shape"].as_int64_ptr(); + int ntimebins = shape[0]; + int nanglebins = shape[1]; + int ngroups_tally = shape[2]; + int total_size = ntimebins * nanglebins * ngroups_tally; + + // int ngroups = options["quadrature/num_groups"].as_int(); + // int npolar = options["quadrature/npolar"].as_int(); + // TETON_VERIFY_C(mRank, + // options["quadrature/qtype"].as_int() == 2, + // "Angle-dependent sources require product quadrature (qtype = 2)."); + // TETON_VERIFY_C(mRank, + // ngroups_tally == ngroups, + // "Cannot interpolate between source and problem group structure yet."); + // TETON_VERIFY_C(mRank, + // nanglebins == 2 * npolar, + // "Cannot interpolate between source and problem polar bin boundaries yet."); + // TETON_VERIFY_C(mRank, nanglebins == 4, "TODO remove hardcoding for 4 angle bins "); + + TETON_VERIFY_C(mRank, source_node["dim0/units"].as_string() == "sh", "Time unit of tally must be in shakes."); + + const double *time_bin_bounds = source_node["dim0/bounds"].as_double_ptr(); + const double *angle_bin_bounds = source_node["dim1/bounds"].as_double_ptr(); + + double factor = multiplier; + double energy_unit_conversion = 1.0; + if (source_node["units"].as_string() == "Terg") + { + energy_unit_conversion = 1.0e-4; + } + factor *= energy_unit_conversion; + + // Convert extensive tally (units of energy) to intensive tally (units of energy per time per angle unit) + const double *source_profile_integrated = source_node["data"].as_double_ptr(); + std::vector source_profile(total_size); + int counter = 0; + for (int timebin = 0; timebin < ntimebins; timebin++) + { + for (int anglebin = 0; anglebin < nanglebins; anglebin++) + { + double angle_bin_width = angle_bin_bounds[anglebin + 1] - angle_bin_bounds[anglebin]; + for (int group = 0; group < ngroups_tally; group++) + { + source_profile[counter] = source_profile_integrated[counter] * factor / angle_bin_width; + counter++; + } + } + } + + m_source_list.push_back( + new PointSource(nanglebins, ngroups_tally, zone_index, ntimebins, time_bin_bounds, &source_profile[0])); +} + +PointSource::PointSource(int nangles, + int ngroups, + int zone_index, + int ntimebins, + const double *timebinbounds, + const double *source_profile) + : m_num_time_bins(ntimebins), + m_profile(), + m_time_bin_bounds(), + TetonSource(nangles, zone_index > 0 ? 1 : 0, ngroups) +{ + if (zone_index <= 0) + return; + + m_zone_list[0] = zone_index; + + int shape_size = nangles * ngroups; + m_time_bin_bounds.resize(ntimebins + 1); + m_profile.resize(shape_size * ntimebins); + + // Copy over data: + int index = 0; + for (int timebin = 0; timebin < ntimebins; timebin++) + { + m_time_bin_bounds[timebin] = timebinbounds[timebin]; + for (int i = 0; i < shape_size; i++) + { + m_profile[index] = source_profile[index]; + index++; + } + } + m_time_bin_bounds[ntimebins] = timebinbounds[ntimebins]; +} + +void PointSource::UpdateSourceValues(double time, double dtrad) +{ + // If this rank has no source zones, return + if (m_num_srczones < 1) + return; + + m_time = time; + + int shape_size = m_num_groups * m_num_angles; + + // TODO, something to interpolate between angle bin differences? + + double time_prev = time - dtrad; + + // Zero out source: + for (int i = 0; i < shape_size; i++) + { + m_src_values[i] = 0.; + } + + // Before the first time value or after the last time value, + // source is just zero. + if (time < m_time_bin_bounds[0] || time_prev > m_time_bin_bounds[m_num_time_bins]) + { + return; + } + + // Otherwise, loop through the time bins and accumulate source contributions: + for (int timebin = 0; timebin < m_num_time_bins; timebin++) + { + // as timebin goes up, the time bin in the diagrams below moves to the right + // [ source profile's time bin ] --> + + // If time bin lower bound is beyond this time step, we're done + // [ source profile's time bin ] + // [ time step ] + if (m_time_bin_bounds[timebin] > time) + break; + + // If time bin upper bound < time_prev, keep going + // [ source profile's time bin ] + // [ time step ] + if (m_time_bin_bounds[timebin + 1] < time_prev) + continue; + + // If we're here, then the current time bin overlaps with the time step. + + double factor = 0.; + + // else, we want a fraction of the energy from this time bin: + const double source_time_bin_width = m_time_bin_bounds[timebin + 1] - m_time_bin_bounds[timebin]; + if (m_time_bin_bounds[timebin] < time_prev && m_time_bin_bounds[timebin + 1] > time) + { + // the time step falls entirely within the time step. + // [ source profile's time bin ] + // [ time step ] + factor = dtrad / source_time_bin_width; + } + else if (m_time_bin_bounds[timebin] < time_prev) + // equivalent to else if (m_time_bin_bounds[timebin] < time_prev && m_time_bin_bounds[timebin+1] <= time) + { + // [ source profile's time bin ] + // [ time step ] + factor = (m_time_bin_bounds[timebin + 1] - time_prev) / source_time_bin_width; + } + else if (m_time_bin_bounds[timebin + 1] > time) + // equivalent to else if (m_time_bin_bounds[timebin] >= time_prev && m_time_bin_bounds[timebin+1] > time) + { + // [ source profile's time bin ] + // [ time step ] + factor = (time - m_time_bin_bounds[timebin]) / source_time_bin_width; + } + else + // equivalent to else if (m_time_bin_bounds[timebin] >= time_prev && m_time_bin_bounds[timebin+1] <= time) + { + // factor = 1 if the source time bin is entirely within the time step + // [ source profile's time bin ] + // [ time step ] + // (We want to add the contribution of the entire time bin to this time step's source) + factor = 1.0; + } + + if (factor > 1.0) + { + std::cerr << "Factor should not ever be larger than 1" << std::endl; + exit(1); + } + + const double *data_start = m_profile.data() + timebin * shape_size; + for (int i = 0; i < shape_size; i++) + { + m_src_values[i] += factor * data_start[i]; + } + } + + // Divide by dtrad to convert from energy to power: + for (int i = 0; i < shape_size; i++) + { + m_src_values[i] /= dtrad; + } +} diff --git a/src/teton/interface/TetonSurfaceTallies.cc b/src/teton/interface/TetonSurfaceTallies.cc index 2df93e0..bdf5887 100644 --- a/src/teton/interface/TetonSurfaceTallies.cc +++ b/src/teton/interface/TetonSurfaceTallies.cc @@ -26,15 +26,15 @@ void dumpTallyToJson(const conduit::Node &blueprint, const conduit::Node &option std::string conduit_base_path = "xray/surfaceTallies/"; const int ngroups_teton = options["quadrature/num_groups"].as_int(); - const double *group_bounds = options.fetch_existing("quadrature/gnu").value(); + const double *group_bounds = options.fetch_existing("quadrature/gnu").as_double_ptr(); int nanglebin_teton = -1; teton_getnumanglebins(&nanglebin_teton); std::string tally_file_name = "radenergyflux.tal"; - if (options.has_path("surface_edits/tally_file_name")) + if (options.has_path("surface_edits_output_filename")) { - tally_file_name = options.fetch_existing("surface_edits/tally_file_name").to_string(); + tally_file_name = options.fetch_existing("surface_edits_output_filename").as_string(); } const conduit::Node &surface_edit_options_all = options["surface_edits"]; diff --git a/src/teton/mods/MemoryAllocator_mod.F90 b/src/teton/mods/MemoryAllocator_mod.F90 index 06a8177..779338c 100644 --- a/src/teton/mods/MemoryAllocator_mod.F90 +++ b/src/teton/mods/MemoryAllocator_mod.F90 @@ -3,6 +3,7 @@ module MemoryAllocator_mod use iso_c_binding, only : c_int, c_double, c_bool, C_SIZE_T, c_f_pointer, c_ptr, c_loc + use Size_mod, only : Size #if defined(TETON_ENABLE_UMPIRE) use umpire_mod #endif @@ -21,7 +22,8 @@ module MemoryAllocator_mod #endif integer, public :: umpire_host_allocator_id = -1 integer, public :: umpire_device_allocator_id = -1 - logical, public :: use_internal_host_allocator = .FALSE. + logical, public :: host_allocator_present = .FALSE. + logical, public :: device_allocator_present = .FALSE. contains @@ -59,8 +61,6 @@ module MemoryAllocator_mod contains subroutine AllocatorType_construct(self, umpire_host_allocator_id, umpire_device_allocator_id) - use Size_mod, only: Size - class(AllocatorType), intent(inout) :: self integer :: umpire_host_allocator_id integer :: umpire_device_allocator_id @@ -68,39 +68,18 @@ subroutine AllocatorType_construct(self, umpire_host_allocator_id, umpire_device #if defined(TETON_ENABLE_UMPIRE) type(UmpireResourceManager) :: umpire_resource_manager - ! These are used if teton needs to create its own host pinned pool allocator. - integer(kind=C_SIZE_T) :: initial_pool_size ! Initial size in bytes for a pool allocator - integer(kind=C_SIZE_T) :: pool_growth_size ! Size of additional blocks to allocate, if pool needs to grow. - - initial_pool_size = 512*1024*1024 ! 512MB is default for Umpire - pool_growth_size = 1*1024*1024 ! 1MB is default for Umpire - umpire_resource_manager = umpire_resource_manager%get_instance() if (umpire_host_allocator_id >= 0) then self%umpire_host_allocator = umpire_resource_manager%get_allocator_by_id(umpire_host_allocator_id) self%umpire_host_allocator_id = umpire_host_allocator_id - else -! If Teton is using OpenMP target offload kernel or CUDA kernels, it requires an -! umpire allocator to allocate memory in page-locked ( pinned ) memory. If one -! was not provided then Teton will create its own. -! -! We should check the Size%useGPU variable to see if we are executing -! kernels on the GPU. However, the Size module is not constructed until _after_ -! the memory allocator module. This needs to be refactored. Until then, we'll -! default to constructing an allocator if Teton was compiled with gpu kernel -! support. -#if defined(TETON_ENABLE_OPENMP_OFFLOAD) - self%use_internal_host_allocator = .TRUE. - self%umpire_host_allocator = umpire_resource_manager%get_allocator_by_name("PINNED") - self%umpire_host_allocator = umpire_resource_manager%make_allocator_quick_pool("POOL", self%umpire_host_allocator, initial_pool_size, pool_growth_size) - self%umpire_host_allocator_id = self%umpire_host_allocator%get_id() -#endif + self%host_allocator_present = .TRUE. endif if (umpire_device_allocator_id >= 0) then self%umpire_device_allocator = umpire_resource_manager%get_allocator_by_id(umpire_device_allocator_id) self%umpire_device_allocator_id = umpire_device_allocator_id + self%device_allocator_present = .TRUE. endif #endif @@ -113,13 +92,6 @@ subroutine AllocatorType_destruct(self) class(AllocatorType), intent(inout) :: self -#if defined(TETON_ENABLE_OPENMP_OFFLOAD) - !if (use_internal_host_allocator) then - ! TODO Ask Umpire how to teardown allocators. - ! Need to tear down the pinned pool allocator. - !endif -#endif - end subroutine AllocatorType_destruct !----------------------------------------------------------------------------- diff --git a/src/teton/mods/MemoryAllocator_mod.F90.templates b/src/teton/mods/MemoryAllocator_mod.F90.templates index 3b3a6b0..ee344e0 100644 --- a/src/teton/mods/MemoryAllocator_mod.F90.templates +++ b/src/teton/mods/MemoryAllocator_mod.F90.templates @@ -27,17 +27,15 @@ !----------------------------------------------------------------------------- ! C_DOUBLE subroutines !----------------------------------------------------------------------------- - subroutine FTM_CAT4(allocate_host,real,c_double,FTM_RANK) (self, pinnedMemory, parentName, varName, varPtr, FTM_DIMS) - + subroutine FTM_CAT4(allocate_host,real,c_double,FTM_RANK) (self, preferUmpire, parentName, varName, varPtr, FTM_DIMS) class(AllocatorType) :: self - logical, intent(in) :: pinnedMemory + logical, intent(in) :: preferUmpire character(len=*), intent(in) :: parentName character(len=*), intent(in) :: varName real(kind=c_double), pointer, contiguous, dimension( FTM_RANK_STRING ), intent(inout) :: varPtr integer :: FTM_DIMS integer, dimension( FTM_RANK ) :: theShape - integer(kind=c_size_t) :: numElements theShape = [ FTM_DIMS ] @@ -45,42 +43,32 @@ TETON_VERIFY(.NOT. associated(varPtr), "MemoryAllocator: Unable to allocate memory for "//parentName//"/"//varName//", pointer is already associated!") nullify(varPtr) - if (pinnedMemory) then - TETON_VERIFY(self%umpire_host_allocator_id /= -1, "MemoryAllocator: Unable to allocate in pinned memory, an Umpire host allocator was not provided.") + if (preferUmpire .AND. self%host_allocator_present) then #if defined(TETON_ENABLE_UMPIRE) call self%umpire_host_allocator%allocate(varPtr, theShape) -#else - TETON_FATAL("MemoryAllocator: Unable to allocate in pinned memory, Teton was not compiled with Umpire support.") #endif else - allocate( varPtr( FTM_DIMS ) ) + allocate( varPtr( FTM_DIMS )) endif TETON_VERIFY(associated(varPtr), "MemoryAllocator: Allocation failed for "//parentName//"/"//varName//".") - numElements = PRODUCT(theShape) - - if (numElements > 0) then - ! Initialize contents to zero - varPtr = 0 - endif + varPtr( FTM_RANK_STRING ) = 0.0 !$omp end critical return end subroutine FTM_CAT4(allocate_host,real,c_double,FTM_RANK) !----------------------------------------------------------------------------- - subroutine FTM_CAT4(deallocate_host,real,c_double,FTM_RANK) (self, pinnedMemory, parentName, varName, varPtr) - + subroutine FTM_CAT4(deallocate_host,real,c_double,FTM_RANK) (self, preferUmpire, parentName, varName, varPtr) class(AllocatorType) :: self - logical, intent(in) :: pinnedMemory + logical, intent(in) :: preferUmpire character(len=*), intent(in) :: parentName character(len=*), intent(in) :: varName real(kind=c_double), pointer, contiguous, dimension( FTM_RANK_STRING ), intent(inout) :: varPtr !$omp critical - if (pinnedMemory) then - TETON_VERIFY(self%umpire_host_allocator_id /= -1, "MemoryAllocator: Unable to deallocate from pinned memory, an Umpire host allocator was not provided.") + if (preferUmpire .AND. self%host_allocator_present) then #if defined(TETON_ENABLE_UMPIRE) call self%umpire_host_allocator%deallocate(varPtr) #endif @@ -96,17 +84,15 @@ !----------------------------------------------------------------------------- ! C_INT subroutines !----------------------------------------------------------------------------- - subroutine FTM_CAT4(allocate_host,integer,c_int,FTM_RANK) (self, pinnedMemory, parentName, varName, varPtr, FTM_DIMS) - + subroutine FTM_CAT4(allocate_host,integer,c_int,FTM_RANK) (self, preferUmpire, parentName, varName, varPtr, FTM_DIMS) class(AllocatorType) :: self - logical, intent(in) :: pinnedMemory + logical, intent(in) :: preferUmpire character(len=*), intent(in) :: parentName character(len=*), intent(in) :: varName integer(kind=c_int), pointer, contiguous, dimension( FTM_RANK_STRING ), intent(inout) :: varPtr integer :: FTM_DIMS integer, dimension( FTM_RANK ) :: theShape - integer(kind=c_size_t) :: numElements theShape = [ FTM_DIMS ] @@ -114,10 +100,9 @@ TETON_VERIFY(.NOT. associated(varPtr), "MemoryAllocator: Unable to allocate memory for "//parentName//"/"//varName//", pointer is already associated!") nullify(varPtr) - if (pinnedMemory) then - TETON_VERIFY(self%umpire_host_allocator_id /= -1, "MemoryAllocator: Unable to allocate in pinned memory, an Umpire host allocator was not provided.") + if (preferUmpire .AND. self%host_allocator_present) then #if defined(TETON_ENABLE_UMPIRE) - call self%umpire_host_allocator%allocate(varPtr, theShape) + call self%umpire_host_allocator%allocate(varPtr, theShape) #else TETON_FATAL("MemoryAllocator: Unable to allocate in pinned memory, Teton was not compiled with Umpire support.") #endif @@ -127,29 +112,23 @@ TETON_VERIFY(associated(varPtr), "MemoryAllocator: Allocation failed for "//parentName//"/"//varName//".") - numElements = PRODUCT(theShape) - - if (numElements > 0) then - ! Initialize contents to zero - varPtr = 0 - endif + varPtr( FTM_RANK_STRING ) = 0 !$omp end critical return end subroutine FTM_CAT4(allocate_host,integer,c_int,FTM_RANK) !----------------------------------------------------------------------------- - subroutine FTM_CAT4(deallocate_host,integer,c_int,FTM_RANK) (self, pinnedMemory, parentName, varName, varPtr) + subroutine FTM_CAT4(deallocate_host,integer,c_int,FTM_RANK) (self, preferUmpire, parentName, varName, varPtr) class(AllocatorType) :: self - logical, intent(in) :: pinnedMemory + logical, intent(in) :: preferUmpire character(len=*), intent(in) :: parentName character(len=*), intent(in) :: varName integer(kind=c_int), pointer, contiguous, dimension( FTM_RANK_STRING ), intent(inout) :: varPtr !$omp critical - if (pinnedMemory) then - TETON_VERIFY(self%umpire_host_allocator_id /= -1, "MemoryAllocator: Unable to deallocate from pinned memory, an Umpire host allocator was not provided.") + if (preferUmpire .AND. self%host_allocator_present) then #if defined(TETON_ENABLE_UMPIRE) call self%umpire_host_allocator%deallocate(varPtr) #endif diff --git a/src/teton/mods/Options_mod.F90 b/src/teton/mods/Options_mod.F90 index 27ad717..e4a6660 100644 --- a/src/teton/mods/Options_mod.F90 +++ b/src/teton/mods/Options_mod.F90 @@ -14,7 +14,6 @@ module Options_mod type :: options_type contains procedure :: getNumOmpMaxThreads - procedure :: getMPIUseDeviceAddresses procedure :: initialize procedure :: check procedure :: getVerbose @@ -58,9 +57,6 @@ subroutine initialize(self) call theDatastore%root%set_path("options/concurrency/omp_cpu_max_threads", omp_cpu_max_threads) endif - ! Default to not using device (GPU) addresses for MPI - call theDatastore%root%set_path("options/mpi/useDeviceAddresses", 0) - ! Build information. ! These are populated in the cmake_defines_mod.F90 call theDatastore%root%set_path("build_meta_data/cmake_install_prefix",install_prefix) @@ -98,9 +94,6 @@ subroutine check(self) temp = theDatastore%root%has_path("options/concurrency/omp_cpu_max_threads") TETON_VERIFY(temp, "Options is missing concurrency/omp_cpu_max_threads.") - temp = theDatastore%root%has_path("options/mpi/useDeviceAddresses") - TETON_VERIFY(temp, "Options tree is missing mpi/useDeviceAddresses.") - temp = theDatastore%root%has_path("options/verbose") TETON_VERIFY(temp, "Options tree is missing verbose.") @@ -113,19 +106,6 @@ integer(kind=int32) function getNumOmpMaxThreads(self) result(numOmpMaxThreads) numOmpMaxThreads = theDatastore%root%fetch_path_as_int32("options/concurrency/omp_cpu_max_threads") end function - logical(kind=c_bool) function getMPIUseDeviceAddresses(self) result(useDeviceAddresses) - class(options_type) :: self - integer(kind=int32) :: useDeviceAddressesInt - useDeviceAddressesInt = theDataStore%root%fetch_path_as_int32("options/mpi/useDeviceAddresses") - - if (useDeviceAddressesInt == 0) then - useDeviceAddresses = .FALSE. - else - useDeviceAddresses = .TRUE. - endif - - end function - !*********************************************************************** ! getVerbose - Get verbosity level. ! diff --git a/src/teton/mods/Size_mod.F90 b/src/teton/mods/Size_mod.F90 index e740b6d..86e94d0 100644 --- a/src/teton/mods/Size_mod.F90 +++ b/src/teton/mods/Size_mod.F90 @@ -227,7 +227,7 @@ subroutine Size_ctor(self, myRankInGroup, nzones, ncornr, nSides, & endif ! Pinned memory improves CPU<->GPU memory transfer performance, so default to -! using this if running GPU kernels. Allocating data in pinned memory requires Umpire. +! preferring to use Umpire for pinned memory allocations on CPU. #if defined(TETON_ENABLE_UMPIRE) self% usePinnedMemory = self%useGPU #else @@ -235,7 +235,7 @@ subroutine Size_ctor(self, myRankInGroup, nzones, ncornr, nSides, & #endif if (self% useGPU .AND. .NOT. self% usePinnedMemory .AND. myRankInGroup == 0) then - print *, "TETON WARNING: Detected that GPU kernels are enabled, but UMPIRE pinned memory is not enabled. This is not recommended and will result in severe performance degradation." + print *, "TETON WARNING: Detected that GPU kernels are enabled, but UMPIRE support is not enabled. This may impact CPU<->GPU memory transfer performance." endif self% useCUDASolver = useCUDASolver diff --git a/src/teton/rt/RecvFlux.F90 b/src/teton/rt/RecvFlux.F90 index 303dc73..52092dc 100644 --- a/src/teton/rt/RecvFlux.F90 +++ b/src/teton/rt/RecvFlux.F90 @@ -45,10 +45,6 @@ subroutine RecvFlux(SnSweep, cSetID, Angle, PsiB) integer :: angle0 integer :: sharedID integer :: nShared - logical(kind=c_bool) :: useDeviceAwareMPI - -! Constants - useDeviceAwareMPI = Options%getMPIUseDeviceAddresses() .AND. Size%useGPU nShared = getNumberOfShared(RadBoundary) CSet => getCommSetData(Quad, cSetID) @@ -66,9 +62,6 @@ subroutine RecvFlux(SnSweep, cSetID, Angle, PsiB) ! Check for completion of the receive call MPIWait(CommT% irequest(2)) - if (useDeviceAwareMPI) then - TOMP(target update from(CommT%psibrecv)) - endif ! Loop over boundary elements that are incident for this communicator @@ -96,9 +89,6 @@ subroutine RecvFlux(SnSweep, cSetID, Angle, PsiB) ! Check for completion of the receive call MPIWait(CommT% irequest(2)) - if (useDeviceAwareMPI) then - TOMP(target update from(CommT%psibrecv)) - endif ! Loop over boundary elements that are incident for this communicator diff --git a/src/teton/rt/SendFlux.F90 b/src/teton/rt/SendFlux.F90 index 78e1b46..3aae409 100644 --- a/src/teton/rt/SendFlux.F90 +++ b/src/teton/rt/SendFlux.F90 @@ -47,11 +47,6 @@ subroutine SendFlux(SnSweep, cSetID, sendIndex, PsiB) integer :: sharedID integer :: nShared - logical(kind=c_bool) :: useDeviceAwareMPI - -! Constants - useDeviceAwareMPI = Options%getMPIUseDeviceAddresses() .AND. Size%useGPU - nShared = getNumberOfShared(RadBoundary) CSet => getCommSetData(Quad, cSetID) @@ -82,10 +77,6 @@ subroutine SendFlux(SnSweep, cSetID, sendIndex, PsiB) enddo ! Start send for this communicator (odd numbered handle) - - if (useDeviceAwareMPI) then - TOMP(target update to(CommT%psibsend)) - endif call MPIStart(CommT% irequest(1)) endif @@ -103,10 +94,6 @@ subroutine SendFlux(SnSweep, cSetID, sendIndex, PsiB) enddo ! Start send for this communicator (odd numbered handle) - - if (useDeviceAwareMPI) then - TOMP(target update to(CommT%psibsend)) - endif call MPIStart(CommT% irequest(1)) endif diff --git a/src/teton/rt/initcomm.F90 b/src/teton/rt/initcomm.F90 index fc71578..f0edcdc 100644 --- a/src/teton/rt/initcomm.F90 +++ b/src/teton/rt/initcomm.F90 @@ -57,9 +57,6 @@ subroutine initcomm(cSetID) integer :: numSets integer :: Groups - logical(kind=c_bool) :: useDeviceAwareMPI - useDeviceAwareMPI = Options%getMPIUseDeviceAddresses() .AND. Size%useGPU - ! Wait for all nodes to arrive CSet => getCommSetData(Quad, cSetID) @@ -95,48 +92,14 @@ subroutine initcomm(cSetID) offset = (myRank + neighbor)*NumAngles tag = tagBase + offset + Angle - if (useDeviceAwareMPI) then - ! Workaround for XLF internal compiler error if use_device_ptr is used - ! on a variable like %. - - associate(psibrecv => CommT%psibrecv, & - psibsend => CommT%psibsend) - - ! TODO - See if these use_device_ptr constructs can be applied to the - ! entire CSet%CommPtr array instead of on each array member within this - ! loop. - ! -- Aaron -#if defined(TETON_OPENMP_HAS_USE_DEVICE_ADDR) - TOMP(target data use_device_addr( psibrecv, psibsend )) -#else - TOMP(target data use_device_ptr( psibrecv, psibsend )) -#endif - - if (nrecv > 0) then - call MPIRecvInit(psibrecv, nrecv, neighbor, tag, & - COMM_GROUP, CommT% irequest(2)) - endif - - if (nsend > 0) then - call MPISendInit(psibsend, nsend, neighbor, tag, & - COMM_GROUP, CommT% irequest(1)) - endif - - TOMP(end target data) - end associate - - else - - if (nrecv > 0) then - call MPIRecvInit(CommT%psibrecv, nrecv, neighbor, tag, & - COMM_GROUP, CommT% irequest(2)) - endif - - if (nsend > 0) then - call MPISendInit(CommT%psibsend, nsend, neighbor, tag, & - COMM_GROUP, CommT% irequest(1)) - endif + if (nrecv > 0) then + call MPIRecvInit(CommT%psibrecv, nrecv, neighbor, tag, & + COMM_GROUP, CommT% irequest(2)) + endif + if (nsend > 0) then + call MPISendInit(CommT%psibsend, nsend, neighbor, tag, & + COMM_GROUP, CommT% irequest(1)) endif enddo CommunicatorLoop