Skip to content

Commit

Permalink
remove ALPAKA_ASSERT_OFFLOAD, introduce ALPAKA_ASSERT_ACC
Browse files Browse the repository at this point in the history
fix #2186
fix #2001

Provide a device side assert `ALPAKA_ASSERT_ACC` which can be disabled
by defining `ALPAKA_DISABLE_ASSERT_ACC` in the C++ code or by turning
off the CMake option `alpaka_ASSERT_ACC_ENABLE`.
For CPU devices or host side code the assert behaves
like `ALPAKA_ASSERT`.

Co-authored-by: Andrea Bocci <andrea.bocci@cern.ch>
  • Loading branch information
2 people authored and bernhardmgruber committed Dec 20, 2023
1 parent 3503d42 commit 2e84f77
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 27 deletions.
9 changes: 6 additions & 3 deletions cmake/alpakaCommon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ if(alpaka_DISABLE_VENDOR_RNG)
target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_VENDOR_RNG")
endif()

# Device side assert
option(alpaka_ASSERT_ACC_ENABLE "Enable device side asserts. In case value is OFF device side asserts will be disabled even if NDEBUG is not defined." ON)
# CMake's if() has no '!' operator: "if(!alpaka_ASSERT_ACC_ENABLE)" would treat
# "!alpaka_ASSERT_ACC_ENABLE" as an (undefined) variable name and always evaluate
# to false, so ALPAKA_DISABLE_ASSERT_ACC would never be defined. Use NOT instead.
if(NOT alpaka_ASSERT_ACC_ENABLE)
    target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_ASSERT_ACC")
endif()

#-------------------------------------------------------------------------------
# Debug output of common variables.
if(${alpaka_DEBUG} GREATER 1)
Expand Down Expand Up @@ -731,9 +737,6 @@ if(alpaka_ACC_SYCL_ENABLE)
endif()

target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG=${alpaka_DEBUG}")
if(alpaka_DEBUG_OFFLOAD_ASSUME_HOST)
target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST")
endif()

target_compile_definitions(alpaka INTERFACE "ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB=${alpaka_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB}")

Expand Down
2 changes: 1 addition & 1 deletion example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct TestBufferKernel
for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0])
for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1])
for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2])
ALPAKA_ASSERT_OFFLOAD(
ALPAKA_ASSERT_ACC(
data(z, y, x)
== alpaka::mapIdx<1u>(Vec{z, y, x}, Vec{data.extent(0), data.extent(1), data.extent(2)})[0]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace alpaka
public:
BlockSharedMemDynMember(std::size_t sizeBytes) : m_dynPitch(getPitch(sizeBytes))
{
ALPAKA_ASSERT_OFFLOAD(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
ALPAKA_ASSERT_ACC(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
}

auto dynMemBegin() const -> uint8_t*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace alpaka::detail
: m_mem(mem)
, m_capacity(static_cast<std::uint32_t>(capacity))
{
ALPAKA_ASSERT_OFFLOAD((m_mem == nullptr) == (m_capacity == 0u));
ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
}
#else
BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t) : m_mem(mem)
Expand All @@ -52,12 +52,12 @@ namespace alpaka::detail
{
// Add meta data chunk in front of the user data
m_allocdBytes = varChunkEnd<MetaData>(m_allocdBytes);
ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
auto* meta = getLatestVarPtr<MetaData>();

// Allocate variable
m_allocdBytes = varChunkEnd<T>(m_allocdBytes);
ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);

// Update meta data with id and offset for the allocated variable.
meta->id = id;
Expand Down Expand Up @@ -87,7 +87,7 @@ namespace alpaka::detail
// Adjust offset to be aligned
std::uint32_t const alignedMetaDataOffset
= varChunkEnd<MetaData>(off) - static_cast<std::uint32_t>(sizeof(MetaData));
ALPAKA_ASSERT_OFFLOAD(
ALPAKA_ASSERT_ACC(
(alignedMetaDataOffset + static_cast<std::uint32_t>(sizeof(MetaData))) <= m_allocdBytes);
auto* metaDataPtr = reinterpret_cast<MetaData*>(m_mem + alignedMetaDataOffset);
off = metaDataPtr->offset;
Expand Down
55 changes: 40 additions & 15 deletions include/alpaka/core/Assert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,47 @@
#include <cassert>
#include <type_traits>

//! Host-side assert macro; it can be explicitly disabled by defining NDEBUG.
#define ALPAKA_ASSERT(...) assert(__VA_ARGS__)

#if defined(ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST) || defined(SYCL_EXT_ONEAPI_ASSERT)
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) ALPAKA_ASSERT(EXPRESSION)
#elif defined __AMDGCN__ && (!defined NDEBUG)
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) \
do \
{ \
if(!(EXPRESSION)) \
__builtin_trap(); \
} while(false)
//! Macro which expands to a noop.
//! The do/while wrapper enforces a semicolon after the call.
#define ALPAKA_NOOP(...) \
do \
{ \
} while(false)

//! ALPAKA_ASSERT_ACC_IMPL is an assert-like macro.
//! It can be disabled by defining the ALPAKA_DISABLE_ASSERT_ACC preprocessor symbol or the NDEBUG preprocessor
//! symbol.
#if !defined(ALPAKA_DISABLE_ASSERT_ACC)
# define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_ASSERT(__VA_ARGS__)
#else
# define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_NOOP(__VA_ARGS__)
#endif

//! ALPAKA_ASSERT_ACC is an assert-like macro.
//!
//! In device code for a GPU or SYCL backend it can be disabled by defining the ALPAKA_DISABLE_ASSERT_ACC preprocessor
//! symbol or the NDEBUG preprocessor symbol. In device code for a native C++ CPU backend and in host code, it is
//! equivalent to ALPAKA_ASSERT, and can be disabled by defining the NDEBUG preprocessor symbol.
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && defined(__CUDA_ARCH__)
// CUDA device code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
#elif defined(ALPAKA_ACC_GPU_HIP_ENABLED) && defined(__HIP_DEVICE_COMPILE__)
// HIP/ROCm device code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
#elif defined(ALPAKA_ACC_SYCL_ENABLED) && defined(__SYCL_DEVICE_ONLY__)
// SYCL/oneAPI device code
// NOTE(review): without the SYCL_EXT_ONEAPI_ASSERT extension the assert expands
// to a noop even in debug builds, because plain assert() is unsupported there.
# if defined(SYCL_EXT_ONEAPI_ASSERT)
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
# else
# define ALPAKA_ASSERT_ACC(...) ALPAKA_NOOP(__VA_ARGS__)
# endif
// add here any other #elif conditions for non-CPU backends
// ...
#else
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) \
do \
{ \
} while(false)
// CPU backend, or host code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT(__VA_ARGS__)
#endif

namespace alpaka::core
Expand All @@ -38,7 +63,7 @@ namespace alpaka::core
[[maybe_unused]] TArg const& arg)
{
if constexpr(std::is_signed_v<TArg>)
ALPAKA_ASSERT_OFFLOAD(arg >= 0);
ALPAKA_ASSERT_ACC(arg >= 0);

// Nothing to do for unsigned types.
}
Expand All @@ -63,7 +88,7 @@ namespace alpaka::core
[[maybe_unused]] TRhs const& rhs)
{
if constexpr(std::is_signed_v<TRhs> || (TLhs::value != 0u))
ALPAKA_ASSERT_OFFLOAD(TLhs::value > rhs);
ALPAKA_ASSERT_ACC(TLhs::value > rhs);

// Nothing to do for unsigned types comparing to zero.
}
Expand Down
2 changes: 1 addition & 1 deletion include/alpaka/idx/bt/IdxBtOmp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace alpaka
static auto getIdx(bt::IdxBtOmp<TDim, TIdx> const& /* idx */, TWorkDiv const& workDiv) -> Vec<TDim, TIdx>
{
// We assume that the thread id is positive.
ALPAKA_ASSERT_OFFLOAD(::omp_get_thread_num() >= 0);
ALPAKA_ASSERT_ACC(::omp_get_thread_num() >= 0);
// \TODO: Would it be faster to precompute the index and cache it inside an array?
return mapIdx<TDim::value>(
Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(::omp_get_thread_num())),
Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/warp/WarpGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ namespace alpaka::warp::trait
template<typename T>
static auto shfl(warp::WarpGenericSycl<TDim> const& warp, T value, std::int32_t srcLane, std::int32_t width)
{
ALPAKA_ASSERT_OFFLOAD(width > 0);
ALPAKA_ASSERT_OFFLOAD(srcLane >= 0);
ALPAKA_ASSERT_ACC(width > 0);
ALPAKA_ASSERT_ACC(srcLane >= 0);

/* If width < srcLane the sub-group needs to be split into assumed subdivisions. The first item of each
subdivision has the assumed index 0. The srcLane index is relative to the subdivisions.
Expand Down

0 comments on commit 2e84f77

Please sign in to comment.