diff --git a/cmake/alpakaCommon.cmake b/cmake/alpakaCommon.cmake
index 0d447f7fc2e3..fcc275354f2c 100644
--- a/cmake/alpakaCommon.cmake
+++ b/cmake/alpakaCommon.cmake
@@ -139,6 +139,12 @@ if(alpaka_DISABLE_VENDOR_RNG)
     target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_VENDOR_RNG")
 endif()
 
+# Device side assert
+option(alpaka_ASSERT_ACC_ENABLE "Enable device side asserts. In case value is OFF device side asserts will be disabled even if NDEBUG is not defined." ON)
+if(NOT alpaka_ASSERT_ACC_ENABLE)
+    target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_ASSERT_ACC")
+endif()
+
 #-------------------------------------------------------------------------------
 # Debug output of common variables.
 if(${alpaka_DEBUG} GREATER 1)
@@ -731,9 +737,6 @@ if(alpaka_ACC_SYCL_ENABLE)
 endif()
 
 target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG=${alpaka_DEBUG}")
-if(alpaka_DEBUG_OFFLOAD_ASSUME_HOST)
-    target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST")
-endif()
 
 target_compile_definitions(alpaka INTERFACE "ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB=${alpaka_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB}")
 
diff --git a/example/bufferCopy/src/bufferCopy.cpp b/example/bufferCopy/src/bufferCopy.cpp
index b1e53ff20f7e..998df9539175 100644
--- a/example/bufferCopy/src/bufferCopy.cpp
+++ b/example/bufferCopy/src/bufferCopy.cpp
@@ -39,7 +39,7 @@ struct TestBufferKernel
         for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0])
             for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1])
                 for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2])
-                    ALPAKA_ASSERT_OFFLOAD(
+                    ALPAKA_ASSERT_ACC(
                         data(z, y, x)
                         == alpaka::mapIdx<1u>(Vec{z, y, x}, Vec{data.extent(0), data.extent(1), data.extent(2)})[0]);
     }
diff --git a/include/alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp b/include/alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp
index fc1cced8ad4b..c6a323989d21 100644
--- a/include/alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp
+++ b/include/alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp
@@ -42,7 +42,7 @@ namespace alpaka
     public:
         BlockSharedMemDynMember(std::size_t sizeBytes) : m_dynPitch(getPitch(sizeBytes))
         {
-            ALPAKA_ASSERT_OFFLOAD(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
+            ALPAKA_ASSERT_ACC(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
         }
 
         auto dynMemBegin() const -> uint8_t*
diff --git a/include/alpaka/block/shared/st/detail/BlockSharedMemStMemberImpl.hpp b/include/alpaka/block/shared/st/detail/BlockSharedMemStMemberImpl.hpp
index 1cb4922556b6..eb09790ff859 100644
--- a/include/alpaka/block/shared/st/detail/BlockSharedMemStMemberImpl.hpp
+++ b/include/alpaka/block/shared/st/detail/BlockSharedMemStMemberImpl.hpp
@@ -39,7 +39,7 @@ namespace alpaka::detail
             : m_mem(mem)
             , m_capacity(static_cast<std::uint32_t>(capacity))
         {
-            ALPAKA_ASSERT_OFFLOAD((m_mem == nullptr) == (m_capacity == 0u));
+            ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
         }
 #else
         BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t) : m_mem(mem)
@@ -52,12 +52,12 @@ namespace alpaka::detail
         {
             // Add meta data chunk in front of the user data
             m_allocdBytes = varChunkEnd<MetaData>(m_allocdBytes);
-            ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
+            ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
             auto* meta = getLatestVarPtr<MetaData>();
 
             // Allocate variable
             m_allocdBytes = varChunkEnd<T>(m_allocdBytes);
-            ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
+            ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
 
             // Update meta data with id and offset for the allocated variable.
             meta->id = id;
@@ -87,7 +87,7 @@ namespace alpaka::detail
                 // Adjust offset to be aligned
                 std::uint32_t const alignedMetaDataOffset
                     = varChunkEnd<MetaData>(off) - static_cast<std::uint32_t>(sizeof(MetaData));
-                ALPAKA_ASSERT_OFFLOAD(
+                ALPAKA_ASSERT_ACC(
                     (alignedMetaDataOffset + static_cast<std::uint32_t>(sizeof(MetaData))) <= m_allocdBytes);
                 auto* metaDataPtr = reinterpret_cast<MetaData*>(m_mem + alignedMetaDataOffset);
                 off = metaDataPtr->offset;
diff --git a/include/alpaka/core/Assert.hpp b/include/alpaka/core/Assert.hpp
index 55e1560934ef..7ad2a2b0734e 100644
--- a/include/alpaka/core/Assert.hpp
+++ b/include/alpaka/core/Assert.hpp
@@ -9,22 +9,47 @@
 #include <cassert>
 #include <type_traits>
 
+//! The assert can be explicitly disabled by defining NDEBUG
 #define ALPAKA_ASSERT(...) assert(__VA_ARGS__)
 
-#if defined(ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST) || defined(SYCL_EXT_ONEAPI_ASSERT)
-#    define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) ALPAKA_ASSERT(EXPRESSION)
-#elif defined __AMDGCN__ && (!defined NDEBUG)
-#    define ALPAKA_ASSERT_OFFLOAD(EXPRESSION)                                                                         \
-        do                                                                                                            \
-        {                                                                                                             \
-            if(!(EXPRESSION))                                                                                         \
-                __builtin_trap();                                                                                     \
-        } while(false)
+//! Macro which expands to a noop.
+//! Macro enforces a semicolon after the call.
+#define ALPAKA_NOOP(...)                                                                                              \
+    do                                                                                                                \
+    {                                                                                                                 \
+    } while(false)
+
+//! ALPAKA_ASSERT_ACC_IMPL is an assert-like macro.
+//! It can be disabled setting the ALPAKA_DISABLE_ASSERT_ACC preprocessor symbol or the NDEBUG preprocessor symbol.
+#if !defined(ALPAKA_DISABLE_ASSERT_ACC)
+#    define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_ASSERT(__VA_ARGS__)
+#else
+#    define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_NOOP(__VA_ARGS__)
+#endif
+
+//! ALPAKA_ASSERT_ACC is an assert-like macro.
+//!
+//! In device code for a GPU or SYCL backend it can be disabled setting the ALPAKA_DISABLE_ASSERT_ACC preprocessor
+//! symbol or the NDEBUG preprocessor symbol. In device code for a native C++ CPU backend and in host code, it is
+//! equivalent to ALPAKA_ASSERT, and can be disabled setting the NDEBUG preprocessor symbol.
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && defined(__CUDA_ARCH__)
+// CUDA device code
+#    define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
+#elif defined(ALPAKA_ACC_GPU_HIP_ENABLED) && defined(__HIP_DEVICE_COMPILE__)
+// HIP/ROCm device code
+#    define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
+#elif defined(ALPAKA_ACC_SYCL_ENABLED) && defined(__SYCL_DEVICE_ONLY__)
+// SYCL/oneAPI device code
+#    if defined(SYCL_EXT_ONEAPI_ASSERT)
+#        define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
+#    else
+#        define ALPAKA_ASSERT_ACC(...) ALPAKA_NOOP(__VA_ARGS__)
+#    endif
+// add here any other #elif conditions for non-CPU backends
+// ...
 #else
-#    define ALPAKA_ASSERT_OFFLOAD(EXPRESSION)                                                                         \
-        do                                                                                                            \
-        {                                                                                                             \
-        } while(false)
+// CPU backend, or host code
+#    define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT(__VA_ARGS__)
 #endif
 
 namespace alpaka::core
@@ -38,7 +63,7 @@ namespace alpaka::core
             [[maybe_unused]] TArg const& arg)
         {
             if constexpr(std::is_signed_v<TArg>)
-                ALPAKA_ASSERT_OFFLOAD(arg >= 0);
+                ALPAKA_ASSERT_ACC(arg >= 0);
 
             // Nothing to do for unsigned types.
         }
@@ -63,7 +88,7 @@ namespace alpaka::core
             [[maybe_unused]] TRhs const& rhs)
         {
             if constexpr(std::is_signed_v<TRhs> || (TLhs::value != 0u))
-                ALPAKA_ASSERT_OFFLOAD(TLhs::value > rhs);
+                ALPAKA_ASSERT_ACC(TLhs::value > rhs);
 
             // Nothing to do for unsigned types comparing to zero.
         }
diff --git a/include/alpaka/idx/bt/IdxBtOmp.hpp b/include/alpaka/idx/bt/IdxBtOmp.hpp
index eb9930471eae..df5a96a87b9d 100644
--- a/include/alpaka/idx/bt/IdxBtOmp.hpp
+++ b/include/alpaka/idx/bt/IdxBtOmp.hpp
@@ -45,7 +45,7 @@ namespace alpaka
         static auto getIdx(bt::IdxBtOmp<TDim, TIdx> const& /* idx */, TWorkDiv const& workDiv) -> Vec<TDim, TIdx>
         {
             // We assume that the thread id is positive.
-            ALPAKA_ASSERT_OFFLOAD(::omp_get_thread_num() >= 0);
+            ALPAKA_ASSERT_ACC(::omp_get_thread_num() >= 0);
             // \TODO: Would it be faster to precompute the index and cache it inside an array?
             return mapIdx<TDim::value>(
                 Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(::omp_get_thread_num())),
diff --git a/include/alpaka/warp/WarpGenericSycl.hpp b/include/alpaka/warp/WarpGenericSycl.hpp
index 425d97a25859..51957ba79a27 100644
--- a/include/alpaka/warp/WarpGenericSycl.hpp
+++ b/include/alpaka/warp/WarpGenericSycl.hpp
@@ -119,8 +119,8 @@ namespace alpaka::warp::trait
         template<typename T>
         static auto shfl(warp::WarpGenericSycl<TDim> const& warp, T value, std::int32_t srcLane, std::int32_t width)
         {
-            ALPAKA_ASSERT_OFFLOAD(width > 0);
-            ALPAKA_ASSERT_OFFLOAD(srcLane >= 0);
+            ALPAKA_ASSERT_ACC(width > 0);
+            ALPAKA_ASSERT_ACC(srcLane >= 0);
 
             /* If width < srcLane the sub-group needs to be split into assumed subdivisions. The first item of each
                subdivision has the assumed index 0. The srcLane index is relative to the subdivisions.