Skip to content

Commit

Permalink
rewrite device global memory using Tag
Browse files Browse the repository at this point in the history
  • Loading branch information
AuroraPerego committed Feb 23, 2024
1 parent 97b602c commit 81aa05e
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 85 deletions.
1 change: 1 addition & 0 deletions include/alpaka/alpaka.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
#include "alpaka/mem/global/DeviceGlobalCpu.hpp"
#include "alpaka/mem/global/DeviceGlobalGenericSycl.hpp"
#include "alpaka/mem/global/DeviceGlobalUniformCudaHipBuiltIn.hpp"
#include "alpaka/mem/global/Traits.hpp"
#include "alpaka/mem/view/Traits.hpp"
#include "alpaka/mem/view/ViewConst.hpp"
#include "alpaka/mem/view/ViewPlainPtr.hpp"
Expand Down
70 changes: 20 additions & 50 deletions include/alpaka/core/Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
//! This macro defines a variable lying in global accelerator device memory.
//!
//! Example:
//! ALPAKA_STATIC_ACC_MEM_GLOBAL(int, variable);
//! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<int> variable;
//!
//! Those variables behave like ordinary variables when used in file-scope,
//! but inside kernels the get() method must be used to access the variable.
Expand All @@ -114,66 +114,40 @@
//! \attention It is not allowed to initialize the variable together with the declaration.
//! To initialize the variable alpaka::memcpy must be used.
//! \code{.cpp}
//! ALPAKA_STATIC_ACC_MEM_GLOBAL(int, foo);
//! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<int> foo;
//!
//! struct DeviceMemoryKernel
//! {
//! ALPAKA_NO_HOST_ACC_WARNING
//! template<typename TAcc>
//! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
//! {
//! auto a = foo.get();
//! auto a = foo<Tag>.get();
//! }
//! }
//!
//! void initFoo() {
//! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
//! int initialValue = 42;
//! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
//! alpaka::memcpy(queue, foo, bufHost, extent);
//! alpaka::memcpy(queue, foo<Tag>, bufHost, extent);
//! }
//! \endcode

namespace alpaka
{

template<typename T>
struct DevGlobal
{
using Type = std::remove_const_t<T>;
Type value; // backend specific value

ALPAKA_FN_HOST_ACC T* operator&()
{
return &value;
}

ALPAKA_FN_HOST_ACC T& get()
{
return value;
}
};
} // namespace alpaka

#if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
|| BOOST_LANG_HIP)
# define ALPAKA_STATIC_ACC_MEM_GLOBAL(type, name) \
template<typename TAcc> \
inline __device__ alpaka::DevGlobal<type> name
#elif defined(ALPAKA_ACC_SYCL_ENABLED)
# define ALPAKA_STATIC_ACC_MEM_GLOBAL(type, name) \
template<typename TAcc> \
inline sycl::ext::oneapi::experimental::device_global<type> name
# define ALPAKA_STATIC_ACC_MEM_GLOBAL \
template<typename TTag> \
inline __device__
#else
# define ALPAKA_STATIC_ACC_MEM_GLOBAL(type, name) \
template<typename TAcc> \
inline alpaka::DevGlobal<type> name
# define ALPAKA_STATIC_ACC_MEM_GLOBAL \
template<typename TTag> \
inline
#endif

//! This macro defines a variable lying in constant accelerator device memory.
//!
//! Example:
//! ALPAKA_STATIC_ACC_MEM_CONSTANT(int, variable);
//! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<int> variable;
//!
//! Those variables behave like ordinary variables when used in file-scope,
//! but inside kernels the get() method must be used to access the variable.
Expand All @@ -189,38 +163,34 @@ namespace alpaka
//! \attention It is not allowed to initialize the variable together with the declaration.
//! To initialize the variable alpaka::memcpy must be used.
//! \code{.cpp}
//! ALPAKA_STATIC_ACC_MEM_CONSTANT(int, foo);
//! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<int> foo;
//!
//! struct DeviceMemoryKernel
//! {
//! ALPAKA_NO_HOST_ACC_WARNING
//! template<typename TAcc>
//! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
//! {
//! auto a = foo.get();
//! auto a = foo<Tag>.get();
//! }
//! }
//!
//! void initFoo() {
//! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
//! int initialValue = 42;
//! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
//! alpaka::memcpy(queue, foo, bufHost, extent);
//! alpaka::memcpy(queue, foo<Tag>, bufHost, extent);
//! }
//! \endcode
#if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
|| BOOST_LANG_HIP)
# define ALPAKA_STATIC_ACC_MEM_CONSTANT(type, name) \
template<typename TAcc> \
inline __constant__ alpaka::DevGlobal<const type> name
#elif defined(ALPAKA_ACC_SYCL_ENABLED)
# define ALPAKA_STATIC_ACC_MEM_CONSTANT(type, name) \
template<typename TAcc> \
inline sycl::ext::oneapi::experimental::device_global<const type> name
# define ALPAKA_STATIC_ACC_MEM_CONSTANT \
template<typename TTag> \
inline __constant__
#else
# define ALPAKA_STATIC_ACC_MEM_CONSTANT(type, name) \
template<typename TAcc> \
inline alpaka::DevGlobal<const type> name
# define ALPAKA_STATIC_ACC_MEM_CONSTANT \
template<typename TTag> \
inline
#endif

//! This macro disables memory optimizations for annotated device memory.
Expand Down
59 changes: 49 additions & 10 deletions include/alpaka/mem/global/DeviceGlobalCpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,68 @@
#pragma once

#include "alpaka/mem/buf/cpu/Copy.hpp"
#include "alpaka/mem/global/Traits.hpp"
#include "alpaka/mem/view/ViewPlainPtr.hpp"

#include <type_traits>

// memcpy specialization for device global variables
namespace alpaka
{
template<typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(TQueue& queue, alpaka::DevGlobal<TViewDstFwd>& viewDst, TViewSrc const& viewSrc) -> void

namespace detail
{
template<typename T>
struct DevGlobalTrait<TagCpuOmp2Blocks, T>
{
using Type = detail::DevGlobalImplGeneric<TagCpuOmp2Blocks, T>;
};

template<typename T>
struct DevGlobalTrait<TagCpuOmp2Threads, T>
{
using Type = detail::DevGlobalImplGeneric<TagCpuOmp2Threads, T>;
};

template<typename T>
struct DevGlobalTrait<TagCpuSerial, T>
{
using Type = detail::DevGlobalImplGeneric<TagCpuSerial, T>;
};

template<typename T>
struct DevGlobalTrait<TagCpuTbbBlocks, T>
{
using Type = detail::DevGlobalImplGeneric<TagCpuTbbBlocks, T>;
};

template<typename T>
struct DevGlobalTrait<TagCpuThreads, T>
{
using Type = detail::DevGlobalImplGeneric<TagCpuThreads, T>;
};
} // namespace detail

template<typename TAcc, typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(
TQueue& queue,
alpaka::detail::DevGlobalImplGeneric<TAcc, TViewDstFwd>& viewDst,
TViewSrc const& viewSrc) -> void
{
//using TypeC = std::remove_all_extents_t<TViewDstFwd>;
using Type = std::remove_const_t<std::remove_all_extents_t<TViewDstFwd>>;
auto extent = getExtents(viewSrc);
auto view = alpaka::ViewPlainPtr<DevCpu, Type, alpaka::Dim<decltype(extent)>, alpaka::Idx<decltype(extent)>>(
//const_cast<std::remove_const_t<Type*>>(reinterpret_cast<Type*>(&viewDst)),
reinterpret_cast<Type*>(const_cast<std::remove_const_t<TViewDstFwd>*>(&viewDst)),
alpaka::getDev(queue),
extent);
enqueue(queue, createTaskMemcpy(std::forward<decltype(view)>(view), viewSrc, extent));
}

template<typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(TQueue& queue, TViewDstFwd&& viewDst, alpaka::DevGlobal<TViewSrc>& viewSrc) -> void
template<typename TAcc, typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(
TQueue& queue,
TViewDstFwd&& viewDst,
alpaka::detail::DevGlobalImplGeneric<TAcc, TViewSrc>& viewSrc) -> void
{
using Type = std::remove_all_extents_t<TViewSrc>;
auto extent = getExtents(viewDst);
Expand All @@ -38,10 +77,10 @@ namespace alpaka
enqueue(queue, createTaskMemcpy(std::forward<TViewDstFwd>(viewDst), view, extent));
}

template<typename TExtent, typename TViewSrc, typename TViewDstFwd, typename TQueue>
template<typename TAcc, typename TExtent, typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(
TQueue& queue,
alpaka::DevGlobal<TViewDstFwd>& viewDst,
alpaka::detail::DevGlobalImplGeneric<TAcc, TViewDstFwd>& viewDst,
TViewSrc const& viewSrc,
TExtent const& extent) -> void
{
Expand All @@ -53,11 +92,11 @@ namespace alpaka
enqueue(queue, createTaskMemcpy(std::forward<decltype(view)>(view), viewSrc, extent));
}

template<typename TExtent, typename TViewSrc, typename TViewDstFwd, typename TQueue>
template<typename TAcc, typename TExtent, typename TViewSrc, typename TViewDstFwd, typename TQueue>
ALPAKA_FN_HOST auto memcpy(
TQueue& queue,
TViewDstFwd&& viewDst,
alpaka::DevGlobal<TViewSrc>& viewSrc,
alpaka::detail::DevGlobalImplGeneric<TAcc, TViewSrc>& viewSrc,
TExtent const& extent) -> void
{
using Type = std::remove_all_extents_t<TViewSrc>;
Expand Down
32 changes: 32 additions & 0 deletions include/alpaka/mem/global/DeviceGlobalGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include "alpaka/mem/global/Traits.hpp"
#include "alpaka/queue/sycl/QueueGenericSyclBase.hpp"

#ifdef ALPAKA_ACC_SYCL_ENABLED
Expand All @@ -14,6 +15,37 @@ namespace alpaka
{
using sycl::ext::oneapi::experimental::device_global;

namespace detail
{
template<typename T>
struct DevGlobalTrait<TagCpuSycl, T>
{
// SYCL implementation
using Type = device_global<T>;
};

template<typename T>
struct DevGlobalTrait<TagGpuSyclIntel, T>
{
// SYCL implementation
using Type = device_global<T>;
};

template<typename T>
struct DevGlobalTrait<TagFpgaSyclIntel, T>
{
// SYCL implementation
using Type = device_global<T>;
};

template<typename T>
struct DevGlobalTrait<TagGenericSycl, T>
{
// SYCL implementation
using Type = device_global<T>;
};
} // namespace detail

// from device to host
template<typename TDev, bool TBlocking, typename TViewDst, typename TViewSrc>
ALPAKA_FN_HOST auto memcpy(
Expand Down
Loading

0 comments on commit 81aa05e

Please sign in to comment.