Skip to content

Commit

Permalink
changed MemFenceGenericSycl (need explicit use of atomics now)
Browse files Browse the repository at this point in the history
  • Loading branch information
AuroraPerego committed Aug 4, 2023
1 parent cb84d88 commit 267a9b7
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 45 deletions.
6 changes: 2 additions & 4 deletions include/alpaka/acc/AccGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ namespace alpaka
Vec<TDim, TIdx> const& threadElemExtent,
sycl::nd_item<TDim::value> work_item,
sycl::local_accessor<std::byte> dyn_shared_acc,
sycl::local_accessor<std::byte> st_shared_acc,
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device> global_fence_dummy,
sycl::local_accessor<int> local_fence_dummy)
sycl::local_accessor<std::byte> st_shared_acc)
: WorkDivGenericSycl<TDim, TIdx>{threadElemExtent, work_item}
, gb::IdxGbGenericSycl<TDim, TIdx>{work_item}
, bt::IdxBtGenericSycl<TDim, TIdx>{work_item}
Expand All @@ -89,7 +87,7 @@ namespace alpaka
, BlockSharedMemStGenericSycl{st_shared_acc}
, BlockSyncGenericSycl<TDim>{work_item}
, IntrinsicGenericSycl{}
, MemFenceGenericSycl{global_fence_dummy, local_fence_dummy}
, MemFenceGenericSycl{}
# ifdef ALPAKA_DISABLE_VENDOR_RNG
, rand::RandDefault{}
# else
Expand Down
20 changes: 3 additions & 17 deletions include/alpaka/kernel/TaskKernelGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,14 @@
[item_elements, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
local_fence_dummy, \
k_func, \
k_args](sycl::nd_item<TDim::value> work_item) [[intel::reqd_sub_group_size(sub_group_size)]] \
{ \
auto acc = TAcc{ \
item_elements, \
work_item, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
local_fence_dummy}; \
st_shared_accessor}; \
core::apply( \
[k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
k_args); \
Expand All @@ -66,18 +62,14 @@
[item_elements, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
local_fence_dummy, \
k_func, \
k_args](sycl::nd_item<TDim::value> work_item) \
{ \
auto acc = TAcc{ \
item_elements, \
work_item, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
local_fence_dummy}; \
st_shared_accessor}; \
core::apply( \
[k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
k_args); \
Expand All @@ -90,8 +82,6 @@
[item_elements, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
local_fence_dummy, \
k_func, \
k_args](sycl::nd_item<TDim::value> work_item) {});

Expand Down Expand Up @@ -121,7 +111,7 @@ namespace alpaka
{
}

auto operator()(sycl::handler& cgh, sycl::buffer<int, 1>& global_fence_buf) const -> void
auto operator()(sycl::handler& cgh) const -> void
{

auto const work_groups = WorkDivMembers<TDim, TIdx>::m_gridBlockExtent;
Expand All @@ -146,10 +136,6 @@ namespace alpaka
constexpr auto st_shared_mem_bytes = std::size_t{ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB * 1024};
auto st_shared_accessor = sycl::local_accessor<std::byte>{sycl::range<1>{st_shared_mem_bytes}, cgh};

// register memory fence dummies
auto global_fence_dummy = global_fence_buf.get_access(cgh); // Exists once per queue
auto local_fence_dummy = sycl::local_accessor<int>{sycl::range<1>{1}, cgh};

// copy-by-value so we don't access 'this' on the device
auto k_func = m_kernelFnObj;
auto k_args = m_args;
Expand Down
23 changes: 1 addition & 22 deletions include/alpaka/mem/fence/MemFenceGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,24 @@ namespace alpaka
struct SyclFenceProps<alpaka::memory_scope::Block>
{
static constexpr auto scope = sycl::memory_scope::work_group;
static constexpr auto space = sycl::access::address_space::local_space;
};

template<>
struct SyclFenceProps<alpaka::memory_scope::Device>
{
static constexpr auto scope = sycl::memory_scope::device;
static constexpr auto space = sycl::access::address_space::global_space;
};

template<>
struct SyclFenceProps<alpaka::memory_scope::Grid>
{
static constexpr auto scope = sycl::memory_scope::device;
static constexpr auto space = sycl::access::address_space::global_space;
};
} // namespace detail

//! The SYCL memory fence.
class MemFenceGenericSycl : public concepts::Implements<ConceptMemFence, MemFenceGenericSycl>
{
public:
MemFenceGenericSycl(
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device> global_dummy,
sycl::local_accessor<int> local_dummy)
: m_global_dummy{global_dummy}
, m_local_dummy{local_dummy}
{
}

sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device> m_global_dummy;
sycl::local_accessor<int> m_local_dummy;
};
} // namespace alpaka

Expand All @@ -63,17 +49,10 @@ namespace alpaka::trait
template<typename TMemScope>
struct MemFence<MemFenceGenericSycl, TMemScope>
{
static auto mem_fence(MemFenceGenericSycl const& fence, TMemScope const&)
static auto mem_fence(MemFenceGenericSycl const& , TMemScope const&)
{
static constexpr auto scope = detail::SyclFenceProps<TMemScope>::scope;
static constexpr auto space = detail::SyclFenceProps<TMemScope>::space;
auto dummy
= (scope == sycl::memory_scope::work_group)
? sycl::atomic_ref<int, sycl::memory_order::relaxed, scope, space>{fence.m_local_dummy[0]}
: sycl::atomic_ref<int, sycl::memory_order::relaxed, scope, space>{fence.m_global_dummy[0]};
auto const dummy_val = dummy.load();
sycl::atomic_fence(sycl::memory_order::acq_rel, scope);
dummy.store(dummy_val);
}
};
} // namespace alpaka::trait
Expand Down
3 changes: 1 addition & 2 deletions include/alpaka/queue/sycl/QueueGenericSyclBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ namespace alpaka::detail
cgh.depends_on(m_dependencies);

if constexpr(is_sycl_kernel<TTask>) // Kernel
task(cgh, m_fence_dummy); // Will call cgh.parallel_for internally
task(cgh); // Will call cgh.parallel_for internally
else // Host
cgh.host_task(task);
});
Expand All @@ -159,7 +159,6 @@ namespace alpaka::detail

std::vector<sycl::event> m_dependencies;
sycl::event m_last_event;
sycl::buffer<int, 1> m_fence_dummy{sycl::range<1>{1}};
std::shared_mutex mutable m_mutex;

private:
Expand Down

0 comments on commit 267a9b7

Please sign in to comment.