From da1061bcee63bbba2566553bc97e96442bba8c83 Mon Sep 17 00:00:00 2001 From: AuroraPerego Date: Sun, 11 Aug 2024 23:28:26 +0200 Subject: [PATCH] template SYCL objects and device selectors on the Tag - template the Device, Queue, Event, Acc, Buf, Platform, EventHostManualTriggerSycl and device selectors on the Tag - adapt the default queue for the tests - avoid code duplication for SYCL where possible --- include/alpaka/acc/AccCpuSycl.hpp | 3 +- include/alpaka/acc/AccFpgaSyclIntel.hpp | 3 +- include/alpaka/acc/AccGenericSycl.hpp | 75 +- include/alpaka/acc/AccGpuSyclIntel.hpp | 3 +- include/alpaka/dev/DevCpuSycl.hpp | 4 +- include/alpaka/dev/DevFpgaSyclIntel.hpp | 4 +- include/alpaka/dev/DevGenericSycl.hpp | 234 +++-- include/alpaka/dev/DevGpuSyclIntel.hpp | 4 +- include/alpaka/event/EventCpuSycl.hpp | 4 +- include/alpaka/event/EventFpgaSyclIntel.hpp | 4 +- include/alpaka/event/EventGenericSycl.hpp | 60 +- include/alpaka/event/EventGpuSyclIntel.hpp | 4 +- include/alpaka/kernel/TaskKernelCpuSycl.hpp | 5 +- .../alpaka/kernel/TaskKernelFpgaSyclIntel.hpp | 4 +- .../alpaka/kernel/TaskKernelGenericSycl.hpp | 8 +- .../alpaka/kernel/TaskKernelGpuSyclIntel.hpp | 4 +- include/alpaka/mem/buf/BufCpuSycl.hpp | 1 + include/alpaka/mem/buf/BufFpgaSyclIntel.hpp | 1 + include/alpaka/mem/buf/BufGenericSycl.hpp | 100 +- include/alpaka/mem/buf/BufGpuSyclIntel.hpp | 1 + include/alpaka/mem/buf/sycl/Copy.hpp | 12 +- include/alpaka/mem/view/ViewPlainPtr.hpp | 17 +- include/alpaka/platform/PlatformCpuSycl.hpp | 15 +- .../alpaka/platform/PlatformFpgaSyclIntel.hpp | 15 +- .../alpaka/platform/PlatformGenericSycl.hpp | 954 +++++++++--------- .../alpaka/platform/PlatformGpuSyclIntel.hpp | 16 +- include/alpaka/queue/QueueCpuSyclBlocking.hpp | 4 +- .../alpaka/queue/QueueCpuSyclNonBlocking.hpp | 4 +- .../queue/QueueFpgaSyclIntelBlocking.hpp | 4 +- .../queue/QueueFpgaSyclIntelNonBlocking.hpp | 4 +- .../alpaka/queue/QueueGenericSyclBlocking.hpp | 4 +- .../queue/QueueGenericSyclNonBlocking.hpp | 4 +- .../queue/QueueGpuSyclIntelBlocking.hpp | 4 +- .../queue/QueueGpuSyclIntelNonBlocking.hpp | 4 +- .../queue/sycl/QueueGenericSyclBase.hpp | 432 ++++---- include/alpaka/test/acc/TestAccs.hpp | 6 +- .../test/event/EventHostManualTrigger.hpp | 43 +- include/alpaka/test/queue/Queue.hpp | 91 +- 38 files changed, 1052 insertions(+), 1107 deletions(-) diff --git a/include/alpaka/acc/AccCpuSycl.hpp b/include/alpaka/acc/AccCpuSycl.hpp index c7049394bbfd..ea5ac22a09ca 100644 --- a/include/alpaka/acc/AccCpuSycl.hpp +++ b/include/alpaka/acc/AccCpuSycl.hpp @@ -7,7 +7,6 @@ #include "alpaka/acc/AccGenericSycl.hpp" #include "alpaka/acc/Tag.hpp" #include "alpaka/core/Sycl.hpp" -#include "alpaka/platform/PlatformCpuSycl.hpp" #include #include @@ -20,7 +19,7 @@ namespace alpaka //! //! This accelerator allows parallel kernel execution on a oneAPI-capable CPU target device. template - using AccCpuSycl = AccGenericSycl; + using AccCpuSycl = AccGenericSycl; namespace trait { diff --git a/include/alpaka/acc/AccFpgaSyclIntel.hpp b/include/alpaka/acc/AccFpgaSyclIntel.hpp index a6de9b73b43b..1d1b6a937288 100644 --- a/include/alpaka/acc/AccFpgaSyclIntel.hpp +++ b/include/alpaka/acc/AccFpgaSyclIntel.hpp @@ -7,7 +7,6 @@ #include "alpaka/acc/AccGenericSycl.hpp" #include "alpaka/acc/Tag.hpp" #include "alpaka/core/Sycl.hpp" -#include "alpaka/platform/PlatformFpgaSyclIntel.hpp" #include #include @@ -20,7 +19,7 @@ namespace alpaka //! //! This accelerator allows parallel kernel execution on a oneAPI-capable Intel FPGA target device. template - using AccFpgaSyclIntel = AccGenericSycl; + using AccFpgaSyclIntel = AccGenericSycl; namespace trait { diff --git a/include/alpaka/acc/AccGenericSycl.hpp b/include/alpaka/acc/AccGenericSycl.hpp index b9437ee76723..b7132a749de6 100644 --- a/include/alpaka/acc/AccGenericSycl.hpp +++ b/include/alpaka/acc/AccGenericSycl.hpp @@ -46,13 +46,13 @@ namespace alpaka { - template + template class TaskKernelGenericSycl; //! The SYCL accelerator. //! //! This accelerator allows parallel kernel execution on SYCL devices. - template + template class AccGenericSycl : public WorkDivGenericSycl , public gb::IdxGbGenericSycl @@ -103,30 +103,29 @@ namespace alpaka namespace alpaka::trait { //! The SYCL accelerator type trait specialization. - template - struct AccType> + template + struct AccType> { - using type = AccGenericSycl; + using type = AccGenericSycl; }; //! The SYCL single thread accelerator type trait specialization. - template - struct IsSingleThreadAcc> : std::false_type + template + struct IsSingleThreadAcc> : std::false_type { }; //! The SYCL multi thread accelerator type trait specialization. - template - struct IsMultiThreadAcc> : std::true_type + template + struct IsMultiThreadAcc> : std::true_type { }; //! The SYCL accelerator device properties get trait specialization. - template - struct GetAccDevProps> + template + struct GetAccDevProps> { - static auto getAccDevProps(DevGenericSycl> const& dev) - -> AccDevProps + static auto getAccDevProps(DevGenericSycl const& dev) -> AccDevProps { auto const device = dev.getNativeHandle().first; auto const max_threads_dim @@ -160,63 +159,53 @@ namespace alpaka::trait }; //! The SYCL accelerator name trait specialization. - template - struct GetAccName> + template + struct GetAccName> { static auto getAccName() -> std::string { - // TODO implement TSelector::name - return std::string("Acc") + TSelector::name + "<" + std::to_string(TDim::value) + "," - + core::demangled + ">"; + return std::string("Acc") + detail::SYCLDeviceSelector::name + "<" + std::to_string(TDim::value) + + "," + core::demangled + ">"; } }; //! The SYCL accelerator device type trait specialization. - template - struct DevType> + template + struct DevType> { - using type = DevGenericSycl>; + using type = DevGenericSycl; }; //! The SYCL accelerator dimension getter trait specialization. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The SYCL accelerator execution task type trait specialization. - template< - typename TSelector, - typename TDim, - typename TIdx, - typename TWorkDiv, - typename TKernelFnObj, - typename... TArgs> - struct CreateTaskKernel, TWorkDiv, TKernelFnObj, TArgs...> + template + struct CreateTaskKernel, TWorkDiv, TKernelFnObj, TArgs...> { static auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args) { - return TaskKernelGenericSycl< - TSelector, - AccGenericSycl, - TDim, - TIdx, - TKernelFnObj, - TArgs...>{workDiv, kernelFnObj, std::forward(args)...}; + return TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>{ + workDiv, + kernelFnObj, + std::forward(args)...}; } }; //! The SYCL execution task platform type trait specialization. - template - struct PlatformType> + template + struct PlatformType> { - using type = PlatformGenericSycl; + using type = PlatformGenericSycl; }; //! The SYCL accelerator idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; diff --git a/include/alpaka/acc/AccGpuSyclIntel.hpp b/include/alpaka/acc/AccGpuSyclIntel.hpp index 1e2b55d1f038..d544b9c1749e 100644 --- a/include/alpaka/acc/AccGpuSyclIntel.hpp +++ b/include/alpaka/acc/AccGpuSyclIntel.hpp @@ -7,7 +7,6 @@ #include "alpaka/acc/AccGenericSycl.hpp" #include "alpaka/acc/Tag.hpp" #include "alpaka/core/Sycl.hpp" -#include "alpaka/platform/PlatformGpuSyclIntel.hpp" #include #include @@ -20,7 +19,7 @@ namespace alpaka //! //! This accelerator allows parallel kernel execution on a oneAPI-capable Intel GPU target device. template - using AccGpuSyclIntel = AccGenericSycl; + using AccGpuSyclIntel = AccGenericSycl; namespace trait { diff --git a/include/alpaka/dev/DevCpuSycl.hpp b/include/alpaka/dev/DevCpuSycl.hpp index 04b15a867558..5e9a2321769c 100644 --- a/include/alpaka/dev/DevCpuSycl.hpp +++ b/include/alpaka/dev/DevCpuSycl.hpp @@ -4,14 +4,14 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/dev/DevGenericSycl.hpp" -#include "alpaka/platform/PlatformCpuSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) namespace alpaka { - using DevCpuSycl = DevGenericSycl; + using DevCpuSycl = DevGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/dev/DevFpgaSyclIntel.hpp b/include/alpaka/dev/DevFpgaSyclIntel.hpp index 516027db6b2a..8004aad039fc 100644 --- a/include/alpaka/dev/DevFpgaSyclIntel.hpp +++ b/include/alpaka/dev/DevFpgaSyclIntel.hpp @@ -4,14 +4,14 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/dev/DevGenericSycl.hpp" -#include "alpaka/platform/PlatformFpgaSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) namespace alpaka { - using DevFpgaSyclIntel = DevGenericSycl; + using DevFpgaSyclIntel = DevGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/dev/DevGenericSycl.hpp b/include/alpaka/dev/DevGenericSycl.hpp index 729090f8f2d3..efbcad92a0e5 100644 --- a/include/alpaka/dev/DevGenericSycl.hpp +++ b/include/alpaka/dev/DevGenericSycl.hpp @@ -32,7 +32,22 @@ namespace alpaka { - template + namespace trait + { + template + struct GetDevByIdx; + } // namespace trait + + template + using QueueGenericSyclBlocking = detail::QueueGenericSyclBase; + + template + using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase; + + template + struct PlatformGenericSycl; + + template class BufGenericSycl; namespace detail @@ -105,11 +120,13 @@ namespace alpaka } // namespace detail //! The SYCL device handle. - template + template class DevGenericSycl - : public concepts::Implements> - , public concepts::Implements> + : public concepts::Implements> + , public concepts::Implements> { + friend struct trait::GetDevByIdx>; + public: DevGenericSycl(sycl::device device, sycl::context context) : m_impl{std::make_shared(std::move(device), std::move(context))} @@ -133,128 +150,133 @@ namespace alpaka std::shared_ptr m_impl; }; -} // namespace alpaka -namespace alpaka::trait -{ - //! The SYCL device name get trait specialization. - template - struct GetName> + namespace trait { - static auto getName(DevGenericSycl const& dev) -> std::string + //! The SYCL device name get trait specialization. + template + struct GetName> { - auto const device = dev.getNativeHandle().first; - return device.template get_info(); - } - }; + static auto getName(DevGenericSycl const& dev) -> std::string + { + auto const device = dev.getNativeHandle().first; + return device.template get_info(); + } + }; - //! The SYCL device available memory get trait specialization. - template - struct GetMemBytes> - { - static auto getMemBytes(DevGenericSycl const& dev) -> std::size_t + //! The SYCL device available memory get trait specialization. + template + struct GetMemBytes> { - auto const device = dev.getNativeHandle().first; - return device.template get_info(); - } - }; + static auto getMemBytes(DevGenericSycl const& dev) -> std::size_t + { + auto const device = dev.getNativeHandle().first; + return device.template get_info(); + } + }; - //! The SYCL device free memory get trait specialization. - template - struct GetFreeMemBytes> - { - static auto getFreeMemBytes(DevGenericSycl const& /* dev */) -> std::size_t + //! The SYCL device free memory get trait specialization. + template + struct GetFreeMemBytes> { - static_assert(!sizeof(TPlatform), "Querying free device memory not supported for SYCL devices."); - return std::size_t{}; - } - }; + static auto getFreeMemBytes(DevGenericSycl const& /* dev */) -> std::size_t + { + static_assert( + !sizeof(PlatformGenericSycl), + "Querying free device memory not supported for SYCL devices."); + return std::size_t{}; + } + }; - //! The SYCL device warp size get trait specialization. - template - struct GetWarpSizes> - { - static auto getWarpSizes(DevGenericSycl const& dev) -> std::vector + //! The SYCL device warp size get trait specialization. + template + struct GetWarpSizes> { - auto const device = dev.getNativeHandle().first; - std::vector warp_sizes = device.template get_info(); - // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not - auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64); - if(find64 != warp_sizes.end()) - warp_sizes.erase(find64); - // Sort the warp sizes in decreasing order - std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{}); - return warp_sizes; - } - }; + static auto getWarpSizes(DevGenericSycl const& dev) -> std::vector + { + auto const device = dev.getNativeHandle().first; + std::vector warp_sizes = device.template get_info(); + // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not + auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64); + if(find64 != warp_sizes.end()) + warp_sizes.erase(find64); + // Sort the warp sizes in decreasing order + std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{}); + return warp_sizes; + } + }; - //! The SYCL device preferred warp size get trait specialization. - template - struct GetPreferredWarpSize> - { - static auto getPreferredWarpSize(DevGenericSycl const& dev) -> std::size_t + //! The SYCL device preferred warp size get trait specialization. + template + struct GetPreferredWarpSize> { - return GetWarpSizes>::getWarpSizes(dev).front(); - } - }; + static auto getPreferredWarpSize(DevGenericSycl const& dev) -> std::size_t + { + return GetWarpSizes>::getWarpSizes(dev).front(); + } + }; - //! The SYCL device reset trait specialization. - template - struct Reset> - { - static auto reset(DevGenericSycl const&) -> void + //! The SYCL device reset trait specialization. + template + struct Reset> { - static_assert(!sizeof(TPlatform), "Explicit device reset not supported for SYCL devices"); - } - }; + static auto reset(DevGenericSycl const&) -> void + { + static_assert( + !sizeof(PlatformGenericSycl), + "Explicit device reset not supported for SYCL devices"); + } + }; - //! The SYCL device native handle trait specialization. - template - struct NativeHandle> - { - [[nodiscard]] static auto getNativeHandle(DevGenericSycl const& dev) + //! The SYCL device native handle trait specialization. + template + struct NativeHandle> { - return dev.getNativeHandle(); - } - }; + [[nodiscard]] static auto getNativeHandle(DevGenericSycl const& dev) + { + return dev.getNativeHandle(); + } + }; - //! The SYCL device memory buffer type trait specialization. - template - struct BufType, TElem, TDim, TIdx> - { - using type = BufGenericSycl; - }; + //! The SYCL device memory buffer type trait specialization. + template + struct BufType, TElem, TDim, TIdx> + { + using type = BufGenericSycl; + }; - //! The SYCL device platform type trait specialization. - template - struct PlatformType> - { - using type = TPlatform; - }; + //! The SYCL device platform type trait specialization. + template + struct PlatformType> + { + using type = PlatformGenericSycl; + }; - //! The thread SYCL device wait specialization. - template - struct CurrentThreadWaitFor> - { - static auto currentThreadWaitFor(DevGenericSycl const& dev) -> void + //! The thread SYCL device wait specialization. + template + struct CurrentThreadWaitFor> { - dev.m_impl->wait(); - } - }; + static auto currentThreadWaitFor(DevGenericSycl const& dev) -> void + { + dev.m_impl->wait(); + } + }; - //! The SYCL blocking queue trait specialization. - template - struct QueueType, Blocking> - { - using type = detail::QueueGenericSyclBase, true>; - }; + //! The SYCL blocking queue trait specialization. + template + struct QueueType, Blocking> + { + using type = QueueGenericSyclBlocking; + }; - //! The SYCL non-blocking queue trait specialization. - template - struct QueueType, NonBlocking> - { - using type = detail::QueueGenericSyclBase, false>; - }; -} // namespace alpaka::trait + //! The SYCL non-blocking queue trait specialization. + template + struct QueueType, NonBlocking> + { + using type = QueueGenericSyclNonBlocking; + }; + + } // namespace trait +} // namespace alpaka #endif diff --git a/include/alpaka/dev/DevGpuSyclIntel.hpp b/include/alpaka/dev/DevGpuSyclIntel.hpp index 9897d40ebbc5..d26bb4ca72db 100644 --- a/include/alpaka/dev/DevGpuSyclIntel.hpp +++ b/include/alpaka/dev/DevGpuSyclIntel.hpp @@ -4,14 +4,14 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/dev/DevGenericSycl.hpp" -#include "alpaka/platform/PlatformGpuSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) namespace alpaka { - using DevGpuSyclIntel = DevGenericSycl; + using DevGpuSyclIntel = DevGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/event/EventCpuSycl.hpp b/include/alpaka/event/EventCpuSycl.hpp index c95ed8e20df3..7c5b6310ae5b 100644 --- a/include/alpaka/event/EventCpuSycl.hpp +++ b/include/alpaka/event/EventCpuSycl.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevCpuSycl.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/event/EventGenericSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) namespace alpaka { - using EventCpuSycl = EventGenericSycl; + using EventCpuSycl = EventGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/event/EventFpgaSyclIntel.hpp b/include/alpaka/event/EventFpgaSyclIntel.hpp index d79d8aca2cfd..0148c967e4f6 100644 --- a/include/alpaka/event/EventFpgaSyclIntel.hpp +++ b/include/alpaka/event/EventFpgaSyclIntel.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevFpgaSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/event/EventGenericSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) namespace alpaka { - using EventFpgaSyclIntel = EventGenericSycl; + using EventFpgaSyclIntel = EventGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/event/EventGenericSycl.hpp b/include/alpaka/event/EventGenericSycl.hpp index 68011a0247cd..8b81f7029b17 100644 --- a/include/alpaka/event/EventGenericSycl.hpp +++ b/include/alpaka/event/EventGenericSycl.hpp @@ -22,11 +22,11 @@ namespace alpaka { //! The SYCL device event. - template + template class EventGenericSycl final { public: - explicit EventGenericSycl(TDev const& dev) : m_dev{dev} + explicit EventGenericSycl(DevGenericSycl const& dev) : m_dev{dev} { } @@ -50,7 +50,7 @@ namespace alpaka m_event = event; } - TDev m_dev; + DevGenericSycl m_dev; private: sycl::event m_event{}; @@ -60,20 +60,20 @@ namespace alpaka namespace alpaka::trait { //! The SYCL device event device get trait specialization. - template - struct GetDev> + template + struct GetDev> { - static auto getDev(EventGenericSycl const& event) -> TDev + static auto getDev(EventGenericSycl const& event) -> DevGenericSycl { return event.m_dev; } }; //! The SYCL device event test trait specialization. - template - struct IsComplete> + template + struct IsComplete> { - static auto isComplete(EventGenericSycl const& event) + static auto isComplete(EventGenericSycl const& event) { auto const status = event.getNativeHandle().template get_info(); @@ -82,20 +82,20 @@ namespace alpaka::trait }; //! The SYCL queue enqueue trait specialization. - template - struct Enqueue, EventGenericSycl> + template + struct Enqueue, EventGenericSycl> { - static auto enqueue(QueueGenericSyclNonBlocking& queue, EventGenericSycl& event) + static auto enqueue(QueueGenericSyclNonBlocking& queue, EventGenericSycl& event) { event.setEvent(queue.m_spQueueImpl->get_last_event()); } }; //! The SYCL queue enqueue trait specialization. - template - struct Enqueue, EventGenericSycl> + template + struct Enqueue, EventGenericSycl> { - static auto enqueue(QueueGenericSyclBlocking& queue, EventGenericSycl& event) + static auto enqueue(QueueGenericSyclBlocking& queue, EventGenericSycl& event) { event.setEvent(queue.m_spQueueImpl->get_last_event()); } @@ -105,30 +105,30 @@ namespace alpaka::trait //! //! Waits until the event itself and therefore all tasks preceding it in the queue it is enqueued to have been //! completed. If the event is not enqueued to a queue the method returns immediately. - template - struct CurrentThreadWaitFor> + template + struct CurrentThreadWaitFor> { - static auto currentThreadWaitFor(EventGenericSycl const& event) + static auto currentThreadWaitFor(EventGenericSycl const& event) { event.getNativeHandle().wait_and_throw(); } }; //! The SYCL queue event wait trait specialization. - template - struct WaiterWaitFor, EventGenericSycl> + template + struct WaiterWaitFor, EventGenericSycl> { - static auto waiterWaitFor(QueueGenericSyclNonBlocking& queue, EventGenericSycl const& event) + static auto waiterWaitFor(QueueGenericSyclNonBlocking& queue, EventGenericSycl const& event) { queue.m_spQueueImpl->register_dependency(event.getNativeHandle()); } }; //! The SYCL queue event wait trait specialization. - template - struct WaiterWaitFor, EventGenericSycl> + template + struct WaiterWaitFor, EventGenericSycl> { - static auto waiterWaitFor(QueueGenericSyclBlocking& queue, EventGenericSycl const& event) + static auto waiterWaitFor(QueueGenericSyclBlocking& queue, EventGenericSycl const& event) { queue.m_spQueueImpl->register_dependency(event.getNativeHandle()); } @@ -138,20 +138,20 @@ namespace alpaka::trait //! //! Any future work submitted in any queue of this device will wait for event to complete before beginning //! execution. - template - struct WaiterWaitFor> + template + struct WaiterWaitFor, EventGenericSycl> { - static auto waiterWaitFor(TDev& dev, EventGenericSycl const& event) + static auto waiterWaitFor(DevGenericSycl& dev, EventGenericSycl const& event) { dev.m_impl->register_dependency(event.getNativeHandle()); } }; //! The SYCL device event native handle trait specialization. - template - struct NativeHandle> + template + struct NativeHandle> { - [[nodiscard]] static auto getNativeHandle(EventGenericSycl const& event) + [[nodiscard]] static auto getNativeHandle(EventGenericSycl const& event) { return event.getNativeHandle(); } diff --git a/include/alpaka/event/EventGpuSyclIntel.hpp b/include/alpaka/event/EventGpuSyclIntel.hpp index d59562a9e497..0f1bee69be20 100644 --- a/include/alpaka/event/EventGpuSyclIntel.hpp +++ b/include/alpaka/event/EventGpuSyclIntel.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevGpuSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/event/EventGenericSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) namespace alpaka { - using EventGpuSyclIntel = EventGenericSycl; + using EventGpuSyclIntel = EventGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/kernel/TaskKernelCpuSycl.hpp b/include/alpaka/kernel/TaskKernelCpuSycl.hpp index 528846246053..3feef5e0b1aa 100644 --- a/include/alpaka/kernel/TaskKernelCpuSycl.hpp +++ b/include/alpaka/kernel/TaskKernelCpuSycl.hpp @@ -4,15 +4,16 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/kernel/TaskKernelGenericSycl.hpp" -#include "alpaka/platform/PlatformCpuSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) namespace alpaka { template - using TaskKernelCpuSycl = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; + using TaskKernelCpuSycl + = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; } // namespace alpaka diff --git a/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp b/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp index 2e54f279c471..6d75b06f86a7 100644 --- a/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp +++ b/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp @@ -4,8 +4,8 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/kernel/TaskKernelGenericSycl.hpp" -#include "alpaka/platform/PlatformFpgaSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) @@ -13,7 +13,7 @@ namespace alpaka { template using TaskKernelFpgaSyclIntel - = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; + = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; } // namespace alpaka diff --git a/include/alpaka/kernel/TaskKernelGenericSycl.hpp b/include/alpaka/kernel/TaskKernelGenericSycl.hpp index b56d905c7f66..11cc2cae4590 100644 --- a/include/alpaka/kernel/TaskKernelGenericSycl.hpp +++ b/include/alpaka/kernel/TaskKernelGenericSycl.hpp @@ -4,8 +4,8 @@ #pragma once -#include "alpaka/acc/Traits.hpp" #include "alpaka/acc/AccGenericSycl.hpp" +#include "alpaka/acc/Traits.hpp" #include "alpaka/core/BoostPredef.hpp" #include "alpaka/core/Sycl.hpp" #include "alpaka/dev/Traits.hpp" @@ -71,7 +71,7 @@ namespace alpaka { //! The SYCL accelerator execution task. - template + template class TaskKernelGenericSycl final : public WorkDivMembers { public: @@ -280,7 +280,7 @@ namespace alpaka::trait }; //! \brief Specialisation of the class template FunctionAttributes - //! \tparam TSelector The SYCL device selector. + //! \tparam TTag The SYCL device selector. //! \tparam TDev The device type. //! \tparam TDim The dimensionality of the accelerator device properties. //! \tparam TIdx The idx type of the accelerator device properties. @@ -302,7 +302,7 @@ namespace alpaka::trait alpaka::KernelFunctionAttributes kernelFunctionAttributes; // set function properties for maxThreadsPerBlock to device properties - auto const& props = alpaka::getAccDevProps>(dev); + auto const& props = alpaka::getAccDevProps>(dev); kernelFunctionAttributes.maxThreadsPerBlock = static_cast(props.m_blockThreadCountMax); return kernelFunctionAttributes; } diff --git a/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp b/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp index 175c76129fa4..b4543d0b4c22 100644 --- a/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp +++ b/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp @@ -4,8 +4,8 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/kernel/TaskKernelGenericSycl.hpp" -#include "alpaka/platform/PlatformGpuSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) @@ -13,7 +13,7 @@ namespace alpaka { template using TaskKernelGpuSyclIntel - = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; + = TaskKernelGenericSycl, TDim, TIdx, TKernelFnObj, TArgs...>; } // namespace alpaka diff --git a/include/alpaka/mem/buf/BufCpuSycl.hpp b/include/alpaka/mem/buf/BufCpuSycl.hpp index d63eebf540ca..2e7559946ecc 100644 --- a/include/alpaka/mem/buf/BufCpuSycl.hpp +++ b/include/alpaka/mem/buf/BufCpuSycl.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevCpuSycl.hpp" #include "alpaka/mem/buf/BufGenericSycl.hpp" +#include "alpaka/platform/PlatformCpuSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) diff --git a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp index 8daafd75d099..926227cc840f 100644 --- a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevFpgaSyclIntel.hpp" #include "alpaka/mem/buf/BufGenericSycl.hpp" +#include "alpaka/platform/PlatformFpgaSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) diff --git a/include/alpaka/mem/buf/BufGenericSycl.hpp b/include/alpaka/mem/buf/BufGenericSycl.hpp index b4a5fd94ed54..221aa55ed396 100644 --- a/include/alpaka/mem/buf/BufGenericSycl.hpp +++ b/include/alpaka/mem/buf/BufGenericSycl.hpp @@ -24,8 +24,8 @@ namespace alpaka { //! The SYCL memory buffer. - template - class BufGenericSycl : public internal::ViewAccessOps> + template + class BufGenericSycl : public internal::ViewAccessOps> { public: static_assert( @@ -36,7 +36,7 @@ namespace alpaka //! Constructor template - BufGenericSycl(DevGenericSycl const& dev, TElem* const pMem, Deleter deleter, TExtent const& extent) + BufGenericSycl(DevGenericSycl const& dev, TElem* const pMem, Deleter deleter, TExtent const& extent) : m_dev{dev} , m_extentElements{getExtentVecEnd(extent)} , m_spMem(pMem, std::move(deleter)) @@ -53,7 +53,7 @@ namespace alpaka "The idx type of TExtent and the TIdx template parameter have to be identical!"); } - DevGenericSycl m_dev; + DevGenericSycl m_dev; Vec m_extentElements; std::shared_ptr m_spMem; }; @@ -62,68 +62,67 @@ namespace alpaka namespace alpaka::trait { //! The BufGenericSycl device type trait specialization. - template - struct DevType> + template + struct DevType> { - using type = DevGenericSycl; + using type = DevGenericSycl; }; //! The BufGenericSycl device get trait specialization. - template - struct GetDev> + template + struct GetDev> { - static auto getDev(BufGenericSycl const& buf) + static auto getDev(BufGenericSycl const& buf) { return buf.m_dev; } }; //! The BufGenericSycl dimension getter trait specialization. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The BufGenericSycl memory element type get trait specialization. - template - struct ElemType> + template + struct ElemType> { using type = TElem; }; //! The BufGenericSycl extent get trait specialization. - template - struct GetExtents> + template + struct GetExtents> { - auto operator()(BufGenericSycl const& buf) const + auto operator()(BufGenericSycl const& buf) const { return buf.m_extentElements; } }; //! The BufGenericSycl native pointer get trait specialization. - template - struct GetPtrNative> + template + struct GetPtrNative> { - static auto getPtrNative(BufGenericSycl const& buf) -> TElem const* + static auto getPtrNative(BufGenericSycl const& buf) -> TElem const* { return buf.m_spMem.get(); } - static auto getPtrNative(BufGenericSycl& buf) -> TElem* + static auto getPtrNative(BufGenericSycl& buf) -> TElem* { return buf.m_spMem.get(); } }; //! The BufGenericSycl pointer on device get trait specialization. - template - struct GetPtrDev, DevGenericSycl> + template + struct GetPtrDev, DevGenericSycl> { - static auto getPtrDev( - BufGenericSycl const& buf, - DevGenericSycl const& dev) -> TElem const* + static auto getPtrDev(BufGenericSycl const& buf, DevGenericSycl const& dev) + -> TElem const* { if(dev == getDev(buf)) { @@ -135,8 +134,7 @@ namespace alpaka::trait } } - static auto getPtrDev(BufGenericSycl& buf, DevGenericSycl const& dev) - -> TElem* + static auto getPtrDev(BufGenericSycl& buf, DevGenericSycl const& dev) -> TElem* { if(dev == getDev(buf)) { @@ -150,12 +148,12 @@ namespace alpaka::trait }; //! The SYCL memory allocation trait specialization. - template - struct BufAlloc> + template + struct BufAlloc> { template - static auto allocBuf(DevGenericSycl const& dev, TExtent const& extent) - -> BufGenericSycl + static auto allocBuf(DevGenericSycl const& dev, TExtent const& extent) + -> BufGenericSycl { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -197,38 +195,40 @@ namespace alpaka::trait nativeContext); auto deleter = [ctx = nativeContext](TElem* ptr) { sycl::free(ptr, ctx); }; - return BufGenericSycl(dev, memPtr, std::move(deleter), extent); + return BufGenericSycl(dev, memPtr, std::move(deleter), extent); } }; //! The BufGenericSycl stream-ordered memory allocation capability trait specialization. - template - struct HasAsyncBufSupport> : std::false_type + template + struct HasAsyncBufSupport> : std::false_type { }; //! The BufGenericSycl offset get trait specialization. - template - struct GetOffsets> + template + struct GetOffsets> { - auto operator()(BufGenericSycl const&) const -> Vec + auto operator()(BufGenericSycl const&) const -> Vec { return Vec::zeros(); } }; //! The pinned/mapped memory allocation trait specialization for the SYCL devices. - template - struct BufAllocMapped + template + struct BufAllocMapped, TElem, TDim, TIdx> { template - static auto allocMappedBuf(DevCpu const& host, TPlatform const& platform, TExtent const& extent) - -> BufCpu + static auto allocMappedBuf( + DevCpu const& host, + PlatformGenericSycl const& platform, + TExtent const& extent) -> BufCpu { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - // Allocate SYCL page-locked memory on the host, mapped into the TPlatform address space and - // accessible to all devices in the TPlatform. + // Allocate SYCL page-locked memory on the host, mapped into the PlatformGenericSycl address space and + // accessible to all devices in the PlatformGenericSycl. auto ctx = platform.syclContext(); TElem* memPtr = sycl::malloc_host(static_cast(getExtentProduct(extent)), ctx); auto deleter = [ctx](TElem* ptr) { sycl::free(ptr, ctx); }; @@ -238,22 +238,22 @@ namespace alpaka::trait }; //! The BufGenericSycl idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; //! The BufCpu pointer on SYCL device get trait specialization. - template - struct GetPtrDev, DevGenericSycl> + template + struct GetPtrDev, DevGenericSycl> { - static auto getPtrDev(BufCpu const& buf, DevGenericSycl const&) -> TElem const* + static auto getPtrDev(BufCpu const& buf, DevGenericSycl const&) -> TElem const* { return getPtrNative(buf); } - static auto getPtrDev(BufCpu& buf, DevGenericSycl const&) -> TElem* + static auto getPtrDev(BufCpu& buf, DevGenericSycl const&) -> TElem* { return getPtrNative(buf); } diff --git a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp index dd20f8a39648..8d31f0f577e0 100644 --- a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevGpuSyclIntel.hpp" #include "alpaka/mem/buf/BufGenericSycl.hpp" +#include "alpaka/platform/PlatformGpuSyclIntel.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) diff --git a/include/alpaka/mem/buf/sycl/Copy.hpp b/include/alpaka/mem/buf/sycl/Copy.hpp index 806c728acce3..1275c009e081 100644 --- a/include/alpaka/mem/buf/sycl/Copy.hpp +++ b/include/alpaka/mem/buf/sycl/Copy.hpp @@ -195,8 +195,8 @@ namespace alpaka::detail namespace alpaka::trait { //! The SYCL host-to-device memory copy trait specialization. - template - struct CreateTaskMemcpy, DevCpu> + template + struct CreateTaskMemcpy, DevCpu> { template static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent) @@ -209,8 +209,8 @@ namespace alpaka::trait }; //! The SYCL device-to-host memory copy trait specialization. - template - struct CreateTaskMemcpy> + template + struct CreateTaskMemcpy> { template static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent) @@ -223,8 +223,8 @@ namespace alpaka::trait }; //! The SYCL device-to-device memory copy trait specialization. - template - struct CreateTaskMemcpy, DevGenericSycl> + template + struct CreateTaskMemcpy, DevGenericSycl> { template static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent) diff --git a/include/alpaka/mem/view/ViewPlainPtr.hpp b/include/alpaka/mem/view/ViewPlainPtr.hpp index ceb4d95aed32..3559a9db2fdd 100644 --- a/include/alpaka/mem/view/ViewPlainPtr.hpp +++ b/include/alpaka/mem/view/ViewPlainPtr.hpp @@ -154,22 +154,21 @@ namespace alpaka #if defined(ALPAKA_ACC_SYCL_ENABLED) //! The SYCL device CreateViewPlainPtr trait specialization. - template - struct CreateViewPlainPtr> + template + struct CreateViewPlainPtr> { template static auto createViewPlainPtr( - DevGenericSycl const& dev, + DevGenericSycl const& dev, TElem* pMem, TExtent const& extent, TPitch pitch) { - return alpaka:: - ViewPlainPtr, TElem, alpaka::Dim, alpaka::Idx>( - pMem, - dev, - extent, - pitch); + return alpaka::ViewPlainPtr, TElem, alpaka::Dim, alpaka::Idx>( + pMem, + dev, + extent, + pitch); } }; #endif diff --git a/include/alpaka/platform/PlatformCpuSycl.hpp b/include/alpaka/platform/PlatformCpuSycl.hpp index 52496985a25c..bab56bdeb334 100644 --- a/include/alpaka/platform/PlatformCpuSycl.hpp +++ b/include/alpaka/platform/PlatformCpuSycl.hpp @@ -18,7 +18,8 @@ namespace alpaka { namespace detail { - struct SyclCpuSelector + template<> + struct SYCLDeviceSelector { auto operator()(sycl::device const& dev) const -> int { @@ -30,17 +31,7 @@ namespace alpaka } // namespace detail //! The SYCL device manager. - using PlatformCpuSycl = PlatformGenericSycl; + using PlatformCpuSycl = PlatformGenericSycl; } // namespace alpaka -namespace alpaka::trait -{ - //! The SYCL device manager device type trait specialization. - template<> - struct DevType - { - using type = DevGenericSycl; // = DevCpuSycl - }; -} // namespace alpaka::trait - #endif diff --git a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp index 0edb028943ce..61ad81bca48e 100644 --- a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp +++ b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp @@ -24,7 +24,8 @@ namespace alpaka # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wweak-vtables" # endif - struct IntelFpgaSelector final + template<> + struct SYCLDeviceSelector { # ifdef ALPAKA_FPGA_EMULATION static constexpr auto platform_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -48,17 +49,7 @@ namespace alpaka } // namespace detail //! The SYCL device manager. - using PlatformFpgaSyclIntel = PlatformGenericSycl; + using PlatformFpgaSyclIntel = PlatformGenericSycl; } // namespace alpaka -namespace alpaka::trait -{ - //! The SYCL device manager device type trait specialization. - template<> - struct DevType - { - using type = DevGenericSycl; // = DevFpgaSyclIntel - }; -} // namespace alpaka::trait - #endif diff --git a/include/alpaka/platform/PlatformGenericSycl.hpp b/include/alpaka/platform/PlatformGenericSycl.hpp index c4df17c6660a..be007e7d721c 100644 --- a/include/alpaka/platform/PlatformGenericSycl.hpp +++ b/include/alpaka/platform/PlatformGenericSycl.hpp @@ -6,6 +6,7 @@ #include "alpaka/core/Concepts.hpp" #include "alpaka/core/Sycl.hpp" +#include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/Traits.hpp" #include "alpaka/platform/Traits.hpp" @@ -24,12 +25,18 @@ namespace alpaka { + namespace detail + { + template + struct SYCLDeviceSelector; + } // namespace detail + //! The SYCL device manager. - template - struct PlatformGenericSycl : concepts::Implements> + template + struct PlatformGenericSycl : concepts::Implements> { PlatformGenericSycl() - : platform{TSelector{}} + : platform{detail::SYCLDeviceSelector{}} , devices(platform.get_devices()) , context{sycl::context{ devices, @@ -88,636 +95,643 @@ namespace alpaka std::vector devices; sycl::context context; }; -} // namespace alpaka -namespace alpaka::trait -{ - //! The SYCL platform device count get trait specialization. - template - struct GetDevCount> + namespace trait { - static auto getDevCount(PlatformGenericSycl const& platform) -> std::size_t + //! The SYCL platform device type trait specialization. + template + struct DevType> { - ALPAKA_DEBUG_FULL_LOG_SCOPE; - - return platform.syclDevices().size(); - } - }; + using type = DevGenericSycl; + }; - //! The SYCL platform device get trait specialization. - template - struct GetDevByIdx> - { - static auto getDevByIdx(PlatformGenericSycl const& platform, std::size_t const& devIdx) + //! The SYCL platform device count get trait specialization. + template + struct GetDevCount> { - ALPAKA_DEBUG_FULL_LOG_SCOPE; - - auto const& devices = platform.syclDevices(); - if(devIdx >= devices.size()) + static auto getDevCount(PlatformGenericSycl const& platform) -> std::size_t { - auto ss_err = std::stringstream{}; - ss_err << "Unable to return device handle for device " << devIdx << ". There are only " - << devices.size() << " SYCL devices!"; - throw std::runtime_error(ss_err.str()); + ALPAKA_DEBUG_FULL_LOG_SCOPE; + + return platform.syclDevices().size(); } + }; - auto sycl_dev = devices.at(devIdx); + //! The SYCL platform device get trait specialization. + template + struct GetDevByIdx> + { + static auto getDevByIdx(PlatformGenericSycl const& platform, std::size_t const& devIdx) + { + ALPAKA_DEBUG_FULL_LOG_SCOPE; - // Log this device. + auto const& devices = platform.syclDevices(); + if(devIdx >= devices.size()) + { + auto ss_err = std::stringstream{}; + ss_err << "Unable to return device handle for device " << devIdx << ". There are only " + << devices.size() << " SYCL devices!"; + throw std::runtime_error(ss_err.str()); + } + + auto sycl_dev = devices.at(devIdx); + + // Log this device. # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL - printDeviceProperties(sycl_dev); + printDeviceProperties(sycl_dev); # elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL - std::cout << __func__ << sycl_dev.template get_info() << '\n'; + std::cout << __func__ << sycl_dev.template get_info() << '\n'; # endif - using SyclPlatform = alpaka::PlatformGenericSycl; - return typename DevType::type{sycl_dev, platform.syclContext()}; - } + using SyclPlatform = alpaka::PlatformGenericSycl; + return typename DevType::type{sycl_dev, platform.syclContext()}; + } - private: + private: # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL - //! Prints all the device properties to std::cout. - static auto printDeviceProperties(sycl::device const& device) -> void - { - ALPAKA_DEBUG_FULL_LOG_SCOPE; + //! Prints all the device properties to std::cout. + static auto printDeviceProperties(sycl::device const& device) -> void + { + ALPAKA_DEBUG_FULL_LOG_SCOPE; - constexpr auto KiB = std::size_t{1024}; - constexpr auto MiB = KiB * KiB; + constexpr auto KiB = std::size_t{1024}; + constexpr auto MiB = KiB * KiB; - std::cout << "Device type: "; - switch(device.get_info()) - { - case sycl::info::device_type::cpu: - std::cout << "CPU"; - break; - - case sycl::info::device_type::gpu: - std::cout << "GPU"; - break; - - case sycl::info::device_type::accelerator: - std::cout << "Accelerator"; - break; - - case sycl::info::device_type::custom: - std::cout << "Custom"; - break; - - case sycl::info::device_type::automatic: - std::cout << "Automatic"; - break; - - case sycl::info::device_type::host: - std::cout << "Host"; - break; - - // The SYCL spec forbids the return of device_type::all - // Including this here to prevent warnings because of - // missing cases - case sycl::info::device_type::all: - std::cout << "All"; - break; - } - std::cout << '\n'; + std::cout << "Device type: "; + switch(device.get_info()) + { + case sycl::info::device_type::cpu: + std::cout << "CPU"; + break; - std::cout << "Name: " << device.get_info() << '\n'; + case sycl::info::device_type::gpu: + std::cout << "GPU"; + break; - std::cout << "Vendor: " << device.get_info() << '\n'; + case sycl::info::device_type::accelerator: + std::cout << "Accelerator"; + break; - std::cout << "Vendor ID: " << device.get_info() << '\n'; + case sycl::info::device_type::custom: + std::cout << "Custom"; + break; + + case sycl::info::device_type::automatic: + std::cout << "Automatic"; + break; - std::cout << "Driver version: " << device.get_info() << '\n'; + case sycl::info::device_type::host: + std::cout << "Host"; + break; - std::cout << "SYCL version: " << device.get_info() << '\n'; + // The SYCL spec forbids the return of device_type::all + // Including this here to prevent warnings because of + // missing cases + case sycl::info::device_type::all: + std::cout << "All"; + break; + } + std::cout << '\n'; + + std::cout << "Name: " << device.get_info() << '\n'; + + std::cout << "Vendor: " << device.get_info() << '\n'; + + std::cout << "Vendor ID: " << device.get_info() << '\n'; + + std::cout << "Driver version: " << device.get_info() << '\n'; + + std::cout << "SYCL version: " << device.get_info() << '\n'; # if !defined(BOOST_COMP_ICPX) - // Not defined by Level Zero back-end - std::cout << "Backend version: " << device.get_info() << '\n'; + // Not defined by Level Zero back-end + std::cout << "Backend version: " << device.get_info() << '\n'; # endif - std::cout << "Aspects: " << '\n'; + std::cout << "Aspects: " << '\n'; # if defined(BOOST_COMP_ICPX) # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0) - // These aspects are missing from oneAPI versions < 2023.2.0 - if(device.has(sycl::aspect::emulated)) - std::cout << "\t* emulated\n"; + // These aspects are missing from oneAPI versions < 2023.2.0 + if(device.has(sycl::aspect::emulated)) + std::cout << "\t* emulated\n"; - if(device.has(sycl::aspect::host_debuggable)) - std::cout << "\t* debuggable using standard debuggers\n"; + if(device.has(sycl::aspect::host_debuggable)) + std::cout << "\t* debuggable using standard debuggers\n"; # endif # endif - if(device.has(sycl::aspect::fp16)) - std::cout << "\t* supports sycl::half precision\n"; + if(device.has(sycl::aspect::fp16)) + std::cout << "\t* supports sycl::half precision\n"; - if(device.has(sycl::aspect::fp64)) - std::cout << "\t* supports double precision\n"; + if(device.has(sycl::aspect::fp64)) + std::cout << "\t* supports double precision\n"; - if(device.has(sycl::aspect::atomic64)) - std::cout << "\t* supports 64-bit atomics\n"; + if(device.has(sycl::aspect::atomic64)) + std::cout << "\t* supports 64-bit atomics\n"; - if(device.has(sycl::aspect::image)) - std::cout << "\t* supports images\n"; + if(device.has(sycl::aspect::image)) + std::cout << "\t* supports images\n"; - if(device.has(sycl::aspect::online_compiler)) - std::cout << "\t* supports online compilation of device code\n"; + if(device.has(sycl::aspect::online_compiler)) + std::cout << "\t* supports online compilation of device code\n"; - if(device.has(sycl::aspect::online_linker)) - std::cout << "\t* supports online linking of device code\n"; + if(device.has(sycl::aspect::online_linker)) + std::cout << "\t* supports online linking of device code\n"; - if(device.has(sycl::aspect::queue_profiling)) - std::cout << "\t* supports queue profiling\n"; + if(device.has(sycl::aspect::queue_profiling)) + std::cout << "\t* supports queue profiling\n"; - if(device.has(sycl::aspect::usm_device_allocations)) - std::cout << "\t* supports explicit USM allocations\n"; + if(device.has(sycl::aspect::usm_device_allocations)) + std::cout << "\t* supports explicit USM allocations\n"; - if(device.has(sycl::aspect::usm_host_allocations)) - std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host\n"; + if(device.has(sycl::aspect::usm_host_allocations)) + std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host\n"; - if(device.has(sycl::aspect::usm_atomic_host_allocations)) - std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n"; + if(device.has(sycl::aspect::usm_atomic_host_allocations)) + std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n"; - if(device.has(sycl::aspect::usm_shared_allocations)) - std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared\n"; + if(device.has(sycl::aspect::usm_shared_allocations)) + std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared\n"; - if(device.has(sycl::aspect::usm_atomic_shared_allocations)) - std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n"; + if(device.has(sycl::aspect::usm_atomic_shared_allocations)) + std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n"; - if(device.has(sycl::aspect::usm_system_allocations)) - std::cout << "\t* can access memory allocated by the system allocator\n"; + if(device.has(sycl::aspect::usm_system_allocations)) + std::cout << "\t* can access memory allocated by the system allocator\n"; - std::cout << "Available compute units: " << device.get_info() - << '\n'; + std::cout << "Available compute units: " << device.get_info() + << '\n'; - std::cout << "Maximum work item dimensions: "; - auto dims = device.get_info(); - std::cout << dims << std::endl; + std::cout << "Maximum work item dimensions: "; + auto dims = device.get_info(); + std::cout << dims << std::endl; - std::cout << "Maximum number of work items:\n"; - auto const wi_1D = device.get_info>(); - auto const wi_2D = device.get_info>(); - auto const wi_3D = device.get_info>(); - std::cout << "\t* 1D: (" << wi_1D.get(0) << ")\n"; - std::cout << "\t* 2D: (" << wi_2D.get(0) << ", " << wi_2D.get(1) << ")\n"; - std::cout << "\t* 3D: (" << wi_3D.get(0) << ", " << wi_3D.get(1) << ", " << wi_3D.get(2) << ")\n"; + std::cout << "Maximum number of work items:\n"; + auto const wi_1D = device.get_info>(); + auto const wi_2D = device.get_info>(); + auto const wi_3D = device.get_info>(); + std::cout << "\t* 1D: (" << wi_1D.get(0) << ")\n"; + std::cout << "\t* 2D: (" << wi_2D.get(0) << ", " << wi_2D.get(1) << ")\n"; + std::cout << "\t* 3D: (" << wi_3D.get(0) << ", " << wi_3D.get(1) << ", " << wi_3D.get(2) << ")\n"; - std::cout << "Maximum number of work items per work-group: " - << device.get_info() << '\n'; + std::cout << "Maximum number of work items per work-group: " + << device.get_info() << '\n'; - std::cout << "Maximum number of sub-groups per work-group: " - << device.get_info() << '\n'; + std::cout << "Maximum number of sub-groups per work-group: " + << device.get_info() << '\n'; - std::cout << "Supported sub-group sizes: "; - auto const sg_sizes = device.get_info(); - for(auto const& sz : sg_sizes) - std::cout << sz << ", "; - std::cout << '\n'; + std::cout << "Supported sub-group sizes: "; + auto const sg_sizes = device.get_info(); + for(auto const& sz : sg_sizes) + std::cout << sz << ", "; + std::cout << '\n'; - std::cout << "Preferred native vector width (char): " - << device.get_info() << '\n'; + std::cout << "Preferred native vector width (char): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (char): " - << device.get_info() << '\n'; + std::cout << "Native ISA vector width (char): " + << device.get_info() << '\n'; - std::cout << "Preferred native vector width (short): " - << device.get_info() << '\n'; + std::cout << "Preferred native vector width (short): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (short): " - << device.get_info() << '\n'; + std::cout << "Native ISA vector width (short): " + << device.get_info() << '\n'; - std::cout << "Preferred native vector width (int): " - << device.get_info() << '\n'; + std::cout << "Preferred native vector width (int): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (int): " - << device.get_info() << '\n'; + std::cout << "Native ISA vector width (int): " + << device.get_info() << '\n'; - std::cout << "Preferred native vector width (long): " - << device.get_info() << '\n'; + std::cout << "Preferred native vector width (long): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (long): " - << device.get_info() << '\n'; + std::cout << "Native ISA vector width (long): " + << device.get_info() << '\n'; - std::cout << "Preferred native vector width (float): " - << device.get_info() << '\n'; + std::cout << "Preferred native vector width (float): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (float): " - << device.get_info() << '\n'; + std::cout << "Native ISA vector width (float): " + << device.get_info() << '\n'; - if(device.has(sycl::aspect::fp64)) - { - std::cout << "Preferred native vector width (double): " - << device.get_info() << '\n'; + if(device.has(sycl::aspect::fp64)) + { + std::cout << "Preferred native vector width (double): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (double): " - << device.get_info() << '\n'; - } + std::cout << "Native ISA vector width (double): " + << device.get_info() << '\n'; + } - if(device.has(sycl::aspect::fp16)) - { - std::cout << "Preferred native vector width (half): " - << device.get_info() << '\n'; + if(device.has(sycl::aspect::fp16)) + { + std::cout << "Preferred native vector width (half): " + << device.get_info() << '\n'; - std::cout << "Native ISA vector width (half): " - << device.get_info() << '\n'; - } + std::cout << "Native ISA vector width (half): " + << device.get_info() << '\n'; + } - std::cout << "Maximum clock frequency: " << device.get_info() - << " MHz\n"; + std::cout << "Maximum clock frequency: " << device.get_info() + << " MHz\n"; - std::cout << "Address space size: " << device.get_info() << "-bit\n"; + std::cout << "Address space size: " << device.get_info() << "-bit\n"; - std::cout << "Maximum size of memory object allocation: " - << device.get_info() << " bytes\n"; + std::cout << "Maximum size of memory object allocation: " + << device.get_info() << " bytes\n"; - if(device.has(sycl::aspect::image)) - { - std::cout << "Maximum number of simultaneous image object reads per kernel: " - << device.get_info() << '\n'; + if(device.has(sycl::aspect::image)) + { + std::cout << "Maximum number of simultaneous image object reads per kernel: " + << device.get_info() << '\n'; - std::cout << "Maximum number of simultaneous image writes per kernel: " - << device.get_info() << '\n'; + std::cout << "Maximum number of simultaneous image writes per kernel: " + << device.get_info() << '\n'; - std::cout << "Maximum 1D/2D image width: " << device.get_info() - << " px\n"; + std::cout << "Maximum 1D/2D image width: " + << device.get_info() << " px\n"; - std::cout << "Maximum 2D image height: " << device.get_info() - << " px\n"; + std::cout << "Maximum 2D image height: " + << device.get_info() << " px\n"; - std::cout << "Maximum 3D image width: " << device.get_info() - << " px\n"; + std::cout << "Maximum 3D image width: " << device.get_info() + << " px\n"; - std::cout << "Maximum 3D image height: " << device.get_info() - << " px\n"; + std::cout << "Maximum 3D image height: " + << device.get_info() << " px\n"; - std::cout << "Maximum 3D image depth: " << device.get_info() - << " px\n"; + std::cout << "Maximum 3D image depth: " << device.get_info() + << " px\n"; - std::cout << "Maximum number of samplers per kernel: " - << device.get_info() << '\n'; - } + std::cout << "Maximum number of samplers per kernel: " + << device.get_info() << '\n'; + } - std::cout << "Maximum kernel argument size: " << device.get_info() - << " bytes\n"; + std::cout << "Maximum kernel argument size: " + << device.get_info() << " bytes\n"; - std::cout << "Memory base address alignment: " - << device.get_info() << " bit\n"; + std::cout << "Memory base address alignment: " + << device.get_info() << " bit\n"; - auto print_fp_config = [](std::string const& fp, std::vector const& conf) - { - std::cout << fp << " precision floating-point capabilities:\n"; - - auto find_and_print = [&](sycl::info::fp_config val) + auto print_fp_config = [](std::string const& fp, std::vector const& conf) { - auto it = std::find(begin(conf), end(conf), val); - std::cout << (it == std::end(conf) ? "No" : "Yes") << '\n'; - }; + std::cout << fp << " precision floating-point capabilities:\n"; - std::cout << "\t* denorm support: "; - find_and_print(sycl::info::fp_config::denorm); + auto find_and_print = [&](sycl::info::fp_config val) + { + auto it = std::find(begin(conf), end(conf), val); + std::cout << (it == std::end(conf) ? "No" : "Yes") << '\n'; + }; - std::cout << "\t* INF & quiet NaN support: "; - find_and_print(sycl::info::fp_config::inf_nan); + std::cout << "\t* denorm support: "; + find_and_print(sycl::info::fp_config::denorm); - std::cout << "\t* round to nearest even support: "; - find_and_print(sycl::info::fp_config::round_to_nearest); + std::cout << "\t* INF & quiet NaN support: "; + find_and_print(sycl::info::fp_config::inf_nan); - std::cout << "\t* round to zero support: "; - find_and_print(sycl::info::fp_config::round_to_zero); + std::cout << "\t* round to nearest even support: "; + find_and_print(sycl::info::fp_config::round_to_nearest); - std::cout << "\t* round to infinity support: "; - find_and_print(sycl::info::fp_config::round_to_inf); + std::cout << "\t* round to zero support: "; + find_and_print(sycl::info::fp_config::round_to_zero); - std::cout << "\t* IEEE754-2008 FMA support: "; - find_and_print(sycl::info::fp_config::fma); + std::cout << "\t* round to infinity support: "; + find_and_print(sycl::info::fp_config::round_to_inf); - std::cout << "\t* correctly rounded divide/sqrt support: "; - find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt); + std::cout << "\t* IEEE754-2008 FMA support: "; + find_and_print(sycl::info::fp_config::fma); - std::cout << "\t* software-implemented floating point operations: "; - find_and_print(sycl::info::fp_config::soft_float); - }; + std::cout << "\t* correctly rounded divide/sqrt support: "; + find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt); - if(device.has(sycl::aspect::fp16)) - { - auto const fp16_conf = device.get_info(); - print_fp_config("Half", fp16_conf); - } + std::cout << "\t* software-implemented floating point operations: "; + find_and_print(sycl::info::fp_config::soft_float); + }; - auto const fp32_conf = device.get_info(); - print_fp_config("Single", fp32_conf); + if(device.has(sycl::aspect::fp16)) + { + auto const fp16_conf = device.get_info(); + print_fp_config("Half", fp16_conf); + } - if(device.has(sycl::aspect::fp64)) - { - auto const fp64_conf = device.get_info(); - print_fp_config("Double", fp64_conf); - } + auto const fp32_conf = device.get_info(); + print_fp_config("Single", fp32_conf); - std::cout << "Global memory cache type: "; - auto has_global_mem_cache = false; - switch(device.get_info()) - { - case sycl::info::global_mem_cache_type::none: - std::cout << "none"; - break; - - case sycl::info::global_mem_cache_type::read_only: - std::cout << "read-only"; - has_global_mem_cache = true; - break; - - case sycl::info::global_mem_cache_type::read_write: - std::cout << "read-write"; - has_global_mem_cache = true; - break; - } - std::cout << '\n'; + if(device.has(sycl::aspect::fp64)) + { + auto const fp64_conf = device.get_info(); + print_fp_config("Double", fp64_conf); + } - if(has_global_mem_cache) - { - std::cout << "Global memory cache line size: " - << device.get_info() << " bytes\n"; + std::cout << "Global memory cache type: "; + auto has_global_mem_cache = false; + switch(device.get_info()) + { + case sycl::info::global_mem_cache_type::none: + std::cout << "none"; + break; - std::cout << "Global memory cache size: " - << device.get_info() / KiB << " KiB\n"; - } + case sycl::info::global_mem_cache_type::read_only: + std::cout << "read-only"; + has_global_mem_cache = true; + break; - std::cout << "Global memory size: " << device.get_info() / MiB - << " MiB" << std::endl; + case sycl::info::global_mem_cache_type::read_write: + std::cout << "read-write"; + has_global_mem_cache = true; + break; + } + std::cout << '\n'; - std::cout << "Local memory type: "; - auto has_local_memory = false; - switch(device.get_info()) - { - case sycl::info::local_mem_type::none: - std::cout << "none"; - break; - - case sycl::info::local_mem_type::local: - std::cout << "local"; - has_local_memory = true; - break; - - case sycl::info::local_mem_type::global: - std::cout << "global"; - has_local_memory = true; - break; - } - std::cout << '\n'; + if(has_global_mem_cache) + { + std::cout << "Global memory cache line size: " + << device.get_info() << " bytes\n"; + + std::cout << "Global memory cache size: " + << device.get_info() / KiB << " KiB\n"; + } - if(has_local_memory) - std::cout << "Local memory size: " << device.get_info() / KiB - << " KiB\n"; + std::cout << "Global memory size: " << device.get_info() / MiB + << " MiB" << std::endl; - std::cout << "Error correction support: " - << (device.get_info() ? "Yes" : "No") << '\n'; + std::cout << "Local memory type: "; + auto has_local_memory = false; + switch(device.get_info()) + { + case sycl::info::local_mem_type::none: + std::cout << "none"; + break; - auto print_memory_orders = [](std::vector const& mem_orders) - { - for(auto const& cap : mem_orders) + case sycl::info::local_mem_type::local: + std::cout << "local"; + has_local_memory = true; + break; + + case sycl::info::local_mem_type::global: + std::cout << "global"; + has_local_memory = true; + break; + } + std::cout << '\n'; + + if(has_local_memory) + std::cout << "Local memory size: " << device.get_info() / KiB + << " KiB\n"; + + std::cout << "Error correction support: " + << (device.get_info() ? "Yes" : "No") << '\n'; + + auto print_memory_orders = [](std::vector const& mem_orders) { - switch(cap) + for(auto const& cap : mem_orders) { - case sycl::memory_order::relaxed: - std::cout << "relaxed"; - break; + switch(cap) + { + case sycl::memory_order::relaxed: + std::cout << "relaxed"; + break; - case sycl::memory_order::acquire: - std::cout << "acquire"; - break; + case sycl::memory_order::acquire: + std::cout << "acquire"; + break; - case sycl::memory_order::release: - std::cout << "release"; - break; + case sycl::memory_order::release: + std::cout << "release"; + break; - case sycl::memory_order::acq_rel: - std::cout << "acq_rel"; - break; + case sycl::memory_order::acq_rel: + std::cout << "acq_rel"; + break; - case sycl::memory_order::seq_cst: - std::cout << "seq_cst"; - break; + case sycl::memory_order::seq_cst: + std::cout << "seq_cst"; + break; # if defined(BOOST_COMP_ICPX) - // Stop icpx from complaining about its own internals. - case sycl::memory_order::__consume_unsupported: - break; + // Stop icpx from complaining about its own internals. + case sycl::memory_order::__consume_unsupported: + break; # endif + } + std::cout << ", "; } - std::cout << ", "; - } - std::cout << '\n'; - }; + std::cout << '\n'; + }; - std::cout << "Supported memory orderings for atomic operations: "; - auto const mem_orders = device.get_info(); - print_memory_orders(mem_orders); + std::cout << "Supported memory orderings for atomic operations: "; + auto const mem_orders = device.get_info(); + print_memory_orders(mem_orders); # if defined(BOOST_COMP_ICPX) # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0) - // Not implemented in oneAPI < 2023.2.0 - std::cout << "Supported memory orderings for sycl::atomic_fence: "; - auto const fence_orders = device.get_info(); - print_memory_orders(fence_orders); + // Not implemented in oneAPI < 2023.2.0 + std::cout << "Supported memory orderings for sycl::atomic_fence: "; + auto const fence_orders = device.get_info(); + print_memory_orders(fence_orders); # endif # endif - auto print_memory_scopes = [](std::vector const& mem_scopes) - { - for(auto const& cap : mem_scopes) + auto print_memory_scopes = [](std::vector const& mem_scopes) { - switch(cap) + for(auto const& cap : mem_scopes) { - case sycl::memory_scope::work_item: - std::cout << "work-item"; - break; + switch(cap) + { + case sycl::memory_scope::work_item: + std::cout << "work-item"; + break; - case sycl::memory_scope::sub_group: - std::cout << "sub-group"; - break; + case sycl::memory_scope::sub_group: + std::cout << "sub-group"; + break; - case sycl::memory_scope::work_group: - std::cout << "work-group"; - break; + case sycl::memory_scope::work_group: + std::cout << "work-group"; + break; - case sycl::memory_scope::device: - std::cout << "device"; - break; + case sycl::memory_scope::device: + std::cout << "device"; + break; - case sycl::memory_scope::system: - std::cout << "system"; - break; + case sycl::memory_scope::system: + std::cout << "system"; + break; + } + std::cout << ", "; } - std::cout << ", "; - } - std::cout << '\n'; - }; + std::cout << '\n'; + }; - std::cout << "Supported memory scopes for atomic operations: "; - auto const mem_scopes = device.get_info(); - print_memory_scopes(mem_scopes); + std::cout << "Supported memory scopes for atomic operations: "; + auto const mem_scopes = device.get_info(); + print_memory_scopes(mem_scopes); # if defined(BOOST_COMP_ICPX) # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0) - // Not implemented in oneAPI < 2023.2.0 - std::cout << "Supported memory scopes for sycl::atomic_fence: "; - auto const fence_scopes = device.get_info(); - print_memory_scopes(fence_scopes); + // Not implemented in oneAPI < 2023.2.0 + std::cout << "Supported memory scopes for sycl::atomic_fence: "; + auto const fence_scopes = device.get_info(); + print_memory_scopes(fence_scopes); # endif # endif - std::cout << "Device timer resolution: " - << device.get_info() << " ns\n"; + std::cout << "Device timer resolution: " + << device.get_info() << " ns\n"; - std::cout << "Built-in kernels: "; - auto const builtins = device.get_info(); - for(auto const& b : builtins) - std::cout << b.get_name() << ", "; - std::cout << '\n'; + std::cout << "Built-in kernels: "; + auto const builtins = device.get_info(); + for(auto const& b : builtins) + std::cout << b.get_name() << ", "; + std::cout << '\n'; - std::cout << "Maximum number of subdevices: "; - auto const max_subs = device.get_info(); - std::cout << max_subs << '\n'; + std::cout << "Maximum number of subdevices: "; + auto const max_subs = device.get_info(); + std::cout << max_subs << '\n'; - if(max_subs > 1) - { - std::cout << "Supported partition properties: "; - auto const part_props = device.get_info(); - auto has_affinity_domains = false; - for(auto const& prop : part_props) + if(max_subs > 1) { - switch(prop) + std::cout << "Supported partition properties: "; + auto const part_props = device.get_info(); + auto has_affinity_domains = false; + for(auto const& prop : part_props) + { + switch(prop) + { + case sycl::info::partition_property::no_partition: + std::cout << "no partition"; + break; + + case sycl::info::partition_property::partition_equally: + std::cout << "equally"; + break; + + case sycl::info::partition_property::partition_by_counts: + std::cout << "by counts"; + break; + + case sycl::info::partition_property::partition_by_affinity_domain: + std::cout << "by affinity domain"; + has_affinity_domains = true; + break; +# if defined(BOOST_COMP_ICPX) + case sycl::info::partition_property::ext_intel_partition_by_cslice: + std::cout << "by compute slice (Intel extension; deprecated)"; + break; +# endif + } + std::cout << ", "; + } + std::cout << '\n'; + + if(has_affinity_domains) + { + std::cout << "Supported partition affinity domains: "; + auto const aff_doms = device.get_info(); + for(auto const& dom : aff_doms) + { + switch(dom) + { + case sycl::info::partition_affinity_domain::not_applicable: + std::cout << "not applicable"; + break; + + case sycl::info::partition_affinity_domain::numa: + std::cout << "NUMA"; + break; + + case sycl::info::partition_affinity_domain::L4_cache: + std::cout << "L4 cache"; + break; + + case sycl::info::partition_affinity_domain::L3_cache: + std::cout << "L3 cache"; + break; + + case sycl::info::partition_affinity_domain::L2_cache: + std::cout << "L2 cache"; + break; + + case sycl::info::partition_affinity_domain::L1_cache: + std::cout << "L1 cache"; + break; + + case sycl::info::partition_affinity_domain::next_partitionable: + std::cout << "next partitionable"; + break; + } + std::cout << ", "; + } + std::cout << '\n'; + } + + std::cout << "Current partition property: "; + switch(device.get_info()) { case sycl::info::partition_property::no_partition: std::cout << "no partition"; break; case sycl::info::partition_property::partition_equally: - std::cout << "equally"; + std::cout << "partitioned equally"; break; case sycl::info::partition_property::partition_by_counts: - std::cout << "by counts"; + std::cout << "partitioned by counts"; break; case sycl::info::partition_property::partition_by_affinity_domain: - std::cout << "by affinity domain"; - has_affinity_domains = true; + std::cout << "partitioned by affinity domain"; break; + # if defined(BOOST_COMP_ICPX) case sycl::info::partition_property::ext_intel_partition_by_cslice: - std::cout << "by compute slice (Intel extension; deprecated)"; + std::cout << "partitioned by compute slice (Intel extension; deprecated)"; break; # endif } - std::cout << ", "; - } - std::cout << '\n'; + std::cout << '\n'; - if(has_affinity_domains) - { - std::cout << "Supported partition affinity domains: "; - auto const aff_doms = device.get_info(); - for(auto const& dom : aff_doms) + std::cout << "Current partition affinity domain: "; + switch(device.get_info()) { - switch(dom) - { - case sycl::info::partition_affinity_domain::not_applicable: - std::cout << "not applicable"; - break; + case sycl::info::partition_affinity_domain::not_applicable: + std::cout << "not applicable"; + break; - case sycl::info::partition_affinity_domain::numa: - std::cout << "NUMA"; - break; + case sycl::info::partition_affinity_domain::numa: + std::cout << "NUMA"; + break; - case sycl::info::partition_affinity_domain::L4_cache: - std::cout << "L4 cache"; - break; + case sycl::info::partition_affinity_domain::L4_cache: + std::cout << "L4 cache"; + break; - case sycl::info::partition_affinity_domain::L3_cache: - std::cout << "L3 cache"; - break; + case sycl::info::partition_affinity_domain::L3_cache: + std::cout << "L3 cache"; + break; - case sycl::info::partition_affinity_domain::L2_cache: - std::cout << "L2 cache"; - break; + case sycl::info::partition_affinity_domain::L2_cache: + std::cout << "L2 cache"; + break; - case sycl::info::partition_affinity_domain::L1_cache: - std::cout << "L1 cache"; - break; + case sycl::info::partition_affinity_domain::L1_cache: + std::cout << "L1 cache"; + break; - case sycl::info::partition_affinity_domain::next_partitionable: - std::cout << "next partitionable"; - break; - } - std::cout << ", "; + case sycl::info::partition_affinity_domain::next_partitionable: + std::cout << "next partitionable"; + break; } std::cout << '\n'; } - std::cout << "Current partition property: "; - switch(device.get_info()) - { - case sycl::info::partition_property::no_partition: - std::cout << "no partition"; - break; - - case sycl::info::partition_property::partition_equally: - std::cout << "partitioned equally"; - break; - - case sycl::info::partition_property::partition_by_counts: - std::cout << "partitioned by counts"; - break; - - case sycl::info::partition_property::partition_by_affinity_domain: - std::cout << "partitioned by affinity domain"; - break; - -# if defined(BOOST_COMP_ICPX) - case sycl::info::partition_property::ext_intel_partition_by_cslice: - std::cout << "partitioned by compute slice (Intel extension; deprecated)"; - break; -# endif - } - std::cout << '\n'; - - std::cout << "Current partition affinity domain: "; - switch(device.get_info()) - { - case sycl::info::partition_affinity_domain::not_applicable: - std::cout << "not applicable"; - break; - - case sycl::info::partition_affinity_domain::numa: - std::cout << "NUMA"; - break; - - case sycl::info::partition_affinity_domain::L4_cache: - std::cout << "L4 cache"; - break; - - case sycl::info::partition_affinity_domain::L3_cache: - std::cout << "L3 cache"; - break; - - case sycl::info::partition_affinity_domain::L2_cache: - std::cout << "L2 cache"; - break; - - case sycl::info::partition_affinity_domain::L1_cache: - std::cout << "L1 cache"; - break; - - case sycl::info::partition_affinity_domain::next_partitionable: - std::cout << "next partitionable"; - break; - } - std::cout << '\n'; + std::cout.flush(); } - - std::cout.flush(); - } # endif - }; -} // namespace alpaka::trait + }; + } // namespace trait +} // namespace alpaka #endif diff --git a/include/alpaka/platform/PlatformGpuSyclIntel.hpp b/include/alpaka/platform/PlatformGpuSyclIntel.hpp index b51373646c9c..dd465895c18d 100644 --- a/include/alpaka/platform/PlatformGpuSyclIntel.hpp +++ b/include/alpaka/platform/PlatformGpuSyclIntel.hpp @@ -4,6 +4,7 @@ #pragma once +#include "alpaka/acc/Tag.hpp" #include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/Traits.hpp" #include "alpaka/platform/PlatformGenericSycl.hpp" @@ -18,7 +19,8 @@ namespace alpaka { namespace detail { - struct IntelGpuSelector + template<> + struct SYCLDeviceSelector { auto operator()(sycl::device const& dev) const -> int { @@ -33,17 +35,7 @@ namespace alpaka } // namespace detail //! The SYCL device manager. - using PlatformGpuSyclIntel = PlatformGenericSycl; + using PlatformGpuSyclIntel = PlatformGenericSycl; } // namespace alpaka -namespace alpaka::trait -{ - //! The SYCL device manager device type trait specialization. - template<> - struct DevType - { - using type = DevGenericSycl; // = DevGpuSyclIntel - }; -} // namespace alpaka::trait - #endif diff --git a/include/alpaka/queue/QueueCpuSyclBlocking.hpp b/include/alpaka/queue/QueueCpuSyclBlocking.hpp index 63dc39fc0c16..052fc234672d 100644 --- a/include/alpaka/queue/QueueCpuSyclBlocking.hpp +++ b/include/alpaka/queue/QueueCpuSyclBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevCpuSycl.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) namespace alpaka { - using QueueCpuSyclBlocking = QueueGenericSyclBlocking; + using QueueCpuSyclBlocking = QueueGenericSyclBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp index d3fab4dcfbdb..330a2cc174d6 100644 --- a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp +++ b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevCpuSycl.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU) namespace alpaka { - using QueueCpuSyclNonBlocking = QueueGenericSyclNonBlocking; + using QueueCpuSyclNonBlocking = QueueGenericSyclNonBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp index 9ff2e58dc48d..ea17bb1ef4ff 100644 --- a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp +++ b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevFpgaSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) namespace alpaka { - using QueueFpgaSyclIntelBlocking = QueueGenericSyclBlocking; + using QueueFpgaSyclIntelBlocking = QueueGenericSyclBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp index 20ea0bb83e81..e187b14f2651 100644 --- a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp +++ b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevFpgaSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) namespace alpaka { - using QueueFpgaSyclIntelNonBlocking = QueueGenericSyclNonBlocking; + using QueueFpgaSyclIntelNonBlocking = QueueGenericSyclNonBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGenericSyclBlocking.hpp b/include/alpaka/queue/QueueGenericSyclBlocking.hpp index bb743226c5d3..cbe98bb46c78 100644 --- a/include/alpaka/queue/QueueGenericSyclBlocking.hpp +++ b/include/alpaka/queue/QueueGenericSyclBlocking.hpp @@ -10,8 +10,8 @@ namespace alpaka { - template - using QueueGenericSyclBlocking = detail::QueueGenericSyclBase; + template + using QueueGenericSyclBlocking = detail::QueueGenericSyclBase; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp index b5dcbe84004c..b9f0d7f6298a 100644 --- a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp +++ b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp @@ -10,8 +10,8 @@ namespace alpaka { - template - using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase; + template + using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp index 358513e1e2fc..3758dc335df4 100644 --- a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp +++ b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevGpuSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) namespace alpaka { - using QueueGpuSyclIntelBlocking = QueueGenericSyclBlocking; + using QueueGpuSyclIntelBlocking = QueueGenericSyclBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp index f3be15c9dcb2..6c2bcd093bb9 100644 --- a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp +++ b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp @@ -4,14 +4,14 @@ #pragma once -#include "alpaka/dev/DevGpuSyclIntel.hpp" +#include "alpaka/acc/Tag.hpp" #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU) namespace alpaka { - using QueueGpuSyclIntelNonBlocking = QueueGenericSyclNonBlocking; + using QueueGpuSyclIntelNonBlocking = QueueGenericSyclNonBlocking; } // namespace alpaka #endif diff --git a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp index 655deb973675..c99b2aeaa53f 100644 --- a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp +++ b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp @@ -23,262 +23,264 @@ # include -namespace alpaka::detail +namespace alpaka { - template - inline constexpr auto is_sycl_task = false; - - template - inline constexpr auto is_sycl_task> = true; - - template - inline constexpr auto is_sycl_kernel = false; + template + class DevGenericSycl; - template - inline constexpr auto is_sycl_kernel> = true; + template + class EventGenericSycl; - class QueueGenericSyclImpl + namespace detail { - public: - QueueGenericSyclImpl(sycl::context context, sycl::device device) - : m_queue{ - std::move(context), // This is important. In SYCL a device can belong to multiple contexts. - std::move(device), - {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}} - { - } + template + inline constexpr auto is_sycl_task = false; + + template + inline constexpr auto is_sycl_task> = true; - // This class will only exist as a pointer. We don't care about copy and move semantics. - QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete; - auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete; + template + inline constexpr auto is_sycl_kernel = false; - QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete; - auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete; + template + inline constexpr auto is_sycl_kernel> = true; - ~QueueGenericSyclImpl() + class QueueGenericSyclImpl { - try + public: + QueueGenericSyclImpl(sycl::context context, sycl::device device) + : m_queue{ + std::move(context), // This is important. In SYCL a device can belong to multiple contexts. + std::move(device), + {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}} { - m_queue.wait_and_throw(); } - catch(sycl::exception const& err) + + // This class will only exist as a pointer. We don't care about copy and move semantics. + QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete; + auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete; + + QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete; + auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete; + + ~QueueGenericSyclImpl() { - std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " (" - << err.code() << ')' << std::endl; + try + { + m_queue.wait_and_throw(); + } + catch(sycl::exception const& err) + { + std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " (" + << err.code() << ')' << std::endl; + } + catch(std::exception const& err) + { + std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what() + << std::endl; + } } - catch(std::exception const& err) + + // Don't call this without locking first! + auto clean_dependencies() -> void { - std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what() - << std::endl; + // Clean up completed events + auto const start = std::begin(m_dependencies); + auto const old_end = std::end(m_dependencies); + auto const new_end = std::remove_if( + start, + old_end, + [](sycl::event ev) { + return ev.get_info() + == sycl::info::event_command_status::complete; + }); + + m_dependencies.erase(new_end, old_end); } - } - - // Don't call this without locking first! - auto clean_dependencies() -> void - { - // Clean up completed events - auto const start = std::begin(m_dependencies); - auto const old_end = std::end(m_dependencies); - auto const new_end = std::remove_if( - start, - old_end, - [](sycl::event ev) { - return ev.get_info() - == sycl::info::event_command_status::complete; - }); - - m_dependencies.erase(new_end, old_end); - } - - auto register_dependency(sycl::event event) -> void - { - std::lock_guard lock{m_mutex}; - - clean_dependencies(); - m_dependencies.push_back(event); - } - auto empty() const -> bool - { - std::shared_lock lock{m_mutex}; - return m_last_event.get_info() - == sycl::info::event_command_status::complete; - } + auto register_dependency(sycl::event event) -> void + { + std::lock_guard lock{m_mutex}; - auto wait() -> void - { - // SYCL queues are thread-safe. - m_queue.wait_and_throw(); - } + clean_dependencies(); + m_dependencies.push_back(event); + } - auto get_last_event() const -> sycl::event - { - std::shared_lock lock{m_mutex}; - return m_last_event; - } + auto empty() const -> bool + { + std::shared_lock lock{m_mutex}; + return m_last_event.get_info() + == sycl::info::event_command_status::complete; + } - template - auto enqueue(TTask const& task) -> void - { + auto wait() -> void { - std::lock_guard lock{m_mutex}; + // SYCL queues are thread-safe. + m_queue.wait_and_throw(); + } - clean_dependencies(); + auto get_last_event() const -> sycl::event + { + std::shared_lock lock{m_mutex}; + return m_last_event; + } - // Execute task - if constexpr(is_sycl_task && !is_sycl_kernel) // Copy / Fill - { - m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally - } - else + template + auto enqueue(TTask const& task) -> void + { { - m_last_event = m_queue.submit( - [this, &task](sycl::handler& cgh) - { - if(!m_dependencies.empty()) - cgh.depends_on(m_dependencies); - - if constexpr(is_sycl_kernel) // Kernel - task(cgh); // Will call cgh.parallel_for internally - else // Host - cgh.host_task(task); - }); + std::lock_guard lock{m_mutex}; + + clean_dependencies(); + + // Execute task + if constexpr(is_sycl_task && !is_sycl_kernel) // Copy / Fill + { + m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally + } + else + { + m_last_event = m_queue.submit( + [this, &task](sycl::handler& cgh) + { + if(!m_dependencies.empty()) + cgh.depends_on(m_dependencies); + + if constexpr(is_sycl_kernel) // Kernel + task(cgh); // Will call cgh.parallel_for internally + else // Host + cgh.host_task(task); + }); + } + + m_dependencies.clear(); } - m_dependencies.clear(); + if constexpr(TBlocking) + wait(); + } + + [[nodiscard]] auto getNativeHandle() const noexcept + { + return m_queue; } - if constexpr(TBlocking) - wait(); - } + std::vector m_dependencies; + sycl::event m_last_event; + std::shared_mutex mutable m_mutex; - [[nodiscard]] auto getNativeHandle() const noexcept - { - return m_queue; - } - - std::vector m_dependencies; - sycl::event m_last_event; - std::shared_mutex mutable m_mutex; - - private: - sycl::queue m_queue; - }; - - template - class QueueGenericSyclBase - : public concepts::Implements> - , public concepts::Implements> - , public concepts::Implements> - { - public: - QueueGenericSyclBase(TDev const& dev) - : m_dev{dev} - , m_spQueueImpl{std::make_shared( - dev.getNativeHandle().second, - dev.getNativeHandle().first)} - { - m_dev.m_impl->register_queue(m_spQueueImpl); - } + private: + sycl::queue m_queue; + }; - friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool + template + class QueueGenericSyclBase + : public concepts::Implements> + , public concepts::Implements> + , public concepts::Implements> { - return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl); - } + public: + QueueGenericSyclBase(DevGenericSycl const& dev) + : m_dev{dev} + , m_spQueueImpl{std::make_shared( + dev.getNativeHandle().second, + dev.getNativeHandle().first)} + { + m_dev.m_impl->register_queue(m_spQueueImpl); + } - friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool - { - return !(lhs == rhs); - } + friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool + { + return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl); + } - [[nodiscard]] auto getNativeHandle() const noexcept - { - return m_spQueueImpl->getNativeHandle(); - } + friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool + { + return !(lhs == rhs); + } - TDev m_dev; - std::shared_ptr m_spQueueImpl; - }; -} // namespace alpaka::detail + [[nodiscard]] auto getNativeHandle() const noexcept + { + return m_spQueueImpl->getNativeHandle(); + } -namespace alpaka -{ - template - class EventGenericSycl; -} // namespace alpaka + DevGenericSycl m_dev; + std::shared_ptr m_spQueueImpl; + }; + } // namespace detail -namespace alpaka::trait -{ - //! The SYCL blocking queue device type trait specialization. - template - struct DevType> + namespace trait { - using type = TDev; - }; + //! The SYCL blocking queue device type trait specialization. + template + struct DevType> + { + using type = DevGenericSycl; + }; - //! The SYCL blocking queue device get trait specialization. - template - struct GetDev> - { - static auto getDev(detail::QueueGenericSyclBase const& queue) + //! The SYCL blocking queue device get trait specialization. + template + struct GetDev> { - ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - return queue.m_dev; - } - }; - - //! The SYCL blocking queue event type trait specialization. - template - struct EventType> - { - using type = EventGenericSycl; - }; + static auto getDev(detail::QueueGenericSyclBase const& queue) + { + ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; + return queue.m_dev; + } + }; - //! The SYCL blocking queue enqueue trait specialization. - template - struct Enqueue, TTask> - { - static auto enqueue(detail::QueueGenericSyclBase& queue, TTask const& task) -> void + //! The SYCL blocking queue event type trait specialization. + template + struct EventType> { - ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - queue.m_spQueueImpl->template enqueue(task); - } - }; - - //! The SYCL blocking queue test trait specialization. - template - struct Empty> - { - static auto empty(detail::QueueGenericSyclBase const& queue) -> bool + using type = EventGenericSycl; + }; + + //! The SYCL blocking queue enqueue trait specialization. + template + struct Enqueue, TTask> { - ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - return queue.m_spQueueImpl->empty(); - } - }; - - //! The SYCL blocking queue thread wait trait specialization. - //! - //! Blocks execution of the calling thread until the queue has finished processing all previously requested - //! tasks (kernels, data copies, ...) - template - struct CurrentThreadWaitFor> - { - static auto currentThreadWaitFor(detail::QueueGenericSyclBase const& queue) -> void + static auto enqueue(detail::QueueGenericSyclBase& queue, TTask const& task) -> void + { + ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; + queue.m_spQueueImpl->template enqueue(task); + } + }; + + //! The SYCL blocking queue test trait specialization. + template + struct Empty> { - ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - queue.m_spQueueImpl->wait(); - } - }; - - //! The SYCL queue native handle trait specialization. - template - struct NativeHandle> - { - [[nodiscard]] static auto getNativeHandle(detail::QueueGenericSyclBase const& queue) + static auto empty(detail::QueueGenericSyclBase const& queue) -> bool + { + ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; + return queue.m_spQueueImpl->empty(); + } + }; + + //! The SYCL blocking queue thread wait trait specialization. + //! + //! Blocks execution of the calling thread until the queue has finished processing all previously requested + //! tasks (kernels, data copies, ...) + template + struct CurrentThreadWaitFor> { - return queue.getNativeHandle(); - } - }; -} // namespace alpaka::trait + static auto currentThreadWaitFor(detail::QueueGenericSyclBase const& queue) -> void + { + ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; + queue.m_spQueueImpl->wait(); + } + }; + //! The SYCL queue native handle trait specialization. + template + struct NativeHandle> + { + [[nodiscard]] static auto getNativeHandle(detail::QueueGenericSyclBase const& queue) + { + return queue.getNativeHandle(); + } + }; + } // namespace trait +} // namespace alpaka #endif diff --git a/include/alpaka/test/acc/TestAccs.hpp b/include/alpaka/test/acc/TestAccs.hpp index c0751aa18cf6..e84bb7800f39 100644 --- a/include/alpaka/test/acc/TestAccs.hpp +++ b/include/alpaka/test/acc/TestAccs.hpp @@ -79,21 +79,21 @@ namespace alpaka::test #endif #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_CPU) template - using AccCpuSyclIfAvailableElseInt = alpaka::AccCpuSycl; + using AccCpuSyclIfAvailableElseInt = AccCpuSycl; #else template using AccCpuSyclIfAvailableElseInt = int; #endif #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_FPGA) template - using AccFpgaSyclIntelIfAvailableElseInt = alpaka::AccFpgaSyclIntel; + using AccFpgaSyclIntelIfAvailableElseInt = AccFpgaSyclIntel; #else template using AccFpgaSyclIntelIfAvailableElseInt = int; #endif #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_GPU) template - using AccGpuSyclIntelIfAvailableElseInt = alpaka::AccGpuSyclIntel; + using AccGpuSyclIntelIfAvailableElseInt = AccGpuSyclIntel; #else template using AccGpuSyclIntelIfAvailableElseInt = int; diff --git a/include/alpaka/test/event/EventHostManualTrigger.hpp b/include/alpaka/test/event/EventHostManualTrigger.hpp index 4ce363907475..dbdc269f0070 100644 --- a/include/alpaka/test/event/EventHostManualTrigger.hpp +++ b/include/alpaka/test/event/EventHostManualTrigger.hpp @@ -712,11 +712,11 @@ namespace alpaka { namespace test { - template + template class EventHostManualTriggerSycl { public: - EventHostManualTriggerSycl(DevGenericSycl const&) + EventHostManualTriggerSycl(DevGenericSycl const&) { } @@ -727,16 +727,16 @@ namespace alpaka namespace trait { - template - struct EventHostManualTriggerType> + template + struct EventHostManualTriggerType> { - using type = alpaka::test::EventHostManualTriggerSycl; + using type = alpaka::test::EventHostManualTriggerSycl; }; - template - struct IsEventHostManualTriggerSupported> + template + struct IsEventHostManualTriggerSupported> { - ALPAKA_FN_HOST static auto isSupported(DevGenericSycl const&) -> bool + ALPAKA_FN_HOST static auto isSupported(DevGenericSycl const&) -> bool { return false; } @@ -746,35 +746,30 @@ namespace alpaka namespace trait { - template - struct Enqueue< - QueueGenericSyclBlocking>, - test::EventHostManualTriggerSycl> + template + struct Enqueue, test::EventHostManualTriggerSycl> { ALPAKA_FN_HOST static auto enqueue( - QueueGenericSyclBlocking>& /* queue */, - test::EventHostManualTriggerSycl& /* event */) -> void + QueueGenericSyclBlocking& /* queue */, + test::EventHostManualTriggerSycl& /* event */) -> void { } }; - template - struct Enqueue< - QueueGenericSyclNonBlocking>, - test::EventHostManualTriggerSycl> + template + struct Enqueue, test::EventHostManualTriggerSycl> { ALPAKA_FN_HOST static auto enqueue( - QueueGenericSyclNonBlocking>& /* queue */, - test::EventHostManualTriggerSycl& /* event */) -> void + QueueGenericSyclNonBlocking& /* queue */, + test::EventHostManualTriggerSycl& /* event */) -> void { } }; - template - struct IsComplete> + template + struct IsComplete> { - ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl const& /* event */) - -> bool + ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl const& /* event */) -> bool { return true; } diff --git a/include/alpaka/test/queue/Queue.hpp b/include/alpaka/test/queue/Queue.hpp index 22432fc719b4..d9c0e9927934 100644 --- a/include/alpaka/test/queue/Queue.hpp +++ b/include/alpaka/test/queue/Queue.hpp @@ -38,14 +38,20 @@ namespace alpaka::test # endif }; #endif - } // namespace trait - //! The queue type that should be used for the given device. - template - using DefaultQueue = typename trait::DefaultQueueType::type; +#ifdef ALPAKA_ACC_SYCL_ENABLED + //! The default queue type trait specialization for the SYCL device. + template + struct DefaultQueueType> + { +# if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL) + using type = QueueGenericSyclBlocking; +# else + using type = QueueGenericSyclNonBlocking; +# endif + }; +#endif - namespace trait - { //! The blocking queue trait. template struct IsBlockingQueue; @@ -82,81 +88,24 @@ namespace alpaka::test #endif #ifdef ALPAKA_ACC_SYCL_ENABLED -# ifdef ALPAKA_SYCL_ONEAPI_CPU - //! The default queue type trait specialization for the Intel CPU device. - template<> - struct DefaultQueueType - { -# if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL) - using type = alpaka::QueueCpuSyclBlocking; -# else - using type = alpaka::QueueCpuSyclNonBlocking; -# endif - }; - - template<> - struct IsBlockingQueue + template + struct IsBlockingQueue> { static constexpr auto value = true; }; - template<> - struct IsBlockingQueue + template + struct IsBlockingQueue> { static constexpr auto value = false; }; -# endif -# ifdef ALPAKA_SYCL_ONEAPI_FPGA - //! The default queue type trait specialization for the Intel SYCL device. - template<> - struct DefaultQueueType - { -# if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL) - using type = alpaka::QueueFpgaSyclIntelBlocking; -# else - using type = alpaka::QueueFpgaSyclIntelNonBlocking; -# endif - }; - - template<> - struct IsBlockingQueue - { - static constexpr auto value = true; - }; - - template<> - struct IsBlockingQueue - { - static constexpr auto value = false; - }; -# endif -# ifdef ALPAKA_SYCL_ONEAPI_GPU - //! The default queue type trait specialization for the Intel CPU device. - template<> - struct DefaultQueueType - { -# if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL) - using type = alpaka::QueueGpuSyclIntelBlocking; -# else - using type = alpaka::QueueGpuSyclIntelNonBlocking; -# endif - }; - - template<> - struct IsBlockingQueue - { - static constexpr auto value = true; - }; - - template<> - struct IsBlockingQueue - { - static constexpr auto value = false; - }; -# endif #endif } // namespace trait + //! The queue type that should be used for the given device. + template + using DefaultQueue = typename trait::DefaultQueueType::type; + //! The queue type that should be used for the given accelerator. template using IsBlockingQueue = trait::IsBlockingQueue;