From da1061bcee63bbba2566553bc97e96442bba8c83 Mon Sep 17 00:00:00 2001
From: AuroraPerego <aurora.perego@cern.ch>
Date: Sun, 11 Aug 2024 23:28:26 +0200
Subject: [PATCH] template SYCL objects and device selectors on the Tag

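- template the Device, Queue, Event, Acc, Buf, Platform, EventHostManualTriggerSycl and device selectors on the accelerator Tag
  (Acc and Platform were previously templated on the device selector, Dev and Buf on the platform, Event and Queue on the device)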
- adapt the default queue for the tests
- avoid code duplication for SYCL where possible
---
 include/alpaka/acc/AccCpuSycl.hpp             |   3 +-
 include/alpaka/acc/AccFpgaSyclIntel.hpp       |   3 +-
 include/alpaka/acc/AccGenericSycl.hpp         |  75 +-
 include/alpaka/acc/AccGpuSyclIntel.hpp        |   3 +-
 include/alpaka/dev/DevCpuSycl.hpp             |   4 +-
 include/alpaka/dev/DevFpgaSyclIntel.hpp       |   4 +-
 include/alpaka/dev/DevGenericSycl.hpp         | 234 +++--
 include/alpaka/dev/DevGpuSyclIntel.hpp        |   4 +-
 include/alpaka/event/EventCpuSycl.hpp         |   4 +-
 include/alpaka/event/EventFpgaSyclIntel.hpp   |   4 +-
 include/alpaka/event/EventGenericSycl.hpp     |  60 +-
 include/alpaka/event/EventGpuSyclIntel.hpp    |   4 +-
 include/alpaka/kernel/TaskKernelCpuSycl.hpp   |   5 +-
 .../alpaka/kernel/TaskKernelFpgaSyclIntel.hpp |   4 +-
 .../alpaka/kernel/TaskKernelGenericSycl.hpp   |   8 +-
 .../alpaka/kernel/TaskKernelGpuSyclIntel.hpp  |   4 +-
 include/alpaka/mem/buf/BufCpuSycl.hpp         |   1 +
 include/alpaka/mem/buf/BufFpgaSyclIntel.hpp   |   1 +
 include/alpaka/mem/buf/BufGenericSycl.hpp     | 100 +-
 include/alpaka/mem/buf/BufGpuSyclIntel.hpp    |   1 +
 include/alpaka/mem/buf/sycl/Copy.hpp          |  12 +-
 include/alpaka/mem/view/ViewPlainPtr.hpp      |  17 +-
 include/alpaka/platform/PlatformCpuSycl.hpp   |  15 +-
 .../alpaka/platform/PlatformFpgaSyclIntel.hpp |  15 +-
 .../alpaka/platform/PlatformGenericSycl.hpp   | 954 +++++++++---------
 .../alpaka/platform/PlatformGpuSyclIntel.hpp  |  16 +-
 include/alpaka/queue/QueueCpuSyclBlocking.hpp |   4 +-
 .../alpaka/queue/QueueCpuSyclNonBlocking.hpp  |   4 +-
 .../queue/QueueFpgaSyclIntelBlocking.hpp      |   4 +-
 .../queue/QueueFpgaSyclIntelNonBlocking.hpp   |   4 +-
 .../alpaka/queue/QueueGenericSyclBlocking.hpp |   4 +-
 .../queue/QueueGenericSyclNonBlocking.hpp     |   4 +-
 .../queue/QueueGpuSyclIntelBlocking.hpp       |   4 +-
 .../queue/QueueGpuSyclIntelNonBlocking.hpp    |   4 +-
 .../queue/sycl/QueueGenericSyclBase.hpp       | 432 ++++----
 include/alpaka/test/acc/TestAccs.hpp          |   6 +-
 .../test/event/EventHostManualTrigger.hpp     |  43 +-
 include/alpaka/test/queue/Queue.hpp           |  91 +-
 38 files changed, 1052 insertions(+), 1107 deletions(-)
diff --git a/include/alpaka/acc/AccCpuSycl.hpp b/include/alpaka/acc/AccCpuSycl.hpp
index c7049394bbfd..ea5ac22a09ca 100644
--- a/include/alpaka/acc/AccCpuSycl.hpp
+++ b/include/alpaka/acc/AccCpuSycl.hpp
@@ -7,7 +7,6 @@
 #include "alpaka/acc/AccGenericSycl.hpp"
 #include "alpaka/acc/Tag.hpp"
 #include "alpaka/core/Sycl.hpp"
-#include "alpaka/platform/PlatformCpuSycl.hpp"
 
 #include <string>
 #include <utility>
@@ -20,7 +19,7 @@ namespace alpaka
     //!
     //! This accelerator allows parallel kernel execution on a oneAPI-capable CPU target device.
     template<typename TDim, typename TIdx>
-    using AccCpuSycl = AccGenericSycl<detail::SyclCpuSelector, TDim, TIdx>;
+    using AccCpuSycl = AccGenericSycl<TagCpuSycl, TDim, TIdx>;
 
     namespace trait
     {
diff --git a/include/alpaka/acc/AccFpgaSyclIntel.hpp b/include/alpaka/acc/AccFpgaSyclIntel.hpp
index a6de9b73b43b..1d1b6a937288 100644
--- a/include/alpaka/acc/AccFpgaSyclIntel.hpp
+++ b/include/alpaka/acc/AccFpgaSyclIntel.hpp
@@ -7,7 +7,6 @@
 #include "alpaka/acc/AccGenericSycl.hpp"
 #include "alpaka/acc/Tag.hpp"
 #include "alpaka/core/Sycl.hpp"
-#include "alpaka/platform/PlatformFpgaSyclIntel.hpp"
 
 #include <string>
 #include <utility>
@@ -20,7 +19,7 @@ namespace alpaka
     //!
     //! This accelerator allows parallel kernel execution on a oneAPI-capable Intel FPGA target device.
     template<typename TDim, typename TIdx>
-    using AccFpgaSyclIntel = AccGenericSycl<detail::IntelFpgaSelector, TDim, TIdx>;
+    using AccFpgaSyclIntel = AccGenericSycl<TagFpgaSyclIntel, TDim, TIdx>;
 
     namespace trait
     {
diff --git a/include/alpaka/acc/AccGenericSycl.hpp b/include/alpaka/acc/AccGenericSycl.hpp
index b9437ee76723..b7132a749de6 100644
--- a/include/alpaka/acc/AccGenericSycl.hpp
+++ b/include/alpaka/acc/AccGenericSycl.hpp
@@ -46,13 +46,13 @@
 
 namespace alpaka
 {
-    template<typename TSelector, typename TAcc, typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
+    template<typename TTag, typename TAcc, typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
     class TaskKernelGenericSycl;
 
     //! The SYCL accelerator.
     //!
     //! This accelerator allows parallel kernel execution on SYCL devices.
-    template<typename TSelector, typename TDim, typename TIdx>
+    template<typename TTag, typename TDim, typename TIdx>
     class AccGenericSycl
         : public WorkDivGenericSycl<TDim, TIdx>
         , public gb::IdxGbGenericSycl<TDim, TIdx>
@@ -103,30 +103,29 @@ namespace alpaka
 namespace alpaka::trait
 {
     //! The SYCL accelerator type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct AccType<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct AccType<AccGenericSycl<TTag, TDim, TIdx>>
     {
-        using type = AccGenericSycl<TSelector, TDim, TIdx>;
+        using type = AccGenericSycl<TTag, TDim, TIdx>;
     };
 
     //! The SYCL single thread accelerator type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct IsSingleThreadAcc<AccGenericSycl<TSelector, TDim, TIdx>> : std::false_type
+    template<typename TTag, typename TDim, typename TIdx>
+    struct IsSingleThreadAcc<AccGenericSycl<TTag, TDim, TIdx>> : std::false_type
     {
     };
 
     //! The SYCL multi thread accelerator type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct IsMultiThreadAcc<AccGenericSycl<TSelector, TDim, TIdx>> : std::true_type
+    template<typename TTag, typename TDim, typename TIdx>
+    struct IsMultiThreadAcc<AccGenericSycl<TTag, TDim, TIdx>> : std::true_type
     {
     };
 
     //! The SYCL accelerator device properties get trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct GetAccDevProps<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct GetAccDevProps<AccGenericSycl<TTag, TDim, TIdx>>
     {
-        static auto getAccDevProps(DevGenericSycl<PlatformGenericSycl<TSelector>> const& dev)
-            -> AccDevProps<TDim, TIdx>
+        static auto getAccDevProps(DevGenericSycl<TTag> const& dev) -> AccDevProps<TDim, TIdx>
         {
             auto const device = dev.getNativeHandle().first;
             auto const max_threads_dim
@@ -160,63 +159,53 @@ namespace alpaka::trait
     };
 
     //! The SYCL accelerator name trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct GetAccName<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct GetAccName<AccGenericSycl<TTag, TDim, TIdx>>
     {
         static auto getAccName() -> std::string
         {
-            // TODO implement TSelector::name
-            return std::string("Acc") + TSelector::name + "<" + std::to_string(TDim::value) + ","
-                   + core::demangled<TIdx> + ">";
+            return std::string("Acc") + detail::SYCLDeviceSelector<TTag>::name + "<" + std::to_string(TDim::value)
+                   + "," + core::demangled<TIdx> + ">";
         }
     };
 
     //! The SYCL accelerator device type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct DevType<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct DevType<AccGenericSycl<TTag, TDim, TIdx>>
     {
-        using type = DevGenericSycl<PlatformGenericSycl<TSelector>>;
+        using type = DevGenericSycl<TTag>;
     };
 
     //! The SYCL accelerator dimension getter trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct DimType<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct DimType<AccGenericSycl<TTag, TDim, TIdx>>
     {
         using type = TDim;
     };
 
     //! The SYCL accelerator execution task type trait specialization.
-    template<
-        typename TSelector,
-        typename TDim,
-        typename TIdx,
-        typename TWorkDiv,
-        typename TKernelFnObj,
-        typename... TArgs>
-    struct CreateTaskKernel<AccGenericSycl<TSelector, TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
+    template<typename TTag, typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
+    struct CreateTaskKernel<AccGenericSycl<TTag, TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
     {
         static auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
         {
-            return TaskKernelGenericSycl<
-                TSelector,
-                AccGenericSycl<TSelector, TDim, TIdx>,
-                TDim,
-                TIdx,
-                TKernelFnObj,
-                TArgs...>{workDiv, kernelFnObj, std::forward<TArgs>(args)...};
+            return TaskKernelGenericSycl<TTag, AccGenericSycl<TTag, TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>{
+                workDiv,
+                kernelFnObj,
+                std::forward<TArgs>(args)...};
         }
     };
 
     //! The SYCL execution task platform type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct PlatformType<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct PlatformType<AccGenericSycl<TTag, TDim, TIdx>>
     {
-        using type = PlatformGenericSycl<TSelector>;
+        using type = PlatformGenericSycl<TTag>;
     };
 
     //! The SYCL accelerator idx type trait specialization.
-    template<typename TSelector, typename TDim, typename TIdx>
-    struct IdxType<AccGenericSycl<TSelector, TDim, TIdx>>
+    template<typename TTag, typename TDim, typename TIdx>
+    struct IdxType<AccGenericSycl<TTag, TDim, TIdx>>
     {
         using type = TIdx;
     };
diff --git a/include/alpaka/acc/AccGpuSyclIntel.hpp b/include/alpaka/acc/AccGpuSyclIntel.hpp
index 1e2b55d1f038..d544b9c1749e 100644
--- a/include/alpaka/acc/AccGpuSyclIntel.hpp
+++ b/include/alpaka/acc/AccGpuSyclIntel.hpp
@@ -7,7 +7,6 @@
 #include "alpaka/acc/AccGenericSycl.hpp"
 #include "alpaka/acc/Tag.hpp"
 #include "alpaka/core/Sycl.hpp"
-#include "alpaka/platform/PlatformGpuSyclIntel.hpp"
 
 #include <string>
 #include <utility>
@@ -20,7 +19,7 @@ namespace alpaka
     //!
     //! This accelerator allows parallel kernel execution on a oneAPI-capable Intel GPU target device.
     template<typename TDim, typename TIdx>
-    using AccGpuSyclIntel = AccGenericSycl<detail::IntelGpuSelector, TDim, TIdx>;
+    using AccGpuSyclIntel = AccGenericSycl<TagGpuSyclIntel, TDim, TIdx>;
 
     namespace trait
     {
diff --git a/include/alpaka/dev/DevCpuSycl.hpp b/include/alpaka/dev/DevCpuSycl.hpp
index 04b15a867558..5e9a2321769c 100644
--- a/include/alpaka/dev/DevCpuSycl.hpp
+++ b/include/alpaka/dev/DevCpuSycl.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/dev/DevGenericSycl.hpp"
-#include "alpaka/platform/PlatformCpuSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
 namespace alpaka
 {
-    using DevCpuSycl = DevGenericSycl<PlatformCpuSycl>;
+    using DevCpuSycl = DevGenericSycl<TagCpuSycl>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/dev/DevFpgaSyclIntel.hpp b/include/alpaka/dev/DevFpgaSyclIntel.hpp
index 516027db6b2a..8004aad039fc 100644
--- a/include/alpaka/dev/DevFpgaSyclIntel.hpp
+++ b/include/alpaka/dev/DevFpgaSyclIntel.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/dev/DevGenericSycl.hpp"
-#include "alpaka/platform/PlatformFpgaSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
 namespace alpaka
 {
-    using DevFpgaSyclIntel = DevGenericSycl<PlatformFpgaSyclIntel>;
+    using DevFpgaSyclIntel = DevGenericSycl<TagFpgaSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/dev/DevGenericSycl.hpp b/include/alpaka/dev/DevGenericSycl.hpp
index 729090f8f2d3..efbcad92a0e5 100644
--- a/include/alpaka/dev/DevGenericSycl.hpp
+++ b/include/alpaka/dev/DevGenericSycl.hpp
@@ -32,7 +32,22 @@
 
 namespace alpaka
 {
-    template<typename TElem, typename TDim, typename TIdx, typename TDev>
+    namespace trait
+    {
+        template<typename TPlatform, typename TSfinae>
+        struct GetDevByIdx;
+    } // namespace trait
+
+    template<typename TTag>
+    using QueueGenericSyclBlocking = detail::QueueGenericSyclBase<TTag, true>;
+
+    template<typename TTag>
+    using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase<TTag, false>;
+
+    template<typename TTag>
+    struct PlatformGenericSycl;
+
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
     class BufGenericSycl;
 
     namespace detail
@@ -105,11 +120,13 @@ namespace alpaka
     } // namespace detail
 
     //! The SYCL device handle.
-    template<typename TPlatform>
+    template<typename TTag>
     class DevGenericSycl
-        : public concepts::Implements<ConceptCurrentThreadWaitFor, DevGenericSycl<TPlatform>>
-        , public concepts::Implements<ConceptDev, DevGenericSycl<TPlatform>>
+        : public concepts::Implements<ConceptCurrentThreadWaitFor, DevGenericSycl<TTag>>
+        , public concepts::Implements<ConceptDev, DevGenericSycl<TTag>>
     {
+        friend struct trait::GetDevByIdx<PlatformGenericSycl<TTag>>;
+
     public:
         DevGenericSycl(sycl::device device, sycl::context context)
             : m_impl{std::make_shared<detail::DevGenericSyclImpl>(std::move(device), std::move(context))}
@@ -133,128 +150,133 @@ namespace alpaka
 
         std::shared_ptr<detail::DevGenericSyclImpl> m_impl;
     };
-} // namespace alpaka
 
-namespace alpaka::trait
-{
-    //! The SYCL device name get trait specialization.
-    template<typename TPlatform>
-    struct GetName<DevGenericSycl<TPlatform>>
+    namespace trait
     {
-        static auto getName(DevGenericSycl<TPlatform> const& dev) -> std::string
+        //! The SYCL device name get trait specialization.
+        template<typename TTag>
+        struct GetName<DevGenericSycl<TTag>>
         {
-            auto const device = dev.getNativeHandle().first;
-            return device.template get_info<sycl::info::device::name>();
-        }
-    };
+            static auto getName(DevGenericSycl<TTag> const& dev) -> std::string
+            {
+                auto const device = dev.getNativeHandle().first;
+                return device.template get_info<sycl::info::device::name>();
+            }
+        };
 
-    //! The SYCL device available memory get trait specialization.
-    template<typename TPlatform>
-    struct GetMemBytes<DevGenericSycl<TPlatform>>
-    {
-        static auto getMemBytes(DevGenericSycl<TPlatform> const& dev) -> std::size_t
+        //! The SYCL device available memory get trait specialization.
+        template<typename TTag>
+        struct GetMemBytes<DevGenericSycl<TTag>>
         {
-            auto const device = dev.getNativeHandle().first;
-            return device.template get_info<sycl::info::device::global_mem_size>();
-        }
-    };
+            static auto getMemBytes(DevGenericSycl<TTag> const& dev) -> std::size_t
+            {
+                auto const device = dev.getNativeHandle().first;
+                return device.template get_info<sycl::info::device::global_mem_size>();
+            }
+        };
 
-    //! The SYCL device free memory get trait specialization.
-    template<typename TPlatform>
-    struct GetFreeMemBytes<DevGenericSycl<TPlatform>>
-    {
-        static auto getFreeMemBytes(DevGenericSycl<TPlatform> const& /* dev */) -> std::size_t
+        //! The SYCL device free memory get trait specialization.
+        template<typename TTag>
+        struct GetFreeMemBytes<DevGenericSycl<TTag>>
         {
-            static_assert(!sizeof(TPlatform), "Querying free device memory not supported for SYCL devices.");
-            return std::size_t{};
-        }
-    };
+            static auto getFreeMemBytes(DevGenericSycl<TTag> const& /* dev */) -> std::size_t
+            {
+                static_assert(
+                    !sizeof(PlatformGenericSycl<TTag>),
+                    "Querying free device memory not supported for SYCL devices.");
+                return std::size_t{};
+            }
+        };
 
-    //! The SYCL device warp size get trait specialization.
-    template<typename TPlatform>
-    struct GetWarpSizes<DevGenericSycl<TPlatform>>
-    {
-        static auto getWarpSizes(DevGenericSycl<TPlatform> const& dev) -> std::vector<std::size_t>
+        //! The SYCL device warp size get trait specialization.
+        template<typename TTag>
+        struct GetWarpSizes<DevGenericSycl<TTag>>
         {
-            auto const device = dev.getNativeHandle().first;
-            std::vector<std::size_t> warp_sizes = device.template get_info<sycl::info::device::sub_group_sizes>();
-            // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not
-            auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
-            if(find64 != warp_sizes.end())
-                warp_sizes.erase(find64);
-            // Sort the warp sizes in decreasing order
-            std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
-            return warp_sizes;
-        }
-    };
+            static auto getWarpSizes(DevGenericSycl<TTag> const& dev) -> std::vector<std::size_t>
+            {
+                auto const device = dev.getNativeHandle().first;
+                std::vector<std::size_t> warp_sizes = device.template get_info<sycl::info::device::sub_group_sizes>();
+                // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not
+                auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
+                if(find64 != warp_sizes.end())
+                    warp_sizes.erase(find64);
+                // Sort the warp sizes in decreasing order
+                std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
+                return warp_sizes;
+            }
+        };
 
-    //! The SYCL device preferred warp size get trait specialization.
-    template<typename TPlatform>
-    struct GetPreferredWarpSize<DevGenericSycl<TPlatform>>
-    {
-        static auto getPreferredWarpSize(DevGenericSycl<TPlatform> const& dev) -> std::size_t
+        //! The SYCL device preferred warp size get trait specialization.
+        template<typename TTag>
+        struct GetPreferredWarpSize<DevGenericSycl<TTag>>
         {
-            return GetWarpSizes<DevGenericSycl<TPlatform>>::getWarpSizes(dev).front();
-        }
-    };
+            static auto getPreferredWarpSize(DevGenericSycl<TTag> const& dev) -> std::size_t
+            {
+                return GetWarpSizes<DevGenericSycl<TTag>>::getWarpSizes(dev).front();
+            }
+        };
 
-    //! The SYCL device reset trait specialization.
-    template<typename TPlatform>
-    struct Reset<DevGenericSycl<TPlatform>>
-    {
-        static auto reset(DevGenericSycl<TPlatform> const&) -> void
+        //! The SYCL device reset trait specialization.
+        template<typename TTag>
+        struct Reset<DevGenericSycl<TTag>>
         {
-            static_assert(!sizeof(TPlatform), "Explicit device reset not supported for SYCL devices");
-        }
-    };
+            static auto reset(DevGenericSycl<TTag> const&) -> void
+            {
+                static_assert(
+                    !sizeof(PlatformGenericSycl<TTag>),
+                    "Explicit device reset not supported for SYCL devices");
+            }
+        };
 
-    //! The SYCL device native handle trait specialization.
-    template<typename TPlatform>
-    struct NativeHandle<DevGenericSycl<TPlatform>>
-    {
-        [[nodiscard]] static auto getNativeHandle(DevGenericSycl<TPlatform> const& dev)
+        //! The SYCL device native handle trait specialization.
+        template<typename TTag>
+        struct NativeHandle<DevGenericSycl<TTag>>
         {
-            return dev.getNativeHandle();
-        }
-    };
+            [[nodiscard]] static auto getNativeHandle(DevGenericSycl<TTag> const& dev)
+            {
+                return dev.getNativeHandle();
+            }
+        };
 
-    //! The SYCL device memory buffer type trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct BufType<DevGenericSycl<TPlatform>, TElem, TDim, TIdx>
-    {
-        using type = BufGenericSycl<TElem, TDim, TIdx, TPlatform>;
-    };
+        //! The SYCL device memory buffer type trait specialization.
+        template<typename TElem, typename TDim, typename TIdx, typename TTag>
+        struct BufType<DevGenericSycl<TTag>, TElem, TDim, TIdx>
+        {
+            using type = BufGenericSycl<TElem, TDim, TIdx, TTag>;
+        };
 
-    //! The SYCL device platform type trait specialization.
-    template<typename TPlatform>
-    struct PlatformType<DevGenericSycl<TPlatform>>
-    {
-        using type = TPlatform;
-    };
+        //! The SYCL device platform type trait specialization.
+        template<typename TTag>
+        struct PlatformType<DevGenericSycl<TTag>>
+        {
+            using type = PlatformGenericSycl<TTag>;
+        };
 
-    //! The thread SYCL device wait specialization.
-    template<typename TPlatform>
-    struct CurrentThreadWaitFor<DevGenericSycl<TPlatform>>
-    {
-        static auto currentThreadWaitFor(DevGenericSycl<TPlatform> const& dev) -> void
+        //! The SYCL device current-thread wait trait specialization.
+        template<typename TTag>
+        struct CurrentThreadWaitFor<DevGenericSycl<TTag>>
         {
-            dev.m_impl->wait();
-        }
-    };
+            static auto currentThreadWaitFor(DevGenericSycl<TTag> const& dev) -> void
+            {
+                dev.m_impl->wait();
+            }
+        };
 
-    //! The SYCL blocking queue trait specialization.
-    template<typename TPlatform>
-    struct QueueType<DevGenericSycl<TPlatform>, Blocking>
-    {
-        using type = detail::QueueGenericSyclBase<DevGenericSycl<TPlatform>, true>;
-    };
+        //! The SYCL blocking queue trait specialization.
+        template<typename TTag>
+        struct QueueType<DevGenericSycl<TTag>, Blocking>
+        {
+            using type = QueueGenericSyclBlocking<TTag>;
+        };
 
-    //! The SYCL non-blocking queue trait specialization.
-    template<typename TPlatform>
-    struct QueueType<DevGenericSycl<TPlatform>, NonBlocking>
-    {
-        using type = detail::QueueGenericSyclBase<DevGenericSycl<TPlatform>, false>;
-    };
-} // namespace alpaka::trait
+        //! The SYCL non-blocking queue trait specialization.
+        template<typename TTag>
+        struct QueueType<DevGenericSycl<TTag>, NonBlocking>
+        {
+            using type = QueueGenericSyclNonBlocking<TTag>;
+        };
+
+    } // namespace trait
+} // namespace alpaka
 
 #endif
diff --git a/include/alpaka/dev/DevGpuSyclIntel.hpp b/include/alpaka/dev/DevGpuSyclIntel.hpp
index 9897d40ebbc5..d26bb4ca72db 100644
--- a/include/alpaka/dev/DevGpuSyclIntel.hpp
+++ b/include/alpaka/dev/DevGpuSyclIntel.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/dev/DevGenericSycl.hpp"
-#include "alpaka/platform/PlatformGpuSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
 namespace alpaka
 {
-    using DevGpuSyclIntel = DevGenericSycl<PlatformGpuSyclIntel>;
+    using DevGpuSyclIntel = DevGenericSycl<TagGpuSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/event/EventCpuSycl.hpp b/include/alpaka/event/EventCpuSycl.hpp
index c95ed8e20df3..7c5b6310ae5b 100644
--- a/include/alpaka/event/EventCpuSycl.hpp
+++ b/include/alpaka/event/EventCpuSycl.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevCpuSycl.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/event/EventGenericSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
 namespace alpaka
 {
-    using EventCpuSycl = EventGenericSycl<DevCpuSycl>;
+    using EventCpuSycl = EventGenericSycl<TagCpuSycl>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/event/EventFpgaSyclIntel.hpp b/include/alpaka/event/EventFpgaSyclIntel.hpp
index d79d8aca2cfd..0148c967e4f6 100644
--- a/include/alpaka/event/EventFpgaSyclIntel.hpp
+++ b/include/alpaka/event/EventFpgaSyclIntel.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevFpgaSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/event/EventGenericSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
 namespace alpaka
 {
-    using EventFpgaSyclIntel = EventGenericSycl<DevFpgaSyclIntel>;
+    using EventFpgaSyclIntel = EventGenericSycl<TagFpgaSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/event/EventGenericSycl.hpp b/include/alpaka/event/EventGenericSycl.hpp
index 68011a0247cd..8b81f7029b17 100644
--- a/include/alpaka/event/EventGenericSycl.hpp
+++ b/include/alpaka/event/EventGenericSycl.hpp
@@ -22,11 +22,11 @@
 namespace alpaka
 {
     //! The SYCL device event.
-    template<typename TDev>
+    template<typename TTag>
     class EventGenericSycl final
     {
     public:
-        explicit EventGenericSycl(TDev const& dev) : m_dev{dev}
+        explicit EventGenericSycl(DevGenericSycl<TTag> const& dev) : m_dev{dev}
         {
         }
 
@@ -50,7 +50,7 @@ namespace alpaka
             m_event = event;
         }
 
-        TDev m_dev;
+        DevGenericSycl<TTag> m_dev;
 
     private:
         sycl::event m_event{};
@@ -60,20 +60,20 @@ namespace alpaka
 namespace alpaka::trait
 {
     //! The SYCL device event device get trait specialization.
-    template<typename TDev>
-    struct GetDev<EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct GetDev<EventGenericSycl<TTag>>
     {
-        static auto getDev(EventGenericSycl<TDev> const& event) -> TDev
+        static auto getDev(EventGenericSycl<TTag> const& event) -> DevGenericSycl<TTag>
         {
             return event.m_dev;
         }
     };
 
     //! The SYCL device event test trait specialization.
-    template<typename TDev>
-    struct IsComplete<EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct IsComplete<EventGenericSycl<TTag>>
     {
-        static auto isComplete(EventGenericSycl<TDev> const& event)
+        static auto isComplete(EventGenericSycl<TTag> const& event)
         {
             auto const status
                 = event.getNativeHandle().template get_info<sycl::info::event::command_execution_status>();
@@ -82,20 +82,20 @@ namespace alpaka::trait
     };
 
     //! The SYCL queue enqueue trait specialization.
-    template<typename TDev>
-    struct Enqueue<QueueGenericSyclNonBlocking<TDev>, EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct Enqueue<QueueGenericSyclNonBlocking<TTag>, EventGenericSycl<TTag>>
     {
-        static auto enqueue(QueueGenericSyclNonBlocking<TDev>& queue, EventGenericSycl<TDev>& event)
+        static auto enqueue(QueueGenericSyclNonBlocking<TTag>& queue, EventGenericSycl<TTag>& event)
         {
             event.setEvent(queue.m_spQueueImpl->get_last_event());
         }
     };
 
     //! The SYCL queue enqueue trait specialization.
-    template<typename TDev>
-    struct Enqueue<QueueGenericSyclBlocking<TDev>, EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct Enqueue<QueueGenericSyclBlocking<TTag>, EventGenericSycl<TTag>>
     {
-        static auto enqueue(QueueGenericSyclBlocking<TDev>& queue, EventGenericSycl<TDev>& event)
+        static auto enqueue(QueueGenericSyclBlocking<TTag>& queue, EventGenericSycl<TTag>& event)
         {
             event.setEvent(queue.m_spQueueImpl->get_last_event());
         }
@@ -105,30 +105,30 @@ namespace alpaka::trait
     //!
     //! Waits until the event itself and therefore all tasks preceding it in the queue it is enqueued to have been
     //! completed. If the event is not enqueued to a queue the method returns immediately.
-    template<typename TDev>
-    struct CurrentThreadWaitFor<EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct CurrentThreadWaitFor<EventGenericSycl<TTag>>
     {
-        static auto currentThreadWaitFor(EventGenericSycl<TDev> const& event)
+        static auto currentThreadWaitFor(EventGenericSycl<TTag> const& event)
         {
             event.getNativeHandle().wait_and_throw();
         }
     };
 
     //! The SYCL queue event wait trait specialization.
-    template<typename TDev>
-    struct WaiterWaitFor<QueueGenericSyclNonBlocking<TDev>, EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct WaiterWaitFor<QueueGenericSyclNonBlocking<TTag>, EventGenericSycl<TTag>>
     {
-        static auto waiterWaitFor(QueueGenericSyclNonBlocking<TDev>& queue, EventGenericSycl<TDev> const& event)
+        static auto waiterWaitFor(QueueGenericSyclNonBlocking<TTag>& queue, EventGenericSycl<TTag> const& event)
         {
             queue.m_spQueueImpl->register_dependency(event.getNativeHandle());
         }
     };
 
     //! The SYCL queue event wait trait specialization.
-    template<typename TDev>
-    struct WaiterWaitFor<QueueGenericSyclBlocking<TDev>, EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct WaiterWaitFor<QueueGenericSyclBlocking<TTag>, EventGenericSycl<TTag>>
     {
-        static auto waiterWaitFor(QueueGenericSyclBlocking<TDev>& queue, EventGenericSycl<TDev> const& event)
+        static auto waiterWaitFor(QueueGenericSyclBlocking<TTag>& queue, EventGenericSycl<TTag> const& event)
         {
             queue.m_spQueueImpl->register_dependency(event.getNativeHandle());
         }
@@ -138,20 +138,20 @@ namespace alpaka::trait
     //!
     //! Any future work submitted in any queue of this device will wait for event to complete before beginning
     //! execution.
-    template<typename TDev>
-    struct WaiterWaitFor<TDev, EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct WaiterWaitFor<DevGenericSycl<TTag>, EventGenericSycl<TTag>>
     {
-        static auto waiterWaitFor(TDev& dev, EventGenericSycl<TDev> const& event)
+        static auto waiterWaitFor(DevGenericSycl<TTag>& dev, EventGenericSycl<TTag> const& event)
         {
             dev.m_impl->register_dependency(event.getNativeHandle());
         }
     };
 
     //! The SYCL device event native handle trait specialization.
-    template<typename TDev>
-    struct NativeHandle<EventGenericSycl<TDev>>
+    template<typename TTag>
+    struct NativeHandle<EventGenericSycl<TTag>>
     {
-        [[nodiscard]] static auto getNativeHandle(EventGenericSycl<TDev> const& event)
+        [[nodiscard]] static auto getNativeHandle(EventGenericSycl<TTag> const& event)
         {
             return event.getNativeHandle();
         }
diff --git a/include/alpaka/event/EventGpuSyclIntel.hpp b/include/alpaka/event/EventGpuSyclIntel.hpp
index d59562a9e497..0f1bee69be20 100644
--- a/include/alpaka/event/EventGpuSyclIntel.hpp
+++ b/include/alpaka/event/EventGpuSyclIntel.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevGpuSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/event/EventGenericSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
 namespace alpaka
 {
-    using EventGpuSyclIntel = EventGenericSycl<DevGpuSyclIntel>;
+    using EventGpuSyclIntel = EventGenericSycl<TagGpuSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/kernel/TaskKernelCpuSycl.hpp b/include/alpaka/kernel/TaskKernelCpuSycl.hpp
index 528846246053..3feef5e0b1aa 100644
--- a/include/alpaka/kernel/TaskKernelCpuSycl.hpp
+++ b/include/alpaka/kernel/TaskKernelCpuSycl.hpp
@@ -4,15 +4,16 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/kernel/TaskKernelGenericSycl.hpp"
-#include "alpaka/platform/PlatformCpuSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
 namespace alpaka
 {
     template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
-    using TaskKernelCpuSycl = TaskKernelGenericSycl<detail::SyclCpuSelector, AccCpuSycl<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
+    using TaskKernelCpuSycl
+        = TaskKernelGenericSycl<TagCpuSycl, AccCpuSycl<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
 
 } // namespace alpaka
 
diff --git a/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp b/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp
index 2e54f279c471..6d75b06f86a7 100644
--- a/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp
+++ b/include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/kernel/TaskKernelGenericSycl.hpp"
-#include "alpaka/platform/PlatformFpgaSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
@@ -13,7 +13,7 @@ namespace alpaka
 {
     template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
     using TaskKernelFpgaSyclIntel
-        = TaskKernelGenericSycl<detail::IntelFpgaSelector, AccFpgaSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
+        = TaskKernelGenericSycl<TagFpgaSyclIntel, AccFpgaSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
 
 } // namespace alpaka
 
diff --git a/include/alpaka/kernel/TaskKernelGenericSycl.hpp b/include/alpaka/kernel/TaskKernelGenericSycl.hpp
index b56d905c7f66..11cc2cae4590 100644
--- a/include/alpaka/kernel/TaskKernelGenericSycl.hpp
+++ b/include/alpaka/kernel/TaskKernelGenericSycl.hpp
@@ -4,8 +4,8 @@
 
 #pragma once
 
-#include "alpaka/acc/Traits.hpp"
 #include "alpaka/acc/AccGenericSycl.hpp"
+#include "alpaka/acc/Traits.hpp"
 #include "alpaka/core/BoostPredef.hpp"
 #include "alpaka/core/Sycl.hpp"
 #include "alpaka/dev/Traits.hpp"
@@ -71,7 +71,7 @@
 namespace alpaka
 {
     //! The SYCL accelerator execution task.
-    template<typename TSelector, typename TAcc, typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
+    template<typename TTag, typename TAcc, typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
     class TaskKernelGenericSycl final : public WorkDivMembers<TDim, TIdx>
     {
     public:
@@ -280,7 +280,7 @@ namespace alpaka::trait
     };
 
     //! \brief Specialisation of the class template FunctionAttributes
-	//! \tparam TSelector The SYCL device selector.
+    //! \tparam TTag The SYCL back-end tag (selects the detail::SYCLDeviceSelector specialization).
     //! \tparam TDev The device type.
     //! \tparam TDim The dimensionality of the accelerator device properties.
     //! \tparam TIdx The idx type of the accelerator device properties.
@@ -302,7 +302,7 @@ namespace alpaka::trait
             alpaka::KernelFunctionAttributes kernelFunctionAttributes;
 
             // set function properties for maxThreadsPerBlock to device properties
-            auto const& props = alpaka::getAccDevProps<AccGenericSycl<TSelector, TDim, TIdx>>(dev);
+            auto const& props = alpaka::getAccDevProps<AccGenericSycl<TTag, TDim, TIdx>>(dev);
             kernelFunctionAttributes.maxThreadsPerBlock = static_cast<int>(props.m_blockThreadCountMax);
             return kernelFunctionAttributes;
         }
diff --git a/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp b/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp
index 175c76129fa4..b4543d0b4c22 100644
--- a/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp
+++ b/include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/kernel/TaskKernelGenericSycl.hpp"
-#include "alpaka/platform/PlatformGpuSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
@@ -13,7 +13,7 @@ namespace alpaka
 {
     template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
     using TaskKernelGpuSyclIntel
-        = TaskKernelGenericSycl<detail::IntelGpuSelector, AccGpuSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
+        = TaskKernelGenericSycl<TagGpuSyclIntel, AccGpuSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;
 
 } // namespace alpaka
 
diff --git a/include/alpaka/mem/buf/BufCpuSycl.hpp b/include/alpaka/mem/buf/BufCpuSycl.hpp
index d63eebf540ca..2e7559946ecc 100644
--- a/include/alpaka/mem/buf/BufCpuSycl.hpp
+++ b/include/alpaka/mem/buf/BufCpuSycl.hpp
@@ -6,6 +6,7 @@
 
 #include "alpaka/dev/DevCpuSycl.hpp"
 #include "alpaka/mem/buf/BufGenericSycl.hpp"
+#include "alpaka/platform/PlatformCpuSycl.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
diff --git a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp
index 8daafd75d099..926227cc840f 100644
--- a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp
+++ b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp
@@ -6,6 +6,7 @@
 
 #include "alpaka/dev/DevFpgaSyclIntel.hpp"
 #include "alpaka/mem/buf/BufGenericSycl.hpp"
+#include "alpaka/platform/PlatformFpgaSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
diff --git a/include/alpaka/mem/buf/BufGenericSycl.hpp b/include/alpaka/mem/buf/BufGenericSycl.hpp
index b4a5fd94ed54..221aa55ed396 100644
--- a/include/alpaka/mem/buf/BufGenericSycl.hpp
+++ b/include/alpaka/mem/buf/BufGenericSycl.hpp
@@ -24,8 +24,8 @@
 namespace alpaka
 {
     //! The SYCL memory buffer.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    class BufGenericSycl : public internal::ViewAccessOps<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    class BufGenericSycl : public internal::ViewAccessOps<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
     public:
         static_assert(
@@ -36,7 +36,7 @@ namespace alpaka
 
         //! Constructor
         template<typename TExtent, typename Deleter>
-        BufGenericSycl(DevGenericSycl<TPlatform> const& dev, TElem* const pMem, Deleter deleter, TExtent const& extent)
+        BufGenericSycl(DevGenericSycl<TTag> const& dev, TElem* const pMem, Deleter deleter, TExtent const& extent)
             : m_dev{dev}
             , m_extentElements{getExtentVecEnd<TDim>(extent)}
             , m_spMem(pMem, std::move(deleter))
@@ -53,7 +53,7 @@ namespace alpaka
                 "The idx type of TExtent and the TIdx template parameter have to be identical!");
         }
 
-        DevGenericSycl<TPlatform> m_dev;
+        DevGenericSycl<TTag> m_dev;
         Vec<TDim, TIdx> m_extentElements;
         std::shared_ptr<TElem> m_spMem;
     };
@@ -62,68 +62,67 @@ namespace alpaka
 namespace alpaka::trait
 {
     //! The BufGenericSycl device type trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct DevType<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct DevType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
-        using type = DevGenericSycl<TPlatform>;
+        using type = DevGenericSycl<TTag>;
     };
 
     //! The BufGenericSycl device get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetDev<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetDev<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
-        static auto getDev(BufGenericSycl<TElem, TDim, TIdx, TPlatform> const& buf)
+        static auto getDev(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf)
         {
             return buf.m_dev;
         }
     };
 
     //! The BufGenericSycl dimension getter trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct DimType<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct DimType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
         using type = TDim;
     };
 
     //! The BufGenericSycl memory element type get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct ElemType<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct ElemType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
         using type = TElem;
     };
 
     //! The BufGenericSycl extent get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetExtents<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetExtents<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
-        auto operator()(BufGenericSycl<TElem, TDim, TIdx, TPlatform> const& buf) const
+        auto operator()(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf) const
         {
             return buf.m_extentElements;
         }
     };
 
     //! The BufGenericSycl native pointer get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetPtrNative<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetPtrNative<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
-        static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TPlatform> const& buf) -> TElem const*
+        static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf) -> TElem const*
         {
             return buf.m_spMem.get();
         }
 
-        static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TPlatform>& buf) -> TElem*
+        static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TTag>& buf) -> TElem*
         {
             return buf.m_spMem.get();
         }
     };
 
     //! The BufGenericSycl pointer on device get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetPtrDev<BufGenericSycl<TElem, TDim, TIdx, TPlatform>, DevGenericSycl<TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetPtrDev<BufGenericSycl<TElem, TDim, TIdx, TTag>, DevGenericSycl<TTag>>
     {
-        static auto getPtrDev(
-            BufGenericSycl<TElem, TDim, TIdx, TPlatform> const& buf,
-            DevGenericSycl<TPlatform> const& dev) -> TElem const*
+        static auto getPtrDev(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf, DevGenericSycl<TTag> const& dev)
+            -> TElem const*
         {
             if(dev == getDev(buf))
             {
@@ -135,8 +134,7 @@ namespace alpaka::trait
             }
         }
 
-        static auto getPtrDev(BufGenericSycl<TElem, TDim, TIdx, TPlatform>& buf, DevGenericSycl<TPlatform> const& dev)
-            -> TElem*
+        static auto getPtrDev(BufGenericSycl<TElem, TDim, TIdx, TTag>& buf, DevGenericSycl<TTag> const& dev) -> TElem*
         {
             if(dev == getDev(buf))
             {
@@ -150,12 +148,12 @@ namespace alpaka::trait
     };
 
     //! The SYCL memory allocation trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct BufAlloc<TElem, TDim, TIdx, DevGenericSycl<TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct BufAlloc<TElem, TDim, TIdx, DevGenericSycl<TTag>>
     {
         template<typename TExtent>
-        static auto allocBuf(DevGenericSycl<TPlatform> const& dev, TExtent const& extent)
-            -> BufGenericSycl<TElem, TDim, TIdx, TPlatform>
+        static auto allocBuf(DevGenericSycl<TTag> const& dev, TExtent const& extent)
+            -> BufGenericSycl<TElem, TDim, TIdx, TTag>
         {
             ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
@@ -197,38 +195,40 @@ namespace alpaka::trait
                 nativeContext);
             auto deleter = [ctx = nativeContext](TElem* ptr) { sycl::free(ptr, ctx); };
 
-            return BufGenericSycl<TElem, TDim, TIdx, TPlatform>(dev, memPtr, std::move(deleter), extent);
+            return BufGenericSycl<TElem, TDim, TIdx, TTag>(dev, memPtr, std::move(deleter), extent);
         }
     };
 
     //! The BufGenericSycl stream-ordered memory allocation capability trait specialization.
-    template<typename TDim, typename TPlatform>
-    struct HasAsyncBufSupport<TDim, DevGenericSycl<TPlatform>> : std::false_type
+    template<typename TDim, typename TTag>
+    struct HasAsyncBufSupport<TDim, DevGenericSycl<TTag>> : std::false_type
     {
     };
 
     //! The BufGenericSycl offset get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetOffsets<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetOffsets<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
-        auto operator()(BufGenericSycl<TElem, TDim, TIdx, TPlatform> const&) const -> Vec<TDim, TIdx>
+        auto operator()(BufGenericSycl<TElem, TDim, TIdx, TTag> const&) const -> Vec<TDim, TIdx>
         {
             return Vec<TDim, TIdx>::zeros();
         }
     };
 
     //! The pinned/mapped memory allocation trait specialization for the SYCL devices.
-    template<typename TPlatform, typename TElem, typename TDim, typename TIdx>
-    struct BufAllocMapped
+    template<typename TTag, typename TElem, typename TDim, typename TIdx>
+    struct BufAllocMapped<PlatformGenericSycl<TTag>, TElem, TDim, TIdx>
     {
         template<typename TExtent>
-        static auto allocMappedBuf(DevCpu const& host, TPlatform const& platform, TExtent const& extent)
-            -> BufCpu<TElem, TDim, TIdx>
+        static auto allocMappedBuf(
+            DevCpu const& host,
+            PlatformGenericSycl<TTag> const& platform,
+            TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
         {
             ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
 
-            // Allocate SYCL page-locked memory on the host, mapped into the TPlatform address space and
-            // accessible to all devices in the TPlatform.
+            // Allocate SYCL page-locked memory on the host, mapped into the address space of the given
+            // PlatformGenericSycl<TTag> and accessible to all devices in that platform.
             auto ctx = platform.syclContext();
             TElem* memPtr = sycl::malloc_host<TElem>(static_cast<std::size_t>(getExtentProduct(extent)), ctx);
             auto deleter = [ctx](TElem* ptr) { sycl::free(ptr, ctx); };
@@ -238,22 +238,22 @@ namespace alpaka::trait
     };
 
     //! The BufGenericSycl idx type trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct IdxType<BufGenericSycl<TElem, TDim, TIdx, TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct IdxType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
     {
         using type = TIdx;
     };
 
     //! The BufCpu pointer on SYCL device get trait specialization.
-    template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
-    struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevGenericSycl<TPlatform>>
+    template<typename TElem, typename TDim, typename TIdx, typename TTag>
+    struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevGenericSycl<TTag>>
     {
-        static auto getPtrDev(BufCpu<TElem, TDim, TIdx> const& buf, DevGenericSycl<TPlatform> const&) -> TElem const*
+        static auto getPtrDev(BufCpu<TElem, TDim, TIdx> const& buf, DevGenericSycl<TTag> const&) -> TElem const*
         {
             return getPtrNative(buf);
         }
 
-        static auto getPtrDev(BufCpu<TElem, TDim, TIdx>& buf, DevGenericSycl<TPlatform> const&) -> TElem*
+        static auto getPtrDev(BufCpu<TElem, TDim, TIdx>& buf, DevGenericSycl<TTag> const&) -> TElem*
         {
             return getPtrNative(buf);
         }
diff --git a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp
index dd20f8a39648..8d31f0f577e0 100644
--- a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp
+++ b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp
@@ -6,6 +6,7 @@
 
 #include "alpaka/dev/DevGpuSyclIntel.hpp"
 #include "alpaka/mem/buf/BufGenericSycl.hpp"
+#include "alpaka/platform/PlatformGpuSyclIntel.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
diff --git a/include/alpaka/mem/buf/sycl/Copy.hpp b/include/alpaka/mem/buf/sycl/Copy.hpp
index 806c728acce3..1275c009e081 100644
--- a/include/alpaka/mem/buf/sycl/Copy.hpp
+++ b/include/alpaka/mem/buf/sycl/Copy.hpp
@@ -195,8 +195,8 @@ namespace alpaka::detail
 namespace alpaka::trait
 {
     //! The SYCL host-to-device memory copy trait specialization.
-    template<typename TPlatform, typename TDim>
-    struct CreateTaskMemcpy<TDim, DevGenericSycl<TPlatform>, DevCpu>
+    template<typename TTag, typename TDim>
+    struct CreateTaskMemcpy<TDim, DevGenericSycl<TTag>, DevCpu>
     {
         template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
         static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
@@ -209,8 +209,8 @@ namespace alpaka::trait
     };
 
     //! The SYCL device-to-host memory copy trait specialization.
-    template<typename TPlatform, typename TDim>
-    struct CreateTaskMemcpy<TDim, DevCpu, DevGenericSycl<TPlatform>>
+    template<typename TTag, typename TDim>
+    struct CreateTaskMemcpy<TDim, DevCpu, DevGenericSycl<TTag>>
     {
         template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
         static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
@@ -223,8 +223,8 @@ namespace alpaka::trait
     };
 
     //! The SYCL device-to-device memory copy trait specialization.
-    template<typename TPlatformDst, typename TPlatformSrc, typename TDim>
-    struct CreateTaskMemcpy<TDim, DevGenericSycl<TPlatformDst>, DevGenericSycl<TPlatformSrc>>
+    template<typename TTagDst, typename TTagSrc, typename TDim>
+    struct CreateTaskMemcpy<TDim, DevGenericSycl<TTagDst>, DevGenericSycl<TTagSrc>>
     {
         template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
         static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
diff --git a/include/alpaka/mem/view/ViewPlainPtr.hpp b/include/alpaka/mem/view/ViewPlainPtr.hpp
index ceb4d95aed32..3559a9db2fdd 100644
--- a/include/alpaka/mem/view/ViewPlainPtr.hpp
+++ b/include/alpaka/mem/view/ViewPlainPtr.hpp
@@ -154,22 +154,21 @@ namespace alpaka
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED)
         //! The SYCL device CreateViewPlainPtr trait specialization.
-        template<typename TPlatform>
-        struct CreateViewPlainPtr<DevGenericSycl<TPlatform>>
+        template<typename TTag>
+        struct CreateViewPlainPtr<DevGenericSycl<TTag>>
         {
             template<typename TElem, typename TExtent, typename TPitch>
             static auto createViewPlainPtr(
-                DevGenericSycl<TPlatform> const& dev,
+                DevGenericSycl<TTag> const& dev,
                 TElem* pMem,
                 TExtent const& extent,
                 TPitch pitch)
             {
-                return alpaka::
-                    ViewPlainPtr<DevGenericSycl<TPlatform>, TElem, alpaka::Dim<TExtent>, alpaka::Idx<TExtent>>(
-                        pMem,
-                        dev,
-                        extent,
-                        pitch);
+                return alpaka::ViewPlainPtr<DevGenericSycl<TTag>, TElem, alpaka::Dim<TExtent>, alpaka::Idx<TExtent>>(
+                    pMem,
+                    dev,
+                    extent,
+                    pitch);
             }
         };
 #endif
diff --git a/include/alpaka/platform/PlatformCpuSycl.hpp b/include/alpaka/platform/PlatformCpuSycl.hpp
index 52496985a25c..bab56bdeb334 100644
--- a/include/alpaka/platform/PlatformCpuSycl.hpp
+++ b/include/alpaka/platform/PlatformCpuSycl.hpp
@@ -18,7 +18,8 @@ namespace alpaka
 {
     namespace detail
     {
-        struct SyclCpuSelector
+        template<>
+        struct SYCLDeviceSelector<TagCpuSycl>
         {
             auto operator()(sycl::device const& dev) const -> int
             {
@@ -30,17 +31,7 @@ namespace alpaka
     } // namespace detail
 
     //! The SYCL device manager.
-    using PlatformCpuSycl = PlatformGenericSycl<detail::SyclCpuSelector>;
+    using PlatformCpuSycl = PlatformGenericSycl<TagCpuSycl>;
 } // namespace alpaka
 
-namespace alpaka::trait
-{
-    //! The SYCL device manager device type trait specialization.
-    template<>
-    struct DevType<PlatformCpuSycl>
-    {
-        using type = DevGenericSycl<PlatformCpuSycl>; // = DevCpuSycl
-    };
-} // namespace alpaka::trait
-
 #endif
diff --git a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp
index 0edb028943ce..61ad81bca48e 100644
--- a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp
+++ b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp
@@ -24,7 +24,8 @@ namespace alpaka
 #        pragma clang diagnostic push
 #        pragma clang diagnostic ignored "-Wweak-vtables"
 #    endif
-        struct IntelFpgaSelector final
+        template<>
+        struct SYCLDeviceSelector<TagFpgaSyclIntel>
         {
 #    ifdef ALPAKA_FPGA_EMULATION
             static constexpr auto platform_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)";
@@ -48,17 +49,7 @@ namespace alpaka
     } // namespace detail
 
     //! The SYCL device manager.
-    using PlatformFpgaSyclIntel = PlatformGenericSycl<detail::IntelFpgaSelector>;
+    using PlatformFpgaSyclIntel = PlatformGenericSycl<TagFpgaSyclIntel>;
 } // namespace alpaka
 
-namespace alpaka::trait
-{
-    //! The SYCL device manager device type trait specialization.
-    template<>
-    struct DevType<PlatformFpgaSyclIntel>
-    {
-        using type = DevGenericSycl<PlatformFpgaSyclIntel>; // = DevFpgaSyclIntel
-    };
-} // namespace alpaka::trait
-
 #endif
diff --git a/include/alpaka/platform/PlatformGenericSycl.hpp b/include/alpaka/platform/PlatformGenericSycl.hpp
index c4df17c6660a..be007e7d721c 100644
--- a/include/alpaka/platform/PlatformGenericSycl.hpp
+++ b/include/alpaka/platform/PlatformGenericSycl.hpp
@@ -6,6 +6,7 @@
 
 #include "alpaka/core/Concepts.hpp"
 #include "alpaka/core/Sycl.hpp"
+#include "alpaka/dev/DevGenericSycl.hpp"
 #include "alpaka/dev/Traits.hpp"
 #include "alpaka/platform/Traits.hpp"
 
@@ -24,12 +25,18 @@
 
 namespace alpaka
 {
+    namespace detail
+    {
+        template<typename TTag>
+        struct SYCLDeviceSelector;
+    } // namespace detail
+
     //! The SYCL device manager.
-    template<typename TSelector>
-    struct PlatformGenericSycl : concepts::Implements<ConceptPlatform, PlatformGenericSycl<TSelector>>
+    template<typename TTag>
+    struct PlatformGenericSycl : concepts::Implements<ConceptPlatform, PlatformGenericSycl<TTag>>
     {
         PlatformGenericSycl()
-            : platform{TSelector{}}
+            : platform{detail::SYCLDeviceSelector<TTag>{}}
             , devices(platform.get_devices())
             , context{sycl::context{
                   devices,
@@ -88,636 +95,643 @@ namespace alpaka
         std::vector<sycl::device> devices;
         sycl::context context;
     };
-} // namespace alpaka
 
-namespace alpaka::trait
-{
-    //! The SYCL platform device count get trait specialization.
-    template<typename TSelector>
-    struct GetDevCount<PlatformGenericSycl<TSelector>>
+    namespace trait
     {
-        static auto getDevCount(PlatformGenericSycl<TSelector> const& platform) -> std::size_t
+        //! The SYCL platform device type trait specialization.
+        template<typename TTag>
+        struct DevType<PlatformGenericSycl<TTag>>
         {
-            ALPAKA_DEBUG_FULL_LOG_SCOPE;
-
-            return platform.syclDevices().size();
-        }
-    };
+            using type = DevGenericSycl<TTag>;
+        };
 
-    //! The SYCL platform device get trait specialization.
-    template<typename TSelector>
-    struct GetDevByIdx<alpaka::PlatformGenericSycl<TSelector>>
-    {
-        static auto getDevByIdx(PlatformGenericSycl<TSelector> const& platform, std::size_t const& devIdx)
+        //! The SYCL platform device count get trait specialization.
+        template<typename TTag>
+        struct GetDevCount<PlatformGenericSycl<TTag>>
         {
-            ALPAKA_DEBUG_FULL_LOG_SCOPE;
-
-            auto const& devices = platform.syclDevices();
-            if(devIdx >= devices.size())
+            static auto getDevCount(PlatformGenericSycl<TTag> const& platform) -> std::size_t
             {
-                auto ss_err = std::stringstream{};
-                ss_err << "Unable to return device handle for device " << devIdx << ". There are only "
-                       << devices.size() << " SYCL devices!";
-                throw std::runtime_error(ss_err.str());
+                ALPAKA_DEBUG_FULL_LOG_SCOPE;
+
+                return platform.syclDevices().size();
             }
+        };
 
-            auto sycl_dev = devices.at(devIdx);
+        //! The SYCL platform device get trait specialization.
+        template<typename TTag>
+        struct GetDevByIdx<PlatformGenericSycl<TTag>>
+        {
+            static auto getDevByIdx(PlatformGenericSycl<TTag> const& platform, std::size_t const& devIdx)
+            {
+                ALPAKA_DEBUG_FULL_LOG_SCOPE;
 
-            // Log this device.
+                auto const& devices = platform.syclDevices();
+                if(devIdx >= devices.size())
+                {
+                    auto ss_err = std::stringstream{};
+                    ss_err << "Unable to return device handle for device " << devIdx << ". There are only "
+                           << devices.size() << " SYCL devices!";
+                    throw std::runtime_error(ss_err.str());
+                }
+
+                auto sycl_dev = devices.at(devIdx);
+
+                // Log this device.
 #    if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
-            printDeviceProperties(sycl_dev);
+                printDeviceProperties(sycl_dev);
 #    elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
-            std::cout << __func__ << sycl_dev.template get_info<sycl::info::device::name>() << '\n';
+                std::cout << __func__ << sycl_dev.template get_info<sycl::info::device::name>() << '\n';
 #    endif
-            using SyclPlatform = alpaka::PlatformGenericSycl<TSelector>;
-            return typename DevType<SyclPlatform>::type{sycl_dev, platform.syclContext()};
-        }
+                using SyclPlatform = alpaka::PlatformGenericSycl<TTag>;
+                return typename DevType<SyclPlatform>::type{sycl_dev, platform.syclContext()};
+            }
 
-    private:
+        private:
 #    if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
-        //! Prints all the device properties to std::cout.
-        static auto printDeviceProperties(sycl::device const& device) -> void
-        {
-            ALPAKA_DEBUG_FULL_LOG_SCOPE;
+            //! Prints all the device properties to std::cout.
+            static auto printDeviceProperties(sycl::device const& device) -> void
+            {
+                ALPAKA_DEBUG_FULL_LOG_SCOPE;
 
-            constexpr auto KiB = std::size_t{1024};
-            constexpr auto MiB = KiB * KiB;
+                constexpr auto KiB = std::size_t{1024};
+                constexpr auto MiB = KiB * KiB;
 
-            std::cout << "Device type: ";
-            switch(device.get_info<sycl::info::device::device_type>())
-            {
-            case sycl::info::device_type::cpu:
-                std::cout << "CPU";
-                break;
-
-            case sycl::info::device_type::gpu:
-                std::cout << "GPU";
-                break;
-
-            case sycl::info::device_type::accelerator:
-                std::cout << "Accelerator";
-                break;
-
-            case sycl::info::device_type::custom:
-                std::cout << "Custom";
-                break;
-
-            case sycl::info::device_type::automatic:
-                std::cout << "Automatic";
-                break;
-
-            case sycl::info::device_type::host:
-                std::cout << "Host";
-                break;
-
-            // The SYCL spec forbids the return of device_type::all
-            // Including this here to prevent warnings because of
-            // missing cases
-            case sycl::info::device_type::all:
-                std::cout << "All";
-                break;
-            }
-            std::cout << '\n';
+                std::cout << "Device type: ";
+                switch(device.get_info<sycl::info::device::device_type>())
+                {
+                case sycl::info::device_type::cpu:
+                    std::cout << "CPU";
+                    break;
 
-            std::cout << "Name: " << device.get_info<sycl::info::device::name>() << '\n';
+                case sycl::info::device_type::gpu:
+                    std::cout << "GPU";
+                    break;
 
-            std::cout << "Vendor: " << device.get_info<sycl::info::device::vendor>() << '\n';
+                case sycl::info::device_type::accelerator:
+                    std::cout << "Accelerator";
+                    break;
 
-            std::cout << "Vendor ID: " << device.get_info<sycl::info::device::vendor_id>() << '\n';
+                case sycl::info::device_type::custom:
+                    std::cout << "Custom";
+                    break;
+
+                case sycl::info::device_type::automatic:
+                    std::cout << "Automatic";
+                    break;
 
-            std::cout << "Driver version: " << device.get_info<sycl::info::device::driver_version>() << '\n';
+                case sycl::info::device_type::host:
+                    std::cout << "Host";
+                    break;
 
-            std::cout << "SYCL version: " << device.get_info<sycl::info::device::version>() << '\n';
+                // The SYCL spec forbids the return of device_type::all
+                // Including this here to prevent warnings because of
+                // missing cases
+                case sycl::info::device_type::all:
+                    std::cout << "All";
+                    break;
+                }
+                std::cout << '\n';
+
+                std::cout << "Name: " << device.get_info<sycl::info::device::name>() << '\n';
+
+                std::cout << "Vendor: " << device.get_info<sycl::info::device::vendor>() << '\n';
+
+                std::cout << "Vendor ID: " << device.get_info<sycl::info::device::vendor_id>() << '\n';
+
+                std::cout << "Driver version: " << device.get_info<sycl::info::device::driver_version>() << '\n';
+
+                std::cout << "SYCL version: " << device.get_info<sycl::info::device::version>() << '\n';
 
 #        if !defined(BOOST_COMP_ICPX)
-            // Not defined by Level Zero back-end
-            std::cout << "Backend version: " << device.get_info<sycl::info::device::backend_version>() << '\n';
+                // Not defined by Level Zero back-end
+                std::cout << "Backend version: " << device.get_info<sycl::info::device::backend_version>() << '\n';
 #        endif
 
-            std::cout << "Aspects: " << '\n';
+                std::cout << "Aspects: " << '\n';
 
 #        if defined(BOOST_COMP_ICPX)
 #            if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
-            // These aspects are missing from oneAPI versions < 2023.2.0
-            if(device.has(sycl::aspect::emulated))
-                std::cout << "\t* emulated\n";
+                // These aspects are missing from oneAPI versions < 2023.2.0
+                if(device.has(sycl::aspect::emulated))
+                    std::cout << "\t* emulated\n";
 
-            if(device.has(sycl::aspect::host_debuggable))
-                std::cout << "\t* debuggable using standard debuggers\n";
+                if(device.has(sycl::aspect::host_debuggable))
+                    std::cout << "\t* debuggable using standard debuggers\n";
 #            endif
 #        endif
 
-            if(device.has(sycl::aspect::fp16))
-                std::cout << "\t* supports sycl::half precision\n";
+                if(device.has(sycl::aspect::fp16))
+                    std::cout << "\t* supports sycl::half precision\n";
 
-            if(device.has(sycl::aspect::fp64))
-                std::cout << "\t* supports double precision\n";
+                if(device.has(sycl::aspect::fp64))
+                    std::cout << "\t* supports double precision\n";
 
-            if(device.has(sycl::aspect::atomic64))
-                std::cout << "\t* supports 64-bit atomics\n";
+                if(device.has(sycl::aspect::atomic64))
+                    std::cout << "\t* supports 64-bit atomics\n";
 
-            if(device.has(sycl::aspect::image))
-                std::cout << "\t* supports images\n";
+                if(device.has(sycl::aspect::image))
+                    std::cout << "\t* supports images\n";
 
-            if(device.has(sycl::aspect::online_compiler))
-                std::cout << "\t* supports online compilation of device code\n";
+                if(device.has(sycl::aspect::online_compiler))
+                    std::cout << "\t* supports online compilation of device code\n";
 
-            if(device.has(sycl::aspect::online_linker))
-                std::cout << "\t* supports online linking of device code\n";
+                if(device.has(sycl::aspect::online_linker))
+                    std::cout << "\t* supports online linking of device code\n";
 
-            if(device.has(sycl::aspect::queue_profiling))
-                std::cout << "\t* supports queue profiling\n";
+                if(device.has(sycl::aspect::queue_profiling))
+                    std::cout << "\t* supports queue profiling\n";
 
-            if(device.has(sycl::aspect::usm_device_allocations))
-                std::cout << "\t* supports explicit USM allocations\n";
+                if(device.has(sycl::aspect::usm_device_allocations))
+                    std::cout << "\t* supports explicit USM allocations\n";
 
-            if(device.has(sycl::aspect::usm_host_allocations))
-                std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host\n";
+                if(device.has(sycl::aspect::usm_host_allocations))
+                    std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host\n";
 
-            if(device.has(sycl::aspect::usm_atomic_host_allocations))
-                std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n";
+                if(device.has(sycl::aspect::usm_atomic_host_allocations))
+                    std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n";
 
-            if(device.has(sycl::aspect::usm_shared_allocations))
-                std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared\n";
+                if(device.has(sycl::aspect::usm_shared_allocations))
+                    std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared\n";
 
-            if(device.has(sycl::aspect::usm_atomic_shared_allocations))
-                std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n";
+                if(device.has(sycl::aspect::usm_atomic_shared_allocations))
+                    std::cout << "\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n";
 
-            if(device.has(sycl::aspect::usm_system_allocations))
-                std::cout << "\t* can access memory allocated by the system allocator\n";
+                if(device.has(sycl::aspect::usm_system_allocations))
+                    std::cout << "\t* can access memory allocated by the system allocator\n";
 
-            std::cout << "Available compute units: " << device.get_info<sycl::info::device::max_compute_units>()
-                      << '\n';
+                std::cout << "Available compute units: " << device.get_info<sycl::info::device::max_compute_units>()
+                          << '\n';
 
-            std::cout << "Maximum work item dimensions: ";
-            auto dims = device.get_info<sycl::info::device::max_work_item_dimensions>();
-            std::cout << dims << std::endl;
+                std::cout << "Maximum work item dimensions: ";
+                auto dims = device.get_info<sycl::info::device::max_work_item_dimensions>();
+                std::cout << dims << std::endl;
 
-            std::cout << "Maximum number of work items:\n";
-            auto const wi_1D = device.get_info<sycl::info::device::max_work_item_sizes<1>>();
-            auto const wi_2D = device.get_info<sycl::info::device::max_work_item_sizes<2>>();
-            auto const wi_3D = device.get_info<sycl::info::device::max_work_item_sizes<3>>();
-            std::cout << "\t* 1D: (" << wi_1D.get(0) << ")\n";
-            std::cout << "\t* 2D: (" << wi_2D.get(0) << ", " << wi_2D.get(1) << ")\n";
-            std::cout << "\t* 3D: (" << wi_3D.get(0) << ", " << wi_3D.get(1) << ", " << wi_3D.get(2) << ")\n";
+                std::cout << "Maximum number of work items:\n";
+                auto const wi_1D = device.get_info<sycl::info::device::max_work_item_sizes<1>>();
+                auto const wi_2D = device.get_info<sycl::info::device::max_work_item_sizes<2>>();
+                auto const wi_3D = device.get_info<sycl::info::device::max_work_item_sizes<3>>();
+                std::cout << "\t* 1D: (" << wi_1D.get(0) << ")\n";
+                std::cout << "\t* 2D: (" << wi_2D.get(0) << ", " << wi_2D.get(1) << ")\n";
+                std::cout << "\t* 3D: (" << wi_3D.get(0) << ", " << wi_3D.get(1) << ", " << wi_3D.get(2) << ")\n";
 
-            std::cout << "Maximum number of work items per work-group: "
-                      << device.get_info<sycl::info::device::max_work_group_size>() << '\n';
+                std::cout << "Maximum number of work items per work-group: "
+                          << device.get_info<sycl::info::device::max_work_group_size>() << '\n';
 
-            std::cout << "Maximum number of sub-groups per work-group: "
-                      << device.get_info<sycl::info::device::max_num_sub_groups>() << '\n';
+                std::cout << "Maximum number of sub-groups per work-group: "
+                          << device.get_info<sycl::info::device::max_num_sub_groups>() << '\n';
 
-            std::cout << "Supported sub-group sizes: ";
-            auto const sg_sizes = device.get_info<sycl::info::device::sub_group_sizes>();
-            for(auto const& sz : sg_sizes)
-                std::cout << sz << ", ";
-            std::cout << '\n';
+                std::cout << "Supported sub-group sizes: ";
+                auto const sg_sizes = device.get_info<sycl::info::device::sub_group_sizes>();
+                for(auto const& sz : sg_sizes)
+                    std::cout << sz << ", ";
+                std::cout << '\n';
 
-            std::cout << "Preferred native vector width (char): "
-                      << device.get_info<sycl::info::device::preferred_vector_width_char>() << '\n';
+                std::cout << "Preferred native vector width (char): "
+                          << device.get_info<sycl::info::device::preferred_vector_width_char>() << '\n';
 
-            std::cout << "Native ISA vector width (char): "
-                      << device.get_info<sycl::info::device::native_vector_width_char>() << '\n';
+                std::cout << "Native ISA vector width (char): "
+                          << device.get_info<sycl::info::device::native_vector_width_char>() << '\n';
 
-            std::cout << "Preferred native vector width (short): "
-                      << device.get_info<sycl::info::device::preferred_vector_width_short>() << '\n';
+                std::cout << "Preferred native vector width (short): "
+                          << device.get_info<sycl::info::device::preferred_vector_width_short>() << '\n';
 
-            std::cout << "Native ISA vector width (short): "
-                      << device.get_info<sycl::info::device::native_vector_width_short>() << '\n';
+                std::cout << "Native ISA vector width (short): "
+                          << device.get_info<sycl::info::device::native_vector_width_short>() << '\n';
 
-            std::cout << "Preferred native vector width (int): "
-                      << device.get_info<sycl::info::device::preferred_vector_width_int>() << '\n';
+                std::cout << "Preferred native vector width (int): "
+                          << device.get_info<sycl::info::device::preferred_vector_width_int>() << '\n';
 
-            std::cout << "Native ISA vector width (int): "
-                      << device.get_info<sycl::info::device::native_vector_width_int>() << '\n';
+                std::cout << "Native ISA vector width (int): "
+                          << device.get_info<sycl::info::device::native_vector_width_int>() << '\n';
 
-            std::cout << "Preferred native vector width (long): "
-                      << device.get_info<sycl::info::device::preferred_vector_width_long>() << '\n';
+                std::cout << "Preferred native vector width (long): "
+                          << device.get_info<sycl::info::device::preferred_vector_width_long>() << '\n';
 
-            std::cout << "Native ISA vector width (long): "
-                      << device.get_info<sycl::info::device::native_vector_width_long>() << '\n';
+                std::cout << "Native ISA vector width (long): "
+                          << device.get_info<sycl::info::device::native_vector_width_long>() << '\n';
 
-            std::cout << "Preferred native vector width (float): "
-                      << device.get_info<sycl::info::device::preferred_vector_width_float>() << '\n';
+                std::cout << "Preferred native vector width (float): "
+                          << device.get_info<sycl::info::device::preferred_vector_width_float>() << '\n';
 
-            std::cout << "Native ISA vector width (float): "
-                      << device.get_info<sycl::info::device::native_vector_width_float>() << '\n';
+                std::cout << "Native ISA vector width (float): "
+                          << device.get_info<sycl::info::device::native_vector_width_float>() << '\n';
 
-            if(device.has(sycl::aspect::fp64))
-            {
-                std::cout << "Preferred native vector width (double): "
-                          << device.get_info<sycl::info::device::preferred_vector_width_double>() << '\n';
+                if(device.has(sycl::aspect::fp64))
+                {
+                    std::cout << "Preferred native vector width (double): "
+                              << device.get_info<sycl::info::device::preferred_vector_width_double>() << '\n';
 
-                std::cout << "Native ISA vector width (double): "
-                          << device.get_info<sycl::info::device::native_vector_width_double>() << '\n';
-            }
+                    std::cout << "Native ISA vector width (double): "
+                              << device.get_info<sycl::info::device::native_vector_width_double>() << '\n';
+                }
 
-            if(device.has(sycl::aspect::fp16))
-            {
-                std::cout << "Preferred native vector width (half): "
-                          << device.get_info<sycl::info::device::preferred_vector_width_half>() << '\n';
+                if(device.has(sycl::aspect::fp16))
+                {
+                    std::cout << "Preferred native vector width (half): "
+                              << device.get_info<sycl::info::device::preferred_vector_width_half>() << '\n';
 
-                std::cout << "Native ISA vector width (half): "
-                          << device.get_info<sycl::info::device::native_vector_width_half>() << '\n';
-            }
+                    std::cout << "Native ISA vector width (half): "
+                              << device.get_info<sycl::info::device::native_vector_width_half>() << '\n';
+                }
 
-            std::cout << "Maximum clock frequency: " << device.get_info<sycl::info::device::max_clock_frequency>()
-                      << " MHz\n";
+                std::cout << "Maximum clock frequency: " << device.get_info<sycl::info::device::max_clock_frequency>()
+                          << " MHz\n";
 
-            std::cout << "Address space size: " << device.get_info<sycl::info::device::address_bits>() << "-bit\n";
+                std::cout << "Address space size: " << device.get_info<sycl::info::device::address_bits>() << "-bit\n";
 
-            std::cout << "Maximum size of memory object allocation: "
-                      << device.get_info<sycl::info::device::max_mem_alloc_size>() << " bytes\n";
+                std::cout << "Maximum size of memory object allocation: "
+                          << device.get_info<sycl::info::device::max_mem_alloc_size>() << " bytes\n";
 
-            if(device.has(sycl::aspect::image))
-            {
-                std::cout << "Maximum number of simultaneous image object reads per kernel: "
-                          << device.get_info<sycl::info::device::max_read_image_args>() << '\n';
+                if(device.has(sycl::aspect::image))
+                {
+                    std::cout << "Maximum number of simultaneous image object reads per kernel: "
+                              << device.get_info<sycl::info::device::max_read_image_args>() << '\n';
 
-                std::cout << "Maximum number of simultaneous image writes per kernel: "
-                          << device.get_info<sycl::info::device::max_write_image_args>() << '\n';
+                    std::cout << "Maximum number of simultaneous image writes per kernel: "
+                              << device.get_info<sycl::info::device::max_write_image_args>() << '\n';
 
-                std::cout << "Maximum 1D/2D image width: " << device.get_info<sycl::info::device::image2d_max_width>()
-                          << " px\n";
+                    std::cout << "Maximum 1D/2D image width: "
+                              << device.get_info<sycl::info::device::image2d_max_width>() << " px\n";
 
-                std::cout << "Maximum 2D image height: " << device.get_info<sycl::info::device::image2d_max_height>()
-                          << " px\n";
+                    std::cout << "Maximum 2D image height: "
+                              << device.get_info<sycl::info::device::image2d_max_height>() << " px\n";
 
-                std::cout << "Maximum 3D image width: " << device.get_info<sycl::info::device::image3d_max_width>()
-                          << " px\n";
+                    std::cout << "Maximum 3D image width: " << device.get_info<sycl::info::device::image3d_max_width>()
+                              << " px\n";
 
-                std::cout << "Maximum 3D image height: " << device.get_info<sycl::info::device::image3d_max_height>()
-                          << " px\n";
+                    std::cout << "Maximum 3D image height: "
+                              << device.get_info<sycl::info::device::image3d_max_height>() << " px\n";
 
-                std::cout << "Maximum 3D image depth: " << device.get_info<sycl::info::device::image3d_max_depth>()
-                          << " px\n";
+                    std::cout << "Maximum 3D image depth: " << device.get_info<sycl::info::device::image3d_max_depth>()
+                              << " px\n";
 
-                std::cout << "Maximum number of samplers per kernel: "
-                          << device.get_info<sycl::info::device::max_samplers>() << '\n';
-            }
+                    std::cout << "Maximum number of samplers per kernel: "
+                              << device.get_info<sycl::info::device::max_samplers>() << '\n';
+                }
 
-            std::cout << "Maximum kernel argument size: " << device.get_info<sycl::info::device::max_parameter_size>()
-                      << " bytes\n";
+                std::cout << "Maximum kernel argument size: "
+                          << device.get_info<sycl::info::device::max_parameter_size>() << " bytes\n";
 
-            std::cout << "Memory base address alignment: "
-                      << device.get_info<sycl::info::device::mem_base_addr_align>() << " bit\n";
+                std::cout << "Memory base address alignment: "
+                          << device.get_info<sycl::info::device::mem_base_addr_align>() << " bit\n";
 
-            auto print_fp_config = [](std::string const& fp, std::vector<sycl::info::fp_config> const& conf)
-            {
-                std::cout << fp << " precision floating-point capabilities:\n";
-
-                auto find_and_print = [&](sycl::info::fp_config val)
+                auto print_fp_config = [](std::string const& fp, std::vector<sycl::info::fp_config> const& conf)
                 {
-                    auto it = std::find(begin(conf), end(conf), val);
-                    std::cout << (it == std::end(conf) ? "No" : "Yes") << '\n';
-                };
+                    std::cout << fp << " precision floating-point capabilities:\n";
 
-                std::cout << "\t* denorm support: ";
-                find_and_print(sycl::info::fp_config::denorm);
+                    auto find_and_print = [&](sycl::info::fp_config val)
+                    {
+                        auto it = std::find(begin(conf), end(conf), val);
+                        std::cout << (it == std::end(conf) ? "No" : "Yes") << '\n';
+                    };
 
-                std::cout << "\t* INF & quiet NaN support: ";
-                find_and_print(sycl::info::fp_config::inf_nan);
+                    std::cout << "\t* denorm support: ";
+                    find_and_print(sycl::info::fp_config::denorm);
 
-                std::cout << "\t* round to nearest even support: ";
-                find_and_print(sycl::info::fp_config::round_to_nearest);
+                    std::cout << "\t* INF & quiet NaN support: ";
+                    find_and_print(sycl::info::fp_config::inf_nan);
 
-                std::cout << "\t* round to zero support: ";
-                find_and_print(sycl::info::fp_config::round_to_zero);
+                    std::cout << "\t* round to nearest even support: ";
+                    find_and_print(sycl::info::fp_config::round_to_nearest);
 
-                std::cout << "\t* round to infinity support: ";
-                find_and_print(sycl::info::fp_config::round_to_inf);
+                    std::cout << "\t* round to zero support: ";
+                    find_and_print(sycl::info::fp_config::round_to_zero);
 
-                std::cout << "\t* IEEE754-2008 FMA support: ";
-                find_and_print(sycl::info::fp_config::fma);
+                    std::cout << "\t* round to infinity support: ";
+                    find_and_print(sycl::info::fp_config::round_to_inf);
 
-                std::cout << "\t* correctly rounded divide/sqrt support: ";
-                find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt);
+                    std::cout << "\t* IEEE754-2008 FMA support: ";
+                    find_and_print(sycl::info::fp_config::fma);
 
-                std::cout << "\t* software-implemented floating point operations: ";
-                find_and_print(sycl::info::fp_config::soft_float);
-            };
+                    std::cout << "\t* correctly rounded divide/sqrt support: ";
+                    find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt);
 
-            if(device.has(sycl::aspect::fp16))
-            {
-                auto const fp16_conf = device.get_info<sycl::info::device::half_fp_config>();
-                print_fp_config("Half", fp16_conf);
-            }
+                    std::cout << "\t* software-implemented floating point operations: ";
+                    find_and_print(sycl::info::fp_config::soft_float);
+                };
 
-            auto const fp32_conf = device.get_info<sycl::info::device::single_fp_config>();
-            print_fp_config("Single", fp32_conf);
+                if(device.has(sycl::aspect::fp16))
+                {
+                    auto const fp16_conf = device.get_info<sycl::info::device::half_fp_config>();
+                    print_fp_config("Half", fp16_conf);
+                }
 
-            if(device.has(sycl::aspect::fp64))
-            {
-                auto const fp64_conf = device.get_info<sycl::info::device::double_fp_config>();
-                print_fp_config("Double", fp64_conf);
-            }
+                auto const fp32_conf = device.get_info<sycl::info::device::single_fp_config>();
+                print_fp_config("Single", fp32_conf);
 
-            std::cout << "Global memory cache type: ";
-            auto has_global_mem_cache = false;
-            switch(device.get_info<sycl::info::device::global_mem_cache_type>())
-            {
-            case sycl::info::global_mem_cache_type::none:
-                std::cout << "none";
-                break;
-
-            case sycl::info::global_mem_cache_type::read_only:
-                std::cout << "read-only";
-                has_global_mem_cache = true;
-                break;
-
-            case sycl::info::global_mem_cache_type::read_write:
-                std::cout << "read-write";
-                has_global_mem_cache = true;
-                break;
-            }
-            std::cout << '\n';
+                if(device.has(sycl::aspect::fp64))
+                {
+                    auto const fp64_conf = device.get_info<sycl::info::device::double_fp_config>();
+                    print_fp_config("Double", fp64_conf);
+                }
 
-            if(has_global_mem_cache)
-            {
-                std::cout << "Global memory cache line size: "
-                          << device.get_info<sycl::info::device::global_mem_cache_line_size>() << " bytes\n";
+                std::cout << "Global memory cache type: ";
+                auto has_global_mem_cache = false;
+                switch(device.get_info<sycl::info::device::global_mem_cache_type>())
+                {
+                case sycl::info::global_mem_cache_type::none:
+                    std::cout << "none";
+                    break;
 
-                std::cout << "Global memory cache size: "
-                          << device.get_info<sycl::info::device::global_mem_cache_size>() / KiB << " KiB\n";
-            }
+                case sycl::info::global_mem_cache_type::read_only:
+                    std::cout << "read-only";
+                    has_global_mem_cache = true;
+                    break;
 
-            std::cout << "Global memory size: " << device.get_info<sycl::info::device::global_mem_size>() / MiB
-                      << " MiB" << std::endl;
+                case sycl::info::global_mem_cache_type::read_write:
+                    std::cout << "read-write";
+                    has_global_mem_cache = true;
+                    break;
+                }
+                std::cout << '\n';
 
-            std::cout << "Local memory type: ";
-            auto has_local_memory = false;
-            switch(device.get_info<sycl::info::device::local_mem_type>())
-            {
-            case sycl::info::local_mem_type::none:
-                std::cout << "none";
-                break;
-
-            case sycl::info::local_mem_type::local:
-                std::cout << "local";
-                has_local_memory = true;
-                break;
-
-            case sycl::info::local_mem_type::global:
-                std::cout << "global";
-                has_local_memory = true;
-                break;
-            }
-            std::cout << '\n';
+                if(has_global_mem_cache)
+                {
+                    std::cout << "Global memory cache line size: "
+                              << device.get_info<sycl::info::device::global_mem_cache_line_size>() << " bytes\n";
+
+                    std::cout << "Global memory cache size: "
+                              << device.get_info<sycl::info::device::global_mem_cache_size>() / KiB << " KiB\n";
+                }
 
-            if(has_local_memory)
-                std::cout << "Local memory size: " << device.get_info<sycl::info::device::local_mem_size>() / KiB
-                          << " KiB\n";
+                std::cout << "Global memory size: " << device.get_info<sycl::info::device::global_mem_size>() / MiB
+                          << " MiB" << std::endl;
 
-            std::cout << "Error correction support: "
-                      << (device.get_info<sycl::info::device::error_correction_support>() ? "Yes" : "No") << '\n';
+                std::cout << "Local memory type: ";
+                auto has_local_memory = false;
+                switch(device.get_info<sycl::info::device::local_mem_type>())
+                {
+                case sycl::info::local_mem_type::none:
+                    std::cout << "none";
+                    break;
 
-            auto print_memory_orders = [](std::vector<sycl::memory_order> const& mem_orders)
-            {
-                for(auto const& cap : mem_orders)
+                case sycl::info::local_mem_type::local:
+                    std::cout << "local";
+                    has_local_memory = true;
+                    break;
+
+                case sycl::info::local_mem_type::global:
+                    std::cout << "global";
+                    has_local_memory = true;
+                    break;
+                }
+                std::cout << '\n';
+
+                if(has_local_memory)
+                    std::cout << "Local memory size: " << device.get_info<sycl::info::device::local_mem_size>() / KiB
+                              << " KiB\n";
+
+                std::cout << "Error correction support: "
+                          << (device.get_info<sycl::info::device::error_correction_support>() ? "Yes" : "No") << '\n';
+
+                auto print_memory_orders = [](std::vector<sycl::memory_order> const& mem_orders)
                 {
-                    switch(cap)
+                    for(auto const& cap : mem_orders)
                     {
-                    case sycl::memory_order::relaxed:
-                        std::cout << "relaxed";
-                        break;
+                        switch(cap)
+                        {
+                        case sycl::memory_order::relaxed:
+                            std::cout << "relaxed";
+                            break;
 
-                    case sycl::memory_order::acquire:
-                        std::cout << "acquire";
-                        break;
+                        case sycl::memory_order::acquire:
+                            std::cout << "acquire";
+                            break;
 
-                    case sycl::memory_order::release:
-                        std::cout << "release";
-                        break;
+                        case sycl::memory_order::release:
+                            std::cout << "release";
+                            break;
 
-                    case sycl::memory_order::acq_rel:
-                        std::cout << "acq_rel";
-                        break;
+                        case sycl::memory_order::acq_rel:
+                            std::cout << "acq_rel";
+                            break;
 
-                    case sycl::memory_order::seq_cst:
-                        std::cout << "seq_cst";
-                        break;
+                        case sycl::memory_order::seq_cst:
+                            std::cout << "seq_cst";
+                            break;
 #        if defined(BOOST_COMP_ICPX)
-                    // Stop icpx from complaining about its own internals.
-                    case sycl::memory_order::__consume_unsupported:
-                        break;
+                        // Stop icpx from complaining about its own internals.
+                        case sycl::memory_order::__consume_unsupported:
+                            break;
 #        endif
+                        }
+                        std::cout << ", ";
                     }
-                    std::cout << ", ";
-                }
-                std::cout << '\n';
-            };
+                    std::cout << '\n';
+                };
 
-            std::cout << "Supported memory orderings for atomic operations: ";
-            auto const mem_orders = device.get_info<sycl::info::device::atomic_memory_order_capabilities>();
-            print_memory_orders(mem_orders);
+                std::cout << "Supported memory orderings for atomic operations: ";
+                auto const mem_orders = device.get_info<sycl::info::device::atomic_memory_order_capabilities>();
+                print_memory_orders(mem_orders);
 
 #        if defined(BOOST_COMP_ICPX)
 #            if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
-            // Not implemented in oneAPI < 2023.2.0
-            std::cout << "Supported memory orderings for sycl::atomic_fence: ";
-            auto const fence_orders = device.get_info<sycl::info::device::atomic_fence_order_capabilities>();
-            print_memory_orders(fence_orders);
+                // Not implemented in oneAPI < 2023.2.0
+                std::cout << "Supported memory orderings for sycl::atomic_fence: ";
+                auto const fence_orders = device.get_info<sycl::info::device::atomic_fence_order_capabilities>();
+                print_memory_orders(fence_orders);
 #            endif
 #        endif
 
-            auto print_memory_scopes = [](std::vector<sycl::memory_scope> const& mem_scopes)
-            {
-                for(auto const& cap : mem_scopes)
+                auto print_memory_scopes = [](std::vector<sycl::memory_scope> const& mem_scopes)
                 {
-                    switch(cap)
+                    for(auto const& cap : mem_scopes)
                     {
-                    case sycl::memory_scope::work_item:
-                        std::cout << "work-item";
-                        break;
+                        switch(cap)
+                        {
+                        case sycl::memory_scope::work_item:
+                            std::cout << "work-item";
+                            break;
 
-                    case sycl::memory_scope::sub_group:
-                        std::cout << "sub-group";
-                        break;
+                        case sycl::memory_scope::sub_group:
+                            std::cout << "sub-group";
+                            break;
 
-                    case sycl::memory_scope::work_group:
-                        std::cout << "work-group";
-                        break;
+                        case sycl::memory_scope::work_group:
+                            std::cout << "work-group";
+                            break;
 
-                    case sycl::memory_scope::device:
-                        std::cout << "device";
-                        break;
+                        case sycl::memory_scope::device:
+                            std::cout << "device";
+                            break;
 
-                    case sycl::memory_scope::system:
-                        std::cout << "system";
-                        break;
+                        case sycl::memory_scope::system:
+                            std::cout << "system";
+                            break;
+                        }
+                        std::cout << ", ";
                     }
-                    std::cout << ", ";
-                }
-                std::cout << '\n';
-            };
+                    std::cout << '\n';
+                };
 
-            std::cout << "Supported memory scopes for atomic operations: ";
-            auto const mem_scopes = device.get_info<sycl::info::device::atomic_memory_scope_capabilities>();
-            print_memory_scopes(mem_scopes);
+                std::cout << "Supported memory scopes for atomic operations: ";
+                auto const mem_scopes = device.get_info<sycl::info::device::atomic_memory_scope_capabilities>();
+                print_memory_scopes(mem_scopes);
 
 #        if defined(BOOST_COMP_ICPX)
 #            if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
-            // Not implemented in oneAPI < 2023.2.0
-            std::cout << "Supported memory scopes for sycl::atomic_fence: ";
-            auto const fence_scopes = device.get_info<sycl::info::device::atomic_fence_scope_capabilities>();
-            print_memory_scopes(fence_scopes);
+                // Not implemented in oneAPI < 2023.2.0
+                std::cout << "Supported memory scopes for sycl::atomic_fence: ";
+                auto const fence_scopes = device.get_info<sycl::info::device::atomic_fence_scope_capabilities>();
+                print_memory_scopes(fence_scopes);
 #            endif
 #        endif
 
-            std::cout << "Device timer resolution: "
-                      << device.get_info<sycl::info::device::profiling_timer_resolution>() << " ns\n";
+                std::cout << "Device timer resolution: "
+                          << device.get_info<sycl::info::device::profiling_timer_resolution>() << " ns\n";
 
-            std::cout << "Built-in kernels: ";
-            auto const builtins = device.get_info<sycl::info::device::built_in_kernel_ids>();
-            for(auto const& b : builtins)
-                std::cout << b.get_name() << ", ";
-            std::cout << '\n';
+                std::cout << "Built-in kernels: ";
+                auto const builtins = device.get_info<sycl::info::device::built_in_kernel_ids>();
+                for(auto const& b : builtins)
+                    std::cout << b.get_name() << ", ";
+                std::cout << '\n';
 
-            std::cout << "Maximum number of subdevices: ";
-            auto const max_subs = device.get_info<sycl::info::device::partition_max_sub_devices>();
-            std::cout << max_subs << '\n';
+                std::cout << "Maximum number of subdevices: ";
+                auto const max_subs = device.get_info<sycl::info::device::partition_max_sub_devices>();
+                std::cout << max_subs << '\n';
 
-            if(max_subs > 1)
-            {
-                std::cout << "Supported partition properties: ";
-                auto const part_props = device.get_info<sycl::info::device::partition_properties>();
-                auto has_affinity_domains = false;
-                for(auto const& prop : part_props)
+                if(max_subs > 1)
                 {
-                    switch(prop)
+                    std::cout << "Supported partition properties: ";
+                    auto const part_props = device.get_info<sycl::info::device::partition_properties>();
+                    auto has_affinity_domains = false;
+                    for(auto const& prop : part_props)
+                    {
+                        switch(prop)
+                        {
+                        case sycl::info::partition_property::no_partition:
+                            std::cout << "no partition";
+                            break;
+
+                        case sycl::info::partition_property::partition_equally:
+                            std::cout << "equally";
+                            break;
+
+                        case sycl::info::partition_property::partition_by_counts:
+                            std::cout << "by counts";
+                            break;
+
+                        case sycl::info::partition_property::partition_by_affinity_domain:
+                            std::cout << "by affinity domain";
+                            has_affinity_domains = true;
+                            break;
+#        if defined(BOOST_COMP_ICPX)
+                        case sycl::info::partition_property::ext_intel_partition_by_cslice:
+                            std::cout << "by compute slice (Intel extension; deprecated)";
+                            break;
+#        endif
+                        }
+                        std::cout << ", ";
+                    }
+                    std::cout << '\n';
+
+                    if(has_affinity_domains)
+                    {
+                        std::cout << "Supported partition affinity domains: ";
+                        auto const aff_doms = device.get_info<sycl::info::device::partition_affinity_domains>();
+                        for(auto const& dom : aff_doms)
+                        {
+                            switch(dom)
+                            {
+                            case sycl::info::partition_affinity_domain::not_applicable:
+                                std::cout << "not applicable";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::numa:
+                                std::cout << "NUMA";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::L4_cache:
+                                std::cout << "L4 cache";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::L3_cache:
+                                std::cout << "L3 cache";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::L2_cache:
+                                std::cout << "L2 cache";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::L1_cache:
+                                std::cout << "L1 cache";
+                                break;
+
+                            case sycl::info::partition_affinity_domain::next_partitionable:
+                                std::cout << "next partitionable";
+                                break;
+                            }
+                            std::cout << ", ";
+                        }
+                        std::cout << '\n';
+                    }
+
+                    std::cout << "Current partition property: ";
+                    switch(device.get_info<sycl::info::device::partition_type_property>())
                     {
                     case sycl::info::partition_property::no_partition:
                         std::cout << "no partition";
                         break;
 
                     case sycl::info::partition_property::partition_equally:
-                        std::cout << "equally";
+                        std::cout << "partitioned equally";
                         break;
 
                     case sycl::info::partition_property::partition_by_counts:
-                        std::cout << "by counts";
+                        std::cout << "partitioned by counts";
                         break;
 
                     case sycl::info::partition_property::partition_by_affinity_domain:
-                        std::cout << "by affinity domain";
-                        has_affinity_domains = true;
+                        std::cout << "partitioned by affinity domain";
                         break;
+
 #        if defined(BOOST_COMP_ICPX)
                     case sycl::info::partition_property::ext_intel_partition_by_cslice:
-                        std::cout << "by compute slice (Intel extension; deprecated)";
+                        std::cout << "partitioned by compute slice (Intel extension; deprecated)";
                         break;
 #        endif
                     }
-                    std::cout << ", ";
-                }
-                std::cout << '\n';
+                    std::cout << '\n';
 
-                if(has_affinity_domains)
-                {
-                    std::cout << "Supported partition affinity domains: ";
-                    auto const aff_doms = device.get_info<sycl::info::device::partition_affinity_domains>();
-                    for(auto const& dom : aff_doms)
+                    std::cout << "Current partition affinity domain: ";
+                    switch(device.get_info<sycl::info::device::partition_type_affinity_domain>())
                     {
-                        switch(dom)
-                        {
-                        case sycl::info::partition_affinity_domain::not_applicable:
-                            std::cout << "not applicable";
-                            break;
+                    case sycl::info::partition_affinity_domain::not_applicable:
+                        std::cout << "not applicable";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::numa:
-                            std::cout << "NUMA";
-                            break;
+                    case sycl::info::partition_affinity_domain::numa:
+                        std::cout << "NUMA";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::L4_cache:
-                            std::cout << "L4 cache";
-                            break;
+                    case sycl::info::partition_affinity_domain::L4_cache:
+                        std::cout << "L4 cache";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::L3_cache:
-                            std::cout << "L3 cache";
-                            break;
+                    case sycl::info::partition_affinity_domain::L3_cache:
+                        std::cout << "L3 cache";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::L2_cache:
-                            std::cout << "L2 cache";
-                            break;
+                    case sycl::info::partition_affinity_domain::L2_cache:
+                        std::cout << "L2 cache";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::L1_cache:
-                            std::cout << "L1 cache";
-                            break;
+                    case sycl::info::partition_affinity_domain::L1_cache:
+                        std::cout << "L1 cache";
+                        break;
 
-                        case sycl::info::partition_affinity_domain::next_partitionable:
-                            std::cout << "next partitionable";
-                            break;
-                        }
-                        std::cout << ", ";
+                    case sycl::info::partition_affinity_domain::next_partitionable:
+                        std::cout << "next partitionable";
+                        break;
                     }
                     std::cout << '\n';
                 }
 
-                std::cout << "Current partition property: ";
-                switch(device.get_info<sycl::info::device::partition_type_property>())
-                {
-                case sycl::info::partition_property::no_partition:
-                    std::cout << "no partition";
-                    break;
-
-                case sycl::info::partition_property::partition_equally:
-                    std::cout << "partitioned equally";
-                    break;
-
-                case sycl::info::partition_property::partition_by_counts:
-                    std::cout << "partitioned by counts";
-                    break;
-
-                case sycl::info::partition_property::partition_by_affinity_domain:
-                    std::cout << "partitioned by affinity domain";
-                    break;
-
-#        if defined(BOOST_COMP_ICPX)
-                case sycl::info::partition_property::ext_intel_partition_by_cslice:
-                    std::cout << "partitioned by compute slice (Intel extension; deprecated)";
-                    break;
-#        endif
-                }
-                std::cout << '\n';
-
-                std::cout << "Current partition affinity domain: ";
-                switch(device.get_info<sycl::info::device::partition_type_affinity_domain>())
-                {
-                case sycl::info::partition_affinity_domain::not_applicable:
-                    std::cout << "not applicable";
-                    break;
-
-                case sycl::info::partition_affinity_domain::numa:
-                    std::cout << "NUMA";
-                    break;
-
-                case sycl::info::partition_affinity_domain::L4_cache:
-                    std::cout << "L4 cache";
-                    break;
-
-                case sycl::info::partition_affinity_domain::L3_cache:
-                    std::cout << "L3 cache";
-                    break;
-
-                case sycl::info::partition_affinity_domain::L2_cache:
-                    std::cout << "L2 cache";
-                    break;
-
-                case sycl::info::partition_affinity_domain::L1_cache:
-                    std::cout << "L1 cache";
-                    break;
-
-                case sycl::info::partition_affinity_domain::next_partitionable:
-                    std::cout << "next partitionable";
-                    break;
-                }
-                std::cout << '\n';
+                std::cout.flush();
             }
-
-            std::cout.flush();
-        }
 #    endif
-    };
-} // namespace alpaka::trait
+        };
+    } // namespace trait
+} // namespace alpaka
 
 #endif
diff --git a/include/alpaka/platform/PlatformGpuSyclIntel.hpp b/include/alpaka/platform/PlatformGpuSyclIntel.hpp
index b51373646c9c..dd465895c18d 100644
--- a/include/alpaka/platform/PlatformGpuSyclIntel.hpp
+++ b/include/alpaka/platform/PlatformGpuSyclIntel.hpp
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/dev/DevGenericSycl.hpp"
 #include "alpaka/dev/Traits.hpp"
 #include "alpaka/platform/PlatformGenericSycl.hpp"
@@ -18,7 +19,8 @@ namespace alpaka
 {
     namespace detail
     {
-        struct IntelGpuSelector
+        template<>
+        struct SYCLDeviceSelector<TagGpuSyclIntel>
         {
             auto operator()(sycl::device const& dev) const -> int
             {
@@ -33,17 +35,7 @@ namespace alpaka
     } // namespace detail
 
     //! The SYCL device manager.
-    using PlatformGpuSyclIntel = PlatformGenericSycl<detail::IntelGpuSelector>;
+    using PlatformGpuSyclIntel = PlatformGenericSycl<TagGpuSyclIntel>;
 } // namespace alpaka
 
-namespace alpaka::trait
-{
-    //! The SYCL device manager device type trait specialization.
-    template<>
-    struct DevType<PlatformGpuSyclIntel>
-    {
-        using type = DevGenericSycl<PlatformGpuSyclIntel>; // = DevGpuSyclIntel
-    };
-} // namespace alpaka::trait
-
 #endif
diff --git a/include/alpaka/queue/QueueCpuSyclBlocking.hpp b/include/alpaka/queue/QueueCpuSyclBlocking.hpp
index 63dc39fc0c16..052fc234672d 100644
--- a/include/alpaka/queue/QueueCpuSyclBlocking.hpp
+++ b/include/alpaka/queue/QueueCpuSyclBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevCpuSycl.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
 namespace alpaka
 {
-    using QueueCpuSyclBlocking = QueueGenericSyclBlocking<DevCpuSycl>;
+    using QueueCpuSyclBlocking = QueueGenericSyclBlocking<TagCpuSycl>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp
index d3fab4dcfbdb..330a2cc174d6 100644
--- a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp
+++ b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevCpuSycl.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_CPU)
 
 namespace alpaka
 {
-    using QueueCpuSyclNonBlocking = QueueGenericSyclNonBlocking<DevCpuSycl>;
+    using QueueCpuSyclNonBlocking = QueueGenericSyclNonBlocking<TagCpuSycl>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp
index 9ff2e58dc48d..ea17bb1ef4ff 100644
--- a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp
+++ b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevFpgaSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
 namespace alpaka
 {
-    using QueueFpgaSyclIntelBlocking = QueueGenericSyclBlocking<DevFpgaSyclIntel>;
+    using QueueFpgaSyclIntelBlocking = QueueGenericSyclBlocking<TagFpgaSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp
index 20ea0bb83e81..e187b14f2651 100644
--- a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp
+++ b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevFpgaSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA)
 
 namespace alpaka
 {
-    using QueueFpgaSyclIntelNonBlocking = QueueGenericSyclNonBlocking<DevFpgaSyclIntel>;
+    using QueueFpgaSyclIntelNonBlocking = QueueGenericSyclNonBlocking<TagFpgaSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueGenericSyclBlocking.hpp b/include/alpaka/queue/QueueGenericSyclBlocking.hpp
index bb743226c5d3..cbe98bb46c78 100644
--- a/include/alpaka/queue/QueueGenericSyclBlocking.hpp
+++ b/include/alpaka/queue/QueueGenericSyclBlocking.hpp
@@ -10,8 +10,8 @@
 
 namespace alpaka
 {
-    template<typename TDev>
-    using QueueGenericSyclBlocking = detail::QueueGenericSyclBase<TDev, true>;
+    template<typename TTag>
+    using QueueGenericSyclBlocking = detail::QueueGenericSyclBase<TTag, true>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp
index b5dcbe84004c..b9f0d7f6298a 100644
--- a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp
+++ b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp
@@ -10,8 +10,8 @@
 
 namespace alpaka
 {
-    template<typename TDev>
-    using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase<TDev, false>;
+    template<typename TTag>
+    using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase<TTag, false>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp
index 358513e1e2fc..3758dc335df4 100644
--- a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp
+++ b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevGpuSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
 namespace alpaka
 {
-    using QueueGpuSyclIntelBlocking = QueueGenericSyclBlocking<DevGpuSyclIntel>;
+    using QueueGpuSyclIntelBlocking = QueueGenericSyclBlocking<TagGpuSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp
index f3be15c9dcb2..6c2bcd093bb9 100644
--- a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp
+++ b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp
@@ -4,14 +4,14 @@
 
 #pragma once
 
-#include "alpaka/dev/DevGpuSyclIntel.hpp"
+#include "alpaka/acc/Tag.hpp"
 #include "alpaka/queue/QueueGenericSyclNonBlocking.hpp"
 
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_GPU)
 
 namespace alpaka
 {
-    using QueueGpuSyclIntelNonBlocking = QueueGenericSyclNonBlocking<DevGpuSyclIntel>;
+    using QueueGpuSyclIntelNonBlocking = QueueGenericSyclNonBlocking<TagGpuSyclIntel>;
 } // namespace alpaka
 
 #endif
diff --git a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp
index 655deb973675..c99b2aeaa53f 100644
--- a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp
+++ b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp
@@ -23,262 +23,264 @@
 
 #    include <sycl/sycl.hpp>
 
-namespace alpaka::detail
+namespace alpaka
 {
-    template<typename T, typename = void>
-    inline constexpr auto is_sycl_task = false;
-
-    template<typename T>
-    inline constexpr auto is_sycl_task<T, std::void_t<decltype(T::is_sycl_task)>> = true;
-
-    template<typename T, typename = void>
-    inline constexpr auto is_sycl_kernel = false;
+    template<typename TTag>
+    class DevGenericSycl;
 
-    template<typename T>
-    inline constexpr auto is_sycl_kernel<T, std::void_t<decltype(T::is_sycl_kernel)>> = true;
+    template<typename TTag>
+    class EventGenericSycl;
 
-    class QueueGenericSyclImpl
+    namespace detail
     {
-    public:
-        QueueGenericSyclImpl(sycl::context context, sycl::device device)
-            : m_queue{
-                std::move(context), // This is important. In SYCL a device can belong to multiple contexts.
-                std::move(device),
-                {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}}
-        {
-        }
+        template<typename T, typename = void>
+        inline constexpr auto is_sycl_task = false;
+
+        template<typename T>
+        inline constexpr auto is_sycl_task<T, std::void_t<decltype(T::is_sycl_task)>> = true;
 
-        // This class will only exist as a pointer. We don't care about copy and move semantics.
-        QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete;
-        auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete;
+        template<typename T, typename = void>
+        inline constexpr auto is_sycl_kernel = false;
 
-        QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete;
-        auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete;
+        template<typename T>
+        inline constexpr auto is_sycl_kernel<T, std::void_t<decltype(T::is_sycl_kernel)>> = true;
 
-        ~QueueGenericSyclImpl()
+        class QueueGenericSyclImpl
         {
-            try
+        public:
+            QueueGenericSyclImpl(sycl::context context, sycl::device device)
+                : m_queue{
+                    std::move(context), // This is important. In SYCL a device can belong to multiple contexts.
+                    std::move(device),
+                    {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}}
             {
-                m_queue.wait_and_throw();
             }
-            catch(sycl::exception const& err)
+
+            // This class will only exist as a pointer. We don't care about copy and move semantics.
+            QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete;
+            auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete;
+
+            QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete;
+            auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete;
+
+            ~QueueGenericSyclImpl()
             {
-                std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " ("
-                          << err.code() << ')' << std::endl;
+                try
+                {
+                    m_queue.wait_and_throw();
+                }
+                catch(sycl::exception const& err)
+                {
+                    std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " ("
+                              << err.code() << ')' << std::endl;
+                }
+                catch(std::exception const& err)
+                {
+                    std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what()
+                              << std::endl;
+                }
             }
-            catch(std::exception const& err)
+
+            // Don't call this without locking first!
+            auto clean_dependencies() -> void
             {
-                std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what()
-                          << std::endl;
+                // Clean up completed events
+                auto const start = std::begin(m_dependencies);
+                auto const old_end = std::end(m_dependencies);
+                auto const new_end = std::remove_if(
+                    start,
+                    old_end,
+                    [](sycl::event ev) {
+                        return ev.get_info<sycl::info::event::command_execution_status>()
+                               == sycl::info::event_command_status::complete;
+                    });
+
+                m_dependencies.erase(new_end, old_end);
             }
-        }
-
-        // Don't call this without locking first!
-        auto clean_dependencies() -> void
-        {
-            // Clean up completed events
-            auto const start = std::begin(m_dependencies);
-            auto const old_end = std::end(m_dependencies);
-            auto const new_end = std::remove_if(
-                start,
-                old_end,
-                [](sycl::event ev) {
-                    return ev.get_info<sycl::info::event::command_execution_status>()
-                           == sycl::info::event_command_status::complete;
-                });
-
-            m_dependencies.erase(new_end, old_end);
-        }
-
-        auto register_dependency(sycl::event event) -> void
-        {
-            std::lock_guard<std::shared_mutex> lock{m_mutex};
-
-            clean_dependencies();
-            m_dependencies.push_back(event);
-        }
 
-        auto empty() const -> bool
-        {
-            std::shared_lock<std::shared_mutex> lock{m_mutex};
-            return m_last_event.get_info<sycl::info::event::command_execution_status>()
-                   == sycl::info::event_command_status::complete;
-        }
+            auto register_dependency(sycl::event event) -> void
+            {
+                std::lock_guard<std::shared_mutex> lock{m_mutex};
 
-        auto wait() -> void
-        {
-            // SYCL queues are thread-safe.
-            m_queue.wait_and_throw();
-        }
+                clean_dependencies();
+                m_dependencies.push_back(event);
+            }
 
-        auto get_last_event() const -> sycl::event
-        {
-            std::shared_lock<std::shared_mutex> lock{m_mutex};
-            return m_last_event;
-        }
+            auto empty() const -> bool
+            {
+                std::shared_lock<std::shared_mutex> lock{m_mutex};
+                return m_last_event.get_info<sycl::info::event::command_execution_status>()
+                       == sycl::info::event_command_status::complete;
+            }
 
-        template<bool TBlocking, typename TTask>
-        auto enqueue(TTask const& task) -> void
-        {
+            auto wait() -> void
             {
-                std::lock_guard<std::shared_mutex> lock{m_mutex};
+                // SYCL queues are thread-safe.
+                m_queue.wait_and_throw();
+            }
 
-                clean_dependencies();
+            auto get_last_event() const -> sycl::event
+            {
+                std::shared_lock<std::shared_mutex> lock{m_mutex};
+                return m_last_event;
+            }
 
-                // Execute task
-                if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
-                {
-                    m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
-                }
-                else
+            template<bool TBlocking, typename TTask>
+            auto enqueue(TTask const& task) -> void
+            {
                 {
-                    m_last_event = m_queue.submit(
-                        [this, &task](sycl::handler& cgh)
-                        {
-                            if(!m_dependencies.empty())
-                                cgh.depends_on(m_dependencies);
-
-                            if constexpr(is_sycl_kernel<TTask>) // Kernel
-                                task(cgh); // Will call cgh.parallel_for internally
-                            else // Host
-                                cgh.host_task(task);
-                        });
+                    std::lock_guard<std::shared_mutex> lock{m_mutex};
+
+                    clean_dependencies();
+
+                    // Execute task
+                    if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
+                    {
+                        m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
+                    }
+                    else
+                    {
+                        m_last_event = m_queue.submit(
+                            [this, &task](sycl::handler& cgh)
+                            {
+                                if(!m_dependencies.empty())
+                                    cgh.depends_on(m_dependencies);
+
+                                if constexpr(is_sycl_kernel<TTask>) // Kernel
+                                    task(cgh); // Will call cgh.parallel_for internally
+                                else // Host
+                                    cgh.host_task(task);
+                            });
+                    }
+
+                    m_dependencies.clear();
                 }
 
-                m_dependencies.clear();
+                if constexpr(TBlocking)
+                    wait();
+            }
+
+            [[nodiscard]] auto getNativeHandle() const noexcept
+            {
+                return m_queue;
             }
 
-            if constexpr(TBlocking)
-                wait();
-        }
+            std::vector<sycl::event> m_dependencies;
+            sycl::event m_last_event;
+            std::shared_mutex mutable m_mutex;
 
-        [[nodiscard]] auto getNativeHandle() const noexcept
-        {
-            return m_queue;
-        }
-
-        std::vector<sycl::event> m_dependencies;
-        sycl::event m_last_event;
-        std::shared_mutex mutable m_mutex;
-
-    private:
-        sycl::queue m_queue;
-    };
-
-    template<typename TDev, bool TBlocking>
-    class QueueGenericSyclBase
-        : public concepts::Implements<ConceptCurrentThreadWaitFor, QueueGenericSyclBase<TDev, TBlocking>>
-        , public concepts::Implements<ConceptQueue, QueueGenericSyclBase<TDev, TBlocking>>
-        , public concepts::Implements<ConceptGetDev, QueueGenericSyclBase<TDev, TBlocking>>
-    {
-    public:
-        QueueGenericSyclBase(TDev const& dev)
-            : m_dev{dev}
-            , m_spQueueImpl{std::make_shared<detail::QueueGenericSyclImpl>(
-                  dev.getNativeHandle().second,
-                  dev.getNativeHandle().first)}
-        {
-            m_dev.m_impl->register_queue(m_spQueueImpl);
-        }
+        private:
+            sycl::queue m_queue;
+        };
 
-        friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
+        template<typename TTag, bool TBlocking>
+        class QueueGenericSyclBase
+            : public concepts::Implements<ConceptCurrentThreadWaitFor, QueueGenericSyclBase<TTag, TBlocking>>
+            , public concepts::Implements<ConceptQueue, QueueGenericSyclBase<TTag, TBlocking>>
+            , public concepts::Implements<ConceptGetDev, QueueGenericSyclBase<TTag, TBlocking>>
         {
-            return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl);
-        }
+        public:
+            QueueGenericSyclBase(DevGenericSycl<TTag> const& dev)
+                : m_dev{dev}
+                , m_spQueueImpl{std::make_shared<detail::QueueGenericSyclImpl>(
+                      dev.getNativeHandle().second,
+                      dev.getNativeHandle().first)}
+            {
+                m_dev.m_impl->register_queue(m_spQueueImpl);
+            }
 
-        friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
-        {
-            return !(lhs == rhs);
-        }
+            friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
+            {
+                return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl);
+            }
 
-        [[nodiscard]] auto getNativeHandle() const noexcept
-        {
-            return m_spQueueImpl->getNativeHandle();
-        }
+            friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
+            {
+                return !(lhs == rhs);
+            }
 
-        TDev m_dev;
-        std::shared_ptr<detail::QueueGenericSyclImpl> m_spQueueImpl;
-    };
-} // namespace alpaka::detail
+            [[nodiscard]] auto getNativeHandle() const noexcept
+            {
+                return m_spQueueImpl->getNativeHandle();
+            }
 
-namespace alpaka
-{
-    template<typename TDev>
-    class EventGenericSycl;
-} // namespace alpaka
+            DevGenericSycl<TTag> m_dev;
+            std::shared_ptr<detail::QueueGenericSyclImpl> m_spQueueImpl;
+        };
+    } // namespace detail
 
-namespace alpaka::trait
-{
-    //! The SYCL blocking queue device type trait specialization.
-    template<typename TDev, bool TBlocking>
-    struct DevType<detail::QueueGenericSyclBase<TDev, TBlocking>>
+    namespace trait
     {
-        using type = TDev;
-    };
+        //! The SYCL queue device type trait specialization (blocking and non-blocking).
+        template<typename TTag, bool TBlocking>
+        struct DevType<detail::QueueGenericSyclBase<TTag, TBlocking>>
+        {
+            using type = DevGenericSycl<TTag>;
+        };
 
-    //! The SYCL blocking queue device get trait specialization.
-    template<typename TDev, bool TBlocking>
-    struct GetDev<detail::QueueGenericSyclBase<TDev, TBlocking>>
-    {
-        static auto getDev(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue)
+        //! The SYCL queue device get trait specialization (blocking and non-blocking).
+        template<typename TTag, bool TBlocking>
+        struct GetDev<detail::QueueGenericSyclBase<TTag, TBlocking>>
         {
-            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
-            return queue.m_dev;
-        }
-    };
-
-    //! The SYCL blocking queue event type trait specialization.
-    template<typename TDev, bool TBlocking>
-    struct EventType<detail::QueueGenericSyclBase<TDev, TBlocking>>
-    {
-        using type = EventGenericSycl<TDev>;
-    };
+            static auto getDev(detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
+            {
+                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
+                return queue.m_dev;
+            }
+        };
 
-    //! The SYCL blocking queue enqueue trait specialization.
-    template<typename TDev, bool TBlocking, typename TTask>
-    struct Enqueue<detail::QueueGenericSyclBase<TDev, TBlocking>, TTask>
-    {
-        static auto enqueue(detail::QueueGenericSyclBase<TDev, TBlocking>& queue, TTask const& task) -> void
+        //! The SYCL queue event type trait specialization (blocking and non-blocking).
+        template<typename TTag, bool TBlocking>
+        struct EventType<detail::QueueGenericSyclBase<TTag, TBlocking>>
         {
-            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
-            queue.m_spQueueImpl->template enqueue<TBlocking>(task);
-        }
-    };
-
-    //! The SYCL blocking queue test trait specialization.
-    template<typename TDev, bool TBlocking>
-    struct Empty<detail::QueueGenericSyclBase<TDev, TBlocking>>
-    {
-        static auto empty(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue) -> bool
+            using type = EventGenericSycl<TTag>;
+        };
+
+        //! The SYCL queue enqueue trait specialization (blocking and non-blocking).
+        template<typename TTag, bool TBlocking, typename TTask>
+        struct Enqueue<detail::QueueGenericSyclBase<TTag, TBlocking>, TTask>
         {
-            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
-            return queue.m_spQueueImpl->empty();
-        }
-    };
-
-    //! The SYCL blocking queue thread wait trait specialization.
-    //!
-    //! Blocks execution of the calling thread until the queue has finished processing all previously requested
-    //! tasks (kernels, data copies, ...)
-    template<typename TDev, bool TBlocking>
-    struct CurrentThreadWaitFor<detail::QueueGenericSyclBase<TDev, TBlocking>>
-    {
-        static auto currentThreadWaitFor(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue) -> void
+            static auto enqueue(detail::QueueGenericSyclBase<TTag, TBlocking>& queue, TTask const& task) -> void
+            {
+                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
+                queue.m_spQueueImpl->template enqueue<TBlocking>(task);
+            }
+        };
+
+        //! The SYCL queue empty-test trait specialization (blocking and non-blocking).
+        template<typename TTag, bool TBlocking>
+        struct Empty<detail::QueueGenericSyclBase<TTag, TBlocking>>
         {
-            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
-            queue.m_spQueueImpl->wait();
-        }
-    };
-
-    //! The SYCL queue native handle trait specialization.
-    template<typename TDev, bool TBlocking>
-    struct NativeHandle<detail::QueueGenericSyclBase<TDev, TBlocking>>
-    {
-        [[nodiscard]] static auto getNativeHandle(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue)
+            static auto empty(detail::QueueGenericSyclBase<TTag, TBlocking> const& queue) -> bool
+            {
+                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
+                return queue.m_spQueueImpl->empty();
+            }
+        };
+
+        //! The SYCL queue thread wait trait specialization (blocking and non-blocking).
+        //!
+        //! Blocks execution of the calling thread until the queue has finished processing all previously requested
+        //! tasks (kernels, data copies, ...)
+        template<typename TTag, bool TBlocking>
+        struct CurrentThreadWaitFor<detail::QueueGenericSyclBase<TTag, TBlocking>>
         {
-            return queue.getNativeHandle();
-        }
-    };
-} // namespace alpaka::trait
+            static auto currentThreadWaitFor(detail::QueueGenericSyclBase<TTag, TBlocking> const& queue) -> void
+            {
+                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
+                queue.m_spQueueImpl->wait();
+            }
+        };
 
+        //! The SYCL queue native handle trait specialization.
+        template<typename TTag, bool TBlocking>
+        struct NativeHandle<detail::QueueGenericSyclBase<TTag, TBlocking>>
+        {
+            [[nodiscard]] static auto getNativeHandle(detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
+            {
+                return queue.getNativeHandle();
+            }
+        };
+    } // namespace trait
+} // namespace alpaka
 #endif
diff --git a/include/alpaka/test/acc/TestAccs.hpp b/include/alpaka/test/acc/TestAccs.hpp
index c0751aa18cf6..e84bb7800f39 100644
--- a/include/alpaka/test/acc/TestAccs.hpp
+++ b/include/alpaka/test/acc/TestAccs.hpp
@@ -79,21 +79,21 @@ namespace alpaka::test
 #endif
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_CPU)
         template<typename TDim, typename TIdx>
-        using AccCpuSyclIfAvailableElseInt = alpaka::AccCpuSycl<TDim, TIdx>;
+        using AccCpuSyclIfAvailableElseInt = AccCpuSycl<TDim, TIdx>;
 #else
         template<typename TDim, typename TIdx>
         using AccCpuSyclIfAvailableElseInt = int;
 #endif
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_FPGA)
         template<typename TDim, typename TIdx>
-        using AccFpgaSyclIntelIfAvailableElseInt = alpaka::AccFpgaSyclIntel<TDim, TIdx>;
+        using AccFpgaSyclIntelIfAvailableElseInt = AccFpgaSyclIntel<TDim, TIdx>;
 #else
         template<typename TDim, typename TIdx>
         using AccFpgaSyclIntelIfAvailableElseInt = int;
 #endif
 #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_TARGET_GPU)
         template<typename TDim, typename TIdx>
-        using AccGpuSyclIntelIfAvailableElseInt = alpaka::AccGpuSyclIntel<TDim, TIdx>;
+        using AccGpuSyclIntelIfAvailableElseInt = AccGpuSyclIntel<TDim, TIdx>;
 #else
         template<typename TDim, typename TIdx>
         using AccGpuSyclIntelIfAvailableElseInt = int;
diff --git a/include/alpaka/test/event/EventHostManualTrigger.hpp b/include/alpaka/test/event/EventHostManualTrigger.hpp
index 4ce363907475..dbdc269f0070 100644
--- a/include/alpaka/test/event/EventHostManualTrigger.hpp
+++ b/include/alpaka/test/event/EventHostManualTrigger.hpp
@@ -712,11 +712,11 @@ namespace alpaka
 {
     namespace test
     {
-        template<typename TPlatform>
+        template<typename TTag>
         class EventHostManualTriggerSycl
         {
         public:
-            EventHostManualTriggerSycl(DevGenericSycl<TPlatform> const&)
+            EventHostManualTriggerSycl(DevGenericSycl<TTag> const&)
             {
             }
 
@@ -727,16 +727,16 @@ namespace alpaka
 
         namespace trait
         {
-            template<typename TPlatform>
-            struct EventHostManualTriggerType<DevGenericSycl<TPlatform>>
+            template<typename TTag>
+            struct EventHostManualTriggerType<DevGenericSycl<TTag>>
             {
-                using type = alpaka::test::EventHostManualTriggerSycl<TPlatform>;
+                using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
             };
 
-            template<typename TPlatform>
-            struct IsEventHostManualTriggerSupported<DevGenericSycl<TPlatform>>
+            template<typename TTag>
+            struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
             {
-                ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TPlatform> const&) -> bool
+                ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag> const&) -> bool
                 {
                     return false;
                 }
@@ -746,35 +746,30 @@ namespace alpaka
 
     namespace trait
     {
-        template<typename TPlatform>
-        struct Enqueue<
-            QueueGenericSyclBlocking<DevGenericSycl<TPlatform>>,
-            test::EventHostManualTriggerSycl<TPlatform>>
+        template<typename TTag>
+        struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
         {
             ALPAKA_FN_HOST static auto enqueue(
-                QueueGenericSyclBlocking<DevGenericSycl<TPlatform>>& /* queue */,
-                test::EventHostManualTriggerSycl<TPlatform>& /* event */) -> void
+                QueueGenericSyclBlocking<TTag>& /* queue */,
+                test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
             {
             }
         };
 
-        template<typename TPlatform>
-        struct Enqueue<
-            QueueGenericSyclNonBlocking<DevGenericSycl<TPlatform>>,
-            test::EventHostManualTriggerSycl<TPlatform>>
+        template<typename TTag>
+        struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
         {
             ALPAKA_FN_HOST static auto enqueue(
-                QueueGenericSyclNonBlocking<DevGenericSycl<TPlatform>>& /* queue */,
-                test::EventHostManualTriggerSycl<TPlatform>& /* event */) -> void
+                QueueGenericSyclNonBlocking<TTag>& /* queue */,
+                test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
             {
             }
         };
 
-        template<typename TPlatform>
-        struct IsComplete<test::EventHostManualTriggerSycl<TPlatform>>
+        template<typename TTag>
+        struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
         {
-            ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl<TPlatform> const& /* event */)
-                -> bool
+            ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl<TTag> const& /* event */) -> bool
             {
                 return true;
             }
diff --git a/include/alpaka/test/queue/Queue.hpp b/include/alpaka/test/queue/Queue.hpp
index 22432fc719b4..d9c0e9927934 100644
--- a/include/alpaka/test/queue/Queue.hpp
+++ b/include/alpaka/test/queue/Queue.hpp
@@ -38,14 +38,20 @@ namespace alpaka::test
 #    endif
         };
 #endif
-    } // namespace trait
 
-    //! The queue type that should be used for the given device.
-    template<typename TDev>
-    using DefaultQueue = typename trait::DefaultQueueType<TDev>::type;
+#ifdef ALPAKA_ACC_SYCL_ENABLED
+        //! The default queue type trait specialization for the SYCL device.
+        template<typename TTag>
+        struct DefaultQueueType<DevGenericSycl<TTag>>
+        {
+#    if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
+            using type = QueueGenericSyclBlocking<TTag>;
+#    else
+            using type = QueueGenericSyclNonBlocking<TTag>;
+#    endif
+        };
+#endif
 
-    namespace trait
-    {
         //! The blocking queue trait.
         template<typename TQueue, typename TSfinae = void>
         struct IsBlockingQueue;
@@ -82,81 +88,24 @@ namespace alpaka::test
 #endif
 
 #ifdef ALPAKA_ACC_SYCL_ENABLED
-#    ifdef ALPAKA_SYCL_ONEAPI_CPU
-        //! The default queue type trait specialization for the Intel CPU device.
-        template<>
-        struct DefaultQueueType<alpaka::DevCpuSycl>
-        {
-#        if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
-            using type = alpaka::QueueCpuSyclBlocking;
-#        else
-            using type = alpaka::QueueCpuSyclNonBlocking;
-#        endif
-        };
-
-        template<>
-        struct IsBlockingQueue<alpaka::QueueCpuSyclBlocking>
+        template<typename TTag>
+        struct IsBlockingQueue<QueueGenericSyclBlocking<TTag>>
         {
             static constexpr auto value = true;
         };
 
-        template<>
-        struct IsBlockingQueue<alpaka::QueueCpuSyclNonBlocking>
+        template<typename TTag>
+        struct IsBlockingQueue<QueueGenericSyclNonBlocking<TTag>>
         {
             static constexpr auto value = false;
         };
-#    endif
-#    ifdef ALPAKA_SYCL_ONEAPI_FPGA
-        //! The default queue type trait specialization for the Intel SYCL device.
-        template<>
-        struct DefaultQueueType<alpaka::DevFpgaSyclIntel>
-        {
-#        if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
-            using type = alpaka::QueueFpgaSyclIntelBlocking;
-#        else
-            using type = alpaka::QueueFpgaSyclIntelNonBlocking;
-#        endif
-        };
-
-        template<>
-        struct IsBlockingQueue<alpaka::QueueFpgaSyclIntelBlocking>
-        {
-            static constexpr auto value = true;
-        };
-
-        template<>
-        struct IsBlockingQueue<alpaka::QueueFpgaSyclIntelNonBlocking>
-        {
-            static constexpr auto value = false;
-        };
-#    endif
-#    ifdef ALPAKA_SYCL_ONEAPI_GPU
-        //! The default queue type trait specialization for the Intel CPU device.
-        template<>
-        struct DefaultQueueType<alpaka::DevGpuSyclIntel>
-        {
-#        if(ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
-            using type = alpaka::QueueGpuSyclIntelBlocking;
-#        else
-            using type = alpaka::QueueGpuSyclIntelNonBlocking;
-#        endif
-        };
-
-        template<>
-        struct IsBlockingQueue<alpaka::QueueGpuSyclIntelBlocking>
-        {
-            static constexpr auto value = true;
-        };
-
-        template<>
-        struct IsBlockingQueue<alpaka::QueueGpuSyclIntelNonBlocking>
-        {
-            static constexpr auto value = false;
-        };
-#    endif
 #endif
     } // namespace trait
 
+    //! The queue type that should be used for the given device.
+    template<typename TDev>
+    using DefaultQueue = typename trait::DefaultQueueType<TDev>::type;
+
     //! The queue type that should be used for the given accelerator.
     template<typename TQueue>
     using IsBlockingQueue = trait::IsBlockingQueue<TQueue>;