Rewrite the SYCL backend for the SYCL 2020 standard and USM allocations #1845

Merged
10 commits merged on Jul 27, 2023
18 changes: 11 additions & 7 deletions README_SYCL.md
@@ -53,9 +53,9 @@ To enable device-side printing add the following compiler flags:
* `-DALPAKA_SYCL_IOSTREAM_ENABLED`: to enable device-side printing.
* `-DALPAKA_SYCL_IOSTREAM_KIB=<value>`: `<value>` (without the brackets) defines the kibibytes per block to be reserved for device-side printing. `<value>` cannot exceed the amount of shared memory per block.

### Building for Intel CPUs
### Building for x86 64-bit CPUs

1. `#include <alpaka/standalone/CpuSyclIntel.hpp>` in your C++ code.
1. `#include <alpaka/standalone/CpuSycl.hpp>` in your C++ code.
2. Add the following flags:
* `-fsycl-targets=spir64_x86_64` (compiler and linker): to enable CPU compilation. Note: if you are using multiple SYCL hardware targets (like CPU and GPU), separate them with commas here.
* `-Xsycl-target-backend=spir64_x86_64 "-march=<ISA>"` (linker): to choose the Intel ISA to compile for. Check the output of `opencl-aot --help` and look for the possible values of the `--march` flag.
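For orientation only (not part of the diff), a build following the steps above might look like the sketch below; the compiler driver (`icpx`), include path, ISA value and C++ standard flag are assumptions rather than values taken from this PR:

```bash
# Hypothetical oneAPI compile/link line for the SYCL CPU back-end (paths and values are placeholders).
# -fsycl-targets selects the CPU target; -Xsycl-target-backend picks the ISA (see `opencl-aot --help`).
icpx -std=c++17 -fsycl \
     -I/path/to/alpaka/include \
     -fsycl-targets=spir64_x86_64 \
     -Xsycl-target-backend=spir64_x86_64 "-march=avx2" \
     -DALPAKA_SYCL_IOSTREAM_ENABLED -DALPAKA_SYCL_IOSTREAM_KIB=64 \
     -o vectorAdd vectorAdd.cpp
```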
@@ -84,17 +84,21 @@ To enable device-side printing add the following compiler flags:

In contrast to the other back-ends, the SYCL back-end comes with multiple accelerators, which should be chosen according to your requirements:

* `alpaka::experimental::AccCpuSyclIntel` for targeting Intel CPUs. In contrast to the other CPU back-ends this will be using Intel's OpenCL implementation for CPUs under the hood.
* `alpaka::experimental::AccCpuSycl` for targeting Intel and AMD CPUs. In contrast to the other CPU back-ends this will use Intel's OpenCL implementation for CPUs under the hood.
* `alpaka::experimental::AccFpgaSyclIntel` for targeting Intel FPGAs.
* `alpaka::experimental::AccGpuSyclIntel` for targeting Intel GPUs.

These can be used interchangeably with the non-experimental alpaka accelerators (some restrictions apply; see below) to compile existing alpaka code for SYCL-capable hardware, as in the sketch below.
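As a quick illustration (not part of the diff), selecting the renamed CPU accelerator in user code could look like the following sketch; `Dim` and `Idx` are placeholder choices rather than values mandated by alpaka:

```c++
// Hypothetical sketch: choosing the SYCL CPU accelerator (renamed from AccCpuSyclIntel in this PR).
#include <alpaka/alpaka.hpp>
#include <cstddef>

using Dim = alpaka::DimInt<1u>;
using Idx = std::size_t;
using Acc = alpaka::experimental::AccCpuSycl<Dim, Idx>;  // used like any other alpaka accelerator type
```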

### Restrictions

* The FPGA back-ends (both vendors) cannot be used together with the Intel CPU / GPU back-ends. This is because of the different compilation trajectory required for FPGAs and is unlikely to be fixed anytime soon. See [here](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/programming-interface/fpga-flow/why-is-fpga-compilation-different.html) for an explanation.
* The SYCL back-end currently does not support passing pointers as kernel parameters. Use alpaka's experimental accessors instead.
* The SYCL back-end does not have device-side random number generation.
* The Intel FPGA back-end cannot be used together with the Intel CPU / GPU back-ends. This is because of the different compilation trajectory required for FPGAs and is unlikely to be fixed anytime soon. See [here](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/programming-interface/fpga-flow/why-is-fpga-compilation-different.html) for an explanation.
* Similar to the CUDA and HIP back-ends, the SYCL back-end only supports up to three kernel dimensions.
* Some Intel GPUs do not support the `double` type for device code. alpaka will not check this.
* The FPGA back-end does not support atomics. alpaka will not check this.
You can enable software emulation for `double` precision types with
```bash
export IGC_EnableDPEmulation=1
export OverrideDefaultFP64Settings=1
```
See [Intel's FAQ](https://github.com/intel/compute-runtime/blob/master/opencl/doc/FAQ.md#feature-double-precision-emulation-fp64) for more information.
* The FPGA back-end does not support atomics. alpaka will not check this.
12 changes: 4 additions & 8 deletions cmake/alpakaCommon.cmake
@@ -1,5 +1,5 @@
#
# Copyright 2023 Benjamin Worpitz, Erik Zenker, Axel Hübl, Jan Stephan, René Widera, Jeffrey Kelling, Andrea Bocci, Bernhard Manfred Gruber
# Copyright 2023 Benjamin Worpitz, Erik Zenker, Axel Hübl, Jan Stephan, René Widera, Jeffrey Kelling, Andrea Bocci, Bernhard Manfred Gruber, Aurora Perego
# SPDX-License-Identifier: MPL-2.0
#

@@ -547,9 +547,6 @@ if(alpaka_ACC_SYCL_ENABLE)

# Enable device-side printing to stdout
cmake_dependent_option(alpaka_SYCL_ENABLE_IOSTREAM "Enable device-side printing to stdout" OFF "alpaka_ACC_SYCL_ENABLE" OFF)
if(BUILD_TESTING)
set(alpaka_SYCL_ENABLE_IOSTREAM ON CACHE BOOL "Enable device-side printing to stdout" FORCE)
endif()

alpaka_set_compiler_options(HOST_DEVICE target alpaka "-fsycl")
target_link_options(alpaka INTERFACE "-fsycl")
@@ -559,7 +556,7 @@ if(alpaka_ACC_SYCL_ENABLE)
# Determine SYCL targets
set(alpaka_SYCL_ONEAPI_CPU_TARGET "spir64_x86_64")
set(alpaka_SYCL_ONEAPI_FPGA_TARGET "spir64_fpga")
set(alpaka_SYCL_ONEAPI_GPU_TARGET "spir64_gen")
set(alpaka_SYCL_ONEAPI_GPU_TARGET ${alpaka_SYCL_ONEAPI_GPU_DEVICES})

if(alpaka_SYCL_ONEAPI_CPU)
list(APPEND alpaka_SYCL_TARGETS ${alpaka_SYCL_ONEAPI_CPU_TARGET})
@@ -620,14 +617,13 @@ if(alpaka_ACC_SYCL_ENABLE)
if(alpaka_SYCL_ONEAPI_GPU)
# Create a drop-down list (in cmake-gui) of valid Intel GPU targets. On the command line the user can specify
# additional targets, such as ranges: "Gen8-Gen12LP" or lists: "icllp;skl".
set(alpaka_SYCL_ONEAPI_GPU_DEVICES "bdw" CACHE STRING "Intel GPU devices / generations to compile for")
set(alpaka_SYCL_ONEAPI_GPU_DEVICES "intel_gpu_pvc" CACHE STRING "Intel GPU devices / generations to compile for")
set_property(CACHE alpaka_SYCL_ONEAPI_GPU_DEVICES
PROPERTY STRINGS "bdw;skl;kbl;cfl;bxt;glk;whl;aml;cml;icllp;lkf;ehl;tgllp;rkl;adl-s;adl-p;dg1;acm-g10;ats-m150;dg2-g10;acm-g11;ats-m75;dg2-g11;acm-g12;dg2-g12;pvc-sdv;pvc;gen11;gen12lp;gen8;gen9;xe;xe-hpc;xe-hpg")
PROPERTY STRINGS "intel_gpu_pvc;intel_gpu_acm_g12;intel_gpu_acm_g11;intel_gpu_acm_g10;intel_gpu_dg1;intel_gpu_adl_n;intel_gpu_adl_p;intel_gpu_rpl_s;intel_gpu_adl_s;intel_gpu_rkl;intel_gpu_tgllp;intel_gpu_icllp;intel_gpu_cml;intel_gpu_aml;intel_gpu_whl;intel_gpu_glk;intel_gpu_apl;intel_gpu_cfl;intel_gpu_kbl;intel_gpu_skl;intel_gpu_bdw")
Comment on lines -623 to +622
Member:
We should rework this at some point in the future. Maintaining the list is tedious and error prone; we should mirror the CUDA back-end here and just let the user pass in any string.

Contributor:
Is it possible to make CMake extract the list from some header files or by running a command?

Contributor:
For example, this

cat /opt/intel/oneapi/compiler/latest/linux/include/sycl/ext/oneapi/experimental/device_architecture.hpp | sed -n '/enum class architecture/,/max_architecture/p' | grep intel_gpu | sed -e's/ *//g;s/,//' | xargs echo | sed -e's/ /;/g'

generates

intel_gpu_bdw;intel_gpu_skl;intel_gpu_kbl;intel_gpu_cfl;intel_gpu_apl;intel_gpu_glk;intel_gpu_whl;intel_gpu_aml;intel_gpu_cml;intel_gpu_icllp;intel_gpu_tgllp;intel_gpu_rkl;intel_gpu_adl_s;intel_gpu_rpl_s;intel_gpu_adl_p;intel_gpu_adl_n;intel_gpu_dg1;intel_gpu_acm_g10;intel_gpu_acm_g11;intel_gpu_acm_g12;intel_gpu_pvc

Member:
Sure, it would be possible. But why bother? The device compiler will fail anyway if the user passes in an unknown string.

Contributor:
Well... I do not use CMake, so I cannot really comment on the usefulness.

I would guess that getting a meaningful error at configuration time is better than getting it later at compile or link time.

# If the user has given us a list, turn all ';' into ',' to pacify the Intel OpenCL compiler.
string(REPLACE ";" "," alpaka_SYCL_ONEAPI_GPU_DEVICES "${alpaka_SYCL_ONEAPI_GPU_DEVICES}")

target_compile_definitions(alpaka INTERFACE "ALPAKA_SYCL_ONEAPI_GPU")
target_link_options(alpaka INTERFACE "SHELL:-Xsycl-target-backend=${alpaka_SYCL_ONEAPI_GPU_TARGET} \"-device ${alpaka_SYCL_ONEAPI_GPU_DEVICES}\"")
j-stephan marked this conversation as resolved.
endif()
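As a usage sketch (not part of the diff), configuring alpaka for two of the new device names could look like this; the build directory layout and the exact set of required options are assumptions:

```bash
# Hypothetical CMake configuration for the oneAPI GPU back-end with two target devices.
# The semicolon-separated list is converted to the comma-separated form by the snippet above.
cmake -S . -B build \
  -Dalpaka_ACC_SYCL_ENABLE=ON \
  -Dalpaka_SYCL_ONEAPI_GPU=ON \
  -Dalpaka_SYCL_ONEAPI_GPU_DEVICES="intel_gpu_pvc;intel_gpu_dg1"
```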

#-----------------------------------------------------------------------------------------------------------------
7 changes: 5 additions & 2 deletions example/vectorAdd/src/vectorAdd.cpp
@@ -1,4 +1,5 @@
/* Copyright 2023 Benjamin Worpitz, Matthias Werner, Bernhard Manfred Gruber, Jan Stephan
/* Copyright 2023 Benjamin Worpitz, Matthias Werner, Bernhard Manfred Gruber, Jan Stephan, Luca Ferragina,
* Aurora Perego
* SPDX-License-Identifier: ISC
*/

@@ -76,6 +77,8 @@ auto main() -> int
// - AccCpuSerial
// using Acc = alpaka::AccCpuSerial<Dim, Idx>;
using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using Pltf = alpaka::Pltf<Acc>;
using DevAcc = alpaka::Dev<Acc>;
std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;

// Defines the synchronization behavior of a queue
@@ -136,7 +139,7 @@ auto main() -> int
}

// Allocate 3 buffers on the accelerator
using BufAcc = alpaka::Buf<Acc, Data, Dim, Idx>;
using BufAcc = alpaka::Buf<DevAcc, Data, Dim, Idx>;
BufAcc bufAccA(alpaka::allocBuf<Data, Idx>(devAcc, extent));
BufAcc bufAccB(alpaka::allocBuf<Data, Idx>(devAcc, extent));
BufAcc bufAccC(alpaka::allocBuf<Data, Idx>(devAcc, extent));
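Illustrative recap (not new code in this PR): the aliases introduced above compose as follows; `Data`, `extent` and `devAcc` are defined elsewhere in the example:

```c++
// How the renamed pieces fit together in the updated example.
using Acc    = alpaka::ExampleDefaultAcc<Dim, Idx>;
using Pltf   = alpaka::Pltf<Acc>;                    // platform associated with the accelerator
using DevAcc = alpaka::Dev<Acc>;                     // device type of that accelerator
using BufAcc = alpaka::Buf<DevAcc, Data, Dim, Idx>;  // buffers are now keyed on the device type

BufAcc bufAccA(alpaka::allocBuf<Data, Idx>(devAcc, extent));
```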
@@ -1,4 +1,4 @@
/* Copyright 2022 Jan Stephan
/* Copyright 2023 Jan Stephan, Luca Ferragina, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

@@ -9,11 +9,11 @@
#include "alpaka/core/Concepts.hpp"
#include "alpaka/core/DemangleTypeNames.hpp"
#include "alpaka/core/Sycl.hpp"
#include "alpaka/dev/DevCpuSyclIntel.hpp"
#include "alpaka/dev/DevCpuSycl.hpp"
#include "alpaka/dev/Traits.hpp"
#include "alpaka/kernel/TaskKernelCpuSyclIntel.hpp"
#include "alpaka/kernel/TaskKernelCpuSycl.hpp"
#include "alpaka/kernel/Traits.hpp"
#include "alpaka/pltf/PltfCpuSyclIntel.hpp"
#include "alpaka/pltf/PltfCpuSycl.hpp"
#include "alpaka/pltf/Traits.hpp"
#include "alpaka/vec/Vec.hpp"

@@ -23,17 +23,17 @@

#if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_BACKEND_ONEAPI) && defined(ALPAKA_SYCL_ONEAPI_CPU)

# include <CL/sycl.hpp>
# include <sycl/sycl.hpp>

namespace alpaka
{
//! The Intel CPU SYCL accelerator.
//!
//! This accelerator allows parallel kernel execution on a oneAPI-capable Intel CPU target device.
template<typename TDim, typename TIdx>
class AccCpuSyclIntel final
class AccCpuSycl final
: public AccGenericSycl<TDim, TIdx>
, public concepts::Implements<ConceptAcc, AccCpuSyclIntel<TDim, TIdx>>
, public concepts::Implements<ConceptAcc, AccCpuSycl<TDim, TIdx>>
{
public:
using AccGenericSycl<TDim, TIdx>::AccGenericSycl;
@@ -44,28 +44,28 @@ namespace alpaka::trait
{
//! The Intel CPU SYCL accelerator name trait specialization.
template<typename TDim, typename TIdx>
struct GetAccName<AccCpuSyclIntel<TDim, TIdx>>
struct GetAccName<AccCpuSycl<TDim, TIdx>>
{
static auto getAccName() -> std::string
{
return "AccCpuSyclIntel<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
return "AccCpuSycl<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
}
};

//! The Intel CPU SYCL accelerator device type trait specialization.
template<typename TDim, typename TIdx>
struct DevType<AccCpuSyclIntel<TDim, TIdx>>
struct DevType<AccCpuSycl<TDim, TIdx>>
{
using type = DevCpuSyclIntel;
using type = DevCpuSycl;
};

//! The Intel CPU SYCL accelerator execution task type trait specialization.
template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
struct CreateTaskKernel<AccCpuSyclIntel<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
struct CreateTaskKernel<AccCpuSycl<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
{
static auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
{
return TaskKernelCpuSyclIntel<TDim, TIdx, TKernelFnObj, TArgs...>{
return TaskKernelCpuSycl<TDim, TIdx, TKernelFnObj, TArgs...>{
workDiv,
kernelFnObj,
std::forward<TArgs>(args)...};
@@ -74,21 +74,21 @@ namespace alpaka::trait

//! The Intel CPU SYCL execution task platform type trait specialization.
template<typename TDim, typename TIdx>
struct PltfType<AccCpuSyclIntel<TDim, TIdx>>
struct PltfType<AccCpuSycl<TDim, TIdx>>
{
using type = PltfCpuSyclIntel;
using type = PltfCpuSycl;
};

template<typename TDim, typename TIdx>
struct AccToTag<alpaka::AccCpuSyclIntel<TDim, TIdx>>
struct AccToTag<alpaka::AccCpuSycl<TDim, TIdx>>
{
using type = alpaka::TagCpuSyclIntel;
using type = alpaka::TagCpuSycl;
};

template<typename TDim, typename TIdx>
struct TagToAcc<alpaka::TagCpuSyclIntel, TDim, TIdx>
struct TagToAcc<alpaka::TagCpuSycl, TDim, TIdx>
{
using type = alpaka::AccCpuSyclIntel<TDim, TIdx>;
using type = alpaka::AccCpuSycl<TDim, TIdx>;
};
} // namespace alpaka::trait
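For orientation (not part of the diff), the renamed tag/accelerator mapping can be exercised with a compile-time check; `Dim` and `Idx` are placeholder types and the SYCL CPU back-end is assumed to be enabled:

```c++
// Hypothetical compile-time sanity check of the TagCpuSycl <-> AccCpuSycl mapping.
#include <alpaka/alpaka.hpp>
#include <cstddef>
#include <type_traits>

using Dim = alpaka::DimInt<1u>;
using Idx = std::size_t;

static_assert(std::is_same_v<
    alpaka::trait::TagToAcc<alpaka::TagCpuSycl, Dim, Idx>::type,
    alpaka::AccCpuSycl<Dim, Idx>>);
```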

6 changes: 3 additions & 3 deletions include/alpaka/acc/AccFpgaSyclIntel.hpp
@@ -1,4 +1,4 @@
/* Copyright 2022 Jan Stephan
/* Copyright 2023 Jan Stephan, Aurora Perego
* SPDX-License-Identifier: MPL-2.0
*/

@@ -22,7 +22,7 @@

#if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_BACKEND_ONEAPI) && defined(ALPAKA_SYCL_ONEAPI_FPGA)

# include <CL/sycl.hpp>
# include <sycl/sycl.hpp>

namespace alpaka
{
@@ -45,7 +45,7 @@ namespace alpaka::trait
template<typename TDim, typename TIdx>
struct GetAccName<AccFpgaSyclIntel<TDim, TIdx>>
{
ALPAKA_FN_HOST static auto getAccName() -> std::string
static auto getAccName() -> std::string
{
return "AccFpgaSyclIntel<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
}
36 changes: 7 additions & 29 deletions include/alpaka/acc/AccGenericSycl.hpp
@@ -1,4 +1,4 @@
/* Copyright 2023 Jan Stephan, Antonio Di Pilato, Andrea Bocci
/* Copyright 2023 Jan Stephan, Antonio Di Pilato, Andrea Bocci, Luca Ferragina, Aurora Perego
* SPDX-License-Identifier: MPL-2.0
*/

@@ -15,6 +15,7 @@
#include "alpaka/intrinsic/IntrinsicGenericSycl.hpp"
#include "alpaka/math/MathGenericSycl.hpp"
#include "alpaka/mem/fence/MemFenceGenericSycl.hpp"
#include "alpaka/rand/RandGenericSycl.hpp"
#include "alpaka/warp/WarpGenericSycl.hpp"
#include "alpaka/workdiv/WorkDivGenericSycl.hpp"

@@ -37,7 +38,7 @@

#ifdef ALPAKA_ACC_SYCL_ENABLED

# include <CL/sycl.hpp>
# include <sycl/sycl.hpp>

namespace alpaka
{
@@ -56,40 +57,17 @@ namespace alpaka
, public BlockSyncGenericSycl<TDim>
, public IntrinsicGenericSycl
, public MemFenceGenericSycl
, public rand::RandGenericSycl<TDim>
, public warp::WarpGenericSycl<TDim>
{
static_assert(TDim::value > 0, "The SYCL accelerator must have a dimension greater than zero.");

public:
AccGenericSycl(AccGenericSycl const&) = delete;
AccGenericSycl(AccGenericSycl&&) = delete;
auto operator=(AccGenericSycl const&) -> AccGenericSycl& = delete;
auto operator=(AccGenericSycl&&) -> AccGenericSycl& = delete;

# ifdef ALPAKA_SYCL_IOSTREAM_ENABLED
AccGenericSycl(
Vec<TDim, TIdx> const& threadElemExtent,
sycl::nd_item<TDim::value> work_item,
sycl::local_accessor<std::byte> dyn_shared_acc,
sycl::local_accessor<std::byte> st_shared_acc,
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device> global_fence_dummy,
sycl::local_accessor<int> local_fence_dummy,
sycl::stream output_stream)
: WorkDivGenericSycl<TDim, TIdx>{threadElemExtent, work_item}
, gb::IdxGbGenericSycl<TDim, TIdx>{work_item}
, bt::IdxBtGenericSycl<TDim, TIdx>{work_item}
, AtomicHierarchy<AtomicGenericSycl, AtomicGenericSycl, AtomicGenericSycl>{}
, math::MathGenericSycl{}
, BlockSharedMemDynGenericSycl{dyn_shared_acc}
, BlockSharedMemStGenericSycl{st_shared_acc}
, BlockSyncGenericSycl<TDim>{work_item}
, IntrinsicGenericSycl{}
, MemFenceGenericSycl{global_fence_dummy, local_fence_dummy}
, warp::WarpGenericSycl<TDim>{work_item}
, cout{output_stream}
{
}

sycl::stream cout;
# else
AccGenericSycl(
Vec<TDim, TIdx> const& threadElemExtent,
sycl::nd_item<TDim::value> work_item,
@@ -107,10 +85,10 @@ namespace alpaka
, BlockSyncGenericSycl<TDim>{work_item}
, IntrinsicGenericSycl{}
, MemFenceGenericSycl{global_fence_dummy, local_fence_dummy}
, rand::RandGenericSycl<TDim>{work_item}
, warp::WarpGenericSycl<TDim>{work_item}
{
}
# endif
};
} // namespace alpaka

4 changes: 2 additions & 2 deletions include/alpaka/acc/Tag.hpp
@@ -1,4 +1,4 @@
/* Copyright 2023 Simeon Ehrig, Jan Stephan
/* Copyright 2023 Simeon Ehrig, Jan Stephan, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

@@ -23,7 +23,7 @@ namespace alpaka
CREATE_ACC_TAG(TagCpuOmp2Blocks);
CREATE_ACC_TAG(TagCpuOmp2Threads);
CREATE_ACC_TAG(TagCpuSerial);
CREATE_ACC_TAG(TagCpuSyclIntel);
CREATE_ACC_TAG(TagCpuSycl);
CREATE_ACC_TAG(TagCpuTbbBlocks);
CREATE_ACC_TAG(TagCpuThreads);
CREATE_ACC_TAG(TagFpgaSyclIntel);