From 0fb80378595f9484f3ef57fe5c536fb292efd131 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 9 Jan 2024 21:20:32 +0100 Subject: [PATCH] Add alpaka::getPreferredWarpSize(dev) alpaka::getPreferredWarpSize(dev) returns one of the possible warp sizes supported by the device. On devices that support a single warp size (cpu, CUDA gpu, ROCm gpu), getPreferredWarpSize(dev) avoids the overhead of wrapping that value in an std::vector. On devices that support multiple warp sizes, the value returned by getPreferredWarpSize(dev) is unspecified. Currently it returns the largest supported value -- but this could change in a future version of alpaka. Signed-off-by: Andrea Bocci --- include/alpaka/dev/DevCpu.hpp | 14 ++++++++-- include/alpaka/dev/DevGenericSycl.hpp | 15 ++++++++++- include/alpaka/dev/DevUniformCudaHipRt.hpp | 31 +++++++++++++++++++--- include/alpaka/dev/Traits.hpp | 13 ++++++++- test/unit/dev/src/DevWarpSizeTest.cpp | 10 ++++++- 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/include/alpaka/dev/DevCpu.hpp b/include/alpaka/dev/DevCpu.hpp index 7a91652518ec..e36c263072fa 100644 --- a/include/alpaka/dev/DevCpu.hpp +++ b/include/alpaka/dev/DevCpu.hpp @@ -1,5 +1,5 @@ -/* Copyright 2022 Axel Huebl, Benjamin Worpitz, Matthias Werner, Jan Stephan, Bernhard Manfred Gruber, - * Antonio Di Pilato +/* Copyright 2024 Axel Huebl, Benjamin Worpitz, Matthias Werner, Jan Stephan, Bernhard Manfred Gruber, + * Antonio Di Pilato, Andrea Bocci * SPDX-License-Identifier: MPL-2.0 */ @@ -135,6 +135,16 @@ namespace alpaka } }; + //! The CPU device preferred warp size get trait specialization. + template<> + struct GetPreferredWarpSize + { + ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevCpu const& /* dev */) -> std::size_t + { + return 1u; + } + }; + //! The CPU device reset trait specialization. 
template<> struct Reset diff --git a/include/alpaka/dev/DevGenericSycl.hpp b/include/alpaka/dev/DevGenericSycl.hpp index 85e00738cf98..729090f8f2d3 100644 --- a/include/alpaka/dev/DevGenericSycl.hpp +++ b/include/alpaka/dev/DevGenericSycl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2023 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego +/* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego, Andrea Bocci * SPDX-License-Identifier: MPL-2.0 */ @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -181,10 +182,22 @@ namespace alpaka::trait auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64); if(find64 != warp_sizes.end()) warp_sizes.erase(find64); + // Sort the warp sizes in decreasing order + std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{}); return warp_sizes; } }; + //! The SYCL device preferred warp size get trait specialization. + template + struct GetPreferredWarpSize> + { + static auto getPreferredWarpSize(DevGenericSycl const& dev) -> std::size_t + { + return GetWarpSizes>::getWarpSizes(dev).front(); + } + }; + //! The SYCL device reset trait specialization. 
template struct Reset> diff --git a/include/alpaka/dev/DevUniformCudaHipRt.hpp b/include/alpaka/dev/DevUniformCudaHipRt.hpp index 632b49829dd2..1b0758d66f63 100644 --- a/include/alpaka/dev/DevUniformCudaHipRt.hpp +++ b/include/alpaka/dev/DevUniformCudaHipRt.hpp @@ -1,10 +1,11 @@ -/* Copyright 2023 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber, - * Antonio Di Pilato, Jan Stephan +/* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber, + * Antonio Di Pilato, Jan Stephan * SPDX-License-Identifier: MPL-2.0 */ #pragma once +#include "alpaka/core/ApiCudaRt.hpp" #include "alpaka/core/Concepts.hpp" #include "alpaka/core/Cuda.hpp" #include "alpaka/core/Hip.hpp" @@ -163,13 +164,37 @@ namespace alpaka struct GetWarpSizes> { ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt const& dev) -> std::vector + { + return {GetPreferredWarpSize>::getPreferredWarpSize(dev)}; + } + }; + + //! The CUDA/HIP RT preferred device warp size get trait specialization. + template + struct GetPreferredWarpSize> + { + ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt const& dev) -> std::size_t { typename TApi::DeviceProp_t devProp; ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle())); - return {static_cast(devProp.warpSize)}; + return static_cast(devProp.warpSize); + } + }; + +# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED + //! The CUDA RT preferred device warp size get trait specialization. + template<> + struct GetPreferredWarpSize> + { + ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevUniformCudaHipRt const& /* dev */) + -> std::size_t + { + // All CUDA GPUs to date have a warp size of 32 threads. + return 32u; } }; +# endif // ALPAKA_ACC_GPU_CUDA_ENABLED //! The CUDA/HIP RT device reset trait specialization. 
template diff --git a/include/alpaka/dev/Traits.hpp b/include/alpaka/dev/Traits.hpp index ca7b358ca8a4..096ce5bb3a7e 100644 --- a/include/alpaka/dev/Traits.hpp +++ b/include/alpaka/dev/Traits.hpp @@ -1,4 +1,4 @@ -/* Copyright 2022 Benjamin Worpitz, Bernhard Manfred Gruber, Jan Stephan +/* Copyright 2024 Benjamin Worpitz, Bernhard Manfred Gruber, Jan Stephan, Andrea Bocci * SPDX-License-Identifier: MPL-2.0 */ @@ -42,6 +42,10 @@ namespace alpaka template struct GetWarpSizes; + //! The device preferred warp size get trait. + template + struct GetPreferredWarpSize; + //! The device reset trait. template struct Reset; @@ -109,6 +113,13 @@ namespace alpaka return trait::GetWarpSizes::getWarpSizes(dev); } + //! \return The preferred warp size on the device in number of threads. + template + ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const& dev) -> std::size_t + { + return trait::GetPreferredWarpSize::getPreferredWarpSize(dev); + } + //! Resets the device. //! What this method does is dependent on the accelerator. template diff --git a/test/unit/dev/src/DevWarpSizeTest.cpp b/test/unit/dev/src/DevWarpSizeTest.cpp index ab513fcc7ad1..e8f76b37d346 100644 --- a/test/unit/dev/src/DevWarpSizeTest.cpp +++ b/test/unit/dev/src/DevWarpSizeTest.cpp @@ -1,4 +1,4 @@ -/* Copyright 2022 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan +/* Copyright 2024 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan, Andrea Bocci * SPDX-License-Identifier: MPL-2.0 */ @@ -21,3 +21,11 @@ TEMPLATE_LIST_TEST_CASE("getWarpSizes", "[dev]", alpaka::test::TestAccs) std::cend(warpExtents), [](std::size_t warpExtent) { return warpExtent > 0; })); } + +TEMPLATE_LIST_TEST_CASE("getPreferredWarpSize", "[dev]", alpaka::test::TestAccs) +{ + auto const platform = alpaka::Platform{}; + auto const dev = alpaka::getDevByIdx(platform, 0); + auto const preferredWarpSize = alpaka::getPreferredWarpSize(dev); + REQUIRE(preferredWarpSize > 0); +}