Skip to content

Commit

Permalink
Add alpaka::getPreferredWarpSize(dev)
Browse files Browse the repository at this point in the history
alpaka::getPreferredWarpSize(dev) returns one of the possible warp sizes
supported by the device.
On devices that support a single warp size (cpu, CUDA gpu, ROCm gpu),
getPreferredWarpSize(dev) avoids the overhead of wrapping that value in an
std::vector.
On devices that support multiple warp sizes, the value returned by
getPreferredWarpSize(dev) is unspecified. Currently it returns the largest
supported value -- but this could change in a future version of alpaka.

Signed-off-by: Andrea Bocci <andrea.bocci@cern.ch>
  • Loading branch information
fwyzard committed Jan 16, 2024
1 parent 65604b0 commit 0fb8037
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 8 deletions.
14 changes: 12 additions & 2 deletions include/alpaka/dev/DevCpu.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Copyright 2022 Axel Huebl, Benjamin Worpitz, Matthias Werner, Jan Stephan, Bernhard Manfred Gruber,
* Antonio Di Pilato
/* Copyright 2024 Axel Huebl, Benjamin Worpitz, Matthias Werner, Jan Stephan, Bernhard Manfred Gruber,
* Antonio Di Pilato, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

Expand Down Expand Up @@ -135,6 +135,16 @@ namespace alpaka
}
};

//! Preferred warp size trait, specialized for the CPU device.
template<>
struct GetPreferredWarpSize<DevCpu>
{
    //! \return Always 1; the device argument is not inspected.
    ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevCpu const& /* dev */) -> std::size_t
    {
        constexpr std::size_t cpuWarpSize = 1u;
        return cpuWarpSize;
    }
};

//! The CPU device reset trait specialization.
template<>
struct Reset<DevCpu>
Expand Down
15 changes: 14 additions & 1 deletion include/alpaka/dev/DevGenericSycl.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2023 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego
/* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

Expand All @@ -18,6 +18,7 @@

#include <algorithm>
#include <cstddef>
#include <functional>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>
Expand Down Expand Up @@ -181,10 +182,22 @@ namespace alpaka::trait
auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
if(find64 != warp_sizes.end())
warp_sizes.erase(find64);
// Sort the warp sizes in decreasing order
std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
return warp_sizes;
}
};

//! The SYCL device preferred warp size get trait specialization.
template<typename TPlatform>
struct GetPreferredWarpSize<DevGenericSycl<TPlatform>>
{
    //! \param dev The SYCL device to query.
    //! \return The first entry of the list produced by GetWarpSizes for this device. That list is sorted in
    //!         decreasing order, so this is the largest warp size it contains.
    //! \throws std::runtime_error if the list of warp sizes is empty.
    static auto getPreferredWarpSize(DevGenericSycl<TPlatform> const& dev) -> std::size_t
    {
        auto const warpSizes = GetWarpSizes<DevGenericSycl<TPlatform>>::getWarpSizes(dev);
        // getWarpSizes() removes entries from the sizes reported by the device, so the list may be empty.
        // Calling front() on an empty vector is undefined behaviour, so fail explicitly instead.
        if(warpSizes.empty())
            throw std::runtime_error(
                "alpaka::getPreferredWarpSize: the SYCL device does not report any supported warp size");
        return warpSizes.front();
    }
};

//! The SYCL device reset trait specialization.
template<typename TPlatform>
struct Reset<DevGenericSycl<TPlatform>>
Expand Down
31 changes: 28 additions & 3 deletions include/alpaka/dev/DevUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
/* Copyright 2023 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
* Antonio Di Pilato, Jan Stephan
/* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
* Antonio Di Pilato, Jan Stephan
* SPDX-License-Identifier: MPL-2.0
*/

#pragma once

#include "alpaka/core/ApiCudaRt.hpp"
#include "alpaka/core/Concepts.hpp"
#include "alpaka/core/Cuda.hpp"
#include "alpaka/core/Hip.hpp"
Expand Down Expand Up @@ -163,13 +164,37 @@ namespace alpaka
struct GetWarpSizes<DevUniformCudaHipRt<TApi>>
{
ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
{
return {GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>::getPreferredWarpSize(dev)};
}
};

//! The CUDA/HIP RT preferred device warp size get trait specialization.
template<typename TApi>
struct GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>
{
    //! Queries the device properties through TApi and reports their `warpSize` field.
    //! \param dev The device whose native handle is passed to TApi::getDeviceProperties.
    //! \return The warp size stored in the device properties, converted to std::size_t.
    ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
    {
        typename TApi::DeviceProp_t devProp;
        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle()));

        // Single return: the braced form left over from the vector-returning getWarpSizes() was redundant
        // and made this statement unreachable.
        return static_cast<std::size_t>(devProp.warpSize);
    }
};

# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
//! Preferred warp size trait, fully specialized for CUDA RT devices.
template<>
struct GetPreferredWarpSize<DevUniformCudaHipRt<ApiCudaRt>>
{
    //! \return A compile-time constant; the device argument is not queried.
    ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevUniformCudaHipRt<ApiCudaRt> const& /* dev */)
        -> std::size_t
    {
        // All CUDA GPUs to date have a warp size of 32 threads.
        constexpr std::size_t cudaWarpSize = 32u;
        return cudaWarpSize;
    }
};
# endif // ALPAKA_ACC_GPU_CUDA_ENABLED

//! The CUDA/HIP RT device reset trait specialization.
template<typename TApi>
Expand Down
13 changes: 12 additions & 1 deletion include/alpaka/dev/Traits.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2022 Benjamin Worpitz, Bernhard Manfred Gruber, Jan Stephan
/* Copyright 2024 Benjamin Worpitz, Bernhard Manfred Gruber, Jan Stephan, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

Expand Down Expand Up @@ -42,6 +42,10 @@ namespace alpaka
template<typename T, typename TSfinae = void>
struct GetWarpSizes;

//! The device preferred warp size get trait.
//!
//! Only declared here; each device type supplies its own specialization.
//! A specialization must provide a static member function `getPreferredWarpSize(dev)` whose result is
//! convertible to std::size_t, because alpaka::getPreferredWarpSize forwards to it.
template<typename T, typename TSfinae = void>
struct GetPreferredWarpSize;

//! The device reset trait.
template<typename T, typename TSfinae = void>
struct Reset;
Expand Down Expand Up @@ -109,6 +113,13 @@ namespace alpaka
return trait::GetWarpSizes<TDev>::getWarpSizes(dev);
}

//! Forwards to the trait::GetPreferredWarpSize specialization for the given device type.
//! \tparam TDev The device type.
//! \param dev The device to query.
//! \return The preferred warp size on the device in number of threads.
template<typename TDev>
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const& dev) -> std::size_t
{
    using PreferredWarpSizeTrait = trait::GetPreferredWarpSize<TDev>;
    return PreferredWarpSizeTrait::getPreferredWarpSize(dev);
}

//! Resets the device.
//! What this method does is dependent on the accelerator.
template<typename TDev>
Expand Down
10 changes: 9 additions & 1 deletion test/unit/dev/src/DevWarpSizeTest.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2022 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan
/* Copyright 2024 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan, Andrea Bocci
* SPDX-License-Identifier: MPL-2.0
*/

Expand All @@ -21,3 +21,11 @@ TEMPLATE_LIST_TEST_CASE("getWarpSizes", "[dev]", alpaka::test::TestAccs)
std::cend(warpExtents),
[](std::size_t warpExtent) { return warpExtent > 0; }));
}

// For each accelerator in TestAccs: the device at index 0 must report a strictly positive preferred warp size.
TEMPLATE_LIST_TEST_CASE("getPreferredWarpSize", "[dev]", alpaka::test::TestAccs)
{
    alpaka::Platform<TestType> const platform{};
    auto const device = alpaka::getDevByIdx(platform, 0);
    std::size_t const warpSize = alpaka::getPreferredWarpSize(device);
    REQUIRE(warpSize > 0);
}

0 comments on commit 0fb8037

Please sign in to comment.