Skip to content

Commit

Permalink
Change all CUDA warp operations to synchronise all threads
Browse files (browse the repository at this point in the history)
  • Loading branch information
fwyzard authored and psychocoderHPC committed Dec 13, 2023
1 parent 6312741 commit 4b42a01
Showing 1 changed file with 8 additions and 8 deletions.
16 changes (8 additions & 8 deletions) in include/alpaka/warp/WarpUniformCudaHipBuiltIn.hpp
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2022 Sergei Bastrakov, David M. Rogers, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Aurora Perego
/* Copyright 2023 Sergei Bastrakov, David M. Rogers, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Aurora Perego
* SPDX-License-Identifier: MPL-2.0
*/

Expand Down Expand Up @@ -67,7 +67,7 @@ namespace alpaka::warp
std::int32_t predicate) -> std::int32_t
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __all_sync(activemask(warp), predicate);
return __all_sync(0xffff'ffff, predicate);
# else
return __all(predicate);
# endif
Expand All @@ -82,7 +82,7 @@ namespace alpaka::warp
std::int32_t predicate) -> std::int32_t
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __any_sync(activemask(warp), predicate);
return __any_sync(0xffff'ffff, predicate);
# else
return __any(predicate);
# endif
Expand All @@ -103,7 +103,7 @@ namespace alpaka::warp
# endif
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __ballot_sync(activemask(warp), predicate);
return __ballot_sync(0xffff'ffff, predicate);
# else
return __ballot(predicate);
# endif
Expand All @@ -121,7 +121,7 @@ namespace alpaka::warp
std::int32_t width) -> T
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __shfl_sync(activemask(warp), val, srcLane, width);
return __shfl_sync(0xffff'ffff, val, srcLane, width);
# else
return __shfl(val, srcLane, width);
# endif
Expand All @@ -139,7 +139,7 @@ namespace alpaka::warp
std::int32_t width) -> T
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __shfl_up_sync(activemask(warp), val, offset, width);
return __shfl_up_sync(0xffff'ffff, val, offset, width);
# else
return __shfl_up(val, offset, width);
# endif
Expand All @@ -157,7 +157,7 @@ namespace alpaka::warp
std::int32_t width) -> T
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __shfl_down_sync(activemask(warp), val, offset, width);
return __shfl_down_sync(0xffff'ffff, val, offset, width);
# else
return __shfl_down(val, offset, width);
# endif
Expand All @@ -175,7 +175,7 @@ namespace alpaka::warp
std::int32_t width) -> T
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
return __shfl_xor_sync(activemask(warp), val, mask, width);
return __shfl_xor_sync(0xffff'ffff, val, mask, width);
# else
return __shfl_xor(val, mask, width);
# endif
Expand Down

0 comments on commit 4b42a01

Please sign in to comment.