Skip to content

Commit

Permalink
Remove CudaVectorArrayWrapper, use Vec for Philox counters.
Browse files Browse the repository at this point in the history
  • Loading branch information
sliwowitz authored and psychocoderHPC committed Mar 20, 2024
1 parent e564da6 commit 79448a5
Show file tree
Hide file tree
Showing 25 changed files with 327 additions and 851 deletions.
19 changes: 8 additions & 11 deletions example/counterBasedRng/src/counterBasedRng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@ class CounterBasedRngKernel
public:
template<class TAcc>
using Vec = alpaka::Vec<alpaka::Dim<TAcc>, alpaka::Idx<TAcc>>;
template<class TAcc>
using Gen = typename alpaka::rand::PhiloxStateless4x32x10Vector<TAcc>;
template<class TAcc>
using Key = typename Gen<TAcc>::Key;
template<class TAcc>
using Counter = typename Gen<TAcc>::Counter;
using Gen = typename alpaka::rand::PhiloxStateless4x32x10Vector;
using Key = typename Gen::Key;
using Counter = typename Gen::Counter;

template<typename TAcc, typename TElem>
using Mdspan = alpaka::experimental::MdSpan<TElem, alpaka::Idx<TAcc>, alpaka::Dim<TAcc>>;
Expand All @@ -36,7 +33,7 @@ class CounterBasedRngKernel
static ALPAKA_FN_ACC auto elemLoop(
TAcc const& acc,
Mdspan<TAcc, TElem> dst,
Key<TAcc> const& key,
Key const& key,
Vec<TAcc> const& threadElemExtent,
Vec<TAcc>& threadFirstElemIdx) -> void
{
Expand All @@ -56,14 +53,14 @@ class CounterBasedRngKernel
}
else
{
Counter<TAcc> c = {0, 0, 0, 0};
Counter c = {0, 0, 0, 0};
for(unsigned int i = 0; i < Dim; ++i)
c[i] = threadFirstElemIdx[i];

for(; threadFirstElemIdx[Dim - 1] < threadLastElemIdxClipped; ++threadFirstElemIdx[Dim - 1])
{
c[Dim - 1] = threadFirstElemIdx[Dim - 1];
auto const random = Gen<TAcc>::generate(c, key);
auto const random = Gen::generate(c, key);
// to make use of the whole random vector we would need to ensure numElement[0] % 4 == 0
dst(alpaka::toArray(threadFirstElemIdx)) = TElem(random[0]);
}
Expand All @@ -82,7 +79,7 @@ class CounterBasedRngKernel
//! \param extent The matrix dimension in elements.
ALPAKA_NO_HOST_ACC_WARNING
template<typename TAcc, typename TElem>
ALPAKA_FN_ACC auto operator()(TAcc const& acc, Mdspan<TAcc, TElem> dst, Key<TAcc> const& key) const -> void
ALPAKA_FN_ACC auto operator()(TAcc const& acc, Mdspan<TAcc, TElem> dst, Key const& key) const -> void
{
constexpr auto Dim = alpaka::Dim<TAcc>::value;
static_assert(Dim <= 4, "The CounterBasedRngKernel expects at most 4-dimensional indices!");
Expand Down Expand Up @@ -166,7 +163,7 @@ auto main() -> int
Data* const pBufHostDev(alpaka::getPtrNative(bufHostDev));

std::random_device rd{};
CounterBasedRngKernel::Key<AccHost> key = {rd(), rd()};
CounterBasedRngKernel::Key key = {rd(), rd()};

// Allocate buffer on the accelerator
using BufAcc = alpaka::Buf<Acc, Data, Dim, Idx>;
Expand Down
40 changes: 19 additions & 21 deletions example/randomCells2D/src/randomCells2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@ constexpr unsigned NUM_X = 127;
constexpr unsigned NUM_Y = 211;

/// Selected PRNG engine for single-value operation
template<typename TAcc>
using RandomEngineSingle = alpaka::rand::Philox4x32x10<TAcc>;
using RandomEngineSingle = alpaka::rand::Philox4x32x10;
// using RandomEngineSingle = alpaka::rand::engine::uniform_cuda_hip::Xor;
// using RandomEngineSingle = alpaka::rand::engine::cpu::MersenneTwister;
// using RandomEngineSingle = alpaka::rand::engine::cpu::TinyMersenneTwister;


/// Selected PRNG engine for vector operation
template<typename TAcc>
using RandomEngineVector = alpaka::rand::Philox4x32x10Vector<TAcc>;
using RandomEngineVector = alpaka::rand::Philox4x32x10Vector;

/** Get a pointer to the correct location of `TElement array` taking pitch into account.
*
Expand Down Expand Up @@ -71,7 +69,7 @@ struct RunTimestepKernelSingle
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
TExtent const extent,
RandomEngineSingle<TAcc>* const states,
RandomEngineSingle* const states,
float* const cells,
std::size_t pitchRand,
std::size_t pitchOut) const -> void
Expand All @@ -84,7 +82,7 @@ struct RunTimestepKernelSingle
auto cellsOut = pitchedPointer2D(cells, pitchOut, idx);

// Setup generator and distribution.
RandomEngineSingle<TAcc> engine(*statesOut);
RandomEngineSingle engine(*statesOut);
alpaka::rand::UniformReal<float> dist;

float sum = 0;
Expand All @@ -104,7 +102,7 @@ struct RunTimestepKernelVector
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
TExtent const extent,
RandomEngineVector<TAcc>* const states,
RandomEngineVector* const states,
float* const cells,
std::size_t pitchRand,
std::size_t pitchOut) const -> void
Expand All @@ -117,10 +115,10 @@ struct RunTimestepKernelVector
auto cellsOut = pitchedPointer2D(cells, pitchOut, idx);

// Setup generator and distribution.
RandomEngineVector<TAcc> engine(*statesOut); // Load the state of the random engine
RandomEngineVector engine(*statesOut); // Load the state of the random engine
using DistributionResult =
typename RandomEngineVector<TAcc>::template ResultContainer<float>; // Container type which will store
// the distribution results
typename RandomEngineVector::template ResultContainer<float>; // Container type which will store
// the distribution results
constexpr unsigned resultVectorSize = std::tuple_size_v<DistributionResult>; // Size of the result vector
alpaka::rand::UniformReal<DistributionResult> dist; // Vector-aware distribution function

Expand Down Expand Up @@ -160,16 +158,16 @@ auto main() -> int

using BufHost = alpaka::Buf<Host, float, Dim, Idx>;
using BufAcc = alpaka::Buf<Acc, float, Dim, Idx>;
using BufHostRand = alpaka::Buf<Host, RandomEngineSingle<Acc>, Dim, Idx>;
using BufAccRand = alpaka::Buf<Acc, RandomEngineSingle<Acc>, Dim, Idx>;
using BufHostRandVec = alpaka::Buf<Host, RandomEngineVector<Acc>, Dim, Idx>;
using BufAccRandVec = alpaka::Buf<Acc, RandomEngineVector<Acc>, Dim, Idx>;
using BufHostRand = alpaka::Buf<Host, RandomEngineSingle, Dim, Idx>;
using BufAccRand = alpaka::Buf<Acc, RandomEngineSingle, Dim, Idx>;
using BufHostRandVec = alpaka::Buf<Host, RandomEngineVector, Dim, Idx>;
using BufAccRandVec = alpaka::Buf<Acc, RandomEngineVector, Dim, Idx>;
using WorkDiv = alpaka::WorkDivMembers<Dim, Idx>;

constexpr Idx numX = NUM_X;
constexpr Idx numY = NUM_Y;

const Vec extent(numY, numX);
Vec const extent(numY, numX);

constexpr Idx perThreadX = 1;
constexpr Idx perThreadY = 1;
Expand All @@ -192,13 +190,13 @@ auto main() -> int
BufAcc bufAccV{alpaka::allocBuf<float, Idx>(devAcc, extent)};
float* const ptrBufAccV{alpaka::getPtrNative(bufAccV)};

BufHostRand bufHostRandS{alpaka::allocBuf<RandomEngineSingle<Acc>, Idx>(devHost, extent)};
BufAccRand bufAccRandS{alpaka::allocBuf<RandomEngineSingle<Acc>, Idx>(devAcc, extent)};
RandomEngineSingle<Acc>* const ptrBufAccRandS{alpaka::getPtrNative(bufAccRandS)};
BufHostRand bufHostRandS{alpaka::allocBuf<RandomEngineSingle, Idx>(devHost, extent)};
BufAccRand bufAccRandS{alpaka::allocBuf<RandomEngineSingle, Idx>(devAcc, extent)};
RandomEngineSingle* const ptrBufAccRandS{alpaka::getPtrNative(bufAccRandS)};

BufHostRandVec bufHostRandV{alpaka::allocBuf<RandomEngineVector<Acc>, Idx>(devHost, extent)};
BufAccRandVec bufAccRandV{alpaka::allocBuf<RandomEngineVector<Acc>, Idx>(devAcc, extent)};
RandomEngineVector<Acc>* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)};
BufHostRandVec bufHostRandV{alpaka::allocBuf<RandomEngineVector, Idx>(devHost, extent)};
BufAccRandVec bufAccRandV{alpaka::allocBuf<RandomEngineVector, Idx>(devAcc, extent)};
RandomEngineVector* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)};

InitRandomKernel initRandomKernel;
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[0];
Expand Down
17 changes: 8 additions & 9 deletions example/randomStrategies/src/randomStrategies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ constexpr unsigned NUM_ROLLS = 2000; ///< Amount of random number "dice rolls" p

/// Selected PRNG engine
// Comment out the current "using" line and uncomment a different one to change the PRNG engine.
template<typename TAcc>
using RandomEngine = alpaka::rand::Philox4x32x10<TAcc>;
using RandomEngine = alpaka::rand::Philox4x32x10;

// using RandomEngine = alpaka::rand::engine::cpu::MersenneTwister;
// using RandomEngine = alpaka::rand::engine::cpu::TinyMersenneTwister;
Expand All @@ -45,8 +44,8 @@ struct Box
QueueAcc queue; ///< default accelerator queue

// buffers holding the PRNG states
using BufHostRand = alpaka::Buf<Host, RandomEngine<Acc>, Dim, Idx>;
using BufAccRand = alpaka::Buf<Acc, RandomEngine<Acc>, Dim, Idx>;
using BufHostRand = alpaka::Buf<Host, RandomEngine, Dim, Idx>;
using BufAccRand = alpaka::Buf<Acc, RandomEngine, Dim, Idx>;

Vec const extentRand; ///< size of the buffer of PRNG states
WorkDiv workdivRand; ///< work division for PRNG buffer initialization
Expand All @@ -71,8 +70,8 @@ struct Box
Vec(Idx{1}),
false,
alpaka::GridBlockExtentSubDivRestrictions::Unrestricted)}
, bufHostRand{alpaka::allocBuf<RandomEngine<Acc>, Idx>(alpaka::getDevByIdx(hostPlatform, 0), extentRand)}
, bufAccRand{alpaka::allocBuf<RandomEngine<Acc>, Idx>(alpaka::getDevByIdx(accPlatform, 0), extentRand)}
, bufHostRand{alpaka::allocBuf<RandomEngine, Idx>(alpaka::getDevByIdx(hostPlatform, 0), extentRand)}
, bufAccRand{alpaka::allocBuf<RandomEngine, Idx>(alpaka::getDevByIdx(accPlatform, 0), extentRand)}
, extentResult{static_cast<Idx>((NUM_POINTS * NUM_ROLLS))} // Store all "rolls" for each "point"
, workdivResult{alpaka::getValidWorkDiv<Acc>(
alpaka::getDevByIdx(accPlatform, 0),
Expand Down Expand Up @@ -167,7 +166,7 @@ struct FillKernel
ALPAKA_FN_ACC auto operator()(
TAcc const& acc, ///< current accelerator
TExtent const extent, ///< size of the results buffer
RandomEngine<TAcc>* const states, ///< PRNG states buffer
RandomEngine* const states, ///< PRNG states buffer
float* const cells ///< results buffer
) const -> void
{
Expand All @@ -180,7 +179,7 @@ struct FillKernel
auto const numWorkers
= alpaka::math::min(acc, numGridThreads, static_cast<decltype(numGridThreads)>(NUM_POINTS));

RandomEngine<TAcc> engine(states[idx]); // Setup the PRNG using the saved state for this thread.
RandomEngine engine(states[idx]); // Setup the PRNG using the saved state for this thread.
alpaka::rand::UniformReal<float> dist; // Setup the random number distribution
for(uint32_t i = idx; i < extent[0]; i += numWorkers)
{
Expand Down Expand Up @@ -245,7 +244,7 @@ template<Strategy TStrategy>
void runStrategy(Box& box)
{
// Set up the pointer to the PRNG states buffer
RandomEngine<Box::Acc>* const ptrBufAccRand{alpaka::getPtrNative(box.bufAccRand)};
RandomEngine* const ptrBufAccRand{alpaka::getPtrNative(box.bufAccRand)};

// Initialize the PRNG and its states on the device
InitRandomKernel<TStrategy> initRandomKernel;
Expand Down
1 change: 0 additions & 1 deletion include/alpaka/alpaka.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@
#include "alpaka/meta/Apply.hpp"
#include "alpaka/meta/CartesianProduct.hpp"
#include "alpaka/meta/Concatenate.hpp"
#include "alpaka/meta/CudaVectorArrayWrapper.hpp"
#include "alpaka/meta/DependentFalseType.hpp"
#include "alpaka/meta/Filter.hpp"
#include "alpaka/meta/Fold.hpp"
Expand Down
Loading

0 comments on commit 79448a5

Please sign in to comment.