Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make kernel results depend on each other directly #2420

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions benchmarks/babelstream/src/babelStreamMainTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* Can be run with custom arguments as well as catch2 arguments
* Run with Custom arguments:
* ./babelstream --array-size=33554432 --number-runs=100
* Runt with default array size and num runs:
* Run with default array size and num runs:
* ./babelstream
* Run with Catch2 arguments and default array size and num runs:
* ./babelstream --success
Expand Down Expand Up @@ -76,12 +76,12 @@ struct CopyKernel
//! \tparam T The data type
//! \param acc The accelerator to be executed on.
//! \param a Pointer for vector a
//! \param b Pointer for vector b
//! \param c Pointer for vector c
template<typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* b) const
ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* c) const
{
auto const [index] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
b[index] = a[index];
c[index] = a[index];
}
};

Expand All @@ -92,14 +92,14 @@ struct MultKernel
//! \tparam TAcc The accelerator environment to be executed on.
//! \tparam T The data type
//! \param acc The accelerator to be executed on.
//! \param a Pointer for vector a
//! \param c Pointer for vector c
//! \param b Pointer for result vector b
template<typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const a, T* b) const
ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const c, T* b) const
{
const T scalar = static_cast<T>(scalarVal);
auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
b[i] = scalar * a[i];
b[i] = scalar * c[i];
}
};

Expand Down Expand Up @@ -132,11 +132,11 @@ struct TriadKernel
//! \param b Pointer for vector b
//! \param c Pointer for vector c
template<typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* c) const
ALPAKA_FN_ACC void operator()(TAcc const& acc, T* a, T const* b, T const* c) const
{
const T scalar = static_cast<T>(scalarVal);
auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
c[i] = a[i] + scalar * b[i];
a[i] = b[i] + scalar * c[i];
}
};

Expand All @@ -151,6 +151,7 @@ struct DotKernel
//! \param a Pointer for vector a
//! \param b Pointer for vector b
//! \param sum Pointer for result vector consisting of the sums for each block
//! \param arraySize the size of the array
template<typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* sum, alpaka::Idx<TAcc> arraySize) const
{
Expand Down Expand Up @@ -316,23 +317,23 @@ void testKernels()
},
"InitKernel");

// Test the copy-kernel. Copy A one by one to B.
// Test the copy-kernel. Copy A one by one to C.
measureKernelExec(
[&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccInputBPtr); },
[&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccOutputCPtr); },
"CopyKernel");

// Test the scaling-kernel. Calculate B=scalar*A.
// Test the scaling-kernel. Calculate B=scalar*C. Where C = A.
measureKernelExec(
[&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccInputAPtr, bufAccInputBPtr); },
[&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccOutputCPtr, bufAccInputBPtr); },
"MultKernel");

// Test the addition-kernel. Calculate C=A+B. Where B=scalar*A.
// Test the addition-kernel. Calculate C=A+B, where B=scalar*A (B was computed as scalar*C while C held a copy of A).
measureKernelExec(
[&]()
{ alpaka::exec<Acc>(queue, workDivAdd, AddKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
"AddKernel");

// Test the Triad-kernel. Calculate C=A+scalar*B where B=scalar*A.
// Test the Triad-kernel. Calculate A=B+scalar*C. Where C is A+scalar*A.
measureKernelExec(
[&]()
{ alpaka::exec<Acc>(queue, workDivTriad, TriadKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
Expand All @@ -350,9 +351,9 @@ void testKernels()
DataType initVal{static_cast<DataType>(0.0)};
DataType sumErrC{initVal}, sumErrB{initVal}, sumErrA{initVal};

auto const expectedC = static_cast<DataType>(valA + scalarVal * scalarVal * valA);
auto const expectedC = static_cast<DataType>(valA + scalarVal * valA);
auto const expectedB = static_cast<DataType>(scalarVal * valA);
auto const expectedA = static_cast<DataType>(valA);
auto const expectedA = static_cast<DataType>(expectedB + static_cast<DataType>(scalarVal) * expectedC);

// sum of the errors for each array
for(Idx i = 0; i < arraySize; ++i)
Expand All @@ -363,6 +364,7 @@ void testKernels()
}

// Normalize and compare sum of the errors
// Use a different equality check if floating point errors exceed precision of FuzzyEqual function
REQUIRE(FuzzyEqual(sumErrC / static_cast<DataType>(arraySize) / expectedC, static_cast<DataType>(0.0)));
REQUIRE(FuzzyEqual(sumErrB / static_cast<DataType>(arraySize) / expectedB, static_cast<DataType>(0.0)));
REQUIRE(FuzzyEqual(sumErrA / static_cast<DataType>(arraySize) / expectedA, static_cast<DataType>(0.0)));
Expand Down Expand Up @@ -401,8 +403,10 @@ void testKernels()

DataType const* sumPtr = std::data(bufHostSumPerBlock);
auto const result = std::reduce(sumPtr, sumPtr + gridBlockExtent, DataType{0});
// Since vector values are 1, dot product should be identical to arraySize
REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize * 2)));

// dot product should be identical to arraySize*expectedA*expectedB
// Use a different equality check if floating point errors exceed precision of FuzzyEqual function
REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize) * expectedA * expectedB));
// Add workdiv to the list of workdivs to print later
metaData.setItem(BMInfoDataType::WorkDivDot, workDivDot);
}
Expand Down
Loading