alpaka-group · mehmetyusufoglu · Nov 7, 2024
diff --git a/benchmarks/babelstream/src/babelStreamMainTest.cpp b/benchmarks/babelstream/src/babelStreamMainTest.cpp
@@ -22,7 +22,7 @@
  * Can be run with custom arguments as well as catch2 arguments
  * Run with Custom arguments:
  * ./babelstream --array-size=33554432 --number-runs=100
- * Runt with default array size and num runs:
+ * Run with default array size and num runs:
  * ./babelstream
- * Run with Catch2 arguments and defaul arrary size and num runs:
+ * Run with Catch2 arguments and default array size and num runs:
  * ./babelstream --success
@@ -76,12 +76,12 @@ struct CopyKernel
     //! \tparam T The data type
     //! \param acc The accelerator to be executed on.
     //! \param a Pointer for vector a
-    //! \param b Pointer for vector b
+    //! \param c Pointer for vector c
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* b) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* c) const
     {
         auto const [index] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        b[index] = a[index];
+        c[index] = a[index];
     }
 };
 
@@ -92,14 +92,14 @@ struct MultKernel
     //! \tparam TAcc The accelerator environment to be executed on.
     //! \tparam T The data type
     //! \param acc The accelerator to be executed on.
-    //! \param a Pointer for vector a
+    //! \param c Pointer for vector c
     //! \param b Pointer for result vector b
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const a, T* b) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const c, T* b) const
     {
         const T scalar = static_cast<T>(scalarVal);
         auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        b[i] = scalar * a[i];
+        b[i] = scalar * c[i];
     }
 };
 
@@ -132,11 +132,11 @@ struct TriadKernel
     //! \param b Pointer for vector b
-    //! \param c Pointer for result vector c
+    //! \param c Pointer for vector c
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* c) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* a, T const* b, T const* c) const
     {
         const T scalar = static_cast<T>(scalarVal);
         auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        c[i] = a[i] + scalar * b[i];
+        a[i] = b[i] + scalar * c[i];
     }
 };
 
@@ -151,6 +151,7 @@ struct DotKernel
     //! \param a Pointer for vector a
     //! \param b Pointer for vector b
     //! \param sum Pointer for result vector consisting sums for each block
+    //! \param arraySize the size of the array
     template<typename TAcc, typename T>
     ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* sum, alpaka::Idx<TAcc> arraySize) const
     {
@@ -316,23 +317,23 @@ void testKernels()
         },
         "InitKernel");
 
-    // Test the copy-kernel. Copy A one by one to B.
+    // Test the copy-kernel. Copy A one by one to C.
     measureKernelExec(
-        [&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccInputBPtr); },
+        [&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccOutputCPtr); },
         "CopyKernel");
 
-    // Test the scaling-kernel. Calculate B=scalar*A.
+    // Test the scaling-kernel. Calculate B=scalar*C. Where C = A.
     measureKernelExec(
-        [&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccInputAPtr, bufAccInputBPtr); },
+        [&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccOutputCPtr, bufAccInputBPtr); },
         "MultKernel");
 
-    // Test the addition-kernel. Calculate C=A+B. Where B=scalar*A.
+    // Test the addition-kernel. Calculate C=A+B, where B=scalar*A (C held a copy of A when B was computed).
     measureKernelExec(
         [&]()
         { alpaka::exec<Acc>(queue, workDivAdd, AddKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
         "AddKernel");
 
-    // Test the Triad-kernel. Calculate C=A+scalar*B where B=scalar*A.
+    // Test the Triad-kernel. Calculate A=B+scalar*C. Where C is A+scalar*A.
     measureKernelExec(
         [&]()
         { alpaka::exec<Acc>(queue, workDivTriad, TriadKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
@@ -350,9 +351,9 @@ void testKernels()
     DataType initVal{static_cast<DataType>(0.0)};
     DataType sumErrC{initVal}, sumErrB{initVal}, sumErrA{initVal};
 
-    auto const expectedC = static_cast<DataType>(valA + scalarVal * scalarVal * valA);
+    auto const expectedC = static_cast<DataType>(valA + scalarVal * valA);
     auto const expectedB = static_cast<DataType>(scalarVal * valA);
-    auto const expectedA = static_cast<DataType>(valA);
+    auto const expectedA = static_cast<DataType>(expectedB + static_cast<DataType>(scalarVal) * expectedC);
 
     // sum of the errors for each array
     for(Idx i = 0; i < arraySize; ++i)
@@ -363,6 +364,7 @@ void testKernels()
     }
 
     // Normalize and compare sum of the errors
+    // Use a different equality check if floating point errors exceed precision of FuzzyEqual function
     REQUIRE(FuzzyEqual(sumErrC / static_cast<DataType>(arraySize) / expectedC, static_cast<DataType>(0.0)));
     REQUIRE(FuzzyEqual(sumErrB / static_cast<DataType>(arraySize) / expectedB, static_cast<DataType>(0.0)));
     REQUIRE(FuzzyEqual(sumErrA / static_cast<DataType>(arraySize) / expectedA, static_cast<DataType>(0.0)));
@@ -401,8 +403,10 @@ void testKernels()
 
         DataType const* sumPtr = std::data(bufHostSumPerBlock);
         auto const result = std::reduce(sumPtr, sumPtr + gridBlockExtent, DataType{0});
-        // Since vector values are 1, dot product should be identical to arraySize
-        REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize * 2)));
+
+        // Dot product should be identical to arraySize * expectedA * expectedB
+        // Use a different equality check if floating point errors exceed precision of FuzzyEqual function
+        REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize) * expectedA * expectedB));
         // Add workdiv to the list of workdivs to print later
         metaData.setItem(BMInfoDataType::WorkDivDot, workDivDot);
     }