Add 2D heat equation example with shared memory and validation

AuroraPerego · Sep 5, 2024 · 53392f4 · 53392f4
1 parent 546f267
commit 53392f4
Show file tree

Hide file tree

Showing 8 changed files with 594 additions and 0 deletions.
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -22,6 +22,7 @@ add_subdirectory("convolution2D/")
 add_subdirectory("conv2DWithMdspan/")
 add_subdirectory("counterBasedRng/")
 add_subdirectory("heatEquation/")
+add_subdirectory("heatEquation2D/")
 add_subdirectory("helloWorld/")
 add_subdirectory("helloWorldLambda/")
 add_subdirectory("kernelSpecialization/")

diff --git a/example/heatEquation2D/CMakeLists.txt b/example/heatEquation2D/CMakeLists.txt
@@ -0,0 +1,67 @@
+#
+# Copyright 2023 Benjamin Worpitz, Jan Stephan
+# SPDX-License-Identifier: ISC
+#
+
+################################################################################
+# Required CMake version.
+
+cmake_minimum_required(VERSION 3.22)
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+################################################################################
+# Project.
+
+set(_TARGET_NAME heatEquation2D)
+
+project(${_TARGET_NAME} LANGUAGES CXX)
+
+
+################################################################################
+# PNGwriter
+################################################################################
+
+# find PNGwriter installation
+find_package(PNGwriter 0.7.0 CONFIG)
+
+if(PNGwriter_FOUND)
+  set(PNGWRITER_ENABLED True)
+else()
+  set(PNGWRITER_ENABLED False)
+endif()
+
+#-------------------------------------------------------------------------------
+# Find alpaka.
+
+if(NOT TARGET alpaka::alpaka)
+    option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)
+
+    if(alpaka_USE_SOURCE_TREE)
+        # Don't build the examples recursively
+        set(alpaka_BUILD_EXAMPLES OFF)
+        add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
+    else()
+        find_package(alpaka REQUIRED)
+    endif()
+endif()
+
+#-------------------------------------------------------------------------------
+# Add executable.
+
+alpaka_add_executable(
+    ${_TARGET_NAME}
+    src/heatEquation2D.cpp)
+target_link_libraries(
+    ${_TARGET_NAME}
+    PUBLIC alpaka::alpaka)
+if(PNGwriter_FOUND)
+    target_link_libraries(
+        ${_TARGET_NAME}
+        PRIVATE PNGwriter::PNGwriter)
+    target_compile_definitions(${_TARGET_NAME} PRIVATE PNGWRITER_ENABLED)
+endif()
+
+set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)
+
+add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
diff --git a/example/heatEquation2D/src/BoundaryKernel.hpp b/example/heatEquation2D/src/BoundaryKernel.hpp
@@ -0,0 +1,86 @@
+/* Copyright 2024 Tapish Narwal
+ * SPDX-License-Identifier: ISC
+ */
+
+#pragma once
+
+#include "analyticalSolution.hpp"
+#include "helpers.hpp"
+
+#include <alpaka/alpaka.hpp>
+
+//! alpaka version of explicit finite-difference 1d heat equation solver
+//!
+//! Applies boundary conditions
+//! forward difference in t and second-order central difference in x
+//!
+//! \param uBuf grid values of u for each x, y and the current value of t:
+//!                 u(x, y, t)  | t = t_current
+//! \param chunkSize
+//! \param pitch
+//! \param dx step in x
+//! \param dy step in y
+//! \param dt step in t
+struct BoundaryKernel
+{
+    template<typename TAcc, typename TChunk>
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const& acc,
+        double* const uBuf,
+        TChunk const chunkSize,
+        TChunk const pitch,
+        uint32_t step,
+        double const dx,
+        double const dy,
+        double const dt) const -> void
+    {
+        using Dim = alpaka::DimInt<2u>;
+        using Idx = uint32_t;
+
+        // Get extents(dimensions)
+        auto const gridBlockExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc);
+        auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
+        auto const numThreadsPerBlock = blockThreadExtent.prod();
+
+        // Get indexes
+        auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
+        auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
+        auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
+        auto const blockStartIdx = gridBlockIdx * chunkSize;
+
+        // Lambda function to apply boundary conditions
+        auto applyBoundary = [&](auto const& globalIdxStart, auto const length, bool isRow)
+        {
+            for(auto i = threadIdx1D; i < length; i += numThreadsPerBlock)
+            {
+                auto idx2D = globalIdxStart + (isRow ? alpaka::Vec<Dim, Idx>{0, i} : alpaka::Vec<Dim, Idx>{i, 0});
+                auto elem = getElementPtr(uBuf, idx2D, pitch);
+                *elem = exactSolution(idx2D[1] * dx, idx2D[0] * dy, step * dt);
+            }
+        };
+
+        // Apply boundary conditions for the top row
+        if(gridBlockIdx[0] == 0)
+        {
+            applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{0, 1}, chunkSize[1], true);
+        }
+
+        // Apply boundary conditions for the bottom row
+        if(gridBlockIdx[0] == gridBlockExtent[0] - 1)
+        {
+            applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{chunkSize[0] + 1, 1}, chunkSize[1], true);
+        }
+
+        // Apply boundary conditions for the left column
+        if(gridBlockIdx[1] == 0)
+        {
+            applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, 0}, chunkSize[0], false);
+        }
+
+        // Apply boundary conditions for the right column
+        if(gridBlockIdx[1] == gridBlockExtent[1] - 1)
+        {
+            applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, chunkSize[1] + 1}, chunkSize[0], false);
+        }
+    }
+};
diff --git a/example/heatEquation2D/src/StencilKernel.hpp b/example/heatEquation2D/src/StencilKernel.hpp
@@ -0,0 +1,89 @@
+/* Copyright 2024 Tapish Narwal
+ * SPDX-License-Identifier: ISC
+ */
+
+#pragma once
+
+#include "helpers.hpp"
+
+#include <alpaka/alpaka.hpp>
+
+//! alpaka version of explicit finite-difference 2D heat equation solver
+//!
+//! \tparam T_SharedMemSize1D size of the shared memory box
+//!
+//! Solving equation u_t(x, t) = u_xx(x, t) + u_yy(y, t) using a simple explicit scheme with
+//! forward difference in t and second-order central difference in x and y
+//!
+//! \param uCurrBuf Current buffer with grid values of u for each x, y pair and the current value of t:
+//!                 u(x, y, t) | t = t_current
+//! \param uNextBuf resulting grid values of u for each x, y pair and the next value of t:
+//!              u(x, y, t) | t = t_current + dt
+//! \param chunkSize The size of the chunk or tile that the user divides the problem into. This defines the size of the
+//!                  workload handled by each thread block.
+//! \param pitchCurr The pitch (or stride) in memory corresponding to the TDim grid in the accelerator's memory.
+//!              This is used to calculate memory offsets when accessing elements in the current buffer.
+//! \param pitchNext The pitch used to calculate memory offsets when accessing elements in the next buffer.
+//! \param dx step in x
+//! \param dy step in y
+//! \param dt step in t
+template<size_t T_SharedMemSize1D>
+struct StencilKernel
+{
+    template<typename TAcc, typename TDim, typename TIdx>
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const& acc,
+        double const* const uCurrBuf,
+        double* const uNextBuf,
+        alpaka::Vec<TDim, TIdx> const chunkSize,
+        alpaka::Vec<TDim, TIdx> const pitchCurr,
+        alpaka::Vec<TDim, TIdx> const pitchNext,
+        double const dx,
+        double const dy,
+        double const dt) const -> void
+    {
+        auto& sdata = alpaka::declareSharedVar<double[T_SharedMemSize1D], __COUNTER__>(acc);
+
+        // Get extents(dimensions)
+        auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
+        auto const numThreadsPerBlock = blockThreadExtent.prod();
+
+        // Get indexes
+        auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
+        auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
+        auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
+        auto const blockStartIdx = gridBlockIdx * chunkSize;
+
+        constexpr alpaka::Vec<TDim, TIdx> halo{2, 2};
+
+        for(auto i = threadIdx1D; i < T_SharedMemSize1D; i += numThreadsPerBlock)
+        {
+            auto idx2d = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize + halo);
+            idx2d = idx2d + blockStartIdx;
+            auto elem = getElementPtr(uCurrBuf, idx2d, pitchCurr);
+            sdata[i] = *elem;
+        }
+
+        alpaka::syncBlockThreads(acc);
+
+        // Each kernel executes one element
+        double const rX = dt / (dx * dx);
+        double const rY = dt / (dy * dy);
+
+        // go over only core cells
+        for(auto i = threadIdx1D; i < chunkSize.prod(); i += numThreadsPerBlock)
+        {
+            auto idx2D = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize);
+            idx2D = idx2D + alpaka::Vec<TDim, TIdx>{1, 1}; // offset for halo above and to the left
+            auto localIdx1D = alpaka::mapIdx<1>(idx2D, chunkSize + halo)[0u];
+
+
+            auto bufIdx = idx2D + blockStartIdx;
+            auto elem = getElementPtr(uNextBuf, bufIdx, pitchNext);
+
+            *elem = sdata[localIdx1D] * (1.0 - 2.0 * rX - 2.0 * rY) + sdata[localIdx1D - 1] * rX
+                    + sdata[localIdx1D + 1] * rX + sdata[localIdx1D - chunkSize[1] - halo[1]] * rY
+                    + sdata[localIdx1D + chunkSize[1] + halo[1]] * rY;
+        }
+    }
+};
diff --git a/example/heatEquation2D/src/analyticalSolution.hpp b/example/heatEquation2D/src/analyticalSolution.hpp
@@ -0,0 +1,71 @@
+/* Copyright 2020 Tapish Narwal
+ * SPDX-License-Identifier: ISC
+ */
+
+#pragma once
+
+#include <alpaka/alpaka.hpp>
+
+#include <cmath>
+
+//! Exact solution to the test problem
+//! u_t(x, y, t) = u_xx(x, t) + u_yy(y, t), x in [0, 1], y in [0, 1], t in [0, T]
+//!
+//! \param x value of x
+//! \param x value of y
+//! \param t value of t
+ALPAKA_FN_HOST_ACC auto exactSolution(double const x, double const y, double const t) -> double
+{
+    constexpr double pi = alpaka::math::constants::pi;
+    return std::exp(-pi * pi * t) * (std::sin(pi * x) + std::sin(pi * y));
+}
+
+//! Valdidate calculated solution in the buffer to the analytical solution at t=tMax
+//!
+//! \param buffer buffer holding the solution at t=tMax
+//! \param extent extents of the buffer
+//! \param dx
+//! \param dy
+//! \param tMax time at simulation end
+template<typename T_Buffer, typename T_Extent>
+auto validateSolution(
+    T_Buffer const& buffer,
+    T_Extent const& extent,
+    double const dx,
+    double const dy,
+    double const tMax) -> std::pair<bool, double>
+{
+    // Calculate error
+    double maxError = 0.0;
+    for(uint32_t j = 1; j < extent[0] - 1; ++j)
+    {
+        for(uint32_t i = 0; i < extent[1]; ++i)
+        {
+            auto const error = std::abs(buffer.data()[j * extent[1] + i] - exactSolution(i * dx, j * dy, tMax));
+            maxError = std::max(maxError, error);
+        }
+    }
+
+    constexpr double errorThreshold = 1e-5;
+    return std::make_pair(maxError < errorThreshold, maxError);
+}
+
+//! Initialize the buffer to the analytical solution at t=0
+//!
+//! \param buffer buffer holding the solution at tMax
+//! \param extent extents of the buffer
+//! \param dx
+//! \param dy
+template<typename TBuffer>
+auto initalizeBuffer(TBuffer& buffer, double const dx, double const dy) -> void
+{
+    auto extents = alpaka::getExtents(buffer);
+    // Apply initial conditions for the test problem
+    for(uint32_t j = 0; j < extents[0]; ++j)
+    {
+        for(uint32_t i = 0; i < extents[1]; ++i)
+        {
+            buffer.data()[j * extents[1] + i] = exactSolution(i * dx, j * dy, 0.0);
+        }
+    }
+}