Skip to content

Commit

Permalink
Add 2D heat equation example with shared memory and validation
Browse files Browse the repository at this point in the history
  • Loading branch information
ikbuibui authored and psychocoderHPC committed Sep 5, 2024
1 parent 546f267 commit 53392f4
Show file tree
Hide file tree
Showing 8 changed files with 594 additions and 0 deletions.
1 change: 1 addition & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ add_subdirectory("convolution2D/")
add_subdirectory("conv2DWithMdspan/")
add_subdirectory("counterBasedRng/")
add_subdirectory("heatEquation/")
add_subdirectory("heatEquation2D/")
add_subdirectory("helloWorld/")
add_subdirectory("helloWorldLambda/")
add_subdirectory("kernelSpecialization/")
Expand Down
67 changes: 67 additions & 0 deletions example/heatEquation2D/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#
# Copyright 2023 Benjamin Worpitz, Jan Stephan
# SPDX-License-Identifier: ISC
#

################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.22)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME heatEquation2D)

project(${_TARGET_NAME} LANGUAGES CXX)


################################################################################
# PNGwriter
################################################################################

# find PNGwriter installation
find_package(PNGwriter 0.7.0 CONFIG)

if(PNGwriter_FOUND)
set(PNGWRITER_ENABLED True)
else()
set(PNGWRITER_ENABLED False)
endif()

#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

if(alpaka_USE_SOURCE_TREE)
# Don't build the examples recursively
set(alpaka_BUILD_EXAMPLES OFF)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
else()
find_package(alpaka REQUIRED)
endif()
endif()

#-------------------------------------------------------------------------------
# Add executable.

alpaka_add_executable(
${_TARGET_NAME}
src/heatEquation2D.cpp)
target_link_libraries(
${_TARGET_NAME}
PUBLIC alpaka::alpaka)
if(PNGwriter_FOUND)
target_link_libraries(
${_TARGET_NAME}
PRIVATE PNGwriter::PNGwriter)
target_compile_definitions(${_TARGET_NAME} PRIVATE PNGWRITER_ENABLED)
endif()

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)

add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
86 changes: 86 additions & 0 deletions example/heatEquation2D/src/BoundaryKernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/* Copyright 2024 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include "analyticalSolution.hpp"
#include "helpers.hpp"

#include <alpaka/alpaka.hpp>

//! alpaka version of explicit finite-difference 1d heat equation solver
//!
//! Applies boundary conditions
//! forward difference in t and second-order central difference in x
//!
//! \param uBuf grid values of u for each x, y and the current value of t:
//! u(x, y, t) | t = t_current
//! \param chunkSize
//! \param pitch
//! \param dx step in x
//! \param dy step in y
//! \param dt step in t
struct BoundaryKernel
{
template<typename TAcc, typename TChunk>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
double* const uBuf,
TChunk const chunkSize,
TChunk const pitch,
uint32_t step,
double const dx,
double const dy,
double const dt) const -> void
{
using Dim = alpaka::DimInt<2u>;
using Idx = uint32_t;

// Get extents(dimensions)
auto const gridBlockExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
auto const numThreadsPerBlock = blockThreadExtent.prod();

// Get indexes
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
auto const blockStartIdx = gridBlockIdx * chunkSize;

// Lambda function to apply boundary conditions
auto applyBoundary = [&](auto const& globalIdxStart, auto const length, bool isRow)
{
for(auto i = threadIdx1D; i < length; i += numThreadsPerBlock)
{
auto idx2D = globalIdxStart + (isRow ? alpaka::Vec<Dim, Idx>{0, i} : alpaka::Vec<Dim, Idx>{i, 0});
auto elem = getElementPtr(uBuf, idx2D, pitch);
*elem = exactSolution(idx2D[1] * dx, idx2D[0] * dy, step * dt);
}
};

// Apply boundary conditions for the top row
if(gridBlockIdx[0] == 0)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{0, 1}, chunkSize[1], true);
}

// Apply boundary conditions for the bottom row
if(gridBlockIdx[0] == gridBlockExtent[0] - 1)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{chunkSize[0] + 1, 1}, chunkSize[1], true);
}

// Apply boundary conditions for the left column
if(gridBlockIdx[1] == 0)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, 0}, chunkSize[0], false);
}

// Apply boundary conditions for the right column
if(gridBlockIdx[1] == gridBlockExtent[1] - 1)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, chunkSize[1] + 1}, chunkSize[0], false);
}
}
};
89 changes: 89 additions & 0 deletions example/heatEquation2D/src/StencilKernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/* Copyright 2024 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include "helpers.hpp"

#include <alpaka/alpaka.hpp>

//! alpaka version of explicit finite-difference 2D heat equation solver
//!
//! \tparam T_SharedMemSize1D size of the shared memory box
//!
//! Solving equation u_t(x, t) = u_xx(x, t) + u_yy(y, t) using a simple explicit scheme with
//! forward difference in t and second-order central difference in x and y
//!
//! \param uCurrBuf Current buffer with grid values of u for each x, y pair and the current value of t:
//! u(x, y, t) | t = t_current
//! \param uNextBuf resulting grid values of u for each x, y pair and the next value of t:
//! u(x, y, t) | t = t_current + dt
//! \param chunkSize The size of the chunk or tile that the user divides the problem into. This defines the size of the
//! workload handled by each thread block.
//! \param pitchCurr The pitch (or stride) in memory corresponding to the TDim grid in the accelerator's memory.
//! This is used to calculate memory offsets when accessing elements in the current buffer.
//! \param pitchNext The pitch used to calculate memory offsets when accessing elements in the next buffer.
//! \param dx step in x
//! \param dy step in y
//! \param dt step in t
template<size_t T_SharedMemSize1D>
struct StencilKernel
{
template<typename TAcc, typename TDim, typename TIdx>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
double const* const uCurrBuf,
double* const uNextBuf,
alpaka::Vec<TDim, TIdx> const chunkSize,
alpaka::Vec<TDim, TIdx> const pitchCurr,
alpaka::Vec<TDim, TIdx> const pitchNext,
double const dx,
double const dy,
double const dt) const -> void
{
auto& sdata = alpaka::declareSharedVar<double[T_SharedMemSize1D], __COUNTER__>(acc);

// Get extents(dimensions)
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
auto const numThreadsPerBlock = blockThreadExtent.prod();

// Get indexes
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
auto const blockStartIdx = gridBlockIdx * chunkSize;

constexpr alpaka::Vec<TDim, TIdx> halo{2, 2};

for(auto i = threadIdx1D; i < T_SharedMemSize1D; i += numThreadsPerBlock)
{
auto idx2d = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize + halo);
idx2d = idx2d + blockStartIdx;
auto elem = getElementPtr(uCurrBuf, idx2d, pitchCurr);
sdata[i] = *elem;
}

alpaka::syncBlockThreads(acc);

// Each kernel executes one element
double const rX = dt / (dx * dx);
double const rY = dt / (dy * dy);

// go over only core cells
for(auto i = threadIdx1D; i < chunkSize.prod(); i += numThreadsPerBlock)
{
auto idx2D = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize);
idx2D = idx2D + alpaka::Vec<TDim, TIdx>{1, 1}; // offset for halo above and to the left
auto localIdx1D = alpaka::mapIdx<1>(idx2D, chunkSize + halo)[0u];


auto bufIdx = idx2D + blockStartIdx;
auto elem = getElementPtr(uNextBuf, bufIdx, pitchNext);

*elem = sdata[localIdx1D] * (1.0 - 2.0 * rX - 2.0 * rY) + sdata[localIdx1D - 1] * rX
+ sdata[localIdx1D + 1] * rX + sdata[localIdx1D - chunkSize[1] - halo[1]] * rY
+ sdata[localIdx1D + chunkSize[1] + halo[1]] * rY;
}
}
};
71 changes: 71 additions & 0 deletions example/heatEquation2D/src/analyticalSolution.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/* Copyright 2020 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include <alpaka/alpaka.hpp>

#include <cmath>

//! Exact solution to the test problem
//! u_t(x, y, t) = u_xx(x, t) + u_yy(y, t), x in [0, 1], y in [0, 1], t in [0, T]
//!
//! \param x value of x
//! \param x value of y
//! \param t value of t
ALPAKA_FN_HOST_ACC auto exactSolution(double const x, double const y, double const t) -> double
{
constexpr double pi = alpaka::math::constants::pi;
return std::exp(-pi * pi * t) * (std::sin(pi * x) + std::sin(pi * y));
}

//! Valdidate calculated solution in the buffer to the analytical solution at t=tMax
//!
//! \param buffer buffer holding the solution at t=tMax
//! \param extent extents of the buffer
//! \param dx
//! \param dy
//! \param tMax time at simulation end
template<typename T_Buffer, typename T_Extent>
auto validateSolution(
T_Buffer const& buffer,
T_Extent const& extent,
double const dx,
double const dy,
double const tMax) -> std::pair<bool, double>
{
// Calculate error
double maxError = 0.0;
for(uint32_t j = 1; j < extent[0] - 1; ++j)
{
for(uint32_t i = 0; i < extent[1]; ++i)
{
auto const error = std::abs(buffer.data()[j * extent[1] + i] - exactSolution(i * dx, j * dy, tMax));
maxError = std::max(maxError, error);
}
}

constexpr double errorThreshold = 1e-5;
return std::make_pair(maxError < errorThreshold, maxError);
}

//! Initialize the buffer to the analytical solution at t=0
//!
//! \param buffer buffer holding the solution at tMax
//! \param extent extents of the buffer
//! \param dx
//! \param dy
template<typename TBuffer>
auto initalizeBuffer(TBuffer& buffer, double const dx, double const dy) -> void
{
auto extents = alpaka::getExtents(buffer);
// Apply initial conditions for the test problem
for(uint32_t j = 0; j < extents[0]; ++j)
{
for(uint32_t i = 0; i < extents[1]; ++i)
{
buffer.data()[j * extents[1] + i] = exactSolution(i * dx, j * dy, 0.0);
}
}
}
Loading

0 comments on commit 53392f4

Please sign in to comment.