From 4f046dc5a2df5aee79a1b158789c4bdad74439b9 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 6 Sep 2024 01:09:57 +0200 Subject: [PATCH] Rewrite the 3D buffer copy example using different uniformElements loops --- example/bufferCopy/src/bufferCopy.cpp | 58 +++++++++++---------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/example/bufferCopy/src/bufferCopy.cpp b/example/bufferCopy/src/bufferCopy.cpp index 7cc4fe9c969..f443645c528 100644 --- a/example/bufferCopy/src/bufferCopy.cpp +++ b/example/bufferCopy/src/bufferCopy.cpp @@ -1,5 +1,5 @@ -/* Copyright 2023 Alexander Matthes, Benjamin Worpitz, Erik Zenker, Matthias Werner, Bernhard Manfred Gruber, - * Jan Stephan +/* Copyright 2024 Alexander Matthes, Benjamin Worpitz, Erik Zenker, Matthias Werner, Bernhard Manfred Gruber, + * Jan Stephan, Andrea Bocci * SPDX-License-Identifier: ISC */ @@ -15,12 +15,10 @@ struct PrintBufferKernel template ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan data) const -> void { - auto const idx = alpaka::getIdx(acc); - auto const gridSize = alpaka::getWorkDiv(acc); - - for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0]) - for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1]) - for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2]) + // Use three nested loops along the dimensions 0, 1 and 2 + for(size_t z : alpaka::uniformElementsAlong<0>(acc, data.extent(0))) + for(size_t y : alpaka::uniformElementsAlong<1>(acc, data.extent(1))) + for(size_t x : alpaka::uniformElementsAlong<2>(acc, data.extent(2))) printf("%zu,%zu,%zu:%u ", z, y, x, static_cast(data(z, y, x))); } }; @@ -31,12 +29,10 @@ struct TestBufferKernel template ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan data) const -> void { - auto const idx = alpaka::getIdx(acc); - auto const gridSize = alpaka::getWorkDiv(acc); - - for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0]) - for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1]) - for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2]) + // Use three nested loops along the dimensions z, y and x + for(size_t z : alpaka::uniformElementsAlongZ(acc, data.extent(0))) + for(size_t y : alpaka::uniformElementsAlongY(acc, data.extent(1))) + for(size_t x : alpaka::uniformElementsAlongX(acc, data.extent(2))) ALPAKA_ASSERT_ACC( data(z, y, x) == alpaka::mapIdx<1u>( @@ -51,16 +47,10 @@ struct FillBufferKernel template ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan data) const -> void { - using Vec = alpaka::Vec, alpaka::Idx>; - - auto const idx = alpaka::getIdx(acc); - auto const gridSize = alpaka::getWorkDiv(acc); - - for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0]) - for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1]) - for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2]) - data(z, y, x) - = alpaka::mapIdx<1u>(Vec{z, y, x}, Vec{data.extent(0), data.extent(1), data.extent(2)})[0]; + // Use a single 3-dimensional loop + for(auto idx : alpaka::uniformElementsND(acc, alpaka::Vec{data.extent(0), data.extent(1), data.extent(2)})) + data(idx.z(), idx.y(), idx.x()) // equivalent to data(idx[0], idx[1], idx[2]) + = alpaka::mapIdx<1u>(idx, alpaka::Vec{data.extent(0), data.extent(1), data.extent(2)})[0]; } }; @@ -78,15 +68,15 @@ auto example(TAccTag const&) -> int // Define the device accelerator using Acc = alpaka::TagToAcc; std::cout << "Using alpaka accelerator: " << alpaka::getAccName() << std::endl; - // Defines the synchronization behavior of a queue + // Defines the synchronization behavior of the device queue // // choose between Blocking and NonBlocking using AccQueueProperty = alpaka::Blocking; using DevQueue = alpaka::Queue; - // Define the device accelerator + // Define the host accelerator using Host = alpaka::AccCpuSerial; - // Defines the synchronization behavior of a queue + // Defines the synchronization behavior of the host queue // // choose between Blocking and NonBlocking using HostQueueProperty = alpaka::Blocking; @@ -118,14 +108,14 @@ auto example(TAccTag const&) -> int using Data = std::uint32_t; constexpr Idx nElementsPerDim = 2; - Vec const extents(Vec::all(static_cast(nElementsPerDim))); + Vec const extents = Vec::all(nElementsPerDim); // Allocate host memory buffers // // The `alloc` method returns a reference counted buffer handle. // When the last such handle is destroyed, the memory is freed automatically. using BufHost = alpaka::Buf; - BufHost hostBuffer(alpaka::allocBuf(devHost, extents)); + BufHost hostBuffer = alpaka::allocBuf(devHost, extents); // You can also use already allocated memory and wrap it within a view (irrespective of the device type). // The view does not own the underlying memory. So you have to make sure that // the view does not outlive its underlying memory. @@ -136,8 +126,8 @@ auto example(TAccTag const&) -> int // // The interface to allocate a buffer is the same on the host and on the device. using BufAcc = alpaka::Buf; - BufAcc deviceBuffer1(alpaka::allocBuf(devAcc, extents)); - BufAcc deviceBuffer2(alpaka::allocBuf(devAcc, extents)); + BufAcc deviceBuffer1 = alpaka::allocBuf(devAcc, extents); + BufAcc deviceBuffer2 = alpaka::allocBuf(devAcc, extents); // Init host buffer @@ -152,9 +142,9 @@ auto example(TAccTag const&) -> int // some values into the buffer memory. // Mind, that only a host can write on host memory. // The same holds true for device memory. - for(Idx z(0); z < extents[0]; ++z) - for(Idx y(0); y < extents[1]; ++y) - for(Idx x(0); x < extents[2]; ++x) + for(Idx z = 0; z < extents[0]; ++z) + for(Idx y = 0; y < extents[1]; ++y) + for(Idx x = 0; x < extents[2]; ++x) hostBufferMdSpan(z, y, x) = static_cast(z * extents[1] * extents[2] + y * extents[2] + x); // Memory views and buffers can also be initialized by executing a kernel.