From 6a27beb42221393e7c8743b9c46acea0c9849537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Widera?= Date: Mon, 16 Sep 2024 10:26:12 +0200 Subject: [PATCH] reduce register footprint fix #2382 Rewrite UniformElements iterator to reduce the register footprint. - avoid multiple returns within a function - reduce the iterator state size by one element --- include/alpaka/exec/UniformElements.hpp | 39 ++++++++++--------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/include/alpaka/exec/UniformElements.hpp b/include/alpaka/exec/UniformElements.hpp index b7f6cd2ee54..f01c481a0cd 100644 --- a/include/alpaka/exec/UniformElements.hpp +++ b/include/alpaka/exec/UniformElements.hpp @@ -129,12 +129,11 @@ namespace alpaka friend class UniformElementsAlong; ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first) - : elements_{elements} - , stride_{stride} - , extent_{extent} - , first_{std::min(first, extent)} - , index_{first_} - , range_{std::min(first + elements, extent)} + : extent_{extent} + , elements_{elements} // we need to reduce the stride by one element range because index_ is later + // increased with each increment + , stride_{stride - elements} + , index_{std::min(first, extent)} { } @@ -148,21 +147,16 @@ namespace alpaka ALPAKA_FN_ACC inline const_iterator& operator++() { // increment the index along the elements processed by the current thread + ++indexElem_; ++index_; - if(index_ < range_) - return *this; - - // increment the thread index with the grid stride - first_ += stride_; - index_ = first_; - range_ = std::min(first_ + elements_, extent_); - if(index_ < extent_) - return *this; + if(indexElem_ >= elements_) + { + indexElem_ = Idx{0}; + index_ += stride_; + } + if(index_ >= extent_) + index_ = extent_; - // the iterator has reached or passed the end of the extent, clamp it to the extent - first_ = extent_; - index_ = extent_; - range_ = extent_; return *this; } @@ -176,7 +170,7 @@ namespace alpaka 
ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { - return (index_ == other.index_) and (first_ == other.first_); + return (*(*this) == *other); } ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const @@ -186,13 +180,12 @@ namespace alpaka private: // non-const to support iterator copy and assignment + Idx extent_; Idx elements_; Idx stride_; - Idx extent_; // modified by the pre/post-increment operator - Idx first_; Idx index_; - Idx range_; + Idx indexElem_ = 0; }; private: