Skip to content

Commit

Permalink
reduce register footprint
Browse files Browse the repository at this point in the history
fix #2382

Rewrite UniformElements iterator to reduce the register footprint.

- avoid multiple return statements within a function
- reduce the iterator state size by one element
  • Loading branch information
psychocoderHPC committed Sep 16, 2024
1 parent 38cbc40 commit 6a27beb
Showing 1 changed file with 16 additions and 23 deletions.
39 changes: 16 additions & 23 deletions include/alpaka/exec/UniformElements.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,11 @@ namespace alpaka
friend class UniformElementsAlong;

ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
: elements_{elements}
, stride_{stride}
, extent_{extent}
, first_{std::min(first, extent)}
, index_{first_}
, range_{std::min(first + elements, extent)}
: extent_{extent}
, elements_{elements} // we need to reduce the stride by one element range because index_ is also
// increased by one with each increment
, stride_{stride - elements}
, index_{std::min(first, extent)}
{
}

Expand All @@ -148,21 +147,16 @@ namespace alpaka
ALPAKA_FN_ACC inline const_iterator& operator++()
{
// increment the index along the elements processed by the current thread
++indexElem_;
++index_;
if(index_ < range_)
return *this;

// increment the thread index with the grid stride
first_ += stride_;
index_ = first_;
range_ = std::min(first_ + elements_, extent_);
if(index_ < extent_)
return *this;
if(indexElem_ >= elements_)
{
indexElem_ = Idx{0};
index_ += stride_;
}
if(index_ >= extent_)
index_ = extent_;

// the iterator has reached or passed the end of the extent, clamp it to the extent
first_ = extent_;
index_ = extent_;
range_ = extent_;
return *this;
}

Expand All @@ -176,7 +170,7 @@ namespace alpaka

ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
{
return (index_ == other.index_) and (first_ == other.first_);
return (*(*this) == *other);
}

ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
Expand All @@ -186,13 +180,12 @@ namespace alpaka

private:
// non-const to support iterator copy and assignment
Idx extent_;
Idx elements_;
Idx stride_;
Idx extent_;
// modified by the pre/post-increment operator
Idx first_;
Idx index_;
Idx range_;
Idx indexElem_ = 0;
};

private:
Expand Down

0 comments on commit 6a27beb

Please sign in to comment.