Skip to content

Commit

Permalink
lazily compute iterator len
Browse files Browse the repository at this point in the history
This means the price of visiting every container is only paid when needed.

Fixes RoaringBitmap#294
  • Loading branch information
Dr-Emann committed Oct 16, 2024
1 parent b39a3c1 commit 18852a7
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 45 deletions.
17 changes: 17 additions & 0 deletions roaring/src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,21 @@ impl Iterator for Iter<'_> {
/// Pulls the next item from `inner` and passes it, together with this iterator's `key`,
/// through `util::join`. Returns `None` once `inner` is exhausted.
fn next(&mut self) -> Option<u32> {
    let item = self.inner.next()?;
    Some(util::join(self.key, item))
}

/// Reports the remaining-length bounds exactly as the wrapped `inner` iterator reports them.
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}

/// Consumes the iterator and returns the number of remaining items by delegating to
/// `inner.count()`; `key` is not consulted because no items are produced.
fn count(self) -> usize
where
Self: Sized,
{
self.inner.count()
}

/// Forwards the skip to `inner.nth(n)` and passes the item it lands on, together with this
/// iterator's `key`, through `util::join`. Returns `None` when `inner.nth(n)` does.
fn nth(&mut self, n: usize) -> Option<Self::Item> {
    let item = self.inner.nth(n)?;
    Some(util::join(self.key, item))
}
}

impl DoubleEndedIterator for Iter<'_> {
Expand All @@ -308,6 +323,8 @@ impl DoubleEndedIterator for Iter<'_> {
}
}

// No methods are overridden, so `len()` is the trait's default implementation, which is
// derived from this iterator's `size_hint()`.
impl ExactSizeIterator for Iter<'_> {}

impl fmt::Debug for Container {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter)
Expand Down
168 changes: 123 additions & 45 deletions roaring/src/bitmap/iter.rs
Original file line number Diff line number Diff line change
@@ -1,134 +1,212 @@
use alloc::vec;
use core::iter;
use core::slice;

use super::container::Container;
use crate::{NonSortedIntegers, RoaringBitmap};

use crate::bitmap::container;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

/// An iterator for `RoaringBitmap`.
pub struct Iter<'a> {
inner: iter::Flatten<slice::Iter<'a, Container>>,
size_hint: u64,
front: Option<container::Iter<'a>>,
containers: slice::Iter<'a, Container>,
back: Option<container::Iter<'a>>,
}

/// An iterator for `RoaringBitmap`.
pub struct IntoIter {
inner: iter::Flatten<vec::IntoIter<Container>>,
size_hint: u64,
front: Option<container::Iter<'static>>,
containers: vec::IntoIter<Container>,
back: Option<container::Iter<'static>>,
}

/// Runs `f` on the value stored in `opt`, resetting `opt` to `None` when `f` yields nothing.
///
/// If `opt` is already `None`, `f` is not called and `None` is returned. Otherwise the result
/// of `f` is returned unchanged.
#[inline]
fn and_then_or_clear<T, U>(opt: &mut Option<T>, f: impl FnOnce(&mut T) -> Option<U>) -> Option<U> {
    let inner = opt.as_mut()?;
    match f(inner) {
        Some(value) => Some(value),
        None => {
            // The stored value produced nothing, so drop it.
            *opt = None;
            None
        }
    }
}

impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
let size_hint = containers.iter().map(|c| c.len()).sum();
Iter { inner: containers.iter().flatten(), size_hint }
Iter { front: None, containers: containers.iter(), back: None }
}
}

impl IntoIter {
fn new(containers: Vec<Container>) -> IntoIter {
let size_hint = containers.iter().map(|c| c.len()).sum();
IntoIter { inner: containers.into_iter().flatten(), size_hint }
IntoIter { front: None, containers: containers.into_iter(), back: None }
}
}

/// Computes a size hint from a partially consumed front iterator, the containers not yet
/// started, and a partially consumed back iterator.
///
/// The result is the sum of `front.len()`, `back.len()` (each counted as 0 when absent) and
/// `len()` of every container in `containers`. If that sum overflows `usize`, the hint is
/// `(usize::MAX, None)`; otherwise both bounds are the exact sum.
fn size_hint_impl(
    front: &Option<container::Iter<'_>>,
    containers: &impl AsRef<[Container]>,
    back: &Option<container::Iter<'_>>,
) -> (usize, Option<usize>) {
    let front_len = match front {
        Some(it) => it.len(),
        None => 0,
    };
    let back_len = match back {
        Some(it) => it.len(),
        None => 0,
    };
    // `try_fold` stops at the first addition that overflows and yields `None`.
    let total = containers
        .as_ref()
        .iter()
        .try_fold(front_len + back_len, |acc, container| {
            acc.checked_add(container.len() as usize)
        });
    match total {
        Some(size) => (size, Some(size)),
        None => (usize::MAX, None),
    }
}

impl Iterator for Iter<'_> {
type Item = u32;

fn next(&mut self) -> Option<u32> {
self.size_hint = self.size_hint.saturating_sub(1);
self.inner.next()
loop {
if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) {
return Some(x);
}
self.front = match self.containers.next() {
Some(inner) => Some(inner.into_iter()),
None => return and_then_or_clear(&mut self.back, Iterator::next),
}
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
if self.size_hint < usize::MAX as u64 {
(self.size_hint as usize, Some(self.size_hint as usize))
} else {
(usize::MAX, None)
}
size_hint_impl(&self.front, &self.containers, &self.back)
}

#[inline]
fn fold<B, F>(self, init: B, f: F) -> B
fn fold<B, F>(mut self, mut init: B, mut f: F) -> B
where
Self: Sized,
F: FnMut(B, Self::Item) -> B,
{
self.inner.fold(init, f)
if let Some(iter) = &mut self.front {
init = iter.fold(init, &mut f);
}
init = self.containers.fold(init, |acc, container| {
let iter = <&Container>::into_iter(container);
iter.fold(acc, &mut f)
});
if let Some(iter) = &mut self.back {
init = iter.fold(init, &mut f);
};
init
}
}

impl DoubleEndedIterator for Iter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
self.size_hint = self.size_hint.saturating_sub(1);
self.inner.next_back()
loop {
if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) {
return Some(x);
}
self.back = match self.containers.next_back() {
Some(inner) => Some(inner.into_iter()),
None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back),
}
}
}

#[inline]
fn rfold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
fn rfold<Acc, Fold>(mut self, mut init: Acc, mut fold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.inner.rfold(init, fold)
if let Some(iter) = &mut self.back {
init = iter.rfold(init, &mut fold);
}
init = self.containers.rfold(init, |acc, container| {
let iter = container.into_iter();
iter.rfold(acc, &mut fold)
});
if let Some(iter) = &mut self.front {
init = iter.rfold(init, &mut fold);
};
init
}
}

#[cfg(target_pointer_width = "64")]
impl ExactSizeIterator for Iter<'_> {
fn len(&self) -> usize {
self.size_hint as usize
}
}
impl ExactSizeIterator for Iter<'_> {}

impl Iterator for IntoIter {
type Item = u32;

fn next(&mut self) -> Option<u32> {
self.size_hint = self.size_hint.saturating_sub(1);
self.inner.next()
loop {
if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) {
return Some(x);
}
match self.containers.next() {
Some(inner) => self.front = Some(inner.into_iter()),
None => return and_then_or_clear(&mut self.back, Iterator::next),
}
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
if self.size_hint < usize::MAX as u64 {
(self.size_hint as usize, Some(self.size_hint as usize))
} else {
(usize::MAX, None)
}
size_hint_impl(&self.front, &self.containers, &self.back)
}

#[inline]
fn fold<B, F>(self, init: B, f: F) -> B
fn fold<B, F>(mut self, mut init: B, mut f: F) -> B
where
Self: Sized,
F: FnMut(B, Self::Item) -> B,
{
self.inner.fold(init, f)
if let Some(iter) = &mut self.front {
init = iter.fold(init, &mut f);
}
init = self.containers.fold(init, |acc, container| {
let iter = <Container>::into_iter(container);
iter.fold(acc, &mut f)
});
if let Some(iter) = &mut self.back {
init = iter.fold(init, &mut f);
};
init
}
}

impl DoubleEndedIterator for IntoIter {
fn next_back(&mut self) -> Option<Self::Item> {
self.size_hint = self.size_hint.saturating_sub(1);
self.inner.next_back()
loop {
if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) {
return Some(x);
}
match self.containers.next_back() {
Some(inner) => self.back = Some(inner.into_iter()),
None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back),
}
}
}

#[inline]
fn rfold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
fn rfold<Acc, Fold>(mut self, mut init: Acc, mut fold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.inner.rfold(init, fold)
if let Some(iter) = &mut self.back {
init = iter.rfold(init, &mut fold);
}
init = self.containers.rfold(init, |acc, container| {
let iter = container.into_iter();
iter.rfold(acc, &mut fold)
});
if let Some(iter) = &mut self.front {
init = iter.rfold(init, &mut fold);
};
init
}
}

#[cfg(target_pointer_width = "64")]
impl ExactSizeIterator for IntoIter {
fn len(&self) -> usize {
self.size_hint as usize
}
}
impl ExactSizeIterator for IntoIter {}

impl RoaringBitmap {
/// Iterator over each value stored in the RoaringBitmap, guarantees values are ordered by value.
Expand Down
20 changes: 20 additions & 0 deletions roaring/src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,24 @@ impl<B: Borrow<[u64; BITMAP_LENGTH]>> Iterator for BitmapIter<B> {
self.value &= self.value - 1;
Some(64 * self.key + index)
}

/// Returns an exact size hint obtained by counting set bits.
///
/// The bits left in `value` are always counted. When `key < key_back`, the words strictly
/// between those two indices and the bits left in `value_back` are counted as well.
fn size_hint(&self) -> (usize, Option<usize>) {
    let front_bits: u32 = self.value.count_ones();
    let total: u32 = if self.key < self.key_back {
        let middle = &self.bits.borrow()[self.key as usize + 1..self.key_back as usize];
        let middle_bits: u32 = middle.iter().map(|word| word.count_ones()).sum();
        front_bits + middle_bits + self.value_back.count_ones()
    } else {
        front_bits
    };
    let remaining = total as usize;
    (remaining, Some(remaining))
}

/// Consumes the iterator and returns the number of remaining items by calling `self.len()`
/// instead of stepping through each element.
fn count(self) -> usize
where
Self: Sized,
{
self.len()
}
}

impl<B: Borrow<[u64; BITMAP_LENGTH]>> DoubleEndedIterator for BitmapIter<B> {
Expand All @@ -473,6 +491,8 @@ impl<B: Borrow<[u64; BITMAP_LENGTH]>> DoubleEndedIterator for BitmapIter<B> {
}
}

// No methods are overridden, so `len()` is the trait's default implementation, which is
// derived from this iterator's `size_hint()`.
impl<B: Borrow<[u64; BITMAP_LENGTH]>> ExactSizeIterator for BitmapIter<B> {}

#[inline]
pub fn key(index: u16) -> usize {
index as usize / 64
Expand Down
32 changes: 32 additions & 0 deletions roaring/src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,36 @@ impl Iterator for Iter<'_> {
Iter::BitmapOwned(inner) => inner.next(),
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
match self {
Iter::Array(inner) => inner.size_hint(),
Iter::Vec(inner) => inner.size_hint(),
Iter::BitmapBorrowed(inner) => inner.size_hint(),
Iter::BitmapOwned(inner) => inner.size_hint(),
}
}

fn count(self) -> usize
where
Self: Sized,
{
match self {
Iter::Array(inner) => inner.count(),
Iter::Vec(inner) => inner.count(),
Iter::BitmapBorrowed(inner) => inner.count(),
Iter::BitmapOwned(inner) => inner.count(),
}
}

/// Forwards to the `nth` of whichever concrete iterator this variant wraps.
///
/// The `Array` variant's result is passed through `copied()` before being returned; the other
/// variants' results are returned as-is.
fn nth(&mut self, n: usize) -> Option<Self::Item> {
    match self {
        Self::Array(it) => it.nth(n).copied(),
        Self::Vec(it) => it.nth(n),
        Self::BitmapBorrowed(it) => it.nth(n),
        Self::BitmapOwned(it) => it.nth(n),
    }
}
}

impl DoubleEndedIterator for Iter<'_> {
Expand All @@ -520,3 +550,5 @@ impl DoubleEndedIterator for Iter<'_> {
}
}
}

// No methods are overridden, so `len()` is the trait's default implementation, which is
// derived from this iterator's `size_hint()`.
impl ExactSizeIterator for Iter<'_> {}

0 comments on commit 18852a7

Please sign in to comment.