From 18852a74b3216dbf86f5999d6e0efa3b53272b62 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 15 Oct 2024 22:56:12 -0400 Subject: [PATCH] lazily compute iterator len This means the price of visiting every container is only paid when needed. Fixes #294 --- roaring/src/bitmap/container.rs | 17 +++ roaring/src/bitmap/iter.rs | 168 +++++++++++++++++------ roaring/src/bitmap/store/bitmap_store.rs | 20 +++ roaring/src/bitmap/store/mod.rs | 32 +++++ 4 files changed, 192 insertions(+), 45 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 09860b88..9b238866 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -300,6 +300,21 @@ impl Iterator for Iter<'_> { fn next(&mut self) -> Option { self.inner.next().map(|i| util::join(self.key, i)) } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.inner.count() + } + + fn nth(&mut self, n: usize) -> Option { + self.inner.nth(n).map(|i| util::join(self.key, i)) + } } impl DoubleEndedIterator for Iter<'_> { @@ -308,6 +323,8 @@ impl DoubleEndedIterator for Iter<'_> { } } +impl ExactSizeIterator for Iter<'_> {} + impl fmt::Debug for Container { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 59463a21..e4966b57 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -1,134 +1,212 @@ use alloc::vec; -use core::iter; use core::slice; use super::container::Container; use crate::{NonSortedIntegers, RoaringBitmap}; +use crate::bitmap::container; #[cfg(not(feature = "std"))] use alloc::vec::Vec; /// An iterator for `RoaringBitmap`. 
pub struct Iter<'a> { - inner: iter::Flatten>, - size_hint: u64, + front: Option>, + containers: slice::Iter<'a, Container>, + back: Option>, } /// An iterator for `RoaringBitmap`. pub struct IntoIter { - inner: iter::Flatten>, - size_hint: u64, + front: Option>, + containers: vec::IntoIter, + back: Option>, +} + +#[inline] +fn and_then_or_clear(opt: &mut Option, f: impl FnOnce(&mut T) -> Option) -> Option { + let x = f(opt.as_mut()?); + if x.is_none() { + *opt = None; + } + x } impl Iter<'_> { fn new(containers: &[Container]) -> Iter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - Iter { inner: containers.iter().flatten(), size_hint } + Iter { front: None, containers: containers.iter(), back: None } } } impl IntoIter { fn new(containers: Vec) -> IntoIter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - IntoIter { inner: containers.into_iter().flatten(), size_hint } + IntoIter { front: None, containers: containers.into_iter(), back: None } } } +fn size_hint_impl( + front: &Option>, + containers: &impl AsRef<[Container]>, + back: &Option>, +) -> (usize, Option) { + let first_size = front.as_ref().map_or(0, |it| it.len()); + let last_size = back.as_ref().map_or(0, |it| it.len()); + let mut size = first_size + last_size; + for container in containers.as_ref() { + match size.checked_add(container.len() as usize) { + Some(new_size) => size = new_size, + None => return (usize::MAX, None), + } + } + (size, Some(size)) +} + impl Iterator for Iter<'_> { type Item = u32; fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() + loop { + if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) { + return Some(x); + } + self.front = match self.containers.next() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.back, Iterator::next), + } + } } fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as 
usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } + size_hint_impl(&self.front, &self.containers, &self.back) } #[inline] - fn fold(self, init: B, f: F) -> B + fn fold(mut self, mut init: B, mut f: F) -> B where Self: Sized, F: FnMut(B, Self::Item) -> B, { - self.inner.fold(init, f) + if let Some(iter) = &mut self.front { + init = iter.fold(init, &mut f); + } + init = self.containers.fold(init, |acc, container| { + let iter = <&Container>::into_iter(container); + iter.fold(acc, &mut f) + }); + if let Some(iter) = &mut self.back { + init = iter.fold(init, &mut f); + }; + init } } impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() + loop { + if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) { + return Some(x); + } + self.back = match self.containers.next_back() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back), + } + } } #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc + fn rfold(mut self, mut init: Acc, mut fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { - self.inner.rfold(init, fold) + if let Some(iter) = &mut self.back { + init = iter.rfold(init, &mut fold); + } + init = self.containers.rfold(init, |acc, container| { + let iter = container.into_iter(); + iter.rfold(acc, &mut fold) + }); + if let Some(iter) = &mut self.front { + init = iter.rfold(init, &mut fold); + }; + init } } #[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for Iter<'_> { - fn len(&self) -> usize { - self.size_hint as usize - } -} +impl ExactSizeIterator for Iter<'_> {} impl Iterator for IntoIter { type Item = u32; fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() + loop { + if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) { + return Some(x); + } + match 
self.containers.next() { + Some(inner) => self.front = Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.back, Iterator::next), + } + } } fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } + size_hint_impl(&self.front, &self.containers, &self.back) } #[inline] - fn fold(self, init: B, f: F) -> B + fn fold(mut self, mut init: B, mut f: F) -> B where Self: Sized, F: FnMut(B, Self::Item) -> B, { - self.inner.fold(init, f) + if let Some(iter) = &mut self.front { + init = iter.fold(init, &mut f); + } + init = self.containers.fold(init, |acc, container| { + let iter = ::into_iter(container); + iter.fold(acc, &mut f) + }); + if let Some(iter) = &mut self.back { + init = iter.fold(init, &mut f); + }; + init } } impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() + loop { + if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) { + return Some(x); + } + match self.containers.next_back() { + Some(inner) => self.back = Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back), + } + } } #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc + fn rfold(mut self, mut init: Acc, mut fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { - self.inner.rfold(init, fold) + if let Some(iter) = &mut self.back { + init = iter.rfold(init, &mut fold); + } + init = self.containers.rfold(init, |acc, container| { + let iter = container.into_iter(); + iter.rfold(acc, &mut fold) + }); + if let Some(iter) = &mut self.front { + init = iter.rfold(init, &mut fold); + }; + init } } #[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.size_hint as usize - } -} +impl ExactSizeIterator for IntoIter {} impl 
RoaringBitmap { /// Iterator over each value stored in the RoaringBitmap, guarantees values are ordered by value. diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index ceaeb5c2..20da9baf 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -449,6 +449,24 @@ impl> Iterator for BitmapIter { self.value &= self.value - 1; Some(64 * self.key + index) } + + fn size_hint(&self) -> (usize, Option) { + let mut len: u32 = self.value.count_ones(); + if self.key < self.key_back { + for v in &self.bits.borrow()[self.key as usize + 1..self.key_back as usize] { + len += v.count_ones(); + } + len += self.value_back.count_ones(); + } + (len as usize, Some(len as usize)) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.len() + } } impl> DoubleEndedIterator for BitmapIter { @@ -473,6 +491,8 @@ impl> DoubleEndedIterator for BitmapIter { } } +impl> ExactSizeIterator for BitmapIter {} + #[inline] pub fn key(index: u16) -> usize { index as usize / 64 diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index d0661639..625b8137 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -508,6 +508,36 @@ impl Iterator for Iter<'_> { Iter::BitmapOwned(inner) => inner.next(), } } + + fn size_hint(&self) -> (usize, Option) { + match self { + Iter::Array(inner) => inner.size_hint(), + Iter::Vec(inner) => inner.size_hint(), + Iter::BitmapBorrowed(inner) => inner.size_hint(), + Iter::BitmapOwned(inner) => inner.size_hint(), + } + } + + fn count(self) -> usize + where + Self: Sized, + { + match self { + Iter::Array(inner) => inner.count(), + Iter::Vec(inner) => inner.count(), + Iter::BitmapBorrowed(inner) => inner.count(), + Iter::BitmapOwned(inner) => inner.count(), + } + } + + fn nth(&mut self, n: usize) -> Option { + match self { + Iter::Array(inner) => inner.nth(n).copied(), + Iter::Vec(inner) => inner.nth(n), + 
Iter::BitmapBorrowed(inner) => inner.nth(n), + Iter::BitmapOwned(inner) => inner.nth(n), + } + } } impl DoubleEndedIterator for Iter<'_> { @@ -520,3 +550,5 @@ impl DoubleEndedIterator for Iter<'_> { } } } + +impl ExactSizeIterator for Iter<'_> {}