From 820b270d39a32e6eb52bd80241e6d9b08d480cef Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 15 Oct 2024 22:56:12 -0400 Subject: [PATCH] lazily compute iterator len, add iterator overrides This means the price of visiting every container is only paid when needed: e.g. when calling `.collect()` This adds more efficient overrides for `count`, `nth` and `nth_back`, and implements `FusedIterator` --- roaring/src/bitmap/container.rs | 17 ++ roaring/src/bitmap/iter.rs | 300 +++++++++++++++++++---- roaring/src/bitmap/store/bitmap_store.rs | 20 ++ roaring/src/bitmap/store/mod.rs | 32 +++ roaring/tests/iter.rs | 47 ++++ 5 files changed, 370 insertions(+), 46 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 09860b88..9b238866 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -300,6 +300,21 @@ impl Iterator for Iter<'_> { fn next(&mut self) -> Option { self.inner.next().map(|i| util::join(self.key, i)) } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.inner.count() + } + + fn nth(&mut self, n: usize) -> Option { + self.inner.nth(n).map(|i| util::join(self.key, i)) + } } impl DoubleEndedIterator for Iter<'_> { @@ -308,6 +323,8 @@ impl DoubleEndedIterator for Iter<'_> { } } +impl ExactSizeIterator for Iter<'_> {} + impl fmt::Debug for Container { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 59463a21..70ef27fa 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -1,8 +1,8 @@ use alloc::vec; -use core::iter; +use core::iter::FusedIterator; use core::slice; -use super::container::Container; +use super::container::{self, Container}; use crate::{NonSortedIntegers, RoaringBitmap}; #[cfg(not(feature = "std"))] @@ -10,126 
+10,334 @@ use alloc::vec::Vec; /// An iterator for `RoaringBitmap`. pub struct Iter<'a> { - inner: iter::Flatten>, - size_hint: u64, + front: Option>, + containers: slice::Iter<'a, Container>, + back: Option>, } /// An iterator for `RoaringBitmap`. pub struct IntoIter { - inner: iter::Flatten>, - size_hint: u64, + front: Option>, + containers: vec::IntoIter, + back: Option>, +} + +#[inline] +fn and_then_or_clear(opt: &mut Option, f: impl FnOnce(&mut T) -> Option) -> Option { + let x = f(opt.as_mut()?); + if x.is_none() { + *opt = None; + } + x } impl Iter<'_> { fn new(containers: &[Container]) -> Iter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - Iter { inner: containers.iter().flatten(), size_hint } + Iter { front: None, containers: containers.iter(), back: None } } } impl IntoIter { fn new(containers: Vec) -> IntoIter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - IntoIter { inner: containers.into_iter().flatten(), size_hint } + IntoIter { front: None, containers: containers.into_iter(), back: None } + } +} + +fn size_hint_impl( + front: &Option>, + containers: &impl AsRef<[Container]>, + back: &Option>, +) -> (usize, Option) { + let first_size = front.as_ref().map_or(0, |it| it.len()); + let last_size = back.as_ref().map_or(0, |it| it.len()); + let mut size = first_size + last_size; + for container in containers.as_ref() { + match size.checked_add(container.len() as usize) { + Some(new_size) => size = new_size, + None => return (usize::MAX, None), + } } + (size, Some(size)) } impl Iterator for Iter<'_> { type Item = u32; fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() + loop { + if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) { + return Some(x); + } + self.front = match self.containers.next() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.back, Iterator::next), + } + } } fn size_hint(&self) -> (usize, Option) { 
- if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } + size_hint_impl(&self.front, &self.containers, &self.back) } #[inline] - fn fold(self, init: B, f: F) -> B + fn fold(mut self, mut init: B, mut f: F) -> B where Self: Sized, F: FnMut(B, Self::Item) -> B, { - self.inner.fold(init, f) + if let Some(iter) = &mut self.front { + init = iter.fold(init, &mut f); + } + init = self.containers.fold(init, |acc, container| { + let iter = <&Container>::into_iter(container); + iter.fold(acc, &mut f) + }); + if let Some(iter) = &mut self.back { + init = iter.fold(init, &mut f); + }; + init + } + + fn count(self) -> usize + where + Self: Sized, + { + let mut count = self.front.map_or(0, Iterator::count); + count += self.containers.map(|container| container.len() as usize).sum::(); + count += self.back.map_or(0, Iterator::count); + count + } + + fn nth(&mut self, n: usize) -> Option { + let mut n = n; + let nth_advance = |it: &mut container::Iter| { + let len = it.len(); + if n < len { + it.nth(n) + } else { + n -= len; + None + } + }; + if let Some(x) = and_then_or_clear(&mut self.front, nth_advance) { + return Some(x); + } + for container in self.containers.by_ref() { + let len = container.len() as usize; + if n < len { + let mut front_iter = container.into_iter(); + let result = front_iter.nth(n); + self.front = Some(front_iter); + return result; + } + n -= len; + } + and_then_or_clear(&mut self.back, |it| it.nth(n)) } } impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() + loop { + if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) { + return Some(x); + } + self.back = match self.containers.next_back() { + Some(inner) => Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back), + } + } } #[inline] - fn 
rfold(self, init: Acc, fold: Fold) -> Acc + fn rfold(mut self, mut init: Acc, mut fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { - self.inner.rfold(init, fold) + if let Some(iter) = &mut self.back { + init = iter.rfold(init, &mut fold); + } + init = self.containers.rfold(init, |acc, container| { + let iter = container.into_iter(); + iter.rfold(acc, &mut fold) + }); + if let Some(iter) = &mut self.front { + init = iter.rfold(init, &mut fold); + }; + init } -} -#[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for Iter<'_> { - fn len(&self) -> usize { - self.size_hint as usize + fn nth_back(&mut self, n: usize) -> Option { + let mut n = n; + let nth_advance = |it: &mut container::Iter| { + let len = it.len(); + if n < len { + it.nth_back(n) + } else { + n -= len; + None + } + }; + if let Some(x) = and_then_or_clear(&mut self.back, nth_advance) { + return Some(x); + } + for container in self.containers.by_ref().rev() { + let len = container.len() as usize; + if n < len { + let mut front_iter = container.into_iter(); + let result = front_iter.nth_back(n); + self.back = Some(front_iter); + return result; + } + n -= len; + } + and_then_or_clear(&mut self.front, |it| it.nth_back(n)) } } +#[cfg(target_pointer_width = "64")] +impl ExactSizeIterator for Iter<'_> {} +impl FusedIterator for Iter<'_> {} + impl Iterator for IntoIter { type Item = u32; fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() + loop { + if let Some(x) = and_then_or_clear(&mut self.front, Iterator::next) { + return Some(x); + } + match self.containers.next() { + Some(inner) => self.front = Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.back, Iterator::next), + } + } } fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } + size_hint_impl(&self.front, &self.containers, &self.back) } 
#[inline] - fn fold(self, init: B, f: F) -> B + fn fold(mut self, mut init: B, mut f: F) -> B where Self: Sized, F: FnMut(B, Self::Item) -> B, { - self.inner.fold(init, f) + if let Some(iter) = &mut self.front { + init = iter.fold(init, &mut f); + } + init = self.containers.fold(init, |acc, container| { + let iter = ::into_iter(container); + iter.fold(acc, &mut f) + }); + if let Some(iter) = &mut self.back { + init = iter.fold(init, &mut f); + }; + init + } + + fn count(self) -> usize + where + Self: Sized, + { + let mut count = self.front.map_or(0, Iterator::count); + count += self.containers.map(|container| container.len() as usize).sum::(); + count += self.back.map_or(0, Iterator::count); + count + } + + fn nth(&mut self, n: usize) -> Option { + let mut n = n; + let nth_advance = |it: &mut container::Iter| { + let len = it.len(); + if n < len { + it.nth(n) + } else { + n -= len; + None + } + }; + if let Some(x) = and_then_or_clear(&mut self.front, nth_advance) { + return Some(x); + } + for container in self.containers.by_ref() { + let len = container.len() as usize; + if n < len { + let mut front_iter = container.into_iter(); + let result = front_iter.nth(n); + self.front = Some(front_iter); + return result; + } + n -= len; + } + and_then_or_clear(&mut self.back, |it| it.nth(n)) } } impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() + loop { + if let Some(x) = and_then_or_clear(&mut self.back, DoubleEndedIterator::next_back) { + return Some(x); + } + match self.containers.next_back() { + Some(inner) => self.back = Some(inner.into_iter()), + None => return and_then_or_clear(&mut self.front, DoubleEndedIterator::next_back), + } + } } #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc + fn rfold(mut self, mut init: Acc, mut fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { - self.inner.rfold(init, fold) + if let Some(iter) = &mut self.back { + 
init = iter.rfold(init, &mut fold); + } + init = self.containers.rfold(init, |acc, container| { + let iter = container.into_iter(); + iter.rfold(acc, &mut fold) + }); + if let Some(iter) = &mut self.front { + init = iter.rfold(init, &mut fold); + }; + init } -} -#[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.size_hint as usize + fn nth_back(&mut self, n: usize) -> Option { + let mut n = n; + let nth_advance = |it: &mut container::Iter| { + let len = it.len(); + if n < len { + it.nth_back(n) + } else { + n -= len; + None + } + }; + if let Some(x) = and_then_or_clear(&mut self.back, nth_advance) { + return Some(x); + } + for container in self.containers.by_ref().rev() { + let len = container.len() as usize; + if n < len { + let mut front_iter = container.into_iter(); + let result = front_iter.nth_back(n); + self.back = Some(front_iter); + return result; + } + n -= len; + } + and_then_or_clear(&mut self.front, |it| it.nth_back(n)) } } +#[cfg(target_pointer_width = "64")] +impl ExactSizeIterator for IntoIter {} +impl FusedIterator for IntoIter {} + impl RoaringBitmap { /// Iterator over each value stored in the RoaringBitmap, guarantees values are ordered by value. 
/// diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index ceaeb5c2..4b89e0e0 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -449,6 +449,24 @@ impl> Iterator for BitmapIter { self.value &= self.value - 1; Some(64 * self.key + index) } + + fn size_hint(&self) -> (usize, Option) { + let mut len: u32 = self.value.count_ones(); + if self.key < self.key_back { + for v in &self.bits.borrow()[self.key as usize + 1..self.key_back as usize] { + len += v.count_ones(); + } + len += self.value_back.count_ones(); + } + (len as usize, Some(len as usize)) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.len() + } } impl> DoubleEndedIterator for BitmapIter { @@ -473,6 +491,8 @@ impl> DoubleEndedIterator for BitmapIter { } } +impl> ExactSizeIterator for BitmapIter {} + #[inline] pub fn key(index: u16) -> usize { index as usize / 64 diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index d0661639..625b8137 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -508,6 +508,36 @@ impl Iterator for Iter<'_> { Iter::BitmapOwned(inner) => inner.next(), } } + + fn size_hint(&self) -> (usize, Option) { + match self { + Iter::Array(inner) => inner.size_hint(), + Iter::Vec(inner) => inner.size_hint(), + Iter::BitmapBorrowed(inner) => inner.size_hint(), + Iter::BitmapOwned(inner) => inner.size_hint(), + } + } + + fn count(self) -> usize + where + Self: Sized, + { + match self { + Iter::Array(inner) => inner.count(), + Iter::Vec(inner) => inner.count(), + Iter::BitmapBorrowed(inner) => inner.count(), + Iter::BitmapOwned(inner) => inner.count(), + } + } + + fn nth(&mut self, n: usize) -> Option { + match self { + Iter::Array(inner) => inner.nth(n).copied(), + Iter::Vec(inner) => inner.nth(n), + Iter::BitmapBorrowed(inner) => inner.nth(n), + Iter::BitmapOwned(inner) => inner.nth(n), + } + } } impl DoubleEndedIterator 
for Iter<'_> { @@ -520,3 +550,5 @@ impl DoubleEndedIterator for Iter<'_> { } } } + +impl ExactSizeIterator for Iter<'_> {} diff --git a/roaring/tests/iter.rs b/roaring/tests/iter.rs index 86a83245..05591681 100644 --- a/roaring/tests/iter.rs +++ b/roaring/tests/iter.rs @@ -81,6 +81,53 @@ proptest! { } } +proptest! { + #[test] + fn nth(values in btree_set(any::(), ..=10_000), nth in 0..10_005usize) { + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let mut orig_iter = bitmap.iter().fuse(); + let mut iter = bitmap.iter(); + + for _ in 0..nth { + if orig_iter.next().is_none() { + break; + } + } + let expected = orig_iter.next(); + assert_eq!(expected, iter.nth(nth)); + let expected_next = orig_iter.next(); + assert_eq!(expected_next, iter.next()); + + let mut val_iter = values.into_iter(); + assert_eq!(expected, val_iter.nth(nth)); + assert_eq!(expected_next, val_iter.next()); + } +} + +#[test] +fn huge_nth() { + let bitmap = RoaringBitmap::new(); + let mut iter = bitmap.iter(); + assert_eq!(None, iter.nth(usize::MAX)); +} + +proptest! { + #[test] + fn count(values in btree_set(any::(), ..=10_000), skip in 0..10_005usize) { + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let mut iter = bitmap.iter(); + + if let Some(n) = skip.checked_sub(1) { + iter.nth(n); + } + let expected_count = values.len().saturating_sub(skip); + let size_hint = iter.size_hint(); + assert_eq!(expected_count, size_hint.0); + assert_eq!(Some(expected_count), size_hint.1); + assert_eq!(expected_count, iter.count()); + } +} + #[test] fn rev_array() { let values = 0..100;