diff --git a/benches/benches/benches.rs b/benches/benches/benches.rs index 68a1217..d2d4998 100644 --- a/benches/benches/benches.rs +++ b/benches/benches/benches.rs @@ -139,6 +139,110 @@ fn grow_and_insert(c: &mut Criterion) { }); } +fn iter_union_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("iter_union_count/1m", |b| { + b.iter(|| black_box(fb_a.union(&fb_b).count())) + }); +} + +fn iter_intersect_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("iter_intersection_count/1m", |b| { + b.iter(|| black_box(fb_a.intersection(&fb_b).count())); + }); +} + +fn iter_difference_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("iter_difference_count/1m", |b| { + b.iter(|| black_box(fb_a.difference(&fb_b).count())); + }); +} + +fn iter_symmetric_difference_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("iter_symmetric_difference_count/1m", |b| { + b.iter(|| black_box(fb_a.symmetric_difference(&fb_b).count())); + }); +} + +fn union_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("union_count/1m", |b| { + b.iter(|| black_box(fb_a.union_count(&fb_b))) + }); +} + +fn intersect_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("intersection_count/1m", |b| { + b.iter(|| black_box(fb_a.intersection_count(&fb_b))); + }); +} + +fn difference_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("difference_count/1m", |b| { + b.iter(|| black_box(fb_a.difference_count(&fb_b))); + }); +} + +fn symmetric_difference_count(c: &mut Criterion) { + const N: usize = 1_000_000; + let mut fb_a = FixedBitSet::with_capacity(N); + let mut fb_b = FixedBitSet::with_capacity(N); + + fb_a.insert_range(..); + fb_b.insert_range(..); + + c.bench_function("symmetric_difference_count/1m", |b| { + b.iter(|| black_box(fb_a.symmetric_difference_count(&fb_b))); + }); +} + fn union_with(c: &mut Criterion) { const N: usize = 1_000_000; let mut fb_a = FixedBitSet::with_capacity(N); @@ -201,6 +305,14 @@ criterion_group!( iter_ones_sparse, iter_ones_all_ones, iter_ones_all_ones_rev, + iter_union_count, + iter_intersect_count, + iter_difference_count, + iter_symmetric_difference_count, + union_count, + intersect_count, + difference_count, + symmetric_difference_count, insert_range, insert, intersect_with, diff --git a/src/lib.rs b/src/lib.rs index 8b87ff3..c33c50b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -132,6 +132,11 @@ impl FixedBitSet { blocks } + #[inline] + fn batch_count_ones(blocks: impl IntoIterator) -> usize { + blocks.into_iter().map(|x| x.count_ones() as usize).sum() + } + /// Grows the internal size of the bitset before inserting a bit /// /// Unlike `insert`, this cannot panic, but may allocate if the bit is outside of the existing buffer's range. @@ -428,18 +433,16 @@ impl FixedBitSet { /// Count the number of set bits in the given bit range. /// + /// This function is potentially much faster than using `ones(other).count()`. /// Use `..` to count the whole content of the bitset. /// /// **Panics** if the range extends past the end of the bitset. #[inline] pub fn count_ones(&self, range: T) -> usize { - Masks::new(range, self.length) - .map(|(block, mask)| { - // SAFETY: Masks cannot return a block index that is out of range. - let value = unsafe { *self.get_unchecked(block) }; - (value & mask).count_ones() as usize - }) - .sum() + Self::batch_count_ones(Masks::new(range, self.length).map(|(block, mask)| { + // SAFETY: Masks cannot return a block index that is out of range. + unsafe { *self.get_unchecked(block) & mask } + })) } /// Sets every bit in the given range to the given state (`enabled`) @@ -669,6 +672,73 @@ impl FixedBitSet { } } + /// Computes how many bits would be set in the union between two bitsets. + /// + /// This is potentially much faster than using `union(other).count()`. Unlike + /// other methods like using [`union_with`] followed by [`count_ones`], this + /// does not mutate in place or require separate allocations. + #[inline] + pub fn union_count(&self, other: &FixedBitSet) -> usize { + let me = self.as_slice(); + let other = other.as_slice(); + let mut count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x | *y))); + if other.len() > me.len() { + count += Self::batch_count_ones(other[me.len()..].iter().copied()); + } else if self.len() > other.len() { + count += Self::batch_count_ones(me[other.len()..].iter().copied()); + } + count + } + + /// Computes how many bits would be set in the intersection between two bitsets. + /// + /// This is potentially much faster than using `intersection(other).count()`. Unlike + /// other methods like using [`intersect_with`] followed by [`count_ones`], this + /// does not mutate in place or require separate allocations. + #[inline] + pub fn intersection_count(&self, other: &FixedBitSet) -> usize { + Self::batch_count_ones( + self.as_slice() + .iter() + .zip(other.as_slice()) + .map(|(x, y)| (*x & *y)), + ) + } + + /// Computes how many bits would be set in the difference between two bitsets. + /// + /// This is potentially much faster than using `difference(other).count()`. Unlike + /// other methods like using [`difference_with`] followed by [`count_ones`], this + /// does not mutate in place or require separate allocations. + #[inline] + pub fn difference_count(&self, other: &FixedBitSet) -> usize { + Self::batch_count_ones( + self.as_slice() + .iter() + .zip(other.as_slice().iter()) + .map(|(x, y)| (*x & !*y)), + ) + } + + /// Computes how many bits would be set in the symmetric difference between two bitsets. + /// + /// This is potentially much faster than using `symmetric_difference(other).count()`. Unlike + /// other methods like using [`symmetric_difference_with`] followed by [`count_ones`], this + /// does not mutate in place or require separate allocations. + #[inline] + pub fn symmetric_difference_count(&self, other: &FixedBitSet) -> usize { + let me = self.as_slice(); + let other = other.as_slice(); + let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x ^ *y))); + if other.len() > me.len() { + count + Self::batch_count_ones(other[me.len()..].iter().copied()) + } else if me.len() > other.len() { + count + Self::batch_count_ones(me[other.len()..].iter().copied()) + } else { + count + } + } + /// Returns `true` if `self` has no elements in common with `other`. This /// is equivalent to checking for an empty intersection. pub fn is_disjoint(&self, other: &FixedBitSet) -> bool { @@ -1830,7 +1900,8 @@ mod tests { let mut b = FixedBitSet::with_capacity(len); a.set_range(..a_end, true); b.set_range(b_start.., true); - + let count = a.intersection_count(&b); + let iterator_count = a.intersection(&b).count(); let mut ab = a.intersection(&b).collect::(); for i in 0..b_start { @@ -1850,6 +1921,15 @@ mod tests { ab, a, "intersection and intersect_with produce the same results" ); + assert_eq!( + ab.count_ones(..), + count, + "intersection and intersection_count produce the same results" + ); + assert_eq!( + count, iterator_count, + "intersection and intersection_count produce the same results" + ); } #[test] @@ -1862,6 +1942,8 @@ mod tests { let mut b = FixedBitSet::with_capacity(b_len); a.set_range(a_start.., true); b.set_range(..b_end, true); + let count = a.union_count(&b); + let iterator_count = a.union(&b).count(); let ab = a.union(&b).collect::(); for i in a_start..a_len { assert!(ab.contains(i)); @@ -1875,6 +1957,15 @@ mod tests { a.union_with(&b); assert_eq!(ab, a, "union and union_with produce the same results"); + assert_eq!( + count, + ab.count_ones(..), + "union and union_count produce the same results" + ); + assert_eq!( + count, iterator_count, + "union and union_count produce the same results" + ); } #[test] @@ -1888,6 +1979,8 @@ mod tests { let mut b = FixedBitSet::with_capacity(b_len); a.set_range(a_start..a_end, true); b.set_range(b_start..b_len, true); + let count = a.difference_count(&b); + let iterator_count = a.difference(&b).count(); let mut a_diff_b = a.difference(&b).collect::(); for i in a_start..b_start { assert!(a_diff_b.contains(i)); @@ -1903,6 +1996,15 @@ mod tests { a_diff_b, a, "difference and difference_with produce the same results" ); + assert_eq!( + a_diff_b.count_ones(..), + count, + "difference and difference_count produce the same results" + ); + assert_eq!( + count, iterator_count, + "intersection and intersection_count produce the same results" + ); } #[test] @@ -1916,6 +2018,8 @@ mod tests { let mut b = FixedBitSet::with_capacity(b_len); a.set_range(a_start..a_end, true); b.set_range(b_start..b_len, true); + let count = a.symmetric_difference_count(&b); + let iterator_count = a.symmetric_difference(&b).count(); let a_sym_diff_b = a.symmetric_difference(&b).collect::(); for i in 0..a_start { assert!(!a_sym_diff_b.contains(i)); @@ -1935,6 +2039,15 @@ mod tests { a_sym_diff_b, a, "symmetric_difference and _with produce the same results" ); + assert_eq!( + a_sym_diff_b.count_ones(..), + count, + "symmetric_difference and _count produce the same results" + ); + assert_eq!( + count, iterator_count, + "symmetric_difference and _count produce the same results" + ); } #[test]