Skip to content

Commit

Permalink
replace RoaringTreemap by TwoLevelRoaringBitmap
Browse files Browse the repository at this point in the history
Benchmark results on insert_range (only bench available for 64-bit):

    group                       roaring64                               treemap
    -----                       ---------                               -------
    from_empty_1000             1.00     87.6±4.76ns 10.6 GElem/sec     1.65    144.2±1.28ns  6.5 GElem/sec
    from_empty_10000            1.00    166.8±9.32ns 55.8 GElem/sec     1.28    213.6±7.26ns 43.6 GElem/sec
    from_empty_8589934590       1.01    151.5±0.99ms 52.8 GElem/sec     1.00    149.7±1.00ms 53.5 GElem/sec
    pre_populated_1000          1.00   139.3±19.83ns  6.7 GElem/sec     1.30   180.8±20.70ns  5.2 GElem/sec
    pre_populated_10000         1.00   235.4±83.29ns 39.6 GElem/sec     1.26  295.9±106.25ns 31.5 GElem/sec
    pre_populated_8589934590    1.00     74.8±2.56ms 107.0 GElem/sec    1.01     75.3±1.82ms 106.3 GElem/sec
  • Loading branch information
grim7reaper committed Aug 13, 2023
1 parent c7bab3c commit d8bb37b
Show file tree
Hide file tree
Showing 43 changed files with 1,275 additions and 3,384 deletions.
68 changes: 33 additions & 35 deletions benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
use itertools::Itertools;
use std::cmp::Reverse;
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};

use criterion::measurement::Measurement;
use crate::datasets::Datasets;
use criterion::{
black_box, criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
Throughput,
black_box, criterion_group, criterion_main, measurement::Measurement, BatchSize,
BenchmarkGroup, BenchmarkId, Criterion, Throughput,
};
use itertools::Itertools;
use roaring::{MultiOps, Roaring32, Roaring64};
use std::{
cmp::Reverse,
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign},
};

use roaring::{MultiOps, Roaring32};

use crate::datasets::Datasets;

mod datasets;

Expand Down Expand Up @@ -670,29 +668,29 @@ fn insert_range_bitmap(c: &mut Criterion) {
}
}

// fn insert_range_treemap(c: &mut Criterion) {
// for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
// let mut group = c.benchmark_group("insert_range_treemap");
// group.throughput(criterion::Throughput::Elements(size));
// group.bench_function(format!("from_empty_{}", size), |b| {
// let bm = RoaringTreemap::new();
// b.iter_batched(
// || bm.clone(),
// |mut bm| black_box(bm.insert_range(0..size)),
// criterion::BatchSize::SmallInput,
// )
// });
// group.bench_function(format!("pre_populated_{}", size), |b| {
// let mut bm = RoaringTreemap::new();
// bm.insert_range(0..size);
// b.iter_batched(
// || bm.clone(),
// |mut bm| black_box(bm.insert_range(0..size)),
// criterion::BatchSize::SmallInput,
// )
// });
// }
// }
/// Benchmarks `Roaring64::insert_range(0..size)` for several range sizes.
///
/// For every size two cases are measured within the `insert_range_roaring64`
/// group: inserting into an empty bitmap (`from_empty_<size>`) and inserting
/// into a bitmap that already had the same range inserted
/// (`pre_populated_<size>`).
fn insert_range_roaring64(c: &mut Criterion) {
    let sizes = [1_000_u64, 10_000u64, 2 * (u32::MAX as u64)];
    for size in sizes.iter().copied() {
        let mut group = c.benchmark_group("insert_range_roaring64");
        // Throughput is reported in elements, one element per inserted value.
        group.throughput(criterion::Throughput::Elements(size));

        group.bench_function(format!("from_empty_{}", size), |bencher| {
            let empty = Roaring64::new();
            bencher.iter_batched(
                // Each batch works on its own copy so the template stays empty.
                || empty.clone(),
                |mut bitmap| black_box(bitmap.insert_range(0..size)),
                criterion::BatchSize::SmallInput,
            )
        });

        group.bench_function(format!("pre_populated_{}", size), |bencher| {
            // The template is built inside the closure, before measurement starts.
            let mut filled = Roaring64::new();
            filled.insert_range(0..size);
            bencher.iter_batched(
                || filled.clone(),
                |mut bitmap| black_box(bitmap.insert_range(0..size)),
                criterion::BatchSize::SmallInput,
            )
        });
    }
}

criterion_group!(
benches,
Expand All @@ -711,7 +709,7 @@ criterion_group!(
remove,
remove_range_bitmap,
insert_range_bitmap,
// insert_range_treemap,
insert_range_roaring64,
iteration,
is_empty,
serialization,
Expand Down
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ use std::fmt;

mod core;

/// A compressed bitmap with u64 values. Implemented as a `BTreeMap` of `RoaringBitmap`s.
// pub mod treemap;
// pub use treemap::RoaringTreemap;
mod value;
pub use value::{ContainerKey, Value, ValueRange};

mod roaring32;
pub use roaring32::Roaring32;

mod roaring64;
pub use roaring64::Roaring64;

pub use self::core::RoaringBitmap;

/// An error type that is returned when an iterator isn't sorted.
Expand All @@ -53,7 +53,7 @@ impl fmt::Display for NonSortedIntegers {
impl Error for NonSortedIntegers {}

/// A [`Iterator::collect`] blanket implementation that provides extra methods for [`Roaring32`]
/// and [`RoaringTreemap`].
/// and [`Roaring64`].
///
/// When merging multiple bitmaps with the same operation it's usually faster to call the
/// method in this trait than to write your own for loop and merge the bitmaps yourself.
Expand Down
140 changes: 140 additions & 0 deletions src/roaring64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
use crate::{ContainerKey, RoaringBitmap, Value, ValueRange};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::{
io,
ops::{Bound, RangeBounds, RangeInclusive},
};

/// A compressed bitmap for 64-bit values.
///
/// This is a type alias for [`RoaringBitmap`] specialised to `u64` values.
///
/// # Examples
///
/// ```rust
/// use roaring::Roaring64;
///
/// let mut rb = Roaring64::new();
///
/// // insert all primes less than 10
/// rb.insert(2);
/// rb.insert(3);
/// rb.insert(5);
/// rb.insert(7);
/// println!("total bits set to true: {}", rb.len());
/// ```
pub type Roaring64 = RoaringBitmap<u64>;

impl Value for u64 {
    type Key = u64;
    type Range = RangeInclusive<Self>;

    /// Splits a value into its container key (the upper 48 bits) and its
    /// index inside that container (the lower 16 bits).
    fn split(self) -> (Self::Key, u16) {
        let key = self >> 16;
        // Truncating cast: keeps only the low 16 bits.
        let index = self as u16;
        (key, index)
    }

    /// Rebuilds a value from a container key and an index; inverse of `split`.
    fn join(key: Self::Key, index: u16) -> Self {
        // The low 16 bits of `key << 16` are zero, so OR-ing the index in is
        // the same as adding it.
        (key << 16) | u64::from(index)
    }

    /// Converts arbitrary range bounds into an inclusive range.
    ///
    /// Returns `None` when an excluded bound cannot be turned into an
    /// inclusive one without overflow, or when the resulting range is empty.
    fn range(range: impl RangeBounds<Self>) -> Option<Self::Range> {
        let first: u64 = match range.start_bound() {
            Bound::Unbounded => 0,
            Bound::Included(&value) => value,
            Bound::Excluded(&value) => value.checked_add(1)?,
        };
        let last: u64 = match range.end_bound() {
            Bound::Unbounded => u64::MAX,
            Bound::Included(&value) => value,
            Bound::Excluded(&value) => value.checked_sub(1)?,
        };

        if first <= last {
            Some(first..=last)
        } else {
            None
        }
    }

    /// Upper bound on the number of containers.
    fn max_containers() -> usize {
        // Theoretically, u64::MAX + 1.
        // Realistically we're probably capped at usize anyway.
        usize::MAX
    }
}

impl ContainerKey for u64 {
#[inline(always)]
fn size() -> usize {
// Key is coded on 48-bit, the 16 upper ones are unused.
6
}

fn write(self, writer: &mut impl WriteBytesExt) -> io::Result<()> {
writer.write_u48::<LittleEndian>(self)
}

fn read(reader: &mut impl ReadBytesExt) -> io::Result<Self> {
reader.read_u48::<LittleEndian>()
}
}

impl ValueRange<u64> for RangeInclusive<u64> {
    type KeyIterator = RangeInclusive<u64>;

    /// Container key and in-container index of the first value of the range.
    fn start(&self) -> (<u64 as Value>::Key, u16) {
        // Resolves to the inherent `RangeInclusive::start`, not this method.
        self.start().split()
    }

    /// Container key and in-container index of the last value of the range.
    fn end(&self) -> (<u64 as Value>::Key, u16) {
        // Resolves to the inherent `RangeInclusive::end`, not this method.
        self.end().split()
    }

    /// Number of containers touched by the range, saturating at `usize::MAX`.
    ///
    /// Keys span up to 48 bits, so on targets where `usize` is narrower the
    /// exact count may not be representable; in that case `usize::MAX` is
    /// returned instead of a truncated (or overflowing) value.
    ///
    /// NOTE(review): assumes `start <= end`, as produced by `u64::range`.
    fn containers_count(&self) -> usize {
        let first = ValueRange::start(self).0;
        let last = ValueRange::end(self).0;
        let span = last - first;
        if span >= usize::MAX as u64 {
            // `span + 1` does not fit in `usize`.
            usize::MAX
        } else {
            span as usize + 1
        }
    }

    /// Iterator over every container key covered by the range, in order.
    fn keys(self) -> Self::KeyIterator {
        let first = ValueRange::start(&self).0;
        let last = ValueRange::end(&self).0;
        first..=last
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// `(value, key, index)` triples: `value` splits into `(key, index)` and
    /// `(key, index)` joins back into `value`.
    const CASES: [(u64, u64, u16); 8] = [
        (0x0000_0000_0000_0000, 0x0000_0000_0000, 0x0000),
        (0x0000_0000_0000_0001, 0x0000_0000_0000, 0x0001),
        (0x0000_0000_FFFF_FFFE, 0x0000_0000_FFFF, 0xFFFE),
        (0x0000_0000_FFFF_FFFF, 0x0000_0000_FFFF, 0xFFFF),
        (0x0000_0001_0000_0000, 0x0000_0001_0000, 0x0000),
        (0x0000_0001_0000_0001, 0x0000_0001_0000, 0x0001),
        (0xFFFF_FFFF_FFFF_FFFE, 0xFFFF_FFFF_FFFF, 0xFFFE),
        (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF, 0xFFFF),
    ];

    #[test]
    fn split() {
        for &(value, key, index) in CASES.iter() {
            assert_eq!((key, index), value.split());
        }
    }

    #[test]
    fn join() {
        for &(value, key, index) in CASES.iter() {
            assert_eq!(value, u64::join(key, index));
        }
    }

    #[test]
    fn range() {
        // Half-open range: the excluded end becomes inclusive.
        let half_open = u64::range(1..6);
        assert_eq!(Some(1..=5), half_open);

        // Missing bounds default to the extremes of `u64`.
        let open_ended = u64::range(1..);
        assert_eq!(Some(1..=u64::MAX), open_ended);
        let unbounded = u64::range(..);
        assert_eq!(Some(0..=u64::MAX), unbounded);

        // Empty ranges are rejected.
        let empty = u64::range(5..5);
        assert_eq!(None, empty);

        // Inclusive ranges pass through unchanged.
        let single = u64::range(16..=16);
        assert_eq!(Some(16..=16), single)
    }
}
17 changes: 0 additions & 17 deletions src/treemap/arbitrary.rs

This file was deleted.

Loading

0 comments on commit d8bb37b

Please sign in to comment.