Skip to content

Commit

Permalink
make RoaringBitmap generic over its values
Browse files Browse the repository at this point in the history
This will allow us to easily implement Roaring64, a 64-bit RoaringBitmap
using the TwoLevelRoaringBitmap approach, which is up to 11x faster than
the RoaringTreemap approach and reuses most of the Roaring32 code.

Note that RoaringBitmap::full has been removed because it won't scale
to Roaring64.

Comment out RoaringTreemap for now; it will be replaced in the next commit.
  • Loading branch information
grim7reaper committed Aug 13, 2023
1 parent 4f8c798 commit c7bab3c
Show file tree
Hide file tree
Showing 56 changed files with 2,269 additions and 2,248 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ license = "MIT OR Apache-2.0"
[dependencies]
bytemuck = "1.7.3"
byteorder = "1.4.3"
retain_mut = "=0.1.7"
serde = { version = "1.0.139", optional = true }

[features]
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/benches/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use git2::FetchOptions;
use once_cell::sync::OnceCell as SyncOnceCell;

use roaring::RoaringBitmap;
use roaring::Roaring32;

static INSTANCE: SyncOnceCell<Vec<Dataset>> = SyncOnceCell::new();

Expand Down Expand Up @@ -41,7 +41,7 @@ impl IntoIterator for Datasets {

pub struct Dataset {
pub name: String,
pub bitmaps: Vec<RoaringBitmap>,
pub bitmaps: Vec<Roaring32>,
}

fn init_datasets() -> Result<PathBuf, Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -186,7 +186,7 @@ fn parse_datasets<P: AsRef<Path>>(path: P) -> Result<Vec<Dataset>, Box<dyn std::
numbers.push(n);
}

let bitmap = RoaringBitmap::from_sorted_iter(numbers.iter().copied())?;
let bitmap = Roaring32::from_sorted_iter(numbers.iter().copied())?;
numbers.clear();
bitmaps.push(bitmap);

Expand Down
114 changes: 56 additions & 58 deletions benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use criterion::{
Throughput,
};

use roaring::{MultiOps, RoaringBitmap, RoaringTreemap};
use roaring::{MultiOps, Roaring32};

use crate::datasets::Datasets;

Expand All @@ -18,13 +18,13 @@ mod datasets;
fn pairwise_binary_op_matrix(
c: &mut Criterion,
op_name: &str,
op_own_own: impl Fn(RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
op_own_ref: impl Fn(RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
op_ref_own: impl Fn(&RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
op_ref_ref: impl Fn(&RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
mut op_assign_owned: impl FnMut(&mut RoaringBitmap, RoaringBitmap),
mut op_assign_ref: impl FnMut(&mut RoaringBitmap, &RoaringBitmap),
op_len: impl Fn(&RoaringBitmap, &RoaringBitmap) -> u64,
op_own_own: impl Fn(Roaring32, Roaring32) -> Roaring32,
op_own_ref: impl Fn(Roaring32, &Roaring32) -> Roaring32,
op_ref_own: impl Fn(&Roaring32, Roaring32) -> Roaring32,
op_ref_ref: impl Fn(&Roaring32, &Roaring32) -> Roaring32,
mut op_assign_owned: impl FnMut(&mut Roaring32, Roaring32),
mut op_assign_ref: impl FnMut(&mut Roaring32, &Roaring32),
op_len: impl Fn(&Roaring32, &Roaring32) -> u64,
) {
let mut group = c.benchmark_group(format!("pairwise_{}", op_name));

Expand Down Expand Up @@ -120,7 +120,7 @@ fn pairwise_binary_op_matrix(
fn pairwise_binary_op<R, M: Measurement>(
group: &mut BenchmarkGroup<M>,
op_name: &str,
op: impl Fn(RoaringBitmap, RoaringBitmap) -> R,
op: impl Fn(Roaring32, Roaring32) -> R,
) {
for dataset in Datasets {
group.bench_function(BenchmarkId::new(op_name, &dataset.name), |b| {
Expand Down Expand Up @@ -152,17 +152,15 @@ fn creation(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("from_sorted_iter", &dataset.name), |b| {
b.iter(|| {
for bitmap_numbers in &dataset_numbers {
black_box(
RoaringBitmap::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap(),
);
black_box(Roaring32::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap());
}
})
});

group.bench_function(BenchmarkId::new("collect", &dataset.name), |b| {
b.iter(|| {
for bitmap_numbers in &dataset_numbers {
black_box(bitmap_numbers.iter().copied().collect::<RoaringBitmap>());
black_box(bitmap_numbers.iter().copied().collect::<Roaring32>());
}
})
});
Expand Down Expand Up @@ -408,15 +406,15 @@ fn deserialization(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("deserialize_from", &dataset.name), |b| {
b.iter(|| {
for buf in input.iter() {
black_box(RoaringBitmap::deserialize_from(buf.as_slice()).unwrap());
black_box(Roaring32::deserialize_from(buf.as_slice()).unwrap());
}
});
});

group.bench_function(BenchmarkId::new("deserialize_unchecked_from", &dataset.name), |b| {
b.iter(|| {
for buf in input.iter() {
black_box(RoaringBitmap::deserialize_unchecked_from(buf.as_slice()).unwrap());
black_box(Roaring32::deserialize_unchecked_from(buf.as_slice()).unwrap());
}
});
});
Expand Down Expand Up @@ -476,7 +474,7 @@ fn successive_and(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Multi And Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.intersection()),
|bitmaps: Vec<Roaring32>| black_box(bitmaps.intersection()),
BatchSize::LargeInput,
);
});
Expand All @@ -491,7 +489,7 @@ fn successive_or(c: &mut Criterion) {
for dataset in Datasets {
group.bench_function(BenchmarkId::new("Successive Or Assign Ref", &dataset.name), |b| {
b.iter(|| {
let mut output = RoaringBitmap::new();
let mut output = Roaring32::new();
for bitmap in &dataset.bitmaps {
output |= bitmap;
}
Expand All @@ -501,8 +499,8 @@ fn successive_or(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Successive Or Assign Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| {
let mut output = RoaringBitmap::new();
|bitmaps: Vec<Roaring32>| {
let mut output = Roaring32::new();
for bitmap in bitmaps {
output |= bitmap;
}
Expand All @@ -513,7 +511,7 @@ fn successive_or(c: &mut Criterion) {

group.bench_function(BenchmarkId::new("Successive Or Ref Ref", &dataset.name), |b| {
b.iter(|| {
let mut output = RoaringBitmap::new();
let mut output = Roaring32::new();
for bitmap in &dataset.bitmaps {
output = (&output) | bitmap;
}
Expand All @@ -527,7 +525,7 @@ fn successive_or(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Multi Or Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.union()),
|bitmaps: Vec<Roaring32>| black_box(bitmaps.union()),
BatchSize::LargeInput,
);
});
Expand All @@ -541,13 +539,13 @@ fn successive_or(c: &mut Criterion) {

fn is_empty(c: &mut Criterion) {
c.bench_function("is_empty true", |b| {
let bitmap = RoaringBitmap::new();
let bitmap = Roaring32::new();
b.iter(|| {
bitmap.is_empty();
});
});
c.bench_function("is_empty false", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(1);
b.iter(|| {
bitmap.is_empty();
Expand All @@ -558,21 +556,21 @@ fn is_empty(c: &mut Criterion) {
fn insert(c: &mut Criterion) {
c.bench_function("create & insert 1", |b| {
b.iter(|| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(black_box(1));
});
});

c.bench_function("insert 1", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
b.iter(|| {
bitmap.insert(black_box(1));
});
});

c.bench_function("create & insert several", |b| {
b.iter(|| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(black_box(1));
bitmap.insert(black_box(10));
bitmap.insert(black_box(100));
Expand All @@ -584,7 +582,7 @@ fn insert(c: &mut Criterion) {
});

c.bench_function("insert several", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
b.iter(|| {
bitmap.insert(black_box(1));
bitmap.insert(black_box(10));
Expand All @@ -599,7 +597,7 @@ fn insert(c: &mut Criterion) {

fn contains(c: &mut Criterion) {
c.bench_function("contains true", |b| {
let mut bitmap: RoaringBitmap = RoaringBitmap::new();
let mut bitmap: Roaring32 = Roaring32::new();
bitmap.insert(1);

b.iter(|| {
Expand All @@ -608,7 +606,7 @@ fn contains(c: &mut Criterion) {
});

c.bench_function("contains false", |b| {
let bitmap: RoaringBitmap = RoaringBitmap::new();
let bitmap: Roaring32 = Roaring32::new();

b.iter(|| {
bitmap.contains(black_box(1));
Expand All @@ -618,7 +616,7 @@ fn contains(c: &mut Criterion) {

fn remove(c: &mut Criterion) {
c.bench_function("remove 1", |b| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
b.iter(|| {
black_box(sub.remove(1000));
});
Expand All @@ -627,7 +625,7 @@ fn remove(c: &mut Criterion) {

fn remove_range_bitmap(c: &mut Criterion) {
c.bench_function("remove_range 1", |b| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
b.iter(|| {
// carefully delete part of the bitmap
// only the first iteration will actually change something
Expand All @@ -641,7 +639,7 @@ fn remove_range_bitmap(c: &mut Criterion) {
// Slower bench that creates a new bitmap on each iteration so that can benchmark
// bitmap to array conversion
b.iter(|| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
black_box(sub.remove_range(100..65_536));
assert_eq!(sub.len(), 100);
});
Expand All @@ -653,15 +651,15 @@ fn insert_range_bitmap(c: &mut Criterion) {
let mut group = c.benchmark_group("insert_range");
group.throughput(criterion::Throughput::Elements(size as u64));
group.bench_function(format!("from_empty_{}", size), |b| {
let bm = RoaringBitmap::new();
let bm = Roaring32::new();
b.iter_batched(
|| bm.clone(),
|mut bm| black_box(bm.insert_range(0..size)),
criterion::BatchSize::SmallInput,
)
});
group.bench_function(format!("pre_populated_{}", size), |b| {
let mut bm = RoaringBitmap::new();
let mut bm = Roaring32::new();
bm.insert_range(0..size);
b.iter_batched(
|| bm.clone(),
Expand All @@ -672,29 +670,29 @@ fn insert_range_bitmap(c: &mut Criterion) {
}
}

fn insert_range_treemap(c: &mut Criterion) {
for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
let mut group = c.benchmark_group("insert_range_treemap");
group.throughput(criterion::Throughput::Elements(size));
group.bench_function(format!("from_empty_{}", size), |b| {
let bm = RoaringTreemap::new();
b.iter_batched(
|| bm.clone(),
|mut bm| black_box(bm.insert_range(0..size)),
criterion::BatchSize::SmallInput,
)
});
group.bench_function(format!("pre_populated_{}", size), |b| {
let mut bm = RoaringTreemap::new();
bm.insert_range(0..size);
b.iter_batched(
|| bm.clone(),
|mut bm| black_box(bm.insert_range(0..size)),
criterion::BatchSize::SmallInput,
)
});
}
}
// fn insert_range_treemap(c: &mut Criterion) {
// for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
// let mut group = c.benchmark_group("insert_range_treemap");
// group.throughput(criterion::Throughput::Elements(size));
// group.bench_function(format!("from_empty_{}", size), |b| {
// let bm = RoaringTreemap::new();
// b.iter_batched(
// || bm.clone(),
// |mut bm| black_box(bm.insert_range(0..size)),
// criterion::BatchSize::SmallInput,
// )
// });
// group.bench_function(format!("pre_populated_{}", size), |b| {
// let mut bm = RoaringTreemap::new();
// bm.insert_range(0..size);
// b.iter_batched(
// || bm.clone(),
// |mut bm| black_box(bm.insert_range(0..size)),
// criterion::BatchSize::SmallInput,
// )
// });
// }
// }

criterion_group!(
benches,
Expand All @@ -713,7 +711,7 @@ criterion_group!(
remove,
remove_range_bitmap,
insert_range_bitmap,
insert_range_treemap,
// insert_range_treemap,
iteration,
is_empty,
serialization,
Expand Down
42 changes: 0 additions & 42 deletions src/bitmap/mod.rs

This file was deleted.

Loading

0 comments on commit c7bab3c

Please sign in to comment.