Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bindings CRoaring 3.0, including 64 bit bitmaps #125

Merged
merged 25 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
519 changes: 479 additions & 40 deletions croaring-sys/CRoaring/bindgen_bundled_version.rs

Large diffs are not rendered by default.

16,975 changes: 10,843 additions & 6,132 deletions croaring-sys/CRoaring/roaring.c

Large diffs are not rendered by default.

1,556 changes: 1,222 additions & 334 deletions croaring-sys/CRoaring/roaring.h

Large diffs are not rendered by default.

613 changes: 333 additions & 280 deletions croaring-sys/CRoaring/roaring.hh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion croaring-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "croaring-sys"
version = "1.1.0"
version = "2.0.0"
edition = "2021"
authors = ["croaring-rs developers"]
license = "Apache-2.0"
Expand Down
1 change: 0 additions & 1 deletion croaring-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::env;
use std::path::PathBuf;

fn main() {
println!("cargo:rerun-if-changed=CRoaring");
Expand Down
4 changes: 2 additions & 2 deletions croaring/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "croaring"
version = "1.0.1"
version = "1.1.0"
edition = "2021"
authors = ["croaring-rs developers"]
license = "Apache-2.0"
Expand All @@ -22,7 +22,7 @@ roaring = "0.10"
criterion = { version = "0.5", features = ["html_reports"] }

[dependencies]
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "1.1.0" }
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "2.0.0" }
byteorder = "1.4.3"

[[bench]]
Expand Down
149 changes: 134 additions & 15 deletions croaring/benches/benches.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use criterion::{
black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
};
use std::ops::ControlFlow;

use croaring::{Bitmap, Portable};
use croaring::{Bitmap, Bitmap64, Portable};

fn new(c: &mut Criterion) {
c.bench_function("new", |b| b.iter(Bitmap::new));
Expand Down Expand Up @@ -130,10 +131,25 @@ fn flip(c: &mut Criterion) {
}

fn to_vec(c: &mut Criterion) {
c.bench_function("to_vec", |b| {
let bitmap = Bitmap::of(&[1, 2, 3]);
const N: usize = 100_000;
let bitmap: Bitmap = random_iter().take(N).collect();
let mut g = c.benchmark_group("collect");
g.bench_function("to_vec", |b| {
b.iter(|| bitmap.to_vec());
});
g.bench_function("via_iter", |b| {
b.iter(|| bitmap.iter().collect::<Vec<_>>());
});
g.bench_function("foreach", |b| {
b.iter(|| {
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
bitmap.for_each(|item| -> ControlFlow<()> {
vec.push(item);
ControlFlow::Continue(())
});
vec
});
});
}

fn get_serialized_size_in_bytes(c: &mut Criterion) {
Expand Down Expand Up @@ -213,24 +229,35 @@ fn bulk_new(c: &mut Criterion) {
group.finish();
}

fn random_iter(c: &mut Criterion) {
/// Deterministic pseudo-random `u32` source used to build benchmark inputs.
///
/// This is a Lehmer (multiplicative congruential) generator with modulus `2^31 - 1`.
/// The iterator never ends, so callers bound it with `take` or a similar adaptor.
#[derive(Clone, Debug)]
struct RandomIter {
    /// Current generator state; also the value most recently yielded.
    x: u32,
}

impl Iterator for RandomIter {
    type Item = u32;

    /// Advance the generator and yield the new state. Never returns `None`.
    fn next(&mut self) -> Option<u32> {
        const MULTIPLIER: u64 = 742938285;
        const MODULUS: u64 = (1 << 31) - 1;
        // Multiply in 64 bits so the full product is reduced modulo `MODULUS`.
        // A 32-bit wrapping multiply would discard the high bits first, which is no
        // longer a Lehmer generator and can reach 0, where it would stay forever.
        // With a prime modulus and a non-zero seed the state stays in `1..MODULUS`.
        let next = (MULTIPLIER * u64::from(self.x)) % MODULUS;
        // `next < MODULUS < 2^31`, so this narrowing cast cannot truncate.
        self.x = next as u32;
        Some(self.x)
    }
}

/// Create a [`RandomIter`] with a fixed seed, so every call yields the same sequence.
fn random_iter() -> RandomIter {
    RandomIter { x: 20170705 }
}

fn create_random(c: &mut Criterion) {
const N: u32 = 5_000;
// Clamp values so we get some re-use of containers
const MAX: u32 = 8 * (u16::MAX as u32 + 1);

let mut group = c.benchmark_group("random_iter");
group.throughput(Throughput::Elements(N.into()));

let rand_iter = {
const MULTIPLIER: u32 = 742938285;
const MODULUS: u32 = (1 << 31) - 1;
// Super simple LCG iterator
let mut z = 20170705; // seed
std::iter::from_fn(move || {
z = (MULTIPLIER * z) % MODULUS;
Some(z % MAX)
})
};
let rand_iter = random_iter();

group.bench_function("random_adds", |b| {
b.iter(|| {
Expand All @@ -252,6 +279,96 @@ fn random_iter(c: &mut Criterion) {
});
}

/// Benchmark group comparing four ways of copying every value of a `Bitmap64`
/// into a `Vec`: `to_vec`, `for_each` with a pushing callback, `iter` + `extend`,
/// and a cursor with `read_many`.
///
/// The bitmap is built once from the range `0..N` and shared by all four cases;
/// throughput is reported as `N` elements per iteration.
fn collect_bitmap64_to_vec(c: &mut Criterion) {
const N: u64 = 1_000_000;

let mut group = c.benchmark_group("collect_bitmap64_to_vec");
group.throughput(Throughput::Elements(N.into()));
let bitmap = Bitmap64::from_range(0..N);
// NOTE(review): every case uses `iter_batched` with an empty setup closure;
// presumably this is so the large returned `Vec` is handled by criterion
// rather than dropped inside the routine -- confirm against criterion docs.
group.bench_function("to_vec", |b| {
b.iter_batched(|| (), |()| bitmap.to_vec(), BatchSize::LargeInput);
});
group.bench_function("foreach", |b| {
b.iter_batched(
|| (),
|()| {
// Pre-size the output so only the callback-driven pushes are measured,
// not reallocation.
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
// The callback never breaks, so every value is visited.
bitmap.for_each(|item| -> ControlFlow<()> {
vec.push(item);
ControlFlow::Continue(())
});
vec
},
BatchSize::LargeInput,
);
});
group.bench_function("iter", |b| {
b.iter_batched(
|| (),
|()| {
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
vec.extend(bitmap.iter());
vec
},
BatchSize::LargeInput,
);
});
group.bench_function("iter_many", |b| {
b.iter_batched(
|| (),
|()| {
// `read_many` fills an existing slice, so the buffer is zero-initialised
// to its final length instead of merely reserved.
let mut vec = vec![0; bitmap.cardinality() as usize];
let mut iter = bitmap.cursor();
// Sanity check: a single call must fill the whole buffer.
assert_eq!(iter.read_many(&mut vec), vec.len());
vec
},
BatchSize::LargeInput,
);
});

group.finish();
}

/// Benchmark group comparing three ways of walking a `Bitmap64` built from
/// `0..N`: the `iter()` iterator, a cursor driven by `next()`, and `for_each`
/// with a `ControlFlow` callback.
///
/// Every case stops as soon as it sees `END_ITER` (`N - 100`), so early exit is
/// part of what is measured. Note that throughput is still declared as `N`
/// elements even though each pass stops slightly before the end.
fn iterate_bitmap64(c: &mut Criterion) {
const N: u64 = 1_000_000;
// Value at which every traversal bails out.
const END_ITER: u64 = N - 100;

let mut group = c.benchmark_group("bitmap64_iterate");
group.throughput(Throughput::Elements(N.into()));
let bitmap = Bitmap64::from_range(0..N);
group.bench_function("iter", |b| {
b.iter(|| {
for x in bitmap.iter() {
if x == END_ITER {
break;
}
}
})
});
group.bench_function("cursor", |b| {
b.iter(|| {
let mut cursor = bitmap.cursor();
// NOTE(review): presumably `cursor.next()` is an inherent method rather than
// `Iterator::next`, which is why this is a `while let` and not a `for` -- confirm.
while let Some(x) = cursor.next() {
if x == END_ITER {
break;
}
}
})
});
group.bench_function("for_each", |b| {
b.iter(|| {
// The `ControlFlow` result is the closure's return value, so it is handed
// back to criterion rather than discarded.
bitmap.for_each(|x| -> ControlFlow<()> {
if x == END_ITER {
return ControlFlow::Break(());
}
ControlFlow::Continue(())
})
})
});

group.finish();
}

criterion_group!(
benches,
new,
Expand All @@ -269,6 +386,8 @@ criterion_group!(
serialize,
deserialize,
bulk_new,
random_iter,
create_random,
collect_bitmap64_to_vec,
iterate_bitmap64,
);
criterion_main!(benches);
81 changes: 77 additions & 4 deletions croaring/src/bitmap/imp.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::callback::CallbackWrapper;
use crate::Bitset;
use ffi::roaring_bitmap_t;
use std::convert::TryInto;
use std::ffi::{c_void, CStr};
use std::ops::{Bound, RangeBounds};
use std::{mem, ptr};
use std::ops::{Bound, ControlFlow, RangeBounds};
use std::{mem, panic, ptr};

use super::serialization::{Deserializer, Serializer};
use super::{Bitmap, Statistics};
Expand All @@ -21,7 +21,7 @@ impl Bitmap {
// (it can be moved safely), and can be freed with `free`, without freeing the underlying
// containers and auxiliary data. Ensure this is still valid every time we update
// the version of croaring.
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 2 && ffi::ROARING_VERSION_MINOR == 0);
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 3 && ffi::ROARING_VERSION_MINOR == 0);
ffi::roaring_free(p.cast::<c_void>());
result
}
Expand Down Expand Up @@ -279,6 +279,29 @@ impl Bitmap {
unsafe { ffi::roaring_bitmap_remove_checked(&mut self.bitmap, element) }
}

/// Remove many values from the bitmap
///
/// This should be faster than calling `remove` multiple times.
///
/// In order to exploit this optimization, the caller should attempt to keep values that share
/// the same high 16 bits (i.e. values that fall in the same container) as consecutive
/// elements in `elements`
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// let mut bitmap = Bitmap::of(&[1, 2, 3, 4, 5, 6, 7, 8, 9]);
/// bitmap.remove_many(&[1, 2, 3, 4, 5, 6, 7, 8]);
/// assert_eq!(bitmap.to_vec(), vec![9]);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_remove_many")]
pub fn remove_many(&mut self, elements: &[u32]) {
    // SAFETY: the pointer and the length both come from the same `elements` slice, so the
    // C side is handed exactly `elements.len()` readable `u32` values.
    unsafe {
        ffi::roaring_bitmap_remove_many(&mut self.bitmap, elements.len(), elements.as_ptr())
    }
}

/// Contains returns true if the integer element is contained in the bitmap
///
/// # Examples
Expand Down Expand Up @@ -721,6 +744,49 @@ impl Bitmap {
unsafe { ffi::roaring_bitmap_flip_inplace(&mut self.bitmap, start, end) }
}

/// Iterate over the values in the bitmap in sorted order
///
/// `f` is called once per value. If `f` returns [`ControlFlow::Break`], iteration stops and
/// that `Break` is returned. Otherwise iteration continues; if `f` never breaks,
/// `ControlFlow::Continue(())` is returned after all values are visited.
///
/// # Panics
///
/// Resumes unwinding with any panic payload captured while running the callback.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use std::ops::ControlFlow;
///
/// let bitmap = Bitmap::of(&[1, 2, 3, 14, 20, 21, 100]);
/// let mut even_nums_under_50 = vec![];
///
/// let first_over_50 = bitmap.for_each(|value| {
///     if value > 50 {
///         return ControlFlow::Break(value);
///     }
///     if value % 2 == 0 {
///         even_nums_under_50.push(value);
///     }
///     ControlFlow::Continue(())
/// });
///
/// assert_eq!(even_nums_under_50, vec![2, 14, 20]);
/// assert_eq!(first_over_50, ControlFlow::Break(100));
/// ```
#[inline]
#[doc(alias = "roaring_iterate")]
pub fn for_each<F, O>(&self, f: F) -> ControlFlow<O>
where
    F: FnMut(u32) -> ControlFlow<O>,
{
    let mut callback_wrapper = CallbackWrapper::new(f);
    let (callback, context) = callback_wrapper.callback_and_ctx();
    // SAFETY: `callback` and `context` are produced together by `callback_wrapper`, which
    // lives on this stack frame until after `roaring_iterate` has returned.
    // NOTE(review): this assumes `context` points into `callback_wrapper` -- confirm against
    // `CallbackWrapper::callback_and_ctx`.
    unsafe {
        ffi::roaring_iterate(&self.bitmap, Some(callback), context);
    }
    // The wrapper reports either the final control-flow value or a captured panic payload;
    // the latter is re-raised here, on the Rust side, once the C call has returned.
    match callback_wrapper.result() {
        Ok(cf) => cf,
        Err(e) => panic::resume_unwind(e),
    }
}

/// Returns a vector containing all of the integers stored in the Bitmap
/// in sorted order.
///
Expand Down Expand Up @@ -922,6 +988,13 @@ impl Bitmap {
/// // Exclusive ranges still step from the start, but do not include it
/// let bitmap = Bitmap::from_range_with_step((Bound::Excluded(10), Bound::Included(30)), 10);
/// assert_eq!(bitmap.to_vec(), [20, 30]);
///
/// // Ranges including max value
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 1);
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1, u32::MAX]);
///
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 3);
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1]);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_from_range")]
Expand Down
Loading