Skip to content

Commit

Permalink
64-bit Roaring Bitmaps in C (#534)
Browse files Browse the repository at this point in the history
* Add an Adaptive Radix Trie (ART)

This will be used to store the high 48 bits of 64 bit roaring bitmap entries.
For more details, see include/roaring/art/art.h

* Add a 64 bit version of roaring bitmaps in C

The idea is copied from the Java version of Roaring Bitmaps: use an Adaptive
Radix Trie (ART) to store the high 48 bits of each entry, while storing the low
16 bits in containers.

* Add benchmarks for roaring64 and roaring64map

* Support big endian and non-linux in roaring64.c

* Use void as the base node type in the ART

Also zero-instantiate structs when returning.

* Remove tests for size methods in ART and roaring64

* Fix failing Win32 test workflow

These tests were taking too long to complete.

* Hide ART and roaring64-internal types

These don't actually need to be exposed to the user, so we can declare them in
the header and keep them private. ART types don't need to be declared at all.

* Cleanups to ART

* Change art_compare_prefix to take a single length rather than two.
* Use art_compare_keys where possible.
* Use art_val_t directly (through an alias) rather than defining a separate
  type.
* Remove key_chunk from art_indexed_child_t.
* Use node->base where possible.
* Merge art_create_iterator into art_init_iterator.

* Cleanups to roaring64

* Remove unused "using namespace"
  • Loading branch information
SLieve authored Jan 8, 2024
1 parent e764a91 commit 0225f21
Show file tree
Hide file tree
Showing 10 changed files with 5,053 additions and 28 deletions.
176 changes: 176 additions & 0 deletions include/roaring/art/art.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#ifndef ART_ART_H
#define ART_ART_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
* This file contains an implementation of an Adaptive Radix Tree as described
* in https://db.in.tum.de/~leis/papers/ART.pdf.
*
* The ART contains the keys in _byte lexicographical_ order.
*
* Other features:
* * Fixed 48 bit key length: all keys are assumed to be 48 bits in size.
* This allows us to put the key and key prefixes directly in nodes, reducing
* indirection at no additional memory overhead.
* * Key compression: the only inner nodes created are at points where key
* chunks _differ_. This means that if there are two entries with different
* high 48 bits, then there is only one inner node containing the common key
* prefix, and two leaves.
* * Intrusive leaves: the leaf struct is included in user values. This removes
* a layer of indirection.
*/

// Fixed length of keys in the ART, in bytes (6 bytes = 48 bits). All keys are
// assumed to be of this length.
#define ART_KEY_BYTES 6

#ifdef __cplusplus
extern "C" {
namespace roaring {
namespace internal {
#endif

// One byte ("chunk") of a key; a full key is an array of ART_KEY_BYTES chunks.
typedef uint8_t art_key_chunk_t;
// Forward declaration only: the node struct is not defined in this header, so
// users can only handle nodes through pointers.
typedef struct art_node_s art_node_t;

/**
* Wrapper to allow an empty tree.
*
* Holds only a pointer to the root node.
* NOTE(review): presumably `root` is NULL when the tree is empty - confirm
* against the implementation.
*/
typedef struct art_s {
art_node_t *root;
} art_t;

/**
* Values inserted into the tree have to be cast-able to art_val_t (for
* example, a user struct whose first member is an art_val_t). This improves
* performance by reducing indirection.
*
* NOTE: Value pointers must be unique! This is because each value struct
* contains the key corresponding to the value.
*/
typedef struct art_val_s {
// Key this value is stored under; always ART_KEY_BYTES chunks long.
art_key_chunk_t key[ART_KEY_BYTES];
} art_val_t;

/**
* Compares two keys, returns their relative order:
* * Key 1 < key 2: returns a negative value
* * Key 1 == key 2: returns 0
* * Key 1 > key 2: returns a positive value
*
* No length is passed: as with every key in this API, both arrays are assumed
* to hold ART_KEY_BYTES chunks.
*/
int art_compare_keys(const art_key_chunk_t key1[],
const art_key_chunk_t key2[]);

/**
* Inserts the given key and value.
*
* `key` is assumed to be ART_KEY_BYTES chunks long. `val` is supplied by the
* caller and is not freed by the tree (see art_free), so the caller remains
* responsible for its lifetime.
*
* NOTE(review): the behavior when `key` is already present is not specified
* in this header - confirm against the implementation.
*/
void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val);

/**
* Erases the entry with the given key (assumed to be ART_KEY_BYTES chunks
* long).
*
* Returns the value erased, NULL if not found. The returned value is handed
* back rather than freed; presumably the caller is responsible for freeing it.
*/
art_val_t *art_erase(art_t *art, const art_key_chunk_t *key);

/**
* Returns the value associated with the given key, NULL if not found.
*
* `key` is assumed to be ART_KEY_BYTES chunks long. The tree is taken as
* const.
*/
art_val_t *art_find(const art_t *art, const art_key_chunk_t *key);

/**
* Returns true if the ART is empty, i.e. it contains no entries.
*/
bool art_is_empty(const art_t *art);

/**
* Frees the nodes of the ART except the values, which the user is expected to
* free.
*
* NOTE(review): this header does not say whether the `art_t` wrapper itself
* is freed or merely emptied - confirm before reusing `art` after this call.
*/
void art_free(art_t *art);

/**
* Returns the size in bytes of the ART. Includes size of pointers to values,
* but not the values themselves.
*
* To get a total footprint, the caller has to add the size of the values it
* inserted.
*/
size_t art_size_in_bytes(const art_t *art);

/**
* Prints the ART using printf (i.e. to stdout), useful for debugging.
*/
void art_printf(const art_t *art);

/**
* ART-internal iterator bookkeeping. Users should treat this as an opaque type.
*
* One frame is stored per level in art_iterator_t::frames.
*/
typedef struct art_iterator_frame_s {
// Node this frame refers to.
art_node_t *node;
// NOTE(review): presumably the position of the current child within `node` -
// confirm against the implementation.
uint8_t index_in_node;
} art_iterator_frame_t;

/**
* Users should only access `key` and `value` in iterators. The iterator is
* valid when `value != NULL`.
*
* The remaining fields are internal bookkeeping and should not be read or
* written by users.
*/
typedef struct art_iterator_s {
// Key of the entry the iterator is positioned at.
art_key_chunk_t key[ART_KEY_BYTES];
// Value of the entry the iterator is positioned at; NULL if invalid.
art_val_t *value;

uint8_t depth; // Key depth
uint8_t frame; // Node depth
// Storage for up to ART_KEY_BYTES frames.
art_iterator_frame_t frames[ART_KEY_BYTES];
} art_iterator_t;

/**
* Creates an iterator initialized to the first or last entry in the ART,
* depending on `first`. The iterator is not valid if there are no entries in
* the ART.
*
* The iterator is returned by value; check `value != NULL` on the result to
* test validity.
*/
art_iterator_t art_init_iterator(const art_t *art, bool first);

/**
* Returns an initialized iterator positioned at a key equal to or greater than
* the given key, if it exists.
*
* `key` is assumed to be ART_KEY_BYTES chunks long.
* NOTE(review): when no such key exists the returned iterator is presumably
* invalid (`value == NULL`) - confirm against the implementation.
*/
art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key);

/**
* Returns an initialized iterator positioned at a key greater than the given
* key, if it exists. Unlike art_lower_bound, a key equal to the given key is
* not accepted.
*
* `key` is assumed to be ART_KEY_BYTES chunks long.
* NOTE(review): when no such key exists the returned iterator is presumably
* invalid (`value == NULL`) - confirm against the implementation.
*/
art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key);

/**
* The following iterator movement functions return true if a new entry was
* encountered.
*
* art_iterator_move takes the direction as the `forward` argument.
* NOTE(review): art_iterator_next / art_iterator_prev presumably correspond to
* moving with `forward` true / false respectively - confirm against the
* implementation.
*/
bool art_iterator_move(art_iterator_t *iterator, bool forward);
bool art_iterator_next(art_iterator_t *iterator);
bool art_iterator_prev(art_iterator_t *iterator);

/**
* Moves the iterator forward to a key equal to or greater than the given key.
* Assumes the given key is greater or equal to the current position of the
* iterator.
*
* `key` is assumed to be ART_KEY_BYTES chunks long.
* NOTE(review): the meaning of the return value is not stated here;
* presumably true when the iterator ends up on a valid entry - confirm.
*/
bool art_iterator_lower_bound(art_iterator_t *iterator,
const art_key_chunk_t *key);

/**
* Inserts the value and positions the iterator at the key.
*
* `key` is assumed to be ART_KEY_BYTES chunks long. As with art_insert, `val`
* is supplied by the caller and is not freed by the tree.
*/
void art_iterator_insert(art_t *art, art_iterator_t *iterator,
const art_key_chunk_t *key, art_val_t *val);

/**
* Erases the value pointed at by the iterator. Moves the iterator to the next
* leaf. Returns the value erased or NULL if nothing was erased.
*
* The erased value is handed back rather than freed; presumably the caller is
* responsible for freeing it.
*/
art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator);

#ifdef __cplusplus
} // namespace internal
} // namespace roaring
} // extern "C"
#endif

#endif
Loading

0 comments on commit 0225f21

Please sign in to comment.