diff --git a/Cargo.lock b/Cargo.lock index 2597e1c62..d7c1b31ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,6 +108,24 @@ version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "arbtest" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23909d5fb517fac2a8a4c887e847dbe41dd22ec46914586f5727980d0a193fdc" +dependencies = [ + "arbitrary", +] + [[package]] name = "archery" version = "1.2.1" @@ -261,6 +279,12 @@ version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +[[package]] +name = "bitmaps" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d084b0137aaa901caf9f1e8b21daa6aa24d41cd806e111335541eff9683bd6" + [[package]] name = "block-buffer" version = "0.10.4" @@ -556,6 +580,32 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap 4.5.2", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion-plot" version = "0.5.0" @@ -781,6 +831,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 2.0.52", +] + [[package]] name = "derive_builder" version = "0.20.0" @@ -1271,6 +1332,15 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "imbl-sized-chunks" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144006fb58ed787dcae3f54575ff4349755b00ccc99f4b4873860b654be1ed63" +dependencies = [ + "bitmaps", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -1774,7 +1844,7 @@ dependencies = [ "codespan", "codespan-reporting", "comrak", - "criterion", + "criterion 0.4.0", "cxx", "cxx-build", "indexmap 1.9.3", @@ -1787,13 +1857,13 @@ dependencies = [ "malachite-q", "md-5", "metrics", + "nickel-lang-funcarray", "nickel-lang-utils", "once_cell", "pkg-config", "pretty", "pretty_assertions", "regex", - "rpds", "rustyline", "rustyline-derive", "serde", @@ -1821,6 +1891,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "nickel-lang-funcarray" +version = "0.1.0" +dependencies = [ + "arbitrary", + "arbtest", + "criterion 0.5.1", + "imbl-sized-chunks", + "rpds", + "serde", +] + [[package]] name = "nickel-lang-lsp" version = "1.8.0" @@ -1832,7 +1914,7 @@ dependencies = [ "clap 4.5.2", "codespan", "codespan-reporting", - "criterion", + "criterion 0.4.0", "crossbeam", "csv", "derive_more", @@ -1863,7 +1945,7 @@ name = "nickel-lang-utils" version = "0.1.0" dependencies = [ "codespan", - "criterion", + "criterion 0.4.0", "nickel-lang-core", "pprof", "serde", @@ -2124,7 +2206,7 @@ checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059" dependencies = [ "backtrace", "cfg-if", - "criterion", + "criterion 0.4.0", "findshlibs", "inferno", "libc", @@ -2453,7 +2535,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a0e15515d3ce3313324d842629ea4905c25a13f81953eadb88f85516f59290a4" dependencies = [ "archery", - "serde", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d1771546a..81d1f6769 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "core", "cli", + "funcarray", "lsp/nls", "lsp/lsp-harness", "utils", @@ -22,6 +23,7 @@ readme = "README.md" [workspace.dependencies] nickel-lang-core = { version = "0.9.0", path = "./core", default-features = false } +nickel-lang-funcarray = { version = "0.1.0", path = "./funcarray" } nickel-lang-utils = { version = "0.1.0", path = "./utils" } lsp-harness = { version = "0.1.0", path = "./lsp/lsp-harness" } diff --git a/core/Cargo.toml b/core/Cargo.toml index 427b0250b..cdb1585b1 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -43,6 +43,7 @@ codespan.workspace = true codespan-reporting.workspace = true cxx = { workspace = true, optional = true } logos.workspace = true +nickel-lang-funcarray.workspace = true smallvec.workspace = true serde = { workspace = true, features = ["derive"] } serde_json.workspace = true @@ -82,7 +83,6 @@ tree-sitter-nickel = { workspace = true, optional = true } metrics = { workspace = true, optional = true } strsim = "0.10.0" -rpds = { version = "1.1.0", features = ["serde"] } [dev-dependencies] pretty_assertions.workspace = true diff --git a/core/benches/arrays.rs b/core/benches/arrays.rs index 2b50086d2..f45a80219 100644 --- a/core/benches/arrays.rs +++ b/core/benches/arrays.rs @@ -20,7 +20,10 @@ fn ncl_random_array(len: usize) -> String { numbers.push(RichTerm::from(Term::Num(Number::from(acc)))); } - let xs = RichTerm::from(Term::Array(Array::new(numbers), ArrayAttrs::default())); + let xs = RichTerm::from(Term::Array( + Array::collect(numbers.into_iter()), + ArrayAttrs::default(), + )); let doc: DocBuilder<_, ()> = xs.pretty(&BoxAllocator); let mut out = Vec::new(); doc.render(80, &mut out).unwrap(); diff --git a/core/src/cache.rs b/core/src/cache.rs index b47b8a6c8..135b9161c 
100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -605,7 +605,10 @@ impl Cache { )) } else { Ok(( - attach_pos(Term::Array(Array::new(terms), Default::default()).into()), + attach_pos( + Term::Array(Array::collect(terms.into_iter()), Default::default()) + .into(), + ), ParseErrors::default(), )) } diff --git a/core/src/closurize.rs b/core/src/closurize.rs index 813bb1274..7eec2d420 100644 --- a/core/src/closurize.rs +++ b/core/src/closurize.rs @@ -191,13 +191,12 @@ impl Closurize for Array { env: Environment, btype: BindingType, ) -> Self { - self.iter() + self.into_iter() .map(|t| { if should_share(&t.term) { - t.clone() - .closurize_as_btype(cache, env.clone(), btype.clone()) + t.closurize_as_btype(cache, env.clone(), btype.clone()) } else { - t.clone() + t } }) .collect() diff --git a/core/src/eval/mod.rs b/core/src/eval/mod.rs index 0e707a689..9369a03b8 100644 --- a/core/src/eval/mod.rs +++ b/core/src/eval/mod.rs @@ -807,8 +807,8 @@ impl VirtualMachine { // See the comment on the `BinaryOp::ArrayConcat` match arm. Term::Array(terms, attrs) if !attrs.closurized => { let closurized_array = terms - .iter() - .map(|t| t.clone().closurize(&mut self.cache, env.clone())) + .into_iter() + .map(|t| t.closurize(&mut self.cache, env.clone())) .collect(); let closurized_ctrs = attrs @@ -1274,8 +1274,8 @@ pub fn subst( } Term::Array(ts, mut attrs) => { let ts = ts - .iter() - .map(|t| subst(cache, t.clone(), initial_env, env)) + .into_iter() + .map(|t| subst(cache, t, initial_env, env)) .collect(); // cd [^subst-closurized-false] diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index eb377840f..0a54f3228 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -30,7 +30,7 @@ use crate::{ serialize::ExportFormat, stdlib::internals, term::{ - array::{Array, ArrayAttrs, OutOfBoundError}, + array::{Array, ArrayAttrs}, make as mk_term, record::*, string::NickelString, @@ -533,10 +533,10 @@ impl VirtualMachine { // structures. 
It maintains the invariant that any data structure only // contain indices (that is, currently, variables). let ts = ts - .iter() + .into_iter() .map(|t| { let t_with_ctrs = RuntimeContract::apply_all( - t.clone(), + t, attrs.pending_contracts.iter().cloned(), pos.into_inherited(), ); @@ -702,9 +702,9 @@ impl VirtualMachine { } Term::Array(ts, attrs) if !ts.is_empty() => { let terms = seq_terms( - ts.iter().map(|t| { + ts.into_iter().map(|t| { let t_with_ctr = RuntimeContract::apply_all( - t.clone(), + t, attrs.pending_contracts.iter().cloned(), pos.into_inherited(), ) @@ -1096,14 +1096,14 @@ impl VirtualMachine { } Term::Array(ts, attrs) if !ts.is_empty() => { let ts = ts - .iter() + .into_iter() .map(|t| { mk_term::op1( UnaryOp::Force { ignore_not_exported, }, RuntimeContract::apply_all( - t.clone(), + t, attrs.pending_contracts.iter().cloned(), pos.into_inherited(), ), @@ -1952,9 +1952,9 @@ impl VirtualMachine { { if let Term::Array(array, _) = notes_term.into() { let notes = array - .iter() + .into_iter() .map(|element| { - let term = element.term.clone().into_owned(); + let term = element.term.into_owned(); if let Term::Str(s) = term { Ok(s.into_inner()) @@ -2333,6 +2333,7 @@ impl VirtualMachine { BinaryOp::ArrayConcat => match_sharedterm!(match (t1) { Term::Array(ts1, attrs1) => match_sharedterm!(match (t2) { Term::Array(ts2, attrs2) => { + let mut ts2 = ts2; // NOTE: the [eval_closure] function in [eval] should've made sure // that the array is closurized. We leave a debug_assert! here just // in case something goes wrong in the future. 
If the assert failed, @@ -2409,32 +2410,27 @@ impl VirtualMachine { let ctrs_right_empty = ctrs_right_sieve.iter().all(Option::is_none); let ctrs_right_dedup = ctrs_right_sieve.into_iter().flatten(); - let arr = if ctrs_right_empty { - let mut elems = ts2.into_reversed_vector(); + let ctrs_left_empty = ctrs_left_dedup.is_empty(); - for t in ts1.iter_rev() { - elems.push_back_mut( + let arr = if ctrs_right_empty && ctrs_left_empty { + ts2.prepend(ts1); + ts2 + } else if ctrs_right_empty { + let ts1_vec: Vec<_> = ts1 + .into_iter() + .map(|t| { RuntimeContract::apply_all( - t.clone(), + t, ctrs_left_dedup.iter().cloned(), pos1, ) - .closurize(&mut self.cache, env1.clone()), - ); - } - Array::from_reversed_vector(elems) + .closurize(&mut self.cache, env1.clone()) + }) + .collect(); + + ts2.prepend_iter(ts1_vec.into_iter()); + ts2 } else { - // NOTE: To avoid the extra Vec allocation, we could use - // Rc<[T]>::new_uninit_slice() and fill up the slice manually, but - // that's a nightly-only experimental API. Note that collecting into - // an Rc<[T]> will also allocate a intermediate vector, unless the - // input iterator implements the nightly-only API TrustedLen, and - // Array's iterator currently doesn't. Even if we could implement - // TrustedLen we would have to contend with the fact that .chain(..) - // tends to be slow. 
- // - Rc<[T]>::from_iter docs: - // https://doc.rust-lang.org/std/rc/struct.Rc.html#impl-FromIterator%3CT%3E - // - chain issue: https://github.com/rust-lang/rust/issues/63340 let mut ts: Vec = Vec::with_capacity(ts1.len() + ts2.len()); ts.extend(ts1.iter().map(|t| { @@ -2455,7 +2451,7 @@ impl VirtualMachine { .closurize(&mut self.cache, env2.clone()) })); - Array::new(ts) + Array::collect(ts.into_iter()) }; let attrs = ArrayAttrs { @@ -3435,9 +3431,7 @@ impl VirtualMachine { )); }; - let result = array.slice(start_as_usize, end_as_usize); - - if let Err(OutOfBoundError) = result { + if end_as_usize < start_as_usize || end_as_usize > array.len() { return Err(EvalError::Other( format!( "array/slice: index out of bounds. Expected `start <= end <= {}`, but \ @@ -3446,8 +3440,9 @@ impl VirtualMachine { ), pos_op, )); - }; + } + array.slice(start_as_usize, end_as_usize); Ok(Closure { body: RichTerm::new(Term::Array(array, attrs), pos_op_inh), env: env3, @@ -3795,17 +3790,17 @@ fn eq( // record contracts with default values, wrapped terms, etc. 
let mut eqs = l1 - .iter() + .into_iter() .map(|t| { let pos = t.pos.into_inherited(); - RuntimeContract::apply_all(t.clone(), a1.pending_contracts.iter().cloned(), pos) + RuntimeContract::apply_all(t, a1.pending_contracts.iter().cloned(), pos) .closurize(cache, env1.clone()) }) .collect::>() .into_iter() - .zip(l2.iter().map(|t| { + .zip(l2.into_iter().map(|t| { let pos = t.pos.into_inherited(); - RuntimeContract::apply_all(t.clone(), a2.pending_contracts.iter().cloned(), pos) + RuntimeContract::apply_all(t, a2.pending_contracts.iter().cloned(), pos) .closurize(cache, env2.clone()) })) .collect::>(); diff --git a/core/src/parser/tests.rs b/core/src/parser/tests.rs index f443bd7d9..28a7d03a9 100644 --- a/core/src/parser/tests.rs +++ b/core/src/parser/tests.rs @@ -54,7 +54,7 @@ fn mk_symbolic_single_chunk(prefix: &str, s: &str) -> RichTerm { ( FieldPathElem::Ident("fragments".into()), Field::from(RichTerm::from(Array( - Array::new(std::iter::once(mk_single_chunk(s))), + Array::collect(std::iter::once(mk_single_chunk(s))), Default::default(), ))), ), diff --git a/core/src/serialize.rs b/core/src/serialize.rs index 64eed6de5..f00dcfcb0 100644 --- a/core/src/serialize.rs +++ b/core/src/serialize.rs @@ -198,7 +198,7 @@ pub fn deserialize_array<'de, D>(deserializer: D) -> Result<(Array, ArrayAttrs), where D: Deserializer<'de>, { - let terms = Array::new(Vec::deserialize(deserializer)?); + let terms = Array::deserialize(deserializer)?; Ok((terms, Default::default())) } diff --git a/core/src/term/array.rs b/core/src/term/array.rs index 4201c8e0c..1cb799ad5 100644 --- a/core/src/term/array.rs +++ b/core/src/term/array.rs @@ -1,3 +1,5 @@ +use nickel_lang_funcarray::FunctionalArray; + use super::*; #[derive(Debug, Default, PartialEq, Clone)] @@ -38,105 +40,14 @@ impl ArrayAttrs { } } -/// A Nickel array, represented as a view (slice) into a shared backing array. The view is -/// delimited by `start` (included) and `end` (excluded). 
This allows to take the tail of an array, -/// or an arbitrary slice, in constant time, providing actual linear time iteration when -/// imlementing recursive functions, such as folds, for example. -#[derive(Debug, Clone, PartialEq, Default)] -pub struct Array { - inner: rpds::Vector, - start: usize, - end: usize, -} - -pub struct OutOfBoundError; - -impl Array { - /// Creates a Nickel array from reference-counted slice. - pub fn new(iter: impl IntoIterator) -> Self { - iter.into_iter().collect() - } - - /// Resize the view to be a a sub-view of the current one, by considering a slice `start` - /// (included) to `end` (excluded). - /// - /// The parameters must satisfy `0 <= start <= end <= self.end - self.start`. Otherwise, - /// `Err(..)` is returned. - pub fn slice(&mut self, start: usize, end: usize) -> Result<(), OutOfBoundError> { - if start > end || end > self.len() { - return Err(OutOfBoundError); - } - - let prev_start = self.start; - self.start = prev_start + start; - self.end = prev_start + end; - - Ok(()) - } - - /// Returns the effective length of the array. - pub fn len(&self) -> usize { - self.end - self.start - } - - /// Returns `true` if the array is empty. - pub fn is_empty(&self) -> bool { - self.end == self.start - } - - /// Returns a reference to the term at the given index. - pub fn get(&self, idx: usize) -> Option<&RichTerm> { - self.inner.get(self.inner.len() - self.start - 1 - idx) - } - - /// Discards the first `diff` terms of the array. - pub fn advance_by(mut self, diff: usize) -> Self { - self.start += usize::min(diff, self.len()); - self - } - - /// Returns an iterator of references over the array. 
- pub fn iter(&self) -> impl Iterator + '_ { - self.inner.iter().rev().skip(self.start).take(self.len()) - } - - pub fn iter_rev(&self) -> impl Iterator + '_ { - self.inner - .iter() - .skip(self.inner.len() - self.end) - .take(self.len()) - } - - pub fn from_reversed_vector(inner: rpds::Vector) -> Self { - Self { - start: 0, - end: inner.len(), - inner, - } - } - - pub fn into_reversed_vector(mut self) -> rpds::Vector { - if self.end != self.inner.len() { - // There's no efficient way to chop off the beginning of a vector, so - // in this case we just need to copy it. - self.iter_rev().cloned().collect() - } else { - for _ in 0..self.start { - self.inner.drop_last_mut(); - } - self.inner - } - } -} +pub type Array = FunctionalArray; +// TODO: one common use of this collect function is `arr.into_iter().map(|_| ...).collect()`. +// Maybe it's worth having an optimized `map_in_place` function. impl FromIterator for Array { fn from_iter>(iter: T) -> Self { - // Ugh. rpds::Vector doesn't support reverse-in-place + // This needs an extra allocation, because FunctionalArray only supports non-allocating construction from reversed iterators. 
let items = iter.into_iter().collect::>(); - let inner = items.into_iter().rev().collect::>(); - let start = 0; - let end = inner.len(); - - Self { inner, start, end } + Array::collect(items.into_iter()) } } diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 488dcf448..fe7f543df 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -2353,11 +2353,10 @@ impl Traverse for RichTerm { ) } Term::Array(ts, attrs) => { - let ts_res = Array::new( - ts.iter() - .map(|t| t.clone().traverse(f, order)) - .collect::, _>>()?, - ); + let ts_res = ts + .into_iter() + .map(|t| t.traverse(f, order)) + .collect::>()?; RichTerm::new(Term::Array(ts_res, attrs), pos) } @@ -2738,8 +2737,8 @@ pub mod make { }; ( $( $terms:expr ),* ) => { { - let ts = $crate::term::array::Array::new( - [$( $crate::term::RichTerm::from($terms) ),*] + let ts = $crate::term::array::Array::collect( + [$( $crate::term::RichTerm::from($terms) ),*].into_iter() ); $crate::term::RichTerm::from(Term::Array(ts, ArrayAttrs::default())) } diff --git a/funcarray/.gitignore b/funcarray/.gitignore new file mode 100644 index 000000000..ea8c4bf7f --- /dev/null +++ b/funcarray/.gitignore @@ -0,0 +1 @@ +/target diff --git a/funcarray/Cargo.toml b/funcarray/Cargo.toml new file mode 100644 index 000000000..bf2a0efa3 --- /dev/null +++ b/funcarray/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "nickel-lang-funcarray" +version = "0.1.0" +edition = "2021" + +[dependencies] +imbl-sized-chunks = "0.1.2" +serde.workspace = true + +[dev-dependencies] +arbitrary = { version = "1.3.2", features = ["derive"] } +arbtest = "0.3.1" +criterion = "0.5.1" +rpds = "1.1.0" + +[[bench]] +name = "rpds_comparison" +harness = false + +[[bench]] +name = "array" +harness = false diff --git a/funcarray/benches/array.rs b/funcarray/benches/array.rs new file mode 100644 index 000000000..59aad3598 --- /dev/null +++ b/funcarray/benches/array.rs @@ -0,0 +1,68 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; 
+use nickel_lang_funcarray::FunctionalArray; + +pub fn prepend(c: &mut Criterion) { + let big_input = [0; 10000]; + let small_input = [1; 100]; + let mut group = c.benchmark_group("collect"); + + group.bench_function("prepend 10 + 10000, N=8", |b| { + let mut big: FunctionalArray<_, 8> = FunctionalArray::collect(big_input.iter().copied()); + let small: FunctionalArray<_, 8> = + FunctionalArray::collect(small_input[..10].iter().copied()); + b.iter(|| big.prepend(small.clone())) + }); + + group.bench_function("prepend 100 + 10000, N=8", |b| { + let mut big: FunctionalArray<_, 8> = FunctionalArray::collect(big_input.iter().copied()); + let small: FunctionalArray<_, 8> = FunctionalArray::collect(small_input.iter().copied()); + b.iter(|| big.prepend(small.clone())) + }); + + group.bench_function("prepend 10 + 10000, N=64", |b| { + let mut big: FunctionalArray<_, 64> = FunctionalArray::collect(big_input.iter().copied()); + let small: FunctionalArray<_, 64> = + FunctionalArray::collect(small_input[..10].iter().copied()); + b.iter(|| big.prepend(small.clone())) + }); + + group.bench_function("prepend 100 + 10000, N=64", |b| { + let mut big: FunctionalArray<_, 64> = FunctionalArray::collect(big_input.iter().copied()); + let small: FunctionalArray<_, 64> = FunctionalArray::collect(small_input.iter().copied()); + b.iter(|| big.prepend(small.clone())) + }); +} + +pub fn iter_slice(c: &mut Criterion) { + let input = [0; 10000]; + let mut group = c.benchmark_group("collect"); + + group.bench_function("slice 100 out of 1k, N=8", |b| { + let mut arr: FunctionalArray<_, 8> = + FunctionalArray::collect(input[..1000].iter().copied()); + arr.slice(500, 600); + b.iter(|| black_box(arr.iter().count())); + }); + + group.bench_function("slice 100 out of 10k, N=8", |b| { + let mut arr: FunctionalArray<_, 8> = FunctionalArray::collect(input.iter().copied()); + arr.slice(5000, 5100); + b.iter(|| black_box(arr.iter().count())); + }); + + group.bench_function("slice 100 out of 1k, N=64", |b| { 
+ let mut arr: FunctionalArray<_, 64> = + FunctionalArray::collect(input[..1000].iter().copied()); + arr.slice(500, 600); + b.iter(|| black_box(arr.iter().count())); + }); + + group.bench_function("slice 100 out of 10k, N=64", |b| { + let mut arr: FunctionalArray<_, 64> = FunctionalArray::collect(input.iter().copied()); + arr.slice(5000, 5100); + b.iter(|| black_box(arr.iter().count())); + }); +} + +criterion_group!(benches, prepend, iter_slice); +criterion_main!(benches); diff --git a/funcarray/benches/rpds_comparison.rs b/funcarray/benches/rpds_comparison.rs new file mode 100644 index 000000000..96e88b092 --- /dev/null +++ b/funcarray/benches/rpds_comparison.rs @@ -0,0 +1,76 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use nickel_lang_funcarray::Vector; + +pub fn collect(c: &mut Criterion) { + let input = vec![0; 10000]; + let mut group = c.benchmark_group("collect"); + + group.bench_function("ours 10000, N=8", |b| { + b.iter(|| black_box(input.iter().copied().collect::>())) + }); + + group.bench_function("ours 10000, N=32", |b| { + b.iter(|| black_box(input.iter().copied().collect::>())) + }); + + group.bench_function("ours 10000, N=64", |b| { + b.iter(|| black_box(input.iter().copied().collect::>())) + }); + + group.bench_function("rpds 10000", |b| { + b.iter(|| black_box(input.iter().copied().collect::>())) + }); +} + +pub fn count(c: &mut Criterion) { + let input = vec![0; 10000]; + let vec8: Vector = input.iter().copied().collect(); + let vec32: Vector = input.iter().copied().collect(); + let vec64: Vector = input.iter().copied().collect(); + let rpds: rpds::Vector = input.iter().copied().collect(); + let mut group = c.benchmark_group("iter"); + + group.bench_function("ours 10000, N=8", |b| { + b.iter(|| black_box(vec8.iter().count())); + }); + + group.bench_function("ours 10000, N=32", |b| { + b.iter(|| black_box(vec32.iter().count())); + }); + + group.bench_function("ours 10000, N=64", |b| { + b.iter(|| 
black_box(vec64.iter().count())); + }); + + group.bench_function("rpds 10000", |b| { + b.iter(|| black_box(rpds.iter().count())); + }); +} + +pub fn reverse_count(c: &mut Criterion) { + let input = vec![0; 10000]; + let vec8: Vector = input.iter().copied().collect(); + let vec32: Vector = input.iter().copied().collect(); + let vec64: Vector = input.iter().copied().collect(); + let rpds: rpds::Vector = input.iter().copied().collect(); + let mut group = c.benchmark_group("iter_rev"); + + group.bench_function("ours 10000, N=8", |b| { + b.iter(|| black_box(vec8.rev_iter().count())); + }); + + group.bench_function("ours 10000, N=32", |b| { + b.iter(|| black_box(vec32.rev_iter().count())); + }); + + group.bench_function("ours 10000, N=64", |b| { + b.iter(|| black_box(vec64.rev_iter().count())); + }); + + group.bench_function("rpds 10000", |b| { + b.iter(|| black_box(rpds.iter().rev().count())); + }); +} + +criterion_group!(benches, collect, count, reverse_count); +criterion_main!(benches); diff --git a/funcarray/src/functional_array.rs b/funcarray/src/functional_array.rs new file mode 100644 index 000000000..016213393 --- /dev/null +++ b/funcarray/src/functional_array.rs @@ -0,0 +1,265 @@ +use std::ops::Index; + +use crate::vector::{RevIntoIter, RevIter}; + +use super::Vector; + +/// A persistent list with fast random access. +/// +/// The `FunctionalArray` container is so called because it has some array-like +/// performance characteristics (like fast random access and fast slicing), +/// while also being fast at patterns that are common in functional languages +/// (like prepending elements). +/// +/// This is implemented internally as a tree, and the parameter `N` controls its +/// branching factor. For performance, it should always be a power of 2. Values +/// between `8` and `64` are pretty reasonable.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct FunctionalArray { + rev_vec: Vector, + // Our slice involves the range of indices [start, end), like most slicing. + // But since we work in reverse, our "first" element is at `end - 1`. + start: usize, + end: usize, +} + +impl Default for FunctionalArray { + fn default() -> Self { + FunctionalArray { + rev_vec: Default::default(), + start: 0, + end: 0, + } + } +} + +impl serde::Serialize for FunctionalArray { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeSeq; + + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for elt in self.iter() { + seq.serialize_element(elt)?; + } + seq.end() + } +} + +impl<'de, T: Clone + serde::Deserialize<'de>, const N: usize> serde::Deserialize<'de> + for FunctionalArray +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let vec: Vec = Vec::deserialize(deserializer)?; + Ok(FunctionalArray::collect(vec.into_iter())) + } +} + +impl FunctionalArray { + /// Create a new `FunctionalArray` out of a double-ended iterator. + /// + /// `FunctionalArray` doesn't implement `FromIterator` because for efficient + /// creation it needs to iterate in reverse. + /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// assert_eq!(arr[0], 0); + /// assert_eq!(arr[5], 5); + /// ``` + pub fn collect>(iter: I) -> Self { + let rev_vec: Vector = iter.rev().collect(); + Self { + end: rev_vec.len(), + rev_vec, + start: 0, + } + } + + /// The number of elements in this array. 
+ /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// assert_eq!(arr.len(), 6); + /// ``` + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns `true` if the length is zero. + /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// assert!(!arr.is_empty()); + /// assert!(FunctionalArray::::default().is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.end == self.start + } + + /// Gets an element at a given index, or `None` if `idx` is out-of-bounds. + /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// assert_eq!(arr.get(2), Some(&2)); + /// assert_eq!(arr.get(6), None); + /// ``` + pub fn get(&self, idx: usize) -> Option<&T> { + self.end + .checked_sub(idx) + .and_then(|i| i.checked_sub(1)) + .and_then(|i| self.rev_vec.get(i)) + } + + /// Adds an element to the beginning of this array. + /// + /// Runs in time complexity `O(log n)` where `n` is the array length. + /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let mut arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// arr.push_front(6); + /// assert_eq!(arr.into_iter().collect::>(), vec![6, 0, 1, 2, 3, 4, 5]); + /// ``` + pub fn push_front(&mut self, elt: T) { + // If `end` is already at the end of `rev_vec` then this is a cheap no-op. + self.rev_vec.truncate(self.end); + self.rev_vec.push(elt); + self.end += 1; + } + + /// Removes and returns the element at the beginning of this array, or + /// `None` if we're empty. + /// + /// Runs in time complexity `O(log self.len())`. 
+ /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let mut arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// assert_eq!(arr.pop_front(), Some(0)); + /// assert_eq!(arr.into_iter().collect::>(), vec![1, 2, 3, 4, 5]); + /// ``` + pub fn pop_front(&mut self) -> Option { + if self.is_empty() { + None + } else { + self.rev_vec.truncate(self.end); + self.end -= 1; + self.rev_vec.pop() + } + } + + /// Returns an iterator over references to array elements. + pub fn iter(&self) -> impl Iterator { + self.into_iter() + } + + /// Prepends another array to the beginning of this array. + /// + /// Runs in time complexity `O(other.len() + log self.len())`. In particular, + /// you should arrange things so that `self` is long and `other` is short. + /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let mut arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// let other = FunctionalArray::<_, 32>::collect([7, 6].into_iter()); + /// arr.prepend(other); + /// assert_eq!(arr.into_iter().collect::>(), vec![7, 6, 0, 1, 2, 3, 4, 5]); + /// ``` + pub fn prepend(&mut self, other: Self) { + // If `end` is already at the end of `rev_vec` then this is a cheap no-op. + self.rev_vec.truncate(self.end); + self.end += other.len(); + self.rev_vec.extend(other.rev_vec); + } + + pub fn prepend_iter(&mut self, other: impl DoubleEndedIterator) { + // If `end` is already at the end of `rev_vec` then this is a cheap no-op. + self.rev_vec.truncate(self.end); + let len_before = self.rev_vec.len(); + self.rev_vec.extend(other.rev()); + self.end += self.rev_vec.len() - len_before; + } + + /// Replace this array by the subslice from index `from` (inclusive) to index `to` (exclusive). 
+ /// + /// # Examples + /// + /// ```rust + /// # use nickel_lang_funcarray::FunctionalArray; + /// let mut arr = FunctionalArray::<_, 32>::collect([0, 1, 2, 3, 4, 5].into_iter()); + /// arr.slice(1, 5); + /// assert_eq!(arr.clone().into_iter().collect::>(), vec![1, 2, 3, 4]); + /// arr.slice(1, 3); + /// assert_eq!(arr.into_iter().collect::>(), vec![2, 3]); + /// ``` + // We can't use the index trait here, because we'd have to return a &Self. + pub fn slice(&mut self, from: usize, to: usize) { + assert!(from <= to); + assert!(to <= self.len()); + let old_end = self.end; + self.end = old_end - from; + self.start = old_end - to; + } +} + +impl IntoIterator for FunctionalArray { + type Item = T; + type IntoIter = std::iter::Take>; + + fn into_iter(self) -> Self::IntoIter { + if let Some(last_idx) = self.end.checked_sub(1) { + let len = self.len(); + self.rev_vec.into_rev_iter_starting_at(last_idx).take(len) + } else { + // Avoid `into_rev_iter_starting_at` because it's inclusive of its + // index and will fail for an empty vec. + self.rev_vec.into_rev_iter().take(0) + } + } +} + +impl<'a, T: Clone, const N: usize> IntoIterator for &'a FunctionalArray { + type Item = &'a T; + type IntoIter = std::iter::Take>; + + fn into_iter(self) -> Self::IntoIter { + if let Some(last_idx) = self.end.checked_sub(1) { + self.rev_vec.rev_iter_starting_at(last_idx).take(self.len()) + } else { + // Avoid `rev_iter_starting_at` because it's inclusive of its + // index and will fail for an empty vec. + self.rev_vec.rev_iter().take(0) + } + } +} + +impl Index for FunctionalArray { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + self.get(index).expect("index out of bounds") + } +} diff --git a/funcarray/src/lib.rs b/funcarray/src/lib.rs new file mode 100644 index 000000000..4ea0837cd --- /dev/null +++ b/funcarray/src/lib.rs @@ -0,0 +1,31 @@ +//! This crate provides persistent data structures tailored to Nickel's needs. +//! +//! 
[`Vector`] is a persistent vector (also known as a "bitmapped vector trie"), +//! as described [here](https://hypirion.com/musings/understanding-persistent-vector-pt-1). +//! The same structure is implemented in [rpds](https://crates.io/crates/rpds), but our +//! implementation is faster for Nickel's use-cases: +//! - rpds's internal nodes are implemented with `Vec`, meaning that there's a double pointer +//! indirection. We store our internal nodes inline. +//! - rpds wraps its leaves in `Rc` pointers, but we are mainly interested in storing things +//! that are already reference-counted under the hood. We store our leaves inline, and +//! require that they be `Clone`. +//! - we have optimized implementations of `Extend`, and support fast iteration over +//! subslices. +//! +//! [`FunctionalArray`] backs the implementation of arrays in Nickel. It's basically a backwards +//! [`Vector`] with support for slicing. It's backwards in order to support efficient access +//! and modification at the beginning. + +// TODO: +// - benchmarks for Array + +// Not yet implemented (do we need them?) +// - deletion +// - mutable indexing +// - mutable iteration + +mod functional_array; +pub(crate) mod vector; + +pub use functional_array::FunctionalArray; +pub use vector::Vector; diff --git a/funcarray/src/vector.rs b/funcarray/src/vector.rs new file mode 100644 index 000000000..70152d5d3 --- /dev/null +++ b/funcarray/src/vector.rs @@ -0,0 +1,776 @@ +use std::{iter::Peekable, ops::Index, rc::Rc}; + +use imbl_sized_chunks::Chunk; + +// In principle we could decouple the size of the interior nodes from the size of the leaves. +// This might make sense when `T` is large, because the interior nodes are always pointer-sized. +type Interior = Chunk>, N>; +type ChunkIter = imbl_sized_chunks::sized_chunk::Iter; +type InteriorChunkIter = ChunkIter>, N>; + +// Can we improve the memory layout?
We like N to be a power of 2 for +// performance (because it allows adjusting the indices using just bitwise +// operations), but it would also be cool if the total size of the node were a +// nice round number (so it would exactly fit in a small integer number of cache +// lines). The discriminant makes it hard to have both of these at once. +// Since we always know (based on the tree height) which type we *expect* a node +// to have, we could use a `union` instead of an `enum` (at the cost of lots of +// unsafe code). +// +// It would be nice to force `N` to be a power of 2 (for example, by +// parametrizing with `B` and setting `N = 1 << B`). This sort of needs +// the `generic_const_exprs` feature to work, though. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum Node { + Leaf { data: Chunk }, + Interior { children: Interior }, +} + +fn split_index(idx: usize, height: u8) -> (usize, usize) { + let factor = N.pow(height.into()); + (idx / factor, idx % factor) +} + +impl Node { + fn len(&self) -> usize { + match self { + Node::Leaf { data } => data.len(), + Node::Interior { children } => { + // TODO: this could be faster if we also stored the tree height. We could calculate + // the size of the packed part without iterating over it. + children.iter().map(|c| c.len()).sum() + } + } + } + + pub fn get(&self, height: u8, idx: usize) -> Option<&T> { + match self { + Node::Leaf { data } => { + debug_assert_eq!(height, 0); + data.get(idx) + } + Node::Interior { children } => { + let (bucket_idx, child_idx) = split_index::(idx, height); + children + .get(bucket_idx) + .and_then(|child| child.get(height - 1, child_idx)) + } + } + } + + // `idx` can point at an existing index, or one past the end. 
+ pub fn set(&mut self, elt: T, idx: usize, height: u8) { + match self { + Node::Leaf { data } => { + debug_assert_eq!(height, 0); + debug_assert!(idx <= data.len()); + if idx < data.len() { + data.set(idx, elt); + } else { + data.push_back(elt); + } + } + Node::Interior { children } => { + let (bucket_idx, child_idx) = split_index::(idx, height); + assert!(height >= 1); + assert!(bucket_idx <= children.len()); + if bucket_idx < children.len() { + Rc::make_mut(&mut children[bucket_idx]).set(elt, child_idx, height - 1); + } else { + let mut leaf = Chunk::new(); + leaf.push_back(elt); + + let mut child = Node::Leaf { data: leaf }; + for _ in 1..height { + let mut children = Chunk::new(); + children.push_back(Rc::new(child)); + child = Node::Interior { children }; + } + children.push_back(Rc::new(child)); + } + } + } + } + + // Returns true if popping made this subtree empty. + fn pop(&mut self) -> (T, bool) { + match self { + Node::Leaf { data } => { + debug_assert!(!data.is_empty()); + let ret = data.pop_back(); + (ret, data.is_empty()) + } + Node::Interior { children } => { + let (ret, child_empty) = + Rc::make_mut(children.last_mut().expect("empty interior node")).pop(); + if child_empty { + children.pop_back(); + } + (ret, children.is_empty()) + } + } + } + + // Assumes that the length is less than this node's current length. + fn truncate(&mut self, len: usize, height: u8) { + match self { + Node::Leaf { data } => { + debug_assert!(height == 0); + data.drop_right(len); + } + Node::Interior { children } => { + // If `len` is small enough, we may just want to drop some children + // and their entire subtrees. 
+ let max_child_len = N.pow(u32::from(height)); + let num_full_children = len / max_child_len; + let extra = len % max_child_len; + if extra > 0 { + children.drop_right(num_full_children + 1); + Rc::make_mut(&mut children[num_full_children]).truncate(extra, height - 1); + } else { + children.drop_right(num_full_children); + } + } + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Vector { + // TODO: we currently allocate for an empty vector. Try to avoid it. + root: Rc>, + length: usize, + // TODO: could save some space by taking 8 bits out of length + height: u8, +} + +#[derive(Debug, Clone)] +pub struct Iter<'a, T, const N: usize> { + stack: Vec>>>, + leaf: std::slice::Iter<'a, T>, +} + +impl<'a, T, const N: usize> Iterator for Iter<'a, T, N> { + type Item = &'a T; + + fn next(&mut self) -> Option { + if let Some(ret) = self.leaf.next() { + Some(ret) + } else { + let height = self.stack.len(); + let mut next = loop { + match self.stack.last_mut() { + Some(iter) => { + if let Some(next) = iter.next() { + break next; + } else { + self.stack.pop(); + } + } + None => { + return None; + } + } + }; + + let cur_len = self.stack.len(); + for _ in cur_len..height { + let Node::Interior { children } = next.as_ref() else { + unreachable!(); + }; + let mut children_iter = children.iter(); + next = children_iter.next().expect("empty interior node"); + self.stack.push(children_iter); + } + + let Node::Leaf { data } = next.as_ref() else { + unreachable!(); + }; + debug_assert!(!data.is_empty()); + self.leaf = data.iter(); + self.leaf.next() + } + } +} + +// We haven't implemented DoubleEndedIterator; implementing a reverse iterator +// -- while less flexible -- is simpler. The extra thing you'd get from +// DoubleEndedIterator is the ability to alternate taking from the beginning and +// the end. 
+#[derive(Debug, Clone)] +pub struct RevIter<'a, T, const N: usize> { + inner: Iter<'a, T, N>, +} + +impl<'a, T, const N: usize> Iterator for RevIter<'a, T, N> { + type Item = &'a T; + + fn next(&mut self) -> Option { + if let Some(ret) = self.inner.leaf.next_back() { + Some(ret) + } else { + let height = self.inner.stack.len(); + let mut next = loop { + match self.inner.stack.last_mut() { + Some(iter) => { + if let Some(next) = iter.next_back() { + break next; + } else { + self.inner.stack.pop(); + } + } + None => { + return None; + } + } + }; + + let cur_len = self.inner.stack.len(); + for _ in cur_len..height { + let Node::Interior { children } = next.as_ref() else { + unreachable!(); + }; + let mut children_iter = children.iter(); + next = children_iter.next_back().expect("empty interior node"); + self.inner.stack.push(children_iter); + } + + let Node::Leaf { data } = next.as_ref() else { + unreachable!(); + }; + debug_assert!(!data.is_empty()); + self.inner.leaf = data.iter(); + self.inner.leaf.next_back() + } + } +} + +pub struct IntoIter { + stack: Vec>, + leaf: ChunkIter, +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + if let Some(ret) = self.leaf.next() { + Some(ret) + } else { + let height = self.stack.len(); + let mut next = loop { + match self.stack.last_mut() { + Some(iter) => { + if let Some(next) = iter.next() { + break next; + } else { + self.stack.pop(); + } + } + None => { + return None; + } + } + }; + + let cur_len = self.stack.len(); + for _ in cur_len..height { + let Node::Interior { children } = Rc::unwrap_or_clone(next) else { + unreachable!(); + }; + let mut children_iter = children.into_iter(); + next = children_iter.next().expect("empty interior node"); + self.stack.push(children_iter); + } + + let Node::Leaf { data } = Rc::unwrap_or_clone(next) else { + unreachable!(); + }; + debug_assert!(!data.is_empty()); + self.leaf = data.into_iter(); + self.leaf.next() + } + } +} + +pub struct RevIntoIter { + 
inner: IntoIter, +} + +impl Iterator for RevIntoIter { + type Item = T; + + fn next(&mut self) -> Option { + if let Some(ret) = self.inner.leaf.next_back() { + Some(ret) + } else { + let height = self.inner.stack.len(); + let mut next = loop { + match self.inner.stack.last_mut() { + Some(iter) => { + if let Some(next) = iter.next_back() { + break next; + } else { + self.inner.stack.pop(); + } + } + None => { + return None; + } + } + }; + + let cur_len = self.inner.stack.len(); + for _ in cur_len..height { + let Node::Interior { children } = Rc::unwrap_or_clone(next) else { + unreachable!(); + }; + let mut children_iter = children.into_iter(); + next = children_iter.next_back().expect("empty interior node"); + self.inner.stack.push(children_iter); + } + + let Node::Leaf { data } = Rc::unwrap_or_clone(next) else { + unreachable!(); + }; + debug_assert!(!data.is_empty()); + self.inner.leaf = data.into_iter(); + self.inner.leaf.next_back() + } + } +} + +impl Extend for Vector { + fn extend>(&mut self, iter: I) { + // Make the iterator peekable, because we need to check if there's an + // element remaining before we mutate the tree to make room for it. + let mut iter = iter.into_iter().peekable(); + + // Extends a node from an iterator, but does not increase the height of + // the node. If the node fills up, the iterator may not be fully consumed. + // + // Returns the number of elements consumed from the iterator. + fn extend_rec, const N: usize>( + iter: &mut Peekable, + node: &mut Interior, + height: u8, + ) -> usize { + debug_assert!(height >= 1); + let mut consumed = 0; + + if height == 1 { + // If there's a leaf that isn't filled, fill it. + if let Some(last_child) = node.last_mut() { + // Usually, we assert that there's a last child because the + // interior nodes are guaranteed to be non-empty. But within + // this function we sometimes create empty interior nodes to + // be filled later. 
+ // TODO: can avoid the clone if it's already full + let Node::Leaf { data } = Rc::make_mut(last_child) else { + unreachable!(); + }; + let old_len = data.len(); + data.extend(iter.take(N - data.len())); + consumed += data.len() - old_len; + } + + while !node.is_full() && iter.peek().is_some() { + let data: Chunk = iter.take(N).collect(); + consumed += data.len(); + node.push_back(Rc::new(Node::Leaf { data })); + } + } else { + if let Some(child) = node.last_mut() { + let Node::Interior { children } = Rc::make_mut(child) else { + unreachable!(); + }; + + consumed += extend_rec(iter, children, height - 1); + } + + while !node.is_full() && iter.peek().is_some() { + let mut new_child: Interior = Chunk::new(); + consumed += extend_rec(iter, &mut new_child, height - 1); + node.push_back(Rc::new(Node::Interior { + children: new_child, + })); + } + } + + consumed + } + + while iter.peek().is_some() { + let consumed = match Rc::make_mut(&mut self.root) { + Node::Leaf { data } => { + let old_len = data.len(); + data.extend((&mut iter).take(N - data.len())); + data.len() - old_len + } + Node::Interior { children } => extend_rec(&mut iter, children, self.height), + }; + self.length += consumed; + + // Check if there's more left in the iterator, and add a level if there is. + if iter.peek().is_some() { + self.add_level(); + } + } + } +} + +fn height_for_length(length: usize) -> u8 { + // Length zero through N has height zero, length N + 1 through N^2 has height 1, etc. + // The unwrap is fine unless someone has a usize that's more than 256 bits. 
+ length.saturating_sub(1).max(1).ilog(N).try_into().unwrap() +} + +impl Vector { + pub fn new() -> Self { + Self { + root: Rc::new(Node::Leaf { data: Chunk::new() }), + length: 0, + height: 0, + } + } + + pub fn len(&self) -> usize { + self.length + } + + pub fn is_empty(&self) -> bool { + self.length == 0 + } +} + +impl Vector { + fn is_packed(&self) -> bool { + fn is_packed_rec(n: &Node, right_most: bool) -> bool { + match n { + Node::Leaf { data } => data.is_full() || right_most, + Node::Interior { children } => { + if let Some((tail, others)) = children.split_last() { + others.iter().all(|n| is_packed_rec(n, false)) && is_packed_rec(tail, true) + } else { + debug_assert!(false, "empty node"); + false + } + } + } + } + + is_packed_rec(&self.root, true) + } + + pub fn check_invariants(&self) { + assert!(self.is_packed()); + assert_eq!(self.length, self.root.len()); + if let Node::Interior { children } = self.root.as_ref() { + assert!(children.len() > 1); + } + assert_eq!(self.height, height_for_length::(self.len())); + } + + fn is_full(&self) -> bool { + self.length == N.pow(u32::from(self.height) + 1) + } + + pub fn get(&self, idx: usize) -> Option<&T> { + self.root.get(self.height, idx) + } + + // Increases the height of the tree by one, temporarily breaking the invariant that + // the root must have at least two children. + fn add_level(&mut self) { + let old_root = std::mem::replace( + &mut self.root, + Rc::new(Node::Interior { + children: Chunk::new(), + }), + ); + + // TODO: maybe we can avoid the make_mut and the fallible destructuring? + // It seems a little tricky to do so without increasing some ref-counts. 
+ let Node::Interior { children } = Rc::make_mut(&mut self.root) else { + unreachable!(); + }; + children.push_back(old_root); + self.height += 1; + } + + pub fn push(&mut self, elt: T) { + if self.is_full() { + self.add_level(); + } + let idx = self.len(); + Rc::make_mut(&mut self.root).set(elt, idx, self.height); + self.length += 1; + } + + pub fn pop(&mut self) -> Option { + if self.is_empty() { + None + } else { + let root_mut = Rc::make_mut(&mut self.root); + let (ret, _empty) = root_mut.pop(); + self.length -= 1; + + // If we've shrunk the root down to a single child, reduce the tree height by 1. + if let Node::Interior { children } = root_mut { + if children.len() == 1 { + self.root = children.pop_back(); + self.height -= 1; + } + } + Some(ret) + } + } + + pub fn truncate(&mut self, len: usize) { + if len >= self.length { + return; + } + + let new_height = height_for_length::(len); + if new_height < self.height { + let mut new_root = &self.root; + for _ in new_height..self.height { + let Node::Interior { children } = new_root.as_ref() else { + unreachable!(); + }; + new_root = children.first().expect("empty interior node"); + } + self.root = Rc::clone(new_root); + self.height = new_height; + } + + Rc::make_mut(&mut self.root).truncate(len, self.height); + + self.length = len; + } + + pub fn iter(&self) -> Iter<'_, T, N> { + self.into_iter() + } + + pub fn rev_iter(&self) -> RevIter<'_, T, N> { + let mut stack = Vec::with_capacity(self.height.into()); + let mut node = self.root.as_ref(); + + while let Node::Interior { children } = node { + let mut node_iter = children.iter(); + node = node_iter.next_back().expect("empty interior node"); + stack.push(node_iter); + } + + let Node::Leaf { data } = node else { + unreachable!(); + }; + RevIter { + inner: Iter { + stack, + leaf: data.iter(), + }, + } + } + + pub fn rev_iter_starting_at(&self, mut idx: usize) -> RevIter<'_, T, N> { + let mut stack = Vec::with_capacity(self.height.into()); + let mut node = 
self.root.as_ref(); + let mut height = self.height; + + while let Node::Interior { children } = node { + let (bucket_idx, child_idx) = split_index::(idx, height); + let mut node_iter = children[..=bucket_idx].iter(); + node = node_iter.next_back().expect("empty interior node"); + stack.push(node_iter); + + height = height.checked_sub(1).expect("invalid height"); + idx = child_idx; + } + + let Node::Leaf { data } = node else { + unreachable!(); + }; + RevIter { + inner: Iter { + stack, + leaf: data[..=idx].iter(), + }, + } + } + + pub fn into_rev_iter(self) -> RevIntoIter { + let mut stack = Vec::with_capacity(self.height.into()); + let mut node = Rc::unwrap_or_clone(self.root); + + while let Node::Interior { children } = node { + let mut node_iter = children.into_iter(); + node = Rc::unwrap_or_clone(node_iter.next_back().expect("empty interior node")); + stack.push(node_iter); + } + + let Node::Leaf { data } = node else { + unreachable!(); + }; + RevIntoIter { + inner: IntoIter { + stack, + leaf: data.into_iter(), + }, + } + } + + pub fn into_rev_iter_starting_at(self, mut idx: usize) -> RevIntoIter { + let mut stack = Vec::with_capacity(self.height.into()); + let mut node = Rc::unwrap_or_clone(self.root); + let mut height = self.height; + + while let Node::Interior { mut children } = node { + let (bucket_idx, child_idx) = split_index::(idx, height); + children.drop_right(bucket_idx + 1); + let mut node_iter = children.into_iter(); + node = Rc::unwrap_or_clone(node_iter.next_back().expect("empty interior node")); + stack.push(node_iter); + + height = height.checked_sub(1).expect("invalid height"); + idx = child_idx; + } + + let Node::Leaf { mut data } = node else { + unreachable!(); + }; + data.drop_right(idx + 1); + RevIntoIter { + inner: IntoIter { + stack, + leaf: data.into_iter(), + }, + } + } +} + +impl<'a, T, const N: usize> IntoIterator for &'a Vector { + type Item = &'a T; + type IntoIter = Iter<'a, T, N>; + + fn into_iter(self) -> Self::IntoIter { + let 
mut stack = Vec::with_capacity(self.height.into()); + let mut node = self.root.as_ref(); + + while let Node::Interior { children } = node { + let mut node_iter = children.iter(); + node = node_iter.next().expect("empty interior node"); + stack.push(node_iter); + } + + let Node::Leaf { data } = node else { + unreachable!(); + }; + Iter { + stack, + leaf: data.iter(), + } + } +} + +impl IntoIterator for Vector { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + let mut stack = Vec::with_capacity(self.height.into()); + let mut node = Rc::unwrap_or_clone(self.root); + + while let Node::Interior { children } = node { + let mut node_iter = children.into_iter(); + node = Rc::unwrap_or_clone(node_iter.next().expect("empty interior node")); + stack.push(node_iter); + } + + let Node::Leaf { data } = node else { + unreachable!(); + }; + IntoIter { + stack, + leaf: data.into_iter(), + } + } +} + +impl Default for Vector { + fn default() -> Self { + Self::new() + } +} + +impl FromIterator for Vector { + fn from_iter>(iter: I) -> Self { + let mut ret = Vector::default(); + ret.extend(iter); + ret + } +} + +impl Index for Vector { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + self.get(index).expect("index out of range") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn basic() { + let mut vec = Vector::::new(); + vec.check_invariants(); + vec.push(1); + assert_eq!(vec.get(0), Some(&1)); + assert_eq!(vec.get(1), None); + vec.check_invariants(); + + vec.push(2); + vec.check_invariants(); + vec.push(3); + vec.check_invariants(); + assert_eq!(vec.get(0), Some(&1)); + assert_eq!(vec.get(1), Some(&2)); + assert_eq!(vec.get(2), Some(&3)); + assert_eq!(vec.get(3), None); + + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&1)); + assert_eq!(iter.next(), Some(&2)); + assert_eq!(iter.next(), Some(&3)); + assert_eq!(iter.next(), None); + + assert_eq!(vec.iter().copied().collect::>(), vec![1, 2, 
3]); + + assert_eq!(vec.pop(), Some(3)); + vec.check_invariants(); + vec.push(3); + vec.check_invariants(); + + vec.extend([1, 2, 3]); + vec.check_invariants(); + let mut iter = vec.iter(); + assert_eq!(iter.next(), Some(&1)); + assert_eq!(iter.next(), Some(&2)); + assert_eq!(iter.next(), Some(&3)); + assert_eq!(iter.next(), Some(&1)); + assert_eq!(iter.next(), Some(&2)); + assert_eq!(iter.next(), Some(&3)); + assert_eq!(iter.next(), None); + + assert_eq!(6, vec.len()); + assert_eq!( + vec.iter().copied().collect::>(), + vec![1, 2, 3, 1, 2, 3] + ); + assert_eq!(vec.into_iter().collect::>(), vec![1, 2, 3, 1, 2, 3]); + } +} diff --git a/funcarray/tests/arbtest.rs b/funcarray/tests/arbtest.rs new file mode 100644 index 000000000..29f0c0c36 --- /dev/null +++ b/funcarray/tests/arbtest.rs @@ -0,0 +1,207 @@ +use std::collections::VecDeque; + +use arbitrary::Unstructured; +use arbtest::{arbitrary, arbtest}; +use nickel_lang_funcarray::{FunctionalArray, Vector}; + +#[derive(arbitrary::Arbitrary, Debug)] +enum Op { + Push(u32), + Pop, + Extend(Vec), + Clone, + Truncate(u32), +} + +impl Op { + fn apply_to_vec(&self, vec: &mut Vec) { + match self { + Op::Push(x) => vec.push(*x), + Op::Pop => { + vec.pop(); + } + Op::Extend(xs) => vec.extend_from_slice(xs), + Op::Clone => {} + Op::Truncate(len) => { + if !vec.is_empty() { + vec.truncate(*len as usize % vec.len()) + } + } + } + } + + fn apply_to_vector( + &self, + vec: &mut Vector, + arena: &mut Vec>, + ) { + match self { + Op::Push(x) => vec.push(*x), + Op::Pop => { + vec.pop(); + } + Op::Extend(xs) => vec.extend(xs.iter().copied()), + Op::Clone => { + arena.push(vec.clone()); + } + Op::Truncate(len) => { + if !vec.is_empty() { + vec.truncate(*len as usize % vec.len()) + } + } + } + } +} + +#[derive(arbitrary::Arbitrary, Debug)] +enum ArrayOp { + PushFront(u32), + PopFront, + Prepend(Vec), + Slice(u32, usize), +} + +impl ArrayOp { + fn apply_to_vec_deque(&self, vec: &mut VecDeque) { + match self { + ArrayOp::PushFront(x) => 
vec.push_front(*x), + ArrayOp::PopFront => { + vec.pop_front(); + } + ArrayOp::Prepend(xs) => { + for x in xs.iter().rev() { + vec.push_front(*x); + } + } + ArrayOp::Slice(start, len) => { + if !vec.is_empty() { + let start = *start as usize % vec.len(); + for _ in 0..start { + vec.pop_front(); + } + vec.truncate(*len); + } + } + } + } + + fn apply_to_array(&self, vec: &mut FunctionalArray) { + match self { + ArrayOp::PushFront(x) => vec.push_front(*x), + ArrayOp::PopFront => { + vec.pop_front(); + } + ArrayOp::Prepend(xs) => { + let other = FunctionalArray::collect(xs.iter().cloned()); + vec.prepend(other); + } + ArrayOp::Slice(start, len) => { + if !vec.is_empty() { + let start = *start as usize % vec.len(); + let start = start.min(vec.len()); + let end = (start + len).min(vec.len()); + vec.slice(start, end); + } + } + } + } +} + +// u.arbitrary() generates very short vecs by default: +// https://github.com/matklad/arbtest/issues/8 +fn arb_vec(u: &mut Unstructured<'_>) -> arbitrary::Result> { + let len = u.arbitrary_len::()?; + std::iter::from_fn(|| Some(u.arbitrary::())) + .take(len) + .collect() +} + +#[test] +fn mutations() { + arbtest(|u| { + let mut vec: Vec = arb_vec(u)?; + let mut vector: Vector = vec.iter().copied().collect(); + let mut arena = Vec::new(); + let ops: Vec = u.arbitrary()?; + + for op in ops { + op.apply_to_vec(&mut vec); + op.apply_to_vector(&mut vector, &mut arena); + + vector.check_invariants(); + + assert_eq!(vec, vector.iter().cloned().collect::>()); + } + + Ok(()) + }); +} + +#[test] +fn rev_iter() { + arbtest(|u| { + let mut vec: Vec = arb_vec(u)?; + let vector: Vector = vec.iter().copied().collect(); + + let reversed: Vec = vector.rev_iter().copied().collect(); + vec.reverse(); + assert_eq!(reversed, vec); + + Ok(()) + }); +} + +#[test] +fn rev_iter_starting_at() { + arbtest(|u| { + let mut vec: Vec = arb_vec(u)?; + if !vec.is_empty() { + let vector: Vector = vec.iter().copied().collect(); + let idx: usize = u.arbitrary()?; + let 
idx = idx % vec.len(); + + let reversed: Vec = vector.rev_iter_starting_at(idx).copied().collect(); + vec.truncate(idx + 1); + vec.reverse(); + assert_eq!(reversed, vec); + } + + Ok(()) + }); +} + +#[test] +fn rev_into_iter() { + arbtest(|u| { + let mut vec: Vec = arb_vec(u)?; + let vector: Vector = vec.iter().copied().collect(); + + let reversed: Vec = vector.into_rev_iter().collect(); + vec.reverse(); + assert_eq!(reversed, vec); + + Ok(()) + }); +} + +#[test] +fn array_mutations() { + arbtest(|u| { + let vec: Vec = arb_vec(u)?; + let mut vec: VecDeque = vec.into_iter().collect(); + let mut arr: FunctionalArray = FunctionalArray::collect(vec.iter().copied()); + let ops: Vec = u.arbitrary()?; + + assert_eq!(vec, arr.iter().cloned().collect::>()); + + for op in ops { + op.apply_to_vec_deque(&mut vec); + op.apply_to_array(&mut arr); + + assert_eq!(vec, arr.iter().cloned().collect::>()); + assert_eq!(vec, arr.clone().into_iter().collect::>()); + } + + Ok(()) + }); +}