From fb69c86171029c52075e596485a0883ddd0b1c36 Mon Sep 17 00:00:00 2001 From: Joe Neeman Date: Wed, 9 Oct 2024 23:05:42 +0700 Subject: [PATCH] Optimizations, maybe --- core/stdlib/std.ncl | 10 +-- funcarray/benches/rpds_comparison.rs | 43 +++++++++++- funcarray/src/functional_array.rs | 42 ++++++++--- funcarray/src/lib.rs | 14 +++- funcarray/src/vector.rs | 100 +++++++++++++++++++-------- funcarray/tests/arbtest.rs | 11 ++- 6 files changed, 172 insertions(+), 48 deletions(-) diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl index aee41af5e..f3a0761d3 100644 --- a/core/stdlib/std.ncl +++ b/core/stdlib/std.ncl @@ -439,7 +439,7 @@ # => [ 3, 2, 1 ] ``` "% - = fun pred l => fold_left (fun acc x => if pred x then acc @ [x] else acc) [] l, + = fun pred l => fold_right (fun x acc => if pred x then [x] @ acc else acc) [] l, flatten : forall a. Array (Array a) -> Array a @@ -532,13 +532,13 @@ ``` "% = fun pred l => - let aux = fun acc x => + let aux = fun x acc => if (pred x) then - { right = acc.right @ [x], wrong = acc.wrong } + { right = [x] @ acc.right, wrong = acc.wrong } else - { right = acc.right, wrong = acc.wrong @ [x] } + { right = acc.right, wrong = [x] @ acc.wrong } in - fold_left aux { right = [], wrong = [] } l, + fold_right aux { right = [], wrong = [] } l, generate : forall a. (Number -> a) -> Number -> Array a diff --git a/funcarray/benches/rpds_comparison.rs b/funcarray/benches/rpds_comparison.rs index 96e88b092..00fa61a0e 100644 --- a/funcarray/benches/rpds_comparison.rs +++ b/funcarray/benches/rpds_comparison.rs @@ -72,5 +72,46 @@ pub fn reverse_count(c: &mut Criterion) { }); } -criterion_group!(benches, collect, count, reverse_count); +pub fn get(c: &mut Criterion) { + let input = vec![0; 10000]; + let vec8: Vector = input.iter().copied().collect(); + let vec32: Vector = input.iter().copied().collect(); + let vec64: Vector = input.iter().copied().collect(); + let rpds: rpds::Vector = input.iter().copied().collect(); + let mut group = c.benchmark_group("get"); + + group.bench_function("ours 10000, N=8", |b| { + b.iter(|| { + for i in 0..10000 { + black_box(vec8.get(i)); + } + }); + }); + + group.bench_function("ours 10000, N=32", |b| { + b.iter(|| { + for i in 0..10000 { + black_box(vec32.get(i)); + } + }); + }); + + group.bench_function("ours 10000, N=64", |b| { + b.iter(|| { + for i in 0..10000 { + black_box(vec64.get(i)); + } + }); + }); + + group.bench_function("rpds 10000", |b| { + b.iter(|| { + for i in 0..10000 { + black_box(rpds.get(i)); + } + }); + }); +} + +criterion_group!(benches, collect, count, reverse_count, get); criterion_main!(benches); diff --git a/funcarray/src/functional_array.rs b/funcarray/src/functional_array.rs index 016213393..a0f7be77b 100644 --- a/funcarray/src/functional_array.rs +++ b/funcarray/src/functional_array.rs @@ -1,6 +1,9 @@ use std::ops::Index; -use crate::vector::{RevIntoIter, RevIter}; +use crate::{ + vector::{RevIntoIter, RevIter}, + Const, ValidBranchingConstant, +}; use super::Vector; @@ -15,7 +18,10 @@ use super::Vector; /// branching factor. For performance, it should always be a power of 2. Values /// between `8` and `64` are pretty reasonable. #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct FunctionalArray { +pub struct FunctionalArray +where + Const: ValidBranchingConstant, +{ rev_vec: Vector, // Our slice involves the range of indices [start, end), like most slicing. // But since we work in reverse, our "first" element is at `end - 1`. @@ -23,7 +29,10 @@ pub struct FunctionalArray { end: usize, } -impl Default for FunctionalArray { +impl Default for FunctionalArray +where + Const: ValidBranchingConstant, +{ fn default() -> Self { FunctionalArray { rev_vec: Default::default(), @@ -33,7 +42,10 @@ impl Default for FunctionalArray { } } -impl serde::Serialize for FunctionalArray { +impl serde::Serialize for FunctionalArray +where + Const: ValidBranchingConstant, +{ fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -50,6 +62,8 @@ impl serde::Serialize for Functiona impl<'de, T: Clone + serde::Deserialize<'de>, const N: usize> serde::Deserialize<'de> for FunctionalArray +where + Const: ValidBranchingConstant, { fn deserialize(deserializer: D) -> Result where @@ -60,7 +74,10 @@ impl<'de, T: Clone + serde::Deserialize<'de>, const N: usize> serde::Deserialize } } -impl FunctionalArray { +impl FunctionalArray +where + Const: ValidBranchingConstant, +{ /// Create a new `FunctionalArray` out of a double-ended iterator. /// /// `FunctionalArray` doesn't implement `FromIterator` because for efficient @@ -225,7 +242,10 @@ impl FunctionalArray { } } -impl IntoIterator for FunctionalArray { +impl IntoIterator for FunctionalArray +where + Const: ValidBranchingConstant, +{ type Item = T; type IntoIter = std::iter::Take>; @@ -241,7 +261,10 @@ impl IntoIterator for FunctionalArray { } } -impl<'a, T: Clone, const N: usize> IntoIterator for &'a FunctionalArray { +impl<'a, T: Clone, const N: usize> IntoIterator for &'a FunctionalArray +where + Const: ValidBranchingConstant, +{ type Item = &'a T; type IntoIter = std::iter::Take>; @@ -256,7 +279,10 @@ impl<'a, T: Clone, const N: usize> IntoIterator for &'a FunctionalArray { } } -impl Index for FunctionalArray { +impl Index for FunctionalArray +where + Const: ValidBranchingConstant, +{ type Output = T; fn index(&self, index: usize) -> &Self::Output { diff --git a/funcarray/src/lib.rs b/funcarray/src/lib.rs index 4ea0837cd..50f6b84fe 100644 --- a/funcarray/src/lib.rs +++ b/funcarray/src/lib.rs @@ -16,9 +16,6 @@ //! [`Vector`] with support for slicing. It's backwards in order to support efficient access //! and modification at the beginning. -// TODO: -// - benchmarks for Array - // Not yet implemented (do we need them?) // - deletion // - mutable indexing @@ -27,5 +24,16 @@ mod functional_array; pub(crate) mod vector; +pub trait ValidBranchingConstant {} +pub struct Const {} + +impl ValidBranchingConstant for Const<2> {} +impl ValidBranchingConstant for Const<4> {} +impl ValidBranchingConstant for Const<8> {} +impl ValidBranchingConstant for Const<16> {} +impl ValidBranchingConstant for Const<32> {} +impl ValidBranchingConstant for Const<64> {} +impl ValidBranchingConstant for Const<128> {} + pub use functional_array::FunctionalArray; pub use vector::Vector; diff --git a/funcarray/src/vector.rs b/funcarray/src/vector.rs index 70152d5d3..c55d84462 100644 --- a/funcarray/src/vector.rs +++ b/funcarray/src/vector.rs @@ -2,6 +2,8 @@ use std::{iter::Peekable, ops::Index, rc::Rc}; use imbl_sized_chunks::Chunk; +use crate::{Const, ValidBranchingConstant}; + // In principle we could decouple the size of the interior nodes from the size of the leaves. // This might make sense when `T` is large, because the interior nodes are always pointer-sized. type Interior = Chunk>, N>; @@ -26,12 +28,15 @@ enum Node { Interior { children: Interior }, } -fn split_index(idx: usize, height: u8) -> (usize, usize) { - let factor = N.pow(height.into()); - (idx / factor, idx % factor) +fn extract_index(idx: usize, height: u8) -> usize { + let shifted: usize = idx >> (N.ilog2() * u32::from(height)); + shifted & (N - 1) } -impl Node { +impl Node +where + Const: ValidBranchingConstant, +{ fn len(&self) -> usize { match self { Node::Leaf { data } => data.len(), @@ -47,13 +52,13 @@ impl Node { match self { Node::Leaf { data } => { debug_assert_eq!(height, 0); - data.get(idx) + data.get(idx & (N - 1)) } Node::Interior { children } => { - let (bucket_idx, child_idx) = split_index::(idx, height); + let bucket_idx = extract_index::(idx, height); children .get(bucket_idx) - .and_then(|child| child.get(height - 1, child_idx)) + .and_then(|child| child.get(height - 1, idx)) } } } @@ -62,6 +67,7 @@ impl Node { pub fn set(&mut self, elt: T, idx: usize, height: u8) { match self { Node::Leaf { data } => { + let idx = idx & (N - 1); debug_assert_eq!(height, 0); debug_assert!(idx <= data.len()); if idx < data.len() { @@ -71,11 +77,11 @@ impl Node { } } Node::Interior { children } => { - let (bucket_idx, child_idx) = split_index::(idx, height); + let bucket_idx = extract_index::(idx, height); assert!(height >= 1); assert!(bucket_idx <= children.len()); if bucket_idx < children.len() { - Rc::make_mut(&mut children[bucket_idx]).set(elt, child_idx, height - 1); + Rc::make_mut(&mut children[bucket_idx]).set(elt, idx, height - 1); } else { let mut leaf = Chunk::new(); leaf.push_back(elt); @@ -136,7 +142,10 @@ impl Node { } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct Vector { +pub struct Vector +where + Const: ValidBranchingConstant, +{ // TODO: we currently allocate for an empty vector. Try to avoid it. root: Rc>, length: usize, @@ -145,12 +154,18 @@ pub struct Vector { } #[derive(Debug, Clone)] -pub struct Iter<'a, T, const N: usize> { +pub struct Iter<'a, T, const N: usize> +where + Const: ValidBranchingConstant, +{ stack: Vec>>>, leaf: std::slice::Iter<'a, T>, } -impl<'a, T, const N: usize> Iterator for Iter<'a, T, N> { +impl<'a, T, const N: usize> Iterator for Iter<'a, T, N> +where + Const: ValidBranchingConstant, +{ type Item = &'a T; fn next(&mut self) -> Option { @@ -198,11 +213,17 @@ impl<'a, T, const N: usize> Iterator for Iter<'a, T, N> { // DoubleEndedIterator is the ability to alternate taking from the beginning and // the end. #[derive(Debug, Clone)] -pub struct RevIter<'a, T, const N: usize> { +pub struct RevIter<'a, T, const N: usize> +where + Const: ValidBranchingConstant, +{ inner: Iter<'a, T, N>, } -impl<'a, T, const N: usize> Iterator for RevIter<'a, T, N> { +impl<'a, T, const N: usize> Iterator for RevIter<'a, T, N> +where + Const: ValidBranchingConstant, +{ type Item = &'a T; fn next(&mut self) -> Option { @@ -340,7 +361,10 @@ impl Iterator for RevIntoIter { } } -impl Extend for Vector { +impl Extend for Vector +where + Const: ValidBranchingConstant, +{ fn extend>(&mut self, iter: I) { // Make the iterator peekable, because we need to check if there's an // element remaining before we mutate the tree to make room for it. @@ -425,7 +449,10 @@ fn height_for_length(length: usize) -> u8 { length.saturating_sub(1).max(1).ilog(N).try_into().unwrap() } -impl Vector { +impl Vector +where + Const: ValidBranchingConstant, +{ pub fn new() -> Self { Self { root: Rc::new(Node::Leaf { data: Chunk::new() }), @@ -443,7 +470,10 @@ impl Vector { } } -impl Vector { +impl Vector +where + Const: ValidBranchingConstant, +{ fn is_packed(&self) -> bool { fn is_packed_rec(n: &Node, right_most: bool) -> bool { match n { @@ -574,19 +604,18 @@ impl Vector { } } - pub fn rev_iter_starting_at(&self, mut idx: usize) -> RevIter<'_, T, N> { + pub fn rev_iter_starting_at(&self, idx: usize) -> RevIter<'_, T, N> { let mut stack = Vec::with_capacity(self.height.into()); let mut node = self.root.as_ref(); let mut height = self.height; while let Node::Interior { children } = node { - let (bucket_idx, child_idx) = split_index::(idx, height); + let bucket_idx = extract_index::(idx, height); let mut node_iter = children[..=bucket_idx].iter(); node = node_iter.next_back().expect("empty interior node"); stack.push(node_iter); height = height.checked_sub(1).expect("invalid height"); - idx = child_idx; } let Node::Leaf { data } = node else { @@ -595,7 +624,7 @@ impl Vector { RevIter { inner: Iter { stack, - leaf: data[..=idx].iter(), + leaf: data[..=(idx & (N - 1))].iter(), }, } } @@ -627,19 +656,19 @@ impl Vector { let mut height = self.height; while let Node::Interior { mut children } = node { - let (bucket_idx, child_idx) = split_index::(idx, height); + let bucket_idx = extract_index::(idx, height); children.drop_right(bucket_idx + 1); let mut node_iter = children.into_iter(); node = Rc::unwrap_or_clone(node_iter.next_back().expect("empty interior node")); stack.push(node_iter); height = height.checked_sub(1).expect("invalid height"); - idx = child_idx; } let Node::Leaf { mut data } = node else { unreachable!(); }; + idx &= N - 1; data.drop_right(idx + 1); RevIntoIter { inner: IntoIter { @@ -650,7 +679,10 @@ impl Vector { } } -impl<'a, T, const N: usize> IntoIterator for &'a Vector { +impl<'a, T, const N: usize> IntoIterator for &'a Vector +where + Const: ValidBranchingConstant, +{ type Item = &'a T; type IntoIter = Iter<'a, T, N>; @@ -674,7 +706,10 @@ impl<'a, T, const N: usize> IntoIterator for &'a Vector { } } -impl IntoIterator for Vector { +impl IntoIterator for Vector +where + Const: ValidBranchingConstant, +{ type Item = T; type IntoIter = IntoIter; @@ -698,13 +733,19 @@ impl IntoIterator for Vector { } } -impl Default for Vector { +impl Default for Vector +where + Const: ValidBranchingConstant, +{ fn default() -> Self { Self::new() } } -impl FromIterator for Vector { +impl FromIterator for Vector +where + Const: ValidBranchingConstant, +{ fn from_iter>(iter: I) -> Self { let mut ret = Vector::default(); ret.extend(iter); @@ -712,7 +753,10 @@ impl FromIterator for Vector { } } -impl Index for Vector { +impl Index for Vector +where + Const: ValidBranchingConstant, +{ type Output = T; fn index(&self, index: usize) -> &Self::Output { diff --git a/funcarray/tests/arbtest.rs b/funcarray/tests/arbtest.rs index 29f0c0c36..8881cd904 100644 --- a/funcarray/tests/arbtest.rs +++ b/funcarray/tests/arbtest.rs @@ -2,7 +2,7 @@ use std::collections::VecDeque; use arbitrary::Unstructured; use arbtest::{arbitrary, arbtest}; -use nickel_lang_funcarray::{FunctionalArray, Vector}; +use nickel_lang_funcarray::{Const, FunctionalArray, ValidBranchingConstant, Vector}; #[derive(arbitrary::Arbitrary, Debug)] enum Op { @@ -34,7 +34,9 @@ impl Op { &self, vec: &mut Vector, arena: &mut Vec>, - ) { + ) where + Const: ValidBranchingConstant, + { match self { Op::Push(x) => vec.push(*x), Op::Pop => { @@ -85,7 +87,10 @@ impl ArrayOp { } } - fn apply_to_array(&self, vec: &mut FunctionalArray) { + fn apply_to_array(&self, vec: &mut FunctionalArray) + where + Const: ValidBranchingConstant, + { match self { ArrayOp::PushFront(x) => vec.push_front(*x), ArrayOp::PopFront => {