From 70ee0212b08a365aaa3a2f4b63a31c8a842605f8 Mon Sep 17 00:00:00 2001 From: Gabriel Barreto Date: Tue, 2 Jan 2024 20:42:01 -0300 Subject: [PATCH] Updated some documentation --- src/lem/pointers.rs | 34 ++++++++++++++++------------------ src/lem/store.rs | 40 +++++++++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/src/lem/pointers.rs b/src/lem/pointers.rs index 79767d156d..0d4938d405 100644 --- a/src/lem/pointers.rs +++ b/src/lem/pointers.rs @@ -7,6 +7,9 @@ use crate::{ use super::Tag; +/// `RawPtr` is the basic pointer type of the LEM store. An `Atom` points to a field +/// element, and a `HashN` points to `N` children, which are also raw pointers. Thus, +/// they are a building block for graphs that represent Lurk data. #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)] pub enum RawPtr { Atom(usize), @@ -66,17 +69,9 @@ impl RawPtr { } } -/// `Ptr` is the main piece of data LEMs operate on. We can think of a pointer -/// as a building block for trees that represent Lurk data. A pointer can be a -/// atom that contains data encoded as an element of a `LurkField` or it can have -/// children. For performance, the children of a pointer are stored on an -/// `IndexSet` and the resulding index is carried by the pointer itself. -/// -/// A pointer also has a tag, which says what kind of data it encodes. On -/// previous implementations, the tag would be used to infer the number of -/// children a pointer has. However, LEMs require extra flexibility because LEM -/// hashing operations can plug any tag to the resulting pointer. Thus, the -/// number of children have to be made explicit as the `Ptr` enum. +/// `Ptr` is a tagged pointer. The tag is there to say what kind of data it encodes. +/// Since tags can be encoded as field elements, they are also able to be expressed +/// as raw pointers. A `Ptr` can thus be seen as a tuple of `RawPtr`s. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)] pub struct Ptr { tag: Tag, @@ -175,15 +170,18 @@ impl Ptr { } } -/// A `ZPtr` is the result of "hydrating" a `Ptr`. This process is better -/// explained in the store but, in short, we want to know the Poseidon hash of -/// the children of a `Ptr`. +/// A `ZPtr` is the result of "hydrating" a `Ptr`, which is a process of replacing +/// indices by hashes. That is, a `ZPtr` is a content-addressed, tagged pointer. +/// By analogy, we can view ordinary field elements as hydrated raw pointers. /// -/// `ZPtr`s are used mainly for proofs, but they're also useful when we want -/// to content-address a store. +/// With `ZPtr`s we are able to content-address arbitrary DAGs, and thus be able to +/// represent these data structures as field elements. This is how we can prove facts +/// about data structures only using field elements. `ZPtr`s are also useful when we +/// want to content-address the store. /// -/// An important note is that computing the respective `ZPtr` of a `Ptr` can be -/// expensive because of the Poseidon hashes. That's why we operate on `Ptr`s +/// In principle, `ZPtr`s could be used in place of `Ptr`, but it is important to +/// note that content-addressing can be expensive, especially in the context of +/// interpretation, because of the Poseidon hashes. That's why we operate on `Ptr`s /// when interpreting LEMs and delay the need for `ZPtr`s as much as possible. pub type ZPtr = crate::z_data::z_ptr::ZPtr; diff --git a/src/lem/store.rs b/src/lem/store.rs index 5a30c43d8a..37c0471d74 100644 --- a/src/lem/store.rs +++ b/src/lem/store.rs @@ -29,6 +29,16 @@ use crate::{ use super::pointers::{Ptr, RawPtr, ZPtr}; +/// The `Store` is a crucial part of Lurk's implementation and tries to be a +/// versatile data structure for many parts of Lurk's data pipeline. +/// +/// It holds Lurk data structured as graphs of `RawPtr`s. 
When a `RawPtr` has +/// children, we store them in its respective `IndexSet`. These data structures +/// speed up LEM interpretation because lookups by indices are fast, and leave +/// all the hashing to be done by the hydration step in multiple threads. +/// +/// The `Store` also provides infrastructure to speed up interning strings and symbols. +/// This data is saved in `string_ptr_cache` and `symbol_ptr_cache`. #[derive(Debug)] pub struct Store { f_elts: FrozenIndexSet>>, @@ -849,7 +859,7 @@ impl Store { /// /// Warning: without cache hits, this function might blow up Rust's recursion /// depth limit. This limitation is circumvented by calling `hydrate_z_cache` - /// beforehand or by using `hash_ptr` instead, which is slightly slower. + /// beforehand or by using `hash_raw_ptr` instead, which is slightly slower. fn hash_raw_ptr_unsafe(&self, ptr: &RawPtr) -> FWrap { macro_rules! hash_raw { ($hash:ident, $n:expr, $idx:expr) => {{ @@ -880,8 +890,8 @@ impl Store { /// Hashes pointers in parallel, consuming chunks of length 256, which is a /// reasonably safe limit. The danger of longer chunks is that the rightmost /// pointers are the ones which are more likely to reach the recursion depth - /// limit in `hash_ptr_unsafe`. So we move in smaller chunks from left to - /// right, populating the `z_cache`, which can rescue `hash_ptr_unsafe` from + /// limit in `hash_raw_ptr_unsafe`. So we move in smaller chunks from left to + /// right, populating the `z_cache`, which can rescue `hash_raw_ptr_unsafe` from /// dangerously deep recursions fn hydrate_z_cache_with_ptrs(&self, ptrs: &[&RawPtr]) { ptrs.chunks(256).for_each(|chunk| { @@ -891,8 +901,8 @@ impl Store { }); } - /// Hashes enqueued `Ptr` trees from the bottom to the top, avoiding deep - /// recursions in `hash_ptr`. Resets the `dehydrated` queue afterwards. + /// Hashes enqueued `RawPtr` trees from the bottom to the top, avoiding deep + /// recursions in `hash_raw_ptr`. Resets the `dehydrated` queue afterwards. 
pub fn hydrate_z_cache(&self) { self.hydrate_z_cache_with_ptrs(&self.dehydrated.load().iter().collect::>()); self.dehydrated.swap(Arc::new(FrozenVec::default())); @@ -911,12 +921,12 @@ impl Store { } } - /// Safe version of `hash_ptr_unsafe` that doesn't hit a stack overflow by + /// Safe version of `hash_raw_ptr_unsafe` that doesn't hit a stack overflow by /// precomputing the pointers that need to be hashed in order to hash the /// provided `ptr` pub fn hash_raw_ptr(&self, ptr: &RawPtr) -> FWrap { if self.is_below_safe_threshold() { - // just run `hash_ptr_unsafe` for extra speed when the dehydrated + // just run `hash_raw_ptr_unsafe` for extra speed when the dehydrated // queue is small enough return self.hash_raw_ptr_unsafe(ptr); } @@ -964,10 +974,12 @@ impl Store { } ptrs.reverse(); self.hydrate_z_cache_with_ptrs(&ptrs.into_iter().collect::>()); - // Now it's okay to call `hash_ptr_unsafe` + // Now it's okay to call `hash_raw_ptr_unsafe` self.hash_raw_ptr_unsafe(ptr) } + /// Hydrates a `Ptr`. That is, creates a `ZPtr` with the tag of the pointer + /// and the hash of its raw pointer pub fn hash_ptr(&self, ptr: &Ptr) -> ZPtr { ZPtr::from_parts(*ptr.tag(), self.hash_raw_ptr(ptr.raw()).0) } @@ -1000,9 +1012,9 @@ impl Store { self.hash_ptr(a) == self.hash_ptr(b) } - /// Attempts to recover the `Ptr` that corresponds to `z_ptr` from - /// `inverse_z_cache`. If the mapping is not there, returns an atom pointer - /// with the same tag and value + /// Attempts to recover the `RawPtr` that corresponds to a field element `z` + /// from `inverse_z_cache`. If the mapping is not there, returns a raw atom + /// pointer with value `z` #[inline] pub fn to_raw_ptr(&self, z: &FWrap) -> RawPtr { self.inverse_z_cache @@ -1011,6 +1023,8 @@ impl Store { .unwrap_or_else(|| self.intern_raw_atom(z.0)) } + /// Attempts to recover the `Ptr` that corresponds to a `ZPtr`. 
If the mapping + /// is not there, returns an atom pointer with the same tag and value #[inline] pub fn to_ptr(&self, z_ptr: &ZPtr) -> Ptr { Ptr::new(*z_ptr.tag(), self.to_raw_ptr(&FWrap(*z_ptr.value()))) @@ -1428,13 +1442,13 @@ mod tests { let string = String::from_utf8(vec![b'0'; 4096]).unwrap(); let store = Store::::default(); let ptr = store.intern_string(&string); - // `hash_ptr_unsafe` would overflow the stack, whereas `hash_ptr` works + // `hash_raw_ptr_unsafe` would overflow the stack, whereas `hash_raw_ptr` works let x = store.hash_raw_ptr(ptr.raw()); let store = Store::::default(); let ptr = store.intern_string(&string); store.hydrate_z_cache(); - // but `hash_ptr_unsafe` works just fine after manual hydration + // but `hash_raw_ptr_unsafe` works just fine after manual hydration let y = store.hash_raw_ptr_unsafe(ptr.raw()); // and, of course, those functions result on the same `ZPtr`