From 2f5e3872ea5caea7cd63188428ee27494e13846d Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 2 Jan 2024 20:42:01 -0300
Subject: [PATCH] Updated some documentation

---
 src/lem/pointers.rs | 34 ++++++++++++++++------------------
 src/lem/store.rs    | 40 +++++++++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 31 deletions(-)
diff --git a/src/lem/pointers.rs b/src/lem/pointers.rs
index 79767d156d..0d4938d405 100644
--- a/src/lem/pointers.rs
+++ b/src/lem/pointers.rs
@@ -7,6 +7,9 @@ use crate::{
 
 use super::Tag;
 
+/// `RawPtr` is the basic pointer type of the LEM store. An `Atom` points to a field
+/// element, and a `HashN` points to `N` children, which are also raw pointers. Thus,
+/// they are a building block for graphs that represent Lurk data.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)]
 pub enum RawPtr {
     Atom(usize),
@@ -66,17 +69,9 @@ impl RawPtr {
     }
 }
 
-/// `Ptr` is the main piece of data LEMs operate on. We can think of a pointer
-/// as a building block for trees that represent Lurk data. A pointer can be a
-/// atom that contains data encoded as an element of a `LurkField` or it can have
-/// children. For performance, the children of a pointer are stored on an
-/// `IndexSet` and the resulding index is carried by the pointer itself.
-///
-/// A pointer also has a tag, which says what kind of data it encodes. On
-/// previous implementations, the tag would be used to infer the number of
-/// children a pointer has. However, LEMs require extra flexibility because LEM
-/// hashing operations can plug any tag to the resulting pointer. Thus, the
-/// number of children have to be made explicit as the `Ptr` enum.
+/// `Ptr` is a tagged pointer. The tag is there to say what kind of data it encodes.
+/// Since tags can be encoded as field elements, they are also able to be expressed
+/// as raw pointers. A `Ptr` can thus be seen as a tuple of `RawPtr`s.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)]
 pub struct Ptr {
     tag: Tag,
@@ -175,15 +170,18 @@ impl Ptr {
     }
 }
 
-/// A `ZPtr` is the result of "hydrating" a `Ptr`. This process is better
-/// explained in the store but, in short, we want to know the Poseidon hash of
-/// the children of a `Ptr`.
+/// A `ZPtr` is the result of "hydrating" a `Ptr`, which is a process of replacing
+/// indices by hashes. That is, a `ZPtr` is a content-addressed, tagged, pointer.
+/// By analogy, we can view ordinary field elements as hydrated raw pointers.
 ///
-/// `ZPtr`s are used mainly for proofs, but they're also useful when we want
-/// to content-address a store.
+/// With `ZPtr`s we are able to content-address arbitrary DAGs, and thus be able to
+/// represent these data structures as field elements. This is how we can prove facts
+/// about data structures only using field elements. `ZPtr`s are also useful when we
+/// want to content-address the store.
 ///
-/// An important note is that computing the respective `ZPtr` of a `Ptr` can be
-/// expensive because of the Poseidon hashes. That's why we operate on `Ptr`s
+/// In principle, `ZPtr`s could be used in place of `Ptr`, but it is important to
+/// note that content-addressing can be expensive, especially in the context of
+/// interpretation, because of the Poseidon hashes. That's why we operate on `Ptr`s
 /// when interpreting LEMs and delay the need for `ZPtr`s as much as possible.
 pub type ZPtr<F> = crate::z_data::z_ptr::ZPtr<Tag, F>;
 
diff --git a/src/lem/store.rs b/src/lem/store.rs
index 5a30c43d8a..37c0471d74 100644
--- a/src/lem/store.rs
+++ b/src/lem/store.rs
@@ -29,6 +29,16 @@ use crate::{
 
 use super::pointers::{Ptr, RawPtr, ZPtr};
 
+/// The `Store` is a crucial part of Lurk's implementation and tries to be a
+/// vesatile data structure for many parts of Lurk's data pipeline.
+///
+/// It holds Lurk data structured as graphs of `RawPtr`s. When a `RawPtr` has
+/// children, we store them in its respective `IndexSet`. These data structures
+/// speed up LEM interpretation because lookups by indices are fast, and leave
+/// all the hashing to be done by the hydration step in multiple threads.
+///
+/// The `Store` also provides an infra to speed up interning strings and symbols.
+/// This data is saved in `string_ptr_cache` and `symbol_ptr_cache`.
 #[derive(Debug)]
 pub struct Store<F: LurkField> {
     f_elts: FrozenIndexSet<Box<FWrap<F>>>,
@@ -849,7 +859,7 @@ impl<F: LurkField> Store<F> {
     ///
     /// Warning: without cache hits, this function might blow up Rust's recursion
     /// depth limit. This limitation is circumvented by calling `hydrate_z_cache`
-    /// beforehand or by using `hash_ptr` instead, which is slightly slower.
+    /// beforehand or by using `hash_raw_ptr` instead, which is slightly slower.
     fn hash_raw_ptr_unsafe(&self, ptr: &RawPtr) -> FWrap<F> {
         macro_rules! hash_raw {
             ($hash:ident, $n:expr, $idx:expr) => {{
@@ -880,8 +890,8 @@ impl<F: LurkField> Store<F> {
     /// Hashes pointers in parallel, consuming chunks of length 256, which is a
     /// reasonably safe limit. The danger of longer chunks is that the rightmost
     /// pointers are the ones which are more likely to reach the recursion depth
-    /// limit in `hash_ptr_unsafe`. So we move in smaller chunks from left to
-    /// right, populating the `z_cache`, which can rescue `hash_ptr_unsafe` from
+    /// limit in `hash_raw_ptr_unsafe`. So we move in smaller chunks from left to
+    /// right, populating the `z_cache`, which can rescue `hash_raw_ptr_unsafe` from
     /// dangerously deep recursions
     fn hydrate_z_cache_with_ptrs(&self, ptrs: &[&RawPtr]) {
         ptrs.chunks(256).for_each(|chunk| {
@@ -891,8 +901,8 @@ impl<F: LurkField> Store<F> {
         });
     }
 
-    /// Hashes enqueued `Ptr` trees from the bottom to the top, avoiding deep
-    /// recursions in `hash_ptr`. Resets the `dehydrated` queue afterwards.
+    /// Hashes enqueued `RawPtr` trees from the bottom to the top, avoiding deep
+    /// recursions in `hash_raw_ptr`. Resets the `dehydrated` queue afterwards.
     pub fn hydrate_z_cache(&self) {
         self.hydrate_z_cache_with_ptrs(&self.dehydrated.load().iter().collect::<Vec<_>>());
         self.dehydrated.swap(Arc::new(FrozenVec::default()));
@@ -911,12 +921,12 @@ impl<F: LurkField> Store<F> {
         }
     }
 
-    /// Safe version of `hash_ptr_unsafe` that doesn't hit a stack overflow by
+    /// Safe version of `hash_raw_ptr_unsafe` that doesn't hit a stack overflow by
     /// precomputing the pointers that need to be hashed in order to hash the
     /// provided `ptr`
     pub fn hash_raw_ptr(&self, ptr: &RawPtr) -> FWrap<F> {
         if self.is_below_safe_threshold() {
-            // just run `hash_ptr_unsafe` for extra speed when the dehydrated
+            // just run `hash_raw_ptr_unsafe` for extra speed when the dehydrated
             // queue is small enough
             return self.hash_raw_ptr_unsafe(ptr);
         }
@@ -964,10 +974,12 @@ impl<F: LurkField> Store<F> {
         }
         ptrs.reverse();
         self.hydrate_z_cache_with_ptrs(&ptrs.into_iter().collect::<Vec<_>>());
-        // Now it's okay to call `hash_ptr_unsafe`
+        // Now it's okay to call `hash_raw_ptr_unsafe`
         self.hash_raw_ptr_unsafe(ptr)
     }
 
+    /// Hydrates a `Ptr`. That is, creates a `ZPtr` with the tag of the pointer
+    /// and the hash of its raw pointer
     pub fn hash_ptr(&self, ptr: &Ptr) -> ZPtr<F> {
         ZPtr::from_parts(*ptr.tag(), self.hash_raw_ptr(ptr.raw()).0)
     }
@@ -1000,9 +1012,9 @@ impl<F: LurkField> Store<F> {
         self.hash_ptr(a) == self.hash_ptr(b)
     }
 
-    /// Attempts to recover the `Ptr` that corresponds to `z_ptr` from
-    /// `inverse_z_cache`. If the mapping is not there, returns an atom pointer
-    /// with the same tag and value
+    /// Attempts to recover the `RawPtr` that corresponds to a field element `z`
+    /// from `inverse_z_cache`. If the mapping is not there, returns a raw atom
+    /// pointer with value
     #[inline]
     pub fn to_raw_ptr(&self, z: &FWrap<F>) -> RawPtr {
         self.inverse_z_cache
@@ -1011,6 +1023,8 @@ impl<F: LurkField> Store<F> {
             .unwrap_or_else(|| self.intern_raw_atom(z.0))
     }
 
+    /// Attempts to recover the `Ptr` that corresponds to a `ZPtr`. If the mapping
+    /// is not there, returns an atom pointer with the same tag and value
     #[inline]
     pub fn to_ptr(&self, z_ptr: &ZPtr<F>) -> Ptr {
         Ptr::new(*z_ptr.tag(), self.to_raw_ptr(&FWrap(*z_ptr.value())))
@@ -1428,13 +1442,13 @@ mod tests {
         let string = String::from_utf8(vec![b'0'; 4096]).unwrap();
         let store = Store::<Fr>::default();
         let ptr = store.intern_string(&string);
-        // `hash_ptr_unsafe` would overflow the stack, whereas `hash_ptr` works
+        // `hash_raw_ptr_unsafe` would overflow the stack, whereas `hash_raw_ptr` works
         let x = store.hash_raw_ptr(ptr.raw());
 
         let store = Store::<Fr>::default();
         let ptr = store.intern_string(&string);
         store.hydrate_z_cache();
-        // but `hash_ptr_unsafe` works just fine after manual hydration
+        // but `hash_raw_ptr_unsafe` works just fine after manual hydration
         let y = store.hash_raw_ptr_unsafe(ptr.raw());
 
         // and, of course, those functions result on the same `ZPtr`