diff --git a/Cargo.toml b/Cargo.toml index e2dfb74ba6..21131d9deb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -109,7 +109,7 @@ flamegraph = ["pprof/flamegraph", "pprof/criterion"] [workspace] resolver = "2" -members = ["lurk-macros", "lurk-metrics"] +members = ["foil", "lurk-macros", "lurk-metrics"] # Dependencies that should be kept in sync through the whole workspace [workspace.dependencies] diff --git a/foil/Cargo.toml b/foil/Cargo.toml new file mode 100644 index 0000000000..340c42f992 --- /dev/null +++ b/foil/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "foil" +version = "0.1.0" +edition = "2021" +authors = ["porcuquine "] +description = "Flat Optimization Intermediate Language" +repository = "https://github.com/lurk-lab/lurk-rs" +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = { workspace = true } +bellpepper = { workspace = true } +bellpepper-core = { workspace = true } +ff = { workspace = true } +indexmap = { version = "1.9.3", features = ["rayon"] } +generic-array = "0.14.7" +lurk = { path = "../" } +lurk-macros = { path = "../lurk-macros" } +tracing = { workspace = true } +neptune = { workspace = true, features = ["arity2","arity4","arity8","arity16","pasta","bls"] } +once_cell = { workspace = true } +pasta_curves = { workspace = true, features = ["repr-c", "serde"] } +pretty_env_logger = "0.4" + +[dev-dependencies] +env_logger = "*" +test-log = "0.2.12" diff --git a/foil/src/circuit.rs b/foil/src/circuit.rs new file mode 100644 index 0000000000..c78879fabf --- /dev/null +++ b/foil/src/circuit.rs @@ -0,0 +1,129 @@ +//! This module provides a general mechanism for synthesizing a circuit for a minimized Foil instance. + +use std::collections::HashSet; +use std::fmt::Debug; + +use bellpepper::gadgets::num::AllocatedNum; +use bellpepper_core::{Circuit, ConstraintSystem, SynthesisError}; + +use crate::{Id, MappedFoil, MetaData, MetaMapper}; +use lurk::field::LurkField; + +pub trait Relation: Debug { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + _successors: Vec>, + ) -> Result<(), SynthesisError> { + unimplemented!() + } +} + +// This is an incomplete schematic sketch of what synthesis should entail. NOTE: the simplicity and uniformity of this +// last-mile conversion to a circuit. Most circuit-specific heavy lifting will be concentrated in the individual +// `Relation` implementations. +impl, MR: MetaMapper> + Circuit for MappedFoil +{ + fn synthesize>(self, cs: &mut CS) -> Result<(), SynthesisError> { + let foil = self.foil; + assert!( + foil.is_minimized(), + "only minimized Foils can be synthesized" + ); + + let partition = foil.graph.partition(); + let _classes = partition.classes().clone(); + + let allocated = foil + .graph + .vertices() + .iter() + .enumerate() + .map(|(i, vertex)| { + AllocatedNum::alloc( + &mut cs.namespace(|| format!("allocated-{i}-{vertex}")), + //|| todo!("get from witness, calculate, or get as constant..."), + || Ok(F::ZERO), // Just for initial testing. + ) + }) + .collect::, SynthesisError>>()?; + + let mut constrained: HashSet = Default::default(); + // for constructor in foil.schema.constructors.iter() { + // let _projectors = constructor.projectors(); + // todo!(); + // } + + for (i, (representative_id, _)) in partition.classes.iter().enumerate() { + let vertex = foil.graph.vertex(*representative_id); + let allocated_head = allocated[i].clone(); + + //if todo!("determine whether this vertex requires allocation") { + if true { + // For example, projectors do not require allocation given that they will be constrained by their + // corresponding constructor. This *could* be handled at the `Relation` level, however, in general this + // decision may require more global context -- so for now, consider that it should be made in the + // top-level `synthesize` method. + + // Currently, so-called `Bindings` (equivalences specified through the graph) become trivial after + // finalization. There is no reason to allocate variables for them or add any related constraints. The + // right answer is probably that minimization should remove them -- along with any other superfluous + // nodes of the graph. + // + // The point is that some parts of the decision (whether a given vertex needs constraints of its own) + // should probably be handled upstream. What is important is that we clearly understand the separation + // of concerns -- so decisions at each stage do not rely on invalid assumptions about the + // responsibilities of others. Some redundancy may therefore be appropriate. + // + // For example, in the case of bindings, all of the following could be implemented: + // + // - A full 'minimization' can remove them (with a check to ensure they were indeed enforced -- which + // means their successors have been made equivalent). This implies that the bindings themselves will not + // be seen here. + + // - If they do appear here, we can detect that and not allocate or constrain them. However, note that + // maybe this is the wrong approach -- since it will require increasing knowledge of semantics here. Not + // special-casing would be simplest. One answer is to ensure that `Func`s can be defined in such a way + // as to provide hints here. Designation of `Bindings` (in the constructors example) as `equivalences` + // is effectively such an annotation, but it may be useful to provide a more general (probably + // trait-based) mechanism. + // + // - If we do reach the point of constraining such nodes, their defined `Relation::synthesize` method + // can be a no-op. + + constrained.insert(*representative_id); + + let allocated_successors = vertex + .successors() + .borrow() + .iter() + .map(|successor_id| { + constrained.insert(*successor_id); + allocated[*successor_id].clone() + }) + .collect::>(); + + let relation = { + self.mapper + .find(vertex.metadata()) + .unwrap_or_else(|| panic!("relation missing for {:?}", vertex.metadata())) + }; + + dbg!(relation); + + relation.synthesize( + &mut cs.namespace(|| format!("relation-{}", i)), + allocated_head, + allocated_successors, + )?; + } + } + + // Every allocation has been constrained. + assert_eq!(constrained.len(), partition.size()); + + Ok(()) + } +} diff --git a/foil/src/coil.rs b/foil/src/coil.rs new file mode 100644 index 0000000000..a6307f2f71 --- /dev/null +++ b/foil/src/coil.rs @@ -0,0 +1,775 @@ +use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; +use tracing::info; + +use bellpepper::gadgets::num::AllocatedNum; +use bellpepper_core::{ConstraintSystem, SynthesisError}; + +use anyhow::{anyhow, bail, Result}; + +use crate::{Foil, Func, Id, Label, MetaData, MetaMapper, Relation, Schema, Var, Vert}; +use lurk::field::LurkField; +use lurk::lem::pointers::Ptr; +use lurk::lem::store::Store; +use lurk::lem::tag::Tag; +use lurk::sym; +use lurk::tag::ExprTag; +use lurk::Symbol; + +impl From for Func { + fn from(sym: Symbol) -> Self { + Self::from(&sym) + } +} + +impl From<&Symbol> for Func { + fn from(sym: &Symbol) -> Self { + Self::new_with_metadata(sym.to_string(), CoilMeta::from(sym.clone())) + } +} + +impl From for Var { + fn from(sym: Symbol) -> Self { + Self::from(&sym) + } +} + +impl From<&Symbol> for Var { + fn from(sym: &Symbol) -> Self { + Self::new(sym.to_string()) + } +} + +impl From for Label { + fn from(sym: Symbol) -> Self { + Self::from(&sym) + } +} + +impl From<&Symbol> for Label { + fn from(sym: &Symbol) -> Self { + Self::from(Var::from(sym)) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct CoilMeta { + name: Symbol, +} + +impl From for CoilMeta { + fn from(name: Symbol) -> Self { + Self { name } + } +} + +impl Default for CoilMeta { + fn default() -> Self { + let name = sym!("coil", "var"); + + Self { name } + } +} + +impl MetaData for CoilMeta {} + +#[derive(Clone, Debug, PartialEq)] +pub struct Context { + env: Ptr, +} + +/// Look up `var` in `env`, where `env` is a list of bindings from `Symbol` to `U64` representing bindings of variables to vertices +/// by id. +fn lookup_vertex_id(store: &Store, env: &Ptr, var: &Ptr) -> Result> { + let Some([bound_var, id, rest_env]) = store.pop_binding(*env) else { + return Ok(None); + }; + + if *var == bound_var { + match store.fetch_num(&id) { + Some(f) => Ok(f.to_u64().map(|u| { + let n = u as Id; + info!("found {n}"); + n + })), + _ => { + bail!("binding Id could not be fetched"); + } + } + } else { + lookup_vertex_id(store, &rest_env, var) + } +} + +impl Context { + fn new(store: &mut Store) -> Self { + Self { + env: store.intern_empty_env(), + } + } + + fn lookup(&self, store: &Store, var: &Ptr) -> Result> { + info!( + "looking up {} in env: {}", + var.fmt_to_string_simple(store), + self.env.fmt_to_string_simple(store) + ); + lookup_vertex_id(store, &self.env, var) + } + + fn push_binding(&mut self, store: &mut Store, var: Ptr, id: Id) { + let num = store.num((id as u64).into()); + + self.env = store.push_binding(var, num, self.env); + } + + fn pop_binding(&mut self, store: &Store) -> Result { + let [_var, id, rest_env] = store + .pop_binding(self.env) + .ok_or(anyhow!("failed to pop binding"))?; + self.env = rest_env; + + Ok(match store.fetch_num(&id) { + Some(f) => f + .to_u64() + .map(|u| u as Id) + .ok_or(anyhow!("binding Id could not be fetched"))?, + _ => { + bail!("binding Id could not be fetched"); + } + }) + } +} + +pub trait Syntax { + fn expand( + &self, + foil: &mut Foil, + store: &mut Store, + head: &Ptr, + rest: &[Ptr], + ) -> Result> + where + Self: Sized; +} + +#[derive(Clone, Debug)] +pub struct CoilDef, S: Syntax> { + relations: HashMap, + syntax: HashMap, + constructors: HashMap>, + equivalences: HashSet, + _p: PhantomData, +} + +pub struct AsSyntax<'a, T>(&'a T); + +pub struct Let {} + +impl Syntax for Let { + fn expand( + &self, + _foil: &mut Foil, + store: &mut Store, + _head: &Ptr, + rest: &[Ptr], + ) -> Result> { + let bind = store.intern_symbol(&sym!("coil", "bind")); + let pop_binding = store.intern_symbol(&sym!("coil", "pop-binding")); + + if rest.is_empty() { + bail!("Let bindings missing"); + } + + let bindings = store.fetch_proper_list(&rest[0]).unwrap(); + + let mut result = Vec::with_capacity((2 * bindings.len()) + rest.len() - 1); + + result.extend(bindings.iter().map(|binding| { + let b = store.fetch_proper_list(binding).unwrap(); + + assert_eq!(2, b.len()); + // let var = b[0]; + // let val_form = b[1]; + + // This is also store.cons(bind, binding). + store.cons(bind, *binding) + //store.list(&[bind, var, val_form]) + })); + + result.extend(&rest[1..]); + + let pop_binding_form = store.list([pop_binding]); + result.extend(bindings.iter().map(|_| pop_binding_form)); + + Ok(result) + } +} + +#[derive(Default, Debug)] +struct BindRel {} + +#[derive(Default, Debug)] +struct VarRel {} + +impl Relation for BindRel { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + _successors: Vec>, + ) -> Result<(), SynthesisError> { + // TODO: Actually implement constraints enforcing equality of the successors. Ideally, there would also exist a + // mechanism such that this could be implemented without allocating the head -- since the head is not required + // for this. Similarly, we should eventually allow for 'head' values to sometimes be unallocated + // linear-combinations. That is an optimization that can come later, when we focus on improving the generated + // R1CS based on the computation finally assembled. + Ok(()) + } +} +impl Relation for VarRel { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + successors: Vec>, + ) -> Result<(), SynthesisError> { + assert!(successors.is_empty()); + Ok(()) + } +} + +pub enum CoilSyntax { + Let(Let), +} + +impl Syntax for CoilSyntax { + fn expand( + &self, + foil: &mut Foil, + store: &mut Store, + head: &Ptr, + rest: &[Ptr], + ) -> Result> { + match self { + Self::Let(x) => x.expand(foil, store, head, rest), + } + } +} + +impl, S: Syntax> Default for CoilDef { + fn default() -> Self { + Self { + relations: Default::default(), + syntax: Default::default(), + constructors: Default::default(), + equivalences: Default::default(), + _p: Default::default(), + } + } +} + +impl> CoilDef { + fn new_std() -> Self { + let mut def = Self::default(); + + let let_sym = sym!("lurk", "let"); + let bind = sym!("coil", "bind"); + let _var = sym!("coil", "var"); + + def.register_equivalence(bind); + def.register_syntax(let_sym, CoilSyntax::Let(Let {})); + + def + } +} + +impl, S: Syntax> CoilDef { + fn register_relation(&mut self, sym: Symbol, rel: R) { + self.relations.insert(sym, rel); + } + fn register_syntax(&mut self, sym: Symbol, syn: S) { + self.syntax.insert(sym, syn); + } + fn register_constructor(&mut self, constructor: Symbol, projectors: Vec) { + self.constructors.insert(constructor, projectors); + } + fn register_equivalence(&mut self, equivalence: Symbol) { + self.equivalences.insert(equivalence); + } + fn symbol_func(&self, sym: Symbol) -> Func { + if let Some(projectors) = self.constructors.get(&sym) { + Func::constructor( + sym.to_string(), + projectors.iter().map(Func::from).collect(), + CoilMeta::from(sym), + ) + } else { + Func::from(sym) + } + } + + fn schema(&self) -> Schema { + let mut equivalences: Vec> = self + .equivalences + .iter() + .map(|sym| self.symbol_func(sym.clone())) + .collect(); + + equivalences.sort(); + + let mut constructors: Vec> = self + .constructors + .keys() + .map(|sym| self.symbol_func(sym.clone())) + .collect(); + constructors.sort(); + + Schema { + constructors, + equivalences, + } + } +} + +impl, S: Syntax> MetaMapper for CoilDef { + fn find(&self, meta: &CoilMeta) -> Option<&R> { + self.relations.get(&meta.name) + } +} + +impl<'a, F: LurkField, R: Relation, S: Syntax> MetaMapper + for AsSyntax<'a, CoilDef> +{ + fn find(&self, meta: &CoilMeta) -> Option<&S> { + self.0.syntax.get(&meta.name) + } +} + +impl, S: Syntax> CoilDef { + fn add_to_foil( + &self, + foil: &mut Foil, + store: &mut Store, + context: &mut Context, + expr: &Ptr, + ) -> Result> { + match expr.tag() { + Tag::Expr(ExprTag::Cons) => { + info!("adding sexp: {}", expr.fmt_to_string_simple(store)); + + let list = store + .fetch_proper_list(expr) + .expect("sexps must be proper lists"); + let head = list.get(0).ok_or(anyhow!("missing head"))?; + let sym = store.fetch_sym(head).ok_or(anyhow!("sym not in store"))?; + let rest = &list[1..]; + let meta = CoilMeta::from(sym.clone()); + + if let Some(syntax) = AsSyntax::>(self).find(&meta) { + let expanded = syntax.expand(foil, store, head, rest)?; + + let mut last = None; + for (i, x) in expanded.iter().enumerate() { + info!( + "expanded ({} of {}): {}", + i + 1, + expanded.len(), + x.fmt_to_string_simple(store) + ); + + if let Some(vert) = self.add_to_foil(foil, store, context, x)? { + last = Some(vert); + } + } + info!("completed expansion"); + Ok(last) + } else { + if sym == sym!("coil", "pop-binding") { + return self.handle_pop_binding(store, context, rest); + } + + // TODO: don't actually create these literal symbols here. + let successor_verts = if sym == sym!("coil", "bind") { + let var = store + .fetch_sym(&rest[0]) + .ok_or(anyhow!("bind var not in store"))?; + let var_vert = foil.alloc_unique_var(var.to_string()); + + let val_vert = self + .add_to_foil(foil, store, context, &rest[1])? + .ok_or(anyhow!("bind val missing"))?; + + self.handle_bind(store, context, rest, &[var_vert])?; + // return Ok(Some(self.handle_bind(store, context, rest, &[val_vert])?)); + + vec![Some(var_vert), Some(val_vert)] + } else { + rest.iter() + .map(|succ| self.add_to_foil(foil, store, context, succ)) + .collect::>>()? + }; + + info!("adding to foil: {sym:}, {meta:?}"); + let func = self.symbol_func(sym); + + Ok(Some(foil.add_with_meta( + func, + successor_verts.into_iter().flatten().collect(), + meta, + ))) + } + } + Tag::Expr(ExprTag::Sym) => { + info!("adding symbol"); + if let Some(bound_id) = context + .lookup(store, expr) + .map_err(|_| anyhow!("lookup error"))? + { + Ok(Some(Vert::new(bound_id))) + } else { + let sym = store.fetch_sym(expr).expect("missing sym"); + + Ok(Some(foil.alloc(sym))) + } + } + x => { + dbg!(x); + todo!("intern constant") + } + } + } + + fn handle_bind( + &self, + store: &mut Store, + context: &mut Context, + args: &[Ptr], + successors: &[Vert], + ) -> Result { + if args.len() != 2 { + bail!("bind needs exactly two args") + }; + if successors.len() != 1 { + bail!("bind needs exactly two successors") + }; + + let var = args[0]; + let vert = successors[0]; + info!( + "binding {} to {}", + var.fmt_to_string_simple(store), + vert.id() + ); + + context.push_binding(store, var, vert.id()); + + Ok(vert) + } + fn handle_pop_binding( + &self, + store: &mut Store, + context: &mut Context, + args: &[Ptr], + ) -> Result> { + if !args.is_empty() { + bail!("pop-binding needs exactly zero args") + }; + + context + .pop_binding(store) + .map_err(|_| anyhow!("failed to pop binding"))?; + + Ok(None) + } +} + +impl, S: Syntax> From> for Schema { + fn from(coil_def: CoilDef) -> Self { + coil_def.schema() + } +} +impl, S: Syntax> From<&CoilDef> for Schema { + fn from(coil_def: &CoilDef) -> Self { + coil_def.schema() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::congruence::test::assert_expected_classes; + use crate::{FoilConfig, MappedFoil}; + use bellpepper_core::{ + num::AllocatedNum, test_cs::TestConstraintSystem, Circuit, ConstraintSystem, SynthesisError, + }; + use generic_array::typenum::U2; + use lurk::circuit::gadgets::constraints::enforce_equal; + use lurk_macros::{let_store, lurk, store}; + use neptune::circuit2::poseidon_hash_allocated as poseidon_hash; + use neptune::poseidon::PoseidonConstants; + use once_cell::sync::Lazy; + use pasta_curves::pallas::Scalar as Fr; + + #[derive(Default, Debug)] + struct ConsRel {} + + #[derive(Default, Debug)] + struct CarRel {} + + #[derive(Default, Debug)] + struct CdrRel {} + + #[derive(Default, Debug)] + struct XxxRel {} + + #[derive(Debug)] + enum TestRel { + Bind(BindRel), + Var(VarRel), + Cons(ConsRel), + Car(CarRel), + Cdr(CdrRel), + Xxx(XxxRel), + } + + static P: Lazy> = Lazy::new(PoseidonConstants::new); + + impl Relation for TestRel { + fn synthesize>( + &self, + cs: &mut CS, + allocated_head: AllocatedNum, + successors: Vec>, + ) -> Result<(), SynthesisError> { + match self { + Self::Bind(x) => x.synthesize(cs, allocated_head, successors), + Self::Var(x) => x.synthesize(cs, allocated_head, successors), + Self::Cons(x) => x.synthesize(cs, allocated_head, successors), + Self::Car(x) => x.synthesize(cs, allocated_head, successors), + Self::Cdr(x) => x.synthesize(cs, allocated_head, successors), + Self::Xxx(x) => x.synthesize(cs, allocated_head, successors), + } + } + } + impl Relation for ConsRel { + fn synthesize>( + &self, + cs: &mut CS, + allocated_head: AllocatedNum, + successors: Vec>, + ) -> Result<(), SynthesisError> { + let allocated_digest = + poseidon_hash(&mut cs.namespace(|| "poseidon hash"), successors, &*P)?; + enforce_equal(cs, || "digest equal", &allocated_head, &allocated_digest); + Ok(()) + } + } + impl Relation for CarRel { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + _successors: Vec>, + ) -> Result<(), SynthesisError> { + // Nothing to do. The constraints are created by the corresponding ConsRel. + Ok(()) + } + } + impl Relation for CdrRel { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + _successors: Vec>, + ) -> Result<(), SynthesisError> { + // Nothing to do. The constraints are created by the corresponding ConsRel. + Ok(()) + } + } + impl Relation for XxxRel { + fn synthesize>( + &self, + _cs: &mut CS, + _allocated_head: AllocatedNum, + _successors: Vec>, + ) -> Result<(), SynthesisError> { + // Nothing to do. The constraints are created by the corresponding ConsRel. + Ok(()) + } + } + + #[test_log::test] + fn test_coil_foil() { + let mut def = CoilDef::<_, _, CoilSyntax>::new_std(); + + //////////////////////////////////////////////////////////////////////////////// + // Set up the CoilDef and Schema + // + // TODO: derive all this from something simpler and declarative. + + // These shouldn't really be .lurk symbols, but until we have better package support, we need to use those. + let cons = sym!("lurk", "cons"); + let car = sym!("lurk", "car"); + let cdr = sym!("lurk", "cdr"); + + let bind = sym!("coil", "bind"); + let var = sym!("coil", "var"); + let xxx = sym!("lurk", "user", "xxx"); + + def.register_relation(var, TestRel::Var(VarRel {})); + def.register_relation(bind, TestRel::Bind(BindRel {})); + def.register_relation(cons.clone(), TestRel::Cons(ConsRel {})); + def.register_relation(car.clone(), TestRel::Car(CarRel {})); + def.register_relation(cdr.clone(), TestRel::Cdr(CdrRel {})); + def.register_relation(xxx, TestRel::Xxx(XxxRel {})); + + def.register_constructor(cons, vec![car, cdr]); + + let_store!(); // TODO: take field type parameter. This macro currently hard-codes Fr. + + let prog = lurk!((let ((x (cons q r))) + (let ((s (let ((x (cons a b))) + (car x) + (xxx qqq)))) + (car x)))) + .unwrap(); + + let mut foil = Foil::::new( + &def, + FoilConfig { + // This is necessary and should either be made the only option, or be required for Coil. + dedup_var_names: true, + }, + ); + + { + let f = &mut foil; + + let mut context = Context::new(store!()); + def.add_to_foil(f, store!(), &mut context, &prog).unwrap(); + + let classes = dbg!(f.graph.class_info(None)); + + let expected_classes = &[ + (0, vec!["Var(.lurk.user.x)[0]: 0"], None), + (1, vec!["Var(.lurk.user.q): 1"], None), + (2, vec!["Var(.lurk.user.r): 2"], None), + (3, vec![".lurk.cons: 3"], Some(vec![1, 2])), + (4, vec![".coil.bind: 4"], Some(vec![0, 3])), + (5, vec!["Var(.lurk.user.s)[1]: 5"], None), + (6, vec!["Var(.lurk.user.x)[2]: 6"], None), + (7, vec!["Var(.lurk.user.a): 7"], None), + (8, vec!["Var(.lurk.user.b): 8"], None), + (9, vec![".lurk.cons: 9"], Some(vec![7, 8])), + (10, vec![".coil.bind: 10"], Some(vec![6, 9])), + (11, vec![".lurk.car: 11"], Some(vec![6])), + (12, vec!["Var(.lurk.user.qqq): 12"], None), + (13, vec![".lurk.user.xxx: 13"], Some(vec![12])), + (14, vec![".coil.bind: 14"], Some(vec![5, 13])), + (15, vec![".lurk.car: 15"], Some(vec![0])), + ]; + assert_expected_classes(expected_classes, classes); + } + + { + foil.finalize(); + let classes = dbg!(foil.graph.class_info(None)); + let expected_classes = &[ + ( + 0, + vec!["Var(.lurk.user.x)[0]: 0", ".lurk.cons: 3", ".lurk.cons: 19"], + Some(vec![20, 21]), + ), + (4, vec![".coil.bind: 4"], Some(vec![0, 0])), + ( + 5, + vec!["Var(.lurk.user.s)[1]: 5", ".lurk.user.xxx: 13"], + Some(vec![12]), + ), + ( + 6, + vec!["Var(.lurk.user.x)[2]: 6", ".lurk.cons: 9", ".lurk.cons: 16"], + Some(vec![17, 18]), + ), + (10, vec![".coil.bind: 10"], Some(vec![6, 6])), + (12, vec!["Var(.lurk.user.qqq): 12"], None), + (14, vec![".coil.bind: 14"], Some(vec![5, 5])), + ( + 17, + vec![ + "Var(.lurk.user.a): 7", + ".lurk.car: 11", + "Var(.lurk.car)[3]: 17", + ".lurk.car: 24", + ".lurk.car: 26", + ], + Some(vec![6]), + ), + ( + 18, + vec![ + "Var(.lurk.user.b): 8", + "Var(.lurk.cdr)[4]: 18", + ".lurk.cdr: 25", + ".lurk.cdr: 27", + ], + Some(vec![6]), + ), + ( + 20, + vec![ + "Var(.lurk.user.q): 1", + ".lurk.car: 15", + "Var(.lurk.car)[5]: 20", + ".lurk.car: 22", + ".lurk.car: 28", + ], + Some(vec![0]), + ), + ( + 21, + vec![ + "Var(.lurk.user.r): 2", + "Var(.lurk.cdr)[6]: 21", + ".lurk.cdr: 23", + ".lurk.cdr: 29", + ], + Some(vec![0]), + ), + ]; + assert_expected_classes(expected_classes, classes); + } + + info!("minimizing"); + + let minimized = foil.minimize(); + let classes = dbg!(minimized.graph.class_info(None)); + + let expected_classes = &[ + (0, vec![".lurk.cons: 0"], Some(vec![6, 7])), + (1, vec![".lurk.user.xxx: 1"], Some(vec![3])), + (2, vec![".lurk.cons: 2"], Some(vec![4, 5])), + (3, vec!["Var(.lurk.user.qqq): 3"], None), + (4, vec![".lurk.car: 4"], Some(vec![2])), + (5, vec![".lurk.cdr: 5"], Some(vec![2])), + (6, vec![".lurk.car: 6"], Some(vec![0])), + (7, vec![".lurk.cdr: 7"], Some(vec![0])), + ]; + assert_expected_classes(expected_classes, classes); + + let fx = MappedFoil::new(minimized, def); + let cs = &mut TestConstraintSystem::::new(); + + fx.synthesize(cs).unwrap(); + + // Binary poseidon hash with standard strength is 237 constraints. + // This includes a constraint (and allocation) for the returned digest, + // and another to enforce equality with the `allocated_head` of the relation. + // One constraint and allocation could be optimized away by calling the unallocated + // poseidon-circuit function. However, that may add complexity to witness generation. + // + // 239 * 2 = 478, since there are two conses. + assert_eq!(478, cs.num_constraints()); + } +} diff --git a/foil/src/congruence.rs b/foil/src/congruence.rs new file mode 100644 index 0000000000..a2ad77ed88 --- /dev/null +++ b/foil/src/congruence.rs @@ -0,0 +1,536 @@ +//! Congruence Closure +//! +//! As described in [Fast Decision Procedures Based on Congruence Closure](https://dl.acm.org/doi/10.1145/322186.322198) + +use std::cell::RefCell; +use std::collections::HashMap; +use std::fmt::{Debug, Display, Formatter}; +use std::hash::Hash; +use tracing::info; + +use indexmap::IndexSet; + +pub trait LabelTrait: PartialEq + Clone + Display + Debug + Eq {} +pub trait MetaData: PartialEq + Clone + Debug + Default {} + +impl MetaData for () {} + +pub type Id = usize; +pub type SimpleLabel = &'static str; +impl LabelTrait for SimpleLabel {} + +#[derive(Debug, Default, Eq, Clone, PartialEq)] +pub struct Vertex { + id: Id, + label: L, + metadata: T, + predecessors: RefCell>, + successors: RefCell>, + equiv: RefCell, +} + +impl Display for Vertex { + fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(fmt, "{}: {}", self.label, self.id) + } +} + +#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] +pub struct Vert { + id: Id, +} + +impl From for Vert { + fn from(id: Id) -> Self { + Self::new(id) + } +} + +impl Vert { + pub fn new(id: Id) -> Self { + Self { id } + } + pub fn vertex<'a, T: MetaData, L: LabelTrait>(&'a self, g: &'a Graph) -> &Vertex { + g.vertex(self.id) + } + pub fn id(&self) -> Id { + self.id + } + pub fn congruent(&self, g: &Graph, other: &Self) -> bool { + g.congruent(self.id(), other.id()) + } + pub fn merge(&self, g: &mut Graph, other: &Self) { + g.merge(self.id(), other.id()) + } + pub fn connect<'a, V: IntoIterator, T: MetaData, L: LabelTrait>( + &self, + g: &mut Graph, + targets: V, + ) { + g.connect(self, targets) + } + pub fn find(&self, g: &Graph) -> Id { + g.find(self.id()) + } + pub fn find_vertex<'a, T: MetaData, L: LabelTrait>( + &'a self, + g: &'a Graph, + ) -> &Vertex { + let id = self.find(g); + g.vertex(id) + } +} + +impl From<&Vert> for Id { + fn from(v: &Vert) -> Id { + v.id + } +} +impl From<&Vertex> for Id { + fn from(v: &Vertex) -> Id { + v.id + } +} + +impl Vertex { + pub fn new(id: Id, label: L, metadata: T) -> Self { + Self { + id, + label, + metadata, + successors: Default::default(), + predecessors: Default::default(), + equiv: RefCell::new(id), + } + } + + pub fn id(&self) -> usize { + self.id + } + + pub fn vert(&self) -> Vert { + Vert::new(self.id) + } + + pub fn label(&self) -> &L { + &self.label + } + + pub fn arity(&self) -> usize { + self.successors.borrow().len() + } + + pub fn metadata(&self) -> &T { + &self.metadata + } + + pub fn predecessors(&self) -> RefCell> { + self.predecessors.clone() + } + + pub fn predecessors_owned(&self) -> IndexSet { + self.predecessors.borrow().clone() + } + + pub fn find_predecessor<'a, P: FnMut(&Vertex) -> bool>( + &'a self, + g: &'a Graph, + mut predicate: P, + ) -> Option<&Vertex> { + for id in self.predecessors.borrow().iter() { + let vertex = g.vertex(*id); + if predicate(vertex) { + return Some(vertex); + } + } + None + } + + pub fn successors(&self) -> RefCell> { + self.successors.clone() + } + + pub fn successor(&self, n: usize) -> Option { + self.successors.borrow().get(n).copied() + } + + pub fn equiv(&self) -> Id { + *self.equiv.borrow() + } + + pub fn set_equiv(&self, id: Id) { + *(self.equiv.borrow_mut()) = id; + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Graph { + vertices: Vec>, + union_called: bool, +} + +impl Default for Graph { + fn default() -> Self { + Self { + vertices: Default::default(), + union_called: false, + } + } +} + +impl<'a, T: MetaData, L: LabelTrait> Graph { + pub fn alloc>(&'a mut self, label: LL, metadata: T) -> Vert { + // Vertices can never be removed, + let new_id = self.vertices.len(); + self.vertices + .push(Vertex::new(new_id, label.into(), metadata)); + + Vert::new(new_id) + } + + pub fn connect>(&self, source: &Vert, targets: V) { + // union coalesces predecessors into the class representative, + // so the graph must not be modified once it has been called. + assert!(!self.union_called, "connect must not be called after union"); + + let source_id = source.id(); + let target_ids: Vec = targets + .into_iter() + .map(|target_vert| { + // NOTE: We use the target's class representative via find. + let target_find = target_vert.find_vertex(self); + + //target_ids.push(target_find.id()); + target_find.predecessors.borrow_mut().insert(source_id); + + target_find.id() + }) + .collect(); + + // Replace any existing successors. `connect` is not cumulative. + *(source.vertex(self).successors.borrow_mut()) = target_ids; + } + + pub fn vertices(&self) -> &Vec> { + &self.vertices + } + + pub fn vertex(&self, id: Id) -> &Vertex { + &self.vertices[id] + } + + pub fn partition(&self) -> Partition { + self.into() + } + + /// Returns a vector of triples, each represnting the following per equivalence class: + /// - The `Id` of its representative member. + /// - A vector of its members' Vertex's descriptive strings. + /// - An optional vector of its ordered successors, if any. + /// Successor vectors include the `Id` of the class representative at each position, rather than that originally + /// specified. + pub fn class_info( + &self, + p: Option>, + ) -> Vec<(Id, Vec, Option>)> { + let p = p.unwrap_or_else(|| self.partition()); + + let mut classes = p.classes().iter().collect::>(); + classes.sort_by_key(|(id, _)| *id); + + classes + .iter() + .map(|(id, set)| { + let mut class = set + .iter() + .map(|id| (id, format!("{}", self.vertex(*id)), None::>>)) + .collect::>(); + + class.sort_by_key(|x| *x.0); + ( + **id, + class.into_iter().map(|x| x.1).collect(), + p.successors(**id), + ) + }) + .collect() + } + + pub fn find(&self, mut id: Id) -> Id { + // let input_id = id; + loop { + let old_id = id; + let v = self.vertex(id); + id = v.equiv(); + + if id == old_id { + break; + } + } + // info!("find(id{input_id}) => {id}"); + id + } + + pub fn union(&mut self, u: Id, v: Id) { + self.union_called = true; + + let find_u = self.find(u); + let uu = self.vertex(find_u); + + let find_v = self.find(v); + let vv = self.vertex(find_v); + + let l = vv.predecessors.borrow().len(); + for pred in vv.predecessors.borrow_mut().drain(0..l) { + uu.predecessors.borrow_mut().insert(pred); + } + + vv.set_equiv(find_u); + } + + pub fn merge(&mut self, u: Id, v: Id) { + info!("merging {} and {}", self.vertex(u), self.vertex(v)); + let find_u = self.find(u); + let find_v = self.find(v); + + if find_u == find_v { + return; + } + + let p_u = self.vertex(find_u).predecessors().borrow().clone(); + let p_v = self.vertex(find_v).predecessors().borrow().clone(); + info!( + "num predecessors: {}={} and {}={}", + self.vertex(find_u), + p_u.len(), + self.vertex(find_v), + p_v.len() + ); + self.union(u, v); + + for x in p_u.iter() { + for y in p_v.iter() { + info!( + "checking predecesor congruence: {} and {}", + self.vertex(*x), + self.vertex(*y) + ); + let find_x = self.find(*x); + let find_y = self.find(*y); + + if find_x != find_y && self.congruent(*x, *y) { + info!("recursive merge"); + // TODO: We might need or want an explicit stack rather than recursive call. + self.merge(*x, *y) + } + } + } + } + + pub fn congruent(&self, u: Id, v: Id) -> bool { + info!( + "checking congruence of {} and {}", + self.vertex(u), + self.vertex(v) + ); + let u_succ = self.vertex(u).successors(); + let v_succ = self.vertex(v).successors(); + + let labels_equal = self.vertex(u).label == self.vertex(v).label; + + let result = labels_equal + && u_succ.borrow().len() == v_succ.borrow().len() + && u_succ + .borrow() + .iter() + .zip(v_succ.borrow().iter()) + .all(|(x, y)| self.find(*x) == self.find(*y)); + if result { + info!("congruent") + } else { + info!("not congruent") + } + result + } + + /// Partition `self` (a `Graph`), filtering vertices to those for which `predicate` is true. + pub fn to_partition) -> bool>(&self, predicate: F) -> Partition { + let mut classes: HashMap> = Default::default(); + let mut index: HashMap = Default::default(); + let mut successors: HashMap> = Default::default(); + let mut labels: HashMap = Default::default(); + let mut metadata: HashMap = Default::default(); + + for (id, _v) in self + .vertices + .iter() + .enumerate() + .filter(|(_, v)| predicate(*v)) + { + let find_v = self.find(id); + let vertex = self.vertex(id); + let successor_ids = vertex + .successors() + .borrow() + .iter() + .map(|id| self.find(*id)) + .collect::>(); + + if !successor_ids.is_empty() { + dbg!(&successor_ids); + successors.insert(find_v, successor_ids); + + // The label we want is that of the original vertex that has successors. By definition, this must be the + // same for all such vertices in an equivalence class. + labels.insert(find_v, vertex.label().clone()); + + // Same goes for the metadata. + metadata.insert(find_v, vertex.metadata().clone()); + info!( + "partition, assigning {id} to class {find_v} with label {:?} and metadata {:?}", + vertex.label(), + vertex.metadata() + ); + } else { + info!("partition, assigning {id} to class {find_v}",); + } + classes + .entry(find_v) + .and_modify(|set| { + (*set).insert(id); + }) + .or_insert_with(|| IndexSet::from([find_v, id])); + index.insert(id, find_v); + } + Partition { + classes, + index, + successors, + labels, + metadata, + } + } +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct Partition { + // mapping from unique representative to all members of equivalence classes + pub classes: HashMap>, + // mapping from members to the unique representative indexing the class + pub index: HashMap, + // successors of the class, indexed by representative + // all successors are themselves class representatives (via find). + pub successors: HashMap>, + pub labels: HashMap, + pub metadata: HashMap, +} + +impl Partition { + pub fn class(&self, id: Id) -> Option<&IndexSet> { + self.index + .get(&id) + .and_then(|index| self.classes.get(index)) + } + + pub fn classes(&self) -> &HashMap> { + &self.classes + } + + pub fn index(&self) -> &HashMap { + &self.index + } + pub fn successors(&self, id: Id) -> Option> { + self.successors.get(&id).cloned() + } + pub fn label(&self, id: Id) -> Option<&L> { + self.labels.get(&id) + } + + pub fn metadata(&self, id: Id) -> Option<&T> { + self.metadata.get(&id) + } + + pub fn size(&self) -> usize { + self.index.len() + } + + pub fn find(&self, id: Id) -> Id { + *self.index.get(&id).expect("invalid id") + } +} + +impl From<&Graph> for Partition { + fn from(g: &Graph) -> Self { + g.to_partition(|_| true) + } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + + pub type IdVec = Vec; + pub(crate) fn assert_expected_classes( + expected: &[(Id, Vec<&str>, Option)], + actual: Vec<(Id, Vec, Option)>, + ) { + assert_eq!(expected.len(), actual.len()); + for ((expected_id, expected_class, expected_successors), (id, class, successors)) in + expected.iter().zip(actual.iter()) + { + assert_eq!(expected_id, id); + for (expected_member, member) in expected_class.iter().zip(class.iter()) { + assert_eq!(expected_member, &member); + } + assert_eq!(expected_successors, successors); + } + } + + #[test] + fn test_congruence() { + let g = &mut Graph::<(), SimpleLabel>::default(); + + // See Figure 1 of the paper. + // + // f<--f + // / \ / + // a b + let v1 = g.alloc("f", ()); + let v2 = g.alloc("f", ()); + let v3 = g.alloc("a", ()); + let v4 = g.alloc("b", ()); + + v1.connect(g, &[v2, v4]); + v2.connect(g, &[v3, v4]); + + { + let partition = g.partition(); + + assert_eq!(partition.class(0), Some(&IndexSet::from([0]))); + assert_eq!(partition.class(1), Some(&IndexSet::from([1]))); + assert_eq!(partition.class(2), Some(&IndexSet::from([2]))); + assert_eq!(partition.class(3), Some(&IndexSet::from([3]))); + } + assert!(!v1.congruent(g, &v2)); + assert!(!v1.congruent(g, &v3)); + assert!(!v1.congruent(g, &v4)); + assert!(!v2.congruent(g, &v3)); + assert!(!v2.congruent(g, &v4)); + assert!(!v3.congruent(g, &v4)); + + v2.merge(g, &v3); + + { + let partition = dbg!(g.partition()); + + assert_eq!(partition.class(0), Some(&IndexSet::from([0, 1, 2]))); + assert_eq!(partition.class(1), Some(&IndexSet::from([0, 1, 2]))); + assert_eq!(partition.class(2), Some(&IndexSet::from([0, 1, 2]))); + assert_eq!(partition.class(3), Some(&IndexSet::from([3]))); + } + + assert!(v1.congruent(g, &v2)); + assert_eq!(g.find(v2.id()), g.find(v3.id())); + assert!(!v3.congruent(g, &v4)); + } +} diff --git a/foil/src/constructors.rs b/foil/src/constructors.rs new file mode 100644 index 0000000000..f54cc78d21 --- /dev/null +++ b/foil/src/constructors.rs @@ -0,0 +1,697 @@ +//! Congruence Closure extended with constructors and projectors. +//! The `constructors` module is just an example of how `Foil` can be used. +//! The enum-based implementation of `Fun` is not necessary, just a somewhat simple way to +//! make a small, self-contained example. + +use crate::{Func, Label, Meta, Schema}; + +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +enum Fun { + Car, + Cdr, + Cons, + Binding, + Add, +} + +impl Fun { + fn equivalences() -> Vec> { + vec![Self::Binding.into()] + } + fn constructors() -> Vec> { + vec![Self::Cons.into()] + } + fn schema() -> Schema { + Schema { + equivalences: Self::equivalences(), + constructors: Self::constructors(), + } + } +} + +impl From for Func { + fn from(fun: Fun) -> Self { + match fun { + Fun::Car => Self::new("Car"), + Fun::Cdr => Self::new("Cdr"), + Fun::Cons => Self::constructor( + "Cons", + vec![Fun::Car.into(), Fun::Cdr.into()], + Meta::default(), + ), + Fun::Binding => Self::new("Binding"), + Fun::Add => Self::new("Add"), + } + } +} + +impl From for Label { + fn from(fun: Fun) -> Self { + Self::from(Func::::from(fun)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::congruence::{test::assert_expected_classes, Graph}; + use crate::{Foil, FoilConfig, Label, Meta, Var}; + + /* + * In Lurk context, we can generalize the treatment of 'Quantifier-Free Theory of Equality' with 'Extensions to Theory of List Structure'. + *** Procedure to extend CC to deal with CAR, CDR, and CONS. + - Start with graph of uninterpreted functions, with equivalence relation + - For every CONS, add a unary CAR and unary CDR vertex whose target is the CONS. + - MERGE the CAR with CONS[1] (1-indexed), and merge the CDR with CONS[2]. + *** We can generalize this for Lurk, to deal with N-ary constructors and their respective projection operators. + **** For example, in a notional world where CONS is a binary hash (unlike Lurk's concrete use of hashes): + - CONS = HASH2 + - CAR = HASH2-PROJECT1 + - CDR = HASH2-PROJECT2 + **** Then a 3-ary hash (i.e. a triple constructor) would yield: + - TRIPLE = HASH3 + - HASH3-PROJECT1 + - HASH3-PROJECT2 + - HASH3-PROJECT3 + **** In each case, the same procedure is followed: + - extend the graph to add missing projection operators + - MERGE with the concrete projections + *** We can also perform the complementary extension, to find congruence between the CONS(tructors) implied by projectors. + Conder this example: + + (lambda (a) + (let ((x (car a)) + (y (cdr a))) + (+ x y))) + + The proof of this expression should only require proof of a single CONS. + + This: + A<-CAR<=X + \ + + + / + A<-CDR<=Y + + Becomes: + B + \ + CONS(1)<=A + / \ + C CAR<=X + \ + + + / + D CDR<=Y + \ / + CONS(2)<=A + / + E + + Then: + - MERGE B and CAR + - MERGE E and CDR + + But because we now have new CONSes, we need to add missing projectors. (Should this happen before or after the previous MERGEs?) + + NOTE: Actually, we don't need to add CAR(2) and CDR(2) because they are redundant. The right procedure is to check for + the existence of CAR(1) and CDR(1). Since they exist, we should perform the next MERGE using those instead. + + B CDR(2) + \ / + CONS(1)<=A + / \ + C CAR(1)<=X + \ + + + / + D CDR(1)<=Y + \ / + CONS(2)<=A + / \ + E CAR(2) + + Then: + - MERGE CDR and C + - MERGE CAR and D + + NOTE: In this example, since we added CAR(2) and CDR(2), if we merged those just above, then we should also merge them + with their pre-existing counterparts. + - MERGE CAR(1) and CAR(2) + - MERGE CDR(1) and CDR(2) + + Now we have the following partition: + - 1: {B, CAR(1), CAR(2), D, X} + - 2: {E, CDR(1), CDR(2), C, Y} + - 3: {A, CONS(1), CONS(2)} + - 4: {+} + + With this graph: + + 1 + / \ + 3 4 + \ / + 2 + + Or equivalently: + + X:CAR + / \ + A:CONS + + \ / + Y:CDR + #+end_src + + */ + + #[test_log::test] + fn test_deduce_constructor() { + // This was the original test described above. + + // (let ((x (car a)) + // (y (cdr a))) + // (+ x y))) + + type G = Graph<(), Label>; + let g = &mut G::default(); + + let a = g.alloc(Var::new("a"), ()); + let x = g.alloc(Var::new("x"), ()); + let y = g.alloc(Var::new("y"), ()); + let car = g.alloc(Fun::Car, ()); + let cdr = g.alloc(Fun::Cdr, ()); + let bind_x = g.alloc(Fun::Binding, ()); + let bind_y = g.alloc(Fun::Binding, ()); + let plus = g.alloc(Fun::Add, ()); + + car.connect(g, &[a]); + cdr.connect(g, &[a]); + + // These binding nodes are not really necessary, but they may be useful for bookkeeping. Actually, for our + // purposes, it probably makes sense to use these as the sole indicators of explicit equality. + // These can mark the vertices to be merged, and those merges can be performed programmatically. + // New vertices that need to be added first can add pending merges by creating these connections. + // Binding is probably the wrong term in that context, though. TODO: Better name. + bind_x.connect(g, &[x, car]); + bind_y.connect(g, &[y, cdr]); + + plus.connect(g, &[x, y]); + + let cons = g.alloc(Fun::Cons, ()); + let car1 = g.alloc(Fun::Car, ()); + let cdr1 = g.alloc(Fun::Cdr, ()); + let b = g.alloc(Var::new("b"), ()); + let c = g.alloc(Var::new("c"), ()); + + cons.connect(g, &[b, c]); + + car1.connect(g, &[cons]); + cdr1.connect(g, &[cons]); + + // All merges must follow all connections. + + // This effects the equivalences the bindings signal. + x.merge(g, &car); + y.merge(g, &cdr); + + // This adds the Cons. We know this is needed because `a` has a predecessor that is labeled `Car` and one that is + // labeled `Cdr`. Either would suffice to require this new vertex labeled `Cons` to be added. Such an added cons + // must be merged with the vertex that forced its creation. + a.merge(g, &cons); + + // When a new `Cons` is created, corresponding projection (`Car` and `Cdr`) vertices are created, and their axiomatic + // equivalence to the `Cons`'s successors must be asserted by merging them. + b.merge(g, &car1); + c.merge(g, &cdr1); + + // The bindings each inhabit singleton equivalence classes and can be removed. + let actual_classes = dbg!(g.class_info(None)); + let expected_classes = &[ + // Notice that variable a has been labeld as a cons. + (0, vec!["Var(a): 0", "Cons: 8"], Some(vec![11, 12])), + // Notice that now the bindings point to identical vertices. That is because their potentially distinct + // successors have been merged, by virtue of `Binding` having been specified as an 'equivalence' in the schema. + (5, vec!["Binding: 5"], Some(vec![11, 11])), + (6, vec!["Binding: 6"], Some(vec![12, 12])), + (7, vec!["Add: 7"], Some(vec![11, 12])), + ( + 11, + vec!["Var(x): 1", "Car: 3", "Car: 9", "Var(b): 11"], + Some(vec![0]), + ), + ( + 12, + vec!["Var(y): 2", "Cdr: 4", "Cdr: 10", "Var(c): 12"], + Some(vec![0]), + ), + ]; + assert_expected_classes(expected_classes, actual_classes); + } + + #[test_log::test] + fn test_auto_deduce_constructor() { + // This is the original test with all the 'annotation' (i.e. the added vertices and equivalences) automated. + + // (let ((x (car a)) + // (y (cdr a))) + // (+ x y))) + + let f = &mut Foil::<(), Meta>::new(Fun::schema(), FoilConfig::default()); + + let a = f.alloc(Var::new("a")); + let x = f.alloc(Var::new("x")); + let y = f.alloc(Var::new("y")); + let car = f.alloc(Fun::Car); + let cdr = f.alloc(Fun::Cdr); + let bind_x = f.alloc(Fun::Binding); + let bind_y = f.alloc(Fun::Binding); + let plus = f.alloc(Fun::Add); + + f.connect(&car, &[a]); + f.connect(&cdr, &[a]); + + // These binding nodes are not really necessary, but they may be useful for bookkeeping. Actually, for our + // purposes, it probably makes sense to use these as the sole indicators of explicit equality. + // These can mark the vertices to be merged, and those merges can be performed programmatically. + // New vertices that need to be added first can add pending merges by creating these connections. + // Binding is probably the wrong term in that context, though. TODO: Better name. + f.connect(&bind_x, &[x, car]); + f.connect(&bind_y, &[y, cdr]); + + f.connect(&plus, &[x, y]); + + f.finalize(); + + let actual_classes = dbg!(f.graph.class_info(None)); + + // Notice that the final equivalence classes correspond to those predicted in the analysis (which preceded the implementation): + /* + Now we have the following partition: + - 1: {B, CAR(1), CAR(2), D, X} + - 2: {E, CDR(1), CDR(2), C, Y} + - 3: {A, CONS(1), CONS(2)} + - 4: {+} + */ + + let expected_classes = &[ + ( + 0, + // {A, CONS(1), CONS(2)} + vec!["Var(a): 0", "Cons: 8", "Cons: 11"], + Some(vec![1, 2]), + ), + ( + 1, + // {B, CAR(1), CAR(2), D, X} + vec![ + "Var(x): 1", + "Car: 3", + "Var(Car)[0]: 9", + "Var(Car)[2]: 12", + "Car: 14", + "Car: 16", + ], + Some(vec![0]), + ), + ( + 2, + // {E, CDR(1), CDR(2), C, Y} + vec![ + "Var(y): 2", + "Cdr: 4", + "Var(Cdr)[1]: 10", + "Var(Cdr)[3]: 13", + "Cdr: 15", + "Cdr: 17", + ], + Some(vec![0]), + ), + // These 'Binding's are syntactic and can be removed from the graph. They may have been explicit in the + // source from which the graph was generated, but their children (the bound vars/values) are now equivalent + // in the graph. + (5, vec!["Binding: 5"], Some(vec![1, 1])), + (6, vec!["Binding: 6"], Some(vec![2, 2])), + (7, vec!["Add: 7"], Some(vec![1, 2])), + ]; + + assert_expected_classes(expected_classes, actual_classes); + } + + #[test_log::test] + fn test_simplify_minimal() { + // This is a test showing that the car of a 'cons' is understood to be the first input to the cons expression + // that produced it. + + // (let* ((a (cons b c)) + // (d (car a))) + // ...) + let f = &mut Foil::<(), Meta>::new(Fun::schema(), FoilConfig::default()); + + let a = f.alloc(Var::new("a")); + let b = f.alloc(Var::new("b")); + let c = f.alloc(Var::new("c")); + let d = f.alloc(Var::new("d")); + let cons1 = f.alloc(Fun::Cons); + let car = f.alloc(Fun::Car); + let bind_a = f.alloc(Fun::Binding); + let bind_d = f.alloc(Fun::Binding); + + f.connect(&bind_a, &[a, cons1]); + f.connect(&cons1, &[b, c]); + + f.connect(&bind_d, &[d, car]); + f.connect(&car, &[a]); + + f.finalize(); + + // We want to see that b and d are in the same equivalence class. + + let actual_classes = dbg!(f.graph.class_info(None)); + + let expected_classes = &[ + ( + 0, + vec!["Var(a): 0", "Cons: 4", "Cons: 8"], + Some(vec![3, 10]), + ), + ( + 3, + // Notice that variables b and d are in the same equivalence class. + vec![ + "Var(b): 1", + "Var(d): 3", + "Car: 5", + "Var(Car)[0]: 9", + "Car: 11", + "Car: 13", + ], + Some(vec![0]), + ), + (6, vec!["Binding: 6"], Some(vec![0, 0])), + (7, vec!["Binding: 7"], Some(vec![3, 3])), + ( + 10, + vec!["Var(c): 2", "Var(Cdr)[1]: 10", "Cdr: 12", "Cdr: 14"], + Some(vec![0]), + ), + ]; + + assert_expected_classes(expected_classes, actual_classes); + + let minimized = f.minimize(); + assert!(minimized.is_minimized()); + let minimized_classes = dbg!(minimized.graph.class_info(None)); + + let expected_minimized = &[ + // Notice that all equivalence classes have exactly one member, + // and that its member corresponds to the Func it represents (if any). + (0, vec!["Cons: 0"], Some(vec![1, 2])), + (1, vec!["Car: 1"], Some(vec![0])), + (2, vec!["Cdr: 2"], Some(vec![0])), + ]; + + assert_expected_classes(expected_minimized, minimized_classes); + } + + #[test_log::test] + fn test_auto_simplify_harder() { + // This is a more elaborate version of the minimal test. + + // (let* ((a (cons b c)) + // (d (car a)) + // (e (cdr a)) + // (f (cons d e))) + // (+ a f)) + let f = &mut Foil::<(), Meta>::new(Fun::schema(), FoilConfig::default()); + + let a = f.alloc(Var::new("a")); + let b = f.alloc(Var::new("b")); + let c = f.alloc(Var::new("c")); + let cons1 = f.alloc(Fun::Cons); + f.connect(&cons1, &[b, c]); + let bind_a = f.alloc(Fun::Binding); + f.connect(&bind_a, &[a, cons1]); + + let car = f.alloc(Fun::Car); + f.connect(&car, &[a]); + let d = f.alloc(Var::new("d")); + let bind_d = f.alloc(Fun::Binding); + f.connect(&bind_d, &[d, car]); + + let cdr = f.alloc(Fun::Cdr); + f.connect(&cdr, &[a]); + let e = f.alloc(Var::new("e")); + let bind_e = f.alloc(Fun::Binding); + f.connect(&bind_e, &[e, cdr]); + + let cons2 = f.alloc(Fun::Cons); + f.connect(&cons2, &[d, e]); + let bind_ff = f.alloc(Fun::Binding); + let ff = f.alloc(Var::new("ff")); + f.connect(&bind_ff, &[ff, cons2]); + + let plus = f.alloc(Fun::Add); + f.connect(&plus, &[a, ff]); + + f.finalize(); + + let actual_classes = dbg!(f.graph.class_info(None)); + + let expected_classes = &[ + ( + 6, + vec![ + "Var(b): 1", + "Car: 5", + "Var(d): 6", + "Var(Car)[0]: 16", + "Var(Car)[2]: 19", + "Car: 21", + "Car: 23", + "Car: 25", + "Car: 27", + ], + Some(vec![13]), + ), + (7, vec!["Binding: 7"], Some(vec![6, 6])), + ( + 9, + vec![ + "Var(c): 2", + "Cdr: 8", + "Var(e): 9", + "Var(Cdr)[1]: 17", + "Var(Cdr)[3]: 20", + "Cdr: 22", + "Cdr: 24", + "Cdr: 26", + "Cdr: 28", + ], + Some(vec![13]), + ), + (10, vec!["Binding: 10"], Some(vec![9, 9])), + (12, vec!["Binding: 4", "Binding: 12"], Some(vec![13, 13])), + ( + 13, + vec![ + "Var(a): 0", + "Cons: 3", + "Cons: 11", + "Var(ff): 13", + "Cons: 15", + "Cons: 18", + ], + Some(vec![6, 9]), + ), + (14, vec!["Add: 14"], Some(vec![13, 13])), + ]; + + assert_expected_classes(expected_classes, actual_classes); + + let minimized = f.minimize(); + assert!(minimized.is_minimized()); + let minimized_classes = dbg!(minimized.graph.class_info(None)); + + let expected_minimized = &[ + (0, vec!["Car: 0"], Some(vec![2])), + (1, vec!["Cdr: 1"], Some(vec![2])), + (2, vec!["Cons: 2"], Some(vec![0, 1])), + (3, vec!["Add: 3"], Some(vec![2, 2])), + ]; + + assert_expected_classes(expected_minimized, minimized_classes); + } + + #[test_log::test] + fn test_var_equivalence() { + // Allocate distinct vertices for each instance of `a`: + // + // TODO: But do we really *want* this behavior, or should it be possible for multiple vertices to be labeled by + // the same var? Currently, we resolve this ambiguity by requiring 'unique' vars to be allocated when that + // behavior is desired (for example, projectors). + // + // The alternate behavior might actually be more natural when variables may be shadowed and therefore share + // names while representing unique values. + // + // (let ((a (cons x y)) + // (b (car a)) + // (c (cdr a))) + // (cons b c))) + let f = &mut Foil::<(), Meta>::new( + Fun::schema(), + FoilConfig { + dedup_var_names: true, + }, + ); + let f2 = &mut Foil::<(), Meta>::new( + Fun::schema(), + FoilConfig { + dedup_var_names: false, + }, + ); + + let setup = |f: &mut Foil<(), Meta>| { + let a1 = f.alloc(Var::new("a")); + let a2 = f.alloc(Var::new("a")); + let a3 = f.alloc(Var::new("a")); + let x = f.alloc(Var::new("x")); + let y = f.alloc(Var::new("y")); + let b = f.alloc(Var::new("b")); + let c = f.alloc(Var::new("c")); + let cons1 = f.alloc(Fun::Cons); + let cons2 = f.alloc(Fun::Cons); + let car = f.alloc(Fun::Car); + let cdr = f.alloc(Fun::Cdr); + let bind_a = f.alloc(Fun::Binding); + let bind_b = f.alloc(Fun::Binding); + let bind_c = f.alloc(Fun::Binding); + + f.connect(&cons1, &[x, y]); + f.connect(&cons2, &[b, c]); + f.connect(&car, &[a2]); + f.connect(&cdr, &[a3]); + f.connect(&bind_a, &[a1, cons1]); + f.connect(&bind_b, &[b, car]); + f.connect(&bind_c, &[c, cdr]); + }; + + setup(f); + setup(f2); + + f.finalize(); + f2.finalize(); + + let actual_classes = dbg!(f.graph.class_info(None)); + let f2_actual_classes = dbg!(f2.graph.class_info(None)); + let expected_classes = &[ + ( + 5, + vec![ + "Var(x): 3", + "Var(b): 5", + "Car: 9", + "Var(Car)[0]: 15", + "Var(Car)[2]: 18", + "Car: 20", + "Car: 22", + "Car: 24", + "Car: 26", + ], + Some(vec![8]), + ), + ( + 6, + vec![ + "Var(y): 4", + "Var(c): 6", + "Cdr: 10", + "Var(Cdr)[1]: 16", + "Var(Cdr)[3]: 19", + "Cdr: 21", + "Cdr: 23", + "Cdr: 25", + "Cdr: 27", + ], + Some(vec![8]), + ), + ( + 8, + vec![ + "Var(a): 0", + "Var(a): 1", + "Var(a): 2", + "Cons: 7", + "Cons: 8", + "Cons: 14", + "Cons: 17", + ], + Some(vec![5, 6]), + ), + (11, vec!["Binding: 11"], Some(vec![8, 8])), + (12, vec!["Binding: 12"], Some(vec![5, 5])), + (13, vec!["Binding: 13"], Some(vec![6, 6])), + ]; + assert_expected_classes(expected_classes, actual_classes); + + let f2_expected_classes = &[ + // Notice that there are three distinct `a` variables, each of which has been deduced to be a cons. + // Because we did not 'dedup' these (per the config), they manifest as three distinct conses. + (0, vec!["Var(a): 0", "Cons: 7"], Some(vec![3, 4])), + (1, vec!["Var(a): 1", "Cons: 14"], Some(vec![5, 16])), + (2, vec!["Var(a): 2", "Cons: 17"], Some(vec![18, 6])), + (3, vec!["Var(x): 3", "Car: 20"], Some(vec![0])), + (4, vec!["Var(y): 4", "Cdr: 21"], Some(vec![0])), + ( + 5, + vec!["Var(b): 5", "Car: 9", "Var(Car): 15", "Car: 22", "Car: 24"], + Some(vec![1]), + ), + ( + 6, + vec!["Var(c): 6", "Cdr: 10", "Var(Cdr): 19", "Cdr: 23", "Cdr: 27"], + Some(vec![2]), + ), + // Because the previous `(car a)` and `(cdr a)` were not discovered to refer to projections of the same + // cons, The cons joining them is not identified with some existing cons and instead appears as yet another + // unique cons in the graph. + (8, vec!["Cons: 8"], Some(vec![5, 6])), + (11, vec!["Binding: 11"], Some(vec![0, 0])), + (12, vec!["Binding: 12"], Some(vec![5, 5])), + (13, vec!["Binding: 13"], Some(vec![6, 6])), + (16, vec!["Var(Cdr): 16", "Cdr: 25"], Some(vec![1])), + (18, vec!["Var(Car): 18", "Car: 26"], Some(vec![2])), + ]; + + assert_expected_classes(f2_expected_classes, f2_actual_classes); + + let minimized = f.minimize(); + let f2_minimized = f2.minimize(); + assert!(minimized.is_minimized()); + assert!(f2_minimized.is_minimized()); + let minimized_classes = dbg!(minimized.graph.class_info(None)); + let f2_minimized_classes = dbg!(f2_minimized.graph.class_info(None)); + + let expected_minimized = &[ + (0, vec!["Car: 0"], Some(vec![2])), + (1, vec!["Cdr: 1"], Some(vec![2])), + (2, vec!["Cons: 2"], Some(vec![0, 1])), + ]; + + assert_expected_classes(expected_minimized, minimized_classes); + + let f2_expected_minimized = &[ + (0, vec!["Cons: 0"], Some(vec![3, 4])), + (1, vec!["Cons: 1"], Some(vec![5, 8])), + (2, vec!["Cons: 2"], Some(vec![9, 6])), + (3, vec!["Car: 3"], Some(vec![0])), + (4, vec!["Cdr: 4"], Some(vec![0])), + (5, vec!["Car: 5"], Some(vec![1])), + (6, vec!["Cdr: 6"], Some(vec![2])), + (7, vec!["Cons: 7"], Some(vec![5, 6])), + (8, vec!["Cdr: 8"], Some(vec![1])), + (9, vec!["Car: 9"], Some(vec![2])), + ]; + assert_expected_classes(f2_expected_minimized, f2_minimized_classes); + } +} diff --git a/foil/src/lib.rs b/foil/src/lib.rs new file mode 100644 index 0000000000..12c81f1302 --- /dev/null +++ b/foil/src/lib.rs @@ -0,0 +1,729 @@ +#![allow(dead_code)] + +//! FOIL +//! Flat Optimization Intermediate Language +use std::collections::{HashMap, HashSet, VecDeque}; +use std::fmt::{Debug, Display, Formatter}; +use std::hash::Hash; +use std::marker::PhantomData; +use tracing::{info, warn}; + +use anyhow::{bail, Error}; +use indexmap::IndexSet; + +pub mod circuit; +pub mod coil; +pub mod congruence; +pub mod constructors; + +pub use circuit::Relation; +pub use congruence::{Graph, Id, LabelTrait, MetaData, Vert}; + +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct Func { + name: String, + projectors: Option>, + // For now, we need to keep metadata with Funcs so it can be reproduced when automatically allocating new + // constructors and projectors. This is basically due to Funcs in the Schema acting as prototypes. + metadata: M, +} + +impl Display for Func { + fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(fmt, "{}", self.name) + } +} + +#[derive(Debug, Default, Clone, Hash, PartialEq, Eq)] +pub struct Schema { + equivalences: Vec>, + constructors: Vec>, +} + +impl Schema { + pub fn equivalences(&self) -> Vec> { + self.equivalences.clone() + } + pub fn constructors(&self) -> Vec> { + self.constructors.clone() + } + pub fn add_constructor(&mut self, constructor: Func, _metadata: T) { + self.constructors.push(constructor) + } +} + +impl Func { + pub fn new>(name: S) -> Self { + Self { + name: name.into(), + projectors: None, + metadata: T::default(), + } + } + pub fn new_with_metadata>(name: S, metadata: T) -> Self { + Self { + name: name.into(), + projectors: None, + metadata, + } + } + pub fn constructor>(name: S, projectors: Vec, metadata: T) -> Self { + Self { + name: name.into(), + projectors: Some(projectors), + metadata, + } + } + pub fn name(&self) -> &String { + &self.name + } + pub fn metadata(&self) -> &T { + &self.metadata + } + pub fn projectors(&self) -> Option<&Vec> { + self.projectors.as_ref() + } +} + +#[derive(Debug, Eq, Clone, Hash, PartialEq)] +pub struct FoilConfig { + /// If `dedup_var_names` is `true`, `Var`s with identical names will be merged when finalizing. + /// This option may be removed if it becomes clear that one setting is preferred. + dedup_var_names: bool, +} + +impl Default for FoilConfig { + fn default() -> Self { + Self { + dedup_var_names: true, + } + } +} + +#[derive(Debug, Clone)] +pub struct Foil { + graph: G, + merge_queue: VecDeque<(Id, Id)>, + fun_map: HashMap>, + var_map: HashMap>, + var_counter: usize, + constants: HashMap, + schema: Schema, + state: State, + config: FoilConfig, +} + +#[derive(Debug, Default, Eq, Clone, Hash, PartialEq)] +pub struct Var(pub String, pub Option); + +impl Var { + pub fn new>(name: S) -> Self { + Self(name.into(), None) + } +} + +impl Display for Var { + fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + let res = write!(fmt, "Var({})", self.0); + if let Some(id) = self.1 { + write!(fmt, "[{}]", id) + } else { + res + } + } +} + +#[derive(Debug, Clone, Hash, PartialEq)] +pub enum Label { + F(String), + V(String, Option), +} + +impl Eq for Label {} + +impl Label { + pub fn var>(name: S) -> Self { + Var(name.into(), None).into() + } +} + +impl Display for Label { + fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + match self { + Self::F(f) => std::fmt::Display::fmt(&f, fmt), + Self::V(_, _) => std::fmt::Display::fmt(&Var::from(self), fmt), + } + } +} + +impl LabelTrait for Label {} + +impl From for Label { + fn from(v: Var) -> Self { + Self::V(v.0, v.1) + } +} +impl From<&Var> for Label { + fn from(v: &Var) -> Self { + Self::V(v.0.clone(), v.1) + } +} + +impl From