From a7dc359462388ef59bd02468c6daec99ca27454c Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 26 Jul 2024 11:31:07 +0200 Subject: [PATCH] Implement `prepare_custom_contract` natively (#2008) After the rework of the contract system that sets enum as the default return type of most custom and builtin contracts, some additional glue code was needed to post-process such enums (and raise blame accordingly). This has been implemented in the new internal function `$prepare_custom_contract`. While this function wasn't very long or involved, measurements showed that contract application is a code path that is taken so much that this additional blob of pure Nickel slowed down real world example by potentially a huge factor (up to 5-6x). While this is the symptom of other inefficiencies related to the contract system in general (related to pattern matching, missed caching opportunities for contract generation, etc.), bringing back the logic of `$prepare_custom_contract` to native code brings back the overhead (master compared to latest stable 1.7) to something like 1.1x or 1.2x. This commit implements this strategy, by introducing two new primops: - `%contract/postprocess_result%` which takes an enum `[| 'Ok value, 'Error error_data |]` and either unwrap `'Ok`, or modify the label using the `error_data` and then blame - `%label/with_error_data%`, which is used in the previous primop implementation but can be useful as a stand alone primop and stdlib wrapper in the future, which takes a value `{message, notes}`, a label, and update the label's data with the corresponding diagnostic message and notes Additionally, this commit tries to avoid generating terms at run-time, but rather setup the stack directly, to further make this hot path faster. --- ...unction_contract_domain_violation.ncl.snap | 10 +- ...ecord_forall_constraints_contract.ncl.snap | 12 - core/src/eval/operation.rs | 310 ++++++++++++++---- core/src/term/mod.rs | 12 + core/src/typecheck/operation.rs | 53 ++- core/stdlib/internals.ncl | 45 --- 6 files changed, 293 insertions(+), 149 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_function_contract_domain_violation.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_function_contract_domain_violation.ncl.snap index d2cfb04c35..43a7a7ed2c 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_function_contract_domain_violation.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_function_contract_domain_violation.ncl.snap @@ -15,16 +15,8 @@ error: contract broken by the caller 1 │ "a" │ --- evaluated to this value -note: - ┌─ [INPUTS_PATH]/errors/function_contract_domain_violation.ncl:5:31 - │ -5 │ let Foo = Number -> Number in ((fun x => x) | Foo) "a" - │ ------------------------ (1) calling - note: ┌─ [INPUTS_PATH]/errors/function_contract_domain_violation.ncl:5:32 │ 5 │ let Foo = Number -> Number in ((fun x => x) | Foo) "a" - │ ------------ (2) calling - - + │ ^^^^^^^^^^^^ While calling to x diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_record_forall_constraints_contract.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_record_forall_constraints_contract.ncl.snap index 662b27d2ab..5717e331a1 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_record_forall_constraints_contract.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_record_forall_constraints_contract.ncl.snap @@ -16,15 +16,3 @@ note: │ 3 │ let f | forall r. { ; r } -> { x: Number; r } = fun r => %record/insert% "x" r 1 in f { x = 0 } │ ^^^^^^^^^^^^^^^^^^^^^^^ While calling to r - -note: - ┌─ [INPUTS_PATH]/errors/record_forall_constraints_contract.ncl:3:85 - │ -3 │ let f | forall r. { ; r } -> { x: Number; r } = fun r => %record/insert% "x" r 1 in f { x = 0 } - │ ----------- (1) calling f - -note: - ┌─ [INPUTS_PATH]/errors/record_forall_constraints_contract.ncl:3:49 - │ -3 │ let f | forall r. { ; r } -> { x: Number; r } = fun r => %record/insert% "x" r 1 in f { x = 0 } - │ -------------------------------- (2) calling diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index b43f1ca16f..6c8a054fd8 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -26,7 +26,6 @@ use crate::{ mk_app, mk_fun, mk_record, parser::utils::parse_number_sci, position::TermPos, - pretty::PrettyPrintCap, serialize, serialize::ExportFormat, stdlib::internals, @@ -40,6 +39,9 @@ use crate::{ typecheck::eq::contract_eq, }; +#[cfg(feature = "metrics")] +use crate::pretty::PrettyPrintCap; + use malachite::{ num::{ arithmetic::traits::Pow, @@ -1317,6 +1319,59 @@ impl VirtualMachine { pos_op_inh, ))) } + UnaryOp::ContractPostprocessResult => { + let (tag, arg) = match (*t).clone() { + Term::EnumVariant { tag, arg, .. } => (tag, arg), + _ => return mk_type_error!("[| 'Ok, 'Error _ |]"), + }; + + // We pop the second argument which isn't strict: we don't need to evaluate the + // label if there's no error. + let (label_closure, pos_label) = self.stack.pop_arg(&self.cache).unwrap(); + + match (tag.label(), arg) { + ("Ok", value) => Ok(Closure { body: value, env }), + ("Error", err_data) => { + // In the error case, we first need to force the error data so that + // primitive values (strings) can be extracted from it, attach the + // corresponding data to the label, and then blame. + // + // To do so, we setup the stack to represent the evaluation context + // `%contract/blame% (%label/with_error_data% (%force% [.]) label)` and + // then continue with `err_data`. + self.stack.push_op_cont( + OperationCont::Op1(UnaryOp::Blame, arg_pos), + self.call_stack.len(), + pos_op_inh, + ); + self.stack.push_op_cont( + OperationCont::Op2First( + BinaryOp::LabelWithErrorData, + label_closure, + pos_label, + ), + self.call_stack.len(), + pos_op_inh, + ); + self.stack.push_op_cont( + OperationCont::Op1( + UnaryOp::Force { + ignore_not_exported: false, + }, + arg_pos, + ), + self.call_stack.len(), + pos_op_inh, + ); + + Ok(Closure { + body: err_data, + env, + }) + } + _ => mk_type_error!("[| 'Ok, 'Error {..} |]'"), + } + } UnaryOp::NumberArcCos => Self::process_unary_number_operation( RichTerm { term: t, pos }, arg_pos, @@ -1682,26 +1737,35 @@ impl VirtualMachine { } BinaryOp::ContractApply | BinaryOp::ContractCheck => { // The translation of a type might return any kind of contract, including e.g. a - // record or a custom contract. The result thus needs to be passed to - // `ContractApply` or `ContractCheck` again. In that case, we don't bother tracking - // the argument and updating the label: this will be done by the next call to - // `contract/apply(_as_custom)`. + // record or a custom contract. The result thus needs to be passed to `b_op` again. + // In that case, we don't bother tracking the argument and updating the label: this + // will be done by the next call to `b_op`. if let Term::Type(typ) = &*t1 { increment!(format!( "primop:contract/apply:{}", typ.pretty_print_cap(40) )); - return Ok(Closure { - body: mk_term::op2( + // We set the stack to represent the evaluation context ` [.] label` and + // proceed to evaluate `` + self.stack.push_op_cont( + OperationCont::Op2First( b_op, - typ.contract()?, - RichTerm { - term: t2, - pos: pos2, + Closure { + body: RichTerm { + term: t2, + pos: pos2, + }, + env: env2, }, - ) - .with_pos(pos1), + fst_pos, + ), + self.call_stack.len(), + pos_op_inh, + ); + + return Ok(Closure { + body: typ.contract()?, env: env1, }); } @@ -1719,81 +1783,189 @@ impl VirtualMachine { increment!(format!("contract:originates_from_field {field}")); } - // Track the contract argument for better error reporting, and push back the - // label on the stack, so that it becomes the first argument of the contract. - let idx = self.stack.track_arg(&mut self.cache).ok_or_else(|| { - EvalError::NotEnoughArgs(3, String::from("contract/apply"), pos_op) - })?; - + // Pop the contract argument to track its cache index in the label for better + // error reporting, and because we might add post-processing steps on the stack + // which need to sit underneath the value and the label (they will be run after + // the contract application is evaluated). We'll just push the value and the + // label back on the stack at the end. + let (idx, stack_value_pos) = + self.stack.pop_arg_as_idx(&mut self.cache).ok_or_else(|| { + EvalError::NotEnoughArgs(3, String::from("contract/apply"), pos_op) + })?; + + // We update the label and convert it back to a term form that can be cheaply cloned label.arg_pos = self.cache.get_then(idx.clone(), |c| c.body.pos); - label.arg_idx = Some(idx); - - // We push the label back on the stack and we properly convert the - // contract to a naked function of a label and a value, which will have the - // stack in the same state as if it had been applied normally to both - // arguments. - self.stack.push_arg( - Closure::atomic_closure(RichTerm::new( - Term::Lbl(label.clone()), - pos2.into_inherited(), - )), - pos2.into_inherited(), - ); + label.arg_idx = Some(idx.clone()); + let new_label = RichTerm::new(Term::Lbl(label), pos2); - match &*t1 { + // If we're evaluating a plain contract application but we are applying + // something with the signature of a custom contract, we need to setup some + // post-processing. + // + // We prepare the stack so that `contract/postprocess_result` will be + // applied afteward. This primop converts the result of a custom contract + // `'Ok value` or `'Error err_data` to either `value` or a proper contract + // error with `err_data` included in the label. + // + // That is, prepare the stack to represent the evaluation context + // `%contract/postprocess_result% [.] label` + if matches!( + (&*t1, &b_op), + ( + Term::CustomContract(_) | Term::Record(_), + BinaryOp::ContractApply + ) + ) { + self.stack + .push_arg(Closure::atomic_closure(new_label.clone()), pos_op_inh); + + self.stack.push_op_cont( + OperationCont::Op1( + UnaryOp::ContractPostprocessResult, + pos1.into_inherited(), + ), + self.call_stack.len(), + pos_op_inh, + ); + } + + // Now that we've updated the label, we push the checked value and the new + // label back on the stack, so that they become the two arguments of the + // contract (transformed to something that can be applied directly). That is, + // we prepare the stack to represent the evaluation context `[.] label value` + // and proceed with the evaluation of `functoid`. + self.stack.push_tracked_arg(idx, stack_value_pos); + self.stack + .push_arg(Closure::atomic_closure(new_label), pos2.into_inherited()); + + // We convert the contract (which can be a custom contract, a record, a naked + // function, etc.) to a form that can be applied to a label and a value. + let functoid = match &*t1 { Term::Fun(..) | Term::Match { .. } => { let as_naked = RichTerm { term: t1, pos: pos1, }; - let body = if let BinaryOp::ContractApply = b_op { - as_naked + if let BinaryOp::ContractApply = b_op { + Closure { + body: as_naked, + env: env1, + } } else { - mk_app!(internals::naked_to_custom(), as_naked).with_pos(pos1) - }; + // Prepare the stack to represent the evaluation context `[.] + // as_naked` and proceed with `$naked_to_custom` + self.stack.push_arg( + Closure { + body: as_naked, + env: env1, + }, + fst_pos, + ); - Ok(Closure { body, env: env1 }) + Closure { + body: internals::naked_to_custom(), + env: Environment::new(), + } + } } - Term::CustomContract(ctr) => { - let body = if let BinaryOp::ContractApply = b_op { - mk_app!(internals::prepare_custom_contract(), ctr.clone()) - .with_pos(pos1) - } else { - ctr.clone() - }; + Term::CustomContract(ctr) => Closure { + body: ctr.clone(), + env: env1, + }, + Term::Record(..) => { + // Prepare the stack to represent the evaluation context `[.] t1` and + // proceed with `$record_contract` + self.stack.push_arg( + Closure { + body: RichTerm { + term: t1, + pos: pos1, + }, + env: env1, + }, + fst_pos, + ); - Ok(Closure { body, env: env1 }) + Closure { + body: internals::record_contract(), + env: Environment::new(), + } } - Term::Record(..) => { - let pos1_inh = pos1.into_inherited(); + _ => return mk_type_error!("Contract", 1, t1, pos1), + }; - // Convert the record to the builtin contract implementation - // `$record_contract `. - let as_custom = mk_app!( - internals::record_contract(), - RichTerm { - term: t1, - pos: pos1 - } - ) - .with_pos(pos1_inh); + Ok(functoid) + } else { + mk_type_error!("Label", 2, t2.into(), pos2) + } + } + BinaryOp::LabelWithErrorData => { + // We need to extract plain values from a Nickel data structure, which most likely + // contains closures at least, even if it's fully evaluated. As for serialization, + // we thus need to fully substitute all variables first. + let t1 = subst( + &self.cache, + RichTerm { + term: t1, + pos: pos1, + }, + &self.initial_env, + &env1, + ) + .term + .into_owned(); - let body = if let BinaryOp::ContractApply = b_op { - // Convert the record to the builtin contract implementation - // `$prepare_custom_contract ($record_contract )`. - mk_app!(internals::prepare_custom_contract(), as_custom) - .with_pos(pos1_inh) - } else { - as_custom - }; + let t2 = t2.into_owned(); + + let Term::Lbl(mut label) = t2 else { + return mk_type_error!("Label", 2, t2.into(), pos2); + }; - Ok(Closure { body, env: env1 }) + if let Term::Record(mut record_data) = t1 { + if let Some(Term::Str(msg)) = record_data + .fields + .remove(&LocIdent::from("message")) + .and_then(|field| field.value) + .map(|v| v.term.into_owned()) + { + label = label.with_diagnostic_message(msg.into_inner()); + } + + if let Some(notes_term) = record_data + .fields + .remove(&LocIdent::from("notes")) + .and_then(|field| field.value) + { + if let Term::Array(array, _) = notes_term.into() { + let notes = array + .into_iter() + .map(|element| { + let term = element.term.into_owned(); + + if let Term::Str(s) = term { + Ok(s.into_inner()) + } else { + mk_type_error!( + "String (notes)", + 1, + term.into(), + element.pos + ) + } + }) + .collect::, _>>()?; + + label = label.with_diagnostic_notes(notes); } - _ => mk_type_error!("Contract", 1, t1, pos1), } + + Ok(Closure::atomic_closure(RichTerm::new( + Term::Lbl(label), + pos2, + ))) } else { - mk_type_error!("Label", 2, t2.into(), pos2) + mk_type_error!("Record", 1, t1.into(), pos1) } } BinaryOp::Unseal => { diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 595c687555..acf87974a7 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1501,6 +1501,13 @@ pub enum UnaryOp { /// type constructor for custom contracts. ContractCustom, + /// After applying a custom contract (or a builtin contract), the result is either `'Ok value` + /// or `'Error err_data`. This primop post-processes this return value (the first argument) to + /// either produce `value` in the first case or to attach the error data to the label (the + /// second argument, taken from the stack, as this op isn't strict in the label) and blame in + /// the second. + ContractPostprocessResult, + /// The cosinus function. NumberArcCos, @@ -1580,6 +1587,7 @@ impl fmt::Display for UnaryOp { PatternBranch => write!(f, "pattern_branch"), ContractCustom => write!(f, "contract/custom"), + ContractPostprocessResult => write!(f, "contract/postprocess_result"), NumberArcCos => write!(f, "number/arccos"), NumberArcSin => write!(f, "number/arcsin"), @@ -1732,6 +1740,9 @@ pub enum BinaryOp { /// [Self::ContractCheck] preserves the immediate/delayed part of the called contract. ContractCheck, + /// Take a record of type `{message | String | optional, notes | String | optional}`. + LabelWithErrorData, + /// Unseal a sealed term. /// /// See [`BinaryOp::Seal`]. @@ -1883,6 +1894,7 @@ impl fmt::Display for BinaryOp { GreaterOrEq => write!(f, "(>=)"), ContractApply => write!(f, "contract/apply"), ContractCheck => write!(f, "contract/check"), + LabelWithErrorData => write!(f, "label/with_error_data"), Unseal => write!(f, "unseal"), LabelGoField => write!(f, "label/go_field"), RecordInsert { diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index d6b450c673..0d5ed87468 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -273,7 +273,12 @@ pub fn get_uop_type( } // -> Dyn UnaryOp::ContractCustom => (custom_contract_type(), mk_uniftype::dynamic()), - // Num -> Num + // -> Dyn -> Dyn + UnaryOp::ContractPostprocessResult => ( + custom_contract_ret_type(), + mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), + ), + // Number -> Number UnaryOp::NumberCos | UnaryOp::NumberSin | UnaryOp::NumberTan @@ -316,6 +321,13 @@ pub fn get_bop_type( mk_uniftype::dynamic(), mk_uty_arrow!(mk_uniftype::dynamic(), custom_contract_ret_type()), ), + // Ideally: -> Label -> Dyn + // Currently: -> Dyn -> Dyn + BinaryOp::LabelWithErrorData => ( + error_data_type(), + mk_uniftype::dynamic(), + mk_uniftype::dynamic(), + ), // Sym -> Dyn -> Dyn -> Dyn BinaryOp::Unseal => ( mk_uniftype::sym(), @@ -631,6 +643,31 @@ pub fn custom_contract_type() -> UnifType { /// |] /// ``` pub fn custom_contract_ret_type() -> UnifType { + mk_uty_enum!( + ("Ok", mk_uniftype::dynamic()), + // /!\ IMPORTANT + // + // During typechecking, `TypeF::Flat` all have been transformed to `UnifType::Contract(..)` + // with their associated environment. Generating a `TypeF::Flat` at this point will panic + // in the best case (we should catch that), or have incorrect behavior in the worst case. + // Do not turn this into `UnifType::concrete(TypeF::Flat(..))`. + ("Error", error_data_type()) + ) +} + +/// The type of error data that can be returned by a custom contract: +/// +/// ```nickel +/// { +/// message +/// | String +/// | optional, +/// notes +/// | Array String +/// | optional +/// } +/// ``` +fn error_data_type() -> UnifType { use crate::term::make::builder; let error_data = builder::Record::new() @@ -649,17 +686,5 @@ pub fn custom_contract_ret_type() -> UnifType { .optional() .no_value(); - mk_uty_enum!( - ("Ok", mk_uniftype::dynamic()), - // /!\ IMPORTANT - // - // During typechecking, `TypeF::Flat` all have been transformed to `UnifType::Contract(..)` - // with their associated environment. Generating a `TypeF::Flat` at this point will panic - // in the best case (we should catch that), or have incorrect behavior in the worst case. - // Do not turn this into `UnifType::concrete(TypeF::Flat(..))`. - ( - "Error", - UnifType::Contract(error_data.build(), SimpleTermEnvironment::new()) - ) - ) + UnifType::Contract(error_data.build(), SimpleTermEnvironment::new()) } diff --git a/core/stdlib/internals.ncl b/core/stdlib/internals.ncl index 66b8d00494..280aa6f100 100644 --- a/core/stdlib/internals.ncl +++ b/core/stdlib/internals.ncl @@ -385,51 +385,6 @@ # are no extra fields, so we can just ignore them and return the value as is. "$empty_tail" = fun _extra_fields _label value => 'Ok value, - # Take a a partial identity Label -> Dyn -> Dyn that can then be passed - # applied as a normal funtion in the implementation of `contract/apply`. - "$prepare_custom_contract" = fun custom_contract label value => - custom_contract label value - |> match { - 'Ok value => value, - 'Error { message ? null, notes ? [], .. } => - let label = - if std.is_string message then - %label/with_message% message label - else - label - in - - let label = - if notes != [] && std.is_array notes then - %label/with_notes% (%force% notes) label - else - label - in - - %blame% label, - # The contract of `std.contract.custom` should guarantee that we - # never reach this case. However, nothing prevents user from using - # `%contract/custom%` directly, so we still try to handle a misbehaving - # custom contract gracefully. - value => - %blame% - ( - %label/with_notes% - ( - %force% - ( - %trace% - value - [ - "The implementation of this custom contract returned an invalid result (which must be of type `[| 'Ok Dyn, 'Error {..} |]`)", - "Please check the implementation of the contract that has been broken" - ] - ) - ) - label - ) - }, - # Dual of prepare_custom_contract. Turns a naked function of a label and a # value to a custom contract-like representation (that is, a function # returning an enum - however the result is still a naked function, which must