From e9a766ff782ae98edc01f75f298a866ded5cfd76 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Apr 2023 13:01:11 +0200 Subject: [PATCH 01/13] PVF: Extract prepare/execute workers into separate binaries --- node/core/pvf/Cargo.toml | 8 +++++ node/core/pvf/bin/execute_worker.rs | 21 ++++++++++++ node/core/pvf/bin/prepare_worker.rs | 21 ++++++++++++ node/core/pvf/bin/puppet_worker.rs | 24 +++++++++++++- node/core/pvf/src/host.rs | 13 +++++--- node/core/pvf/src/lib.rs | 37 +++++++++++++++++++++ node/core/pvf/src/testing.rs | 44 ------------------------- node/core/pvf/tests/it/main.rs | 8 +++-- node/core/pvf/tests/it/worker_common.rs | 13 +++----- 9 files changed, 129 insertions(+), 60 deletions(-) create mode 100644 node/core/pvf/bin/execute_worker.rs create mode 100644 node/core/pvf/bin/prepare_worker.rs diff --git a/node/core/pvf/Cargo.toml b/node/core/pvf/Cargo.toml index 6478edc44115..986735d2af09 100644 --- a/node/core/pvf/Cargo.toml +++ b/node/core/pvf/Cargo.toml @@ -4,6 +4,14 @@ version.workspace = true authors.workspace = true edition.workspace = true +[[bin]] +name = "prepare_worker" +path = "bin/prepare_worker.rs" + +[[bin]] +name = "execute_worker" +path = "bin/execute_worker.rs" + [[bin]] name = "puppet_worker" path = "bin/puppet_worker.rs" diff --git a/node/core/pvf/bin/execute_worker.rs b/node/core/pvf/bin/execute_worker.rs new file mode 100644 index 000000000000..b7b26520bc39 --- /dev/null +++ b/node/core/pvf/bin/execute_worker.rs @@ -0,0 +1,21 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Execute worker. +// TODO: Build with musl. +// TODO: Embed into polkadot binary. + +polkadot_node_core_pvf::decl_worker_main!(execute); diff --git a/node/core/pvf/bin/prepare_worker.rs b/node/core/pvf/bin/prepare_worker.rs new file mode 100644 index 000000000000..13017cbfc784 --- /dev/null +++ b/node/core/pvf/bin/prepare_worker.rs @@ -0,0 +1,21 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Prepare worker. +// TODO: Build with musl. +// TODO: Embed into polkadot binary. + +polkadot_node_core_pvf::decl_worker_main!(prepare); diff --git a/node/core/pvf/bin/puppet_worker.rs b/node/core/pvf/bin/puppet_worker.rs index 7f93519d8454..bf82b4fb23a5 100644 --- a/node/core/pvf/bin/puppet_worker.rs +++ b/node/core/pvf/bin/puppet_worker.rs @@ -14,4 +14,26 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -polkadot_node_core_pvf::decl_puppet_worker_main!(); +//! Puppet worker used for integration tests. 
+ +use sp_tracing; + +fn main() { + sp_tracing::try_init_simple(); + + let args = std::env::args().collect::>(); + if args.len() < 3 { + panic!("wrong number of arguments"); + } + + let subcommand = &args[1]; + match subcommand.as_ref() { + "exit" => { + std::process::exit(1); + }, + "sleep" => { + std::thread::sleep(std::time::Duration::from_secs(5)); + }, + other => panic!("unknown subcommand: {}", other), + } +} diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 31a0266145f8..a00281bc90db 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -160,18 +160,23 @@ pub struct Config { impl Config { /// Create a new instance of the configuration. - pub fn new(cache_path: std::path::PathBuf, program_path: std::path::PathBuf) -> Self { + pub fn new( + cache_path: std::path::PathBuf, + prepare_worker_path: std::path::PathBuf, + execute_worker_path: std::path::PathBuf, + ) -> Self { // Do not contaminate the other parts of the codebase with the types from `tokio`. 
let cache_path = PathBuf::from(cache_path); - let program_path = PathBuf::from(program_path); + let prepare_worker_path = PathBuf::from(prepare_worker_path); + let execute_worker_path = PathBuf::from(execute_worker_path); Self { cache_path, - prepare_worker_program_path: program_path.clone(), + prepare_worker_program_path: prepare_worker_path.clone(), prepare_worker_spawn_timeout: Duration::from_secs(3), prepare_workers_soft_max_num: 1, prepare_workers_hard_max_num: 1, - execute_worker_program_path: program_path, + execute_worker_program_path: execute_worker_path, execute_worker_spawn_timeout: Duration::from_secs(3), execute_workers_max_num: 2, } diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index 8b6f2ddb262c..32ecec5da4ba 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -125,3 +125,40 @@ pub use sc_executor_common; pub use sp_maybe_compressed_blob; const LOG_TARGET: &str = "parachain::pvf"; + +/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for +/// spawning the desired worker. +#[macro_export(local_inner_macros)] +macro_rules! decl_worker_main { + ($command:tt) => { + fn main() { + $crate::sp_tracing::try_init_simple(); + + let args = std::env::args().collect::>(); + + let mut version = None; + let mut socket_path: &str = ""; + + for i in 1..args.len() { + match args[i].as_ref() { + "--socket-path" => socket_path = args[i + 1].as_str(), + "--node-version" => version = Some(args[i + 1].as_str()), + _ => (), + } + } + + decl_worker_main_command!($command, socket_path, version) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
decl_worker_main_command { + (prepare, $socket_path:expr, $version: expr) => { + $crate::prepare_worker_entrypoint(&$socket_path, $version) + }; + (execute, $socket_path:expr, $version: expr) => { + $crate::execute_worker_entrypoint(&$socket_path, $version) + }; +} diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/src/testing.rs index fb1b406cdad6..680de4924de3 100644 --- a/node/core/pvf/src/testing.rs +++ b/node/core/pvf/src/testing.rs @@ -51,47 +51,3 @@ pub fn validate_candidate( Ok(result) } - -/// Use this macro to declare a `fn main() {}` that will check the arguments and dispatch them to -/// the appropriate worker, making the executable that can be used for spawning workers. -#[macro_export] -macro_rules! decl_puppet_worker_main { - () => { - fn main() { - $crate::sp_tracing::try_init_simple(); - - let args = std::env::args().collect::>(); - if args.len() < 3 { - panic!("wrong number of arguments"); - } - - let mut version = None; - let mut socket_path: &str = ""; - - for i in 2..args.len() { - match args[i].as_ref() { - "--socket-path" => socket_path = args[i + 1].as_str(), - "--node-version" => version = Some(args[i + 1].as_str()), - _ => (), - } - } - - let subcommand = &args[1]; - match subcommand.as_ref() { - "exit" => { - std::process::exit(1); - }, - "sleep" => { - std::thread::sleep(std::time::Duration::from_secs(5)); - }, - "prepare-worker" => { - $crate::prepare_worker_entrypoint(&socket_path, version); - }, - "execute-worker" => { - $crate::execute_worker_entrypoint(&socket_path, version); - }, - other => panic!("unknown subcommand: {}", other), - } - } - }; -} diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/tests/it/main.rs index ecd885ab642e..fa1036f56527 100644 --- a/node/core/pvf/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -29,6 +29,8 @@ mod adder; mod worker_common; const PUPPET_EXE: &str = env!("CARGO_BIN_EXE_puppet_worker"); +const PREPARE_EXE: &str = env!("CARGO_BIN_EXE_prepare_worker"); +const EXECUTE_EXE: 
&str = env!("CARGO_BIN_EXE_execute_worker"); const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(3); @@ -47,8 +49,10 @@ impl TestHost { F: FnOnce(&mut Config), { let cache_dir = tempfile::tempdir().unwrap(); - let program_path = std::path::PathBuf::from(PUPPET_EXE); - let mut config = Config::new(cache_dir.path().to_owned(), program_path); + let prepare_worker_path = std::path::PathBuf::from(PREPARE_EXE); + let execute_worker_path = std::path::PathBuf::from(EXECUTE_EXE); + let mut config = + Config::new(cache_dir.path().to_owned(), prepare_worker_path, execute_worker_path); f(&mut config); let (host, task) = start(config, Metrics::default()); let _ = tokio::task::spawn(task); diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 3a17efc8df5c..990b48ff1a67 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::PUPPET_EXE; +use crate::{PREPARE_EXE, PUPPET_EXE}; use polkadot_node_core_pvf::testing::worker_common::{spawn_with_program_path, SpawnErr}; use std::time::Duration; @@ -37,12 +37,7 @@ async fn spawn_timeout() { #[tokio::test] async fn should_connect() { - let _ = spawn_with_program_path( - "integration-test", - PUPPET_EXE, - &["prepare-worker"], - Duration::from_secs(2), - ) - .await - .unwrap(); + let _ = spawn_with_program_path("integration-test", PREPARE_EXE, &[], Duration::from_secs(2)) + .await + .unwrap(); } From faf982e27cbefbdcbdcd59e6547a965aa4d5927e Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Apr 2023 14:38:45 +0200 Subject: [PATCH 02/13] Fix spawning workers --- node/core/pvf/src/execute/worker.rs | 2 +- node/core/pvf/src/prepare/worker.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/node/core/pvf/src/execute/worker.rs b/node/core/pvf/src/execute/worker.rs index c30ebceae693..761caf659804 100644 --- a/node/core/pvf/src/execute/worker.rs +++ b/node/core/pvf/src/execute/worker.rs @@ -50,7 +50,7 @@ pub async fn spawn( let (mut idle_worker, worker_handle) = spawn_with_program_path( "execute", program_path, - &["execute-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], + &["--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) .await?; diff --git a/node/core/pvf/src/prepare/worker.rs b/node/core/pvf/src/prepare/worker.rs index 3b2ae211e6ca..4905d23c1916 100644 --- a/node/core/pvf/src/prepare/worker.rs +++ b/node/core/pvf/src/prepare/worker.rs @@ -55,7 +55,7 @@ pub async fn spawn( spawn_with_program_path( "prepare", program_path, - &["prepare-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], + &["--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) .await From ba4e55eb125d567761e219c3d520cace92f6de29 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Wed, 19 Apr 2023 09:44:14 +0200 Subject: [PATCH 03/13] Move PVF workers into separate 
crate --- Cargo.lock | 44 +++- Cargo.toml | 5 +- node/core/pvf/Cargo.toml | 33 +-- node/core/pvf/src/artifacts.rs | 2 + node/core/pvf/src/error.rs | 16 +- node/core/pvf/src/execute/mod.rs | 4 +- node/core/pvf/src/execute/worker.rs | 174 ++------------ node/core/pvf/src/lib.rs | 63 +---- node/core/pvf/src/prepare/mod.rs | 33 ++- node/core/pvf/src/prepare/queue.rs | 2 +- node/core/pvf/src/prepare/worker.rs | 194 +-------------- node/core/pvf/src/pvf.rs | 17 +- node/core/pvf/src/worker_common.rs | 120 +--------- node/core/pvf/worker/Cargo.toml | 57 +++++ .../pvf/{ => worker}/bin/execute_worker.rs | 2 +- .../pvf/{ => worker}/bin/prepare_worker.rs | 2 +- .../pvf/{ => worker}/bin/puppet_worker.rs | 0 node/core/pvf/worker/build.rs | 19 ++ node/core/pvf/worker/src/common.rs | 142 +++++++++++ node/core/pvf/worker/src/execute.rs | 168 +++++++++++++ .../pvf/{ => worker}/src/executor_intf.rs | 0 node/core/pvf/worker/src/lib.rs | 72 ++++++ .../prepare => worker/src}/memory_stats.rs | 26 +- node/core/pvf/worker/src/prepare.rs | 222 ++++++++++++++++++ node/core/pvf/{ => worker}/src/testing.rs | 4 - node/core/pvf/{ => worker}/tests/it/adder.rs | 0 node/core/pvf/{ => worker}/tests/it/main.rs | 0 .../{ => worker}/tests/it/worker_common.rs | 2 +- 28 files changed, 803 insertions(+), 620 deletions(-) create mode 100644 node/core/pvf/worker/Cargo.toml rename node/core/pvf/{ => worker}/bin/execute_worker.rs (93%) rename node/core/pvf/{ => worker}/bin/prepare_worker.rs (93%) rename node/core/pvf/{ => worker}/bin/puppet_worker.rs (100%) create mode 100644 node/core/pvf/worker/build.rs create mode 100644 node/core/pvf/worker/src/common.rs create mode 100644 node/core/pvf/worker/src/execute.rs rename node/core/pvf/{ => worker}/src/executor_intf.rs (100%) create mode 100644 node/core/pvf/worker/src/lib.rs rename node/core/pvf/{src/prepare => worker/src}/memory_stats.rs (89%) create mode 100644 node/core/pvf/worker/src/prepare.rs rename node/core/pvf/{ => worker}/src/testing.rs (95%) rename 
node/core/pvf/{ => worker}/tests/it/adder.rs (100%) rename node/core/pvf/{ => worker}/tests/it/main.rs (100%) rename node/core/pvf/{ => worker}/tests/it/worker_common.rs (94%) diff --git a/Cargo.lock b/Cargo.lock index 9baed9429978..ba5e04ebf7df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6579,7 +6579,7 @@ dependencies = [ "nix 0.26.2", "polkadot-cli", "polkadot-core-primitives", - "polkadot-node-core-pvf", + "polkadot-node-core-pvf-worker", "polkadot-overseer", "substrate-rpc-client", "tempfile", @@ -7173,8 +7173,6 @@ name = "polkadot-node-core-pvf" version = "0.9.39" dependencies = [ "always-assert", - "assert_matches", - "cpu-time", "futures", "futures-timer", "hex-literal", @@ -7187,22 +7185,12 @@ dependencies = [ "polkadot-parachain", "polkadot-primitives", "rand 0.8.5", - "rayon", - "sc-executor", - "sc-executor-common", - "sc-executor-wasmtime", "slotmap", "sp-core", - "sp-externalities", - "sp-io", "sp-maybe-compressed-blob", "sp-tracing", "sp-wasm-interface", "substrate-build-script-utils", - "tempfile", - "test-parachain-adder", - "test-parachain-halt", - "tikv-jemalloc-ctl", "tokio", "tracing-gum", ] @@ -7230,6 +7218,36 @@ dependencies = [ "tracing-gum", ] +[[package]] +name = "polkadot-node-core-pvf-worker" +version = "0.9.39" +dependencies = [ + "assert_matches", + "cpu-time", + "futures", + "libc", + "parity-scale-codec", + "polkadot-node-core-pvf", + "polkadot-parachain", + "polkadot-primitives", + "rayon", + "sc-executor", + "sc-executor-common", + "sc-executor-wasmtime", + "sp-core", + "sp-externalities", + "sp-io", + "sp-maybe-compressed-blob", + "sp-tracing", + "substrate-build-script-utils", + "tempfile", + "test-parachain-adder", + "test-parachain-halt", + "tikv-jemalloc-ctl", + "tokio", + "tracing-gum", +] + [[package]] name = "polkadot-node-core-runtime-api" version = "0.9.39" diff --git a/Cargo.toml b/Cargo.toml index 5ec0a74d5cac..cae599040b3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ tikv-jemallocator = "0.5.0" # Crates 
in our workspace, defined as dependencies so we can pass them feature flags. polkadot-cli = { path = "cli", features = [ "kusama-native", "westend-native", "rococo-native" ] } -polkadot-node-core-pvf = { path = "node/core/pvf" } +polkadot-node-core-pvf-worker = { path = "node/core/pvf/worker" } polkadot-overseer = { path = "node/overseer" } [dev-dependencies] @@ -80,6 +80,7 @@ members = [ "node/core/parachains-inherent", "node/core/provisioner", "node/core/pvf", + "node/core/pvf/worker", "node/core/pvf-checker", "node/core/runtime-api", "node/network/approval-distribution", @@ -206,7 +207,7 @@ try-runtime = [ "polkadot-cli/try-runtime" ] fast-runtime = [ "polkadot-cli/fast-runtime" ] runtime-metrics = [ "polkadot-cli/runtime-metrics" ] pyroscope = ["polkadot-cli/pyroscope"] -jemalloc-allocator = ["polkadot-node-core-pvf/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"] +jemalloc-allocator = ["polkadot-node-core-pvf-worker/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"] # Configuration for building a .deb package - for use with `cargo-deb` [package.metadata.deb] diff --git a/node/core/pvf/Cargo.toml b/node/core/pvf/Cargo.toml index 986735d2af09..f816abba8f81 100644 --- a/node/core/pvf/Cargo.toml +++ b/node/core/pvf/Cargo.toml @@ -4,32 +4,15 @@ version.workspace = true authors.workspace = true edition.workspace = true -[[bin]] -name = "prepare_worker" -path = "bin/prepare_worker.rs" - -[[bin]] -name = "execute_worker" -path = "bin/execute_worker.rs" - -[[bin]] -name = "puppet_worker" -path = "bin/puppet_worker.rs" - [dependencies] always-assert = "0.1" -assert_matches = "1.4.0" -cpu-time = "1.0.0" futures = "0.3.21" futures-timer = "3.0.2" gum = { package = "tracing-gum", path = "../../gum" } libc = "0.2.139" pin-project = "1.0.9" rand = "0.8.5" -rayon = "1.5.1" slotmap = "1.0" -tempfile = "3.3.0" -tikv-jemalloc-ctl = { version = "0.5.0", optional = true } tokio = { version = "1.24.2", features = ["fs", "process"] } parity-scale-codec = { 
version = "3.4.0", default-features = false, features = ["derive"] } @@ -38,13 +21,8 @@ polkadot-parachain = { path = "../../../parachain" } polkadot-core-primitives = { path = "../../../core-primitives" } polkadot-node-metrics = { path = "../../metrics" } polkadot-node-primitives = { path = "../../primitives" } - polkadot-primitives = { path = "../../../primitives" } -sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } -sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } -sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } + sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-wasm-interface = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } @@ -53,14 +31,5 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master [build-dependencies] substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } -[target.'cfg(target_os = "linux")'.dependencies] -tikv-jemalloc-ctl = "0.5.0" - [dev-dependencies] -adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" } -halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" } hex-literal = "0.3.4" -tempfile = "3.3.0" - -[features] -jemalloc-allocator = ["dep:tikv-jemalloc-ctl"] diff --git a/node/core/pvf/src/artifacts.rs b/node/core/pvf/src/artifacts.rs index 0c6146ec8a82..3f82df402dab 100644 --- a/node/core/pvf/src/artifacts.rs +++ b/node/core/pvf/src/artifacts.rs @@ -24,9 +24,11 @@ use std::{ time::{Duration, SystemTime}, }; +/// Contains the bytes for a 
successfully compiled artifact. pub struct CompiledArtifact(Vec); impl CompiledArtifact { + /// Creates a `CompiledArtifact`. pub fn new(code: Vec) -> Self { Self(code) } diff --git a/node/core/pvf/src/error.rs b/node/core/pvf/src/error.rs index 662fcc22cd31..21f23d515fdd 100644 --- a/node/core/pvf/src/error.rs +++ b/node/core/pvf/src/error.rs @@ -16,7 +16,7 @@ use crate::prepare::PrepareStats; use parity_scale_codec::{Decode, Encode}; -use std::{any::Any, fmt}; +use std::fmt; /// Result of PVF preparation performed by the validation host. Contains stats about the preparation if /// successful @@ -126,17 +126,3 @@ impl From for ValidationError { } } } - -/// Attempt to convert an opaque panic payload to a string. -/// -/// This is a best effort, and is not guaranteed to provide the most accurate value. -pub(crate) fn stringify_panic_payload(payload: Box) -> String { - match payload.downcast::<&'static str>() { - Ok(msg) => msg.to_string(), - Err(payload) => match payload.downcast::() { - Ok(msg) => *msg, - // At least we tried... - Err(_) => "unknown panic payload".to_string(), - }, - } -} diff --git a/node/core/pvf/src/execute/mod.rs b/node/core/pvf/src/execute/mod.rs index e863b4e24e54..9ff6d90857a7 100644 --- a/node/core/pvf/src/execute/mod.rs +++ b/node/core/pvf/src/execute/mod.rs @@ -18,10 +18,10 @@ //! //! The validation host [runs the queue][`start`] communicating with it by sending [`ToQueue`] //! messages. The queue will spawn workers in new processes. Those processes should jump to -//! [`worker_entrypoint`]. +//! `polkadot_node_core_pvf_worker::execute_worker_entrypoint`. 
mod queue; mod worker; pub use queue::{start, ToQueue}; -pub use worker::{worker_entrypoint, Response as ExecuteResponse}; +pub use worker::{Handshake as ExecuteHandshake, Response as ExecuteResponse}; diff --git a/node/core/pvf/src/execute/worker.rs b/node/core/pvf/src/execute/worker.rs index 761caf659804..e6e1279c5b1a 100644 --- a/node/core/pvf/src/execute/worker.rs +++ b/node/core/pvf/src/execute/worker.rs @@ -16,26 +16,19 @@ use crate::{ artifacts::ArtifactPathId, - executor_intf::Executor, worker_common::{ - bytes_to_path, cpu_time_monitor_loop, framed_recv, framed_send, path_to_bytes, - spawn_with_program_path, worker_event_loop, IdleWorker, SpawnErr, WorkerHandle, - JOB_TIMEOUT_WALL_CLOCK_FACTOR, + framed_recv, framed_send, path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, + WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }, LOG_TARGET, }; -use cpu_time::ProcessTime; -use futures::{pin_mut, select_biased, FutureExt}; +use futures::FutureExt; use futures_timer::Delay; use parity_scale_codec::{Decode, Encode}; use polkadot_parachain::primitives::ValidationResult; use polkadot_primitives::ExecutorParams; -use std::{ - path::{Path, PathBuf}, - sync::{mpsc::channel, Arc}, - time::Duration, -}; +use std::{path::Path, time::Duration}; use tokio::{io, net::UnixStream}; /// Spawns a new worker with the given program path that acts as the worker and the spawn timeout. 
@@ -185,17 +178,6 @@ async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Re framed_send(stream, &handshake.encode()).await } -async fn recv_handshake(stream: &mut UnixStream) -> io::Result { - let handshake_enc = framed_recv(stream).await?; - let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| { - io::Error::new( - io::ErrorKind::Other, - "execute pvf recv_handshake: failed to decode Handshake".to_owned(), - ) - })?; - Ok(handshake) -} - async fn send_request( stream: &mut UnixStream, artifact_path: &Path, @@ -207,29 +189,6 @@ async fn send_request( framed_send(stream, &execution_timeout.encode()).await } -async fn recv_request(stream: &mut UnixStream) -> io::Result<(PathBuf, Vec, Duration)> { - let artifact_path = framed_recv(stream).await?; - let artifact_path = bytes_to_path(&artifact_path).ok_or_else(|| { - io::Error::new( - io::ErrorKind::Other, - "execute pvf recv_request: non utf-8 artifact path".to_string(), - ) - })?; - let params = framed_recv(stream).await?; - let execution_timeout = framed_recv(stream).await?; - let execution_timeout = Duration::decode(&mut &execution_timeout[..]).map_err(|_| { - io::Error::new( - io::ErrorKind::Other, - "execute pvf recv_request: failed to decode duration".to_string(), - ) - })?; - Ok((artifact_path, params, execution_timeout)) -} - -async fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> { - framed_send(stream, &response.encode()).await -} - async fn recv_response(stream: &mut UnixStream) -> io::Result { let response_bytes = framed_recv(stream).await?; Response::decode(&mut &response_bytes[..]).map_err(|e| { @@ -240,28 +199,43 @@ async fn recv_response(stream: &mut UnixStream) -> io::Result { }) } +/// The payload of the one-time handshake that is done when a worker process is created. Carries +/// data from the host to the worker. 
#[derive(Encode, Decode)] -struct Handshake { - executor_params: ExecutorParams, +pub struct Handshake { + /// The executor parameters. + pub executor_params: ExecutorParams, } +/// The response from an execution job on the worker. #[derive(Encode, Decode)] pub enum Response { - Ok { result_descriptor: ValidationResult, duration: Duration }, + /// The job completed successfully. + Ok { + /// The result of parachain validation. + result_descriptor: ValidationResult, + /// The amount of CPU time taken by the job. + duration: Duration, + }, + /// The candidate is invalid. InvalidCandidate(String), + /// The job timed out. TimedOut, + /// Some internal error occurred. Should only be used for errors independent of the candidate. InternalError(String), } impl Response { - fn format_invalid(ctx: &'static str, msg: &str) -> Self { + /// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty). + pub fn format_invalid(ctx: &'static str, msg: &str) -> Self { if msg.is_empty() { Self::InvalidCandidate(ctx.to_string()) } else { Self::InvalidCandidate(format!("{}: {}", ctx, msg)) } } - fn format_internal(ctx: &'static str, msg: &str) -> Self { + /// Creates an internal response from a context `ctx` and a message `msg` (which can be empty). + pub fn format_internal(ctx: &'static str, msg: &str) -> Self { if msg.is_empty() { Self::InternalError(ctx.to_string()) } else { @@ -269,103 +243,3 @@ impl Response { } } } - -/// The entrypoint that the spawned execute worker should start with. The `socket_path` specifies -/// the path to the socket used to communicate with the host. The `node_version`, if `Some`, -/// is checked against the worker version. A mismatch results in immediate worker termination. -/// `None` is used for tests and in other situations when version check is not necessary. 
-pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { - worker_event_loop("execute", socket_path, node_version, |rt_handle, mut stream| async move { - let worker_pid = std::process::id(); - - let handshake = recv_handshake(&mut stream).await?; - let executor = Arc::new(Executor::new(handshake.executor_params).map_err(|e| { - io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e)) - })?); - - loop { - let (artifact_path, params, execution_timeout) = recv_request(&mut stream).await?; - gum::debug!( - target: LOG_TARGET, - %worker_pid, - "worker: validating artifact {}", - artifact_path.display(), - ); - - // Used to signal to the cpu time monitor thread that it can finish. - let (finished_tx, finished_rx) = channel::<()>(); - let cpu_time_start = ProcessTime::now(); - - // Spawn a new thread that runs the CPU time monitor. - let cpu_time_monitor_fut = rt_handle - .spawn_blocking(move || { - cpu_time_monitor_loop(cpu_time_start, execution_timeout, finished_rx) - }) - .fuse(); - let executor_2 = executor.clone(); - let execute_fut = rt_handle - .spawn_blocking(move || { - validate_using_artifact(&artifact_path, ¶ms, executor_2, cpu_time_start) - }) - .fuse(); - - pin_mut!(cpu_time_monitor_fut); - pin_mut!(execute_fut); - - let response = select_biased! { - // If this future is not selected, the join handle is dropped and the thread will - // finish in the background. - cpu_time_monitor_res = cpu_time_monitor_fut => { - match cpu_time_monitor_res { - Ok(Some(cpu_time_elapsed)) => { - // Log if we exceed the timeout and the other thread hasn't finished. 
- gum::warn!( - target: LOG_TARGET, - %worker_pid, - "execute job took {}ms cpu time, exceeded execute timeout {}ms", - cpu_time_elapsed.as_millis(), - execution_timeout.as_millis(), - ); - Response::TimedOut - }, - Ok(None) => Response::InternalError("error communicating over finished channel".into()), - Err(e) => Response::format_internal("cpu time monitor thread error", &e.to_string()), - } - }, - execute_res = execute_fut => { - let _ = finished_tx.send(()); - execute_res.unwrap_or_else(|e| Response::format_internal("execute thread error", &e.to_string())) - }, - }; - - send_response(&mut stream, response).await?; - } - }); -} - -fn validate_using_artifact( - artifact_path: &Path, - params: &[u8], - executor: Arc, - cpu_time_start: ProcessTime, -) -> Response { - let descriptor_bytes = match unsafe { - // SAFETY: this should be safe since the compiled artifact passed here comes from the - // file created by the prepare workers. These files are obtained by calling - // [`executor_intf::prepare`]. - executor.execute(artifact_path.as_ref(), params) - } { - Err(err) => return Response::format_invalid("execute", &err), - Ok(d) => d, - }; - - let duration = cpu_time_start.elapsed(); - - let result_descriptor = match ValidationResult::decode(&mut &descriptor_bytes[..]) { - Err(err) => - return Response::format_invalid("validation result decoding failed", &err.to_string()), - Ok(r) => r, - }; - - Response::Ok { result_descriptor, duration } -} diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index 32ecec5da4ba..d76466cab195 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -29,11 +29,11 @@ //! //! Then using the handle the client can send three types of requests: //! -//! (a) PVF pre-checking. This takes the PVF [code][`Pvf`] and tries to prepare it (verify and +//! (a) PVF pre-checking. This takes the `Pvf` code and tries to prepare it (verify and //! compile) in order to pre-check its validity. //! //! (b) PVF execution. 
This accepts the PVF [`params`][`polkadot_parachain::primitives::ValidationParams`] -//! and the PVF [code][`Pvf`], prepares (verifies and compiles) the code, and then executes PVF +//! and the `Pvf` code, prepares (verifies and compiles) the code, and then executes PVF //! with the `params`. //! //! (c) Heads up. This request allows to signal that the given PVF may be needed soon and that it @@ -93,7 +93,6 @@ mod artifacts; mod error; mod execute; -mod executor_intf; mod host; mod metrics; mod prepare; @@ -101,64 +100,22 @@ mod priority; mod pvf; mod worker_common; -#[doc(hidden)] -pub mod testing; - -#[doc(hidden)] -pub use sp_tracing; - +pub use artifacts::CompiledArtifact; pub use error::{InvalidCandidate, PrepareError, PrepareResult, ValidationError}; -pub use prepare::PrepareStats; +pub use execute::{ExecuteHandshake, ExecuteResponse}; +#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] +pub use prepare::MemoryAllocationStats; +pub use prepare::{MemoryStats, PrepareStats}; pub use priority::Priority; pub use pvf::PvfPrepData; pub use host::{start, Config, ValidationHost}; pub use metrics::Metrics; -pub use worker_common::JOB_TIMEOUT_WALL_CLOCK_FACTOR; - -pub use execute::worker_entrypoint as execute_worker_entrypoint; -pub use prepare::worker_entrypoint as prepare_worker_entrypoint; - -pub use executor_intf::{prepare, prevalidate}; - -pub use sc_executor_common; -pub use sp_maybe_compressed_blob; +pub use worker_common::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR}; const LOG_TARGET: &str = "parachain::pvf"; -/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for -/// spawning the desired worker. -#[macro_export(local_inner_macros)] -macro_rules! 
decl_worker_main { - ($command:tt) => { - fn main() { - $crate::sp_tracing::try_init_simple(); - - let args = std::env::args().collect::>(); - - let mut version = None; - let mut socket_path: &str = ""; - - for i in 1..args.len() { - match args[i].as_ref() { - "--socket-path" => socket_path = args[i + 1].as_str(), - "--node-version" => version = Some(args[i + 1].as_str()), - _ => (), - } - } - - decl_worker_main_command!($command, socket_path, version) - } - }; -} - -#[macro_export] #[doc(hidden)] -macro_rules! decl_worker_main_command { - (prepare, $socket_path:expr, $version: expr) => { - $crate::prepare_worker_entrypoint(&$socket_path, $version) - }; - (execute, $socket_path:expr, $version: expr) => { - $crate::execute_worker_entrypoint(&$socket_path, $version) - }; +pub mod testing { + pub use crate::worker_common::{spawn_with_program_path, SpawnErr}; } diff --git a/node/core/pvf/src/prepare/mod.rs b/node/core/pvf/src/prepare/mod.rs index d8d036a82238..b4b6aea489dd 100644 --- a/node/core/pvf/src/prepare/mod.rs +++ b/node/core/pvf/src/prepare/mod.rs @@ -20,23 +20,44 @@ //! (by running [`start_pool`]). //! //! The pool will spawn workers in new processes and those should execute pass control to -//! [`worker_entrypoint`]. +//! `polkadot_node_core_pvf_worker::prepare_worker_entrypoint`. -mod memory_stats; mod pool; mod queue; mod worker; -pub use memory_stats::MemoryStats; pub use pool::start as start_pool; pub use queue::{start as start_queue, FromQueue, ToQueue}; -pub use worker::worker_entrypoint; use parity_scale_codec::{Decode, Encode}; /// Preparation statistics, including the CPU time and memory taken. #[derive(Debug, Clone, Default, Encode, Decode)] pub struct PrepareStats { - cpu_time_elapsed: std::time::Duration, - memory_stats: MemoryStats, + /// The CPU time that elapsed for the preparation job. + pub cpu_time_elapsed: std::time::Duration, + /// The observed memory statistics for the preparation job. 
+	pub memory_stats: MemoryStats,
+}
+
+/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if
+/// supported by the OS, `ru_maxrss`.
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryStats {
+	/// Memory stats from `tikv_jemalloc_ctl`.
+	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+	pub memory_tracker_stats: Option<MemoryAllocationStats>,
+	/// `ru_maxrss` from `getrusage`. A string error since `io::Error` is not `Encode`able.
+	#[cfg(target_os = "linux")]
+	pub max_rss: Option<i64>,
+}
+
+/// Statistics of collected memory metrics.
+#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
+#[derive(Clone, Debug, Default, Encode, Decode)]
+pub struct MemoryAllocationStats {
+	/// Total resident memory, in bytes.
+	pub resident: u64,
+	/// Total allocated memory, in bytes.
+	pub allocated: u64,
+}
diff --git a/node/core/pvf/src/prepare/queue.rs b/node/core/pvf/src/prepare/queue.rs
index 20ee95a435b2..f84d5ab0e56e 100644
--- a/node/core/pvf/src/prepare/queue.rs
+++ b/node/core/pvf/src/prepare/queue.rs
@@ -226,7 +226,7 @@ async fn handle_enqueue(
 		target: LOG_TARGET,
 		validation_code_hash = ?pvf.code_hash(),
 		?priority,
-		preparation_timeout = ?pvf.prep_timeout,
+		preparation_timeout = ?pvf.prep_timeout(),
 		"PVF is enqueued for preparation.",
 	);
 	queue.metrics.prepare_enqueued();
diff --git a/node/core/pvf/src/prepare/worker.rs b/node/core/pvf/src/prepare/worker.rs
index 4905d23c1916..05e485cd34b6 100644
--- a/node/core/pvf/src/prepare/worker.rs
+++ b/node/core/pvf/src/prepare/worker.rs
@@ -14,33 +14,22 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
-#[cfg(target_os = "linux")] -use super::memory_stats::max_rss_stat::{extract_max_rss_stat, get_max_rss_thread}; -#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] -use super::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_tracker_loop}; -use super::memory_stats::MemoryStats; use crate::{ - artifacts::CompiledArtifact, error::{PrepareError, PrepareResult}, metrics::Metrics, prepare::PrepareStats, pvf::PvfPrepData, worker_common::{ - bytes_to_path, cpu_time_monitor_loop, framed_recv, framed_send, path_to_bytes, - spawn_with_program_path, tmpfile_in, worker_event_loop, IdleWorker, SpawnErr, WorkerHandle, - JOB_TIMEOUT_WALL_CLOCK_FACTOR, + framed_recv, framed_send, path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, + SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }, LOG_TARGET, }; -use cpu_time::ProcessTime; -use futures::{pin_mut, select_biased, FutureExt}; use parity_scale_codec::{Decode, Encode}; use sp_core::hexdisplay::HexDisplay; use std::{ - panic, path::{Path, PathBuf}, - sync::mpsc::channel, time::Duration, }; use tokio::{io, net::UnixStream}; @@ -104,7 +93,7 @@ pub async fn start_work( ); with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move { - let preparation_timeout = pvf.prep_timeout; + let preparation_timeout = pvf.prep_timeout(); if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await { gum::warn!( target: LOG_TARGET, @@ -285,28 +274,6 @@ async fn send_request( Ok(()) } -async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> { - let pvf = framed_recv(stream).await?; - let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| { - io::Error::new( - io::ErrorKind::Other, - format!("prepare pvf recv_request: failed to decode PvfPrepData: {}", e), - ) - })?; - let tmp_file = framed_recv(stream).await?; - let tmp_file = bytes_to_path(&tmp_file).ok_or_else(|| { - io::Error::new( - io::ErrorKind::Other, - "prepare pvf recv_request: non utf-8 
artifact path".to_string(), - ) - })?; - Ok((pvf, tmp_file)) -} - -async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> { - framed_send(stream, &result.encode()).await -} - async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result { let result = framed_recv(stream).await?; let result = PrepareResult::decode(&mut &result[..]).map_err(|e| { @@ -325,158 +292,3 @@ async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result) { - worker_event_loop("prepare", socket_path, node_version, |rt_handle, mut stream| async move { - let worker_pid = std::process::id(); - - loop { - let (pvf, dest) = recv_request(&mut stream).await?; - gum::debug!( - target: LOG_TARGET, - %worker_pid, - "worker: preparing artifact", - ); - - let cpu_time_start = ProcessTime::now(); - let preparation_timeout = pvf.prep_timeout; - - // Run the memory tracker. - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - let (memory_tracker_tx, memory_tracker_rx) = channel::<()>(); - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - let memory_tracker_fut = rt_handle.spawn_blocking(move || memory_tracker_loop(memory_tracker_rx)); - - // Spawn a new thread that runs the CPU time monitor. - let (cpu_time_monitor_tx, cpu_time_monitor_rx) = channel::<()>(); - let cpu_time_monitor_fut = rt_handle - .spawn_blocking(move || { - cpu_time_monitor_loop(cpu_time_start, preparation_timeout, cpu_time_monitor_rx) - }) - .fuse(); - // Spawn another thread for preparation. - let prepare_fut = rt_handle - .spawn_blocking(move || { - let result = prepare_artifact(pvf); - - // Get the `ru_maxrss` stat. If supported, call getrusage for the thread. - #[cfg(target_os = "linux")] - let result = result.map(|artifact| (artifact, get_max_rss_thread())); - - result - }) - .fuse(); - - pin_mut!(cpu_time_monitor_fut); - pin_mut!(prepare_fut); - - let result = select_biased! 
{ - // If this future is not selected, the join handle is dropped and the thread will - // finish in the background. - join_res = cpu_time_monitor_fut => { - match join_res { - Ok(Some(cpu_time_elapsed)) => { - // Log if we exceed the timeout and the other thread hasn't finished. - gum::warn!( - target: LOG_TARGET, - %worker_pid, - "prepare job took {}ms cpu time, exceeded prepare timeout {}ms", - cpu_time_elapsed.as_millis(), - preparation_timeout.as_millis(), - ); - Err(PrepareError::TimedOut) - }, - Ok(None) => Err(PrepareError::IoErr("error communicating over finished channel".into())), - Err(err) => Err(PrepareError::IoErr(err.to_string())), - } - }, - prepare_res = prepare_fut => { - let cpu_time_elapsed = cpu_time_start.elapsed(); - let _ = cpu_time_monitor_tx.send(()); - - match prepare_res.unwrap_or_else(|err| Err(PrepareError::IoErr(err.to_string()))) { - Err(err) => { - // Serialized error will be written into the socket. - Err(err) - }, - Ok(ok) => { - // Stop the memory stats worker and get its observed memory stats. - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - let memory_tracker_stats = - get_memory_tracker_loop_stats(memory_tracker_fut, memory_tracker_tx, worker_pid).await; - #[cfg(target_os = "linux")] - let (ok, max_rss) = ok; - let memory_stats = MemoryStats { - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - memory_tracker_stats, - #[cfg(target_os = "linux")] - max_rss: extract_max_rss_stat(max_rss, worker_pid), - }; - - // Write the serialized artifact into a temp file. - // - // PVF host only keeps artifacts statuses in its memory, successfully - // compiled code gets stored on the disk (and consequently deserialized - // by execute-workers). The prepare worker is only required to send `Ok` - // to the pool to indicate the success. 
- - gum::debug!( - target: LOG_TARGET, - %worker_pid, - "worker: writing artifact to {}", - dest.display(), - ); - tokio::fs::write(&dest, &ok).await?; - - Ok(PrepareStats{cpu_time_elapsed, memory_stats}) - }, - } - }, - }; - - send_response(&mut stream, result).await?; - } - }); -} - -fn prepare_artifact(pvf: PvfPrepData) -> Result { - panic::catch_unwind(|| { - let blob = match crate::executor_intf::prevalidate(&pvf.code()) { - Err(err) => return Err(PrepareError::Prevalidation(format!("{:?}", err))), - Ok(b) => b, - }; - - match crate::executor_intf::prepare(blob, &pvf.executor_params()) { - Ok(compiled_artifact) => Ok(CompiledArtifact::new(compiled_artifact)), - Err(err) => Err(PrepareError::Preparation(format!("{:?}", err))), - } - }) - .map_err(|panic_payload| { - PrepareError::Panic(crate::error::stringify_panic_payload(panic_payload)) - }) - .and_then(|inner_result| inner_result) -} diff --git a/node/core/pvf/src/pvf.rs b/node/core/pvf/src/pvf.rs index ad2dc5fcd918..c134cacb4acf 100644 --- a/node/core/pvf/src/pvf.rs +++ b/node/core/pvf/src/pvf.rs @@ -36,13 +36,13 @@ use crate::host::tests::TEST_PREPARATION_TIMEOUT; #[derive(Clone, Encode, Decode)] pub struct PvfPrepData { /// Wasm code (uncompressed) - pub(crate) code: Arc>, + code: Arc>, /// Wasm code hash - pub(crate) code_hash: ValidationCodeHash, + code_hash: ValidationCodeHash, /// Executor environment parameters for the session for which artifact is prepared - pub(crate) executor_params: Arc, + executor_params: Arc, /// Preparation timeout - pub(crate) prep_timeout: Duration, + prep_timeout: Duration, } impl PvfPrepData { @@ -69,15 +69,20 @@ impl PvfPrepData { } /// Returns PVF code - pub(crate) fn code(&self) -> Arc> { + pub fn code(&self) -> Arc> { self.code.clone() } /// Returns executor params - pub(crate) fn executor_params(&self) -> Arc { + pub fn executor_params(&self) -> Arc { self.executor_params.clone() } + /// Returns preparation timeout. 
+ pub fn prep_timeout(&self) -> Duration { + self.prep_timeout + } + /// Creates a structure for tests #[cfg(test)] pub(crate) fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self { diff --git a/node/core/pvf/src/worker_common.rs b/node/core/pvf/src/worker_common.rs index 3caee34a5d0f..33144616601d 100644 --- a/node/core/pvf/src/worker_common.rs +++ b/node/core/pvf/src/worker_common.rs @@ -17,8 +17,7 @@ //! Common logic for implementation of worker processes. use crate::LOG_TARGET; -use cpu_time::ProcessTime; -use futures::{never::Never, FutureExt as _}; +use futures::FutureExt as _; use futures_timer::Delay; use pin_project::pin_project; use rand::Rng; @@ -26,7 +25,6 @@ use std::{ fmt, mem, path::{Path, PathBuf}, pin::Pin, - sync::mpsc::{Receiver, RecvTimeoutError}, task::{Context, Poll}, time::Duration, }; @@ -34,17 +32,12 @@ use tokio::{ io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, ReadBuf}, net::{UnixListener, UnixStream}, process, - runtime::{Handle, Runtime}, }; /// A multiple of the job timeout (in CPU time) for which we are willing to wait on the host (in /// wall clock time). This is lenient because CPU time may go slower than wall clock time. pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; -/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the -/// child process. -pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50); - /// This is publicly exposed only for integration tests. 
#[doc(hidden)] pub async fn spawn_with_program_path( @@ -171,92 +164,6 @@ pub async fn tmpfile(prefix: &str) -> io::Result { tmpfile_in(prefix, &temp_dir).await } -pub fn worker_event_loop( - debug_id: &'static str, - socket_path: &str, - node_version: Option<&str>, - mut event_loop: F, -) where - F: FnMut(Handle, UnixStream) -> Fut, - Fut: futures::Future>, -{ - let worker_pid = std::process::id(); - gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id); - - // Check for a mismatch between the node and worker versions. - if let Some(version) = node_version { - if version != env!("SUBSTRATE_CLI_IMPL_VERSION") { - gum::error!( - target: LOG_TARGET, - %worker_pid, - "Node and worker version mismatch, node needs restarting, forcing shutdown", - ); - kill_parent_node_in_emergency(); - let err: io::Result = - Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch")); - gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker({}): {:?}", debug_id, err); - return - } - } - - // Run the main worker loop. - let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it."); - let handle = rt.handle(); - let err = rt - .block_on(async move { - let stream = UnixStream::connect(socket_path).await?; - let _ = tokio::fs::remove_file(socket_path).await; - - let result = event_loop(handle.clone(), stream).await; - - result - }) - // It's never `Ok` because it's `Ok(Never)`. - .unwrap_err(); - - gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err); - - // We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast - // as possible and not wait for stalled validation to finish. This isn't strictly necessary now, - // but may be in the future. - rt.shutdown_background(); -} - -/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. 
Continuously wakes up -/// and then either blocks for the remaining CPU time, or returns if we exceed the CPU timeout. -/// -/// Returning `Some` indicates that we should send a `TimedOut` error to the host. Will return -/// `None` if the other thread finishes first, without us timing out. -/// -/// NOTE: Sending a `TimedOut` error to the host will cause the worker, whether preparation or -/// execution, to be killed by the host. We do not kill the process here because it would interfere -/// with the proper handling of this error. -pub fn cpu_time_monitor_loop( - cpu_time_start: ProcessTime, - timeout: Duration, - finished_rx: Receiver<()>, -) -> Option { - loop { - let cpu_time_elapsed = cpu_time_start.elapsed(); - - // Treat the timeout as CPU time, which is less subject to variance due to load. - if cpu_time_elapsed <= timeout { - // Sleep for the remaining CPU time, plus a bit to account for overhead. Note that the sleep - // is wall clock time. The CPU clock may be slower than the wall clock. - let sleep_interval = timeout.saturating_sub(cpu_time_elapsed) + JOB_TIMEOUT_OVERHEAD; - match finished_rx.recv_timeout(sleep_interval) { - // Received finish signal. - Ok(()) => return None, - // Timed out, restart loop. - Err(RecvTimeoutError::Timeout) => continue, - Err(RecvTimeoutError::Disconnected) => return None, - } - } - - return Some(cpu_time_elapsed) - } -} - /// A struct that represents an idle worker. /// /// This struct is supposed to be used as a token that is passed by move into a subroutine that @@ -405,12 +312,7 @@ pub fn path_to_bytes(path: &Path) -> &[u8] { path.to_str().expect("non-UTF-8 path").as_bytes() } -/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a -/// a proper utf-8 string. -pub fn bytes_to_path(bytes: &[u8]) -> Option { - std::str::from_utf8(bytes).ok().map(PathBuf::from) -} - +/// Write some data prefixed by its length into `w`. 
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> { let len_buf = buf.len().to_le_bytes(); w.write_all(&len_buf).await?; @@ -418,6 +320,7 @@ pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::R Ok(()) } +/// Read some data prefixed by its length from `r`. pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result> { let mut len_buf = [0u8; mem::size_of::()]; r.read_exact(&mut len_buf).await?; @@ -426,20 +329,3 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result r.read_exact(&mut buf).await?; Ok(buf) } - -/// In case of node and worker version mismatch (as a result of in-place upgrade), send `SIGTERM` -/// to the node to tear it down and prevent it from raising disputes on valid candidates. Node -/// restart should be handled by the node owner. As node exits, unix sockets opened to workers -/// get closed by the OS and other workers receive error on socket read and also exit. Preparation -/// jobs are written to the temporary files that are renamed to real artifacts on the node side, so -/// no leftover artifacts are possible. -fn kill_parent_node_in_emergency() { - unsafe { - // SAFETY: `getpid()` never fails but may return "no-parent" (0) or "parent-init" (1) in - // some corner cases, which is checked. `kill()` never fails. 
- let ppid = libc::getppid(); - if ppid > 1 { - libc::kill(ppid, libc::SIGTERM); - } - } -} diff --git a/node/core/pvf/worker/Cargo.toml b/node/core/pvf/worker/Cargo.toml new file mode 100644 index 000000000000..d75987048666 --- /dev/null +++ b/node/core/pvf/worker/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "polkadot-node-core-pvf-worker" +version.workspace = true +authors.workspace = true +edition.workspace = true + +[[bin]] +name = "prepare_worker" +path = "bin/prepare_worker.rs" + +[[bin]] +name = "execute_worker" +path = "bin/execute_worker.rs" + +[[bin]] +name = "puppet_worker" +path = "bin/puppet_worker.rs" + +[dependencies] +assert_matches = "1.4.0" +cpu-time = "1.0.0" +futures = "0.3.21" +gum = { package = "tracing-gum", path = "../../../gum" } +libc = "0.2.139" +rayon = "1.5.1" +tempfile = "3.3.0" +tikv-jemalloc-ctl = { version = "0.5.0", optional = true } +tokio = "1.24.2" + +parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } + +polkadot-node-core-pvf = { path = ".." 
} +polkadot-parachain = { path = "../../../../parachain" } +polkadot-primitives = { path = "../../../../primitives" } + +sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } + +[target.'cfg(target_os = "linux")'.dependencies] +tikv-jemalloc-ctl = "0.5.0" + +[build-dependencies] +substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } + +[dev-dependencies] +adder = { package = "test-parachain-adder", path = "../../../../parachain/test-parachains/adder" } +halt = { package = "test-parachain-halt", path = "../../../../parachain/test-parachains/halt" } +tempfile = "3.3.0" + +[features] +jemalloc-allocator = ["dep:tikv-jemalloc-ctl"] diff --git a/node/core/pvf/bin/execute_worker.rs b/node/core/pvf/worker/bin/execute_worker.rs similarity index 93% rename from node/core/pvf/bin/execute_worker.rs rename to node/core/pvf/worker/bin/execute_worker.rs index b7b26520bc39..2a3251ea1c0e 100644 --- a/node/core/pvf/bin/execute_worker.rs +++ b/node/core/pvf/worker/bin/execute_worker.rs @@ -18,4 +18,4 @@ // TODO: Build with musl. // TODO: Embed into polkadot binary. 
-polkadot_node_core_pvf::decl_worker_main!(execute); +polkadot_node_core_pvf_worker::decl_worker_main!(execute); diff --git a/node/core/pvf/bin/prepare_worker.rs b/node/core/pvf/worker/bin/prepare_worker.rs similarity index 93% rename from node/core/pvf/bin/prepare_worker.rs rename to node/core/pvf/worker/bin/prepare_worker.rs index 13017cbfc784..a478009c4673 100644 --- a/node/core/pvf/bin/prepare_worker.rs +++ b/node/core/pvf/worker/bin/prepare_worker.rs @@ -18,4 +18,4 @@ // TODO: Build with musl. // TODO: Embed into polkadot binary. -polkadot_node_core_pvf::decl_worker_main!(prepare); +polkadot_node_core_pvf_worker::decl_worker_main!(prepare); diff --git a/node/core/pvf/bin/puppet_worker.rs b/node/core/pvf/worker/bin/puppet_worker.rs similarity index 100% rename from node/core/pvf/bin/puppet_worker.rs rename to node/core/pvf/worker/bin/puppet_worker.rs diff --git a/node/core/pvf/worker/build.rs b/node/core/pvf/worker/build.rs new file mode 100644 index 000000000000..40e9f832586e --- /dev/null +++ b/node/core/pvf/worker/build.rs @@ -0,0 +1,19 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +fn main() { + substrate_build_script_utils::generate_cargo_keys(); +} diff --git a/node/core/pvf/worker/src/common.rs b/node/core/pvf/worker/src/common.rs new file mode 100644 index 000000000000..84bc88701d62 --- /dev/null +++ b/node/core/pvf/worker/src/common.rs @@ -0,0 +1,142 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use crate::LOG_TARGET; +use cpu_time::ProcessTime; +use futures::never::Never; +use std::{ + path::PathBuf, + sync::mpsc::{Receiver, RecvTimeoutError}, + time::Duration, +}; +use tokio::{ + io, + net::UnixStream, + runtime::{Handle, Runtime}, +}; + +/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the +/// child process. +pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50); + +/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a +/// a proper utf-8 string. 
+pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
+	std::str::from_utf8(bytes).ok().map(PathBuf::from)
+}
+
+pub fn worker_event_loop<F, Fut>(
+	debug_id: &'static str,
+	socket_path: &str,
+	node_version: Option<&str>,
+	mut event_loop: F,
+) where
+	F: FnMut(Handle, UnixStream) -> Fut,
+	Fut: futures::Future<Output = io::Result<Never>>,
+{
+	let worker_pid = std::process::id();
+	gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);
+
+	// Check for a mismatch between the node and worker versions.
+	if let Some(version) = node_version {
+		if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
+			gum::error!(
+				target: LOG_TARGET,
+				%worker_pid,
+				"Node and worker version mismatch, node needs restarting, forcing shutdown",
+			);
+			kill_parent_node_in_emergency();
+			let err: io::Result<Never> =
+				Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"));
+			gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker({}): {:?}", debug_id, err);
+			return
+		}
+	}
+
+	// Run the main worker loop.
+	let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
+	let handle = rt.handle();
+	let err = rt
+		.block_on(async move {
+			let stream = UnixStream::connect(socket_path).await?;
+			let _ = tokio::fs::remove_file(socket_path).await;
+
+			let result = event_loop(handle.clone(), stream).await;
+
+			result
+		})
+		// It's never `Ok` because it's `Ok(Never)`.
+		.unwrap_err();
+
+	gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
+
+	// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
+	// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
+	// but may be in the future.
+	rt.shutdown_background();
+}
+
+/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
+/// and then either blocks for the remaining CPU time, or returns if we exceed the CPU timeout.
+///
+/// Returning `Some` indicates that we should send a `TimedOut` error to the host. Will return
+/// `None` if the other thread finishes first, without us timing out.
+///
+/// NOTE: Sending a `TimedOut` error to the host will cause the worker, whether preparation or
+/// execution, to be killed by the host. We do not kill the process here because it would interfere
+/// with the proper handling of this error.
+pub fn cpu_time_monitor_loop(
+	cpu_time_start: ProcessTime,
+	timeout: Duration,
+	finished_rx: Receiver<()>,
+) -> Option<Duration> {
+	loop {
+		let cpu_time_elapsed = cpu_time_start.elapsed();
+
+		// Treat the timeout as CPU time, which is less subject to variance due to load.
+		if cpu_time_elapsed <= timeout {
+			// Sleep for the remaining CPU time, plus a bit to account for overhead. Note that the sleep
+			// is wall clock time. The CPU clock may be slower than the wall clock.
+			let sleep_interval = timeout.saturating_sub(cpu_time_elapsed) + JOB_TIMEOUT_OVERHEAD;
+			match finished_rx.recv_timeout(sleep_interval) {
+				// Received finish signal.
+				Ok(()) => return None,
+				// Timed out, restart loop.
+				Err(RecvTimeoutError::Timeout) => continue,
+				Err(RecvTimeoutError::Disconnected) => return None,
+			}
+		}
+
+		return Some(cpu_time_elapsed)
+	}
+}
+
+/// In case of node and worker version mismatch (as a result of in-place upgrade), send `SIGTERM`
+/// to the node to tear it down and prevent it from raising disputes on valid candidates. Node
+/// restart should be handled by the node owner. As node exits, unix sockets opened to workers
+/// get closed by the OS and other workers receive error on socket read and also exit. Preparation
+/// jobs are written to the temporary files that are renamed to real artifacts on the node side, so
+/// no leftover artifacts are possible.
+fn kill_parent_node_in_emergency() {
+	unsafe {
+		// SAFETY: `getppid()` never fails but may return "no-parent" (0) or "parent-init" (1) in
+		// some corner cases, which is checked. `kill()` never fails.
+		let ppid = libc::getppid();
+		if ppid > 1 {
+			libc::kill(ppid, libc::SIGTERM);
+		}
+	}
+}
diff --git a/node/core/pvf/worker/src/execute.rs b/node/core/pvf/worker/src/execute.rs
new file mode 100644
index 000000000000..ea21c1337c25
--- /dev/null
+++ b/node/core/pvf/worker/src/execute.rs
@@ -0,0 +1,168 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
+ +use crate::{ + common::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, + executor_intf::Executor, + LOG_TARGET, +}; +use cpu_time::ProcessTime; +use futures::{pin_mut, select_biased, FutureExt}; +use parity_scale_codec::{Decode, Encode}; +use polkadot_node_core_pvf::{ + framed_recv, framed_send, ExecuteHandshake as Handshake, ExecuteResponse as Response, +}; +use polkadot_parachain::primitives::ValidationResult; +use std::{ + path::{Path, PathBuf}, + sync::{mpsc::channel, Arc}, + time::Duration, +}; +use tokio::{io, net::UnixStream}; + +async fn recv_handshake(stream: &mut UnixStream) -> io::Result { + let handshake_enc = framed_recv(stream).await?; + let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + "execute pvf recv_handshake: failed to decode Handshake".to_owned(), + ) + })?; + Ok(handshake) +} + +async fn recv_request(stream: &mut UnixStream) -> io::Result<(PathBuf, Vec, Duration)> { + let artifact_path = framed_recv(stream).await?; + let artifact_path = bytes_to_path(&artifact_path).ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + "execute pvf recv_request: non utf-8 artifact path".to_string(), + ) + })?; + let params = framed_recv(stream).await?; + let execution_timeout = framed_recv(stream).await?; + let execution_timeout = Duration::decode(&mut &execution_timeout[..]).map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + "execute pvf recv_request: failed to decode duration".to_string(), + ) + })?; + Ok((artifact_path, params, execution_timeout)) +} + +async fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> { + framed_send(stream, &response.encode()).await +} + +/// The entrypoint that the spawned execute worker should start with. The `socket_path` specifies +/// the path to the socket used to communicate with the host. The `node_version`, if `Some`, +/// is checked against the worker version. 
A mismatch results in immediate worker termination. +/// `None` is used for tests and in other situations when version check is not necessary. +pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { + worker_event_loop("execute", socket_path, node_version, |rt_handle, mut stream| async move { + let worker_pid = std::process::id(); + + let handshake = recv_handshake(&mut stream).await?; + let executor = Arc::new(Executor::new(handshake.executor_params).map_err(|e| { + io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e)) + })?); + + loop { + let (artifact_path, params, execution_timeout) = recv_request(&mut stream).await?; + gum::debug!( + target: LOG_TARGET, + %worker_pid, + "worker: validating artifact {}", + artifact_path.display(), + ); + + // Used to signal to the cpu time monitor thread that it can finish. + let (finished_tx, finished_rx) = channel::<()>(); + let cpu_time_start = ProcessTime::now(); + + // Spawn a new thread that runs the CPU time monitor. + let cpu_time_monitor_fut = rt_handle + .spawn_blocking(move || { + cpu_time_monitor_loop(cpu_time_start, execution_timeout, finished_rx) + }) + .fuse(); + let executor_2 = executor.clone(); + let execute_fut = rt_handle + .spawn_blocking(move || { + validate_using_artifact(&artifact_path, ¶ms, executor_2, cpu_time_start) + }) + .fuse(); + + pin_mut!(cpu_time_monitor_fut); + pin_mut!(execute_fut); + + let response = select_biased! { + // If this future is not selected, the join handle is dropped and the thread will + // finish in the background. + cpu_time_monitor_res = cpu_time_monitor_fut => { + match cpu_time_monitor_res { + Ok(Some(cpu_time_elapsed)) => { + // Log if we exceed the timeout and the other thread hasn't finished. 
+ gum::warn!( + target: LOG_TARGET, + %worker_pid, + "execute job took {}ms cpu time, exceeded execute timeout {}ms", + cpu_time_elapsed.as_millis(), + execution_timeout.as_millis(), + ); + Response::TimedOut + }, + Ok(None) => Response::InternalError("error communicating over finished channel".into()), + Err(e) => Response::format_internal("cpu time monitor thread error", &e.to_string()), + } + }, + execute_res = execute_fut => { + let _ = finished_tx.send(()); + execute_res.unwrap_or_else(|e| Response::format_internal("execute thread error", &e.to_string())) + }, + }; + + send_response(&mut stream, response).await?; + } + }); +} + +fn validate_using_artifact( + artifact_path: &Path, + params: &[u8], + executor: Arc, + cpu_time_start: ProcessTime, +) -> Response { + let descriptor_bytes = match unsafe { + // SAFETY: this should be safe since the compiled artifact passed here comes from the + // file created by the prepare workers. These files are obtained by calling + // [`executor_intf::prepare`]. + executor.execute(artifact_path.as_ref(), params) + } { + Err(err) => return Response::format_invalid("execute", &err), + Ok(d) => d, + }; + + let duration = cpu_time_start.elapsed(); + + let result_descriptor = match ValidationResult::decode(&mut &descriptor_bytes[..]) { + Err(err) => + return Response::format_invalid("validation result decoding failed", &err.to_string()), + Ok(r) => r, + }; + + Response::Ok { result_descriptor, duration } +} diff --git a/node/core/pvf/src/executor_intf.rs b/node/core/pvf/worker/src/executor_intf.rs similarity index 100% rename from node/core/pvf/src/executor_intf.rs rename to node/core/pvf/worker/src/executor_intf.rs diff --git a/node/core/pvf/worker/src/lib.rs b/node/core/pvf/worker/src/lib.rs new file mode 100644 index 000000000000..d1d94f75723a --- /dev/null +++ b/node/core/pvf/worker/src/lib.rs @@ -0,0 +1,72 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +mod common; +mod execute; +mod executor_intf; +mod memory_stats; +mod prepare; + +#[doc(hidden)] +pub mod testing; + +#[doc(hidden)] +pub use sp_tracing; + +pub use execute::worker_entrypoint as execute_worker_entrypoint; +pub use prepare::worker_entrypoint as prepare_worker_entrypoint; + +pub use executor_intf::{prepare, prevalidate}; + +// TODO: Does logging in the worker have any effect? +const LOG_TARGET: &str = "parachain::pvf-worker"; + +/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for +/// spawning the desired worker. +#[macro_export(local_inner_macros)] +macro_rules! decl_worker_main { + ($command:tt) => { + fn main() { + $crate::sp_tracing::try_init_simple(); + + let args = std::env::args().collect::>(); + + let mut version = None; + let mut socket_path: &str = ""; + + for i in 1..args.len() { + match args[i].as_ref() { + "--socket-path" => socket_path = args[i + 1].as_str(), + "--node-version" => version = Some(args[i + 1].as_str()), + _ => (), + } + } + + decl_worker_main_command!($command, socket_path, version) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
decl_worker_main_command { + (prepare, $socket_path:expr, $version: expr) => { + $crate::prepare_worker_entrypoint(&$socket_path, $version) + }; + (execute, $socket_path:expr, $version: expr) => { + $crate::execute_worker_entrypoint(&$socket_path, $version) + }; +} diff --git a/node/core/pvf/src/prepare/memory_stats.rs b/node/core/pvf/worker/src/memory_stats.rs similarity index 89% rename from node/core/pvf/src/prepare/memory_stats.rs rename to node/core/pvf/worker/src/memory_stats.rs index 069ef46caba4..72241a029e8f 100644 --- a/node/core/pvf/src/prepare/memory_stats.rs +++ b/node/core/pvf/worker/src/memory_stats.rs @@ -27,38 +27,14 @@ //! for more //! background. -use parity_scale_codec::{Decode, Encode}; - -/// Helper struct to contain all the memory stats, including [`MemoryAllocationStats`] and, if -/// supported by the OS, `ru_maxrss`. -#[derive(Clone, Debug, Default, Encode, Decode)] -pub struct MemoryStats { - /// Memory stats from `tikv_jemalloc_ctl`. - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - pub memory_tracker_stats: Option, - /// `ru_maxrss` from `getrusage`. A string error since `io::Error` is not `Encode`able. - #[cfg(target_os = "linux")] - pub max_rss: Option, -} - -/// Statistics of collected memory metrics. -#[non_exhaustive] -#[derive(Clone, Debug, Default, Encode, Decode)] -pub struct MemoryAllocationStats { - /// Total resident memory, in bytes. - pub resident: u64, - /// Total allocated memory, in bytes. - pub allocated: u64, -} - /// Module for the memory tracker. The memory tracker runs in its own thread, where it polls memory /// usage at an interval. /// /// NOTE: Requires jemalloc enabled. 
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] pub mod memory_tracker { - use super::*; use crate::LOG_TARGET; + use polkadot_node_core_pvf::MemoryAllocationStats; use std::{ sync::mpsc::{Receiver, RecvTimeoutError, Sender}, time::Duration, diff --git a/node/core/pvf/worker/src/prepare.rs b/node/core/pvf/worker/src/prepare.rs new file mode 100644 index 000000000000..e224afaec8f9 --- /dev/null +++ b/node/core/pvf/worker/src/prepare.rs @@ -0,0 +1,222 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +#[cfg(target_os = "linux")] +use crate::memory_stats::max_rss_stat::{extract_max_rss_stat, get_max_rss_thread}; +#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] +use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_tracker_loop}; +use crate::{ + common::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, + prepare, prevalidate, LOG_TARGET, +}; +use cpu_time::ProcessTime; +use futures::{pin_mut, select_biased, FutureExt}; +use parity_scale_codec::{Decode, Encode}; +use polkadot_node_core_pvf::{ + framed_recv, framed_send, CompiledArtifact, MemoryStats, PrepareError, PrepareResult, + PrepareStats, PvfPrepData, +}; +use std::{any::Any, panic, path::PathBuf, sync::mpsc::channel}; +use tokio::{io, net::UnixStream}; + +async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> { + let pvf = framed_recv(stream).await?; + let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("prepare pvf recv_request: failed to decode PvfPrepData: {}", e), + ) + })?; + let tmp_file = framed_recv(stream).await?; + let tmp_file = bytes_to_path(&tmp_file).ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + "prepare pvf recv_request: non utf-8 artifact path".to_string(), + ) + })?; + Ok((pvf, tmp_file)) +} + +async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> { + framed_send(stream, &result.encode()).await +} + +/// The entrypoint that the spawned prepare worker should start with. The `socket_path` specifies +/// the path to the socket used to communicate with the host. The `node_version`, if `Some`, +/// is checked against the worker version. A mismatch results in immediate worker termination. +/// `None` is used for tests and in other situations when version check is not necessary. +/// +/// # Flow +/// +/// This runs the following in a loop: +/// +/// 1. 
Get the code and parameters for preparation from the host. +/// +/// 2. Start a memory tracker in a separate thread. +/// +/// 3. Start the CPU time monitor loop and the actual preparation in two separate threads. +/// +/// 4. Select on the two threads created in step 3. If the CPU timeout was hit, the CPU time monitor +/// thread will trigger first. +/// +/// 5. Stop the memory tracker and get the stats. +/// +/// 6. If compilation succeeded, write the compiled artifact into a temporary file. +/// +/// 7. Send the result of preparation back to the host. If any error occurred in the above steps, we +/// send that in the `PrepareResult`. +pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { + worker_event_loop("prepare", socket_path, node_version, |rt_handle, mut stream| async move { + let worker_pid = std::process::id(); + + loop { + let (pvf, dest) = recv_request(&mut stream).await?; + gum::debug!( + target: LOG_TARGET, + %worker_pid, + "worker: preparing artifact", + ); + + let cpu_time_start = ProcessTime::now(); + let preparation_timeout = pvf.prep_timeout(); + + // Run the memory tracker. + #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] + let (memory_tracker_tx, memory_tracker_rx) = channel::<()>(); + #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] + let memory_tracker_fut = rt_handle.spawn_blocking(move || memory_tracker_loop(memory_tracker_rx)); + + // Spawn a new thread that runs the CPU time monitor. + let (cpu_time_monitor_tx, cpu_time_monitor_rx) = channel::<()>(); + let cpu_time_monitor_fut = rt_handle + .spawn_blocking(move || { + cpu_time_monitor_loop(cpu_time_start, preparation_timeout, cpu_time_monitor_rx) + }) + .fuse(); + // Spawn another thread for preparation. + let prepare_fut = rt_handle + .spawn_blocking(move || { + let result = prepare_artifact(pvf); + + // Get the `ru_maxrss` stat. If supported, call getrusage for the thread. 
+ #[cfg(target_os = "linux")] + let result = result.map(|artifact| (artifact, get_max_rss_thread())); + + result + }) + .fuse(); + + pin_mut!(cpu_time_monitor_fut); + pin_mut!(prepare_fut); + + let result = select_biased! { + // If this future is not selected, the join handle is dropped and the thread will + // finish in the background. + join_res = cpu_time_monitor_fut => { + match join_res { + Ok(Some(cpu_time_elapsed)) => { + // Log if we exceed the timeout and the other thread hasn't finished. + gum::warn!( + target: LOG_TARGET, + %worker_pid, + "prepare job took {}ms cpu time, exceeded prepare timeout {}ms", + cpu_time_elapsed.as_millis(), + preparation_timeout.as_millis(), + ); + Err(PrepareError::TimedOut) + }, + Ok(None) => Err(PrepareError::IoErr("error communicating over finished channel".into())), + Err(err) => Err(PrepareError::IoErr(err.to_string())), + } + }, + prepare_res = prepare_fut => { + let cpu_time_elapsed = cpu_time_start.elapsed(); + let _ = cpu_time_monitor_tx.send(()); + + match prepare_res.unwrap_or_else(|err| Err(PrepareError::IoErr(err.to_string()))) { + Err(err) => { + // Serialized error will be written into the socket. + Err(err) + }, + Ok(ok) => { + // Stop the memory stats worker and get its observed memory stats. + #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] + let memory_tracker_stats = + get_memory_tracker_loop_stats(memory_tracker_fut, memory_tracker_tx, worker_pid).await; + #[cfg(target_os = "linux")] + let (ok, max_rss) = ok; + let memory_stats = MemoryStats { + #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] + memory_tracker_stats, + #[cfg(target_os = "linux")] + max_rss: extract_max_rss_stat(max_rss, worker_pid), + }; + + // Write the serialized artifact into a temp file. + // + // PVF host only keeps artifacts statuses in its memory, successfully + // compiled code gets stored on the disk (and consequently deserialized + // by execute-workers). 
The prepare worker is only required to send `Ok` + // to the pool to indicate the success. + + gum::debug!( + target: LOG_TARGET, + %worker_pid, + "worker: writing artifact to {}", + dest.display(), + ); + tokio::fs::write(&dest, &ok).await?; + + Ok(PrepareStats{cpu_time_elapsed, memory_stats}) + }, + } + }, + }; + + send_response(&mut stream, result).await?; + } + }); +} + +fn prepare_artifact(pvf: PvfPrepData) -> Result { + panic::catch_unwind(|| { + let blob = match prevalidate(&pvf.code()) { + Err(err) => return Err(PrepareError::Prevalidation(format!("{:?}", err))), + Ok(b) => b, + }; + + match prepare(blob, &pvf.executor_params()) { + Ok(compiled_artifact) => Ok(CompiledArtifact::new(compiled_artifact)), + Err(err) => Err(PrepareError::Preparation(format!("{:?}", err))), + } + }) + .map_err(|panic_payload| PrepareError::Panic(stringify_panic_payload(panic_payload))) + .and_then(|inner_result| inner_result) +} + +/// Attempt to convert an opaque panic payload to a string. +/// +/// This is a best effort, and is not guaranteed to provide the most accurate value. +fn stringify_panic_payload(payload: Box) -> String { + match payload.downcast::<&'static str>() { + Ok(msg) => msg.to_string(), + Err(payload) => match payload.downcast::() { + Ok(msg) => *msg, + // At least we tried... + Err(_) => "unknown panic payload".to_string(), + }, + } +} diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/worker/src/testing.rs similarity index 95% rename from node/core/pvf/src/testing.rs rename to node/core/pvf/worker/src/testing.rs index 680de4924de3..9a0c08f920b3 100644 --- a/node/core/pvf/src/testing.rs +++ b/node/core/pvf/worker/src/testing.rs @@ -21,10 +21,6 @@ use polkadot_primitives::ExecutorParams; -pub mod worker_common { - pub use crate::worker_common::{spawn_with_program_path, SpawnErr}; -} - /// A function that emulates the stitches together behaviors of the preparation and the execution /// worker in a single synchronous function. 
pub fn validate_candidate( diff --git a/node/core/pvf/tests/it/adder.rs b/node/core/pvf/worker/tests/it/adder.rs similarity index 100% rename from node/core/pvf/tests/it/adder.rs rename to node/core/pvf/worker/tests/it/adder.rs diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/worker/tests/it/main.rs similarity index 100% rename from node/core/pvf/tests/it/main.rs rename to node/core/pvf/worker/tests/it/main.rs diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/worker/tests/it/worker_common.rs similarity index 94% rename from node/core/pvf/tests/it/worker_common.rs rename to node/core/pvf/worker/tests/it/worker_common.rs index 990b48ff1a67..b30650e07eeb 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/worker/tests/it/worker_common.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . use crate::{PREPARE_EXE, PUPPET_EXE}; -use polkadot_node_core_pvf::testing::worker_common::{spawn_with_program_path, SpawnErr}; +use polkadot_node_core_pvf::testing::{spawn_with_program_path, SpawnErr}; use std::time::Duration; // Test spawning a program that immediately exits with a failure code. 
From 37920a1d8cc75c5f5bd477dd70984de1d95b08f1 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Sun, 23 Apr 2023 16:26:03 +0200 Subject: [PATCH 04/13] Add musl-gcc wrapper scripts to repo --- node/core/pvf/worker/.cargo/config.toml | 11 +++++++++++ node/core/pvf/worker/.cargo/musl-g++ | 7 +++++++ node/core/pvf/worker/.cargo/musl-gcc | 13 +++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 node/core/pvf/worker/.cargo/config.toml create mode 100755 node/core/pvf/worker/.cargo/musl-g++ create mode 100755 node/core/pvf/worker/.cargo/musl-gcc diff --git a/node/core/pvf/worker/.cargo/config.toml b/node/core/pvf/worker/.cargo/config.toml new file mode 100644 index 000000000000..77f6517d7dba --- /dev/null +++ b/node/core/pvf/worker/.cargo/config.toml @@ -0,0 +1,11 @@ +[build] +target = "x86_64-unknown-linux-musl" + +[env] +# So user doesn't have to install musl-tools. +CC_x86_64_unknown_linux_musl = { value = ".cargo/musl-gcc", force = true, relative = true } +CXX_x86_64_unknown_linux_musl = { value = ".cargo/musl-g++", force = true, relative = true } + +[profile.release] +# TODO +# lto = "fat" # Perform full LTO to minimize syscall usage and lower binary size. diff --git a/node/core/pvf/worker/.cargo/musl-g++ b/node/core/pvf/worker/.cargo/musl-g++ new file mode 100755 index 000000000000..edd7c1aff4ed --- /dev/null +++ b/node/core/pvf/worker/.cargo/musl-g++ @@ -0,0 +1,7 @@ +#!/bin/sh + +# Wrapper for building the PVF worker binaries with musl. +# +# See comments for musl-gcc in this repo. + +g++ "$@" diff --git a/node/core/pvf/worker/.cargo/musl-gcc b/node/core/pvf/worker/.cargo/musl-gcc new file mode 100755 index 000000000000..cef013975d85 --- /dev/null +++ b/node/core/pvf/worker/.cargo/musl-gcc @@ -0,0 +1,13 @@ +#!/bin/sh + +# Wrapper for building the PVF worker binaries with musl. +# +# musl unfortunately requires a musl-enabled C compiler (musl-gcc) to be +# installed, which can be kind of a pain to get installed depending on the +# distro. 
That's not a very good user experience. +# +# The real musl-gcc wrapper sets the correct system include paths for linking +# with musl libc library. Since this is not actually used to link any binaries +# it should most likely work just fine. + +gcc "$@" From aecc10679ed018258ba860a89d9b66bcd24f1ef1 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Mon, 24 Apr 2023 20:04:46 +0200 Subject: [PATCH 05/13] Implement worker binary embedding and extraction --- Cargo.lock | 4 +- cli/src/cli.rs | 19 ------ cli/src/command.rs | 44 ------------- node/core/candidate-validation/src/lib.rs | 10 ++- node/core/pvf/src/artifacts.rs | 2 +- node/core/pvf/src/execute/queue.rs | 2 +- node/core/pvf/src/execute/worker_intf.rs | 4 +- node/core/pvf/src/host.rs | 62 +++++++++++++++++-- node/core/pvf/src/lib.rs | 9 +-- node/core/pvf/src/prepare/pool.rs | 2 +- node/core/pvf/src/prepare/worker_intf.rs | 2 +- .../src/{worker_common.rs => worker_intf.rs} | 0 node/core/pvf/worker/.cargo/config.toml | 11 ---- node/core/pvf/worker/.cargo/musl-g++ | 7 --- node/core/pvf/worker/.cargo/musl-gcc | 13 ---- node/core/pvf/worker/bin/execute_worker.rs | 3 +- node/core/pvf/worker/bin/prepare_worker.rs | 3 +- node/core/pvf/worker/src/lib.rs | 37 ++++++----- node/service/src/lib.rs | 10 +-- 19 files changed, 105 insertions(+), 139 deletions(-) rename node/core/pvf/src/{worker_common.rs => worker_intf.rs} (100%) delete mode 100644 node/core/pvf/worker/.cargo/config.toml delete mode 100755 node/core/pvf/worker/.cargo/musl-g++ delete mode 100755 node/core/pvf/worker/.cargo/musl-gcc diff --git a/Cargo.lock b/Cargo.lock index 23abf35bbaba..9742fc0a3269 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7201,6 +7201,8 @@ dependencies = [ "sp-wasm-interface", "substrate-build-script-utils", "tempfile", + "test-parachain-adder", + "test-parachain-halt", "tokio", "tracing-gum", ] @@ -7251,8 +7253,6 @@ dependencies = [ "sp-tracing", "substrate-build-script-utils", "tempfile", - "test-parachain-adder", - "test-parachain-halt", 
"tikv-jemalloc-ctl", "tokio", "tracing-gum", diff --git a/cli/src/cli.rs b/cli/src/cli.rs index b775bb6b77ad..8d46a24d9719 100644 --- a/cli/src/cli.rs +++ b/cli/src/cli.rs @@ -42,14 +42,6 @@ pub enum Subcommand { /// Revert the chain to a previous state. Revert(sc_cli::RevertCmd), - #[allow(missing_docs)] - #[command(name = "prepare-worker", hide = true)] - PvfPrepareWorker(ValidationWorkerCommand), - - #[allow(missing_docs)] - #[command(name = "execute-worker", hide = true)] - PvfExecuteWorker(ValidationWorkerCommand), - /// Sub-commands concerned with benchmarking. /// The pallet benchmarking moved to the `pallet` sub-command. #[command(subcommand)] @@ -75,17 +67,6 @@ pub enum Subcommand { ChainInfo(sc_cli::ChainInfoCmd), } -#[allow(missing_docs)] -#[derive(Debug, Parser)] -pub struct ValidationWorkerCommand { - /// The path to the validation host's socket. - #[arg(long)] - pub socket_path: String, - /// Calling node implementation version - #[arg(long)] - pub node_impl_version: String, -} - #[allow(missing_docs)] #[derive(Debug, Parser)] #[group(skip)] diff --git a/cli/src/command.rs b/cli/src/command.rs index c0e96de2a54b..d30405edd357 100644 --- a/cli/src/command.rs +++ b/cli/src/command.rs @@ -479,50 +479,6 @@ pub fn run() -> Result<()> { )) })?) 
}, - Some(Subcommand::PvfPrepareWorker(cmd)) => { - let mut builder = sc_cli::LoggerBuilder::new(""); - builder.with_colors(false); - let _ = builder.init(); - - #[cfg(target_os = "android")] - { - return Err(sc_cli::Error::Input( - "PVF preparation workers are not supported under this platform".into(), - ) - .into()) - } - - #[cfg(not(target_os = "android"))] - { - polkadot_node_core_pvf_worker::prepare_worker_entrypoint( - &cmd.socket_path, - Some(&cmd.node_impl_version), - ); - Ok(()) - } - }, - Some(Subcommand::PvfExecuteWorker(cmd)) => { - let mut builder = sc_cli::LoggerBuilder::new(""); - builder.with_colors(false); - let _ = builder.init(); - - #[cfg(target_os = "android")] - { - return Err(sc_cli::Error::Input( - "PVF execution workers are not supported under this platform".into(), - ) - .into()) - } - - #[cfg(not(target_os = "android"))] - { - polkadot_node_core_pvf_worker::execute_worker_entrypoint( - &cmd.socket_path, - Some(&cmd.node_impl_version), - ); - Ok(()) - } - }, Some(Subcommand::Benchmark(cmd)) => { let runner = cli.create_runner(cmd)?; let chain_spec = &runner.config().chain_spec; diff --git a/node/core/candidate-validation/src/lib.rs b/node/core/candidate-validation/src/lib.rs index 59c4aa562e74..0f99025d5811 100644 --- a/node/core/candidate-validation/src/lib.rs +++ b/node/core/candidate-validation/src/lib.rs @@ -81,9 +81,11 @@ const DEFAULT_APPROVAL_EXECUTION_TIMEOUT: Duration = Duration::from_secs(12); pub struct Config { /// The path where candidate validation can store compiled artifacts for PVFs. pub artifacts_cache_path: PathBuf, + /// The path to extract the PVF workers to, if `program_path` is `None`. + pub pvf_workers_path: PathBuf, /// The path to the executable which can be used for spawning PVF compilation & validation /// workers. - pub program_path: PathBuf, + pub program_path: Option, } /// The candidate validation subsystem. 
@@ -117,6 +119,7 @@ impl CandidateValidationSubsystem { self.metrics, self.pvf_metrics, self.config.artifacts_cache_path, + self.config.pvf_workers_path, self.config.program_path, ) .map_err(|e| SubsystemError::with_origin("candidate-validation", e)) @@ -131,10 +134,11 @@ async fn run( metrics: Metrics, pvf_metrics: polkadot_node_core_pvf::Metrics, cache_path: PathBuf, - program_path: PathBuf, + workers_path: PathBuf, + program_path: Option, ) -> SubsystemResult<()> { let (validation_host, task) = polkadot_node_core_pvf::start( - polkadot_node_core_pvf::Config::new(cache_path, program_path), + polkadot_node_core_pvf::Config::new(cache_path, workers_path, program_path), pvf_metrics, ); ctx.spawn_blocking("pvf-validation-host", task.boxed())?; diff --git a/node/core/pvf/src/artifacts.rs b/node/core/pvf/src/artifacts.rs index d5a660cc3aa5..228be7c6785e 100644 --- a/node/core/pvf/src/artifacts.rs +++ b/node/core/pvf/src/artifacts.rs @@ -304,7 +304,7 @@ mod tests { #[tokio::test] async fn artifacts_removes_cache_on_startup() { - let fake_cache_path = crate::worker_common::tmpfile("test-cache").await.unwrap(); + let fake_cache_path = crate::worker_intf::tmpfile("test-cache").await.unwrap(); let fake_artifact_path = { let mut p = fake_cache_path.clone(); p.push("wasmtime_0x1234567890123456789012345678901234567890123456789012345678901234"); diff --git a/node/core/pvf/src/execute/queue.rs b/node/core/pvf/src/execute/queue.rs index 5b3e21cee079..2405348f2c64 100644 --- a/node/core/pvf/src/execute/queue.rs +++ b/node/core/pvf/src/execute/queue.rs @@ -21,7 +21,7 @@ use crate::{ artifacts::{ArtifactId, ArtifactPathId}, host::ResultSender, metrics::Metrics, - worker_common::{IdleWorker, WorkerHandle}, + worker_intf::{IdleWorker, WorkerHandle}, InvalidCandidate, ValidationError, LOG_TARGET, }; use futures::{ diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index 31245355bf3a..428b362c43ce 100644 --- 
a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -18,7 +18,7 @@ use crate::{ artifacts::ArtifactPathId, - worker_common::{ + worker_intf::{ framed_recv, framed_send, path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }, @@ -45,7 +45,7 @@ pub async fn spawn( let (mut idle_worker, worker_handle) = spawn_with_program_path( "execute", program_path, - &["--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], + &["execute-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) .await?; diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 95c95528b467..124657a533ee 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -38,6 +38,7 @@ use std::{ path::{Path, PathBuf}, time::{Duration, SystemTime}, }; +use tokio; /// The time period after which a failed preparation artifact is considered ready to be retried. /// Note that we will only retry if another request comes in after this cooldown has passed. @@ -49,6 +50,15 @@ pub const PREPARE_FAILURE_COOLDOWN: Duration = Duration::from_millis(200); /// The amount of times we will retry failed prepare jobs. pub const NUM_PREPARE_RETRIES: u32 = 5; +// HACK: Getting the binary locations this way is a bit ugly but seems to work? Should eventually +// use something like wasm-builder: . +/// The prepare worker binary. +const PREPARE_EXE: &'static [u8] = + include_bytes!(concat!(env!("OUT_DIR"), "/../../../prepare_worker")); +/// The execute worker binary. +const EXECUTE_EXE: &'static [u8] = + include_bytes!(concat!(env!("OUT_DIR"), "/../../../execute_worker")); + /// An alias to not spell the type for the oneshot sender for the PVF execution result. pub(crate) type ResultSender = oneshot::Sender>; @@ -140,6 +150,8 @@ struct ExecutePvfInputs { pub struct Config { /// The root directory where the prepared artifacts can be stored. 
pub cache_path: PathBuf, + /// If we are using the embedded worker binaries, the directory where they are extracted to. + pub workers_path: Option, /// The path to the program that can be used to spawn the prepare workers. pub prepare_worker_program_path: PathBuf, /// The time allotted for a prepare worker to spawn and report to the host. @@ -159,18 +171,28 @@ pub struct Config { impl Config { /// Create a new instance of the configuration. + /// + /// The binary at `program_path` will be used if that is `Some`, otherwise the embedded workers + /// will be extracted to `workers_path` and used. pub fn new( cache_path: std::path::PathBuf, - prepare_worker_path: std::path::PathBuf, - execute_worker_path: std::path::PathBuf, + workers_path: std::path::PathBuf, + program_path: Option, ) -> Self { // Do not contaminate the other parts of the codebase with the types from `tokio`. let cache_path = PathBuf::from(cache_path); - let prepare_worker_path = PathBuf::from(prepare_worker_path); - let execute_worker_path = PathBuf::from(execute_worker_path); + + let (prepare_worker_path, execute_worker_path, workers_path) = + if let Some(path) = program_path { + let path = PathBuf::from(path); + (path.clone(), path, None) + } else { + (worker_path(&workers_path, "prepare"), worker_path(&workers_path, "execute"), Some(workers_path)) + }; Self { cache_path, + workers_path, prepare_worker_program_path: prepare_worker_path.clone(), prepare_worker_spawn_timeout: Duration::from_secs(3), prepare_workers_soft_max_num: 1, @@ -222,6 +244,17 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future impl futures::Stream .map(|_| ()) } +// TODO: Should we purge unneeded binaries? +/// Extracts the worker binaries embedded in this binary onto disk and return their paths. +async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path: &Path) { + // Skip extraction if the binaries are already present. 
+ if !prepare_worker_path.exists() { + let _ = tokio::fs::write(prepare_worker_path, PREPARE_EXE).await; + } + if !execute_worker_path.exists() { + let _ = tokio::fs::write(execute_worker_path, EXECUTE_EXE).await; + } +} + +/// Returns the expected path to this worker given the root of the cache. +/// +/// Appends with the version (including the commit) to avoid conflicts with other versions of +/// polkadot running, i.e. in testnets. +fn worker_path(workers_path: &Path, job_kind: &str) -> PathBuf { + let file_name = format!("{}-worker_{}", job_kind, env!("SUBSTRATE_CLI_IMPL_VERSION")); + workers_path.join(file_name) +} + #[cfg(test)] pub(crate) mod tests { use super::*; diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index cdaee3341402..32832710701c 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -96,7 +96,7 @@ mod metrics; mod prepare; mod priority; mod pvf; -mod worker_common; +mod worker_intf; pub use artifacts::CompiledArtifact; pub use error::{InvalidCandidate, PrepareError, PrepareResult, ValidationError}; @@ -109,11 +109,6 @@ pub use pvf::PvfPrepData; pub use host::{start, Config, ValidationHost}; pub use metrics::Metrics; -pub use worker_common::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR}; +pub use worker_intf::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR}; const LOG_TARGET: &str = "parachain::pvf"; - -#[doc(hidden)] -pub mod testing { - pub use crate::worker_common::{spawn_with_program_path, SpawnErr}; -} diff --git a/node/core/pvf/src/prepare/pool.rs b/node/core/pvf/src/prepare/pool.rs index d151f097805e..c72b59b5f147 100644 --- a/node/core/pvf/src/prepare/pool.rs +++ b/node/core/pvf/src/prepare/pool.rs @@ -19,7 +19,7 @@ use crate::{ error::{PrepareError, PrepareResult}, metrics::Metrics, pvf::PvfPrepData, - worker_common::{IdleWorker, WorkerHandle}, + worker_intf::{IdleWorker, WorkerHandle}, LOG_TARGET, }; use always_assert::never; diff --git a/node/core/pvf/src/prepare/worker_intf.rs 
b/node/core/pvf/src/prepare/worker_intf.rs index 7525f19791e6..a11bd3c1810b 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -21,7 +21,7 @@ use crate::{ metrics::Metrics, prepare::PrepareStats, pvf::PvfPrepData, - worker_common::{ + worker_intf::{ framed_recv, framed_send, path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }, diff --git a/node/core/pvf/src/worker_common.rs b/node/core/pvf/src/worker_intf.rs similarity index 100% rename from node/core/pvf/src/worker_common.rs rename to node/core/pvf/src/worker_intf.rs diff --git a/node/core/pvf/worker/.cargo/config.toml b/node/core/pvf/worker/.cargo/config.toml deleted file mode 100644 index 77f6517d7dba..000000000000 --- a/node/core/pvf/worker/.cargo/config.toml +++ /dev/null @@ -1,11 +0,0 @@ -[build] -target = "x86_64-unknown-linux-musl" - -[env] -# So user doesn't have to install musl-tools. -CC_x86_64_unknown_linux_musl = { value = ".cargo/musl-gcc", force = true, relative = true } -CXX_x86_64_unknown_linux_musl = { value = ".cargo/musl-g++", force = true, relative = true } - -[profile.release] -# TODO -# lto = "fat" # Perform full LTO to minimize syscall usage and lower binary size. diff --git a/node/core/pvf/worker/.cargo/musl-g++ b/node/core/pvf/worker/.cargo/musl-g++ deleted file mode 100755 index edd7c1aff4ed..000000000000 --- a/node/core/pvf/worker/.cargo/musl-g++ +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -# Wrapper for building the PVF worker binaries with musl. -# -# See comments for musl-gcc in this repo. - -g++ "$@" diff --git a/node/core/pvf/worker/.cargo/musl-gcc b/node/core/pvf/worker/.cargo/musl-gcc deleted file mode 100755 index cef013975d85..000000000000 --- a/node/core/pvf/worker/.cargo/musl-gcc +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -# Wrapper for building the PVF worker binaries with musl. 
-# -# musl unfortunately requires a musl-enabled C compiler (musl-gcc) to be -# installed, which can be kind of a pain to get installed depending on the -# distro. That's not a very good user experience. -# -# The real musl-gcc wrapper sets the correct system include paths for linking -# with musl libc library. Since this is not actually used to link any binaries -# it should most likely work just fine. - -gcc "$@" diff --git a/node/core/pvf/worker/bin/execute_worker.rs b/node/core/pvf/worker/bin/execute_worker.rs index 2a3251ea1c0e..9cb1597f95cc 100644 --- a/node/core/pvf/worker/bin/execute_worker.rs +++ b/node/core/pvf/worker/bin/execute_worker.rs @@ -16,6 +16,5 @@ //! Execute worker. // TODO: Build with musl. -// TODO: Embed into polkadot binary. -polkadot_node_core_pvf_worker::decl_worker_main!(execute); +polkadot_node_core_pvf_worker::decl_worker_main!("execute-worker"); diff --git a/node/core/pvf/worker/bin/prepare_worker.rs b/node/core/pvf/worker/bin/prepare_worker.rs index a478009c4673..63e0488a4de1 100644 --- a/node/core/pvf/worker/bin/prepare_worker.rs +++ b/node/core/pvf/worker/bin/prepare_worker.rs @@ -16,6 +16,5 @@ //! Prepare worker. // TODO: Build with musl. -// TODO: Embed into polkadot binary. -polkadot_node_core_pvf_worker::decl_worker_main!(prepare); +polkadot_node_core_pvf_worker::decl_worker_main!("prepare-worker"); diff --git a/node/core/pvf/worker/src/lib.rs b/node/core/pvf/worker/src/lib.rs index 456362cf8f57..2e9078a9018c 100644 --- a/node/core/pvf/worker/src/lib.rs +++ b/node/core/pvf/worker/src/lib.rs @@ -37,18 +37,21 @@ const LOG_TARGET: &str = "parachain::pvf-worker"; /// Use this macro to declare a `fn main() {}` that will create an executable that can be used for /// spawning the desired worker. -#[macro_export(local_inner_macros)] +#[macro_export] macro_rules! 
decl_worker_main { - ($command:tt) => { + ($expected_command:expr) => { fn main() { $crate::sp_tracing::try_init_simple(); let args = std::env::args().collect::>(); + if args.len() < 3 { + panic!("wrong number of arguments"); + } let mut version = None; let mut socket_path: &str = ""; - for i in 1..args.len() { + for i in 2..args.len() { match args[i].as_ref() { "--socket-path" => socket_path = args[i + 1].as_str(), "--node-version" => version = Some(args[i + 1].as_str()), @@ -56,18 +59,22 @@ macro_rules! decl_worker_main { } } - decl_worker_main_command!($command, socket_path, version) + let subcommand = &args[1]; + if subcommand != $expected_command { + panic!( + "trying to run {} binary with the {} subcommand", + $expected_command, subcommand + ) + } + match subcommand.as_ref() { + "prepare-worker" => { + $crate::prepare_worker_entrypoint(&socket_path, version); + }, + "execute-worker" => { + $crate::execute_worker_entrypoint(&socket_path, version); + }, + other => panic!("unknown subcommand: {}", other), + } } }; } - -#[macro_export] -#[doc(hidden)] -macro_rules! decl_worker_main_command { - (prepare, $socket_path:expr, $version: expr) => { - $crate::prepare_worker_entrypoint(&$socket_path, $version) - }; - (execute, $socket_path:expr, $version: expr) => { - $crate::execute_worker_entrypoint(&$socket_path, $version) - }; -} diff --git a/node/service/src/lib.rs b/node/service/src/lib.rs index d25b0e1a0767..831d8e4e10bd 100644 --- a/node/service/src/lib.rs +++ b/node/service/src/lib.rs @@ -907,10 +907,12 @@ where .path() .ok_or(Error::DatabasePathRequired)? .join("pvf-artifacts"), - program_path: match program_path { - None => std::env::current_exe()?, - Some(p) => p, - }, + pvf_workers_path: config + .database + .path() + .ok_or(Error::DatabasePathRequired)? 
+ .join("pvf-workers"), + program_path, }; let chain_selection_config = ChainSelectionConfig { From a14c254c7c24e1274556c3472b7a0fbd8f4b6922 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Tue, 25 Apr 2023 13:30:26 +0200 Subject: [PATCH 06/13] Move shared functionality to `common`, remove worker -> host dep The worker binaries must be built first so that the host could embed them into `polkadot`. Therefore `pvf/worker` could not depend on `pvf`, so to remove the dependency, common functionality was extracted into `pvf/common`. (NOTE: We already needed to do this host/worker/common separation as part of https://github.com/paritytech/polkadot/issues/7116, it's just unfortunate that it had to be done here and complicate this PR.) Integration tests were moved from `pvf/worker/tests` to `pvf/tests` because they need the PVF host. --- .editorconfig | 2 +- Cargo.lock | 16 +++- Cargo.toml | 1 + node/core/pvf/Cargo.toml | 6 ++ .../pvf/{worker => }/bin/puppet_worker.rs | 22 +----- node/core/pvf/common/Cargo.toml | 15 ++++ node/core/pvf/common/src/error.rs | 75 +++++++++++++++++++ node/core/pvf/common/src/execute.rs | 65 ++++++++++++++++ node/core/pvf/common/src/lib.rs | 48 ++++++++++++ node/core/pvf/common/src/prepare.rs | 48 ++++++++++++ node/core/pvf/{ => common}/src/pvf.rs | 21 ++---- node/core/pvf/src/artifacts.rs | 24 ++---- node/core/pvf/src/error.rs | 60 +-------------- node/core/pvf/src/host.rs | 30 +++++--- node/core/pvf/src/lib.rs | 15 ++-- node/core/pvf/src/metrics.rs | 2 +- node/core/pvf/src/prepare/mod.rs | 33 -------- node/core/pvf/src/prepare/pool.rs | 6 +- node/core/pvf/src/prepare/queue.rs | 26 ++++--- node/core/pvf/src/prepare/worker_intf.rs | 8 +- node/core/pvf/src/testing.rs | 68 +++++++++++++++++ node/core/pvf/{worker => }/tests/it/adder.rs | 0 node/core/pvf/{worker => }/tests/it/main.rs | 7 +- .../{worker => }/tests/it/worker_common.rs | 13 +++- node/core/pvf/worker/Cargo.toml | 14 +--- node/core/pvf/worker/src/execute.rs | 5 +- 
node/core/pvf/worker/src/prepare.rs | 24 +++++- 27 files changed, 445 insertions(+), 209 deletions(-) rename node/core/pvf/{worker => }/bin/puppet_worker.rs (64%) create mode 100644 node/core/pvf/common/Cargo.toml create mode 100644 node/core/pvf/common/src/error.rs create mode 100644 node/core/pvf/common/src/execute.rs create mode 100644 node/core/pvf/common/src/lib.rs create mode 100644 node/core/pvf/common/src/prepare.rs rename node/core/pvf/{ => common}/src/pvf.rs (82%) create mode 100644 node/core/pvf/src/testing.rs rename node/core/pvf/{worker => }/tests/it/adder.rs (100%) rename node/core/pvf/{worker => }/tests/it/main.rs (96%) rename node/core/pvf/{worker => }/tests/it/worker_common.rs (89%) diff --git a/.editorconfig b/.editorconfig index 6b736d884f22..b9edd5a0dfdc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ root = true indent_style=tab indent_size=tab tab_width=4 -max_line_length=120 +max_line_length=100 end_of_line=lf charset=utf-8 trim_trailing_whitespace=true diff --git a/Cargo.lock b/Cargo.lock index 9742fc0a3269..bbd38c8556ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7189,6 +7189,8 @@ dependencies = [ "parity-scale-codec", "pin-project", "polkadot-core-primitives", + "polkadot-node-core-pvf-common", + "polkadot-node-core-pvf-worker", "polkadot-node-metrics", "polkadot-node-primitives", "polkadot-parachain", @@ -7230,16 +7232,26 @@ dependencies = [ "tracing-gum", ] +[[package]] +name = "polkadot-node-core-pvf-common" +version = "0.9.41" +dependencies = [ + "parity-scale-codec", + "polkadot-parachain", + "polkadot-primitives", + "sp-core", + "tokio", +] + [[package]] name = "polkadot-node-core-pvf-worker" version = "0.9.41" dependencies = [ - "assert_matches", "cpu-time", "futures", "libc", "parity-scale-codec", - "polkadot-node-core-pvf", + "polkadot-node-core-pvf-common", "polkadot-parachain", "polkadot-primitives", "rayon", diff --git a/Cargo.toml b/Cargo.toml index 69ea6b036a4d..a1a3bea541a3 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -80,6 +80,7 @@ members = [ "node/core/parachains-inherent", "node/core/provisioner", "node/core/pvf", + "node/core/pvf/common", "node/core/pvf/worker", "node/core/pvf-checker", "node/core/runtime-api", diff --git a/node/core/pvf/Cargo.toml b/node/core/pvf/Cargo.toml index 026930758b86..0a9b38012e30 100644 --- a/node/core/pvf/Cargo.toml +++ b/node/core/pvf/Cargo.toml @@ -19,6 +19,9 @@ parity-scale-codec = { version = "3.4.0", default-features = false, features = [ polkadot-parachain = { path = "../../../parachain" } polkadot-core-primitives = { path = "../../../core-primitives" } +polkadot-node-core-pvf-common = { path = "common" } +# Must depend on the worker because the binaries must have been already built. +polkadot-node-core-pvf-worker = { path = "worker" } polkadot-node-metrics = { path = "../../metrics" } polkadot-node-primitives = { path = "../../primitives" } polkadot-primitives = { path = "../../../primitives" } @@ -35,3 +38,6 @@ substrate-build-script-utils = { git = "https://github.com/paritytech/substrate" assert_matches = "1.4.0" hex-literal = "0.3.4" tempfile = "3.3.0" + +adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" } +halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" } diff --git a/node/core/pvf/worker/bin/puppet_worker.rs b/node/core/pvf/bin/puppet_worker.rs similarity index 64% rename from node/core/pvf/worker/bin/puppet_worker.rs rename to node/core/pvf/bin/puppet_worker.rs index bf82b4fb23a5..f94f14df1f9e 100644 --- a/node/core/pvf/worker/bin/puppet_worker.rs +++ b/node/core/pvf/bin/puppet_worker.rs @@ -16,24 +16,4 @@ //! Puppet worker used for integration tests. 
-use sp_tracing; - -fn main() { - sp_tracing::try_init_simple(); - - let args = std::env::args().collect::>(); - if args.len() < 3 { - panic!("wrong number of arguments"); - } - - let subcommand = &args[1]; - match subcommand.as_ref() { - "exit" => { - std::process::exit(1); - }, - "sleep" => { - std::thread::sleep(std::time::Duration::from_secs(5)); - }, - other => panic!("unknown subcommand: {}", other), - } -} +polkadot_node_core_pvf::decl_puppet_worker_main!(); diff --git a/node/core/pvf/common/Cargo.toml b/node/core/pvf/common/Cargo.toml new file mode 100644 index 000000000000..9da2bdd91224 --- /dev/null +++ b/node/core/pvf/common/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "polkadot-node-core-pvf-common" +version.workspace = true +authors.workspace = true +edition.workspace = true + +[dependencies] +tokio = { version = "1.24.2", features = ["fs", "process", "io-util"] } + +parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } + +polkadot-parachain = { path = "../../../../parachain" } +polkadot-primitives = { path = "../../../../primitives" } + +sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/core/pvf/common/src/error.rs b/node/core/pvf/common/src/error.rs new file mode 100644 index 000000000000..34f7266b6e34 --- /dev/null +++ b/node/core/pvf/common/src/error.rs @@ -0,0 +1,75 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use crate::prepare::PrepareStats; +use parity_scale_codec::{Decode, Encode}; +use std::fmt; + +/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if +/// successful +pub type PrepareResult = Result; + +/// An error that occurred during the prepare part of the PVF pipeline. +#[derive(Debug, Clone, Encode, Decode)] +pub enum PrepareError { + /// During the prevalidation stage of preparation an issue was found with the PVF. + Prevalidation(String), + /// Compilation failed for the given PVF. + Preparation(String), + /// An unexpected panic has occurred in the preparation worker. + Panic(String), + /// Failed to prepare the PVF due to the time limit. + TimedOut, + /// An IO error occurred. This state is reported by either the validation host or by the worker. + IoErr(String), + /// The temporary file for the artifact could not be created at the given cache path. This state is reported by the + /// validation host (not by the worker). + CreateTmpFileErr(String), + /// The response from the worker is received, but the file cannot be renamed (moved) to the final destination + /// location. This state is reported by the validation host (not by the worker). + RenameTmpFileErr(String), +} + +impl PrepareError { + /// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those + /// errors depend on the PVF itself and the sc-executor/wasmtime logic. + /// + /// Non-deterministic errors can happen spuriously. Typically, they occur due to resource + /// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient + /// but may persist e.g. if the node is run by overwhelmingly underpowered machine. 
+ pub fn is_deterministic(&self) -> bool { + use PrepareError::*; + match self { + Prevalidation(_) | Preparation(_) | Panic(_) => true, + TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false, + } + } +} + +impl fmt::Display for PrepareError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use PrepareError::*; + match self { + Prevalidation(err) => write!(f, "prevalidation: {}", err), + Preparation(err) => write!(f, "preparation: {}", err), + Panic(err) => write!(f, "panic: {}", err), + TimedOut => write!(f, "prepare: timeout"), + IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err), + CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err), + RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err), + } + } +} diff --git a/node/core/pvf/common/src/execute.rs b/node/core/pvf/common/src/execute.rs new file mode 100644 index 000000000000..d7c300eeeabb --- /dev/null +++ b/node/core/pvf/common/src/execute.rs @@ -0,0 +1,65 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use parity_scale_codec::{Decode, Encode}; +use polkadot_parachain::primitives::ValidationResult; +use polkadot_primitives::ExecutorParams; +use std::time::Duration; + +/// The payload of the one-time handshake that is done when a worker process is created. 
Carries +/// data from the host to the worker. +#[derive(Encode, Decode)] +pub struct Handshake { + /// The executor parameters. + pub executor_params: ExecutorParams, +} + +/// The response from an execution job on the worker. +#[derive(Encode, Decode)] +pub enum Response { + /// The job completed successfully. + Ok { + /// The result of parachain validation. + result_descriptor: ValidationResult, + /// The amount of CPU time taken by the job. + duration: Duration, + }, + /// The candidate is invalid. + InvalidCandidate(String), + /// The job timed out. + TimedOut, + /// Some internal error occurred. Should only be used for errors independent of the candidate. + InternalError(String), +} + +impl Response { + /// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty). + pub fn format_invalid(ctx: &'static str, msg: &str) -> Self { + if msg.is_empty() { + Self::InvalidCandidate(ctx.to_string()) + } else { + Self::InvalidCandidate(format!("{}: {}", ctx, msg)) + } + } + /// Creates an internal response from a context `ctx` and a message `msg` (which can be empty). + pub fn format_internal(ctx: &'static str, msg: &str) -> Self { + if msg.is_empty() { + Self::InternalError(ctx.to_string()) + } else { + Self::InternalError(format!("{}: {}", ctx, msg)) + } + } +} diff --git a/node/core/pvf/common/src/lib.rs b/node/core/pvf/common/src/lib.rs new file mode 100644 index 000000000000..7d48684f6207 --- /dev/null +++ b/node/core/pvf/common/src/lib.rs @@ -0,0 +1,48 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +pub mod error; +pub mod execute; +pub mod prepare; +pub mod pvf; + +use std::mem; +use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _}; + +pub mod tests { + use std::time::Duration; + + pub const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); + pub const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30); +} + +/// Write some data prefixed by its length into `w`. +pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> { + let len_buf = buf.len().to_le_bytes(); + w.write_all(&len_buf).await?; + w.write_all(buf).await?; + Ok(()) +} + +/// Read some data prefixed by its length from `r`. +pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result> { + let mut len_buf = [0u8; mem::size_of::()]; + r.read_exact(&mut len_buf).await?; + let len = usize::from_le_bytes(len_buf); + let mut buf = vec![0; len]; + r.read_exact(&mut buf).await?; + Ok(buf) +} diff --git a/node/core/pvf/common/src/prepare.rs b/node/core/pvf/common/src/prepare.rs new file mode 100644 index 000000000000..ac64e2927a16 --- /dev/null +++ b/node/core/pvf/common/src/prepare.rs @@ -0,0 +1,48 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use parity_scale_codec::{Decode, Encode}; + +/// Preparation statistics, including the CPU time and memory taken. +#[derive(Debug, Clone, Default, Encode, Decode)] +pub struct PrepareStats { + /// The CPU time that elapsed for the preparation job. + pub cpu_time_elapsed: std::time::Duration, + /// The observed memory statistics for the preparation job. + pub memory_stats: MemoryStats, +} + +/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if +/// supported by the OS, `ru_maxrss`. +#[derive(Clone, Debug, Default, Encode, Decode)] +pub struct MemoryStats { + /// Memory stats from `tikv_jemalloc_ctl`. + #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] + pub memory_tracker_stats: Option, + /// `ru_maxrss` from `getrusage`. `None` if an error occurred. + #[cfg(target_os = "linux")] + pub max_rss: Option, +} + +/// Statistics of collected memory metrics. +#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] +#[derive(Clone, Debug, Default, Encode, Decode)] +pub struct MemoryAllocationStats { + /// Total resident memory, in bytes. + pub resident: u64, + /// Total allocated memory, in bytes. + pub allocated: u64, +} diff --git a/node/core/pvf/src/pvf.rs b/node/core/pvf/common/src/pvf.rs similarity index 82% rename from node/core/pvf/src/pvf.rs rename to node/core/pvf/common/src/pvf.rs index c134cacb4acf..937b2c139523 100644 --- a/node/core/pvf/src/pvf.rs +++ b/node/core/pvf/common/src/pvf.rs @@ -14,7 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::artifacts::ArtifactId; use parity_scale_codec::{Decode, Encode}; use polkadot_parachain::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParams; @@ -26,9 +25,6 @@ use std::{ time::Duration, }; -#[cfg(test)] -use crate::host::tests::TEST_PREPARATION_TIMEOUT; - /// A struct that carries the exhaustive set of data to prepare an artifact out of plain /// Wasm binary /// @@ -58,13 +54,8 @@ impl PvfPrepData { Self { code, code_hash, executor_params, prep_timeout } } - /// Returns artifact ID that corresponds to the PVF with given executor params - pub(crate) fn as_artifact_id(&self) -> ArtifactId { - ArtifactId::new(self.code_hash, self.executor_params.hash()) - } - /// Returns validation code hash for the PVF - pub(crate) fn code_hash(&self) -> ValidationCodeHash { + pub fn code_hash(&self) -> ValidationCodeHash { self.code_hash } @@ -84,15 +75,15 @@ impl PvfPrepData { } /// Creates a structure for tests - #[cfg(test)] - pub(crate) fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self { + #[doc(hidden)] + pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self { let descriminator_buf = num.to_le_bytes().to_vec(); Self::from_code(descriminator_buf, ExecutorParams::default(), timeout) } - #[cfg(test)] - pub(crate) fn from_discriminator(num: u32) -> Self { - Self::from_discriminator_and_timeout(num, TEST_PREPARATION_TIMEOUT) + #[doc(hidden)] + pub fn from_discriminator(num: u32) -> Self { + Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT) } } diff --git a/node/core/pvf/src/artifacts.rs b/node/core/pvf/src/artifacts.rs index 228be7c6785e..78d2f88941b8 100644 --- a/node/core/pvf/src/artifacts.rs +++ b/node/core/pvf/src/artifacts.rs @@ -55,8 +55,9 @@ //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the //! artifact is expired it is removed from disk eagerly atomically. 
-use crate::{error::PrepareError, host::PrepareResultSender, prepare::PrepareStats}; +use crate::host::PrepareResultSender; use always_assert::always; +use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_parachain::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; use std::{ @@ -65,22 +66,6 @@ use std::{ time::{Duration, SystemTime}, }; -/// Contains the bytes for a successfully compiled artifact. -pub struct CompiledArtifact(Vec); - -impl CompiledArtifact { - /// Creates a `CompiledArtifact`. - pub fn new(code: Vec) -> Self { - Self(code) - } -} - -impl AsRef<[u8]> for CompiledArtifact { - fn as_ref(&self) -> &[u8] { - self.0.as_slice() - } -} - /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { @@ -96,6 +81,11 @@ impl ArtifactId { Self { code_hash, executor_params_hash } } + /// Returns an artifact ID that corresponds to the PVF with given executor params. + pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self { + Self::new(pvf.code_hash(), pvf.executor_params().hash()) + } + /// Tries to recover the artifact id from the given file name. #[cfg(test)] pub fn from_file_name(file_name: &str) -> Option { diff --git a/node/core/pvf/src/error.rs b/node/core/pvf/src/error.rs index 21f23d515fdd..42765f51a1a0 100644 --- a/node/core/pvf/src/error.rs +++ b/node/core/pvf/src/error.rs @@ -14,65 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::prepare::PrepareStats; -use parity_scale_codec::{Decode, Encode}; -use std::fmt; - -/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if -/// successful -pub type PrepareResult = Result; - -/// An error that occurred during the prepare part of the PVF pipeline. 
-#[derive(Debug, Clone, Encode, Decode)] -pub enum PrepareError { - /// During the prevalidation stage of preparation an issue was found with the PVF. - Prevalidation(String), - /// Compilation failed for the given PVF. - Preparation(String), - /// An unexpected panic has occurred in the preparation worker. - Panic(String), - /// Failed to prepare the PVF due to the time limit. - TimedOut, - /// An IO error occurred. This state is reported by either the validation host or by the worker. - IoErr(String), - /// The temporary file for the artifact could not be created at the given cache path. This state is reported by the - /// validation host (not by the worker). - CreateTmpFileErr(String), - /// The response from the worker is received, but the file cannot be renamed (moved) to the final destination - /// location. This state is reported by the validation host (not by the worker). - RenameTmpFileErr(String), -} - -impl PrepareError { - /// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those - /// errors depend on the PVF itself and the sc-executor/wasmtime logic. - /// - /// Non-deterministic errors can happen spuriously. Typically, they occur due to resource - /// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient - /// but may persist e.g. if the node is run by overwhelmingly underpowered machine. 
- pub fn is_deterministic(&self) -> bool { - use PrepareError::*; - match self { - Prevalidation(_) | Preparation(_) | Panic(_) => true, - TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false, - } - } -} - -impl fmt::Display for PrepareError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use PrepareError::*; - match self { - Prevalidation(err) => write!(f, "prevalidation: {}", err), - Preparation(err) => write!(f, "preparation: {}", err), - Panic(err) => write!(f, "panic: {}", err), - TimedOut => write!(f, "prepare: timeout"), - IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err), - CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err), - RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err), - } - } -} +use polkadot_node_core_pvf_common::error::PrepareError; /// A error raised during validation of the candidate. #[derive(Debug, Clone)] diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 124657a533ee..6b2bded5b129 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -22,16 +22,19 @@ use crate::{ artifacts::{ArtifactId, ArtifactPathId, ArtifactState, Artifacts}, - error::PrepareError, execute::{self, PendingExecutionRequest}, metrics::Metrics, - prepare, PrepareResult, Priority, PvfPrepData, ValidationError, LOG_TARGET, + prepare, Priority, ValidationError, LOG_TARGET, }; use always_assert::never; use futures::{ channel::{mpsc, oneshot}, Future, FutureExt, SinkExt, StreamExt, }; +use polkadot_node_core_pvf_common::{ + error::{PrepareError, PrepareResult}, + pvf::PvfPrepData, +}; use polkadot_parachain::primitives::ValidationResult; use std::{ collections::HashMap, @@ -187,7 +190,11 @@ impl Config { let path = PathBuf::from(path); (path.clone(), path, None) } else { - (worker_path(&workers_path, "prepare"), worker_path(&workers_path, "execute"), Some(workers_path)) + ( + worker_path(&workers_path, "prepare"), + 
worker_path(&workers_path, "execute"), + Some(workers_path), + ) }; Self { @@ -461,7 +468,7 @@ async fn handle_precheck_pvf( pvf: PvfPrepData, result_sender: PrepareResultSender, ) -> Result<(), Fatal> { - let artifact_id = pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { match state { @@ -505,7 +512,7 @@ async fn handle_execute_pvf( inputs: ExecutePvfInputs, ) -> Result<(), Fatal> { let ExecutePvfInputs { pvf, exec_timeout, params, priority, result_tx } = inputs; - let artifact_id = pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); let executor_params = (*pvf.executor_params()).clone(); if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { @@ -628,7 +635,7 @@ async fn handle_heads_up( let now = SystemTime::now(); for active_pvf in active_pvfs { - let artifact_id = active_pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&active_pvf); if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { match state { ArtifactState::Prepared { last_time_needed, .. 
} => { @@ -911,14 +918,15 @@ fn worker_path(workers_path: &Path, job_kind: &str) -> PathBuf { } #[cfg(test)] -pub(crate) mod tests { +mod tests { use super::*; - use crate::{prepare::PrepareStats, InvalidCandidate, PrepareError}; + use crate::InvalidCandidate; use assert_matches::assert_matches; use futures::future::BoxFuture; - - const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); - pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30); + use polkadot_node_core_pvf_common::{ + prepare::PrepareStats, + tests::{TEST_EXECUTION_TIMEOUT, TEST_PREPARATION_TIMEOUT}, + }; #[tokio::test] async fn pulse_test() { diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index 32832710701c..00a49d915c4a 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -95,17 +95,18 @@ mod host; mod metrics; mod prepare; mod priority; -mod pvf; mod worker_intf; -pub use artifacts::CompiledArtifact; -pub use error::{InvalidCandidate, PrepareError, PrepareResult, ValidationError}; +#[doc(hidden)] +pub mod testing; + +// TODO: Remove when moving the host into its own crate. +#[doc(hidden)] +pub use sp_tracing; + +pub use error::{InvalidCandidate, ValidationError}; pub use execute::{ExecuteHandshake, ExecuteResponse}; -#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] -pub use prepare::MemoryAllocationStats; -pub use prepare::{MemoryStats, PrepareStats}; pub use priority::Priority; -pub use pvf::PvfPrepData; pub use host::{start, Config, ValidationHost}; pub use metrics::Metrics; diff --git a/node/core/pvf/src/metrics.rs b/node/core/pvf/src/metrics.rs index 12bcd9eadad3..62f8c6dc5157 100644 --- a/node/core/pvf/src/metrics.rs +++ b/node/core/pvf/src/metrics.rs @@ -16,7 +16,7 @@ //! Prometheus metrics related to the validation host. -use crate::prepare::MemoryStats; +use polkadot_node_core_pvf_common::prepare::MemoryStats; use polkadot_node_metrics::metrics::{self, prometheus}; /// Validation host metrics. 
diff --git a/node/core/pvf/src/prepare/mod.rs b/node/core/pvf/src/prepare/mod.rs index de40c48464c4..580f67f73fa0 100644 --- a/node/core/pvf/src/prepare/mod.rs +++ b/node/core/pvf/src/prepare/mod.rs @@ -28,36 +28,3 @@ mod worker_intf; pub use pool::start as start_pool; pub use queue::{start as start_queue, FromQueue, ToQueue}; - -use parity_scale_codec::{Decode, Encode}; - -/// Preparation statistics, including the CPU time and memory taken. -#[derive(Debug, Clone, Default, Encode, Decode)] -pub struct PrepareStats { - /// The CPU time that elapsed for the preparation job. - pub cpu_time_elapsed: std::time::Duration, - /// The observed memory statistics for the preparation job. - pub memory_stats: MemoryStats, -} - -/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if -/// supported by the OS, `ru_maxrss`. -#[derive(Clone, Debug, Default, Encode, Decode)] -pub struct MemoryStats { - /// Memory stats from `tikv_jemalloc_ctl`. - #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] - pub memory_tracker_stats: Option, - /// `ru_maxrss` from `getrusage`. `None` if an error occurred. - #[cfg(target_os = "linux")] - pub max_rss: Option, -} - -/// Statistics of collected memory metrics. -#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] -#[derive(Clone, Debug, Default, Encode, Decode)] -pub struct MemoryAllocationStats { - /// Total resident memory, in bytes. - pub resident: u64, - /// Total allocated memory, in bytes. 
- pub allocated: u64, -} diff --git a/node/core/pvf/src/prepare/pool.rs b/node/core/pvf/src/prepare/pool.rs index c72b59b5f147..ae8ecff5285c 100644 --- a/node/core/pvf/src/prepare/pool.rs +++ b/node/core/pvf/src/prepare/pool.rs @@ -16,9 +16,7 @@ use super::worker_intf::{self, Outcome}; use crate::{ - error::{PrepareError, PrepareResult}, metrics::Metrics, - pvf::PvfPrepData, worker_intf::{IdleWorker, WorkerHandle}, LOG_TARGET, }; @@ -26,6 +24,10 @@ use always_assert::never; use futures::{ channel::mpsc, future::BoxFuture, stream::FuturesUnordered, Future, FutureExt, StreamExt, }; +use polkadot_node_core_pvf_common::{ + error::{PrepareError, PrepareResult}, + pvf::PvfPrepData, +}; use slotmap::HopSlotMap; use std::{ fmt, diff --git a/node/core/pvf/src/prepare/queue.rs b/node/core/pvf/src/prepare/queue.rs index f84d5ab0e56e..2a2e6830b67c 100644 --- a/node/core/pvf/src/prepare/queue.rs +++ b/node/core/pvf/src/prepare/queue.rs @@ -17,11 +17,10 @@ //! A queue that handles requests for PVF preparation. use super::pool::{self, Worker}; -use crate::{ - artifacts::ArtifactId, metrics::Metrics, PrepareResult, Priority, PvfPrepData, LOG_TARGET, -}; +use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, LOG_TARGET}; use always_assert::{always, never}; use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt}; +use polkadot_node_core_pvf_common::{error::PrepareResult, pvf::PvfPrepData}; use std::{ collections::{HashMap, VecDeque}, path::PathBuf, @@ -231,7 +230,7 @@ async fn handle_enqueue( ); queue.metrics.prepare_enqueued(); - let artifact_id = pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); if never!( queue.artifact_id_to_job.contains_key(&artifact_id), "second Enqueue sent for a known artifact" @@ -339,7 +338,7 @@ async fn handle_worker_concluded( // this can't be None; // qed. 
let job_data = never_none!(queue.jobs.remove(job)); - let artifact_id = job_data.pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf); queue.artifact_id_to_job.remove(&artifact_id); @@ -425,7 +424,7 @@ async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fat async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> { let job_data = &mut queue.jobs[job]; - let artifact_id = job_data.pvf.as_artifact_id(); + let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf); let artifact_path = artifact_id.path(&queue.cache_path); job_data.worker = Some(worker); @@ -488,11 +487,10 @@ pub fn start( #[cfg(test)] mod tests { use super::*; - use crate::{ - error::PrepareError, host::tests::TEST_PREPARATION_TIMEOUT, prepare::PrepareStats, - }; + use crate::{error::PrepareError, prepare::PrepareStats}; use assert_matches::assert_matches; use futures::{future::BoxFuture, FutureExt}; + use polkadot_node_core_pvf_common::tests::TEST_PREPARATION_TIMEOUT; use slotmap::SlotMap; use std::task::Poll; @@ -616,7 +614,10 @@ mod tests { result: Ok(PrepareStats::default()), }); - assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id()); + assert_eq!( + test.poll_and_recv_from_queue().await.artifact_id, + ArtifactId::from_pvf_prep_data(&pvf(1)) + ); } #[tokio::test] @@ -735,7 +736,10 @@ mod tests { // Since there is still work, the queue requested one extra worker to spawn to handle the // remaining enqueued work items. 
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); - assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id()); + assert_eq!( + test.poll_and_recv_from_queue().await.artifact_id, + ArtifactId::from_pvf_prep_data(&pvf(1)) + ); } #[tokio::test] diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index a11bd3c1810b..3f1ca5a79aa4 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -17,10 +17,7 @@ //! Host interface to the prepare worker. use crate::{ - error::{PrepareError, PrepareResult}, metrics::Metrics, - prepare::PrepareStats, - pvf::PvfPrepData, worker_intf::{ framed_recv, framed_send, path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, @@ -28,6 +25,11 @@ use crate::{ LOG_TARGET, }; use parity_scale_codec::{Decode, Encode}; +use polkadot_node_core_pvf_common::{ + error::{PrepareError, PrepareResult}, + prepare::PrepareStats, + pvf::PvfPrepData, +}; use sp_core::hexdisplay::HexDisplay; use std::{ diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/src/testing.rs new file mode 100644 index 000000000000..f0c487659990 --- /dev/null +++ b/node/core/pvf/src/testing.rs @@ -0,0 +1,68 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. 
If not, see . + +//! Various things for testing other crates. +//! +//! N.B. This is not guarded with some feature flag. Overexposing items here may affect the final +//! artifact even for production builds. + +/// Use this macro to declare a `fn main() {}` that will check the arguments and dispatch them to +/// the appropriate worker, making the executable that can be used for spawning workers. + +#[doc(hidden)] +pub use crate::worker_intf::{spawn_with_program_path, SpawnErr}; + +#[macro_export] +macro_rules! decl_puppet_worker_main { + () => { + fn main() { + $crate::sp_tracing::try_init_simple(); + + let args = std::env::args().collect::>(); + if args.len() < 3 { + panic!("wrong number of arguments"); + } + + let mut version = None; + let mut socket_path: &str = ""; + + for i in 2..args.len() { + match args[i].as_ref() { + "--socket-path" => socket_path = args[i + 1].as_str(), + "--node-version" => version = Some(args[i + 1].as_str()), + _ => (), + } + } + + let subcommand = &args[1]; + match subcommand.as_ref() { + "exit" => { + std::process::exit(1); + }, + "sleep" => { + std::thread::sleep(std::time::Duration::from_secs(5)); + }, + "prepare-worker" => { + $crate::prepare_worker_entrypoint(&socket_path, version); + }, + "execute-worker" => { + $crate::execute_worker_entrypoint(&socket_path, version); + }, + other => panic!("unknown subcommand: {}", other), + } + } + }; +} diff --git a/node/core/pvf/worker/tests/it/adder.rs b/node/core/pvf/tests/it/adder.rs similarity index 100% rename from node/core/pvf/worker/tests/it/adder.rs rename to node/core/pvf/tests/it/adder.rs diff --git a/node/core/pvf/worker/tests/it/main.rs b/node/core/pvf/tests/it/main.rs similarity index 96% rename from node/core/pvf/worker/tests/it/main.rs rename to node/core/pvf/tests/it/main.rs index 34e7ce9a0937..be77c9391170 100644 --- a/node/core/pvf/worker/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -29,8 +29,6 @@ mod adder; mod worker_common; const PUPPET_EXE: &str = 
env!("CARGO_BIN_EXE_puppet_worker"); -const PREPARE_EXE: &str = env!("CARGO_BIN_EXE_prepare_worker"); -const EXECUTE_EXE: &str = env!("CARGO_BIN_EXE_execute_worker"); const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(3); @@ -49,10 +47,9 @@ impl TestHost { F: FnOnce(&mut Config), { let cache_dir = tempfile::tempdir().unwrap(); - let prepare_worker_path = std::path::PathBuf::from(PREPARE_EXE); - let execute_worker_path = std::path::PathBuf::from(EXECUTE_EXE); + let workers_dir = tempfile::tempdir().unwrap(); let mut config = - Config::new(cache_dir.path().to_owned(), prepare_worker_path, execute_worker_path); + Config::new(cache_dir.path().to_owned(), workers_dir.path().to_owned(), None); f(&mut config); let (host, task) = start(config, Metrics::default()); let _ = tokio::task::spawn(task); diff --git a/node/core/pvf/worker/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs similarity index 89% rename from node/core/pvf/worker/tests/it/worker_common.rs rename to node/core/pvf/tests/it/worker_common.rs index b30650e07eeb..439ac8538c95 100644 --- a/node/core/pvf/worker/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::{PREPARE_EXE, PUPPET_EXE}; +use crate::PUPPET_EXE; use polkadot_node_core_pvf::testing::{spawn_with_program_path, SpawnErr}; use std::time::Duration; @@ -37,7 +37,12 @@ async fn spawn_timeout() { #[tokio::test] async fn should_connect() { - let _ = spawn_with_program_path("integration-test", PREPARE_EXE, &[], Duration::from_secs(2)) - .await - .unwrap(); + let _ = spawn_with_program_path( + "integration-test", + PUPPET_EXE, + &["prepare-worker"], + Duration::from_secs(2), + ) + .await + .unwrap(); } diff --git a/node/core/pvf/worker/Cargo.toml b/node/core/pvf/worker/Cargo.toml index d75987048666..3c8742efa511 100644 --- a/node/core/pvf/worker/Cargo.toml +++ b/node/core/pvf/worker/Cargo.toml @@ -12,12 +12,7 @@ path = "bin/prepare_worker.rs" name = "execute_worker" path = "bin/execute_worker.rs" -[[bin]] -name = "puppet_worker" -path = "bin/puppet_worker.rs" - [dependencies] -assert_matches = "1.4.0" cpu-time = "1.0.0" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } @@ -25,11 +20,11 @@ libc = "0.2.139" rayon = "1.5.1" tempfile = "3.3.0" tikv-jemalloc-ctl = { version = "0.5.0", optional = true } -tokio = "1.24.2" +tokio = { version = "1.24.2", features = ["fs", "process"] } parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } -polkadot-node-core-pvf = { path = ".." 
} +polkadot-node-core-pvf-common = { path = "../common" } polkadot-parachain = { path = "../../../../parachain" } polkadot-primitives = { path = "../../../../primitives" } @@ -48,10 +43,5 @@ tikv-jemalloc-ctl = "0.5.0" [build-dependencies] substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } -[dev-dependencies] -adder = { package = "test-parachain-adder", path = "../../../../parachain/test-parachains/adder" } -halt = { package = "test-parachain-halt", path = "../../../../parachain/test-parachains/halt" } -tempfile = "3.3.0" - [features] jemalloc-allocator = ["dep:tikv-jemalloc-ctl"] diff --git a/node/core/pvf/worker/src/execute.rs b/node/core/pvf/worker/src/execute.rs index 9f6ff164a2b6..87e369584e93 100644 --- a/node/core/pvf/worker/src/execute.rs +++ b/node/core/pvf/worker/src/execute.rs @@ -22,8 +22,9 @@ use crate::{ use cpu_time::ProcessTime; use futures::{pin_mut, select_biased, FutureExt}; use parity_scale_codec::{Decode, Encode}; -use polkadot_node_core_pvf::{ - framed_recv, framed_send, ExecuteHandshake as Handshake, ExecuteResponse as Response, +use polkadot_node_core_pvf_common::{ + execute::{Handshake, Response}, + framed_recv, framed_send, }; use polkadot_parachain::primitives::ValidationResult; use std::{ diff --git a/node/core/pvf/worker/src/prepare.rs b/node/core/pvf/worker/src/prepare.rs index 3cec7439f8df..18b61cc5df45 100644 --- a/node/core/pvf/worker/src/prepare.rs +++ b/node/core/pvf/worker/src/prepare.rs @@ -25,13 +25,31 @@ use crate::{ use cpu_time::ProcessTime; use futures::{pin_mut, select_biased, FutureExt}; use parity_scale_codec::{Decode, Encode}; -use polkadot_node_core_pvf::{ - framed_recv, framed_send, CompiledArtifact, MemoryStats, PrepareError, PrepareResult, - PrepareStats, PvfPrepData, +use polkadot_node_core_pvf_common::{ + error::{PrepareError, PrepareResult}, + framed_recv, framed_send, + prepare::{MemoryStats, PrepareStats}, + pvf::PvfPrepData, }; use std::{any::Any, panic, 
path::PathBuf, sync::mpsc::channel}; use tokio::{io, net::UnixStream}; +/// Contains the bytes for a successfully compiled artifact. +pub struct CompiledArtifact(Vec); + +impl CompiledArtifact { + /// Creates a `CompiledArtifact`. + pub fn new(code: Vec) -> Self { + Self(code) + } +} + +impl AsRef<[u8]> for CompiledArtifact { + fn as_ref(&self) -> &[u8] { + self.0.as_slice() + } +} + async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> { let pvf = framed_recv(stream).await?; let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| { From 6a190834453fbb5f42e28d411b68591cd359b1bc Mon Sep 17 00:00:00 2001 From: Marcin S Date: Tue, 25 Apr 2023 16:58:53 +0200 Subject: [PATCH 07/13] Transmute wasm-builder into musl-builder --- Cargo.lock | 15 + Cargo.toml | 3 +- node/core/pvf/musl-builder/Cargo.toml | 21 + node/core/pvf/musl-builder/README.md | 16 + node/core/pvf/musl-builder/src/builder.rs | 287 ++++++ node/core/pvf/musl-builder/src/lib.rs | 231 +++++ .../pvf/musl-builder/src/prerequisites.rs | 177 ++++ node/core/pvf/musl-builder/src/project.rs | 895 ++++++++++++++++++ node/core/pvf/musl-builder/src/version.rs | 197 ++++ 9 files changed, 1841 insertions(+), 1 deletion(-) create mode 100644 node/core/pvf/musl-builder/Cargo.toml create mode 100644 node/core/pvf/musl-builder/README.md create mode 100644 node/core/pvf/musl-builder/src/builder.rs create mode 100644 node/core/pvf/musl-builder/src/lib.rs create mode 100644 node/core/pvf/musl-builder/src/prerequisites.rs create mode 100644 node/core/pvf/musl-builder/src/project.rs create mode 100644 node/core/pvf/musl-builder/src/version.rs diff --git a/Cargo.lock b/Cargo.lock index bbd38c8556ec..482c0e5f4e96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7243,6 +7243,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "polkadot-node-core-pvf-musl-builder" +version = "0.9.41" +dependencies = [ + "ansi_term", + "build-helper", + "cargo_metadata", + "filetime", + "sp-maybe-compressed-blob", + 
"strum", + "tempfile", + "toml 0.7.3", + "walkdir", +] + [[package]] name = "polkadot-node-core-pvf-worker" version = "0.9.41" diff --git a/Cargo.toml b/Cargo.toml index a1a3bea541a3..f587ff004753 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,8 @@ members = [ "node/core/provisioner", "node/core/pvf", "node/core/pvf/common", - "node/core/pvf/worker", + "node/core/pvf/musl-builder", + "node/core/pvf/worker", "node/core/pvf-checker", "node/core/runtime-api", "node/network/approval-distribution", diff --git a/node/core/pvf/musl-builder/Cargo.toml b/node/core/pvf/musl-builder/Cargo.toml new file mode 100644 index 000000000000..b574e2844892 --- /dev/null +++ b/node/core/pvf/musl-builder/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "polkadot-node-core-pvf-musl-builder" +version.workspace = true +authors.workspace = true +edition.workspace = true + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-musl"] + +# TODO: Check cargo udeps +[dependencies] +ansi_term = "0.12.1" +build-helper = "0.1.1" +cargo_metadata = "0.15.4" +strum = { version = "0.24.1", features = ["derive"] } +tempfile = "3.1.0" +toml = "0.7.3" +walkdir = "2.3.2" +filetime = "0.2.16" + +sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/core/pvf/musl-builder/README.md b/node/core/pvf/musl-builder/README.md new file mode 100644 index 000000000000..d58b2ad3d413 --- /dev/null +++ b/node/core/pvf/musl-builder/README.md @@ -0,0 +1,16 @@ +# musl-builder + +musl-builder is a tool that integrates the process of building the musl binary +of your project into the main `cargo` build process. 
+ + + +## Prerequisites + +musl-builder requires a musl toolchain like `x86_64-unknown-linux-musl` to be installed: + +```sh +rustup target add x86_64-unknown-linux-musl +``` + + diff --git a/node/core/pvf/musl-builder/src/builder.rs b/node/core/pvf/musl-builder/src/builder.rs new file mode 100644 index 000000000000..950e79a09dda --- /dev/null +++ b/node/core/pvf/musl-builder/src/builder.rs @@ -0,0 +1,287 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::{ + env, + path::{Path, PathBuf}, + process, +}; + +/// Returns the manifest dir from the `CARGO_MANIFEST_DIR` env. +fn get_manifest_dir() -> PathBuf { + env::var("CARGO_MANIFEST_DIR") + .expect("`CARGO_MANIFEST_DIR` is always set for `build.rs` files; qed") + .into() +} + +/// First step of the [`Builder`] to select the project to build. +pub struct BuilderSelectProject { + /// This parameter just exists to make it impossible to construct + /// this type outside of this crate. + _ignore: (), +} + +impl BuilderSelectProject { + /// Use the current project as project for building the binary. + /// + /// # Panics + /// + /// Panics if the `CARGO_MANIFEST_DIR` variable is not set. This variable + /// is always set by `Cargo` in `build.rs` files. 
+ pub fn with_current_project(self) -> BuilderSelectTarget { + BuilderSelectTarget { + _ignore: (), + project_cargo_toml: get_manifest_dir().join("Cargo.toml"), + } + } + + /// Use the given `path` as project for building the binary. + /// + /// Returns an error if the given `path` does not points to a `Cargo.toml`. + pub fn with_project( + self, + path: impl Into, + ) -> Result { + let path = path.into(); + + if path.ends_with("Cargo.toml") && path.exists() { + Ok(BuilderSelectTarget { _ignore: (), project_cargo_toml: path }) + } else { + Err("Project path must point to the `Cargo.toml` of the project") + } + } +} + +/// Second step of the [`Builder`] to select the target to build with. +pub struct BuilderSelectTarget { + /// This parameter just exists to make it impossible to construct + /// this type outside of this crate. + _ignore: (), + project_cargo_toml: PathBuf, +} + +impl BuilderSelectTarget { + /// Select the Rust target to use for building the binary. + pub fn with_target(self, target: impl Into) -> Builder { + Builder { + rust_flags: Vec::new(), + file_name: None, + project_cargo_toml: self.project_cargo_toml, + features_to_enable: Vec::new(), + target: target.into(), + } + } +} + +/// The builder for building a binary. +/// +/// The builder itself is separated into multiple structs to make the setup type safe. +/// +/// Building a binary: +/// +/// 1. Call [`Builder::new`] to create a new builder. +/// 2. Select the project to build using the methods of [`BuilderSelectProject`]. +/// 3. Set additional `RUST_FLAGS` or a different name for the file containing the code +/// using methods of [`Builder`]. +/// 4. Build the binary using [`Self::build`]. +pub struct Builder { + /// Flags that should be appended to `RUST_FLAGS` env variable. + rust_flags: Vec, + /// The name of the file that is being generated in `OUT_DIR`. + /// + /// Defaults to `binary.rs`. + file_name: Option, + /// The path to the `Cargo.toml` of the project that should be built. 
+ project_cargo_toml: PathBuf, + /// Features that should be enabled when building the binary. + features_to_enable: Vec, + /// The Rust target to use. + target: String, +} + +impl Builder { + /// Create a new instance of the builder. + pub fn new() -> BuilderSelectProject { + BuilderSelectProject { _ignore: () } + } + + /// Enable exporting `__heap_base` as global variable in the binary. + /// + /// This adds `-Clink-arg=--export=__heap_base` to `RUST_FLAGS`. + pub fn export_heap_base(mut self) -> Self { + self.rust_flags.push("-Clink-arg=--export=__heap_base".into()); + self + } + + /// Set the name of the file that will be generated in `OUT_DIR`. + /// + /// This file needs to be included to get access to the build binary. + /// + /// If this function is not called, `file_name` defaults to `binary.rs` + pub fn set_file_name(mut self, file_name: impl Into) -> Self { + self.file_name = Some(file_name.into()); + self + } + + /// Instruct the linker to import the memory into the binary. + /// + /// This adds `-C link-arg=--import-memory` to `RUST_FLAGS`. + pub fn import_memory(mut self) -> Self { + self.rust_flags.push("-C link-arg=--import-memory".into()); + self + } + + /// Append the given `flag` to `RUST_FLAGS`. + /// + /// `flag` is appended as is, so it needs to be a valid flag. + pub fn append_to_rust_flags(mut self, flag: impl Into) -> Self { + self.rust_flags.push(flag.into()); + self + } + + /// Enable the given feature when building the binary. + /// + /// `feature` needs to be a valid feature that is defined in the project `Cargo.toml`. + pub fn enable_feature(mut self, feature: impl Into) -> Self { + self.features_to_enable.push(feature.into()); + self + } + + /// Build the binary. 
+ pub fn build(self) { + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("`OUT_DIR` is set by cargo!")); + let file_path = out_dir.join(self.file_name.clone().unwrap_or_else(|| "binary.rs".into())); + + if check_skip_build() { + // If we skip the build, we still want to make sure to be called when an env variable + // changes + generate_rerun_if_changed_instructions(); + + provide_dummy_binary_if_not_exist(&file_path); + + return + } + + build_project( + file_path, + self.project_cargo_toml, + self.rust_flags.into_iter().map(|f| format!("{} ", f)).collect(), + self.features_to_enable, + self.file_name, + self.target, + ); + + // As last step we need to generate our `rerun-if-changed` stuff. If a build fails, we don't + // want to spam the output! + generate_rerun_if_changed_instructions(); + } +} + +/// Generate the name of the skip build environment variable for the current crate. +fn generate_crate_skip_build_env_name() -> String { + format!( + "BUILDER_SKIP_{}_BUILD", + env::var("CARGO_PKG_NAME") + .expect("Package name is set") + .to_uppercase() + .replace('-', "_"), + ) +} + +/// Checks if the build of the binary should be skipped. +fn check_skip_build() -> bool { + env::var(crate::SKIP_BUILD_ENV).is_ok() || + env::var(generate_crate_skip_build_env_name()).is_ok() +} + +/// Provide a dummy binary if there doesn't exist one. +fn provide_dummy_binary_if_not_exist(file_path: &Path) { + if !file_path.exists() { + crate::write_file_if_changed( + file_path, + "pub const BINARY: Option<&[u8]> = None;\ + pub const BINARY_BLOATY: Option<&[u8]> = None;", + ); + } +} + +/// Generate the `rerun-if-changed` instructions for cargo to make sure that the binary is +/// rebuilt when needed. +fn generate_rerun_if_changed_instructions() { + // Make sure that the `build.rs` is called again if one of the following env variables changes. 
+ println!("cargo:rerun-if-env-changed={}", crate::SKIP_BUILD_ENV); + println!("cargo:rerun-if-env-changed={}", crate::FORCE_BUILD_ENV); + println!("cargo:rerun-if-env-changed={}", generate_crate_skip_build_env_name()); +} + +/// Build the currently built project as binary. +/// +/// The current project is determined by using the `CARGO_MANIFEST_DIR` environment variable. +/// +/// `file_name` - The name + path of the file being generated. The file contains the +/// constant `BINARY`, which contains the built binary. +/// +/// `project_cargo_toml` - The path to the `Cargo.toml` of the project that should be built. +/// +/// `default_rustflags` - Default `RUSTFLAGS` that will always be set for the build. +/// +/// `features_to_enable` - Features that should be enabled for the project. +/// +/// `binary_name` - The optional binary name that is extended with +/// +/// `target` - The binary target. +fn build_project( + file_name: PathBuf, + project_cargo_toml: PathBuf, + default_rustflags: String, + features_to_enable: Vec, + binary_name: Option, + target: String, +) { + let cargo_cmd = match crate::prerequisites::check(&target) { + Ok(cmd) => cmd, + Err(err_msg) => { + eprintln!("{}", err_msg); + process::exit(1); + }, + }; + + let (binary, bloaty) = crate::project::create_and_compile( + &project_cargo_toml, + &default_rustflags, + cargo_cmd, + features_to_enable, + binary_name, + &target, + ); + + let binary = if let Some(binary) = binary { + binary.binary_path_escaped() + } else { + bloaty.binary_bloaty_path_escaped() + }; + + // NOTE: Don't write bloaty binary, as opposed to wasm-builder which always writes it (why?). 
+ crate::write_file_if_changed( + file_name, + format!( + r#" + pub const BINARY: Option<&[u8]> = Some(include_bytes!("{binary}")); + "#, + binary = binary, + ), + ); +} diff --git a/node/core/pvf/musl-builder/src/lib.rs b/node/core/pvf/musl-builder/src/lib.rs new file mode 100644 index 000000000000..e6b191a43451 --- /dev/null +++ b/node/core/pvf/musl-builder/src/lib.rs @@ -0,0 +1,231 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +// TODO: Extract common parts into a generalized builder that wasm/musl builders are based on. + +// TODO: Make sure we build with O2 and LTO. + +mod builder; +mod prerequisites; +mod project; +mod version; + +pub use builder::{Builder, BuilderSelectProject}; + +use std::{ + env, fs, + io::BufRead, + path::{Path, PathBuf}, + process::Command, +}; +use version::Version; + +/// Environment variable that tells us to skip building the binary. +const SKIP_BUILD_ENV: &str = "BUILDER_SKIP_BUILD"; + +/// Environment variable that tells us whether we should avoid network requests. +const OFFLINE: &str = "CARGO_NET_OFFLINE"; + +/// Environment variable to force a certain build type when building the binary. +/// Expects "debug", "release" or "production" as value. 
+/// +/// When unset the binary uses the same build type as the main cargo build with +/// the exception of a debug build: In this case the build defaults to `release` in +/// order to avoid a slowdown when not explicitly requested. +const BUILD_TYPE_ENV: &str = "BUILDER_BUILD_TYPE"; + +/// Environment variable to extend the `RUSTFLAGS` variable given to the build. +const BUILD_RUSTFLAGS_ENV: &str = "BUILDER_BUILD_RUSTFLAGS"; + +/// Environment variable to set the target directory to copy the final binary. +/// +/// The directory needs to be an absolute path. +const TARGET_DIRECTORY: &str = "BUILDER_TARGET_DIRECTORY"; + +/// Environment variable to disable color output of the build. +const BUILD_NO_COLOR: &str = "BUILDER_BUILD_NO_COLOR"; + +/// Environment variable to set the toolchain used to compile the binary. +const BUILD_TOOLCHAIN: &str = "BUILDER_BUILD_TOOLCHAIN"; + +/// Environment variable that makes sure the build is triggered. +const FORCE_BUILD_ENV: &str = "BUILDER_FORCE_BUILD"; + +/// Environment variable that hints the workspace we are building. +const BUILD_WORKSPACE_HINT: &str = "BUILDER_BUILD_WORKSPACE_HINT"; + +/// Write to the given `file` if the `content` is different. +fn write_file_if_changed(file: impl AsRef, content: impl AsRef) { + if fs::read_to_string(file.as_ref()).ok().as_deref() != Some(content.as_ref()) { + fs::write(file.as_ref(), content.as_ref()) + .unwrap_or_else(|_| panic!("Writing `{}` can not fail!", file.as_ref().display())); + } +} + +/// Copy `src` to `dst` if the `dst` does not exist or is different. +fn copy_file_if_changed(src: PathBuf, dst: PathBuf) { + let src_file = fs::read_to_string(&src).ok(); + let dst_file = fs::read_to_string(&dst).ok(); + + if src_file != dst_file { + fs::copy(&src, &dst).unwrap_or_else(|_| { + panic!("Copying `{}` to `{}` can not fail; qed", src.display(), dst.display()) + }); + } +} + +/// Get a cargo command that should be used to invoke the compilation. 
+fn get_cargo_command() -> CargoCommand { + let env_cargo = + CargoCommand::new(&env::var("CARGO").expect("`CARGO` env variable is always set by cargo")); + let default_cargo = CargoCommand::new("cargo"); + let toolchain = env::var(BUILD_TOOLCHAIN).ok(); + + // First check if the user requested a specific toolchain + if let Some(cmd) = + toolchain.map(|t| CargoCommand::new_with_args("rustup", &["run", &t, "cargo"])) + { + cmd + } else if env_cargo.supports_env() { + env_cargo + } else if default_cargo.supports_env() { + default_cargo + } else { + // If no command before provided us with a cargo that supports our Substrate env, we + // try to search one with rustup. If that fails as well, we return the default cargo and let + // the prequisities check fail. + get_rustup_command().unwrap_or(default_cargo) + } +} + +/// Get the newest rustup command that supports our Substrate env. +/// +/// Stable versions are always favored over nightly versions even if the nightly versions are +/// newer. +fn get_rustup_command() -> Option { + let host = format!("-{}", env::var("HOST").expect("`HOST` is always set by cargo")); + + let output = Command::new("rustup").args(&["toolchain", "list"]).output().ok()?.stdout; + let lines = output.as_slice().lines(); + + let mut versions = Vec::new(); + for line in lines.filter_map(|l| l.ok()) { + let rustup_version = line.trim_end_matches(&host); + + let cmd = CargoCommand::new_with_args("rustup", &["run", &rustup_version, "cargo"]); + + if !cmd.supports_env() { + continue + } + + let Some(cargo_version) = cmd.version() else { continue; }; + + versions.push((cargo_version, rustup_version.to_string())); + } + + // Sort by the parsed version to get the latest version (greatest version) at the end of the + // vec. + versions.sort_by_key(|v| v.0); + let version = &versions.last()?.1; + + Some(CargoCommand::new_with_args("rustup", &["run", &version, "cargo"])) +} + +/// Wraps a specific command which represents a cargo invocation. 
+#[derive(Debug)] +struct CargoCommand { + program: String, + args: Vec, + version: Option, +} + +impl CargoCommand { + fn new(program: &str) -> Self { + let version = Self::extract_version(program, &[]); + + CargoCommand { program: program.into(), args: Vec::new(), version } + } + + fn new_with_args(program: &str, args: &[&str]) -> Self { + let version = Self::extract_version(program, args); + + CargoCommand { + program: program.into(), + args: args.iter().map(ToString::to_string).collect(), + version, + } + } + + fn command(&self) -> Command { + let mut cmd = Command::new(&self.program); + cmd.args(&self.args); + cmd + } + + fn extract_version(program: &str, args: &[&str]) -> Option { + let version = Command::new(program) + .args(args) + .arg("--version") + .output() + .ok() + .and_then(|o| String::from_utf8(o.stdout).ok())?; + + Version::extract(&version) + } + + /// Returns the version of this cargo command or `None` if it failed to extract the version. + fn version(&self) -> Option { + self.version + } + + /// Check if the supplied cargo command supports our environment. + /// + /// Assumes that cargo version matches the rustc version. + fn supports_env(&self) -> bool { + // Just a stub for now -- not sure this is needed for musl-builder. + true + } +} + +/// Wraps a [`CargoCommand`] and the version of `rustc` the cargo command uses. +struct CargoCommandVersioned { + command: CargoCommand, + version: String, +} + +impl CargoCommandVersioned { + fn new(command: CargoCommand, version: String) -> Self { + Self { command, version } + } + + /// Returns the `rustc` version. + fn rustc_version(&self) -> &str { + &self.version + } +} + +impl std::ops::Deref for CargoCommandVersioned { + type Target = CargoCommand; + + fn deref(&self) -> &CargoCommand { + &self.command + } +} + +/// Returns `true` when color output is enabled. 
+fn color_output_enabled() -> bool { + env::var(crate::BUILD_NO_COLOR).is_err() +} diff --git a/node/core/pvf/musl-builder/src/prerequisites.rs b/node/core/pvf/musl-builder/src/prerequisites.rs new file mode 100644 index 000000000000..669e9a665644 --- /dev/null +++ b/node/core/pvf/musl-builder/src/prerequisites.rs @@ -0,0 +1,177 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use crate::{write_file_if_changed, CargoCommand, CargoCommandVersioned}; + +use std::{fs, path::Path}; + +use ansi_term::Color; +use tempfile::tempdir; + +/// Print an error message. +fn print_error_message(message: &str) -> String { + if super::color_output_enabled() { + Color::Red.bold().paint(message).to_string() + } else { + message.into() + } +} + +/// Checks that all prerequisites are installed. +/// +/// Returns the versioned cargo command on success. +pub(crate) fn check(target: &str) -> Result { + let cargo_command = crate::get_cargo_command(); + + if !cargo_command.supports_env() { + // TODO: are there actual prerequisites for musl? + return Err(print_error_message(&format!( + "Cannot compile the {} runtime: no compatible Rust compiler found!", + target + ))) + } + + check_target_installed(cargo_command, target) +} + +/// Create the project that will be used to check that the required target is installed and to +/// extract the rustc version. 
+fn create_check_target_project(project_dir: &Path) {
+	let lib_rs_file = project_dir.join("src/lib.rs");
+	let main_rs_file = project_dir.join("src/main.rs");
+	let build_rs_file = project_dir.join("build.rs");
+	let manifest_path = project_dir.join("Cargo.toml");
+
+	write_file_if_changed(
+		&manifest_path,
+		r#"
+			[package]
+			name = "builder-test"
+			version = "1.0.0"
+			edition = "2021"
+			build = "build.rs"
+
+			[lib]
+			name = "builder_test"
+			crate-type = ["cdylib"]
+
+			[workspace]
+		"#,
+	);
+	write_file_if_changed(lib_rs_file, "pub fn test() {}");
+
+	// We want to know the rustc version of the rustc that is being used by our cargo command.
+	// The cargo command is determined by some *very* complex algorithm to find the cargo command
+	// that supports nightly.
+	// The best solution would be if there is a `cargo rustc --version` command, which sadly
+	// doesn't exist. So, the only available way of getting the rustc version is to build a project
+	// and capture the rustc version in this build process. This `build.rs` is exactly doing this.
+	// It gets the rustc version by calling `rustc --version` and exposing it in the `RUSTC_VERSION`
+	// environment variable.
+	write_file_if_changed(
+		build_rs_file,
+		r#"
+			fn main() {
+				let rustc_cmd = std::env::var("RUSTC").ok().unwrap_or_else(|| "rustc".into());
+
+				let rustc_version = std::process::Command::new(rustc_cmd)
+					.arg("--version")
+					.output()
+					.ok()
+					.and_then(|o| String::from_utf8(o.stdout).ok());
+
+				println!(
+					"cargo:rustc-env=RUSTC_VERSION={}",
+					rustc_version.unwrap_or_else(|| "unknown rustc version".into()),
+				);
+			}
+		"#,
+	);
+	// Just prints the `RUSTC_VERSION` environment variable that is being created by the
+	// `build.rs` script.
+ write_file_if_changed( + main_rs_file, + r#" + fn main() { + println!("{}", env!("RUSTC_VERSION")); + } + "#, + ); +} + +fn check_target_installed( + cargo_command: CargoCommand, + target: &str, +) -> Result { + let temp = tempdir().expect("Creating temp dir does not fail; qed"); + fs::create_dir_all(temp.path().join("src")).expect("Creating src dir does not fail; qed"); + create_check_target_project(temp.path()); + + let err_msg = + print_error_message(&format!("{} target not installed, please install it!", target)); + let manifest_path = temp.path().join("Cargo.toml").display().to_string(); + + let mut build_cmd = cargo_command.command(); + // Chdir to temp to avoid including project's .cargo/config.toml + // by accident - it can happen in some CI environments. + build_cmd.current_dir(&temp); + build_cmd.args(&["build", &format!("--target={}", target), "--manifest-path", &manifest_path]); + + if super::color_output_enabled() { + build_cmd.arg("--color=always"); + } + + let mut run_cmd = cargo_command.command(); + // Chdir to temp to avoid including project's .cargo/config.toml + // by accident - it can happen in some CI environments. + run_cmd.current_dir(&temp); + run_cmd.args(&["run", "--manifest-path", &manifest_path]); + + // Unset the `CARGO_TARGET_DIR` to prevent a cargo deadlock + build_cmd.env_remove("CARGO_TARGET_DIR"); + run_cmd.env_remove("CARGO_TARGET_DIR"); + + // Make sure the host's flags aren't used here, e.g. if an alternative linker is specified + // in the RUSTFLAGS then the check we do here will break unless we clear these. 
+ build_cmd.env_remove("CARGO_ENCODED_RUSTFLAGS"); + run_cmd.env_remove("CARGO_ENCODED_RUSTFLAGS"); + build_cmd.env_remove("RUSTFLAGS"); + run_cmd.env_remove("RUSTFLAGS"); + + build_cmd.output().map_err(|_| err_msg.clone()).and_then(|s| { + if s.status.success() { + let version = run_cmd.output().ok().and_then(|o| String::from_utf8(o.stdout).ok()); + Ok(CargoCommandVersioned::new( + cargo_command, + version.unwrap_or_else(|| "unknown rustc version".into()), + )) + } else { + match String::from_utf8(s.stderr) { + Ok(ref err) if err.contains("linker `rust-lld` not found") => + Err(print_error_message("`rust-lld` not found, please install it!")), + Ok(ref err) => Err(format!( + "{}\n\n{}\n{}\n{}{}\n", + err_msg, + Color::Yellow.bold().paint("Further error information:"), + Color::Yellow.bold().paint("-".repeat(60)), + err, + Color::Yellow.bold().paint("-".repeat(60)), + )), + Err(_) => Err(err_msg), + } + } + }) +} diff --git a/node/core/pvf/musl-builder/src/project.rs b/node/core/pvf/musl-builder/src/project.rs new file mode 100644 index 000000000000..fce433ac8917 --- /dev/null +++ b/node/core/pvf/musl-builder/src/project.rs @@ -0,0 +1,895 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +use crate::{write_file_if_changed, CargoCommandVersioned, OFFLINE}; + +use build_helper::rerun_if_changed; +use cargo_metadata::{CargoOpt, Metadata, MetadataCommand}; +use std::{ + borrow::ToOwned, + collections::HashSet, + env, fs, + hash::{Hash, Hasher}, + ops::Deref, + path::{Path, PathBuf}, + process, +}; +use strum::{EnumIter, IntoEnumIterator}; +use toml::value::Table; +use walkdir::WalkDir; + +/// Colorize an info message. +/// +/// Returns the colorized message. +fn colorize_info_message(message: &str) -> String { + if super::color_output_enabled() { + ansi_term::Color::Yellow.bold().paint(message).to_string() + } else { + message.into() + } +} + +/// Holds the path to the bloaty binary. +pub struct BinaryBloaty(PathBuf); + +impl BinaryBloaty { + /// Returns the escaped path to the bloaty binary. + pub fn binary_bloaty_path_escaped(&self) -> String { + self.0.display().to_string().escape_default().to_string() + } + + /// Returns the path to the binary. + pub fn binary_bloaty_path(&self) -> &Path { + &self.0 + } +} + +/// Holds the path to the binary. +pub struct Binary(PathBuf); + +impl Binary { + /// Returns the path to the binary. + pub fn binary_path(&self) -> &Path { + &self.0 + } + + /// Returns the escaped path to the binary. + pub fn binary_path_escaped(&self) -> String { + self.0.display().to_string().escape_default().to_string() + } +} + +fn crate_metadata(cargo_manifest: &Path) -> Metadata { + let mut cargo_lock = cargo_manifest.to_path_buf(); + cargo_lock.set_file_name("Cargo.lock"); + + let cargo_lock_existed = cargo_lock.exists(); + + // If we can find a `Cargo.lock`, we assume that this is the workspace root and there exists a + // `Cargo.toml` that we can use for getting the metadata. 
+ let cargo_manifest = if let Some(mut cargo_lock) = find_cargo_lock(cargo_manifest) { + cargo_lock.set_file_name("Cargo.toml"); + cargo_lock + } else { + cargo_manifest.to_path_buf() + }; + + let mut crate_metadata_command = create_metadata_command(cargo_manifest); + crate_metadata_command.features(CargoOpt::AllFeatures); + + let crate_metadata = crate_metadata_command + .exec() + .expect("`cargo metadata` can not fail on project `Cargo.toml`; qed"); + // If the `Cargo.lock` didn't exist, we need to remove it after + // calling `cargo metadata`. This is required to ensure that we don't change + // the build directory outside of the `target` folder. Commands like + // `cargo publish` require this. + if !cargo_lock_existed { + let _ = fs::remove_file(&cargo_lock); + } + + crate_metadata +} + +/// Creates the project, compiles the binary and compacts the binary. +/// +/// # Returns +/// +/// The path to the compact binary and the bloaty binary. +pub(crate) fn create_and_compile( + project_cargo_toml: &Path, + default_rustflags: &str, + cargo_cmd: CargoCommandVersioned, + features_to_enable: Vec, + binary_name: Option, + target: &str, +) -> (Option, BinaryBloaty) { + let workspace_root = get_workspace_root(); + let workspace = workspace_root.join("wbuild"); + + let crate_metadata = crate_metadata(project_cargo_toml); + + let project = create_project( + project_cargo_toml, + &workspace, + &crate_metadata, + crate_metadata.workspace_root.as_ref(), + features_to_enable, + ); + + let profile = build_project(&project, default_rustflags, cargo_cmd, target); + let (binary, binary_compressed, bloaty) = + compact_file(&project, profile, project_cargo_toml, binary_name, target); + + binary + .as_ref() + .map(|binary| copy_binary_to_target_directory(project_cargo_toml, binary)); + + binary_compressed.as_ref().map(|binary_compressed| { + copy_binary_to_target_directory(project_cargo_toml, binary_compressed) + }); + + let final_binary = binary_compressed.or(binary); + + 
generate_rerun_if_changed_instructions( + project_cargo_toml, + &project, + &workspace, + final_binary.as_ref(), + &bloaty, + ); + + if let Err(err) = adjust_mtime(&bloaty, final_binary.as_ref()) { + build_helper::warning!("Error while adjusting the mtime of the binaries: {}", err) + } + + (final_binary, bloaty) +} + +/// Adjust the mtime of the bloaty and compressed/compact files. +/// +/// We add the bloaty and the compressed/compact file to the `rerun-if-changed` files. +/// Cargo/Rustc determines based on the timestamp of the `invoked.timestamp` file that can be found +/// in the `OUT_DIR/..`, if it needs to rerun a `build.rs` script. The problem is that this +/// `invoked.timestamp` is created when the `build.rs` is executed and the binaries are created +/// later. This leads to them having a later mtime than the `invoked.timestamp` file and thus, +/// cargo/rustc always re-executes the `build.rs` script. To hack around this, we copy the mtime of +/// the `invoked.timestamp` to the binaries. +fn adjust_mtime( + bloaty_binary: &BinaryBloaty, + compressed_or_compact_binary: Option<&Binary>, +) -> std::io::Result<()> { + let out_dir = build_helper::out_dir(); + let invoked_timestamp = out_dir.join("../invoked.timestamp"); + + // Get the mtime of the `invoked.timestamp` + let metadata = fs::metadata(invoked_timestamp)?; + let mtime = filetime::FileTime::from_last_modification_time(&metadata); + + filetime::set_file_mtime(bloaty_binary.binary_bloaty_path(), mtime)?; + if let Some(binary) = compressed_or_compact_binary.as_ref() { + filetime::set_file_mtime(binary.binary_path(), mtime)?; + } + + Ok(()) +} + +/// Find the `Cargo.lock` relative to the `OUT_DIR` environment variable. +/// +/// If the `Cargo.lock` cannot be found, we emit a warning and return `None`. 
+fn find_cargo_lock(cargo_manifest: &Path) -> Option { + fn find_impl(mut path: PathBuf) -> Option { + loop { + if path.join("Cargo.lock").exists() { + return Some(path.join("Cargo.lock")) + } + + if !path.pop() { + return None + } + } + } + + if let Ok(workspace) = env::var(crate::BUILD_WORKSPACE_HINT) { + let path = PathBuf::from(workspace); + + if path.join("Cargo.lock").exists() { + return Some(path.join("Cargo.lock")) + } else { + build_helper::warning!( + "`{}` env variable doesn't point to a directory that contains a `Cargo.lock`.", + crate::BUILD_WORKSPACE_HINT, + ); + } + } + + if let Some(path) = find_impl(build_helper::out_dir()) { + return Some(path) + } + + build_helper::warning!( + "Could not find `Cargo.lock` for `{}`, while searching from `{}`. \ + To fix this, point the `{}` env variable to the directory of the workspace being compiled.", + cargo_manifest.display(), + build_helper::out_dir().display(), + crate::BUILD_WORKSPACE_HINT, + ); + + None +} + +/// Extract the crate name from the given `Cargo.toml`. +fn get_crate_name(cargo_manifest: &Path) -> String { + let cargo_toml: Table = toml::from_str( + &fs::read_to_string(cargo_manifest).expect("File exists as checked before; qed"), + ) + .expect("Cargo manifest is a valid toml file; qed"); + + let package = cargo_toml + .get("package") + .and_then(|t| t.as_table()) + .expect("`package` key exists in valid `Cargo.toml`; qed"); + + package + .get("name") + .and_then(|p| p.as_str()) + .map(ToOwned::to_owned) + .expect("Package name exists; qed") +} + +/// Returns the name for the binary. +fn get_binary_name(cargo_manifest: &Path) -> String { + get_crate_name(cargo_manifest).replace('-', "_") +} + +/// Returns the root path of the workspace. 
+fn get_workspace_root() -> PathBuf { + let mut out_dir = build_helper::out_dir(); + + loop { + match out_dir.parent() { + Some(parent) if out_dir.ends_with("build") => return parent.to_path_buf(), + _ => + if !out_dir.pop() { + break + }, + } + } + + panic!("Could not find target dir in: {}", build_helper::out_dir().display()) +} + +fn create_project_cargo_toml( + workspace: &Path, + workspace_root_path: &Path, + crate_name: &str, + crate_path: &Path, + binary: &str, + enabled_features: impl Iterator, +) { + let mut root_workspace_toml: Table = toml::from_str( + &fs::read_to_string(workspace_root_path.join("Cargo.toml")) + .expect("Workspace root `Cargo.toml` exists; qed"), + ) + .expect("Workspace root `Cargo.toml` is a valid toml file; qed"); + + let mut workspace_toml = Table::new(); + + // Add different profiles which are selected by setting `BUILD_TYPE`. + let mut release_profile = Table::new(); + release_profile.insert("panic".into(), "abort".into()); + release_profile.insert("lto".into(), "thin".into()); + + let mut production_profile = Table::new(); + production_profile.insert("inherits".into(), "release".into()); + production_profile.insert("lto".into(), "fat".into()); + production_profile.insert("codegen-units".into(), 1.into()); + + let mut dev_profile = Table::new(); + dev_profile.insert("panic".into(), "abort".into()); + + let mut profile = Table::new(); + profile.insert("release".into(), release_profile.into()); + profile.insert("production".into(), production_profile.into()); + profile.insert("dev".into(), dev_profile.into()); + + workspace_toml.insert("profile".into(), profile.into()); + + // Add patch section from the project root `Cargo.toml` + while let Some(mut patch) = + root_workspace_toml.remove("patch").and_then(|p| p.try_into::().ok()) + { + // Iterate over all patches and make the patch path absolute from the workspace root path. 
+ patch + .iter_mut() + .filter_map(|p| { + p.1.as_table_mut().map(|t| t.iter_mut().filter_map(|t| t.1.as_table_mut())) + }) + .flatten() + .for_each(|p| { + p.iter_mut().filter(|(k, _)| k == &"path").for_each(|(_, v)| { + if let Some(path) = v.as_str().map(PathBuf::from) { + if path.is_relative() { + *v = workspace_root_path.join(path).display().to_string().into(); + } + } + }) + }); + + workspace_toml.insert("patch".into(), patch.into()); + } + + let mut package = Table::new(); + package.insert("name".into(), format!("{}", crate_name).into()); + package.insert("version".into(), "1.0.0".into()); + package.insert("edition".into(), "2021".into()); + + workspace_toml.insert("package".into(), package.into()); + + let mut lib = Table::new(); + lib.insert("name".into(), binary.into()); + lib.insert("crate-type".into(), vec!["cdylib".to_string()].into()); + + workspace_toml.insert("lib".into(), lib.into()); + + let mut dependencies = Table::new(); + + let mut project = Table::new(); + project.insert("package".into(), crate_name.into()); + project.insert("path".into(), crate_path.display().to_string().into()); + project.insert("default-features".into(), false.into()); + project.insert("features".into(), enabled_features.collect::>().into()); + + dependencies.insert("project".into(), project.into()); + + workspace_toml.insert("dependencies".into(), dependencies.into()); + + workspace_toml.insert("workspace".into(), Table::new().into()); + + write_file_if_changed( + workspace.join("Cargo.toml"), + toml::to_string_pretty(&workspace_toml).expect("workspace toml is valid; qed"), + ); +} + +/// Find a package by the given `manifest_path` in the metadata. In case it can't be found by its +/// manifest_path, fallback to finding it by name; this is necessary during publish because the +/// package's manifest path will be *generated* within a specific packaging directory, thus it won't +/// be found by its original path anymore. +/// +/// Panics if the package could not be found. 
+fn find_package_by_manifest_path<'a>( + pkg_name: &str, + manifest_path: &Path, + crate_metadata: &'a cargo_metadata::Metadata, +) -> &'a cargo_metadata::Package { + if let Some(pkg) = crate_metadata.packages.iter().find(|p| p.manifest_path == manifest_path) { + return pkg + } + + let pkgs_by_name = crate_metadata + .packages + .iter() + .filter(|p| p.name == pkg_name) + .collect::>(); + + if let Some(pkg) = pkgs_by_name.first() { + if pkgs_by_name.len() > 1 { + panic!( + "Found multiple packages matching the name {pkg_name} ({manifest_path:?}): {:?}", + pkgs_by_name + ); + } else { + return pkg + } + } else { + panic!("Failed to find entry for package {pkg_name} ({manifest_path:?})."); + } +} + +/// Get a list of enabled features for the project. +fn project_enabled_features( + pkg_name: &str, + cargo_manifest: &Path, + crate_metadata: &cargo_metadata::Metadata, +) -> Vec { + let package = find_package_by_manifest_path(pkg_name, cargo_manifest, crate_metadata); + + let std_enabled = package.features.get("std"); + + let mut enabled_features = package + .features + .iter() + .filter(|(f, v)| { + let mut feature_env = f.replace("-", "_"); + feature_env.make_ascii_uppercase(); + + // If this is a feature that corresponds only to an optional dependency + // and this feature is enabled by the `std` feature, we assume that this + // is only done through the `std` feature. This is a bad heuristic and should + // be removed after namespaced features are landed: + // https://doc.rust-lang.org/cargo/reference/unstable.html#namespaced-features + // Then we can just express this directly in the `Cargo.toml` and do not require + // this heuristic anymore. However, for the transition phase between now and namespaced + // features already being present in nightly, we need this code to make + // runtimes compile with all the possible rustc versions. 
+ if v.len() == 1 && + v.get(0).map_or(false, |v| *v == format!("dep:{}", f)) && + std_enabled.as_ref().map(|e| e.iter().any(|ef| ef == *f)).unwrap_or(false) + { + return false + } + + // TODO: Generalize this? + // We don't want to enable the `std`/`default` feature for the wasm build and + // we need to check if the feature is enabled by checking the env variable. + // *f != "std" && *f != "default" && + env::var(format!("CARGO_FEATURE_{}", feature_env)) + .map(|v| v == "1") + .unwrap_or_default() + }) + .map(|d| d.0.clone()) + .collect::>(); + + enabled_features.sort(); + enabled_features +} + +// TODO: Generalize this? +// /// Returns if the project has the `runtime-wasm` feature +// fn has_runtime_wasm_feature_declared( +// pkg_name: &str, +// cargo_manifest: &Path, +// crate_metadata: &cargo_metadata::Metadata, +// ) -> bool { +// let package = find_package_by_manifest_path(pkg_name, cargo_manifest, crate_metadata); + +// package.features.keys().any(|k| k == "runtime-wasm") +// } + +/// Create the project used to build the binary. +/// +/// # Returns +/// +/// The path to the created project. +fn create_project( + project_cargo_toml: &Path, + workspace: &Path, + crate_metadata: &Metadata, + workspace_root_path: &Path, + features_to_enable: Vec, +) -> PathBuf { + let crate_name = get_crate_name(project_cargo_toml); + let crate_path = project_cargo_toml.parent().expect("Parent path exists; qed"); + let binary = get_binary_name(project_cargo_toml); + let project_folder = workspace.join(&crate_name); + + fs::create_dir_all(project_folder.join("src")).expect("project dir create can not fail; qed"); + + let enabled_features = + project_enabled_features(&crate_name, project_cargo_toml, crate_metadata); + + // TODO: Generalize this? 
+ // if has_runtime_wasm_feature_declared(&crate_name, project_cargo_toml, crate_metadata) { + // enabled_features.push("runtime-wasm".into()); + // } + + let mut enabled_features = enabled_features.into_iter().collect::>(); + enabled_features.extend(features_to_enable.into_iter()); + + create_project_cargo_toml( + &project_folder, + workspace_root_path, + &crate_name, + crate_path, + &binary, + enabled_features.into_iter(), + ); + + write_file_if_changed(project_folder.join("src/lib.rs"), "#![no_std] pub use project::*;"); + + if let Some(crate_lock_file) = find_cargo_lock(project_cargo_toml) { + // Use the `Cargo.lock` of the main project. + crate::copy_file_if_changed(crate_lock_file, project_folder.join("Cargo.lock")); + } + + project_folder +} + +/// The cargo profile that is used to build the project. +#[derive(Debug, EnumIter)] +enum Profile { + /// The `--profile dev` profile. + Debug, + /// The `--profile release` profile. + Release, + /// The `--profile production` profile. + Production, +} + +impl Profile { + /// Create a profile by detecting which profile is used for the main build. + /// + /// We cannot easily determine the profile that is used by the main cargo invocation + /// because the `PROFILE` environment variable won't contain any custom profiles like + /// "production". It would only contain the builtin profile where the custom profile + /// inherits from. This is why we inspect the build path to learn which profile is used. + /// + /// # Note + /// + /// Can be overriden by setting [`crate::BUILD_TYPE_ENV`]. + fn detect(project: &Path) -> Profile { + let (name, overriden) = if let Ok(name) = env::var(crate::BUILD_TYPE_ENV) { + (name, true) + } else { + // First go backwards to the beginning of the target directory. + // Then go forwards to find the "wbuild" directory. + // We need to go backwards first because when starting from the root there + // might be a chance that someone has a "wbuild" directory somewhere in the path. 
+ let name = project + .components() + .rev() + .take_while(|c| c.as_os_str() != "target") + .collect::>() + .iter() + .rev() + .take_while(|c| c.as_os_str() != "wbuild") + .last() + .expect("We put the project within a `target/.../wbuild` path; qed") + .as_os_str() + .to_str() + .expect("All our profile directory names are ascii; qed") + .to_string(); + (name, false) + }; + match (Profile::iter().find(|p| p.directory() == name), overriden) { + // When not overriden by a env variable we default to using the `Release` profile + // for the build even when the main build uses the debug build. This + // is because the `Debug` profile is too slow for normal development activities. + (Some(Profile::Debug), false) => Profile::Release, + // For any other profile or when overriden we take it at face value. + (Some(profile), _) => profile, + // For non overriden unknown profiles we fall back to `Release`. + // This allows us to continue building when a custom profile is used for the + // main builds cargo. When explicitly passing a profile via env variable we are + // not doing a fallback. + (None, false) => { + let profile = Profile::Release; + build_helper::warning!( + "Unknown cargo profile `{}`. Defaulted to `{:?}` for the runtime build.", + name, + profile, + ); + profile + }, + // Invalid profile specified. + (None, true) => { + // We use println! + exit instead of a panic in order to have a cleaner output. + println!( + "Unexpected profile name: `{}`. One of the following is expected: {:?}", + name, + Profile::iter().map(|p| p.directory()).collect::>(), + ); + process::exit(1); + }, + } + } + + /// The name of the profile as supplied to the cargo `--profile` cli option. + fn name(&self) -> &'static str { + match self { + Self::Debug => "dev", + Self::Release => "release", + Self::Production => "production", + } + } + + /// The sub directory within `target` where cargo places the build output. 
+ /// + /// # Note + /// + /// Usually this is the same as [`Self::name`] with the exception of the debug + /// profile which is called `dev`. + fn directory(&self) -> &'static str { + match self { + Self::Debug => "debug", + _ => self.name(), + } + } + + /// Whether the resulting binary should be compacted and compressed. + fn wants_compact(&self) -> bool { + !matches!(self, Self::Debug) + } +} + +/// Check environment whether we should build without network +fn offline_build() -> bool { + env::var(OFFLINE).map_or(false, |v| v == "true") +} + +/// Build the project to create the binary. +fn build_project( + project: &Path, + default_rustflags: &str, + cargo_cmd: CargoCommandVersioned, + target: &str, +) -> Profile { + let manifest_path = project.join("Cargo.toml"); + let mut build_cmd = cargo_cmd.command(); + + let rustflags = format!( + "-C target-cpu=mvp -C target-feature=-sign-ext -C link-arg=--export-table {} {}", + default_rustflags, + env::var(crate::BUILD_RUSTFLAGS_ENV).unwrap_or_default(), + ); + + build_cmd + .args(&["rustc", &format!("--target={}", target)]) + .arg(format!("--manifest-path={}", manifest_path.display())) + .env("RUSTFLAGS", rustflags) + // Unset the `CARGO_TARGET_DIR` to prevent a cargo deadlock (cargo locks a target dir + // exclusive). The runner project is created in `CARGO_TARGET_DIR` and executing it will + // create a sub target directory inside of `CARGO_TARGET_DIR`. + .env_remove("CARGO_TARGET_DIR") + // As we are being called inside a build-script, this env variable is set. However, we set + // our own `RUSTFLAGS` and thus, we need to remove this. Otherwise cargo favors this + // env variable. 
+ .env_remove("CARGO_ENCODED_RUSTFLAGS") + // We don't want to call ourselves recursively + .env(crate::SKIP_BUILD_ENV, ""); + + if super::color_output_enabled() { + build_cmd.arg("--color=always"); + } + + let profile = Profile::detect(project); + build_cmd.arg("--profile"); + build_cmd.arg(profile.name()); + + if offline_build() { + build_cmd.arg("--offline"); + } + + println!("{}", colorize_info_message("Information that should be included in a bug report.")); + println!("{} {:?}", colorize_info_message("Executing build command:"), build_cmd); + println!("{} {}", colorize_info_message("Using rustc version:"), cargo_cmd.rustc_version()); + + match build_cmd.status().map(|s| s.success()) { + Ok(true) => profile, + // Use `process.exit(1)` to have a clean error output. + _ => process::exit(1), + } +} + +/// Compact the binary if supported for the target. +fn compact_file( + project: &Path, + profile: Profile, + cargo_manifest: &Path, + out_name: Option, + target: &str, +) -> (Option, Option, BinaryBloaty) { + let default_out_name = get_binary_name(cargo_manifest); + let out_name = out_name.unwrap_or_else(|| default_out_name.clone()); + let in_path = project + .join(format!("target/{}", target)) + .join(profile.directory()) + .join(format!("{}", default_out_name)); + + let (compact_path, compact_compressed_path) = if profile.wants_compact() { + // TODO: For a generalized builder we may want to support passing in a function to compact the + // binary. 
+ + let compact_path = project.join(format!("{}.compact", out_name,)); + + let compact_compressed_path = project.join(format!("{}.compact.compressed", out_name)); + if compress(&compact_path, &compact_compressed_path) { + (Some(Binary(compact_path)), Some(Binary(compact_compressed_path))) + } else { + (Some(Binary(compact_path)), None) + } + } else { + (None, None) + }; + + let bloaty_path = project.join(format!("{}", out_name)); + fs::copy(in_path, &bloaty_path).expect("Copying the bloaty file to the project dir."); + + (compact_path, compact_compressed_path, BinaryBloaty(bloaty_path)) +} + +fn compress(binary_path: &Path, compressed_binary_out_path: &Path) -> bool { + use sp_maybe_compressed_blob::CODE_BLOB_BOMB_LIMIT; + + let data = fs::read(binary_path).expect("Failed to read binary"); + if let Some(compressed) = sp_maybe_compressed_blob::compress(&data, CODE_BLOB_BOMB_LIMIT) { + fs::write(compressed_binary_out_path, &compressed[..]) + .expect("Failed to write compressed binary"); + + true + } else { + build_helper::warning!( + "Writing uncompressed binary. Exceeded maximum size {}", + CODE_BLOB_BOMB_LIMIT, + ); + + false + } +} + +/// Custom wrapper for a [`cargo_metadata::Package`] to store it in +/// a `HashSet`. 
+#[derive(Debug)] +struct DeduplicatePackage<'a> { + package: &'a cargo_metadata::Package, + identifier: String, +} + +impl<'a> From<&'a cargo_metadata::Package> for DeduplicatePackage<'a> { + fn from(package: &'a cargo_metadata::Package) -> Self { + Self { + package, + identifier: format!("{}{}{:?}", package.name, package.version, package.source), + } + } +} + +impl<'a> Hash for DeduplicatePackage<'a> { + fn hash(&self, state: &mut H) { + self.identifier.hash(state); + } +} + +impl<'a> PartialEq for DeduplicatePackage<'a> { + fn eq(&self, other: &Self) -> bool { + self.identifier == other.identifier + } +} + +impl<'a> Eq for DeduplicatePackage<'a> {} + +impl<'a> Deref for DeduplicatePackage<'a> { + type Target = cargo_metadata::Package; + + fn deref(&self) -> &Self::Target { + self.package + } +} + +fn create_metadata_command(path: impl Into) -> MetadataCommand { + let mut metadata_command = MetadataCommand::new(); + metadata_command.manifest_path(path); + + if offline_build() { + metadata_command.other_options(vec!["--offline".to_owned()]); + } + metadata_command +} + +/// Generate the `rerun-if-changed` instructions for cargo to make sure that the binary is +/// rebuilt when needed. +fn generate_rerun_if_changed_instructions( + cargo_manifest: &Path, + project_folder: &Path, + workspace: &Path, + compressed_or_compact: Option<&Binary>, + bloaty_binary: &BinaryBloaty, +) { + // Rerun `build.rs` if the `Cargo.lock` changes + if let Some(cargo_lock) = find_cargo_lock(cargo_manifest) { + rerun_if_changed(cargo_lock); + } + + let metadata = create_metadata_command(project_folder.join("Cargo.toml")) + .exec() + .expect("`cargo metadata` can not fail!"); + + let package = metadata + .packages + .iter() + .find(|p| p.manifest_path == cargo_manifest) + .expect("The crate package is contained in its own metadata; qed"); + + // Start with the dependencies of the crate we want to compile. 
+	let mut dependencies = package.dependencies.iter().collect::<Vec<_>>();
+
+	// Collect all packages by following the dependencies of all packages we find.
+	let mut packages = HashSet::new();
+	packages.insert(DeduplicatePackage::from(package));
+
+	while let Some(dependency) = dependencies.pop() {
+		let path_or_git_dep =
+			dependency.source.as_ref().map(|s| s.starts_with("git+")).unwrap_or(true);
+
+		let package = metadata
+			.packages
+			.iter()
+			.filter(|p| !p.manifest_path.starts_with(workspace))
+			.find(|p| {
+				// Check that the name matches and that the version matches or this is
+				// a git or path dep. A git or path dependency can only occur once, so we don't
+				// need to check the version.
+				(path_or_git_dep || dependency.req.matches(&p.version)) && dependency.name == p.name
+			});
+
+		if let Some(package) = package {
+			if packages.insert(DeduplicatePackage::from(package)) {
+				dependencies.extend(package.dependencies.iter());
+			}
+		}
+	}
+
+	// Make sure that if any file/folder of a dependency changes, we need to rerun the `build.rs`
+	packages.iter().for_each(package_rerun_if_changed);
+
+	compressed_or_compact.map(|w| rerun_if_changed(w.binary_path()));
+	rerun_if_changed(bloaty_binary.binary_bloaty_path());
+
+	// Register our env variables
+	println!("cargo:rerun-if-env-changed={}", crate::SKIP_BUILD_ENV);
+	println!("cargo:rerun-if-env-changed={}", crate::BUILD_TYPE_ENV);
+	println!("cargo:rerun-if-env-changed={}", crate::BUILD_RUSTFLAGS_ENV);
+	println!("cargo:rerun-if-env-changed={}", crate::TARGET_DIRECTORY);
+	println!("cargo:rerun-if-env-changed={}", crate::BUILD_TOOLCHAIN);
+}
+
+/// Track files and paths related to the given package to rerun `build.rs` on any relevant change.
+fn package_rerun_if_changed(package: &DeduplicatePackage) { + let mut manifest_path = package.manifest_path.clone(); + if manifest_path.ends_with("Cargo.toml") { + manifest_path.pop(); + } + + WalkDir::new(&manifest_path) + .into_iter() + .filter_entry(|p| { + // Ignore this entry if it is a directory that contains a `Cargo.toml` that is not the + // `Cargo.toml` related to the current package. This is done to ignore sub-crates of a + // crate. If such a sub-crate is a dependency, it will be processed independently + // anyway. + p.path() == manifest_path || !p.path().is_dir() || !p.path().join("Cargo.toml").exists() + }) + .filter_map(|p| p.ok().map(|p| p.into_path())) + .filter(|p| { + p.is_dir() || p.extension().map(|e| e == "rs" || e == "toml").unwrap_or_default() + }) + .for_each(rerun_if_changed); +} + +/// Copy the binary to the target directory set in `TARGET_DIRECTORY` environment +/// variable. If the variable is not set, this is a no-op. +fn copy_binary_to_target_directory(cargo_manifest: &Path, binary: &Binary) { + let target_dir = match env::var(crate::TARGET_DIRECTORY) { + Ok(path) => PathBuf::from(path), + Err(_) => return, + }; + + if !target_dir.is_absolute() { + // We use println! + exit instead of a panic in order to have a cleaner output. + println!( + "Environment variable `{}` with `{}` is not an absolute path!", + crate::TARGET_DIRECTORY, + target_dir.display(), + ); + process::exit(1); + } + + fs::create_dir_all(&target_dir).expect("Creates `TARGET_DIRECTORY`."); + + fs::copy(binary.binary_path(), target_dir.join(format!("{}", get_binary_name(cargo_manifest)))) + .expect("Copies binary to `TARGET_DIRECTORY`."); +} diff --git a/node/core/pvf/musl-builder/src/version.rs b/node/core/pvf/musl-builder/src/version.rs new file mode 100644 index 000000000000..bc526e107184 --- /dev/null +++ b/node/core/pvf/musl-builder/src/version.rs @@ -0,0 +1,197 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
+
+use std::cmp::Ordering;
+
+/// The version of rustc/cargo.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Version {
+	pub major: u32,
+	pub minor: u32,
+	pub patch: u32,
+	pub is_nightly: bool,
+	pub year: u32,
+	pub month: u32,
+	pub day: u32,
+}
+
+impl Version {
+	/// Returns if `self` is a stable version.
+	pub fn is_stable(&self) -> bool {
+		!self.is_nightly
+	}
+
+	/// Returns if `self` is a nightly version.
+	pub fn is_nightly(&self) -> bool {
+		self.is_nightly
+	}
+
+	/// Extract from the given `version` string.
+	pub fn extract(version: &str) -> Option<Self> {
+		let mut is_nightly = false;
+		let version_parts = version
+			.trim()
+			.split(" ")
+			.nth(1)?
+			.split(".")
+			.filter_map(|v| {
+				if let Some(rest) = v.strip_suffix("-nightly") {
+					is_nightly = true;
+					rest.parse().ok()
+				} else {
+					v.parse().ok()
+				}
+			})
+			.collect::<Vec<u32>>();
+
+		if version_parts.len() != 3 {
+			return None
+		}
+
+		let date = version.split(" ").nth(3)?;
+
+		let date_parts = date
+			.split("-")
+			.filter_map(|v| v.trim().strip_suffix(")").unwrap_or(v).parse().ok())
+			.collect::<Vec<u32>>();
+
+		if date_parts.len() != 3 {
+			return None
+		}
+
+		Some(Version {
+			major: version_parts[0],
+			minor: version_parts[1],
+			patch: version_parts[2],
+			is_nightly,
+			year: date_parts[0],
+			month: date_parts[1],
+			day: date_parts[2],
+		})
+	}
+}
+
+/// Ordering is done in the following way:
+///
+/// 1. 
`stable` > `nightly`
+/// 2. Then compare major, minor and patch.
+/// 3. Last compare the date.
+impl Ord for Version {
+	fn cmp(&self, other: &Self) -> Ordering {
+		if self == other {
+			return Ordering::Equal
+		}
+
+		// Ensure that `stable > nightly`
+		if self.is_stable() && other.is_nightly() {
+			return Ordering::Greater
+		} else if self.is_nightly() && other.is_stable() {
+			return Ordering::Less
+		}
+
+		let to_compare = [
+			(self.major, other.major),
+			(self.minor, other.minor),
+			(self.patch, other.patch),
+			(self.year, other.year),
+			(self.month, other.month),
+			(self.day, other.day),
+		];
+
+		to_compare
+			.iter()
+			.find_map(|(l, r)| if l != r { l.partial_cmp(&r) } else { None })
+			// We already checked at the beginning that the versions are not equal,
+			// so we should never return `Equal` here.
+			.unwrap_or(Ordering::Equal)
+	}
+}
+
+impl PartialOrd for Version {
+	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+		Some(self.cmp(other))
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	#[test]
+	fn version_compare_and_extract_works() {
+		let version_1_66_0 = Version::extract("cargo 1.66.0 (d65d197ad 2022-11-15)").unwrap();
+		let version_1_66_1 = Version::extract("cargo 1.66.1 (d65d197ad 2022-11-15)").unwrap();
+		let version_1_66_0_nightly =
+			Version::extract("cargo 1.66.0-nightly (d65d197ad 2022-10-15)").unwrap();
+		let version_1_66_0_nightly_older_date =
+			Version::extract("cargo 1.66.0-nightly (d65d197ad 2022-10-14)").unwrap();
+		let version_1_65_0 = Version::extract("cargo 1.65.0 (d65d197ad 2022-10-15)").unwrap();
+		let version_1_65_0_older_date =
+			Version::extract("cargo 1.65.0 (d65d197ad 2022-10-14)").unwrap();
+
+		assert!(version_1_66_1 > version_1_66_0);
+		assert!(version_1_66_1 > version_1_65_0);
+		assert!(version_1_66_1 > version_1_66_0_nightly);
+		assert!(version_1_66_1 > version_1_66_0_nightly_older_date);
+		assert!(version_1_66_1 > version_1_65_0_older_date);
+
+		assert!(version_1_66_0 > version_1_65_0);
+		assert!(version_1_66_0 > version_1_66_0_nightly);
+		
assert!(version_1_66_0 > version_1_66_0_nightly_older_date); + assert!(version_1_66_0 > version_1_65_0_older_date); + + assert!(version_1_65_0 > version_1_66_0_nightly); + assert!(version_1_65_0 > version_1_66_0_nightly_older_date); + assert!(version_1_65_0 > version_1_65_0_older_date); + + let mut versions = vec![ + version_1_66_0, + version_1_66_0_nightly, + version_1_66_0_nightly_older_date, + version_1_65_0_older_date, + version_1_65_0, + version_1_66_1, + ]; + versions.sort_by(|a, b| b.cmp(a)); + + let expected_versions_order = vec![ + version_1_66_1, + version_1_66_0, + version_1_65_0, + version_1_65_0_older_date, + version_1_66_0_nightly, + version_1_66_0_nightly_older_date, + ]; + assert_eq!(expected_versions_order, versions); + } + + #[test] + fn parse_with_newline() { + let version_1_66_0 = Version::extract("cargo 1.66.0 (d65d197ad 2022-11-15)\n").unwrap(); + assert_eq!( + Version { + major: 1, + minor: 66, + patch: 0, + is_nightly: false, + year: 2022, + month: 11, + day: 15 + }, + version_1_66_0 + ); + } +} From 126aa26b04d76ac3756d1032c900a1f36b437113 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 28 Apr 2023 11:32:27 +0200 Subject: [PATCH 08/13] Move each worker into separate crates, reorganize, fix extraction --- Cargo.lock | 54 ++++++-- Cargo.toml | 7 +- cli/Cargo.toml | 2 - node/core/pvf/Cargo.toml | 11 +- node/core/pvf/common/Cargo.toml | 12 ++ node/core/pvf/{worker => common}/build.rs | 0 node/core/pvf/common/src/executor_intf.rs | 111 ++++++++++++++++ node/core/pvf/common/src/lib.rs | 12 ++ .../src/common.rs => common/src/worker.rs} | 38 ++++++ node/core/pvf/execute-worker/Cargo.toml | 38 ++++++ node/core/pvf/execute-worker/build.rs | 33 +++++ .../src/executor_intf.rs | 122 +----------------- .../execute.rs => execute-worker/src/lib.rs} | 22 +++- .../src/main.rs} | 6 +- .../pvf/{worker => prepare-worker}/Cargo.toml | 17 +-- node/core/pvf/prepare-worker/build.rs | 35 +++++ .../pvf/prepare-worker/src/executor_intf.rs | 42 ++++++ .../prepare.rs 
=> prepare-worker/src/lib.rs} | 24 +++- .../src/main.rs} | 6 +- .../src/memory_stats.rs | 0 node/core/pvf/src/host.rs | 80 +++++++++--- node/core/pvf/src/lib.rs | 4 - node/core/pvf/src/prepare/queue.rs | 5 +- node/core/pvf/src/prepare/worker_intf.rs | 2 +- node/core/pvf/src/testing.rs | 41 +++++- node/core/pvf/tests/it/main.rs | 14 +- node/core/pvf/worker/src/lib.rs | 80 ------------ node/core/pvf/worker/src/testing.rs | 49 ------- node/malus/Cargo.toml | 3 +- node/malus/src/malus.rs | 4 +- node/test/performance-test/Cargo.toml | 2 +- node/test/performance-test/src/lib.rs | 6 +- .../test-parachains/adder/collator/Cargo.toml | 2 +- .../adder/collator/bin/puppet_worker.rs | 2 +- .../test-parachains/adder/collator/src/lib.rs | 2 +- .../undying/collator/Cargo.toml | 2 +- .../undying/collator/bin/puppet_worker.rs | 2 +- .../undying/collator/src/lib.rs | 2 +- 38 files changed, 552 insertions(+), 342 deletions(-) rename node/core/pvf/{worker => common}/build.rs (100%) create mode 100644 node/core/pvf/common/src/executor_intf.rs rename node/core/pvf/{worker/src/common.rs => common/src/worker.rs} (84%) create mode 100644 node/core/pvf/execute-worker/Cargo.toml create mode 100644 node/core/pvf/execute-worker/build.rs rename node/core/pvf/{worker => execute-worker}/src/executor_intf.rs (66%) rename node/core/pvf/{worker/src/execute.rs => execute-worker/src/lib.rs} (89%) rename node/core/pvf/{worker/bin/execute_worker.rs => execute-worker/src/main.rs} (84%) rename node/core/pvf/{worker => prepare-worker}/Cargo.toml (79%) create mode 100644 node/core/pvf/prepare-worker/build.rs create mode 100644 node/core/pvf/prepare-worker/src/executor_intf.rs rename node/core/pvf/{worker/src/prepare.rs => prepare-worker/src/lib.rs} (91%) rename node/core/pvf/{worker/bin/prepare_worker.rs => prepare-worker/src/main.rs} (84%) rename node/core/pvf/{worker => prepare-worker}/src/memory_stats.rs (100%) delete mode 100644 node/core/pvf/worker/src/lib.rs delete mode 100644 
node/core/pvf/worker/src/testing.rs diff --git a/Cargo.lock b/Cargo.lock index 482c0e5f4e96..696ba8418f47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6586,7 +6586,7 @@ dependencies = [ "nix 0.26.2", "polkadot-cli", "polkadot-core-primitives", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf-prepare-worker", "polkadot-overseer", "substrate-rpc-client", "tempfile", @@ -6712,7 +6712,6 @@ dependencies = [ "futures", "log", "polkadot-client", - "polkadot-node-core-pvf-worker", "polkadot-node-metrics", "polkadot-performance-test", "polkadot-service", @@ -7190,7 +7189,8 @@ dependencies = [ "pin-project", "polkadot-core-primitives", "polkadot-node-core-pvf-common", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf-execute-worker", + "polkadot-node-core-pvf-prepare-worker", "polkadot-node-metrics", "polkadot-node-primitives", "polkadot-parachain", @@ -7236,11 +7236,44 @@ dependencies = [ name = "polkadot-node-core-pvf-common" version = "0.9.41" dependencies = [ + "cpu-time", + "futures", + "libc", + "parity-scale-codec", + "polkadot-parachain", + "polkadot-primitives", + "sc-executor-common", + "sc-executor-wasmtime", + "sp-core", + "sp-tracing", + "substrate-build-script-utils", + "tokio", + "tracing-gum", +] + +[[package]] +name = "polkadot-node-core-pvf-execute-worker" +version = "0.9.41" +dependencies = [ + "cpu-time", + "futures", "parity-scale-codec", + "polkadot-node-core-pvf-common", + "polkadot-node-core-pvf-musl-builder", "polkadot-parachain", "polkadot-primitives", + "rayon", + "sc-executor", + "sc-executor-common", + "sc-executor-wasmtime", "sp-core", + "sp-externalities", + "sp-io", + "sp-maybe-compressed-blob", + "tempfile", + "tikv-jemalloc-ctl", "tokio", + "tracing-gum", ] [[package]] @@ -7259,22 +7292,20 @@ dependencies = [ ] [[package]] -name = "polkadot-node-core-pvf-worker" +name = "polkadot-node-core-pvf-prepare-worker" version = "0.9.41" dependencies = [ - "cpu-time", "futures", "libc", "parity-scale-codec", 
"polkadot-node-core-pvf-common", + "polkadot-node-core-pvf-musl-builder", "polkadot-parachain", "polkadot-primitives", "rayon", "sc-executor", "sc-executor-common", "sc-executor-wasmtime", - "sp-core", - "sp-externalities", "sp-io", "sp-maybe-compressed-blob", "sp-tracing", @@ -7534,7 +7565,7 @@ dependencies = [ "kusama-runtime", "log", "polkadot-erasure-coding", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf-prepare-worker", "polkadot-node-primitives", "polkadot-primitives", "quote", @@ -8037,7 +8068,8 @@ dependencies = [ "polkadot-node-core-backing", "polkadot-node-core-candidate-validation", "polkadot-node-core-dispute-coordinator", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf-execute-worker", + "polkadot-node-core-pvf-prepare-worker", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", @@ -12097,7 +12129,7 @@ dependencies = [ "log", "parity-scale-codec", "polkadot-cli", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-parachain", @@ -12145,7 +12177,7 @@ dependencies = [ "log", "parity-scale-codec", "polkadot-cli", - "polkadot-node-core-pvf-worker", + "polkadot-node-core-pvf", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-parachain", diff --git a/Cargo.toml b/Cargo.toml index f587ff004753..178db8f3a006 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ tikv-jemallocator = "0.5.0" # Crates in our workspace, defined as dependencies so we can pass them feature flags. 
polkadot-cli = { path = "cli", features = [ "kusama-native", "westend-native", "rococo-native" ] } -polkadot-node-core-pvf-worker = { path = "node/core/pvf/worker" } +polkadot-node-core-pvf-prepare-worker = { path = "node/core/pvf/prepare-worker" } polkadot-overseer = { path = "node/overseer" } [dev-dependencies] @@ -82,7 +82,8 @@ members = [ "node/core/pvf", "node/core/pvf/common", "node/core/pvf/musl-builder", - "node/core/pvf/worker", + "node/core/pvf/execute-worker", + "node/core/pvf/prepare-worker", "node/core/pvf-checker", "node/core/runtime-api", "node/network/approval-distribution", @@ -209,7 +210,7 @@ try-runtime = [ "polkadot-cli/try-runtime" ] fast-runtime = [ "polkadot-cli/fast-runtime" ] runtime-metrics = [ "polkadot-cli/runtime-metrics" ] pyroscope = ["polkadot-cli/pyroscope"] -jemalloc-allocator = ["polkadot-node-core-pvf-worker/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"] +jemalloc-allocator = ["polkadot-node-core-pvf-prepare-worker/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"] # Configuration for building a .deb package - for use with `cargo-deb` [package.metadata.deb] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 4d08ee18ed1b..99c5466ab22e 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -22,7 +22,6 @@ pyro = { package = "pyroscope", version = "0.3.1", optional = true } service = { package = "polkadot-service", path = "../node/service", default-features = false, optional = true } polkadot-client = { path = "../node/client", optional = true } -polkadot-node-core-pvf-worker = { path = "../node/core/pvf/worker", optional = true } polkadot-performance-test = { path = "../node/test/performance-test", optional = true } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } @@ -53,7 +52,6 @@ cli = [ "frame-benchmarking-cli", "try-runtime-cli", "polkadot-client", - "polkadot-node-core-pvf-worker", ] runtime-benchmarks = [ "service/runtime-benchmarks", diff --git a/node/core/pvf/Cargo.toml 
b/node/core/pvf/Cargo.toml index 0a9b38012e30..033a58271cd9 100644 --- a/node/core/pvf/Cargo.toml +++ b/node/core/pvf/Cargo.toml @@ -4,6 +4,10 @@ version.workspace = true authors.workspace = true edition.workspace = true +[[bin]] +name = "puppet_worker" +path = "bin/puppet_worker.rs" + [dependencies] always-assert = "0.1" futures = "0.3.21" @@ -13,6 +17,7 @@ libc = "0.2.139" pin-project = "1.0.9" rand = "0.8.5" slotmap = "1.0" +tempfile = "3.3.0" tokio = { version = "1.24.2", features = ["fs", "process"] } parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } @@ -20,8 +25,9 @@ parity-scale-codec = { version = "3.4.0", default-features = false, features = [ polkadot-parachain = { path = "../../../parachain" } polkadot-core-primitives = { path = "../../../core-primitives" } polkadot-node-core-pvf-common = { path = "common" } -# Must depend on the worker because the binaries must have been already built. -polkadot-node-core-pvf-worker = { path = "worker" } +# Must depend on the workers because the binaries must have been already built. 
+polkadot-node-core-pvf-execute-worker = { path = "execute-worker" } +polkadot-node-core-pvf-prepare-worker = { path = "prepare-worker" } polkadot-node-metrics = { path = "../../metrics" } polkadot-node-primitives = { path = "../../primitives" } polkadot-primitives = { path = "../../../primitives" } @@ -37,7 +43,6 @@ substrate-build-script-utils = { git = "https://github.com/paritytech/substrate" [dev-dependencies] assert_matches = "1.4.0" hex-literal = "0.3.4" -tempfile = "3.3.0" adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" } halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" } diff --git a/node/core/pvf/common/Cargo.toml b/node/core/pvf/common/Cargo.toml index 9da2bdd91224..b9c9ac6286cb 100644 --- a/node/core/pvf/common/Cargo.toml +++ b/node/core/pvf/common/Cargo.toml @@ -4,7 +4,12 @@ version.workspace = true authors.workspace = true edition.workspace = true +# TODO: cargo udeps [dependencies] +cpu-time = "1.0.0" +futures = "0.3.21" +gum = { package = "tracing-gum", path = "../../../gum" } +libc = "0.2.139" tokio = { version = "1.24.2", features = ["fs", "process", "io-util"] } parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } @@ -12,4 +17,11 @@ parity-scale-codec = { version = "3.4.0", default-features = false, features = [ polkadot-parachain = { path = "../../../../parachain" } polkadot-primitives = { path = "../../../../primitives" } +sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } + sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } + +[build-dependencies] +substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git 
a/node/core/pvf/worker/build.rs b/node/core/pvf/common/build.rs similarity index 100% rename from node/core/pvf/worker/build.rs rename to node/core/pvf/common/build.rs diff --git a/node/core/pvf/common/src/executor_intf.rs b/node/core/pvf/common/src/executor_intf.rs new file mode 100644 index 000000000000..acb6779e5ee3 --- /dev/null +++ b/node/core/pvf/common/src/executor_intf.rs @@ -0,0 +1,111 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_primitives::{ExecutorParam, ExecutorParams}; +use sc_executor_common::wasm_runtime::HeapAllocStrategy; +use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics}; + +// Memory configuration +// +// When Substrate Runtime is instantiated, a number of WASM pages are allocated for the Substrate +// Runtime instance's linear memory. The exact number of pages is a sum of whatever the WASM blob +// itself requests (by default at least enough to hold the data section as well as have some space +// left for the stack; this is, of course, overridable at link time when compiling the runtime) +// plus the number of pages specified in the `extra_heap_pages` passed to the executor. +// +// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16 pages. 
+// The data section for runtimes are typically rather small and can fit in a single digit number of +// WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB are used for +// these needs by default. +const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32; +const EXTRA_HEAP_PAGES: u32 = 2048; + +/// The number of bytes devoted for the stack during wasm execution of a PVF. +pub const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024; + +// VALUES OF THE DEFAULT CONFIGURATION SHOULD NEVER BE CHANGED +// They are used as base values for the execution environment parametrization. +// To overwrite them, add new ones to `EXECUTOR_PARAMS` in the `session_info` pallet and perform +// a runtime upgrade to make them active. +pub const DEFAULT_CONFIG: Config = Config { + allow_missing_func_imports: true, + cache_path: None, + semantics: Semantics { + heap_alloc_strategy: HeapAllocStrategy::Dynamic { + maximum_pages: Some(DEFAULT_HEAP_PAGES_ESTIMATE + EXTRA_HEAP_PAGES), + }, + + instantiation_strategy: + sc_executor_wasmtime::InstantiationStrategy::RecreateInstanceCopyOnWrite, + + // Enable deterministic stack limit to pin down the exact number of items the wasmtime stack + // can contain before it traps with stack overflow. + // + // Here is how the values below were chosen. + // + // At the moment of writing, the default native stack size limit is 1 MiB. Assuming a logical item + // (see the docs about the field and the instrumentation algorithm) is 8 bytes, 1 MiB can + // fit 2x 65536 logical items. + // + // Since reaching the native stack limit is undesirable, we halve the logical item limit and + // also increase the native 256x. This hopefully should preclude wasm code from reaching + // the stack limit set by the wasmtime. 
+ deterministic_stack_limit: Some(DeterministicStackLimit { + logical_max: 65536, + native_stack_max: NATIVE_STACK_MAX, + }), + canonicalize_nans: true, + // Rationale for turning the multi-threaded compilation off is to make the preparation time + // easily reproducible and as deterministic as possible. + // + // Currently the prepare queue doesn't distinguish between precheck and prepare requests. + // On the one hand, it simplifies the code, on the other, however, slows down compile times + // for execute requests. This behavior may change in future. + parallel_compilation: false, + + // WASM extensions. Only those that are meaningful to us may be controlled here. By default, + // we're using WASM MVP, which means all the extensions are disabled. Nevertheless, some + // extensions (e.g., sign extension ops) are enabled by Wasmtime and cannot be disabled. + wasm_reference_types: false, + wasm_simd: false, + wasm_bulk_memory: false, + wasm_multi_value: false, + }, +}; + +pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result { + let mut sem = DEFAULT_CONFIG.semantics.clone(); + let mut stack_limit = if let Some(stack_limit) = sem.deterministic_stack_limit.clone() { + stack_limit + } else { + return Err("No default stack limit set".to_owned()) + }; + + for p in par.iter() { + match p { + ExecutorParam::MaxMemoryPages(max_pages) => + sem.heap_alloc_strategy = + HeapAllocStrategy::Dynamic { maximum_pages: Some(*max_pages) }, + ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm, + ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm, + ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true, + ExecutorParam::PrecheckingMaxMemory(_) => (), // TODO: Not implemented yet + ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), // Not used here + } + } + sem.deterministic_stack_limit = Some(stack_limit); + Ok(sem) +} diff --git a/node/core/pvf/common/src/lib.rs 
b/node/core/pvf/common/src/lib.rs index 7d48684f6207..eaea6397e077 100644 --- a/node/core/pvf/common/src/lib.rs +++ b/node/core/pvf/common/src/lib.rs @@ -14,10 +14,22 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +//! Functionality that is shared by the host and the workers. + pub mod error; pub mod execute; +pub mod executor_intf; pub mod prepare; pub mod pvf; +pub mod worker; + +#[doc(hidden)] +pub use sp_tracing; + +pub use cpu_time::ProcessTime; + +// TODO: Is this right? +const LOG_TARGET: &str = "parachain::pvf::common"; use std::mem; use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _}; diff --git a/node/core/pvf/worker/src/common.rs b/node/core/pvf/common/src/worker.rs similarity index 84% rename from node/core/pvf/worker/src/common.rs rename to node/core/pvf/common/src/worker.rs index 84bc88701d62..14c40e2cd15c 100644 --- a/node/core/pvf/worker/src/common.rs +++ b/node/core/pvf/common/src/worker.rs @@ -14,6 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +//! Functionality common to both prepare and execute workers. + use crate::LOG_TARGET; use cpu_time::ProcessTime; use futures::never::Never; @@ -28,6 +30,42 @@ use tokio::{ runtime::{Handle, Runtime}, }; +/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for +/// spawning the desired worker. +#[macro_export] +macro_rules! 
decl_worker_main { + ($expected_command:expr, $entrypoint:expr) => { + fn main() { + $crate::sp_tracing::try_init_simple(); + + let args = std::env::args().collect::>(); + if args.len() < 3 { + panic!("wrong number of arguments"); + } + + let mut version = None; + let mut socket_path: &str = ""; + + for i in 2..args.len() { + match args[i].as_ref() { + "--socket-path" => socket_path = args[i + 1].as_str(), + "--node-version" => version = Some(args[i + 1].as_str()), + _ => (), + } + } + + let subcommand = &args[1]; + if subcommand != $expected_command { + panic!( + "trying to run {} binary with the {} subcommand", + $expected_command, subcommand + ) + } + $entrypoint(&socket_path, version); + } + }; +} + /// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the /// child process. pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50); diff --git a/node/core/pvf/execute-worker/Cargo.toml b/node/core/pvf/execute-worker/Cargo.toml new file mode 100644 index 000000000000..982e0a4de9a2 --- /dev/null +++ b/node/core/pvf/execute-worker/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "polkadot-node-core-pvf-execute-worker" +version.workspace = true +authors.workspace = true +edition.workspace = true + +# TODO: cargo udeps +[dependencies] +cpu-time = "1.0.0" +futures = "0.3.21" +gum = { package = "tracing-gum", path = "../../../gum" } +rayon = "1.5.1" +tempfile = "3.3.0" +tikv-jemalloc-ctl = { version = "0.5.0", optional = true } +tokio = { version = "1.24.2", features = ["fs", "process"] } + +parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] } + +polkadot-node-core-pvf-common = { path = "../common" } +polkadot-parachain = { path = "../../../../parachain" } +polkadot-primitives = { path = "../../../../primitives" } + +sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = 
"master" } +sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } + +[target.'cfg(target_os = "linux")'.dependencies] +tikv-jemalloc-ctl = "0.5.0" + +[build-dependencies] +polkadot-node-core-pvf-musl-builder = { path = "../musl-builder" } + +[features] +builder = [] diff --git a/node/core/pvf/execute-worker/build.rs b/node/core/pvf/execute-worker/build.rs new file mode 100644 index 000000000000..b7d3aff04e55 --- /dev/null +++ b/node/core/pvf/execute-worker/build.rs @@ -0,0 +1,33 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +fn main() { + let builder = polkadot_node_core_pvf_musl_builder::Builder::new() + // Tell the builder to build the project (crate) this `build.rs` is part of. + .with_current_project(); + + // Only require musl on supported secure-mode platforms. 
+ #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + let builder = builder.with_target("x86_64-unknown-linux-musl"); + #[cfg(not(all(target_arch = "x86_64", target_os = "linux")))] + let builder = builder.with_current_target(); + + builder + .set_file_name("execute-worker.rs") + .set_constant_name("EXECUTE_EXE") + // Build it. + .build(); +} diff --git a/node/core/pvf/worker/src/executor_intf.rs b/node/core/pvf/execute-worker/src/executor_intf.rs similarity index 66% rename from node/core/pvf/worker/src/executor_intf.rs rename to node/core/pvf/execute-worker/src/executor_intf.rs index 54bf6fd6bc17..c96585354f61 100644 --- a/node/core/pvf/worker/src/executor_intf.rs +++ b/node/core/pvf/execute-worker/src/executor_intf.rs @@ -16,12 +16,15 @@ //! Interface to the Substrate Executor -use polkadot_primitives::{ExecutorParam, ExecutorParams}; +use polkadot_node_core_pvf_common::executor_intf::{ + params_to_wasmtime_semantics, DEFAULT_CONFIG, NATIVE_STACK_MAX, +}; +use polkadot_primitives::ExecutorParams; use sc_executor_common::{ runtime_blob::RuntimeBlob, - wasm_runtime::{HeapAllocStrategy, InvokeMethod, WasmModule as _}, + wasm_runtime::{InvokeMethod, WasmModule as _}, }; -use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics}; +use sc_executor_wasmtime::Config; use sp_core::storage::{ChildInfo, TrackedStorageKey}; use sp_externalities::MultiRemovalResults; use std::{ @@ -29,119 +32,6 @@ use std::{ path::Path, }; -// Memory configuration -// -// When Substrate Runtime is instantiated, a number of WASM pages are allocated for the Substrate -// Runtime instance's linear memory. The exact number of pages is a sum of whatever the WASM blob -// itself requests (by default at least enough to hold the data section as well as have some space -// left for the stack; this is, of course, overridable at link time when compiling the runtime) -// plus the number of pages specified in the `extra_heap_pages` passed to the executor. 
-// -// By default, rustc (or `lld` specifically) should allocate 1 MiB for the shadow stack, or 16 pages. -// The data section for runtimes are typically rather small and can fit in a single digit number of -// WASM pages, so let's say an extra 16 pages. Thus let's assume that 32 pages or 2 MiB are used for -// these needs by default. -const DEFAULT_HEAP_PAGES_ESTIMATE: u32 = 32; -const EXTRA_HEAP_PAGES: u32 = 2048; - -/// The number of bytes devoted for the stack during wasm execution of a PVF. -const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024; - -// VALUES OF THE DEFAULT CONFIGURATION SHOULD NEVER BE CHANGED -// They are used as base values for the execution environment parametrization. -// To overwrite them, add new ones to `EXECUTOR_PARAMS` in the `session_info` pallet and perform -// a runtime upgrade to make them active. -const DEFAULT_CONFIG: Config = Config { - allow_missing_func_imports: true, - cache_path: None, - semantics: Semantics { - heap_alloc_strategy: sc_executor_common::wasm_runtime::HeapAllocStrategy::Dynamic { - maximum_pages: Some(DEFAULT_HEAP_PAGES_ESTIMATE + EXTRA_HEAP_PAGES), - }, - - instantiation_strategy: - sc_executor_wasmtime::InstantiationStrategy::RecreateInstanceCopyOnWrite, - - // Enable deterministic stack limit to pin down the exact number of items the wasmtime stack - // can contain before it traps with stack overflow. - // - // Here is how the values below were chosen. - // - // At the moment of writing, the default native stack size limit is 1 MiB. Assuming a logical item - // (see the docs about the field and the instrumentation algorithm) is 8 bytes, 1 MiB can - // fit 2x 65536 logical items. - // - // Since reaching the native stack limit is undesirable, we halve the logical item limit and - // also increase the native 256x. This hopefully should preclude wasm code from reaching - // the stack limit set by the wasmtime. 
- deterministic_stack_limit: Some(DeterministicStackLimit { - logical_max: 65536, - native_stack_max: NATIVE_STACK_MAX, - }), - canonicalize_nans: true, - // Rationale for turning the multi-threaded compilation off is to make the preparation time - // easily reproducible and as deterministic as possible. - // - // Currently the prepare queue doesn't distinguish between precheck and prepare requests. - // On the one hand, it simplifies the code, on the other, however, slows down compile times - // for execute requests. This behavior may change in future. - parallel_compilation: false, - - // WASM extensions. Only those that are meaningful to us may be controlled here. By default, - // we're using WASM MVP, which means all the extensions are disabled. Nevertheless, some - // extensions (e.g., sign extension ops) are enabled by Wasmtime and cannot be disabled. - wasm_reference_types: false, - wasm_simd: false, - wasm_bulk_memory: false, - wasm_multi_value: false, - }, -}; - -/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds. -pub fn prevalidate(code: &[u8]) -> Result { - let blob = RuntimeBlob::new(code)?; - // It's assumed this function will take care of any prevalidation logic - // that needs to be done. - // - // Do nothing for now. - Ok(blob) -} - -/// Runs preparation on the given runtime blob. If successful, it returns a serialized compiled -/// artifact which can then be used to pass into `Executor::execute` after writing it to the disk. 
-pub fn prepare( - blob: RuntimeBlob, - executor_params: &ExecutorParams, -) -> Result, sc_executor_common::error::WasmError> { - let semantics = params_to_wasmtime_semantics(executor_params) - .map_err(|e| sc_executor_common::error::WasmError::Other(e))?; - sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics) -} - -fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result { - let mut sem = DEFAULT_CONFIG.semantics.clone(); - let mut stack_limit = if let Some(stack_limit) = sem.deterministic_stack_limit.clone() { - stack_limit - } else { - return Err("No default stack limit set".to_owned()) - }; - - for p in par.iter() { - match p { - ExecutorParam::MaxMemoryPages(max_pages) => - sem.heap_alloc_strategy = - HeapAllocStrategy::Dynamic { maximum_pages: Some(*max_pages) }, - ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm, - ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm, - ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true, - ExecutorParam::PrecheckingMaxMemory(_) => (), // TODO: Not implemented yet - ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), // Not used here - } - } - sem.deterministic_stack_limit = Some(stack_limit); - Ok(sem) -} - pub struct Executor { thread_pool: rayon::ThreadPool, config: Config, diff --git a/node/core/pvf/worker/src/execute.rs b/node/core/pvf/execute-worker/src/lib.rs similarity index 89% rename from node/core/pvf/worker/src/execute.rs rename to node/core/pvf/execute-worker/src/lib.rs index 87e369584e93..60368b919f87 100644 --- a/node/core/pvf/worker/src/execute.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -14,17 +14,29 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::{ - common::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, - executor_intf::Executor, - LOG_TARGET, -}; +mod executor_intf; + +// NOTE: main.rs is copied to a temp dir when built with only this current library as a dependency. +#[doc(hidden)] +pub use polkadot_node_core_pvf_common::decl_worker_main; + +pub use executor_intf::Executor; + +// The execute worker binary, brought in at `EXECUTE_EXE`. +#[cfg(not(feature = "builder"))] +include!(concat!(env!("OUT_DIR"), "/execute-worker.rs")); + +// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are +// separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-execute-worker=trace`. +const LOG_TARGET: &str = "parachain::pvf::execute-worker"; + use cpu_time::ProcessTime; use futures::{pin_mut, select_biased, FutureExt}; use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ execute::{Handshake, Response}, framed_recv, framed_send, + worker::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, }; use polkadot_parachain::primitives::ValidationResult; use std::{ diff --git a/node/core/pvf/worker/bin/execute_worker.rs b/node/core/pvf/execute-worker/src/main.rs similarity index 84% rename from node/core/pvf/worker/bin/execute_worker.rs rename to node/core/pvf/execute-worker/src/main.rs index 9cb1597f95cc..b7d5391d7c87 100644 --- a/node/core/pvf/worker/bin/execute_worker.rs +++ b/node/core/pvf/execute-worker/src/main.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . //! Execute worker. -// TODO: Build with musl. 
-polkadot_node_core_pvf_worker::decl_worker_main!("execute-worker"); +polkadot_node_core_pvf_execute_worker::decl_worker_main!( + "execute-worker", + polkadot_node_core_pvf_execute_worker::worker_entrypoint +); diff --git a/node/core/pvf/worker/Cargo.toml b/node/core/pvf/prepare-worker/Cargo.toml similarity index 79% rename from node/core/pvf/worker/Cargo.toml rename to node/core/pvf/prepare-worker/Cargo.toml index 3c8742efa511..c1de42cc2de8 100644 --- a/node/core/pvf/worker/Cargo.toml +++ b/node/core/pvf/prepare-worker/Cargo.toml @@ -1,19 +1,11 @@ [package] -name = "polkadot-node-core-pvf-worker" +name = "polkadot-node-core-pvf-prepare-worker" version.workspace = true authors.workspace = true edition.workspace = true -[[bin]] -name = "prepare_worker" -path = "bin/prepare_worker.rs" - -[[bin]] -name = "execute_worker" -path = "bin/execute_worker.rs" - +# TODO: cargo udeps [dependencies] -cpu-time = "1.0.0" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } libc = "0.2.139" @@ -31,8 +23,6 @@ polkadot-primitives = { path = "../../../../primitives" } sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } @@ -41,7 +31,10 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master tikv-jemalloc-ctl = "0.5.0" [build-dependencies] +polkadot-node-core-pvf-musl-builder = { path = 
"../musl-builder" } + substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } [features] +builder = [] jemalloc-allocator = ["dep:tikv-jemalloc-ctl"] diff --git a/node/core/pvf/prepare-worker/build.rs b/node/core/pvf/prepare-worker/build.rs new file mode 100644 index 000000000000..3f51ade68916 --- /dev/null +++ b/node/core/pvf/prepare-worker/build.rs @@ -0,0 +1,35 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +fn main() { + substrate_build_script_utils::generate_cargo_keys(); + + let builder = polkadot_node_core_pvf_musl_builder::Builder::new() + // Tell the builder to build the project (crate) this `build.rs` is part of. + .with_current_project(); + + // Only require musl on supported secure-mode platforms. + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + let builder = builder.with_target("x86_64-unknown-linux-musl"); + #[cfg(not(all(target_arch = "x86_64", target_os = "linux")))] + let builder = builder.with_current_target(); + + builder + .set_file_name("prepare-worker.rs") + .set_constant_name("PREPARE_EXE") + // Build it. 
+ .build(); +} diff --git a/node/core/pvf/prepare-worker/src/executor_intf.rs b/node/core/pvf/prepare-worker/src/executor_intf.rs new file mode 100644 index 000000000000..1f88f6a6dd6e --- /dev/null +++ b/node/core/pvf/prepare-worker/src/executor_intf.rs @@ -0,0 +1,42 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Interface to the Substrate Executor + +use polkadot_node_core_pvf_common::executor_intf::params_to_wasmtime_semantics; +use polkadot_primitives::ExecutorParams; +use sc_executor_common::runtime_blob::RuntimeBlob; + +/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds. +pub fn prevalidate(code: &[u8]) -> Result { + let blob = RuntimeBlob::new(code)?; + // It's assumed this function will take care of any prevalidation logic + // that needs to be done. + // + // Do nothing for now. + Ok(blob) +} + +/// Runs preparation on the given runtime blob. If successful, it returns a serialized compiled +/// artifact which can then be used to pass into `Executor::execute` after writing it to the disk. 
+pub fn prepare( + blob: RuntimeBlob, + executor_params: &ExecutorParams, +) -> Result, sc_executor_common::error::WasmError> { + let semantics = params_to_wasmtime_semantics(executor_params) + .map_err(|e| sc_executor_common::error::WasmError::Other(e))?; + sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics) +} diff --git a/node/core/pvf/worker/src/prepare.rs b/node/core/pvf/prepare-worker/src/lib.rs similarity index 91% rename from node/core/pvf/worker/src/prepare.rs rename to node/core/pvf/prepare-worker/src/lib.rs index 18b61cc5df45..6f0d4d046746 100644 --- a/node/core/pvf/worker/src/prepare.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -14,15 +14,27 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +mod executor_intf; +mod memory_stats; + +// NOTE: main.rs is copied to a temp dir when built with only this current library as a dependency. +#[doc(hidden)] +pub use polkadot_node_core_pvf_common::decl_worker_main; + +pub use executor_intf::{prepare, prevalidate}; + +// The prepare worker binary, brought in at `PREPARE_EXE`. +#[cfg(not(feature = "builder"))] +include!(concat!(env!("OUT_DIR"), "/prepare-worker.rs")); + +// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are +// separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-prepare-worker=trace`. 
+const LOG_TARGET: &str = "parachain::pvf::prepare-worker"; + #[cfg(target_os = "linux")] use crate::memory_stats::max_rss_stat::{extract_max_rss_stat, get_max_rss_thread}; #[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))] use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_tracker_loop}; -use crate::{ - common::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, - prepare, prevalidate, LOG_TARGET, -}; -use cpu_time::ProcessTime; use futures::{pin_mut, select_biased, FutureExt}; use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ @@ -30,6 +42,8 @@ use polkadot_node_core_pvf_common::{ framed_recv, framed_send, prepare::{MemoryStats, PrepareStats}, pvf::PvfPrepData, + worker::{bytes_to_path, cpu_time_monitor_loop, worker_event_loop}, + ProcessTime, }; use std::{any::Any, panic, path::PathBuf, sync::mpsc::channel}; use tokio::{io, net::UnixStream}; diff --git a/node/core/pvf/worker/bin/prepare_worker.rs b/node/core/pvf/prepare-worker/src/main.rs similarity index 84% rename from node/core/pvf/worker/bin/prepare_worker.rs rename to node/core/pvf/prepare-worker/src/main.rs index 63e0488a4de1..a25b5c701313 100644 --- a/node/core/pvf/worker/bin/prepare_worker.rs +++ b/node/core/pvf/prepare-worker/src/main.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . //! Prepare worker. -// TODO: Build with musl. 
-polkadot_node_core_pvf_worker::decl_worker_main!("prepare-worker"); +polkadot_node_core_pvf_prepare_worker::decl_worker_main!( + "prepare-worker", + polkadot_node_core_pvf_prepare_worker::worker_entrypoint +); diff --git a/node/core/pvf/worker/src/memory_stats.rs b/node/core/pvf/prepare-worker/src/memory_stats.rs similarity index 100% rename from node/core/pvf/worker/src/memory_stats.rs rename to node/core/pvf/prepare-worker/src/memory_stats.rs diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 6b2bded5b129..91533713e1a4 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -35,9 +35,14 @@ use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, pvf::PvfPrepData, }; +use polkadot_node_core_pvf_execute_worker::EXECUTE_EXE; +use polkadot_node_core_pvf_prepare_worker::PREPARE_EXE; use polkadot_parachain::primitives::ValidationResult; +use sp_maybe_compressed_blob::{decompress, CODE_BLOB_BOMB_LIMIT}; use std::{ collections::HashMap, + fs::OpenOptions, + io::Write, path::{Path, PathBuf}, time::{Duration, SystemTime}, }; @@ -53,15 +58,6 @@ pub const PREPARE_FAILURE_COOLDOWN: Duration = Duration::from_millis(200); /// The amount of times we will retry failed prepare jobs. pub const NUM_PREPARE_RETRIES: u32 = 5; -// HACK: Getting the binary locations this way is a bit ugly but seems to work? Should eventually -// use something like wasm-builder: . -/// The prepare worker binary. -const PREPARE_EXE: &'static [u8] = - include_bytes!(concat!(env!("OUT_DIR"), "/../../../prepare_worker")); -/// The execute worker binary. -const EXECUTE_EXE: &'static [u8] = - include_bytes!(concat!(env!("OUT_DIR"), "/../../../execute_worker")); - /// An alias to not spell the type for the oneshot sender for the PVF execution result. pub(crate) type ResultSender = oneshot::Sender>; @@ -897,14 +893,56 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream } // TODO: Should we purge unneeded binaries? 
-/// Extracts the worker binaries embedded in this binary onto disk and return their paths. +// TODO: Test on windows. +/// Extracts the worker binaries embedded in this binary onto disk and returns their paths. Skips +/// extraction if the binaries are already present. async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path: &Path) { - // Skip extraction if the binaries are already present. + // Options for opening a binary file. Should create only if it doesn't already exist, and create + // with secure permissions. + #[cfg(unix)] + use std::os::unix::fs::OpenOptionsExt; + let mut open_options = OpenOptions::new(); + #[cfg(unix)] + open_options.write(true).create_new(true).mode(0o744); + #[cfg(not(unix))] + open_options.write(true).create_new(true); + + gum::debug!( + target: LOG_TARGET, + "extracting prepare-worker binary to {}", + prepare_worker_path.display() + ); if !prepare_worker_path.exists() { - let _ = tokio::fs::write(prepare_worker_path, PREPARE_EXE).await; + let prepare_exe = decompress( + PREPARE_EXE.expect( + "prepare-worker binary is not available. \ + This means it was built with `SKIP_BUILD` flag", + ), + CODE_BLOB_BOMB_LIMIT, + ) + .expect("binary should have been built correctly; qed"); + let _ = open_options + .open(prepare_worker_path) + .and_then(|mut file| file.write_all(&prepare_exe)); + } + + gum::debug!( + target: LOG_TARGET, + "extracting execute-worker binary to {}", + execute_worker_path.display() + ); if !execute_worker_path.exists() { - let _ = tokio::fs::write(execute_worker_path, EXECUTE_EXE).await; + let execute_exe = decompress( + EXECUTE_EXE.expect( + "execute-worker binary is not available. 
\ + This means it was built with `SKIP_BUILD` flag", + ), + CODE_BLOB_BOMB_LIMIT, + ) + .expect("binary should have been built correctly; qed"); + let _ = open_options + .open(execute_worker_path) + .and_then(|mut file| file.write_all(&execute_exe)); } } @@ -913,7 +951,14 @@ async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path /// Appends with the version (including the commit) to avoid conflicts with other versions of /// polkadot running, i.e. in testnets. fn worker_path(workers_path: &Path, job_kind: &str) -> PathBuf { - let file_name = format!("{}-worker_{}", job_kind, env!("SUBSTRATE_CLI_IMPL_VERSION")); + // Windows needs the .exe path for executables. + #[cfg(windows)] + let extension = ".exe"; + #[cfg(not(windows))] + let extension = ""; + + let file_name = + format!("{}-worker_{}{}", job_kind, env!("SUBSTRATE_CLI_IMPL_VERSION"), extension); workers_path.join(file_name) } @@ -923,10 +968,7 @@ mod tests { use crate::InvalidCandidate; use assert_matches::assert_matches; use futures::future::BoxFuture; - use polkadot_node_core_pvf_common::{ - prepare::PrepareStats, - tests::{TEST_EXECUTION_TIMEOUT, TEST_PREPARATION_TIMEOUT}, - }; + use polkadot_node_core_pvf_common::{prepare::PrepareStats, tests::TEST_EXECUTION_TIMEOUT}; #[tokio::test] async fn pulse_test() { @@ -944,7 +986,7 @@ mod tests { /// Creates a new PVF which artifact id can be uniquely identified by the given number. fn artifact_id(descriminator: u32) -> ArtifactId { - PvfPrepData::from_discriminator(descriminator).as_artifact_id() + ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(descriminator)) } fn artifact_path(descriminator: u32) -> PathBuf { diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index 00a49d915c4a..27f7791d9a80 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -100,10 +100,6 @@ mod worker_intf; #[doc(hidden)] pub mod testing; -// TODO: Remove when moving the host into its own crate. 
-#[doc(hidden)] -pub use sp_tracing; - pub use error::{InvalidCandidate, ValidationError}; pub use execute::{ExecuteHandshake, ExecuteResponse}; pub use priority::Priority; diff --git a/node/core/pvf/src/prepare/queue.rs b/node/core/pvf/src/prepare/queue.rs index 2a2e6830b67c..a6d0f5480508 100644 --- a/node/core/pvf/src/prepare/queue.rs +++ b/node/core/pvf/src/prepare/queue.rs @@ -487,10 +487,11 @@ pub fn start( #[cfg(test)] mod tests { use super::*; - use crate::{error::PrepareError, prepare::PrepareStats}; use assert_matches::assert_matches; use futures::{future::BoxFuture, FutureExt}; - use polkadot_node_core_pvf_common::tests::TEST_PREPARATION_TIMEOUT; + use polkadot_node_core_pvf_common::{ + error::PrepareError, prepare::PrepareStats, tests::TEST_PREPARATION_TIMEOUT, + }; use slotmap::SlotMap; use std::task::Poll; diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index 3f1ca5a79aa4..86b041d3e847 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -48,7 +48,7 @@ pub async fn spawn( spawn_with_program_path( "prepare", program_path, - &["--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], + &["prepare-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) .await diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/src/testing.rs index f0c487659990..7ac6d3ab3f4d 100644 --- a/node/core/pvf/src/testing.rs +++ b/node/core/pvf/src/testing.rs @@ -19,17 +19,18 @@ //! N.B. This is not guarded with some feature flag. Overexposing items here may affect the final //! artifact even for production builds. -/// Use this macro to declare a `fn main() {}` that will check the arguments and dispatch them to -/// the appropriate worker, making the executable that can be used for spawning workers. 
- #[doc(hidden)] pub use crate::worker_intf::{spawn_with_program_path, SpawnErr}; +use polkadot_primitives::ExecutorParams; + +/// Use this macro to declare a `fn main() {}` that will check the arguments and dispatch them to +/// the appropriate worker, making the executable that can be used for spawning workers. #[macro_export] macro_rules! decl_puppet_worker_main { () => { fn main() { - $crate::sp_tracing::try_init_simple(); + sp_tracing::try_init_simple(); let args = std::env::args().collect::>(); if args.len() < 3 { @@ -56,13 +57,41 @@ macro_rules! decl_puppet_worker_main { std::thread::sleep(std::time::Duration::from_secs(5)); }, "prepare-worker" => { - $crate::prepare_worker_entrypoint(&socket_path, version); + polkadot_node_core_pvf_prepare_worker::worker_entrypoint(&socket_path, version); }, "execute-worker" => { - $crate::execute_worker_entrypoint(&socket_path, version); + polkadot_node_core_pvf_execute_worker::worker_entrypoint(&socket_path, version); }, other => panic!("unknown subcommand: {}", other), } } }; } + +/// A function that stitches together the behaviors of the preparation and the execution +/// worker in a single synchronous function. +pub fn validate_candidate( + code: &[u8], + params: &[u8], +) -> Result, Box> { + use polkadot_node_core_pvf_execute_worker::Executor; + use polkadot_node_core_pvf_prepare_worker::{prepare, prevalidate}; + + let code = sp_maybe_compressed_blob::decompress(code, 10 * 1024 * 1024) + .expect("Decompressing code failed"); + + let blob = prevalidate(&code)?; + let artifact = prepare(blob, &ExecutorParams::default())?; + let tmpdir = tempfile::tempdir()?; + let artifact_path = tmpdir.path().join("blob"); + std::fs::write(&artifact_path, &artifact)?; + + let executor = Executor::new(ExecutorParams::default())?; + let result = unsafe { + // SAFETY: This is trivially safe since the artifact is obtained by calling `prepare` + // and is written into a temporary directory in an unmodified state.
+ executor.execute(&artifact_path, params)? + }; + + Ok(result) +} diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/tests/it/main.rs index be77c9391170..b5fa48379232 100644 --- a/node/core/pvf/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -17,9 +17,10 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ - start, Config, InvalidCandidate, Metrics, PvfPrepData, ValidationError, ValidationHost, + start, Config, InvalidCandidate, Metrics, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; +use polkadot_node_core_pvf_common::pvf::PvfPrepData; use polkadot_parachain::primitives::{BlockData, ValidationParams, ValidationResult}; use polkadot_primitives::{ExecutorParam, ExecutorParams}; use std::time::Duration; @@ -33,7 +34,7 @@ const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(3); struct TestHost { - cache_dir: tempfile::TempDir, + cache_dir: std::path::PathBuf, host: Mutex, } @@ -46,10 +47,9 @@ impl TestHost { where F: FnOnce(&mut Config), { - let cache_dir = tempfile::tempdir().unwrap(); - let workers_dir = tempfile::tempdir().unwrap(); - let mut config = - Config::new(cache_dir.path().to_owned(), workers_dir.path().to_owned(), None); + let cache_dir = tempfile::tempdir().unwrap().path().join("pvf-artifacts"); + let workers_dir = tempfile::tempdir().unwrap().path().join("pvf-workers"); + let mut config = Config::new(cache_dir.to_owned(), workers_dir.to_owned(), None); f(&mut config); let (host, task) = start(config, Metrics::default()); let _ = tokio::task::spawn(task); @@ -246,7 +246,7 @@ async fn execute_queue_doesnt_stall_with_varying_executor_params() { #[tokio::test] async fn deleting_prepared_artifact_does_not_dispute() { let host = TestHost::new(); - let cache_dir = host.cache_dir.path().clone(); + let cache_dir = host.cache_dir.clone(); let result = host .validate_candidate( diff --git 
a/node/core/pvf/worker/src/lib.rs b/node/core/pvf/worker/src/lib.rs deleted file mode 100644 index 2e9078a9018c..000000000000 --- a/node/core/pvf/worker/src/lib.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -mod common; -mod execute; -mod executor_intf; -mod memory_stats; -mod prepare; - -#[doc(hidden)] -pub mod testing; - -#[doc(hidden)] -pub use sp_tracing; - -pub use execute::worker_entrypoint as execute_worker_entrypoint; -pub use prepare::worker_entrypoint as prepare_worker_entrypoint; - -pub use executor_intf::{prepare, prevalidate}; - -// NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are -// separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-worker=trace`. -const LOG_TARGET: &str = "parachain::pvf-worker"; - -/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for -/// spawning the desired worker. -#[macro_export] -macro_rules! 
decl_worker_main { - ($expected_command:expr) => { - fn main() { - $crate::sp_tracing::try_init_simple(); - - let args = std::env::args().collect::>(); - if args.len() < 3 { - panic!("wrong number of arguments"); - } - - let mut version = None; - let mut socket_path: &str = ""; - - for i in 2..args.len() { - match args[i].as_ref() { - "--socket-path" => socket_path = args[i + 1].as_str(), - "--node-version" => version = Some(args[i + 1].as_str()), - _ => (), - } - } - - let subcommand = &args[1]; - if subcommand != $expected_command { - panic!( - "trying to run {} binary with the {} subcommand", - $expected_command, subcommand - ) - } - match subcommand.as_ref() { - "prepare-worker" => { - $crate::prepare_worker_entrypoint(&socket_path, version); - }, - "execute-worker" => { - $crate::execute_worker_entrypoint(&socket_path, version); - }, - other => panic!("unknown subcommand: {}", other), - } - } - }; -} diff --git a/node/core/pvf/worker/src/testing.rs b/node/core/pvf/worker/src/testing.rs deleted file mode 100644 index 9a0c08f920b3..000000000000 --- a/node/core/pvf/worker/src/testing.rs +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -//! Various things for testing other crates. -//! -//! N.B. This is not guarded with some feature flag. Overexposing items here may affect the final -//! 
artifact even for production builds. - -use polkadot_primitives::ExecutorParams; - -/// A function that emulates the stitches together behaviors of the preparation and the execution -/// worker in a single synchronous function. -pub fn validate_candidate( - code: &[u8], - params: &[u8], -) -> Result, Box> { - use crate::executor_intf::{prepare, prevalidate, Executor}; - - let code = sp_maybe_compressed_blob::decompress(code, 10 * 1024 * 1024) - .expect("Decompressing code failed"); - - let blob = prevalidate(&code)?; - let artifact = prepare(blob, &ExecutorParams::default())?; - let tmpdir = tempfile::tempdir()?; - let artifact_path = tmpdir.path().join("blob"); - std::fs::write(&artifact_path, &artifact)?; - - let executor = Executor::new(ExecutorParams::default())?; - let result = unsafe { - // SAFETY: This is trivially safe since the artifact is obtained by calling `prepare` - // and is written into a temporary directory in an unmodified state. - executor.execute(&artifact_path, params)? 
- }; - - Ok(result) -} diff --git a/node/malus/Cargo.toml b/node/malus/Cargo.toml index 3c6aa5c2d39e..964b1cce12ab 100644 --- a/node/malus/Cargo.toml +++ b/node/malus/Cargo.toml @@ -20,7 +20,8 @@ polkadot-node-subsystem-types = { path = "../subsystem-types" } polkadot-node-core-dispute-coordinator = { path = "../core/dispute-coordinator" } polkadot-node-core-candidate-validation = { path = "../core/candidate-validation" } polkadot-node-core-backing = { path = "../core/backing" } -polkadot-node-core-pvf-worker = { path = "../core/pvf/worker" } +polkadot-node-core-pvf-execute-worker = { path = "../core/pvf/execute-worker" } +polkadot-node-core-pvf-prepare-worker = { path = "../core/pvf/prepare-worker" } polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } color-eyre = { version = "0.6.1", default-features = false } diff --git a/node/malus/src/malus.rs b/node/malus/src/malus.rs index 36cf0cca06bf..48afde3c41c6 100644 --- a/node/malus/src/malus.rs +++ b/node/malus/src/malus.rs @@ -97,7 +97,7 @@ impl MalusCli { #[cfg(not(target_os = "android"))] { - polkadot_node_core_pvf_worker::prepare_worker_entrypoint( + polkadot_node_core_pvf_prepare_worker::worker_entrypoint( &cmd.socket_path, None, ); @@ -111,7 +111,7 @@ impl MalusCli { #[cfg(not(target_os = "android"))] { - polkadot_node_core_pvf_worker::execute_worker_entrypoint( + polkadot_node_core_pvf_execute_worker::worker_entrypoint( &cmd.socket_path, None, ); diff --git a/node/test/performance-test/Cargo.toml b/node/test/performance-test/Cargo.toml index 70f072c03ae1..4e3001b3ee66 100644 --- a/node/test/performance-test/Cargo.toml +++ b/node/test/performance-test/Cargo.toml @@ -10,7 +10,7 @@ quote = "1.0.26" env_logger = "0.9" log = "0.4" -polkadot-node-core-pvf-worker = { path = "../../core/pvf/worker" } +polkadot-node-core-pvf-prepare-worker = { path = "../../core/pvf/prepare-worker" } polkadot-erasure-coding = { path = "../../../erasure-coding" } 
polkadot-node-primitives = { path = "../../primitives" } polkadot-primitives = { path = "../../../primitives" } diff --git a/node/test/performance-test/src/lib.rs b/node/test/performance-test/src/lib.rs index 1afa43cc62ba..15073912654a 100644 --- a/node/test/performance-test/src/lib.rs +++ b/node/test/performance-test/src/lib.rs @@ -65,9 +65,9 @@ pub fn measure_pvf_prepare(wasm_code: &[u8]) -> Result .or(Err(PerfCheckError::CodeDecompressionFailed))?; // Recreate the pipeline from the pvf prepare worker. - let blob = - polkadot_node_core_pvf_worker::prevalidate(code.as_ref()).map_err(PerfCheckError::from)?; - polkadot_node_core_pvf_worker::prepare(blob, &ExecutorParams::default()) + let blob = polkadot_node_core_pvf_prepare_worker::prevalidate(code.as_ref()) + .map_err(PerfCheckError::from)?; + polkadot_node_core_pvf_prepare_worker::prepare(blob, &ExecutorParams::default()) .map_err(PerfCheckError::from)?; Ok(start.elapsed()) diff --git a/parachain/test-parachains/adder/collator/Cargo.toml b/parachain/test-parachains/adder/collator/Cargo.toml index ee20cb0b0d17..7fe4aefc688d 100644 --- a/parachain/test-parachains/adder/collator/Cargo.toml +++ b/parachain/test-parachains/adder/collator/Cargo.toml @@ -34,7 +34,7 @@ sc-service = { git = "https://github.com/paritytech/substrate", branch = "master # This one is tricky. Even though it is not used directly by the collator, we still need it for the # `puppet_worker` binary, which is required for the integration test. However, this shouldn't be # a big problem since it is used transitively anyway. -polkadot-node-core-pvf-worker = { path = "../../../../node/core/pvf/worker" } +polkadot-node-core-pvf = { path = "../../../../node/core/pvf" } [dev-dependencies] polkadot-parachain = { path = "../../.." 
} diff --git a/parachain/test-parachains/adder/collator/bin/puppet_worker.rs b/parachain/test-parachains/adder/collator/bin/puppet_worker.rs index ddd81971292b..7f93519d8454 100644 --- a/parachain/test-parachains/adder/collator/bin/puppet_worker.rs +++ b/parachain/test-parachains/adder/collator/bin/puppet_worker.rs @@ -14,4 +14,4 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -polkadot_node_core_pvf_worker::decl_puppet_worker_main!(); +polkadot_node_core_pvf::decl_puppet_worker_main!(); diff --git a/parachain/test-parachains/adder/collator/src/lib.rs b/parachain/test-parachains/adder/collator/src/lib.rs index 4b2b9248de22..02a4598f9e47 100644 --- a/parachain/test-parachains/adder/collator/src/lib.rs +++ b/parachain/test-parachains/adder/collator/src/lib.rs @@ -272,7 +272,7 @@ mod tests { } fn validate_collation(collator: &Collator, parent_head: HeadData, collation: Collation) { - use polkadot_node_core_pvf_worker::testing::validate_candidate; + use polkadot_node_core_pvf::testing::validate_candidate; let block_data = match collation.proof_of_validity { MaybeCompressedPoV::Raw(pov) => pov.block_data, diff --git a/parachain/test-parachains/undying/collator/Cargo.toml b/parachain/test-parachains/undying/collator/Cargo.toml index 1b2ccf3be0ca..2b9d80401f5d 100644 --- a/parachain/test-parachains/undying/collator/Cargo.toml +++ b/parachain/test-parachains/undying/collator/Cargo.toml @@ -34,7 +34,7 @@ sc-service = { git = "https://github.com/paritytech/substrate", branch = "master # This one is tricky. Even though it is not used directly by the collator, we still need it for the # `puppet_worker` binary, which is required for the integration test. However, this shouldn't be # a big problem since it is used transitively anyway. 
-polkadot-node-core-pvf-worker = { path = "../../../../node/core/pvf/worker" } +polkadot-node-core-pvf = { path = "../../../../node/core/pvf" } [dev-dependencies] polkadot-parachain = { path = "../../.." } diff --git a/parachain/test-parachains/undying/collator/bin/puppet_worker.rs b/parachain/test-parachains/undying/collator/bin/puppet_worker.rs index ddd81971292b..7f93519d8454 100644 --- a/parachain/test-parachains/undying/collator/bin/puppet_worker.rs +++ b/parachain/test-parachains/undying/collator/bin/puppet_worker.rs @@ -14,4 +14,4 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -polkadot_node_core_pvf_worker::decl_puppet_worker_main!(); +polkadot_node_core_pvf::decl_puppet_worker_main!(); diff --git a/parachain/test-parachains/undying/collator/src/lib.rs b/parachain/test-parachains/undying/collator/src/lib.rs index dcaf9b63296d..838590fa16f5 100644 --- a/parachain/test-parachains/undying/collator/src/lib.rs +++ b/parachain/test-parachains/undying/collator/src/lib.rs @@ -354,7 +354,7 @@ mod tests { } fn validate_collation(collator: &Collator, parent_head: HeadData, collation: Collation) { - use polkadot_node_core_pvf_worker::testing::validate_candidate; + use polkadot_node_core_pvf::testing::validate_candidate; let block_data = match collation.proof_of_validity { MaybeCompressedPoV::Raw(pov) => pov.block_data, From ffccb54d9ac468296474ae141f2d6df1291fd3ff Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 28 Apr 2023 11:41:44 +0200 Subject: [PATCH 09/13] Lots of fixes to binary builder --- node/core/pvf/Cargo.toml | 1 + node/core/pvf/common/Cargo.toml | 1 - node/core/pvf/execute-worker/Cargo.toml | 1 - node/core/pvf/musl-builder/src/builder.rs | 26 +++++++-- node/core/pvf/musl-builder/src/lib.rs | 2 +- node/core/pvf/musl-builder/src/project.rs | 64 ++++++++++++++++++----- node/core/pvf/prepare-worker/Cargo.toml | 1 - 7 files changed, 77 insertions(+), 19 deletions(-) diff --git 
a/node/core/pvf/Cargo.toml b/node/core/pvf/Cargo.toml index 033a58271cd9..571171b0f0fe 100644 --- a/node/core/pvf/Cargo.toml +++ b/node/core/pvf/Cargo.toml @@ -8,6 +8,7 @@ edition.workspace = true name = "puppet_worker" path = "bin/puppet_worker.rs" +# TODO: run `cargo udeps`. Currently broken with the builder. [dependencies] always-assert = "0.1" futures = "0.3.21" diff --git a/node/core/pvf/common/Cargo.toml b/node/core/pvf/common/Cargo.toml index b9c9ac6286cb..de9fa10804c7 100644 --- a/node/core/pvf/common/Cargo.toml +++ b/node/core/pvf/common/Cargo.toml @@ -4,7 +4,6 @@ version.workspace = true authors.workspace = true edition.workspace = true -# TODO: cargo udeps [dependencies] cpu-time = "1.0.0" futures = "0.3.21" diff --git a/node/core/pvf/execute-worker/Cargo.toml b/node/core/pvf/execute-worker/Cargo.toml index 982e0a4de9a2..652277d51a18 100644 --- a/node/core/pvf/execute-worker/Cargo.toml +++ b/node/core/pvf/execute-worker/Cargo.toml @@ -4,7 +4,6 @@ version.workspace = true authors.workspace = true edition.workspace = true -# TODO: cargo udeps [dependencies] cpu-time = "1.0.0" futures = "0.3.21" diff --git a/node/core/pvf/musl-builder/src/builder.rs b/node/core/pvf/musl-builder/src/builder.rs index 950e79a09dda..c7fe6eea7adb 100644 --- a/node/core/pvf/musl-builder/src/builder.rs +++ b/node/core/pvf/musl-builder/src/builder.rs @@ -74,11 +74,17 @@ pub struct BuilderSelectTarget { } impl BuilderSelectTarget { - /// Select the Rust target to use for building the binary. + /// Use the current Rust target for building the binary. + pub fn with_current_target(self) -> Builder { + self.with_target(env::var("TARGET").expect("this is set by cargo! qed")) + } + + /// Use the given Rust target for building the binary. 
pub fn with_target(self, target: impl Into) -> Builder { Builder { rust_flags: Vec::new(), file_name: None, + constant_name: None, project_cargo_toml: self.project_cargo_toml, features_to_enable: Vec::new(), target: target.into(), @@ -104,6 +110,10 @@ pub struct Builder { /// /// Defaults to `binary.rs`. file_name: Option, + /// The name of the Rust constant that is generated which contains the binary bytes. + /// + /// Defaults to `BINARY`. + constant_name: Option, /// The path to the `Cargo.toml` of the project that should be built. project_cargo_toml: PathBuf, /// Features that should be enabled when building the binary. @@ -136,6 +146,14 @@ impl Builder { self } + /// Set the name of the constant that will be generated in the Rust code to include!. + /// + /// If this function is not called, `constant_name` defaults to `BINARY` + pub fn set_constant_name(mut self, constant_name: impl Into) -> Self { + self.constant_name = Some(constant_name.into()); + self + } + /// Instruct the linker to import the memory into the binary. /// /// This adds `-C link-arg=--import-memory` to `RUST_FLAGS`. @@ -164,6 +182,7 @@ impl Builder { pub fn build(self) { let out_dir = PathBuf::from(env::var("OUT_DIR").expect("`OUT_DIR` is set by cargo!")); let file_path = out_dir.join(self.file_name.clone().unwrap_or_else(|| "binary.rs".into())); + let constant_name = self.constant_name.clone().unwrap_or_else(|| "BINARY".into()); if check_skip_build() { // If we skip the build, we still want to make sure to be called when an env variable @@ -177,6 +196,7 @@ impl Builder { build_project( file_path, + constant_name, self.project_cargo_toml, self.rust_flags.into_iter().map(|f| format!("{} ", f)).collect(), self.features_to_enable, @@ -245,6 +265,7 @@ fn generate_rerun_if_changed_instructions() { /// `target` - The binary target. 
fn build_project( file_name: PathBuf, + constant_name: String, project_cargo_toml: PathBuf, default_rustflags: String, features_to_enable: Vec, @@ -279,9 +300,8 @@ fn build_project( file_name, format!( r#" - pub const BINARY: Option<&[u8]> = Some(include_bytes!("{binary}")); + pub const {constant_name}: Option<&[u8]> = Some(include_bytes!("{binary}")); "#, - binary = binary, ), ); } diff --git a/node/core/pvf/musl-builder/src/lib.rs b/node/core/pvf/musl-builder/src/lib.rs index e6b191a43451..205ec2db1025 100644 --- a/node/core/pvf/musl-builder/src/lib.rs +++ b/node/core/pvf/musl-builder/src/lib.rs @@ -195,7 +195,7 @@ impl CargoCommand { /// /// Assumes that cargo version matches the rustc version. fn supports_env(&self) -> bool { - // Just a stub for now -- not sure this is needed for musl-builder. + // TODO: Just a stub for now -- not sure this is needed for musl-builder. true } } diff --git a/node/core/pvf/musl-builder/src/project.rs b/node/core/pvf/musl-builder/src/project.rs index fce433ac8917..e91e85f5c445 100644 --- a/node/core/pvf/musl-builder/src/project.rs +++ b/node/core/pvf/musl-builder/src/project.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::{write_file_if_changed, CargoCommandVersioned, OFFLINE}; +use crate::{copy_file_if_changed, write_file_if_changed, CargoCommandVersioned, OFFLINE}; use build_helper::rerun_if_changed; use cargo_metadata::{CargoOpt, Metadata, MetadataCommand}; @@ -28,7 +28,7 @@ use std::{ process, }; use strum::{EnumIter, IntoEnumIterator}; -use toml::value::Table; +use toml::value::{Array, Table}; use walkdir::WalkDir; /// Colorize an info message. @@ -58,6 +58,7 @@ impl BinaryBloaty { } /// Holds the path to the binary. 
+#[derive(Debug)] pub struct Binary(PathBuf); impl Binary { @@ -280,6 +281,10 @@ fn create_project_cargo_toml( binary: &str, enabled_features: impl Iterator, ) { + // For the PVF workers we want unwinding panics (we have panic handlers in place). + // TODO: Allow customizing this for the generalized builder. + let panic_setting = "unwind"; + let mut root_workspace_toml: Table = toml::from_str( &fs::read_to_string(workspace_root_path.join("Cargo.toml")) .expect("Workspace root `Cargo.toml` exists; qed"), @@ -290,7 +295,7 @@ fn create_project_cargo_toml( // Add different profiles which are selected by setting `BUILD_TYPE`. let mut release_profile = Table::new(); - release_profile.insert("panic".into(), "abort".into()); + release_profile.insert("panic".into(), panic_setting.into()); release_profile.insert("lto".into(), "thin".into()); let mut production_profile = Table::new(); @@ -299,7 +304,7 @@ fn create_project_cargo_toml( production_profile.insert("codegen-units".into(), 1.into()); let mut dev_profile = Table::new(); - dev_profile.insert("panic".into(), "abort".into()); + dev_profile.insert("panic".into(), panic_setting.into()); let mut profile = Table::new(); profile.insert("release".into(), release_profile.into()); @@ -339,11 +344,21 @@ fn create_project_cargo_toml( workspace_toml.insert("package".into(), package.into()); - let mut lib = Table::new(); - lib.insert("name".into(), binary.into()); - lib.insert("crate-type".into(), vec!["cdylib".to_string()].into()); + // For PVF workers use `bin` instead of wasm-builder's lib. + // TODO: Allow choose between bin/lib for the generalized builder. 
+ let mut bin_array = Array::new(); + let mut bin = Table::new(); + bin.insert("name".into(), binary.into()); + bin.insert("path".into(), "src/main.rs".into()); + bin_array.insert(0, bin.into()); + + workspace_toml.insert("bin".into(), bin_array.into()); + + // let mut lib = Table::new(); + // lib.insert("name".into(), binary.into()); + // lib.insert("crate-type".into(), vec!["cdylib".to_string()].into()); - workspace_toml.insert("lib".into(), lib.into()); + // workspace_toml.insert("lib".into(), lib.into()); let mut dependencies = Table::new(); @@ -444,6 +459,14 @@ fn project_enabled_features( .map(|d| d.0.clone()) .collect::>(); + // Assert that the "builder" feature is present but disabled. + assert!( + package.features.contains_key("builder") && + !enabled_features.contains(&"builder".to_string()) + ); + // Enable the builder feature so that it is only present when building the binary. + enabled_features.push("builder".into()); + enabled_features.sort(); enabled_features } @@ -501,9 +524,14 @@ fn create_project( write_file_if_changed(project_folder.join("src/lib.rs"), "#![no_std] pub use project::*;"); + let main_path = crate_path.join("src/main.rs"); + if main_path.exists() { + copy_file_if_changed(main_path, project_folder.join("src/main.rs")); + } + if let Some(crate_lock_file) = find_cargo_lock(project_cargo_toml) { // Use the `Cargo.lock` of the main project. - crate::copy_file_if_changed(crate_lock_file, project_folder.join("Cargo.lock")); + copy_file_if_changed(crate_lock_file, project_folder.join("Cargo.lock")); } project_folder @@ -631,8 +659,16 @@ fn build_project( let manifest_path = project.join("Cargo.toml"); let mut build_cmd = cargo_cmd.command(); + // TODO: If wasm-builder is ever refactored to use this generalized builder, it needs the + // following default flags: + // + // ``` + // -C target-cpu=mvp -C target-feature=-sign-ext -C link-arg=--export-table + // ``` + // + // See . 
let rustflags = format!( - "-C target-cpu=mvp -C target-feature=-sign-ext -C link-arg=--export-table {} {}", + "{} {}", default_rustflags, env::var(crate::BUILD_RUSTFLAGS_ENV).unwrap_or_default(), ); @@ -675,7 +711,7 @@ fn build_project( } } -/// Compact the binary if supported for the target. +/// Compact the binary and compress it using zstd. fn compact_file( project: &Path, profile: Profile, @@ -694,7 +730,11 @@ fn compact_file( // TODO: For a generalized builder we may want to support passing in a function to compact the // binary. - let compact_path = project.join(format!("{}.compact", out_name,)); + let compact_path = project.join(format!("{}.compact", out_name)); + // TODO: Wasm compaction goes here. + // TODO: For other targets, compaction is a noop so the compact + // and compressed binaries are the same right now. + fs::copy(&in_path, &compact_path).expect("Copying the binary to the project dir."); let compact_compressed_path = project.join(format!("{}.compact.compressed", out_name)); if compress(&compact_path, &compact_compressed_path) { diff --git a/node/core/pvf/prepare-worker/Cargo.toml b/node/core/pvf/prepare-worker/Cargo.toml index c1de42cc2de8..7c2331e8a22a 100644 --- a/node/core/pvf/prepare-worker/Cargo.toml +++ b/node/core/pvf/prepare-worker/Cargo.toml @@ -4,7 +4,6 @@ version.workspace = true authors.workspace = true edition.workspace = true -# TODO: cargo udeps [dependencies] futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } From f17005a137f68c1ae3bdbd805d1262c676aa4125 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 28 Apr 2023 11:48:39 +0200 Subject: [PATCH 10/13] Rename musl-builder -> binary-builder --- Cargo.lock | 34 +++++++++---------- Cargo.toml | 2 +- .../Cargo.toml | 2 +- node/core/pvf/binary-builder/README.md | 18 ++++++++++ .../src/builder.rs | 0 .../src/lib.rs | 6 ++-- .../src/prerequisites.rs | 0 .../src/project.rs | 0 .../src/version.rs | 0 node/core/pvf/execute-worker/Cargo.toml | 2 +- 
node/core/pvf/execute-worker/build.rs | 2 +- node/core/pvf/musl-builder/README.md | 16 --------- node/core/pvf/prepare-worker/Cargo.toml | 2 +- node/core/pvf/prepare-worker/build.rs | 2 +- node/core/pvf/src/host.rs | 4 +-- 15 files changed, 47 insertions(+), 43 deletions(-) rename node/core/pvf/{musl-builder => binary-builder}/Cargo.toml (91%) create mode 100644 node/core/pvf/binary-builder/README.md rename node/core/pvf/{musl-builder => binary-builder}/src/builder.rs (100%) rename node/core/pvf/{musl-builder => binary-builder}/src/lib.rs (98%) rename node/core/pvf/{musl-builder => binary-builder}/src/prerequisites.rs (100%) rename node/core/pvf/{musl-builder => binary-builder}/src/project.rs (100%) rename node/core/pvf/{musl-builder => binary-builder}/src/version.rs (100%) delete mode 100644 node/core/pvf/musl-builder/README.md diff --git a/Cargo.lock b/Cargo.lock index 696ba8418f47..b230ed5d8d45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7209,6 +7209,21 @@ dependencies = [ "tracing-gum", ] +[[package]] +name = "polkadot-node-core-pvf-binary-builder" +version = "0.9.41" +dependencies = [ + "ansi_term", + "build-helper", + "cargo_metadata", + "filetime", + "sp-maybe-compressed-blob", + "strum", + "tempfile", + "toml 0.7.3", + "walkdir", +] + [[package]] name = "polkadot-node-core-pvf-checker" version = "0.9.41" @@ -7258,8 +7273,8 @@ dependencies = [ "cpu-time", "futures", "parity-scale-codec", + "polkadot-node-core-pvf-binary-builder", "polkadot-node-core-pvf-common", - "polkadot-node-core-pvf-musl-builder", "polkadot-parachain", "polkadot-primitives", "rayon", @@ -7276,21 +7291,6 @@ dependencies = [ "tracing-gum", ] -[[package]] -name = "polkadot-node-core-pvf-musl-builder" -version = "0.9.41" -dependencies = [ - "ansi_term", - "build-helper", - "cargo_metadata", - "filetime", - "sp-maybe-compressed-blob", - "strum", - "tempfile", - "toml 0.7.3", - "walkdir", -] - [[package]] name = "polkadot-node-core-pvf-prepare-worker" version = "0.9.41" @@ -7298,8 +7298,8 @@ 
dependencies = [ "futures", "libc", "parity-scale-codec", + "polkadot-node-core-pvf-binary-builder", "polkadot-node-core-pvf-common", - "polkadot-node-core-pvf-musl-builder", "polkadot-parachain", "polkadot-primitives", "rayon", diff --git a/Cargo.toml b/Cargo.toml index 178db8f3a006..0ae661abb066 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,7 @@ members = [ "node/core/provisioner", "node/core/pvf", "node/core/pvf/common", - "node/core/pvf/musl-builder", + "node/core/pvf/binary-builder", "node/core/pvf/execute-worker", "node/core/pvf/prepare-worker", "node/core/pvf-checker", diff --git a/node/core/pvf/musl-builder/Cargo.toml b/node/core/pvf/binary-builder/Cargo.toml similarity index 91% rename from node/core/pvf/musl-builder/Cargo.toml rename to node/core/pvf/binary-builder/Cargo.toml index b574e2844892..7164fe16688c 100644 --- a/node/core/pvf/musl-builder/Cargo.toml +++ b/node/core/pvf/binary-builder/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "polkadot-node-core-pvf-musl-builder" +name = "polkadot-node-core-pvf-binary-builder" version.workspace = true authors.workspace = true edition.workspace = true diff --git a/node/core/pvf/binary-builder/README.md b/node/core/pvf/binary-builder/README.md new file mode 100644 index 000000000000..0ffd9deef3ab --- /dev/null +++ b/node/core/pvf/binary-builder/README.md @@ -0,0 +1,18 @@ +# binary-builder + +binary-builder is a tool that integrates the process of building the binary of a +crate into the main `cargo` build process. The binary can then be embedded into +other crates and extracted at runtime. + + + +## Prerequisites + +binary-builder requires the chosen toolchain (e.g. 
`x86_64-unknown-linux-musl`) +to be installed: + +```sh +rustup target add x86_64-unknown-linux-musl +``` + + diff --git a/node/core/pvf/musl-builder/src/builder.rs b/node/core/pvf/binary-builder/src/builder.rs similarity index 100% rename from node/core/pvf/musl-builder/src/builder.rs rename to node/core/pvf/binary-builder/src/builder.rs diff --git a/node/core/pvf/musl-builder/src/lib.rs b/node/core/pvf/binary-builder/src/lib.rs similarity index 98% rename from node/core/pvf/musl-builder/src/lib.rs rename to node/core/pvf/binary-builder/src/lib.rs index 205ec2db1025..261fa6d27cba 100644 --- a/node/core/pvf/musl-builder/src/lib.rs +++ b/node/core/pvf/binary-builder/src/lib.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -// TODO: Extract common parts into a generalized builder that wasm/musl builders are based on. +// TODO: Extract common parts into a generalized builder that wasm-builder is based on. // TODO: Make sure we build with O2 and LTO. @@ -33,6 +33,8 @@ use std::{ }; use version::Version; +// TODO: Allow customizing the env vars, for wasm-builder. + /// Environment variable that tells us to skip building the binary. const SKIP_BUILD_ENV: &str = "BUILDER_SKIP_BUILD"; @@ -195,7 +197,7 @@ impl CargoCommand { /// /// Assumes that cargo version matches the rustc version. fn supports_env(&self) -> bool { - // TODO: Just a stub for now -- not sure this is needed for musl-builder. + // TODO: Just a stub for now -- generalize. 
true } } diff --git a/node/core/pvf/musl-builder/src/prerequisites.rs b/node/core/pvf/binary-builder/src/prerequisites.rs similarity index 100% rename from node/core/pvf/musl-builder/src/prerequisites.rs rename to node/core/pvf/binary-builder/src/prerequisites.rs diff --git a/node/core/pvf/musl-builder/src/project.rs b/node/core/pvf/binary-builder/src/project.rs similarity index 100% rename from node/core/pvf/musl-builder/src/project.rs rename to node/core/pvf/binary-builder/src/project.rs diff --git a/node/core/pvf/musl-builder/src/version.rs b/node/core/pvf/binary-builder/src/version.rs similarity index 100% rename from node/core/pvf/musl-builder/src/version.rs rename to node/core/pvf/binary-builder/src/version.rs diff --git a/node/core/pvf/execute-worker/Cargo.toml b/node/core/pvf/execute-worker/Cargo.toml index 652277d51a18..e6cfeda9cbf6 100644 --- a/node/core/pvf/execute-worker/Cargo.toml +++ b/node/core/pvf/execute-worker/Cargo.toml @@ -31,7 +31,7 @@ sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", br tikv-jemalloc-ctl = "0.5.0" [build-dependencies] -polkadot-node-core-pvf-musl-builder = { path = "../musl-builder" } +polkadot-node-core-pvf-binary-builder = { path = "../binary-builder" } [features] builder = [] diff --git a/node/core/pvf/execute-worker/build.rs b/node/core/pvf/execute-worker/build.rs index b7d3aff04e55..e13d06745544 100644 --- a/node/core/pvf/execute-worker/build.rs +++ b/node/core/pvf/execute-worker/build.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . fn main() { - let builder = polkadot_node_core_pvf_musl_builder::Builder::new() + let builder = polkadot_node_core_pvf_binary_builder::Builder::new() // Tell the builder to build the project (crate) this `build.rs` is part of. 
.with_current_project(); diff --git a/node/core/pvf/musl-builder/README.md b/node/core/pvf/musl-builder/README.md deleted file mode 100644 index d58b2ad3d413..000000000000 --- a/node/core/pvf/musl-builder/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# musl-builder - -musl-builder is a tool that integrates the process of building the musl binary -of your project into the main `cargo` build process. - - - -## Prerequisites - -musl-builder requires a musl toolchain like `x86_64-unknown-linux-musl` to be installed: - -```sh -rustup target add x86_64-unknown-linux-musl -``` - - diff --git a/node/core/pvf/prepare-worker/Cargo.toml b/node/core/pvf/prepare-worker/Cargo.toml index 7c2331e8a22a..9951aa49e230 100644 --- a/node/core/pvf/prepare-worker/Cargo.toml +++ b/node/core/pvf/prepare-worker/Cargo.toml @@ -30,7 +30,7 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master tikv-jemalloc-ctl = "0.5.0" [build-dependencies] -polkadot-node-core-pvf-musl-builder = { path = "../musl-builder" } +polkadot-node-core-pvf-binary-builder = { path = "../binary-builder" } substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/core/pvf/prepare-worker/build.rs b/node/core/pvf/prepare-worker/build.rs index 3f51ade68916..05c789bb5a6c 100644 --- a/node/core/pvf/prepare-worker/build.rs +++ b/node/core/pvf/prepare-worker/build.rs @@ -17,7 +17,7 @@ fn main() { substrate_build_script_utils::generate_cargo_keys(); - let builder = polkadot_node_core_pvf_musl_builder::Builder::new() + let builder = polkadot_node_core_pvf_binary_builder::Builder::new() // Tell the builder to build the project (crate) this `build.rs` is part of. 
.with_current_project(); diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 91533713e1a4..ac703832674c 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -916,7 +916,7 @@ async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path let prepare_exe = decompress( PREPARE_EXE.expect( "prepare-worker binary is not available. \ - This means it was built with `SKIP_BUILD` flag", + This means it was built with `BUILDER_SKIP_BUILD` flag", ), CODE_BLOB_BOMB_LIMIT, ) @@ -935,7 +935,7 @@ async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path let execute_exe = decompress( EXECUTE_EXE.expect( "execute-worker binary is not available. \ - This means it was built with `SKIP_BUILD` flag", + This means it was built with `BUILDER_SKIP_BUILD` flag", ), CODE_BLOB_BOMB_LIMIT, ) From 4b4f33bed59a6df18b5156a68df13419321cf89a Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 28 Apr 2023 11:53:48 +0200 Subject: [PATCH 11/13] Fix some clippy lints --- node/core/pvf/binary-builder/src/project.rs | 8 ++++---- node/core/pvf/src/host.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/node/core/pvf/binary-builder/src/project.rs b/node/core/pvf/binary-builder/src/project.rs index e91e85f5c445..af565a31c48b 100644 --- a/node/core/pvf/binary-builder/src/project.rs +++ b/node/core/pvf/binary-builder/src/project.rs @@ -338,7 +338,7 @@ fn create_project_cargo_toml( } let mut package = Table::new(); - package.insert("name".into(), format!("{}", crate_name).into()); + package.insert("name".into(), crate_name.into()); package.insert("version".into(), "1.0.0".into()); package.insert("edition".into(), "2021".into()); @@ -724,7 +724,7 @@ fn compact_file( let in_path = project .join(format!("target/{}", target)) .join(profile.directory()) - .join(format!("{}", default_out_name)); + .join(default_out_name); let (compact_path, compact_compressed_path) = if profile.wants_compact() { // TODO: For a 
generalized builder we may want to support passing in a function to compact the @@ -746,7 +746,7 @@ fn compact_file( (None, None) }; - let bloaty_path = project.join(format!("{}", out_name)); + let bloaty_path = project.join(out_name); fs::copy(in_path, &bloaty_path).expect("Copying the bloaty file to the project dir."); (compact_path, compact_compressed_path, BinaryBloaty(bloaty_path)) @@ -930,6 +930,6 @@ fn copy_binary_to_target_directory(cargo_manifest: &Path, binary: &Binary) { fs::create_dir_all(&target_dir).expect("Creates `TARGET_DIRECTORY`."); - fs::copy(binary.binary_path(), target_dir.join(format!("{}", get_binary_name(cargo_manifest)))) + fs::copy(binary.binary_path(), target_dir.join(get_binary_name(cargo_manifest))) .expect("Copies binary to `TARGET_DIRECTORY`."); } diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index ac703832674c..0db6878c2103 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -903,7 +903,7 @@ async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path use std::os::unix::fs::OpenOptionsExt; let mut open_options = OpenOptions::new(); #[cfg(unix)] - open_options.write(true).create_new(true).mode(744); + open_options.write(true).create_new(true).mode(0o744); #[cfg(not(unix))] open_options.write(true).create_new(true); From 1d893a3666f32c433c42629e0f3437be7d1242f8 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 28 Apr 2023 12:42:28 +0200 Subject: [PATCH 12/13] [minor] Undo accidentally-committed change --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index b9edd5a0dfdc..6b736d884f22 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ root = true indent_style=tab indent_size=tab tab_width=4 -max_line_length=100 +max_line_length=120 end_of_line=lf charset=utf-8 trim_trailing_whitespace=true From d085a2974c759369ba13966c18337aa45427699f Mon Sep 17 00:00:00 2001 From: Marcin S Date: Tue, 25 Jul 2023 
19:31:13 +0200 Subject: [PATCH 13/13] Commit WIP that I had --- Cargo.lock | 1 + node/core/candidate-validation/Cargo.toml | 1 + node/core/candidate-validation/src/lib.rs | 4 +- node/core/pvf/binary-builder/src/builder.rs | 2 +- node/core/pvf/binary-builder/src/lib.rs | 5 + .../pvf/binary-builder/src/prerequisites.rs | 18 +- node/core/pvf/common/src/lib.rs | 1 + node/core/pvf/src/execute/queue.rs | 10 +- node/core/pvf/src/execute/worker_intf.rs | 31 +- node/core/pvf/src/host.rs | 126 +------ node/core/pvf/src/lib.rs | 3 +- node/core/pvf/src/prepare/pool.rs | 14 +- node/core/pvf/src/prepare/worker_intf.rs | 27 +- node/core/pvf/src/testing.rs | 3 +- node/core/pvf/src/worker_intf.rs | 348 ++++++++++++++++-- node/core/pvf/tests/it/main.rs | 7 +- node/core/pvf/tests/it/worker_common.rs | 44 ++- 17 files changed, 441 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b230ed5d8d45..9f96ef249e22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7056,6 +7056,7 @@ dependencies = [ "futures-timer", "parity-scale-codec", "polkadot-node-core-pvf", + "polkadot-node-core-pvf-common", "polkadot-node-metrics", "polkadot-node-primitives", "polkadot-node-subsystem", diff --git a/node/core/candidate-validation/Cargo.toml b/node/core/candidate-validation/Cargo.toml index 18826f7e2376..cd4f43ca235b 100644 --- a/node/core/candidate-validation/Cargo.toml +++ b/node/core/candidate-validation/Cargo.toml @@ -22,6 +22,7 @@ polkadot-node-metrics = { path = "../../metrics" } [target.'cfg(not(any(target_os = "android", target_os = "unknown")))'.dependencies] polkadot-node-core-pvf = { path = "../pvf" } +polkadot-node-core-pvf-common = { path = "../pvf/common" } [dev-dependencies] sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/core/candidate-validation/src/lib.rs b/node/core/candidate-validation/src/lib.rs index 0f99025d5811..d41728bfdeae 100644 --- a/node/core/candidate-validation/src/lib.rs +++ 
b/node/core/candidate-validation/src/lib.rs @@ -24,9 +24,9 @@ #![warn(missing_docs)] use polkadot_node_core_pvf::{ - InvalidCandidate as WasmInvalidCandidate, PrepareError, PrepareStats, PvfPrepData, - ValidationError, ValidationHost, + InvalidCandidate as WasmInvalidCandidate, ValidationError, ValidationHost, }; +use polkadot_node_core_pvf_common::{error::PrepareError, pvf::PvfPrepData, PrepareStats}; use polkadot_node_primitives::{ BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT, }; diff --git a/node/core/pvf/binary-builder/src/builder.rs b/node/core/pvf/binary-builder/src/builder.rs index c7fe6eea7adb..fef803d74ace 100644 --- a/node/core/pvf/binary-builder/src/builder.rs +++ b/node/core/pvf/binary-builder/src/builder.rs @@ -300,7 +300,7 @@ fn build_project( file_name, format!( r#" - pub const {constant_name}: Option<&[u8]> = Some(include_bytes!("{binary}")); + pub const {constant_name}: &'static [u8] = include_bytes!("{binary}"); "#, ), ); diff --git a/node/core/pvf/binary-builder/src/lib.rs b/node/core/pvf/binary-builder/src/lib.rs index 261fa6d27cba..3a173d3ef416 100644 --- a/node/core/pvf/binary-builder/src/lib.rs +++ b/node/core/pvf/binary-builder/src/lib.rs @@ -231,3 +231,8 @@ impl std::ops::Deref for CargoCommandVersioned { fn color_output_enabled() -> bool { env::var(crate::BUILD_NO_COLOR).is_err() } + +/// Returns `true` when we are building for a musl target. +fn is_musl_target(target: &str) -> bool { + target.ends_with("-musl") +} diff --git a/node/core/pvf/binary-builder/src/prerequisites.rs b/node/core/pvf/binary-builder/src/prerequisites.rs index 669e9a665644..e482789aa9df 100644 --- a/node/core/pvf/binary-builder/src/prerequisites.rs +++ b/node/core/pvf/binary-builder/src/prerequisites.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::{write_file_if_changed, CargoCommand, CargoCommandVersioned}; +use crate::{write_file_if_changed, CargoCommand, CargoCommandVersioned, is_musl_target}; use std::{fs, path::Path}; @@ -49,12 +49,16 @@ pub(crate) fn check(target: &str) -> Result { /// Create the project that will be used to check that the required target is installed and to /// extract the rustc version. -fn create_check_target_project(project_dir: &Path) { +fn create_check_target_project(project_dir: &Path, target: &str) { let lib_rs_file = project_dir.join("src/lib.rs"); let main_rs_file = project_dir.join("src/main.rs"); let build_rs_file = project_dir.join("build.rs"); let manifest_path = project_dir.join("Cargo.toml"); + // TODO: Make this generalizable. + // See . + let crate_type = if is_musl_target(target) { "staticlib" } else { "cdylib" }; + write_file_if_changed( &manifest_path, r#" @@ -66,10 +70,12 @@ fn create_check_target_project(project_dir: &Path) { [lib] name = "builder_test" - crate-type = ["cdylib"] + crate-type = ["{}"] [workspace] - "#, + "# + .to_string() + .replace("{}", crate_type), ); write_file_if_changed(lib_rs_file, "pub fn test() {}"); @@ -118,10 +124,10 @@ fn check_target_installed( ) -> Result { let temp = tempdir().expect("Creating temp dir does not fail; qed"); fs::create_dir_all(temp.path().join("src")).expect("Creating src dir does not fail; qed"); - create_check_target_project(temp.path()); + create_check_target_project(temp.path(), target); let err_msg = - print_error_message(&format!("{} target not installed, please install it!", target)); + print_error_message(&format!("could not build with {} target, please make sure it is installed and check below for more info", target)); let manifest_path = temp.path().join("Cargo.toml").display().to_string(); let mut build_cmd = cargo_command.command(); diff --git a/node/core/pvf/common/src/lib.rs b/node/core/pvf/common/src/lib.rs index eaea6397e077..c806e591511a 100644 --- a/node/core/pvf/common/src/lib.rs +++ 
b/node/core/pvf/common/src/lib.rs @@ -34,6 +34,7 @@ const LOG_TARGET: &str = "parachain::pvf::common"; use std::mem; use tokio::io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _}; +#[doc(hidden)] pub mod tests { use std::time::Duration; diff --git a/node/core/pvf/src/execute/queue.rs b/node/core/pvf/src/execute/queue.rs index 2405348f2c64..135b5c4e6103 100644 --- a/node/core/pvf/src/execute/queue.rs +++ b/node/core/pvf/src/execute/queue.rs @@ -137,7 +137,7 @@ struct Queue { /// The receiver that receives messages to the pool. to_queue_rx: mpsc::Receiver, - program_path: PathBuf, + program_path: Option, spawn_timeout: Duration, /// The queue of jobs that are waiting for a worker to pick up. @@ -149,7 +149,7 @@ struct Queue { impl Queue { fn new( metrics: Metrics, - program_path: PathBuf, + program_path: Option, worker_capacity: usize, spawn_timeout: Duration, to_queue_rx: mpsc::Receiver, @@ -409,14 +409,14 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) { /// the queue would have to kill a newly started worker and spawn another one. /// Nevertheless, if the worker finishes executing the job, it becomes idle and may be used to execute other jobs with a compatible execution environment. 
async fn spawn_worker_task( - program_path: PathBuf, + program_path: Option, job: ExecuteJob, spawn_timeout: Duration, ) -> QueueEvent { use futures_timer::Delay; loop { - match super::worker_intf::spawn(&program_path, job.executor_params.clone(), spawn_timeout) + match super::worker_intf::spawn(program_path.clone(), job.executor_params.clone(), spawn_timeout) .await { Ok((idle, handle)) => break QueueEvent::Spawn(idle, handle, job), @@ -476,7 +476,7 @@ fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) { pub fn start( metrics: Metrics, - program_path: PathBuf, + program_path: Option, worker_capacity: usize, spawn_timeout: Duration, ) -> (mpsc::Sender, impl Future) { diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index 428b362c43ce..6c3d5df005ea 100644 --- a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -16,11 +16,12 @@ //! Host interface to the execute worker. +use polkadot_node_core_pvf_execute_worker::EXECUTE_EXE; use crate::{ artifacts::ArtifactPathId, worker_intf::{ - framed_recv, framed_send, path_to_bytes, spawn_with_program_path, IdleWorker, SpawnErr, - WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, + framed_recv, framed_send, path_to_bytes, spawn_job_with_worker_source, IdleWorker, SpawnErr, + WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, JobKind, WorkerSource }, LOG_TARGET, }; @@ -30,21 +31,31 @@ use parity_scale_codec::{Decode, Encode}; use polkadot_parachain::primitives::ValidationResult; use polkadot_primitives::ExecutorParams; -use std::{path::Path, time::Duration}; +use std::{path::{Path, PathBuf}, time::Duration}; use tokio::{io, net::UnixStream}; -/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout. -/// Sends a handshake message to the worker as soon as it is spawned. +/// Spawns a new worker with the given parameters. Sends a handshake message to the worker as soon +/// as it is spawned. 
/// -/// The program should be able to handle ` execute-worker ` invocation. +/// If the `program_path` is passed, will use that to spawn the worker. Otherwise we create the +/// worker in-memory from `EXECUTE_EXE`; see [`spawn_job_with_worker_source`]. +/// +/// The program should be able to handle this invocation: +/// ```text +/// execute-worker --socket path --node-impl-version +/// ``` pub async fn spawn( - program_path: &Path, + program_path: Option, executor_params: ExecutorParams, spawn_timeout: Duration, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let (mut idle_worker, worker_handle) = spawn_with_program_path( - "execute", - program_path, + let worker_source = match program_path { + Some(path) => WorkerSource::ProgramPath(path), + None => WorkerSource::InMemoryBytes(EXECUTE_EXE), + }; + let (mut idle_worker, worker_handle) = spawn_job_with_worker_source( + &JobKind::Execute, + worker_source, &["execute-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 0db6878c2103..be803e30a214 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -35,14 +35,9 @@ use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, pvf::PvfPrepData, }; -use polkadot_node_core_pvf_execute_worker::EXECUTE_EXE; -use polkadot_node_core_pvf_prepare_worker::PREPARE_EXE; use polkadot_parachain::primitives::ValidationResult; -use sp_maybe_compressed_blob::{decompress, CODE_BLOB_BOMB_LIMIT}; use std::{ collections::HashMap, - fs::OpenOptions, - io::Write, path::{Path, PathBuf}, time::{Duration, SystemTime}, }; @@ -149,10 +144,9 @@ struct ExecutePvfInputs { pub struct Config { /// The root directory where the prepared artifacts can be stored. pub cache_path: PathBuf, - /// If we are using the embedded worker binaries, the directory where they are extracted to.
- pub workers_path: Option, - /// The path to the program that can be used to spawn the prepare workers. - pub prepare_worker_program_path: PathBuf, + /// The path to the program that can be used to spawn the prepare workers. Use the in-memory + /// binary if `None`. + pub prepare_worker_program_path: Option, /// The time allotted for a prepare worker to spawn and report to the host. pub prepare_worker_spawn_timeout: Duration, /// The maximum number of workers that can be spawned in the prepare pool for tasks with the @@ -160,8 +154,9 @@ pub struct Config { pub prepare_workers_soft_max_num: usize, /// The absolute number of workers that can be spawned in the prepare pool. pub prepare_workers_hard_max_num: usize, - /// The path to the program that can be used to spawn the execute workers. - pub execute_worker_program_path: PathBuf, + /// The path to the program that can be used to spawn the execute workers. Use the in-memory + /// binary if `None`. + pub execute_worker_program_path: Option, /// The time allotted for an execute worker to spawn and report to the host. pub execute_worker_spawn_timeout: Duration, /// The maximum number of execute workers that can run at the same time. @@ -171,31 +166,21 @@ pub struct Config { impl Config { /// Create a new instance of the configuration. /// - /// The binary at `program_path` will be used if that is `Some`, otherwise the embedded workers - /// will be extracted to `workers_path` and used. - pub fn new( - cache_path: std::path::PathBuf, - workers_path: std::path::PathBuf, - program_path: Option, - ) -> Self { + /// The binary at `program_path` will be used if that is `Some`, otherwise the embedded worker + /// binaries will be extracted and used. + pub fn new(cache_path: std::path::PathBuf, program_path: Option) -> Self { // Do not contaminate the other parts of the codebase with the types from `tokio`. 
let cache_path = PathBuf::from(cache_path); - let (prepare_worker_path, execute_worker_path, workers_path) = - if let Some(path) = program_path { - let path = PathBuf::from(path); - (path.clone(), path, None) - } else { - ( - worker_path(&workers_path, "prepare"), - worker_path(&workers_path, "execute"), - Some(workers_path), - ) - }; + let (prepare_worker_path, execute_worker_path) = if let Some(path) = program_path { + let path = PathBuf::from(path); + (Some(path.clone()), Some(path)) + } else { + (None, None) + }; Self { cache_path, - workers_path, prepare_worker_program_path: prepare_worker_path.clone(), prepare_worker_spawn_timeout: Duration::from_secs(3), prepare_workers_soft_max_num: 1, @@ -247,17 +232,6 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future impl futures::Stream .map(|_| ()) } -// TODO: Should we purge unneeded binaries? -// TODO: Test on windows. -/// Extracts the worker binaries embedded in this binary onto disk and returns their paths. Skips -/// extraction if the binaries are already present. -async fn extract_worker_binaries(prepare_worker_path: &Path, execute_worker_path: &Path) { - // Options for opening a binary file. Should create only if it doesn't already exist, and create - // with secure permissions. - #[cfg(unix)] - use std::os::unix::fs::OpenOptionsExt; - let mut open_options = OpenOptions::new(); - #[cfg(unix)] - open_options.write(true).create_new(true).mode(0o744); - #[cfg(not(unix))] - open_options.write(true).create_new(true); - - gum::debug!( - target: LOG_TARGET, - "extracting prepare-worker binary to {}", - prepare_worker_path.display() - ); - if !prepare_worker_path.exists() { - let prepare_exe = decompress( - PREPARE_EXE.expect( - "prepare-worker binary is not available. 
\ - This means it was built with `BUILDER_SKIP_BUILD` flag", - ), - CODE_BLOB_BOMB_LIMIT, - ) - .expect("binary should have been built correctly; qed"); - let _ = open_options - .open(prepare_worker_path) - .and_then(|mut file| file.write_all(&prepare_exe)); - } - - gum::debug!( - target: LOG_TARGET, - "extracting execute-worker binary to {}", - execute_worker_path.display() - ); - if !execute_worker_path.exists() { - let execute_exe = decompress( - EXECUTE_EXE.expect( - "execute-worker binary is not available. \ - This means it was built with `BUILDER_SKIP_BUILD` flag", - ), - CODE_BLOB_BOMB_LIMIT, - ) - .expect("binary should have been built correctly; qed"); - let _ = open_options - .open(execute_worker_path) - .and_then(|mut file| file.write_all(&execute_exe)); - } -} - -/// Returns the expected path to this worker given the root of the cache. -/// -/// Appends with the version (including the commit) to avoid conflicts with other versions of -/// polkadot running, i.e. in testnets. -fn worker_path(workers_path: &Path, job_kind: &str) -> PathBuf { - // Windows needs the .exe path for executables. - #[cfg(windows)] - let extension = ".exe"; - #[cfg(not(windows))] - let extension = ""; - - let file_name = - format!("{}-worker_{}{}", job_kind, env!("SUBSTRATE_CLI_IMPL_VERSION"), extension); - workers_path.join(file_name) -} - #[cfg(test)] mod tests { use super::*; diff --git a/node/core/pvf/src/lib.rs b/node/core/pvf/src/lib.rs index 27f7791d9a80..802ad19ed925 100644 --- a/node/core/pvf/src/lib.rs +++ b/node/core/pvf/src/lib.rs @@ -108,4 +108,5 @@ pub use host::{start, Config, ValidationHost}; pub use metrics::Metrics; pub use worker_intf::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR}; -const LOG_TARGET: &str = "parachain::pvf"; +/// The log target for this crate. 
+pub const LOG_TARGET: &str = "parachain::pvf"; diff --git a/node/core/pvf/src/prepare/pool.rs b/node/core/pvf/src/prepare/pool.rs index ae8ecff5285c..4789d4db2fa7 100644 --- a/node/core/pvf/src/prepare/pool.rs +++ b/node/core/pvf/src/prepare/pool.rs @@ -110,7 +110,7 @@ enum PoolEvent { type Mux = FuturesUnordered>; struct Pool { - program_path: PathBuf, + program_path: Option, cache_path: PathBuf, spawn_timeout: Duration, to_pool: mpsc::Receiver, @@ -152,7 +152,7 @@ async fn run( let to_pool = break_if_fatal!(to_pool.ok_or(Fatal)); handle_to_pool( &metrics, - &program_path, + program_path.clone(), &cache_path, spawn_timeout, &mut spawned, @@ -198,7 +198,7 @@ async fn purge_dead( fn handle_to_pool( metrics: &Metrics, - program_path: &Path, + program_path: Option, cache_path: &Path, spawn_timeout: Duration, spawned: &mut HopSlotMap, @@ -209,7 +209,7 @@ fn handle_to_pool( ToPool::Spawn => { gum::debug!(target: LOG_TARGET, "spawning a new prepare worker"); metrics.prepare_worker().on_begin_spawn(); - mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed()); + mux.push(spawn_worker_task(program_path, spawn_timeout).boxed()); }, ToPool::StartWork { worker, pvf, artifact_path } => { if let Some(data) = spawned.get_mut(worker) { @@ -248,11 +248,11 @@ fn handle_to_pool( } } -async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> PoolEvent { +async fn spawn_worker_task(program_path: Option, spawn_timeout: Duration) -> PoolEvent { use futures_timer::Delay; loop { - match worker_intf::spawn(&program_path, spawn_timeout).await { + match worker_intf::spawn(program_path.clone(), spawn_timeout).await { Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle), Err(err) => { gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err); @@ -416,7 +416,7 @@ fn handle_concluded_no_rip( /// Spins up the pool and returns the future that should be polled to make the pool functional. 
pub fn start( metrics: Metrics, - program_path: PathBuf, + program_path: Option, cache_path: PathBuf, spawn_timeout: Duration, ) -> (mpsc::Sender, mpsc::UnboundedReceiver, impl Future) { diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index 86b041d3e847..291f22ba62a4 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -16,11 +16,12 @@ //! Host interface to the prepare worker. +use polkadot_node_core_pvf_prepare_worker::PREPARE_EXE; use crate::{ metrics::Metrics, worker_intf::{ - framed_recv, framed_send, path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, - SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, + framed_recv, framed_send, path_to_bytes, spawn_job_with_worker_source, tmpfile_in, IdleWorker, + SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR, JobKind, WorkerSource }, LOG_TARGET, }; @@ -38,16 +39,26 @@ use std::{ }; use tokio::{io, net::UnixStream}; -/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout. +/// Spawns a new worker with the given parameters. /// -/// The program should be able to handle ` prepare-worker ` invocation. +/// If the `program_path` is passed, will use that to spawn the worker. Otherwise we create the +/// worker in-memory from `PREPARE_EXE`; see [`spawn_job_with_worker_source`]. 
+/// +/// The program should be able to handle this invocation: +/// ```text +/// prepare-worker --socket path --node-impl-version +/// ``` pub async fn spawn( - program_path: &Path, + program_path: Option, spawn_timeout: Duration, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - spawn_with_program_path( - "prepare", - program_path, + let worker_source = match program_path { + Some(path) => WorkerSource::ProgramPath(path), + None => WorkerSource::InMemoryBytes(PREPARE_EXE), + }; + spawn_job_with_worker_source( + &JobKind::Prepare, + worker_source, + &["prepare-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")], spawn_timeout, ) diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/src/testing.rs index 7ac6d3ab3f4d..93d86088ae4d 100644 --- a/node/core/pvf/src/testing.rs +++ b/node/core/pvf/src/testing.rs @@ -20,7 +20,7 @@ //! artifact even for production builds. #[doc(hidden)] -pub use crate::worker_intf::{spawn_with_program_path, SpawnErr}; +pub use crate::worker_intf::{spawn_job_with_worker_source, JobKind, SpawnErr, WorkerSource}; use polkadot_primitives::ExecutorParams; @@ -33,6 +33,7 @@ macro_rules! decl_puppet_worker_main { sp_tracing::try_init_simple(); let args = std::env::args().collect::>(); + gum::trace!(target: $crate::LOG_TARGET, ?args, "running puppet worker"); if args.len() < 3 { panic!("wrong number of arguments"); } diff --git a/node/core/pvf/src/worker_intf.rs b/node/core/pvf/src/worker_intf.rs index 33144616601d..706286c75c48 100644 --- a/node/core/pvf/src/worker_intf.rs +++ b/node/core/pvf/src/worker_intf.rs @@ -17,10 +17,11 @@ //! Common logic for implementation of worker processes.
use crate::LOG_TARGET; -use futures::FutureExt as _; +use futures::{FutureExt as _, Future, poll}; use futures_timer::Delay; use pin_project::pin_project; use rand::Rng; +use sp_maybe_compressed_blob::{decompress, CODE_BLOB_BOMB_LIMIT}; use std::{ fmt, mem, path::{Path, PathBuf}, @@ -29,6 +30,7 @@ use std::{ time::Duration, }; use tokio::{ + fs::File, io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, ReadBuf}, net::{UnixListener, UnixStream}, process, @@ -38,23 +40,73 @@ use tokio::{ /// wall clock time). This is lenient because CPU time may go slower than wall clock time. pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; +/// The kind of job. +pub enum JobKind { + /// Prepare job. + Prepare, + /// Execute job. + Execute, + /// For PUPPET_EXE tests. + #[doc(hidden)] + IntegrationTest, +} + +impl fmt::Display for JobKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + JobKind::Execute => write!(f, "execute"), + JobKind::Prepare => write!(f, "prepare"), + JobKind::IntegrationTest => write!(f, "integration-test"), + } + } +} + +/// The source to spawn the worker binary from. +pub enum WorkerSource { + /// An on-disk path. + ProgramPath(PathBuf), + /// In-memory bytes. + InMemoryBytes(&'static [u8]), +} + +impl fmt::Debug for WorkerSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + WorkerSource::ProgramPath(path) => + f.write_str(&format!("WorkerSource::ProgramPath({:?})", path)), + WorkerSource::InMemoryBytes(_bytes) => + f.write_str("WorkerSource::InMemoryBytes({{...}}))"), + } + } +} + /// This is publicly exposed only for integration tests. 
#[doc(hidden)] -pub async fn spawn_with_program_path( - debug_id: &'static str, - program_path: impl Into, +pub async fn spawn_job_with_worker_source( + job_kind: &'static JobKind, + worker_source: WorkerSource, extra_args: &'static [&'static str], spawn_timeout: Duration, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let program_path = program_path.into(); - with_transient_socket_path(debug_id, |socket_path| { + let debug_id = job_kind.to_string(); + with_transient_socket_path(&job_kind, |socket_path| { let socket_path = socket_path.to_owned(); + gum::trace!( + target: LOG_TARGET, + %job_kind, + ?worker_source, + ?extra_args, + ?socket_path, + ?spawn_timeout, + "spawning a worker", + ); + async move { let listener = UnixListener::bind(&socket_path).map_err(|err| { gum::warn!( target: LOG_TARGET, %debug_id, - ?program_path, + ?worker_source, ?extra_args, "cannot bind unix socket: {:?}", err, @@ -62,18 +114,47 @@ pub async fn spawn_with_program_path( SpawnErr::Bind })?; - let handle = - WorkerHandle::spawn(&program_path, extra_args, socket_path).map_err(|err| { + let mut handle = match worker_source { + WorkerSource::ProgramPath(ref program_path) => + WorkerHandle::spawn_with_program_path( + program_path.clone(), + extra_args, + socket_path, + ) + .map_err(|err| { + gum::warn!( + target: LOG_TARGET, + %debug_id, + ?worker_source, + ?extra_args, + "cannot spawn a worker from path: {:?}", + err, + ); + SpawnErr::ProcessSpawnFromPath + })?, + WorkerSource::InMemoryBytes(worker_bytes) => WorkerHandle::spawn_with_worker_bytes( + &job_kind, + &worker_bytes, + extra_args, + socket_path, + ) + .await + .map_err(|err| { gum::warn!( - target: LOG_TARGET, + target:LOG_TARGET, %debug_id, - ?program_path, ?extra_args, - "cannot spawn a worker: {:?}", + "cannot spawn a worker from in-memory bytes: {:?}", err, ); - SpawnErr::ProcessSpawn - })?; + SpawnErr::ProcessSpawnFromBytes + })?, + }; + + match poll!(&mut handle) { + Poll::Ready(_) => println!("ready"), + 
Poll::Pending => println!("pending"), + } futures::select! { accept_result = listener.accept().fuse() => { @@ -81,7 +162,7 @@ pub async fn spawn_with_program_path( gum::warn!( target: LOG_TARGET, %debug_id, - ?program_path, + ?worker_source, ?extra_args, "cannot accept a worker: {:?}", err, @@ -94,7 +175,7 @@ pub async fn spawn_with_program_path( gum::warn!( target: LOG_TARGET, %debug_id, - ?program_path, + ?worker_source, ?extra_args, ?spawn_timeout, "spawning and connecting to socket timed out", @@ -107,12 +188,15 @@ pub async fn spawn_with_program_path( .await } -async fn with_transient_socket_path(debug_id: &'static str, f: F) -> Result +async fn with_transient_socket_path( + job_kind: &'static JobKind, + f: F, +) -> Result where F: FnOnce(&Path) -> Fut, Fut: futures::Future> + 'static, { - let socket_path = tmpfile(&format!("pvf-host-{}", debug_id)) + let socket_path = tmpfile(&format!("pvf-host-{}-", job_kind)) .await .map_err(|_| SpawnErr::TmpFile)?; let result = f(&socket_path).await; @@ -186,8 +270,10 @@ pub enum SpawnErr { Bind, /// An error happened during accepting a connection to the socket. Accept, - /// An error happened during spawning the process. - ProcessSpawn, + /// An error happened during spawning the process from a program path. + ProcessSpawnFromPath, + /// An error happened during spawning the process from in-memory bytes. + ProcessSpawnFromBytes, /// The deadline allotted for the worker spawning and connecting to the socket has elapsed. AcceptTimeout, /// Failed to send handshake after successful spawning was signaled @@ -209,14 +295,51 @@ pub struct WorkerHandle { stdout: process::ChildStdout, program: PathBuf, drop_box: Box<[u8]>, + // /// Hold the file descriptor as part of the worker. We remove the binary from the filesystem, + // /// so the fd needs to stay open for the file to stay alive. 
+ // file_handle: File, } impl WorkerHandle { - fn spawn( + fn new( + child: process::Child, + child_id: u32, + stdout: process::ChildStdout, + program: PathBuf, + // file_handle: File, + ) -> Self { + WorkerHandle { + child, + child_id, + stdout, + program, + // We don't expect the bytes to be ever read. But in case we do, we should not use a buffer + // of a small size, because otherwise if the child process does return any data we will end up + // issuing a syscall for each byte. We also prefer not to do allocate that on the stack, since + // each poll the buffer will be allocated and initialized (and that's due `poll_read` takes &mut [u8] + // and there are no guarantees that a `poll_read` won't ever read from there even though that's + // unlikely). + // + // OTOH, we also don't want to be super smart here and we could just afford to allocate a buffer + // for that here. + drop_box: vec![0; 8192].into_boxed_slice(), + // file_handle, + } + } + + fn spawn_with_program_path( program: impl AsRef, extra_args: &[&str], socket_path: impl AsRef, ) -> io::Result { + gum::trace!( + target: LOG_TARGET, + program_path = ?program.as_ref(), + ?extra_args, + socket_path = ?socket_path.as_ref(), + "spawning with program path", + ); + let mut child = process::Command::new(program.as_ref()) .args(extra_args) .arg("--socket-path") @@ -233,22 +356,171 @@ impl WorkerHandle { .take() .expect("the process spawned with piped stdout should have the stdout handle"); - Ok(WorkerHandle { - child, - child_id, - stdout, - program: program.as_ref().to_path_buf(), - // We don't expect the bytes to be ever read. But in case we do, we should not use a buffer - // of a small size, because otherwise if the child process does return any data we will end up - // issuing a syscall for each byte. 
We also prefer not to do allocate that on the stack, since - // each poll the buffer will be allocated and initialized (and that's due `poll_read` takes &mut [u8] - // and there are no guarantees that a `poll_read` won't ever read from there even though that's - // unlikely). - // - // OTOH, we also don't want to be super smart here and we could just afford to allocate a buffer - // for that here. - drop_box: vec![0; 8192].into_boxed_slice(), - }) + Ok(WorkerHandle::new(child, child_id, stdout, program.as_ref().to_owned())) + } + + // TODO: On Linux, use memfd_create. + // TODO: File sealing!! + /// Spawn the worker from in-memory bytes. + #[cfg(linux)] + async fn spawn_with_worker_bytes( + job_kind: &JobKind, + worker_bytes: &'static [u8], + extra_args: &'static [&'static str], + socket_path: impl AsRef, + ) -> io::Result { + let bytes = decompress( + worker_bytes.expect(&format!( + "{}-worker binary is not available. \ + This means it was built with `BUILDER_SKIP_BUILD` flag", + job_kind, + )), + CODE_BLOB_BOMB_LIMIT, + ) + .expect("binary should have been built correctly; qed"); + } + + /// Spawn the worker from in-memory bytes. + /// + /// On MacOS there is no good way to open files directly from memory. The best we can do is to + /// write the bytes on-disk to a random filename, launch the process, and clean up the file when + /// the worker shuts down to remove it from the file system. This leaves a possible race + /// condition between writing and unlinking, and we may not have permissions to write or execute + /// the file, but that is acceptable -- MacOS is mainly a development environment and not a + /// secure platform to run validators on. + /// + /// Will first try to spawn the worker from a tmp directory. If that doesn't work (i.e. we don't + /// have execute permission), we try the directory of the current exe, since we should be + /// allowed to execute files in this directory. 
The issue with this latter approach is that when + /// testing, the binary is located in the target/ directory, and any changes here trigger a + /// rebuild by `binary-builder`. For this reason it's a last resort. + #[cfg(all(unix, not(linux)))] + async fn spawn_with_worker_bytes( + job_kind: &JobKind, + worker_bytes: &'static [u8], + extra_args: &'static [&'static str], + socket_path: impl AsRef, + ) -> io::Result { + use tokio::fs::{self, OpenOptions}; + + // Shared helper code. Needs to be a function because async closures are unstable. + async fn write_and_execute_bytes( + parent_path: &Path, + program_path: &Path, + bytes: &[u8], + extra_args: &'static [&'static str], + socket_path: &Path, + job_kind: &JobKind, + ) -> io::Result { + gum::trace!( + target: LOG_TARGET, + %job_kind, + ?program_path, + ?parent_path, + bytes_len = %bytes.len(), + "writing worker bytes to disk", + ); + + // Make sure the directory exists. + fs::create_dir_all(parent_path).await?; + + // Overwrite if the worker binary already exists on-disk (e.g. race condition). + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o744) + .open(&program_path) + .await?; + + async fn handle_file( + mut file: fs::File, + program_path: &Path, + bytes: &[u8], + extra_args: &[&str], + socket_path: &Path, + ) -> io::Result { + file.write_all(&bytes).await?; + file.sync_all().await?; + std::mem::forget(file); + + // Try to execute file. Use `spawn_with_program_path` because MacOS lacks `fexecve`. + WorkerHandle::spawn_with_program_path(&program_path, extra_args, socket_path) + } + let result = handle_file(file, &program_path, bytes, extra_args, socket_path).await; + // Delete/unlink file. 
+ Delay::new(Duration::from_millis(100)).await; // non-blocking delay; `std::thread::sleep` would block the async executor + if let Err(err) = fs::remove_file(&program_path).await { + gum::warn!( + target: LOG_TARGET, + ?program_path, + "error removing file: {}", + err, + ); + } + result + } + + let worker_bytes = decompress(worker_bytes, CODE_BLOB_BOMB_LIMIT) + .expect("binary should have been built correctly; qed"); + + let file_name_prefix = format!("pvf-{}-worker-", job_kind); + + // First, try with a temp file. + let parent_path = tempfile::tempdir()?.into_path(); // `into_path` keeps the dir alive; a dropped `TempDir` deletes it + let program_path = tmpfile_in(&file_name_prefix, &parent_path).await?; + match write_and_execute_bytes( + &parent_path, + &program_path, + &worker_bytes, + extra_args, + socket_path.as_ref(), + job_kind, + ) + .await + { + Ok(worker) => return Ok(worker), + Err(err) => { + gum::warn!( + target: LOG_TARGET, + %job_kind, + ?program_path, + "could not write and execute bytes; error: {}", + err, + ); + }, + }; + + // If that didn't work, try in the current directory. + let parent_path = std::env::current_exe()? + .parent() + .expect("exe always has a parent directory; qed") + .to_owned(); + let program_path = tmpfile_in(&file_name_prefix, &parent_path).await?; + match write_and_execute_bytes( + &parent_path, + &program_path, + &worker_bytes, + extra_args, + socket_path.as_ref(), + job_kind, + ) + .await + { + Ok(worker) => return Ok(worker), + Err(err) => gum::warn!( + target: LOG_TARGET, + %job_kind, + ?program_path, + "could not write and execute bytes; error: {}", + err, + ), + }; + + Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("could not extract and execute {}-worker binary", job_kind), + )) } /// Returns the process id of this worker.
@@ -257,7 +529,7 @@ impl WorkerHandle { } } -impl futures::Future for WorkerHandle { +impl Future for WorkerHandle { type Output = (); fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/tests/it/main.rs index b5fa48379232..d9792a659e70 100644 --- a/node/core/pvf/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -29,7 +29,7 @@ use tokio::sync::Mutex; mod adder; mod worker_common; -const PUPPET_EXE: &str = env!("CARGO_BIN_EXE_puppet_worker"); +const PUPPET_EXE_PATH: &str = env!("CARGO_BIN_EXE_puppet_worker"); const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(3); @@ -48,8 +48,7 @@ impl TestHost { F: FnOnce(&mut Config), { let cache_dir = tempfile::tempdir().unwrap().path().join("pvf-artifacts"); - let workers_dir = tempfile::tempdir().unwrap().path().join("pvf-workers"); - let mut config = Config::new(cache_dir.to_owned(), workers_dir.to_owned(), None); + let mut config = Config::new(cache_dir.to_owned(), None); f(&mut config); let (host, task) = start(config, Metrics::default()); let _ = tokio::task::spawn(task); @@ -85,6 +84,8 @@ impl TestHost { #[tokio::test] async fn terminates_on_timeout() { + sp_tracing::init_for_tests(); + let host = TestHost::new(); let start = std::time::Instant::now(); diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 439ac8538c95..8286a9f151d1 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -14,32 +14,54 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::PUPPET_EXE; -use polkadot_node_core_pvf::testing::{spawn_with_program_path, SpawnErr}; +use crate::PUPPET_EXE_PATH; +use polkadot_node_core_pvf::testing::{spawn_job_with_worker_source, JobKind, SpawnErr, WorkerSource}; +use polkadot_node_core_pvf_prepare_worker::PREPARE_EXE; use std::time::Duration; // Test spawning a program that immediately exits with a failure code. #[tokio::test] async fn spawn_immediate_exit() { - let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["exit"], Duration::from_secs(2)) - .await; + let result = spawn_job_with_worker_source( + &JobKind::IntegrationTest, + WorkerSource::ProgramPath(PUPPET_EXE_PATH.into()), + &["exit"], + Duration::from_secs(2), + ) + .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } #[tokio::test] async fn spawn_timeout() { - let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["sleep"], Duration::from_secs(2)) - .await; + let result = spawn_job_with_worker_source( + &JobKind::IntegrationTest, + WorkerSource::ProgramPath(PUPPET_EXE_PATH.into()), + &["sleep"], + Duration::from_secs(2), + ) + .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } #[tokio::test] async fn should_connect() { - let _ = spawn_with_program_path( - "integration-test", - PUPPET_EXE, + let _ = spawn_job_with_worker_source( + &JobKind::IntegrationTest, + WorkerSource::ProgramPath(PUPPET_EXE_PATH.into()), + &["prepare-worker"], + Duration::from_secs(2), + ) + .await + .unwrap(); +} + +#[tokio::test] +async fn should_connect_to_in_memory_binary() { + sp_tracing::init_for_tests(); + let _ = spawn_job_with_worker_source( + &JobKind::Prepare, + WorkerSource::InMemoryBytes(PREPARE_EXE), &["prepare-worker"], Duration::from_secs(2), )