diff --git a/rust/performance_counters/Cargo.toml b/rust/performance_counters/Cargo.toml new file mode 100644 index 000000000..24572aebb --- /dev/null +++ b/rust/performance_counters/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "performance_counters" +version = "1.0.0" +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +candid = "0.9.11" +ic-cdk = "0.11.3" +ic-cdk-macros = "0.8.1" diff --git a/rust/performance_counters/Makefile b/rust/performance_counters/Makefile new file mode 100644 index 000000000..bdc9ce3ff --- /dev/null +++ b/rust/performance_counters/Makefile @@ -0,0 +1,38 @@ +.PHONY: all +all: build + +.PHONY: build +.SILENT: build +build: + dfx canister create --all + dfx build + +.PHONY: install +.SILENT: install +install: build + dfx canister install performance_counters + +.PHONY: upgrade +.SILENT: upgrade +upgrade: build + dfx canister install performance_counters --mode=upgrade + +.PHONY: deploy +.SILENT: deploy +deploy: + # Deploy the canisters and run periodic tasks with 1s interval. + dfx deploy performance_counters + +.PHONY: test +.SILENT: test +test: deploy + # Wait at least 2 seconds. + sleep 2 + # Validate the counters are non-zero. + dfx canister call performance_counters for_update | grep -vw '0' && echo 'PASS' + dfx canister call performance_counters for_composite_query | grep -vw '0' && echo 'PASS' + +.PHONY: clean +.SILENT: clean +clean: + rm -fr .dfx diff --git a/rust/performance_counters/README.md b/rust/performance_counters/README.md new file mode 100644 index 000000000..9266b2013 --- /dev/null +++ b/rust/performance_counters/README.md @@ -0,0 +1,165 @@ +# Performance counter + +## Overview + +The canister can query one of the "performance counters", which is a deterministic monotonically increasing integer approximating the amount of work the canister has done. Developers might use this data to profile and optimize the canister performance. + +```Candid +ic0.performance_counter : (counter_type : i32) -> i64 +```` + +The argument `type` decides which performance counter to return: + +- 0 : current execution instruction counter. + The number of WebAssembly instructions the canister has executed + since the beginning of the current Message execution. + +- 1 : call context instruction counter. + + - For replicated message execution, it is the number of WebAssembly instructions + the canister has executed within the call context of the current Message execution + since Call context creation. The counter monotonically increases across all message + executions in the call context until the corresponding call context is removed. + + - For non-replicated message execution, it is the number of WebAssembly instructions + the canister has executed within the corresponding `composite_query_helper` + in Query call. The counter monotonically increases across the executions + of the composite query method and the composite query callbacks + until the corresponding `composite_query_helper` returns + (ignoring WebAssembly instructions executed within any further downstream calls + of `composite_query_helper`). + +In the future, the IC might expose more performance counters. + +## Tutorial + +## Prerequisites + +This example requires an installation of: + +- [x] Install the [IC SDK](https://internetcomputer.org/docs/current/developer-docs/setup/install/index.mdx). + +### Step 1: Begin by opening a terminal window and navigating into the project's directory + +```sh +cd examples/rust/performance_counters +``` + +### Step 2: Start a clean local Internet Computer replica and a web server + +```sh +dfx stop +dfx start --clean +``` + +This terminal will stay blocked, printing log messages, until the `Ctrl+C` is pressed or `dfx stop` command is run. + +Example output: + +```sh +% dfx stop && dfx start --clean +[...] +Dashboard: http://localhost:63387/_/dashboard +``` + +### Step 3: Open another terminal window in the same directory + +```sh +cd examples/rust/performance_counters +``` + +### Step 4: Compile and deploy `performance_counters` canister + +```sh +dfx deploy +``` + +Example output: + +```sh +% dfx deploy +[...] +Deployed canisters. +URLs: + Backend canister via Candid interface: + performance_counters: http://127.0.0.1/... +``` + +### Step 5: Call `performance_counters` canister `for_update` method + +```sh +dfx canister call performance_counters for_update +``` + +Example output: + +```sh +% dfx canister call performance_counters for_update +(6_618_678 : nat64, 19_886_107 : nat64) +``` + +Note, how the current message execution counter (~6M instructions) is much different from the call context counter (~19M instructions). + +### Step 6: Check the Internet Computer replica terminal window for more details + +Example replica log output: + +```text +Performance counters for update call: current (0) call context (1) + before the nested call: 6614001 6614201 + > inside the 1st nested call: 12425 12625 + after the 1st nested call: 6618836 13250387 + > inside the 2nd nested call: 12516 12716 + after the 2nd nested call: 6618678 19886107 +``` + +Note, how the current execution instruction counter (0) stays at ~6M instructions after each await point. +By contrast, the call context performance counter (1) is monotonically increasing (~6M, ~13M, ~19M instructions). + +Also note, that both counters start over for each nested execution (~12K instructions). + +### Step 7: Repeat the steps above calling `for_composite_query` method + +```sh +dfx canister call performance_counters for_composite_query +``` + +Example output: + +```sh +% dfx canister call performance_counters for_update +(6_621_477 : nat64, 19_893_467 : nat64) +``` + +Example replica log output: + +```text +Perf. counters for composite query call: current (0) call context (1) + before the nested call: 6614001 6614201 + > inside the 1st nested call: 13567 13767 + after the 1st nested call: 6623158 13254766 + > inside the 2nd nested call: 13567 13767 + after the 2nd nested call: 6621477 19893467 +``` + +Note the same performance counters behavior for composite queries. + +## Further learning + +1. Have a look at the locally running dashboard. The URL is at the end of the `dfx start` command: `Dashboard: http://localhost/...` +2. Check out the Candid user interface for `performance_counters` canister. The URL is at the end of the `dfx deploy` command: `performance_counters: http://127.0.0.1/...` + +### Canister Interface + +The `performance_counters` canisters provide the following interface: + +- `for_update` — return all the performance counters values after two nested update calls. +- `for_composite_query` — return all the performance counters values after two nested composite query calls. + +## Conclusion + +Performance counters is a great tool to optimize canister performance, both for update calls and queries. + +## Security considerations and security best practices + +If you base your application on this example, we recommend you familiarize yourself with and adhere to the [security best practices](https://internetcomputer.org/docs/current/references/security/) for developing on the Internet Computer. This example may not implement all the best practices. diff --git a/rust/performance_counters/dfx.json b/rust/performance_counters/dfx.json new file mode 100644 index 000000000..46f790c21 --- /dev/null +++ b/rust/performance_counters/dfx.json @@ -0,0 +1,10 @@ +{ + "canisters": { + "performance_counters": { + "candid": "performance_counters.did", + "package": "performance_counters", + "type": "rust" + } + }, + "version": 1 +} diff --git a/rust/performance_counters/performance_counters.did b/rust/performance_counters/performance_counters.did new file mode 100644 index 000000000..c1fceeda5 --- /dev/null +++ b/rust/performance_counters/performance_counters.did @@ -0,0 +1,4 @@ +service : { + "for_update" : () -> (nat64, nat64); + "for_composite_query" : () -> (nat64, nat64) query; +}; diff --git a/rust/performance_counters/src/lib.rs b/rust/performance_counters/src/lib.rs new file mode 100644 index 000000000..f450f49aa --- /dev/null +++ b/rust/performance_counters/src/lib.rs @@ -0,0 +1,125 @@ +//! An example of performance counters usage. +//! +//! dfx deploy && dfx canister call performance_counters get + +// The following performance counters supported: +// +// - 0 : current execution instruction counter. +// The number of WebAssembly instructions the canister has executed +// since the beginning of the current Message execution. +// +// - 1 : call context instruction counter. +// - For replicated message execution, it is the number of WebAssembly instructions +// the canister has executed within the call context of the current Message execution +// since Call context creation. The counter monotonically increases across all message +// executions in the call context until the corresponding call context is removed. +// - For non-replicated message execution, it is the number of WebAssembly instructions +// the canister has executed within the corresponding `composite_query_helper` +// in Query call. The counter monotonically increases across the executions +// of the composite query method and the composite query callbacks +// until the corresponding `composite_query_helper` returns +// (ignoring WebAssembly instructions executed within any further downstream calls +// of `composite_query_helper`). +// +// In the future, the IC might expose more performance counters. +use ic_cdk::api::{call_context_instruction_counter, instruction_counter}; + +/// Pretty print the `title` and a corresponding `tuple` with counters. +fn pretty_print(title: N, counters: (T, T)) { + ic_cdk::println!("{:40} {:<15} {:<15}", title, counters.0, counters.1); +} + +/// Loop to simulate some amount of work. +fn do_some_work() { + for i in 0..1_000_000 { + // The black box hint is to avoid compiler optimizations for the loop. + std::hint::black_box(i); + } +} + +/// Returns a tuple with all the performance counters. +fn counters() -> (u64, u64) { + (instruction_counter(), call_context_instruction_counter()) +} + +/// Emulate a nested inter-canister update call. +#[ic_cdk_macros::update] +fn nested_update_call() -> (u64, u64) { + counters() +} + +/// Emulate a nested inter-canister composite query call. +#[ic_cdk_macros::query(composite = true)] +fn nested_composite_query_call() -> (u64, u64) { + counters() +} + +//////////////////////////////////////////////////////////////////////// +// Canister interface +//////////////////////////////////////////////////////////////////////// + +/// Example usage: `dfx canister call performance_counters for_update` +#[ic_cdk_macros::update] +async fn for_update() -> (u64, u64) { + do_some_work(); + let before = counters(); + + let inside_1st: (u64, u64) = ic_cdk::call(ic_cdk::id(), "nested_update_call", ()) + .await + .unwrap(); + + do_some_work(); + let after_1st = counters(); + + let inside_2nd: (u64, u64) = ic_cdk::call(ic_cdk::id(), "nested_update_call", ()) + .await + .unwrap(); + + do_some_work(); + let after_2nd = counters(); + + pretty_print( + "Performance counters for update call:", + ("current (0)", "call context (1)"), + ); + pretty_print(" before the nested call:", before); + pretty_print(" > inside the 1st nested call:", inside_1st); + pretty_print(" after the 1st nested call:", after_1st); + pretty_print(" > inside the 2nd nested call:", inside_2nd); + pretty_print(" after the 2nd nested call:", after_2nd); + + after_2nd +} + +/// Example usage: `dfx canister call performance_counters for_composite_query` +#[ic_cdk_macros::query(composite = true)] +async fn for_composite_query() -> (u64, u64) { + do_some_work(); + let before = counters(); + + let inside_1st: (u64, u64) = ic_cdk::call(ic_cdk::id(), "nested_composite_query_call", ()) + .await + .unwrap(); + + do_some_work(); + let after_1st = counters(); + + let inside_2nd: (u64, u64) = ic_cdk::call(ic_cdk::id(), "nested_composite_query_call", ()) + .await + .unwrap(); + + do_some_work(); + let after_2nd = counters(); + + pretty_print( + "Perf. counters for composite query call:", + ("current (0)", "call context (1)"), + ); + pretty_print(" before the nested call:", before); + pretty_print(" > inside the 1st nested call:", inside_1st); + pretty_print(" after the 1st nested call:", after_1st); + pretty_print(" > inside the 2nd nested call:", inside_2nd); + pretty_print(" after the 2nd nested call:", after_2nd); + + after_2nd +}