From 2018d60f1ff63720f7df0f687bba91823e7bf9ba Mon Sep 17 00:00:00 2001 From: fmarek-kindred <123923685+fmarek-kindred@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:28:24 +1000 Subject: [PATCH] Cohort Banking example with SDK (#63) * feature: Cohort SDK with delegated DB operations. --- .env.example | 1 + .vscode/extensions.json | 6 - .vscode/launch.json | 101 --- Cargo.lock | 623 +++++++++++++----- Makefile | 7 +- examples/agent_client/Cargo.toml | 1 + .../agent_client/examples/agent_client.rs | 48 +- .../Cargo.toml | 2 +- .../examples/cohort_banking.rs | 98 +-- examples/cohort_banking_with_sdk/Cargo.toml | 36 + .../examples/cohort_banking_with_sdk.rs | 474 +++++++++++++ packages/cohort/Cargo.toml | 2 + packages/cohort/src/bank_api.rs | 42 +- packages/cohort/src/bin/preload_db.rs | 2 +- packages/cohort/src/bin/replicator.rs | 7 +- packages/cohort/src/config_loader.rs | 6 +- packages/cohort/src/core.rs | 4 +- packages/cohort/src/delay_controller.rs | 1 + packages/cohort/src/examples_support/mod.rs | 1 + .../src/examples_support/queue_processor.rs} | 18 +- packages/cohort/src/lib.rs | 1 + .../src/replicator/pg_replicator_installer.rs | 2 +- .../replicator/services/replicator_service.rs | 1 + .../services/statemap_queue_service.rs | 11 +- .../src/replicator/utils/installer_utils.rs | 4 +- packages/cohort/src/snapshot_api.rs | 28 +- .../cohort/src/state/postgres/data_store.rs | 60 +- .../cohort/src/state/postgres/database.rs | 178 ++++- .../src/state/postgres/database_config.rs | 5 +- packages/cohort_banking/Cargo.toml | 34 + packages/cohort_banking/src/app.rs | 134 ++++ packages/cohort_banking/src/callbacks/mod.rs | 3 + .../src/callbacks/oo_installer.rs | 261 ++++++++ .../src/callbacks/state_provider.rs | 133 ++++ .../src/callbacks/statemap_installer.rs | 77 +++ .../src/examples_support/mod.rs | 1 + .../src/examples_support/queue_processor.rs | 84 +++ packages/cohort_banking/src/lib.rs | 6 + packages/cohort_banking/src/metrics.rs | 240 +++++++ 
.../cohort_banking/src/model/bank_account.rs | 66 ++ packages/cohort_banking/src/model/mod.rs | 3 + packages/cohort_banking/src/model/requests.rs | 78 +++ packages/cohort_banking/src/model/snapshot.rs | 56 ++ packages/cohort_banking/src/state/mod.rs | 1 + .../src/state/postgres/database.rs | 226 +++++++ .../src/state/postgres/database_config.rs | 59 ++ .../cohort_banking/src/state/postgres/mod.rs | 2 + packages/cohort_sdk/Cargo.toml | 27 + packages/cohort_sdk/src/cohort.rs | 485 ++++++++++++++ packages/cohort_sdk/src/delay_controller.rs | 46 ++ packages/cohort_sdk/src/installer_callback.rs | 17 + packages/cohort_sdk/src/lib.rs | 6 + packages/cohort_sdk/src/model/callbacks.rs | 34 + packages/cohort_sdk/src/model/internal.rs | 10 + packages/cohort_sdk/src/model/mod.rs | 233 +++++++ packages/cohort_sdk/src/replicator/core.rs | 196 ++++++ packages/cohort_sdk/src/replicator/mod.rs | 7 + .../cohort_sdk/src/replicator/services/mod.rs | 2 + .../replicator/services/replicator_service.rs | 88 +++ .../services/statemap_installer_service.rs | 35 + packages/cohort_sdk/src/replicator/suffix.rs | 113 ++++ .../cohort_sdk/src/replicator/tests/mod.rs | 3 + .../cohort_sdk/src/replicator/tests/suffix.rs | 181 +++++ .../src/replicator/tests/test_utils.rs | 119 ++++ .../cohort_sdk/src/replicator/tests/utils.rs | 102 +++ packages/cohort_sdk/src/replicator/utils.rs | 36 + .../src/replicator2/cohort_replicator.rs | 130 ++++ .../src/replicator2/cohort_suffix.rs | 368 +++++++++++ packages/cohort_sdk/src/replicator2/mod.rs | 4 + packages/cohort_sdk/src/replicator2/model.rs | 46 ++ .../cohort_sdk/src/replicator2/service.rs | 106 +++ packages/examples_support/src/cohort/mod.rs | 1 - packages/examples_support/src/lib.rs | 1 - .../src/load_generator/generator.rs | 9 +- .../src/load_generator/models.rs | 4 + packages/talos_agent/src/agent/core.rs | 21 +- packages/talos_agent/src/agent/errors.rs | 1 - .../talos_agent/src/agent/state_manager.rs | 2 +- packages/talos_agent/src/api.rs | 16 +- 79 files 
changed, 5221 insertions(+), 462 deletions(-) delete mode 100644 .vscode/extensions.json delete mode 100644 .vscode/launch.json rename examples/{cohort_banking => cohort_banking_example}/Cargo.toml (97%) rename examples/{cohort_banking => cohort_banking_example}/examples/cohort_banking.rs (89%) create mode 100644 examples/cohort_banking_with_sdk/Cargo.toml create mode 100644 examples/cohort_banking_with_sdk/examples/cohort_banking_with_sdk.rs create mode 100644 packages/cohort/src/examples_support/mod.rs rename packages/{examples_support/src/cohort/queue_workers.rs => cohort/src/examples_support/queue_processor.rs} (98%) create mode 100644 packages/cohort_banking/Cargo.toml create mode 100644 packages/cohort_banking/src/app.rs create mode 100644 packages/cohort_banking/src/callbacks/mod.rs create mode 100644 packages/cohort_banking/src/callbacks/oo_installer.rs create mode 100644 packages/cohort_banking/src/callbacks/state_provider.rs create mode 100644 packages/cohort_banking/src/callbacks/statemap_installer.rs create mode 100644 packages/cohort_banking/src/examples_support/mod.rs create mode 100644 packages/cohort_banking/src/examples_support/queue_processor.rs create mode 100644 packages/cohort_banking/src/lib.rs create mode 100644 packages/cohort_banking/src/metrics.rs create mode 100644 packages/cohort_banking/src/model/bank_account.rs create mode 100644 packages/cohort_banking/src/model/mod.rs create mode 100644 packages/cohort_banking/src/model/requests.rs create mode 100644 packages/cohort_banking/src/model/snapshot.rs create mode 100644 packages/cohort_banking/src/state/mod.rs create mode 100644 packages/cohort_banking/src/state/postgres/database.rs create mode 100644 packages/cohort_banking/src/state/postgres/database_config.rs create mode 100644 packages/cohort_banking/src/state/postgres/mod.rs create mode 100644 packages/cohort_sdk/Cargo.toml create mode 100644 packages/cohort_sdk/src/cohort.rs create mode 100644 
packages/cohort_sdk/src/delay_controller.rs create mode 100644 packages/cohort_sdk/src/installer_callback.rs create mode 100644 packages/cohort_sdk/src/lib.rs create mode 100644 packages/cohort_sdk/src/model/callbacks.rs create mode 100644 packages/cohort_sdk/src/model/internal.rs create mode 100644 packages/cohort_sdk/src/model/mod.rs create mode 100644 packages/cohort_sdk/src/replicator/core.rs create mode 100644 packages/cohort_sdk/src/replicator/mod.rs create mode 100644 packages/cohort_sdk/src/replicator/services/mod.rs create mode 100644 packages/cohort_sdk/src/replicator/services/replicator_service.rs create mode 100644 packages/cohort_sdk/src/replicator/services/statemap_installer_service.rs create mode 100644 packages/cohort_sdk/src/replicator/suffix.rs create mode 100644 packages/cohort_sdk/src/replicator/tests/mod.rs create mode 100644 packages/cohort_sdk/src/replicator/tests/suffix.rs create mode 100644 packages/cohort_sdk/src/replicator/tests/test_utils.rs create mode 100644 packages/cohort_sdk/src/replicator/tests/utils.rs create mode 100644 packages/cohort_sdk/src/replicator/utils.rs create mode 100644 packages/cohort_sdk/src/replicator2/cohort_replicator.rs create mode 100644 packages/cohort_sdk/src/replicator2/cohort_suffix.rs create mode 100644 packages/cohort_sdk/src/replicator2/mod.rs create mode 100644 packages/cohort_sdk/src/replicator2/model.rs create mode 100644 packages/cohort_sdk/src/replicator2/service.rs delete mode 100644 packages/examples_support/src/cohort/mod.rs diff --git a/.env.example b/.env.example index 4b9916c2..e0282181 100644 --- a/.env.example +++ b/.env.example @@ -51,6 +51,7 @@ COHORT_PG_PORT=5432 COHORT_PG_USER=postgres COHORT_PG_PASSWORD=admin COHORT_PG_DATABASE=talos-sample-cohort-dev +COHORT_PG_POOL_SIZE=10 # Replicator and Statemap Installer Services REPLICATOR_KAFKA_COMMIT_FREQ_MS=10000 diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 712ec527..00000000 --- 
a/.vscode/extensions.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "recommendations": [ - "vadimcn.vscode-lldb", - "rust-lang.rust-analyzer" - ] -} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 557be1af..00000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,101 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in library 'certifier_library'", - "cargo": { - "args": [ - "test", - "--no-run", - "--lib", - "--package=talos-certifier" - ], - "filter": { - "name": "certifier_library", - "kind": "lib" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug executable 'talos-certifier'", - "cargo": { - "args": [ - "build", - "--bin=talos-certifier", - "--package=talos-certifier" - ], - "filter": { - "name": "talos-certifier", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in executable 'talos-certifier'", - "cargo": { - "args": [ - "test", - "--no-run", - "--bin=talos-certifier", - "--package=talos-certifier" - ], - "filter": { - "name": "talos-certifier", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug executable 'deploy_kafka'", - "cargo": { - "args": [ - "build", - "--bin=deploy_kafka", - "--package=talos-certifier" - ], - "filter": { - "name": "deploy_kafka", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in executable 'deploy_kafka'", - "cargo": { - "args": [ - "test", - "--no-run", - 
"--bin=deploy_kafka", - "--package=talos-certifier" - ], - "filter": { - "name": "deploy_kafka", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - } - ] -} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 1b8a45d3..edfee74e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "agent_client" version = "0.0.1" @@ -11,13 +26,14 @@ dependencies = [ "env_logger", "examples_support", "log", - "rdkafka", + "rand", + "rdkafka 0.29.0", "rdkafka-sys", "serde", "serde_json", - "strum", + "strum 0.24.1", "talos_agent", - "time 0.3.22", + "time 0.3.24", "tokio", "uuid", ] @@ -83,9 +99,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "async-channel" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf46fee83e5ccffc220104713af3292ff9bc7c64c7de289f66dae8e38d826833" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" dependencies = [ "concurrent-queue", "event-listener", @@ -111,18 +127,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] name = "async-trait" -version = "0.1.68" +version = "0.1.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +checksum = 
"cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] @@ -142,6 +158,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base64" version = "0.21.2" @@ -154,6 +185,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + [[package]] name = "bitvec" version = "1.0.1" @@ -182,7 +219,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" dependencies = [ "borsh-derive", - "hashbrown 0.12.3", + "hashbrown 0.13.2", ] [[package]] @@ -348,7 +385,7 @@ version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ - "bitflags", + "bitflags 1.3.2", "clap_lex", "indexmap 1.9.3", "textwrap", @@ -368,8 +405,10 @@ name = "cohort" version = "0.0.1" dependencies = [ "ahash 0.8.3", + "async-channel", "async-trait", "csv", + "deadpool", "deadpool-postgres", "env_logger", "futures", @@ -379,19 +418,19 @@ dependencies = [ "mockall", "multimap 0.9.0", "rand", - "rdkafka", + "rdkafka 0.29.0", "rdkafka-sys", "refinery", "rust_decimal", 
"serde", "serde_json", - "strum", + "strum 0.24.1", "talos_agent", "talos_certifier", "talos_certifier_adapters", "talos_suffix", "thiserror", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-postgres", "tokio-test", @@ -402,6 +441,36 @@ dependencies = [ [[package]] name = "cohort_banking" version = "0.0.1" +dependencies = [ + "async-channel", + "async-trait", + "cohort_sdk", + "deadpool", + "deadpool-postgres", + "env_logger", + "futures", + "log", + "metrics", + "opentelemetry", + "opentelemetry_api", + "opentelemetry_sdk", + "rand", + "refinery", + "rust_decimal", + "serde", + "serde_json", + "strum 0.25.0", + "talos_agent", + "talos_certifier", + "talos_suffix", + "tokio", + "tokio-postgres", + "uuid", +] + +[[package]] +name = "cohort_banking_example" +version = "0.0.1" dependencies = [ "async-channel", "async-trait", @@ -413,7 +482,7 @@ dependencies = [ "log", "metrics", "rand", - "rdkafka", + "rdkafka 0.29.0", "rdkafka-sys", "rust_decimal", "serde", @@ -422,12 +491,70 @@ dependencies = [ "talos_certifier", "talos_certifier_adapters", "talos_suffix", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-postgres", "uuid", ] +[[package]] +name = "cohort_banking_with_sdk" +version = "0.0.1" +dependencies = [ + "async-channel", + "async-trait", + "cohort_banking", + "cohort_sdk", + "deadpool-postgres", + "env_logger", + "examples_support", + "log", + "metrics", + "opentelemetry", + "opentelemetry-prometheus", + "opentelemetry_api", + "opentelemetry_sdk", + "rand", + "rdkafka 0.29.0", + "rdkafka-sys", + "rust_decimal", + "serde", + "serde_json", + "talos_agent", + "talos_certifier", + "talos_certifier_adapters", + "talos_suffix", + "time 0.3.24", + "tokio", + "tokio-postgres", + "uuid", +] + +[[package]] +name = "cohort_sdk" +version = "0.0.1" +dependencies = [ + "async-trait", + "env_logger", + "futures", + "log", + "opentelemetry", + "opentelemetry_api", + "opentelemetry_sdk", + "rand", + "rdkafka 0.33.2", + "rdkafka-sys", + "serde", + "serde_json", + "strum 
0.25.0", + "talos_agent", + "talos_certifier", + "talos_certifier_adapters", + "talos_suffix", + "tokio", + "uuid", +] + [[package]] name = "concurrent-queue" version = "2.2.0" @@ -445,9 +572,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" dependencies = [ "libc", ] @@ -564,12 +691,12 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.4.0" +version = "5.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d" dependencies = [ "cfg-if", - "hashbrown 0.12.3", + "hashbrown 0.14.0", "lock_api", "once_cell", "parking_lot_core", @@ -609,6 +736,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "deranged" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8810e7e2cf385b1e9b50d68264908ec367ba642c96d02edfe61c39e88e2a3c01" + [[package]] name = "difflib" version = "0.4.0" @@ -646,9 +779,9 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "env_logger" @@ -665,24 +798,24 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" +checksum = 
"5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "erased-serde" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2b0c2380453a92ea8b6c8e5f64ecaafccddde8ceab55ff7a8ac1029f894569" +checksum = "da96524cc884f6558f1769b6c46686af2fe8e8b4cd253bd5a3cdba8181b8e070" dependencies = [ "serde", ] [[package]] name = "errno" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", @@ -718,9 +851,9 @@ dependencies = [ "metrics", "refinery", "rust_decimal", - "strum", + "strum 0.24.1", "thiserror", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-postgres", "uuid", @@ -741,6 +874,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" version = "1.2.0" @@ -818,7 +957,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] @@ -872,6 +1011,12 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + [[package]] name = "half" version = "1.8.2" @@ -887,6 +1032,15 @@ dependencies = [ "ahash 0.7.6", ] +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash 
0.8.3", +] + [[package]] name = "hashbrown" version = "0.14.0" @@ -910,18 +1064,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.2.6" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "hmac" @@ -992,25 +1137,13 @@ dependencies = [ "rayon", ] -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi 0.3.1", - "libc", - "windows-sys", -] - [[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi 0.3.1", - "io-lifetimes", + "hermit-abi 0.3.2", "rustix", "windows-sys", ] @@ -1026,9 +1159,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" @@ -1053,9 +1186,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libz-sys" -version = "1.1.9" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" dependencies = [ "cc", "libc", @@ -1065,9 +1198,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.8" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" [[package]] name = "lock_api" @@ -1129,7 +1262,16 @@ dependencies = [ name = "metrics" version = "0.0.1" dependencies = [ - "time 0.3.22", + "time 0.3.24", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", ] [[package]] @@ -1196,20 +1338,20 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.2", "libc", ] @@ -1234,6 +1376,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + 
[[package]] name = "once_cell" version = "1.18.0" @@ -1258,6 +1409,65 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f4b8347cc26099d3aeee044065ecc3ae11469796b4d65d065a23a584ed92a6f" +dependencies = [ + "opentelemetry_api", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry-prometheus" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a9f186f6293ebb693caddd0595e66b74a6068fa51048e26e0bf9c95478c639c" +dependencies = [ + "opentelemetry", + "prometheus", + "protobuf", +] + +[[package]] +name = "opentelemetry_api" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed41783a5bf567688eb38372f2b7a8530f5a607a4b49d38dd7573236c23ca7e2" +dependencies = [ + "fnv", + "futures-channel", + "futures-util", + "indexmap 1.9.3", + "once_cell", + "pin-project-lite", + "thiserror", + "urlencoding", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b3a2a91fdbfdd4d212c0dcc2ab540de2c2bcbbd90be17de7a7daf8822d010c1" +dependencies = [ + "async-trait", + "crossbeam-channel", + "dashmap", + "fnv", + "futures-channel", + "futures-executor", + "futures-util", + "once_cell", + "opentelemetry_api", + "percent-encoding", + "rand", + "thiserror", + "tokio", + "tokio-stream", +] + [[package]] name = "os_pipe" version = "1.1.4" @@ -1323,9 +1533,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" [[package]] name = "pin-utils" @@ -1470,13 +1680,34 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.63" 
+version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] +[[package]] +name = "prometheus" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "ptr_meta" version = "0.1.4" @@ -1499,9 +1730,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.28" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] @@ -1582,6 +1813,24 @@ dependencies = [ "tokio", ] +[[package]] +name = "rdkafka" +version = "0.33.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da18026aad1c24033da3da726200de7e911e75c2e2cc2f77ffb9b4502720faae" +dependencies = [ + "futures-channel", + "futures-util", + "libc", + "log", + "rdkafka-sys", + "serde", + "serde_derive", + "serde_json", + "slab", + "tokio", +] + [[package]] name = "rdkafka-sys" version = "4.5.0+1.9.2" @@ -1602,7 +1851,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1629,10 +1878,10 @@ dependencies = [ "serde", "siphasher", "thiserror", 
- "time 0.3.22", + "time 0.3.24", "tokio", "tokio-postgres", - "toml 0.7.5", + "toml 0.7.6", "url", "walkdir", ] @@ -1647,14 +1896,26 @@ dependencies = [ "quote", "refinery-core", "regex", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] name = "regex" -version = "1.8.4" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" dependencies = [ "aho-corasick", "memchr", @@ -1663,9 +1924,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rend" @@ -1712,13 +1973,12 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.30.0" +version = "1.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0446843641c69436765a35a5a77088e28c2e6a12da93e84aa3ab1cd4aa5a042" +checksum = "4a2ab0025103a60ecaaf3abf24db1db240a4e1c15837090d2c32f625ac98abea" dependencies = [ "arrayvec", "borsh", - "bytecheck", "byteorder", "bytes", "num-traits", @@ -1730,15 +1990,20 @@ dependencies = [ "tokio-postgres", ] +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustix" -version = "0.37.20" +version = "0.38.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ - "bitflags", + "bitflags 2.3.3", "errno", - "io-lifetimes", "libc", "linux-raw-sys", "windows-sys", @@ -1746,15 +2011,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -1779,9 +2044,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "seahash" @@ -1791,22 +2056,22 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.164" +version = "1.0.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +checksum = "0a5bf42b8d227d4abf38a1ddb08602e229108a517cd4e5bb28f9c7eaafdce5c0" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.164" +version = "1.0.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" 
+checksum = "741e124f5485c7e60c03b043f79f320bff3527f4bbf12cf3831750dc46a0ec2c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] @@ -1820,9 +2085,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.99" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa", "ryu", @@ -1916,9 +2181,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" [[package]] name = "socket2" @@ -1942,9 +2207,9 @@ dependencies = [ [[package]] name = "stringprep" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1" +checksum = "db3737bde7edce97102e0e2b15365bf7a20bfdb5f60f4f9e8d7004258a51a8da" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -1956,7 +2221,16 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" dependencies = [ - "strum_macros", + "strum_macros 0.24.3", +] + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.1", ] [[package]] @@ -1972,6 +2246,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.28", +] + [[package]] name = "subtle" version = "2.5.0" @@ -1980,15 +2267,15 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "sval" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2faba619276044eec7cd160d87b15d9191fb9b9f7198440343d2144f760cf08" +checksum = "8b031320a434d3e9477ccf9b5756d57d4272937b8d22cb88af80b7633a1b78b1" [[package]] name = "sval_buffer" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a353d3cca10721384077c9643c3fafdd6ed2600e57933b8e45c0b580d97b25af" +checksum = "6bf7e9412af26b342f3f2cc5cc4122b0105e9d16eb76046cd14ed10106cf6028" dependencies = [ "sval", "sval_ref", @@ -1996,18 +2283,18 @@ dependencies = [ [[package]] name = "sval_dynamic" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee5fc7349e9f6cb2ab950046818f66ad3f2d7209ccc5dced93da19292a30273a" +checksum = "a0ef628e8a77a46ed3338db8d1b08af77495123cc229453084e47cd716d403cf" dependencies = [ "sval", ] [[package]] name = "sval_fmt" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "098fb51d5d6007bd2c3f0a23b79aa953d7c46bf943086ce51424c3187c40f9b1" +checksum = "7dc09e9364c2045ab5fa38f7b04d077b3359d30c4c2b3ec4bae67a358bd64326" dependencies = [ "itoa", "ryu", @@ -2016,9 +2303,9 @@ dependencies = [ [[package]] name = "sval_json" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01126a2783d767496f18f13af26ab2587881f6343368bb26dc62956a723d1c7" +checksum = "ada6f627e38cbb8860283649509d87bc4a5771141daa41c78fd31f2b9485888d" dependencies = [ "itoa", "ryu", @@ -2027,18 +2314,18 @@ 
dependencies = [ [[package]] name = "sval_ref" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5854d9eaa7bd31840a850322591c59c5b547eb29c9a6ecee1989d6ef963312ce" +checksum = "703ca1942a984bd0d9b5a4c0a65ab8b4b794038d080af4eb303c71bc6bf22d7c" dependencies = [ "sval", ] [[package]] name = "sval_serde" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd25fc04c5e882787d62112591aa93efb5bdc2000b43164d29f08582bb85f7" +checksum = "830926cd0581f7c3e5d51efae4d35c6b6fc4db583842652891ba2f1bed8db046" dependencies = [ "serde", "sval", @@ -2059,9 +2346,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.22" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -2077,13 +2364,13 @@ dependencies = [ "log", "mockall", "multimap 0.8.3", - "rdkafka", + "rdkafka 0.29.0", "rdkafka-sys", "serde", "serde_json", - "strum", + "strum 0.24.1", "thiserror", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-test", "uuid", @@ -2103,10 +2390,10 @@ dependencies = [ "logger", "serde", "serde_json", - "strum", + "strum 0.24.1", "talos_suffix", "thiserror", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-test", ] @@ -2123,7 +2410,7 @@ dependencies = [ "logger", "metrics", "mockall", - "rdkafka", + "rdkafka 0.29.0", "refinery", "serde", "serde_json", @@ -2131,7 +2418,7 @@ dependencies = [ "talos_certifier", "talos_suffix", "thiserror", - "time 0.3.22", + "time 0.3.24", "tokio", "tokio-postgres", "uuid", @@ -2182,22 +2469,22 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.44" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] @@ -2213,10 +2500,11 @@ dependencies = [ [[package]] name = "time" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" +checksum = "b79eabcd964882a646b3584543ccabeae7869e9ac32a46f6f22b7a5bd405308b" dependencies = [ + "deranged", "itoa", "serde", "time-core", @@ -2231,9 +2519,9 @@ checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "time-macros" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" +checksum = "eb71511c991639bb078fd5bf97757e03914361c48100d52878b8e52b46fb92cd" dependencies = [ "time-core", ] @@ -2265,11 +2553,12 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.2" +version = "1.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" +checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" dependencies = [ "autocfg", + "backtrace", "bytes", "libc", "mio", @@ -2290,7 +2579,7 @@ checksum = 
"630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", ] [[package]] @@ -2366,9 +2655,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ebafdf5ad1220cb59e7d17cf4d2c72015297b75b19a10472f99b89225089240" +checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" dependencies = [ "serde", "serde_spanned", @@ -2387,9 +2676,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.19.11" +version = "0.19.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266f016b7f039eec8a1a80dfe6156b633d208b9fccca5e4db1d6775b0c4e34a7" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ "indexmap 2.0.0", "serde", @@ -2432,9 +2721,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-normalization" @@ -2456,20 +2745,26 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "uuid" -version = "1.3.4" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa2982af2eec27de306107c027578ff7f423d65f7250e40ce0fea8f45248b81" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ "getrandom", ] [[package]] name = "value-bag" -version = "1.4.0" +version = "1.4.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d330786735ea358f3bc09eea4caa098569c1c93f342d9aca0514915022fe7e" +checksum = "d92ccd67fb88503048c01b59152a04effd0782d035a83a6d256ce6085f08f4a3" dependencies = [ "value-bag-serde1", "value-bag-sval2", @@ -2477,9 +2772,9 @@ dependencies = [ [[package]] name = "value-bag-serde1" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4735c95b4cca1447b448e2e2e87e98d7e7498f4da27e355cf7af02204521001d" +checksum = "b0b9f3feef403a50d4d67e9741a6d8fc688bcbb4e4f31bd4aab72cc690284394" dependencies = [ "erased-serde", "serde", @@ -2488,9 +2783,9 @@ dependencies = [ [[package]] name = "value-bag-sval2" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859cb4f0ce7da6a118b559ba74b0e63bf569bea867c20ba457a6b1c886a04e97" +checksum = "30b24f4146b6f3361e91cbf527d1fb35e9376c3c0cef72ca5ec5af6d640fad7d" dependencies = [ "sval", "sval_buffer", @@ -2556,7 +2851,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", "wasm-bindgen-shared", ] @@ -2578,7 +2873,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.28", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2650,9 +2945,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -2707,9 +3002,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.7" +version = "0.5.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448" +checksum = "8bd122eb777186e60c3fdf765a58ac76e41c582f1f535fbf3314434c6b58f3f7" dependencies = [ "memchr", ] diff --git a/Makefile b/Makefile index b970a3ba..3c57f02a 100644 --- a/Makefile +++ b/Makefile @@ -151,11 +151,16 @@ dev.preload_db: $(call pp,run preload_db...) cargo run --bin preload_db -- $(args) -## dev.cohort_banking: 🧪 Runs Cohort with built-it replicator and executes banking transactions +## dev.cohort_banking: 🧪 Runs Cohort with built-in replicator and executes banking transactions dev.cohort_banking: $(call pp,run cohort_banking...) cargo run --example cohort_banking --release -- $(args) +## dev.cohort_banking_with_sdk: 🧪 Runs an example of rust app "Cohort Banking" which use cohort_sdk. +dev.cohort_banking_with_sdk: + $(call pp,run cohort_banking...) + cargo run --example cohort_banking_with_sdk --release -- $(args) + ## dev.histogram_decision_timeline_from_kafka: 🧪 Reads all decisions from kafka and prints processing timeline as csv dev.histogram_decision_timeline_from_kafka: $(call pp,histogram_decision_timeline_from_kafka...) 
diff --git a/examples/agent_client/Cargo.toml b/examples/agent_client/Cargo.toml index 07e9c459..4b117068 100644 --- a/examples/agent_client/Cargo.toml +++ b/examples/agent_client/Cargo.toml @@ -12,6 +12,7 @@ async-channel = { version = "1.8.0" } async-trait = { workspace = true } env_logger = { workspace = true } log = { workspace = true } +rand = { version = "0.8.5" } rdkafka = { version = "0.29.0", features = ["sasl"] } rdkafka-sys = { version = "4.3.0" } serde = { workspace = true } diff --git a/examples/agent_client/examples/agent_client.rs b/examples/agent_client/examples/agent_client.rs index 27c51433..327e0c50 100644 --- a/examples/agent_client/examples/agent_client.rs +++ b/examples/agent_client/examples/agent_client.rs @@ -1,6 +1,6 @@ use async_channel::Receiver; use examples_support::load_generator::generator::ControlledRateLoadGenerator; -use examples_support::load_generator::models::StopType; +use examples_support::load_generator::models::{Generator, StopType}; use std::num::ParseIntError; use std::{env, sync::Arc, time::Duration}; @@ -57,7 +57,7 @@ async fn certify() -> Result<(), String> { let h_workload_generator = tokio::spawn(async move { let params = params.clone(); - ControlledRateLoadGenerator::generate::(params.stop_type, params.target_rate as f32, &create_new_candidate, tx_generated).await + ControlledRateLoadGenerator::generate(params.stop_type, params.target_rate as f32, RequestGenerator {}, tx_generated).await }); let all_async_services = tokio::spawn(async move { @@ -155,7 +155,7 @@ fn load_configs() -> Result<(AgentConfig, KafkaConfig), String> { agent: read_var("AGENT_NAME").unwrap(), cohort: read_var("COHORT_NAME").unwrap(), buffer_size: read_var("AGENT_BUFFER_SIZE").unwrap().parse().unwrap(), - timout_ms: read_var("AGENT_TIMEOUT_MS").unwrap().parse().unwrap(), + timeout_ms: read_var("AGENT_TIMEOUT_MS").unwrap().parse().unwrap(), }; let cfg_kafka = KafkaConfig { @@ -260,27 +260,8 @@ async fn make_agent(params: LaunchParams) -> impl 
TalosAgent { }, ); + let _ = agent.start(rx_certify, rx_cancel, tx_decision, rx_decision, publisher, consumer); agent - .start(rx_certify, rx_cancel, tx_decision, rx_decision, publisher, consumer) - .expect("unable to start agent"); - agent -} - -fn create_new_candidate() -> CertificationRequest { - let tx_data = CandidateData { - xid: Uuid::new_v4().to_string(), - readset: Vec::new(), - readvers: Vec::new(), - snapshot: 5, - writeset: Vec::from(["3".to_string()]), - statemap: None, - }; - - CertificationRequest { - message_key: "12345".to_string(), - candidate: tx_data, - timeout: None, // this will use the default global value as defined in AgentConfig - } } fn create_stop_controller(params: LaunchParams, queue: Arc>) -> JoinHandle> { @@ -369,3 +350,24 @@ async fn get_params() -> Result { }) } } + +struct RequestGenerator {} + +impl Generator for RequestGenerator { + fn generate(&mut self) -> CertificationRequest { + let tx_data = CandidateData { + xid: Uuid::new_v4().to_string(), + readset: Vec::new(), + readvers: Vec::new(), + snapshot: 5, + writeset: Vec::from(["3".to_string()]), + statemap: None, + }; + + CertificationRequest { + message_key: "12345".to_string(), + candidate: tx_data, + timeout: None, // this will use the default global value as defined in AgentConfig + } + } +} diff --git a/examples/cohort_banking/Cargo.toml b/examples/cohort_banking_example/Cargo.toml similarity index 97% rename from examples/cohort_banking/Cargo.toml rename to examples/cohort_banking_example/Cargo.toml index 7956cb6c..74b598ae 100644 --- a/examples/cohort_banking/Cargo.toml +++ b/examples/cohort_banking_example/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "cohort_banking" +name = "cohort_banking_example" version = "0.0.1" edition = "2021" diff --git a/examples/cohort_banking/examples/cohort_banking.rs b/examples/cohort_banking_example/examples/cohort_banking.rs similarity index 89% rename from examples/cohort_banking/examples/cohort_banking.rs rename to 
examples/cohort_banking_example/examples/cohort_banking.rs index 9c5dc640..af015ad4 100644 --- a/examples/cohort_banking/examples/cohort_banking.rs +++ b/examples/cohort_banking_example/examples/cohort_banking.rs @@ -3,6 +3,7 @@ use std::{env, sync::Arc, time::Duration}; use async_channel::Receiver; use cohort::{ config_loader::ConfigLoader, + examples_support::queue_processor::QueueProcessor, metrics::Stats, model::requests::TransferRequest, replicator::{ @@ -19,11 +20,11 @@ use cohort::{ snapshot_api::SnapshotApi, state::postgres::database::Database, }; -use examples_support::{ - cohort::queue_workers::QueueProcessor, - load_generator::{generator::ControlledRateLoadGenerator, models::StopType}, -}; +use examples_support::load_generator::{ + generator::ControlledRateLoadGenerator, + models::{Generator, StopType}, +}; use metrics::model::{MicroMetrics, MinMax}; use rand::Rng; use rust_decimal::{prelude::FromPrimitive, Decimal}; @@ -39,6 +40,7 @@ type HeartBeatReceiver = tokio::sync::watch::Receiver; #[derive(Clone)] struct LaunchParams { + accounts: u64, stop_type: StopType, target_rate: f32, threads: u64, @@ -59,7 +61,12 @@ async fn main() -> Result<(), String> { let rx_queue = Arc::new(rx_queue); let rx_queue_ref = Arc::clone(&rx_queue); - let generator = ControlledRateLoadGenerator::generate(params.stop_type, params.target_rate, &create_transfer_request, Arc::new(tx_queue)); + let generator_impl = TransferRequestGenerator { + available_accounts: params.accounts, + generated: Vec::new(), + }; + + let generator = ControlledRateLoadGenerator::generate(params.stop_type, params.target_rate, generator_impl, Arc::new(tx_queue)); let h_generator = tokio::spawn(generator); let (tx_metrics, rx_metrics) = async_channel::unbounded::(); @@ -69,7 +76,7 @@ async fn main() -> Result<(), String> { let rx_metrics_ref2 = Arc::clone(&rx_metrics); let cfg_db = ConfigLoader::load_db_config()?; - let db = Database::init_db(cfg_db).await; + let db = 
Database::init_db(cfg_db).await.unwrap(); let db_ref1 = Arc::clone(&db); let db_ref2 = Arc::clone(&db); @@ -203,7 +210,7 @@ async fn start_replicator( let (tx_installation_feedback_req, rx_installation_feedback_req) = tokio::sync::mpsc::channel(channel_size); let (tx_installation_req, rx_installation_req) = tokio::sync::mpsc::channel(channel_size); - // let manual_tx_api = PostgresApi { client: database.get().await }; + let installer = PgReplicatorStatemapInstaller { metrics_frequency: replicator_metrics, pg: database.clone(), @@ -262,40 +269,6 @@ async fn start_replicator( (h_replicator, h_installer, h_installation, rx_heartbeat) } -fn create_transfer_request() -> TransferRequest { - let mut available_accounts = 0_u64; - let args: Vec = env::args().collect(); - if args.len() >= 2 { - let mut i = 1; - while i < args.len() { - let param_name = &args[i]; - if param_name.eq("--accounts") { - let param_value = &args[i + 1]; - available_accounts = param_value.parse().unwrap(); - } - i += 2; - } - } - - let mut rnd = rand::thread_rng(); - let mut to; - - let from = rnd.gen_range(1..=available_accounts); - loop { - to = rnd.gen_range(1..=available_accounts); - if to == from { - continue; - } - break; - } - - TransferRequest { - from: format!("{:<04}", from), - to: format!("{:<04}", to), - amount: Decimal::from_f32(1.0).unwrap(), - } -} - async fn get_params() -> Result { let args: Vec = env::args().collect(); let mut threads: Option = Some(1); @@ -364,6 +337,7 @@ async fn get_params() -> Result { Err("Parameter --rate is required".into()) } else { Ok(LaunchParams { + accounts: accounts.unwrap(), target_rate: target_rate.unwrap(), stop_type: stop_type.unwrap(), threads: threads.unwrap(), @@ -373,3 +347,45 @@ async fn get_params() -> Result { }) } } + +struct TransferRequestGenerator { + available_accounts: u64, + generated: Vec<(u64, u64)>, +} + +impl Generator for TransferRequestGenerator { + fn generate(&mut self) -> TransferRequest { + let mut rnd = rand::thread_rng(); 
+ let mut to; + + let from = rnd.gen_range(1..=self.available_accounts); + loop { + to = rnd.gen_range(1..=self.available_accounts); + if to == from { + continue; + } + + let result = self + .generated + .iter() + .find(|(past_from, past_to)| *past_from == from && *past_to == to || *past_from == to && *past_to == from); + + if result.is_none() { + if self.generated.len() < 100 { + self.generated.push((from, to)); + } else { + self.generated.remove(0); + self.generated.insert(0, (from, to)); + } + + break; + } + } + + TransferRequest { + from: format!("{:<04}", from), + to: format!("{:<04}", to), + amount: Decimal::from_f32(1.0).unwrap(), + } + } +} diff --git a/examples/cohort_banking_with_sdk/Cargo.toml b/examples/cohort_banking_with_sdk/Cargo.toml new file mode 100644 index 00000000..d82bff86 --- /dev/null +++ b/examples/cohort_banking_with_sdk/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "cohort_banking_with_sdk" +version = "0.0.1" +edition = "2021" + +[dev-dependencies] + +cohort_sdk = { path = "../../packages/cohort_sdk" } +cohort_banking = { path = "../../packages/cohort_banking" } +examples_support = { path = "../../packages/examples_support" } +metrics = { path = "../../packages/metrics" } +talos_agent = { path = "../../packages/talos_agent" } +talos_certifier = { path = "../../packages/talos_certifier" } +talos_certifier_adapters = { path = "../../packages/talos_certifier_adapters" } +talos_suffix = { path = "../../packages/talos_suffix" } + +async-trait = { workspace = true } +env_logger = { workspace = true } +log = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["full"] } + +async-channel = { version = "1.8.0" } +deadpool-postgres = { version = "0.10" } +opentelemetry_api = { version = "0.19.0" } +opentelemetry_sdk = { version = "0.19.0", features = ["metrics", "rt-tokio"] } +opentelemetry = { version = "0.19.0" } +opentelemetry-prometheus = { version = "0.12.0", features = 
["prometheus-encoding"] } +rand = { version = "0.8.5" } +rdkafka = { version = "0.29.0", features = ["sasl"] } +rdkafka-sys = { version = "4.3.0" } +rust_decimal = { version = "1.30.0", features = ["db-tokio-postgres", "serde-with-float"] } +time = { version = "0.3.17" } +tokio-postgres = { version = "0.7", features = [ "with-uuid-1", "with-serde_json-1" ] } +uuid = { version = "1.2.2", features = ["v4"] } \ No newline at end of file diff --git a/examples/cohort_banking_with_sdk/examples/cohort_banking_with_sdk.rs b/examples/cohort_banking_with_sdk/examples/cohort_banking_with_sdk.rs new file mode 100644 index 00000000..30e71e8c --- /dev/null +++ b/examples/cohort_banking_with_sdk/examples/cohort_banking_with_sdk.rs @@ -0,0 +1,474 @@ +use std::str::FromStr; +use std::{collections::HashMap, env, sync::Arc, time::Duration}; + +use async_channel::Receiver; +use cohort_banking::{app::BankingApp, examples_support::queue_processor::QueueProcessor, model::requests::TransferRequest}; +use cohort_sdk::model::Config; +use examples_support::load_generator::models::Generator; +use examples_support::load_generator::{generator::ControlledRateLoadGenerator, models::StopType}; + +use metrics::model::MinMax; +use opentelemetry_api::KeyValue; +use opentelemetry_sdk::Resource; +use rand::Rng; +use rust_decimal::{prelude::FromPrimitive, Decimal}; +use tokio::{signal, task::JoinHandle, try_join}; + +use opentelemetry::global; +use opentelemetry::global::shutdown_tracer_provider; +use opentelemetry::sdk::metrics::{controllers, processors, selectors}; + +use opentelemetry_prometheus::{Encoder, ExporterConfig, PrometheusExporter, TextEncoder}; + +#[derive(Clone)] +struct LaunchParams { + stop_type: StopType, + target_rate: f32, + threads: u64, + accounts: u64, +} + +#[tokio::main] +async fn main() -> Result<(), String> { + env_logger::builder().format_timestamp_millis().init(); + log::info!("Started, pid: {}", std::process::id()); + + let params = get_params().await?; + + let (tx_queue, 
rx_queue) = async_channel::unbounded::(); + let rx_queue = Arc::new(rx_queue); + let rx_queue_ref = Arc::clone(&rx_queue); + + let generator_impl = TransferRequestGenerator { + available_accounts: params.accounts, + generated: Vec::new(), + }; + + let generator = ControlledRateLoadGenerator::generate(params.stop_type, params.target_rate, generator_impl, Arc::new(tx_queue)); + let h_generator = tokio::spawn(generator); + + let config = Config { + // + // cohort configs + // + retry_attempts_max: 10, + retry_backoff_max_ms: 1500, + retry_oo_backoff_max_ms: 1000, + retry_oo_attempts_max: 10, + + // + // agent config values + // + agent: "cohort-banking".into(), + cohort: "cohort-banking".into(), + // The size of internal buffer for candidates + buffer_size: 10_000_000, + timeout_ms: 600_000, + + // + // Common to kafka configs values + // + brokers: "127.0.0.1:9092".into(), + topic: "dev.ksp.certification".into(), + sasl_mechanisms: None, + kafka_username: None, + kafka_password: None, + + // + // Kafka configs for Agent + // + // Must be unique for each agent instance. Can be the same as AgentConfig.agent_id + agent_group_id: "cohort-banking".into(), + agent_fetch_wait_max_ms: 6000, + // The maximum time librdkafka may use to deliver a message (including retries) + agent_message_timeout_ms: 15000, + // Controls how long to wait until message is successfully placed on the librdkafka producer queue (including retries). 
+ agent_enqueue_timeout_ms: 10, + // should be mapped to rdkafka::config::RDKafkaLogLevel + agent_log_level: 6, + + // + // Kafka configs for Replicator + // + replicator_client_id: "cohort-banking".into(), + replicator_group_id: "cohort-banking-replicator".into(), + producer_config_overrides: HashMap::new(), + consumer_config_overrides: HashMap::new(), + + // + // Suffix config values + // + /// Initial capacity of the suffix + // suffix_size_max: 500_000, + suffix_size_max: 10, + /// - The suffix prune threshold from when we start checking if the suffix + /// should prune. + /// - Set to None if pruning is not required. + /// - Defaults to None. + // suffix_prune_at_size: Some(300_000), + suffix_prune_at_size: Some(2000), + /// Minimum size of suffix after prune. + /// - Defaults to None. + // suffix_size_min: Some(100_000), + suffix_size_min: None, + + // + // Replicator config values + // + replicator_buffer_size: 100_000, + + // + // Database config + // + db_pool_size: 200, + db_user: "postgres".into(), + db_password: "admin".into(), + db_host: "127.0.0.1".into(), + db_port: "5432".into(), + db_database: "talos-sample-cohort-dev".into(), + }; + + let buckets = [ + 0.1, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 200.0, 300.0, 400.0, 500.0, 1000.0, + 1500.0, 2000.0, 2500.0, 3000.0, 3500.0, 4000.0, 4500.0, 5000.0, 10000.0, + ]; + + let factory = processors::factory( + selectors::simple::histogram(buckets), + opentelemetry::sdk::export::metrics::aggregation::cumulative_temporality_selector(), + ); + + let controller = controllers::basic(factory) + .with_collect_period(Duration::from_secs(20)) + .with_resource(Resource::new([KeyValue::new("service_name", "banking_with_cohort_sdk")])) + .build(); + + // this exporter can export into file + let exporter = opentelemetry_prometheus::exporter(controller) + .with_config(ExporterConfig::default().with_scope_info(true)) + .init(); + + let meter = 
global::meter("banking_cohort"); + let meter = Arc::new(meter); + + let h_cohort = tokio::spawn(async move { + let mut banking_app = BankingApp::new(config).await.unwrap(); + let _ = banking_app.init().await; + let tasks = QueueProcessor::process::(rx_queue, meter, params.threads, Arc::new(banking_app)).await; + + let mut i = 1; + let mut errors_count = 0; + let mut timeline = MinMax::default(); + for task in tasks { + match task.await { + Err(e) => { + errors_count += 1; + log::error!("{:?}", e); + } + Ok(thread_timeline) => timeline.merge(thread_timeline), + } + if i % 10 == 0 { + log::warn!("Initiator thread {} of {} finished.", i, params.threads); + } + + i += 1; + } + + log::warn!("Duration: {}", Duration::from_nanos((timeline.max - timeline.min) as u64).as_secs_f32()); + log::info!("Finished. errors count: {}", errors_count); + }); + + let h_stop: JoinHandle> = start_queue_monitor(rx_queue_ref); + + let all_async_services = tokio::spawn(async move { + let result = try_join!(h_generator, h_cohort); + log::warn!("Result from services ={result:?}"); + }); + + tokio::select! { + _ = h_stop => { + log::warn!("Stop manager is active..."); + } + + _ = all_async_services => {} + + // CTRL + C termination signal + _ = signal::ctrl_c() => { + log::warn!("Shutting down..."); + } + } + + shutdown_tracer_provider(); + + print_prometheus_report_as_text(exporter, params.threads); + + Ok(()) +} + +fn start_queue_monitor(queue: Arc>) -> JoinHandle> { + tokio::spawn(async move { + let check_frequency = Duration::from_secs(10); + let total_attempts = 3; + + let mut remaining_attempts = total_attempts; + loop { + if remaining_attempts == 0 { + // we consumed all attempts + break; + } + + if queue.is_empty() { + // queue is empty and there are no signals from other workers, reduce window and try again + remaining_attempts -= 1; + log::warn!( + "Workers queue is empty and there is no activity signal from replicator. 
Finishing in: {} seconds...", + remaining_attempts * check_frequency.as_secs() + ); + } else { + remaining_attempts = total_attempts; + log::warn!("Counts. Remaining: {}", queue.len(),); + } + + tokio::time::sleep(check_frequency).await; + } + + queue.close(); + + Err("Signal from StopController".into()) + }) +} + +async fn get_params() -> Result { + let args: Vec = env::args().collect(); + let mut threads: Option = Some(1); + let mut accounts: Option = None; + let mut target_rate: Option = None; + let mut stop_type: Option = None; + + if args.len() >= 3 { + let mut i = 1; + while i < args.len() { + let param_name = &args[i]; + if param_name.eq("--accounts") { + let param_value = &args[i + 1]; + accounts = Some(param_value.parse().unwrap()); + } else if param_name.eq("--threads") { + let param_value = &args[i + 1]; + threads = Some(param_value.parse().unwrap()); + } else if param_name.eq("--rate") { + let param_value = &args[i + 1]; + target_rate = Some(param_value.parse().unwrap()); + } else if param_name.eq("--volume") { + let param_value = &args[i + 1]; + + if param_value.contains("-sec") { + let seconds: u64 = param_value.replace("-sec", "").parse().unwrap(); + stop_type = Some(StopType::LimitExecutionDuration { + run_duration: Duration::from_secs(seconds), + }) + } else { + let count: u64 = param_value.parse().unwrap(); + stop_type = Some(StopType::LimitGeneratedTransactions { count }) + } + } + + i += 2; + } + } + + if stop_type.is_none() { + Err("Parameter --volume is required".into()) + } else if accounts.is_none() { + Err("Parameter --accounts is required".into()) + } else if target_rate.is_none() { + Err("Parameter --rate is required".into()) + } else { + Ok(LaunchParams { + target_rate: target_rate.unwrap(), + stop_type: stop_type.unwrap(), + threads: threads.unwrap(), + accounts: accounts.unwrap(), + }) + } +} + +fn print_prometheus_report_as_text(exporter: PrometheusExporter, threads: u64) { + let encoder = TextEncoder::new(); + let metric_families = 
exporter.registry().gather(); + let mut report_buffer = Vec::::new(); + encoder.encode(&metric_families, &mut report_buffer).unwrap(); + + let report: Vec<&str> = match std::str::from_utf8(&report_buffer) { + Ok(v) => v.split('\n').collect(), + Err(e) => panic!("Invalid UTF-8 sequence: {}", e), + }; + // for line in report.iter().filter(|v| v.starts_with("metric_")) { + // log::warn!("Printing results = {}", line); + // } + + print_histogram("Out of Order Install (DB work)", "metric_oo_install_duration", "ms", &report, threads, true); + print_histogram("Out of Order Install (sleeps)", "metric_oo_wait_duration", "ms", &report, threads, true); + print_histogram( + "Out of Order Install (full span)", + "metric_oo_install_and_wait_duration", + "ms", + &report, + threads, + true, + ); + print_histogram("Out of Order Install attempts used", "metric_oo_attempts", "attempts", &report, threads, false); + print_histogram("Talos roundtrip", "metric_talos", "ms", &report, threads, true); + print_histogram("Candidate roundtrip", "metric_duration", "ms", &report, threads, true); + + let aborts = extract_num_value::(&report, "metric_aborts_total"); + let commits = extract_num_value::(&report, "metric_commits_total"); + let oo_retries = extract_num_value::(&report, "metric_oo_retry_count_total"); + let oo_giveups = extract_num_value::(&report, "metric_oo_giveups_count_total"); + let oo_no_data_found = extract_num_value::(&report, "metric_oo_no_data_found_total"); + let oo_not_safe = extract_num_value::(&report, "metric_oo_not_safe_count_total"); + let talos_aborts = extract_num_value::(&report, "metric_talos_aborts_count_total"); + let agent_retries = extract_num_value::(&report, "metric_agent_retries_count_total"); + let agent_errors = extract_num_value::(&report, "metric_agent_errors_count_total"); + let db_errors = extract_num_value::(&report, "metric_db_errors_count_total"); + + log::warn!("Commits : {}", if let Some(v) = commits { v } else { 0 }); + log::warn!("Aborts : {}", 
if let Some(v) = aborts { v } else { 0 }); + + log::warn!("OO no data : {}", if let Some(v) = oo_no_data_found { v } else { 0 }); + log::warn!("OO not safe : {}", if let Some(v) = oo_not_safe { v } else { 0 }); + log::warn!("OO retries : {}", if let Some(v) = oo_retries { v } else { 0 }); + log::warn!("OO giveups : {}", if let Some(v) = oo_giveups { v } else { 0 }); + + log::warn!("Talos aborts : {}", if let Some(v) = talos_aborts { v } else { 0 }); + log::warn!("Agent retries : {}", if let Some(v) = agent_retries { v } else { 0 }); + log::warn!("Agent errors : {}", if let Some(v) = agent_errors { v } else { 0 }); + + log::warn!("DB errors : {}", if let Some(v) = db_errors { v } else { 0 }); +} + +fn print_histogram(name: &str, id: &str, unit: &str, report: &[&str], threads: u64, print_tps: bool) { + let histogram: Vec<(f64, u64)> = report + .iter() + .filter(|v| v.starts_with(format!("{}_bucket", id).as_str())) + .filter_map(|line| { + let bucket_label_start_index = line.find("le=\""); + bucket_label_start_index?; + + let line_remainder = &line[bucket_label_start_index.unwrap() + 4..]; + let bucket_label_end_index = line_remainder.find("\"}"); + bucket_label_end_index?; + + let bucket_label = &line_remainder[..bucket_label_end_index.unwrap()]; + let bucket_label_value = if bucket_label == "+Inf" { + f64::MAX + } else { + bucket_label.parse::().unwrap() + }; + let count_in_bucket = &line_remainder[bucket_label_end_index.unwrap() + 3..]; + Some((bucket_label_value, count_in_bucket.parse::().unwrap())) + }) + .collect(); + + let extracted_count = extract_num_value::(report, format!("{}_count", id).as_str()); + if let Some(total_count) = extracted_count { + log::warn!("---------------------------------------------------------"); + log::warn!("{}", name); + for (bucket, count_in_bucket) in histogram { + let percents_in_bucket = (100.0 * count_in_bucket as f64) / total_count as f64; + if bucket == f64::MAX { + log::warn!("< {:>8} {} : {:>9} : {:>6.2}%", "10000+", unit, 
count_in_bucket, percents_in_bucket); + } else { + log::warn!("< {:>8} {} : {:>9} : {:>6.2}%", bucket, unit, count_in_bucket, percents_in_bucket); + } + } + + let rslt_sum = extract_num_value::(report, format!("{}_sum", id).as_str()); + if let Some(sum) = rslt_sum { + if unit == "ms" { + if sum > 1000.0 { + log::warn!("Total (sec) : {:.1}", sum / 1_000_f64); + log::warn!("Total (sec) avg per th : {:.1}", sum / 1_000_f64 / threads as f64); + } else { + log::warn!("Total (ms) : {:.1}", sum); + log::warn!("Total (ms) avg per th : {:.1}", sum / threads as f64); + } + } else { + log::warn!("Total ({}) : {:.1}", unit, sum); + log::warn!("Total ({}) avg per th : {:.1}", unit, sum / threads as f64); + } + + log::warn!("Count : {}", total_count); + if print_tps { + log::warn!("Approx throughput (tps) : {:.1}", (total_count as f64) / sum * 1000.0 * threads as f64); + } + } + log::warn!("---------------------------------------------------------\n"); + } +} + +fn extract_num_value(report: &[&str], value: &str) -> Option { + let extracted_as_list: Vec = report + .iter() + .filter(|i| i.starts_with(value)) + .filter_map(|i| { + if let Some(pos) = i.find(' ') { + let parsed = i[pos + 1..].parse::(); + if let Ok(num) = parsed { + Some(num) + } else { + None + } + } else { + None + } + }) + .collect(); + + if extracted_as_list.len() == 1 { + Some(extracted_as_list[0].clone()) + } else { + None + } +} + +struct TransferRequestGenerator { + available_accounts: u64, + generated: Vec<(u64, u64)>, +} + +impl Generator for TransferRequestGenerator { + fn generate(&mut self) -> TransferRequest { + let mut rnd = rand::thread_rng(); + let mut to; + + let from = rnd.gen_range(1..=self.available_accounts); + loop { + to = rnd.gen_range(1..=self.available_accounts); + if to == from { + continue; + } + + let result = self + .generated + .iter() + .find(|(past_from, past_to)| *past_from == from && *past_to == to || *past_from == to && *past_to == from); + + if result.is_none() { + if 
self.generated.len() < 100 { + self.generated.push((from, to)); + } else { + self.generated.remove(0); + self.generated.insert(0, (from, to)); + } + + break; + } + } + + TransferRequest { + from: format!("{:<04}", from), + to: format!("{:<04}", to), + amount: Decimal::from_f32(1.0).unwrap(), + } + } +} diff --git a/packages/cohort/Cargo.toml b/packages/cohort/Cargo.toml index 39406201..6be08f2b 100644 --- a/packages/cohort/Cargo.toml +++ b/packages/cohort/Cargo.toml @@ -15,6 +15,7 @@ tokio = { workspace = true, features = ["full"] } # Postgres refinery = { version = "0.8.7", features = ["tokio-postgres"] } tokio-postgres = { version = "0.7", features = [ "with-uuid-1", "with-serde_json-1" ] } +deadpool = { version = "0.9.5" } deadpool-postgres = { version = "0.10" } # Kafka @@ -27,6 +28,7 @@ thiserror = { version = "1.0.31" } # Time time = { version = "0.3.17" } +async-channel = { version = "1.8.0" } csv = { version = "1.2.1" } futures = { version = "0.3.28" } multimap = { version = "0.9.0" } diff --git a/packages/cohort/src/bank_api.rs b/packages/cohort/src/bank_api.rs index 996be6cc..475a0e9d 100644 --- a/packages/cohort/src/bank_api.rs +++ b/packages/cohort/src/bank_api.rs @@ -1,6 +1,9 @@ use std::collections::HashMap; use std::sync::Arc; +use rust_decimal::Decimal; +use tokio_postgres::Row; + use crate::actions::action::Action; use crate::actions::transfer::Transfer; @@ -8,28 +11,43 @@ use crate::model::bank_account::BankAccount; use crate::model::requests::TransferRequest; use crate::state::postgres::data_access::PostgresApi; -use crate::state::postgres::data_store::DataStore; -use crate::state::postgres::database::Database; +use crate::state::postgres::database::{Database, DatabaseError}; use crate::state::data_access_api::{ManualTx, TxApi}; pub struct BankApi {} impl BankApi { - pub async fn get_accounts(db: Arc) -> Result, String> { - let list = db.query("SELECT * FROM bank_accounts", DataStore::account_from_row).await; - Ok(list) + pub fn 
account_from_row(row: &Row) -> Result { + Ok(BankAccount { + name: row + .try_get::<&str, String>("name") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account name".into()))?, + number: row + .try_get::<&str, String>("number") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account number".into()))?, + version: row + .try_get::<&str, i64>("version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account version".into()))? as u64, + balance: row + .try_get::<&str, Decimal>("amount") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account amount".into()))?, + }) + } + + pub async fn get_accounts(db: Arc) -> Result, DatabaseError> { + db.query("SELECT * FROM bank_accounts", Self::account_from_row).await } - pub async fn get_accounts_as_map(db: Arc, number_from: String, number_to: String) -> Result, String> { + pub async fn get_accounts_as_map(db: Arc, number_from: String, number_to: String) -> Result, DatabaseError> { let mut map = HashMap::::new(); let from = db - .query_one("SELECT * FROM bank_accounts WHERE number = $1", &[&number_from], DataStore::account_from_row) - .await; + .query_one("SELECT * FROM bank_accounts WHERE number = $1", &[&number_from], BankApi::account_from_row) + .await?; let to = db - .query_one("SELECT * FROM bank_accounts WHERE number = $1", &[&number_to], DataStore::account_from_row) - .await; + .query_one("SELECT * FROM bank_accounts WHERE number = $1", &[&number_to], BankApi::account_from_row) + .await?; map.insert(number_from, from); map.insert(number_to, to); @@ -50,7 +68,9 @@ impl BankApi { } pub async fn transfer_one(db: Arc, action: Transfer) -> Result { - let mut manual_tx_api = PostgresApi { client: db.get().await }; + let mut manual_tx_api = PostgresApi { + client: db.get().await.map_err(|e| e.to_string())?, + }; let tx = manual_tx_api.transaction().await; action.execute(&tx).await.as_ref()?; let result = 
action.update_version(&tx).await; diff --git a/packages/cohort/src/bin/preload_db.rs b/packages/cohort/src/bin/preload_db.rs index 8030daab..3b8adc0d 100644 --- a/packages/cohort/src/bin/preload_db.rs +++ b/packages/cohort/src/bin/preload_db.rs @@ -13,7 +13,7 @@ async fn main() -> Result<(), String> { let cfg_db = ConfigLoader::load_db_config()?; - let database = Database::init_db(cfg_db.clone()).await; + let database = Database::init_db(cfg_db.clone()).await.map_err(|e| e.to_string())?; prefill_db(snapshots_file, accounts_file, Arc::clone(&database)).await; diff --git a/packages/cohort/src/bin/replicator.rs b/packages/cohort/src/bin/replicator.rs index 3eca068c..a5786296 100644 --- a/packages/cohort/src/bin/replicator.rs +++ b/packages/cohort/src/bin/replicator.rs @@ -26,7 +26,7 @@ use talos_suffix::{core::SuffixConfig, Suffix}; use tokio::{signal, sync::mpsc, try_join}; #[tokio::main] -async fn main() { +async fn main() -> Result<(), String> { env_logger::builder().format_timestamp_millis().init(); // 0. Create required items. @@ -56,8 +56,7 @@ async fn main() { // e. Create postgres statemap installer instance. 
let cfg_db = ConfigLoader::load_db_config().unwrap(); - let database = Database::init_db(cfg_db).await; - // let manual_tx_api = PostgresApi { client: database.get().await }; + let database = Database::init_db(cfg_db).await.map_err(|e| e.to_string())?; let pg_statemap_installer = PgReplicatorStatemapInstaller { metrics_frequency: None, @@ -128,5 +127,7 @@ async fn main() { } info!("Exiting Cohort Replicator!!"); + + Ok(()) } // $coverage:ignore-end diff --git a/packages/cohort/src/config_loader.rs b/packages/cohort/src/config_loader.rs index a6f82c83..09b032f1 100644 --- a/packages/cohort/src/config_loader.rs +++ b/packages/cohort/src/config_loader.rs @@ -74,6 +74,7 @@ impl ConfigLoader { pub fn load_db_config() -> Result { Ok(DatabaseConfig { + pool_size: Self::read_var("COHORT_PG_POOL_SIZE")?.parse().map_err(|e: ParseIntError| e.to_string())?, user: Self::read_var("COHORT_PG_USER")?, password: Self::read_var("COHORT_PG_PASSWORD")?, host: Self::read_var("COHORT_PG_HOST")?, @@ -103,7 +104,7 @@ impl ConfigLoader { agent: Self::read_var("AGENT_NAME").unwrap(), cohort: Self::read_var("COHORT_NAME").unwrap(), buffer_size: Self::read_var("AGENT_BUFFER_SIZE").unwrap().parse().unwrap(), - timout_ms: Self::read_var("AGENT_TIMEOUT_MS").unwrap().parse().unwrap(), + timeout_ms: Self::read_var("AGENT_TIMEOUT_MS").unwrap().parse().unwrap(), }) } } @@ -216,13 +217,14 @@ mod tests { env::set_var("COHORT_PG_HOST", "some host"); env::set_var("COHORT_PG_PORT", "0"); env::set_var("COHORT_PG_DATABASE", "not-existing"); + env::set_var("COHORT_PG_POOL_SIZE", "10"); let result = ConfigLoader::load(); let (a, k, _) = result.map_err(|e| assert_eq!("no error is expected", e)).unwrap(); assert_eq!(a.agent, "aName".to_string()); assert_eq!(a.cohort, "cName".to_string()); - assert_eq!(a.timout_ms, 2_u64); + assert_eq!(a.timeout_ms, 2_u64); assert_eq!(a.buffer_size, 1_usize); assert_eq!(k.brokers, "kBrokers".to_string()); diff --git a/packages/cohort/src/core.rs b/packages/cohort/src/core.rs 
index b7d0b4a2..7864ad7f 100644 --- a/packages/cohort/src/core.rs +++ b/packages/cohort/src/core.rs @@ -73,9 +73,7 @@ impl Cohort { }, ); - agent - .start(rx_certify, rx_cancel, tx_decision, rx_decision, publisher, consumer) - .expect("unable to start agent"); + let _ = agent.start(rx_certify, rx_cancel, tx_decision, rx_decision, publisher, consumer); Box::new(agent) } diff --git a/packages/cohort/src/delay_controller.rs b/packages/cohort/src/delay_controller.rs index ea76f82b..0cee3bc7 100644 --- a/packages/cohort/src/delay_controller.rs +++ b/packages/cohort/src/delay_controller.rs @@ -8,6 +8,7 @@ pub struct DelayController { max_sleep_ms: u64, } +// TODO: move me into cohort_sdk package impl DelayController { pub fn new(max_sleep_ms: u64) -> Self { Self { diff --git a/packages/cohort/src/examples_support/mod.rs b/packages/cohort/src/examples_support/mod.rs new file mode 100644 index 00000000..fd4e7553 --- /dev/null +++ b/packages/cohort/src/examples_support/mod.rs @@ -0,0 +1 @@ +pub mod queue_processor; diff --git a/packages/examples_support/src/cohort/queue_workers.rs b/packages/cohort/src/examples_support/queue_processor.rs similarity index 98% rename from packages/examples_support/src/cohort/queue_workers.rs rename to packages/cohort/src/examples_support/queue_processor.rs index 94f8e4e9..8ceb18e7 100644 --- a/packages/examples_support/src/cohort/queue_workers.rs +++ b/packages/cohort/src/examples_support/queue_processor.rs @@ -1,6 +1,10 @@ use std::{sync::Arc, time::Duration}; -use cohort::{ +use metrics::model::{MicroMetrics, MinMax}; +use time::OffsetDateTime; +use tokio::task::JoinHandle; + +use crate::{ bank_api::BankApi, config_loader::ConfigLoader, core::{AgentType, Cohort}, @@ -13,9 +17,6 @@ use cohort::{ snapshot_api::SnapshotApi, state::postgres::database::Database, }; -use metrics::model::{MicroMetrics, MinMax}; -use time::OffsetDateTime; -use tokio::task::JoinHandle; pub struct QueueProcessor {} impl QueueProcessor { @@ -189,6 +190,7 @@ impl 
QueueProcessor { loop { let started_at = OffsetDateTime::now_utc().unix_timestamp_nanos(); let result = Self::execute_once(thread_number, &tx_request, Arc::clone(&agent), Arc::clone(&database)).await?; + let finished_at = OffsetDateTime::now_utc().unix_timestamp_nanos(); stats.total_count += 1; stats.on_tx_finished(started_at, finished_at); @@ -234,7 +236,10 @@ impl QueueProcessor { ) -> Result { // span 1 (get_accounts_as_map) let s1_getacc_s = OffsetDateTime::now_utc().unix_timestamp_nanos(); - let all_accounts = BankApi::get_accounts_as_map(Arc::clone(&database), tx_request.from.clone(), tx_request.to.clone()).await?; + let all_accounts = BankApi::get_accounts_as_map(Arc::clone(&database), tx_request.from.clone(), tx_request.to.clone()) + .await + .map_err(|e| e.to_string())?; + let s1_getacc_f = OffsetDateTime::now_utc().unix_timestamp_nanos(); let account_from = all_accounts.get(&tx_request.from).unwrap(); @@ -257,7 +262,8 @@ impl QueueProcessor { // span 2 (SnapshotApi::query) let s2_getsnap_s = OffsetDateTime::now_utc().unix_timestamp_nanos(); - let cpt_snapshot = SnapshotApi::query(Arc::clone(&database)).await?; + let cpt_snapshot = SnapshotApi::query(Arc::clone(&database)).await.map_err(|e| e.to_string())?; + let s2_getsnap_f = OffsetDateTime::now_utc().unix_timestamp_nanos(); let mut bank_result = Cohort::transfer( diff --git a/packages/cohort/src/lib.rs b/packages/cohort/src/lib.rs index c9b1a715..7a980354 100644 --- a/packages/cohort/src/lib.rs +++ b/packages/cohort/src/lib.rs @@ -3,6 +3,7 @@ pub mod bank_api; pub mod config_loader; pub mod core; // TODO: move to /packages/src/core/ pub mod delay_controller; +pub mod examples_support; pub mod metrics; pub mod model; pub mod replicator; diff --git a/packages/cohort/src/replicator/pg_replicator_installer.rs b/packages/cohort/src/replicator/pg_replicator_installer.rs index f3067818..5095122b 100644 --- a/packages/cohort/src/replicator/pg_replicator_installer.rs +++ 
b/packages/cohort/src/replicator/pg_replicator_installer.rs @@ -35,7 +35,7 @@ impl ReplicatorInstaller for PgReplicatorStatemapInstaller { let vers = sm.first().map(|item| item.version); loop { - let client = self.pg.get().await; + let client = self.pg.get().await.map_err(|e| e.to_string())?; let mut manual_tx_api = PostgresApi { client }; // error!("[Replicator Installer] Isolation level {value} used for installing version={vers:?}"); diff --git a/packages/cohort/src/replicator/services/replicator_service.rs b/packages/cohort/src/replicator/services/replicator_service.rs index 7e8050d9..38b1250a 100644 --- a/packages/cohort/src/replicator/services/replicator_service.rs +++ b/packages/cohort/src/replicator/services/replicator_service.rs @@ -132,6 +132,7 @@ where } + ReplicatorChannel::InstallationFailure(_) => { // panic!("[panic panic panic] Installation Failed and replicator will panic and stop"); } diff --git a/packages/cohort/src/replicator/services/statemap_queue_service.rs b/packages/cohort/src/replicator/services/statemap_queue_service.rs index ef5f9e47..f8de2877 100644 --- a/packages/cohort/src/replicator/services/statemap_queue_service.rs +++ b/packages/cohort/src/replicator/services/statemap_queue_service.rs @@ -7,9 +7,12 @@ use log::{error, info}; use time::OffsetDateTime; use tokio::sync::mpsc; -use crate::replicator::{ - core::{StatemapInstallState, StatemapInstallationStatus, StatemapInstallerHashmap, StatemapItem}, - models::StatemapInstallerQueue, +use crate::{ + replicator::{ + core::{StatemapInstallState, StatemapInstallationStatus, StatemapInstallerHashmap, StatemapItem}, + models::StatemapInstallerQueue, + }, + state::postgres::database::DatabaseError, }; #[derive(Debug)] @@ -32,7 +35,7 @@ pub async fn statemap_queue_service( mut statemap_installation_rx: mpsc::Receiver, installation_tx: mpsc::Sender<(u64, Vec)>, // Get snapshot callback fn - get_snapshot_fn: impl Future>, + get_snapshot_fn: impl Future>, config: StatemapQueueServiceConfig, ) -> 
Result<(), String> { info!("Starting Installer Queue Service.... "); diff --git a/packages/cohort/src/replicator/utils/installer_utils.rs b/packages/cohort/src/replicator/utils/installer_utils.rs index 63787b2b..01a96305 100644 --- a/packages/cohort/src/replicator/utils/installer_utils.rs +++ b/packages/cohort/src/replicator/utils/installer_utils.rs @@ -1,9 +1,9 @@ use futures::Future; -use crate::model::snapshot::Snapshot; +use crate::{model::snapshot::Snapshot, state::postgres::database::DatabaseError}; /// Callback fn used in the `installer_queue_service` to retrieve the current snapshot. -pub async fn get_snapshot_callback(callback_fn: impl Future>) -> Result { +pub async fn get_snapshot_callback(callback_fn: impl Future>) -> Result { let snapshot = callback_fn.await?; Ok(snapshot.version) } diff --git a/packages/cohort/src/snapshot_api.rs b/packages/cohort/src/snapshot_api.rs index 0e930aed..3f94be7a 100644 --- a/packages/cohort/src/snapshot_api.rs +++ b/packages/cohort/src/snapshot_api.rs @@ -3,11 +3,11 @@ use std::time::Duration; use time::OffsetDateTime; use tokio::task::JoinHandle; +use tokio_postgres::Row; use crate::model::snapshot::Snapshot; use crate::state::data_access_api::ManualTx; -use crate::state::postgres::data_store::DataStore; -use crate::state::postgres::database::{Database, SNAPSHOT_SINGLETON_ROW_ID}; +use crate::state::postgres::database::{Database, DatabaseError, SNAPSHOT_SINGLETON_ROW_ID}; pub static SNAPSHOT_UPDATE_QUERY: &str = r#"UPDATE cohort_snapshot SET "version" = ($1)::BIGINT WHERE id = $2 AND "version" < ($1)::BIGINT"#; @@ -15,15 +15,21 @@ pub static SNAPSHOT_UPDATE_QUERY: &str = r#"UPDATE cohort_snapshot SET "version" pub struct SnapshotApi {} impl SnapshotApi { - pub async fn query(db: Arc) -> Result { - let result = db - .query_one( - r#"SELECT "version" FROM cohort_snapshot WHERE id = $1"#, - &[&SNAPSHOT_SINGLETON_ROW_ID], - DataStore::snapshot_from_row, - ) - .await; - Ok(result) + pub fn from_row(row: &Row) -> Result { + 
let updated = row + .try_get::<&str, i64>("version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read snapshot".into()))?; + + Ok(Snapshot { version: updated as u64 }) + } + + pub async fn query(db: Arc) -> Result { + db.query_one( + r#"SELECT "version" FROM cohort_snapshot WHERE id = $1"#, + &[&SNAPSHOT_SINGLETON_ROW_ID], + SnapshotApi::from_row, + ) + .await } pub async fn update_using(client: &T, new_version: u64) -> Result { diff --git a/packages/cohort/src/state/postgres/data_store.rs b/packages/cohort/src/state/postgres/data_store.rs index ae9b0d52..9830eb97 100644 --- a/packages/cohort/src/state/postgres/data_store.rs +++ b/packages/cohort/src/state/postgres/data_store.rs @@ -1,45 +1,45 @@ // $coverage:ignore-start -use rust_decimal::Decimal; use std::sync::Arc; -use tokio_postgres::Row; +use crate::bank_api::BankApi; use crate::model::bank_account::BankAccount; use crate::model::snapshot::Snapshot; +use crate::snapshot_api::SnapshotApi; use crate::state::postgres::database::{Database, SNAPSHOT_SINGLETON_ROW_ID}; +use super::database::DatabaseError; + pub struct DataStore {} impl DataStore { - pub async fn prefill_snapshot(db: Arc, snapshot: Snapshot) -> Result { + pub async fn prefill_snapshot(db: Arc, snapshot: Snapshot) -> Result { let rslt = db .query_opt( r#"SELECT "version" FROM cohort_snapshot WHERE id = $1 AND "version" > $2"#, &[&SNAPSHOT_SINGLETON_ROW_ID, &(snapshot.version as i64)], - Self::snapshot_from_row, + SnapshotApi::from_row, ) - .await; + .await?; if let Some(snapshot) = rslt { Ok(snapshot) } else { - let updated = db - .query_one( - r#" - INSERT INTO cohort_snapshot ("id", "version") VALUES ($1, $2) - ON CONFLICT(id) DO - UPDATE SET version = $2 RETURNING version - "#, - &[&SNAPSHOT_SINGLETON_ROW_ID, &(snapshot.version as i64)], - Self::snapshot_from_row, - ) - .await; - - Ok(updated) + db.query_one( + r#" + INSERT INTO cohort_snapshot ("id", "version") VALUES ($1, $2) + ON CONFLICT(id) DO + UPDATE SET 
version = $2 RETURNING version + "#, + &[&SNAPSHOT_SINGLETON_ROW_ID, &(snapshot.version as i64)], + SnapshotApi::from_row, + ) + .await } } - pub async fn prefill_accounts(db: Arc, accounts: Vec) -> Result, String> { + pub async fn prefill_accounts(db: Arc, accounts: Vec) -> Result, DatabaseError> { let client = db.pool.get().await.unwrap(); let mut updated_accounts = Vec::::new(); + let frequency: u64 = (accounts.len() as f32 * 15.0 / 100.0) as u64; for acc in accounts.iter() { let updated = { let rslt = client @@ -51,7 +51,7 @@ impl DataStore { .unwrap(); if rslt.is_some() { - Self::account_from_row(&rslt.unwrap()) + BankApi::account_from_row(&rslt.unwrap())? } else { // update db with new account data let updated_row = client @@ -66,28 +66,18 @@ impl DataStore { .await .unwrap(); - Self::account_from_row(&updated_row) + BankApi::account_from_row(&updated_row)? } }; + if updated_accounts.len() as f32 % frequency as f32 == 0.0 { + log::warn!("Inserted: {} accounts of {}", updated_accounts.len(), accounts.len()); + } + updated_accounts.push(updated); } Ok(updated_accounts) } - - pub fn account_from_row(row: &Row) -> BankAccount { - BankAccount { - name: row.get::<&str, String>("name"), - number: row.get::<&str, String>("number"), - version: row.get::<&str, i64>("version") as u64, - balance: row.get::<&str, Decimal>("amount"), - } - } - - pub fn snapshot_from_row(row: &Row) -> Snapshot { - let updated = row.get::<&str, i64>("version"); - Snapshot { version: updated as u64 } - } } // $coverage:ignore-end diff --git a/packages/cohort/src/state/postgres/database.rs b/packages/cohort/src/state/postgres/database.rs index 1b2f75eb..0561bc48 100644 --- a/packages/cohort/src/state/postgres/database.rs +++ b/packages/cohort/src/state/postgres/database.rs @@ -1,10 +1,12 @@ +use std::fmt::{self, Display, Formatter}; // $coverage:ignore-start use std::sync::Arc; +use strum::Display; use tokio_postgres::types::ToSql; use tokio_postgres::{NoTls, Row}; -use 
deadpool_postgres::{Config, GenericClient, ManagerConfig, Object, Pool, PoolConfig, Runtime}; +use deadpool_postgres::{Config, CreatePoolError, GenericClient, ManagerConfig, Object, Pool, PoolConfig, Runtime}; use crate::state::postgres::database_config::DatabaseConfig; @@ -14,12 +16,84 @@ pub struct Database { pub pool: Pool, } +#[derive(Display, Debug)] +pub enum DatabaseErrorKind { + PoolInit, + BorrowConnection, + QueryOrExecute, + PrepareStatement, + Deserialise, +} + +#[derive(Debug)] +pub struct DatabaseError { + kind: DatabaseErrorKind, + pub reason: String, + pub cause: Option, +} + +impl DatabaseError { + pub fn cannot_borrow(cause: String) -> Self { + Self { + kind: DatabaseErrorKind::BorrowConnection, + reason: "Cannot get client from DB pool.".into(), + cause: Some(cause), + } + } + + pub fn query(cause: String, query: String) -> Self { + Self { + kind: DatabaseErrorKind::QueryOrExecute, + cause: Some(cause), + reason: format!("Error executing: '{}'", query), + } + } + + pub fn prepare(cause: String, query: String) -> Self { + Self { + kind: DatabaseErrorKind::PrepareStatement, + cause: Some(cause), + reason: format!("Error preparing statement for: '{}'", query), + } + } + + pub fn deserialise_payload(cause: String, message: String) -> Self { + Self { + kind: DatabaseErrorKind::Deserialise, + cause: Some(cause), + reason: format!("Resultset parsing error. 
Details: '{}'", message), + } + } +} + +impl From for DatabaseError { + fn from(value: CreatePoolError) -> Self { + Self { + kind: DatabaseErrorKind::PoolInit, + reason: "Cannot create DB pool".into(), + cause: Some(value.to_string()), + } + } +} + +impl Display for DatabaseError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "DatabaseError: [kind: {}, reason: {}, cause: {}]", + self.kind, + self.reason, + self.cause.clone().unwrap_or("".into()) + ) + } +} + impl Database { - pub async fn get(&self) -> Object { - self.pool.get().await.unwrap() + pub async fn get(&self) -> Result { + self.pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string())) } - pub async fn init_db(cfg: DatabaseConfig) -> Arc { + pub async fn init_db(cfg: DatabaseConfig) -> Result, DatabaseError> { let mut config = Config::new(); config.dbname = Some(cfg.database); config.user = Some(cfg.user); @@ -30,7 +104,7 @@ impl Database { recycling_method: deadpool_postgres::RecyclingMethod::Fast, }); let pc = PoolConfig { - max_size: 40, + max_size: cfg.pool_size, ..PoolConfig::default() }; config.pool = Some(pc); @@ -41,52 +115,92 @@ impl Database { .unwrap(); { - //test connection let mut tmp_list: Vec = Vec::new(); for _ in 1..=pc.max_size { - let client = pool.get().await.map_err(|e| format!("Cannot get client from DB pool. Error: {}", e)).unwrap(); - client - .execute("SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL REPEATABLE READ;", &[]) - .await - .unwrap(); + let client = pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string()))?; + + let stm = "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL REPEATABLE READ;"; + client.execute(stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), stm.into()))?; tmp_list.push(client); } } for _ in 1..=pc.max_size { - let client = pool.get().await.map_err(|e| format!("Cannot get client from DB pool. 
Error: {}", e)).unwrap(); - let rs = client.query_one("show transaction_isolation", &[]).await.unwrap(); + let client = pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string()))?; + + let stm = "show transaction_isolation"; + let rs = client.query_one(stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), stm.into()))?; let value: String = rs.get(0); log::debug!("init: db-isolation-level: {}", value); } - Arc::new(Database { pool }) + Ok(Arc::new(Database { pool })) } - pub async fn query_one(&self, sql: &str, params: &[&(dyn ToSql + Sync)], fn_converter: fn(&Row) -> T) -> T { - let client = self.get().await; - let stm = client.prepare_cached(sql).await.unwrap(); - fn_converter(&client.query_one(&stm, params).await.unwrap()) + pub async fn query_one( + &self, + sql: &str, + params: &[&(dyn ToSql + Sync)], + fn_converter: fn(&Row) -> Result, + ) -> Result { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + fn_converter( + &client + .query_one(&stm, params) + .await + .map_err(|e| DatabaseError::query(e.to_string(), sql.into()))?, + ) } - pub async fn query_opt(&self, sql: &str, params: &[&(dyn ToSql + Sync)], fn_converter: fn(&Row) -> T) -> Option { - let client = self.get().await; - let stm = client.prepare_cached(sql).await.unwrap(); - let result = client.query_opt(&stm, params).await.unwrap(); - result.map(|r| fn_converter(&r)) + pub async fn query_opt( + &self, + sql: &str, + params: &[&(dyn ToSql + Sync)], + fn_converter: fn(&Row) -> Result, + ) -> Result, DatabaseError> { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + let result = client + .query_opt(&stm, params) + .await + .map_err(|e| DatabaseError::query(e.to_string(), sql.to_string()))?; + + if let Some(row) = result { + fn_converter(&row).map(|v| Some(v)) + } 
else { + Ok(None) + } } - pub async fn query(&self, sql: &str, fn_converter: fn(&Row) -> T) -> Vec { - let client = self.get().await; - let stm = client.prepare_cached(sql).await.unwrap(); - let result = client.query(&stm, &[]).await.unwrap(); - result.iter().map(fn_converter).collect::>() + pub async fn query(&self, sql: &str, fn_converter: fn(&Row) -> Result) -> Result, DatabaseError> { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + let result = client.query(&stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), sql.into()))?; + + let mut items: Vec = Vec::new(); + for row in result.iter() { + items.push(fn_converter(row)?); + } + Ok(items) } - pub async fn execute(&self, sql: &str, params: &[&(dyn ToSql + Sync)]) -> u64 { - let client = self.get().await; - let stm = client.prepare_cached(sql).await.unwrap(); - client.execute(&stm, params).await.unwrap() + pub async fn execute(&self, sql: &str, params: &[&(dyn ToSql + Sync)]) -> Result { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + client.execute(&stm, params).await.map_err(|e| DatabaseError::query(e.to_string(), sql.into())) } } // $coverage:ignore-end diff --git a/packages/cohort/src/state/postgres/database_config.rs b/packages/cohort/src/state/postgres/database_config.rs index 807f20f5..2b8b25a4 100644 --- a/packages/cohort/src/state/postgres/database_config.rs +++ b/packages/cohort/src/state/postgres/database_config.rs @@ -1,5 +1,6 @@ #[derive(Clone, Debug)] pub struct DatabaseConfig { + pub pool_size: usize, pub user: String, pub password: String, pub host: String, @@ -25,6 +26,7 @@ mod tests { #[allow(clippy::redundant_clone)] fn test_model() { let cfg = DatabaseConfig { + pool_size: 1, user: "USER1".into(), password: "1234".into(), host: "localhost".into(), @@ -33,13 
+35,14 @@ mod tests { }; assert_eq!( format!("{:?}", cfg.clone()), - r#"DatabaseConfig { user: "USER1", password: "1234", host: "localhost", port: "1010", database: "db_admin" }"#, + r#"DatabaseConfig { pool_size: 1, user: "USER1", password: "1234", host: "localhost", port: "1010", database: "db_admin" }"#, ); } #[test] fn should_generate_valid_connection_string() { let cfg = DatabaseConfig { + pool_size: 1, user: "USER1".into(), password: "1234".into(), host: "localhost".into(), diff --git a/packages/cohort_banking/Cargo.toml b/packages/cohort_banking/Cargo.toml new file mode 100644 index 00000000..515e68e3 --- /dev/null +++ b/packages/cohort_banking/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "cohort_banking" +version = "0.0.1" +edition = "2021" + +[dependencies] +async-trait = { workspace = true } +env_logger = { workspace = true } +log = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["full"] } + +# Postgres +refinery = { version = "0.8.7", features = ["tokio-postgres"] } +rust_decimal = { version = "1.30.0", features = ["db-tokio-postgres", "serde-with-float"] } +tokio-postgres = { version = "0.7", features = [ "with-uuid-1", "with-serde_json-1" ] } +deadpool = { version = "0.9.5" } +deadpool-postgres = { version = "0.10" } + +async-channel = { version = "1.8.0" } +futures = { version = "0.3.28" } +opentelemetry_api = { version = "0.19.0" } +opentelemetry_sdk = { version = "0.19.0", features = ["metrics", "rt-tokio"] } +opentelemetry = { version = "0.19.0" } +rand = { version = "0.8.5" } +strum = { version = "0.25", features = ["derive"] } +uuid = { version = "1.2.2", features = ["v4"] } + +talos_agent = { path = "../talos_agent" } +cohort_sdk = { path = "../cohort_sdk" } +metrics = { path = "../metrics" } +talos_certifier = { path = "../talos_certifier" } +talos_suffix = { path = "../talos_suffix" } \ No newline at end of file diff --git a/packages/cohort_banking/src/app.rs 
b/packages/cohort_banking/src/app.rs new file mode 100644 index 00000000..736e0deb --- /dev/null +++ b/packages/cohort_banking/src/app.rs @@ -0,0 +1,134 @@ +use std::{collections::HashMap, sync::Arc}; + +use async_trait::async_trait; +use cohort_sdk::{ + cohort::Cohort, + model::{CandidateData, CertificationRequest, ClientErrorKind, Config}, +}; +use opentelemetry_api::{ + global, + metrics::{Counter, Unit}, + Context, +}; +use talos_agent::messaging::api::Decision; + +use crate::{ + callbacks::{oo_installer::OutOfOrderInstallerImpl, state_provider::StateProviderImpl, statemap_installer::StatemapInstallerImpl}, + examples_support::queue_processor::Handler, + model::requests::{BusinessActionType, TransferRequest}, + state::postgres::{database::Database, database_config::DatabaseConfig}, +}; + +pub struct BankingApp { + config: Config, + cohort_api: Option, + pub database: Arc, + counter_aborts: Arc>, + counter_commits: Arc>, + counter_oo_no_data_found: Arc>, +} + +impl BankingApp { + pub async fn new(config: Config) -> Result { + let db_config = DatabaseConfig { + pool_size: config.db_pool_size, + user: config.db_user.clone(), + password: config.db_password.clone(), + host: config.db_host.clone(), + port: config.db_port.clone(), + database: config.db_database.clone(), + }; + + let meter = global::meter("banking_cohort"); + let counter_aborts = meter.u64_counter("metric_aborts").with_unit(Unit::new("tx")).init(); + let counter_commits = meter.u64_counter("metric_commits").with_unit(Unit::new("tx")).init(); + let counter_oo_no_data_found = meter.u64_counter("metric_oo_no_data_found").with_unit(Unit::new("tx")).init(); + + Ok(BankingApp { + config: config.clone(), + cohort_api: None, + database: Database::init_db(db_config).await.map_err(|e| e.to_string())?, + counter_aborts: Arc::new(counter_aborts), + counter_commits: Arc::new(counter_commits), + counter_oo_no_data_found: Arc::new(counter_oo_no_data_found), + }) + } + + pub async fn init(&mut self) -> Result<(), 
String> { + let installer = StatemapInstallerImpl { + database: Arc::clone(&self.database), + }; + + let cohort_api = Cohort::create(self.config.clone(), installer).await.map_err(|e| e.to_string())?; + + self.cohort_api = Some(cohort_api); + + Ok(()) + } +} + +#[async_trait] +impl Handler for BankingApp { + async fn handle(&self, request: TransferRequest) -> Result<(), String> { + log::debug!("processig new banking transfer request: {:?}", request); + + let request_copy = request.clone(); + + let statemap = vec![HashMap::from([( + BusinessActionType::TRANSFER.to_string(), + TransferRequest::new(request.from.clone(), request.to.clone(), request.amount).json(), + )])]; + + let request = CertificationRequest { + timeout_ms: 0, + candidate: CandidateData { + readset: vec![request.from.clone(), request.to.clone()], + writeset: vec![request.from, request.to], + statemap: Some(statemap), + }, + }; + + let single_query_strategy = true; + let state_provider = StateProviderImpl { + database: Arc::clone(&self.database), + request: request_copy.clone(), + single_query_strategy, + }; + + let oo_inst = OutOfOrderInstallerImpl { + database: Arc::clone(&self.database), + request: request_copy, + detailed_logging: false, + counter_oo_no_data_found: Arc::clone(&self.counter_oo_no_data_found), + single_query_strategy, + }; + + match self + .cohort_api + .as_ref() + .expect("Banking app is not initialised") + .certify(request, &state_provider, &oo_inst) + .await + { + Ok(rsp) => { + let ca = Arc::clone(&self.counter_aborts); + let cc = Arc::clone(&self.counter_commits); + let is_abort = rsp.decision == Decision::Aborted; + tokio::spawn(async move { + if is_abort { + ca.add(&Context::current(), 1, &[]); + } else { + cc.add(&Context::current(), 1, &[]); + } + }); + + log::debug!("Talos decision for xid '{}' is: {:?}", rsp.xid, rsp.decision); + Ok(()) + } + Err(client_error) => match client_error.kind { + ClientErrorKind::OutOfOrderSnapshotTimeout => Ok(()), + _ => 
Err(client_error.to_string()), + }, + } + } +} diff --git a/packages/cohort_banking/src/callbacks/mod.rs b/packages/cohort_banking/src/callbacks/mod.rs new file mode 100644 index 00000000..474d49c6 --- /dev/null +++ b/packages/cohort_banking/src/callbacks/mod.rs @@ -0,0 +1,3 @@ +pub mod oo_installer; +pub mod state_provider; +pub mod statemap_installer; diff --git a/packages/cohort_banking/src/callbacks/oo_installer.rs b/packages/cohort_banking/src/callbacks/oo_installer.rs new file mode 100644 index 00000000..decd8a23 --- /dev/null +++ b/packages/cohort_banking/src/callbacks/oo_installer.rs @@ -0,0 +1,261 @@ +use std::{ + collections::HashSet, + sync::Arc, + time::{Duration, Instant}, +}; + +use async_trait::async_trait; +use cohort_sdk::model::callbacks::{OutOfOrderInstallOutcome, OutOfOrderInstaller}; +use opentelemetry_api::{metrics::Counter, Context}; +use tokio::task::JoinHandle; +use tokio_postgres::types::ToSql; + +use crate::{ + model::requests::TransferRequest, + state::postgres::database::{Database, DatabaseError}, +}; + +pub struct OutOfOrderInstallerImpl { + pub database: Arc, + pub request: TransferRequest, + pub detailed_logging: bool, + pub counter_oo_no_data_found: Arc>, + pub single_query_strategy: bool, +} + +pub static SNAPSHOT_SINGLETON_ROW_ID: &str = "SINGLETON"; + +impl OutOfOrderInstallerImpl { + async fn is_safe_to_proceed(db: Arc, safepoint: u64) -> Result { + let snapshot = db + .query_one(r#"SELECT "version" FROM cohort_snapshot WHERE id = $1"#, &[&SNAPSHOT_SINGLETON_ROW_ID], |row| { + let snapshot = row + .try_get::<&str, i64>("version") + .map(|v| v as u64) + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read snapshot version".into()))?; + Ok(snapshot) + }) + .await + .map_err(|e| e.to_string())?; + Ok(snapshot >= safepoint) + } + + async fn install_item(&self, new_version: u64) -> Result { + let sql = r#" + UPDATE bank_accounts ba SET + "amount" = + (CASE + WHEN ba."number" = ($1)::TEXT THEN ba."amount" + 
($3)::DECIMAL + WHEN ba."number" = ($2)::TEXT THEN ba."amount" - ($3)::DECIMAL + END), + "version" = ($4)::BIGINT + WHERE ba."number" IN (($1)::TEXT, ($2)::TEXT) AND ba."version" < ($4)::BIGINT + "#; + + let params: &[&(dyn ToSql + Sync)] = &[&self.request.from, &self.request.to, &self.request.amount, &(new_version as i64)]; + + let result = self.database.execute(sql, params).await.map_err(|e| e.to_string())?; + + if result == 0 { + Ok(OutOfOrderInstallOutcome::InstalledAlready) + } else { + Ok(OutOfOrderInstallOutcome::Installed) + } + } + + async fn install_using_polling(&self, _xid: String, safepoint: u64, new_version: u64, _attempt_nr: u64) -> Result { + let db = Arc::clone(&self.database); + let wait_handle: JoinHandle> = tokio::spawn(async move { + let mut safe_now = Self::is_safe_to_proceed(Arc::clone(&db), safepoint).await?; + let poll_frequency = Duration::from_secs(1); + let started_at = Instant::now(); + loop { + if safe_now { + return Ok(true); + } + + tokio::time::sleep(poll_frequency).await; + if started_at.elapsed().as_secs() >= 600 { + return Ok(false); + } + + safe_now = Self::is_safe_to_proceed(Arc::clone(&db), safepoint).await?; + } + }); + + let is_safe_now = wait_handle.await.map_err(|e| e.to_string())??; + if is_safe_now { + self.install_item(new_version).await + } else { + Ok(OutOfOrderInstallOutcome::SafepointCondition) + } + } + + async fn install_using_single_query(&self, xid: String, safepoint: u64, new_version: u64, attempt_nr: u64) -> Result { + // Params order: + // 1 - from, 2 - to, 3 - amount + // 4 - new_ver, 5 - safepoint + let sql = r#" + WITH bank_accounts_temp AS ( + UPDATE bank_accounts ba SET + "amount" = + (CASE + WHEN ba."number" = ($1)::TEXT THEN ba."amount" + ($3)::DECIMAL + WHEN ba."number" = ($2)::TEXT THEN ba."amount" - ($3)::DECIMAL + END), + "version" = ($4)::BIGINT + WHERE ba."number" IN (($1)::TEXT, ($2)::TEXT) + AND EXISTS (SELECT 1 FROM cohort_snapshot cs WHERE cs."version" >= ($5)::BIGINT) + AND ba."version" < 
($4)::BIGINT + RETURNING + ba."number", ba."version" as "new_version", (null)::BIGINT as "version", (SELECT cs."version" FROM cohort_snapshot cs) as "snapshot" + ) + SELECT * FROM bank_accounts_temp + UNION + SELECT + ba."number", (null)::BIGINT as "new_version", ba."version" as "version", cs."version" as "snapshot" + FROM + bank_accounts ba, cohort_snapshot cs + WHERE ba."number" IN (($1)::TEXT, ($2)::TEXT) + "#; + + let params: &[&(dyn ToSql + Sync)] = &[ + &self.request.from, + &self.request.to, + &self.request.amount, + &(new_version as i64), + &(safepoint as i64), + ]; + + let result = self + .database + .query_many(sql, params, |row| { + let nr = row + .try_get::<&str, String>("number") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account name".into()))?; + let new_ver = row + .try_get::<&str, Option>("new_version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read new_version column".into()))?; + let version = row + .try_get::<&str, Option>("version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read version column".into()))?; + let snapshot = row + .try_get::<&str, i64>("snapshot") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read snapshot column".into()))?; + + Ok((nr, new_ver, version, snapshot)) + }) + .await + .map_err(|e| e.to_string())?; + + if result.is_empty() { + // there were no items found to work with + log::warn!( + "No bank accounts where found by these IDs: {:?}", + (self.request.from.clone(), self.request.to.clone()) + ); + let c = Arc::clone(&self.counter_oo_no_data_found); + tokio::spawn(async move { + c.add(&Context::current(), 1, &[]); + }); + return Ok(OutOfOrderInstallOutcome::InstalledAlready); + } + + // Quickly grab the snapshot to check whether safepoint condition is satisfied. Any row can be used for that. 
+ let (_, _, _, snapshot) = &result[0]; + if (*snapshot as u64) < safepoint { + return Ok(OutOfOrderInstallOutcome::SafepointCondition); + } + + // Now we know that it was safe to execute install. We either just installed or we were late and replicator has done it before us. + // The number of returned rows could be anything from 1 to 4. + // When: + // 1: Edge case. We wanted to update 2 accounts, but one account got deleted from DB, thats why "SELECT ... WHERE number IN($1,$2)" could find only one account. + // With that single accout which we found, we or replicator did the installation. + // 4: Happy path. We updated two accounts and also queried them using the bottom part of UNION statement. + // 2: This is possible in two scenarios. + // 2.1: Happy path. We could not update anyhting, so we just queried data using the bottom part of UNION statement. Replicator has done thr work. + // 2.2: Edge case. We could find one account because it was deleted. This returned only one row: "SELECT ... WHERE number IN($1,$2)". + // However that rows was returned 2 times, one time by each arm of UNION. Basically this is the same as case "4" but applied to one account only. + // 3: Only one accout was updated by us, and two accouts were queried by bottom part of UNION statement, while another accout has been updated by replicator. + + // Code below is for debugging purposes only + if self.detailed_logging { + if result.len() == 1 { + let (number, new_ver, version, _snapshot) = &result[0]; + if new_ver.is_none() { + log::debug!( + "Case 1: No rows were updated for xid '{}' when installing out of order data with new version {} using attempts: {}. Account {} version is now {:?}. Another candidate account was not found", + xid, + new_version, + attempt_nr, + number, + version, + ); + } else { + log::debug!( + "Case 1: 1 row was updated for xid '{}' when installing out of order data with new version {} using attempts: {}. Account {} version is now {:?}. 
Another candidate account was not found", + xid, + new_version, + attempt_nr, + number, + new_ver, + ); + } + } else if result.len() == 2 { + let accounts: HashSet<&String> = result.iter().map(|(n, _, _, _)| n).collect(); + if accounts.len() == 2 { + // 2.1 + let (_, _, version_from, _) = &result[0]; + let (_, _, version_to, _) = &result[1]; + log::debug!( + "Case 2.1: No rows were updated for xid '{}' when installing out of order data with new version {} using attempts: {}. Current versions have moved to {:?}", + xid, + new_version, + attempt_nr, + (version_from, version_to) + ); + } else { + // 2.2 + let (number, new_ver, _, _) = &result[0]; + log::debug!( + "Case 2.2: 1 row was updated for xid '{}' when installing out of order data with new version {} using attempts: {}. Account {} version is now {:?}. Another candidate account was not found", + xid, + new_version, + attempt_nr, + number, + new_ver, + ); + } + } else if result.len() == 3 { + let (number, new_ver, _, _) = &result[0]; + // Since order of rows in the UNION bottom arm is not known, we do a simple comparison to find row correspinding to 'another' account (the one we did not update). + let (number_a, _, _, _) = &result[1]; + let (_, _, version, _) = if *number_a == *number { &result[2] } else { &result[1] }; + + log::debug!( + "Case 3: 1 row was updated for xid '{}' when installing out of order data with new version {} using attempts: {}. Account {} version is now {:?}. 
Another accout was already set to: {:?}", + xid, + new_version, + attempt_nr, + number, + new_ver, + version, + ); + } + } + + Ok(OutOfOrderInstallOutcome::Installed) + } +} + +#[async_trait] +impl OutOfOrderInstaller for OutOfOrderInstallerImpl { + async fn install(&self, xid: String, safepoint: u64, new_version: u64, attempt_nr: u64) -> Result { + if self.single_query_strategy { + self.install_using_single_query(xid, safepoint, new_version, attempt_nr).await + } else { + self.install_using_polling(xid, safepoint, new_version, attempt_nr).await + } + } +} diff --git a/packages/cohort_banking/src/callbacks/state_provider.rs b/packages/cohort_banking/src/callbacks/state_provider.rs new file mode 100644 index 00000000..a5e27bed --- /dev/null +++ b/packages/cohort_banking/src/callbacks/state_provider.rs @@ -0,0 +1,133 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use cohort_sdk::model::callbacks::{CapturedItemState, CapturedState, ItemStateProvider}; +use rust_decimal::Decimal; +use tokio_postgres::Row; + +use crate::{ + model::{bank_account::BankAccount, requests::TransferRequest}, + state::postgres::database::{Database, DatabaseError}, +}; + +pub struct StateProviderImpl { + pub request: TransferRequest, + pub database: Arc, + pub single_query_strategy: bool, +} + +impl StateProviderImpl { + pub fn account_from_row(row: &Row) -> Result { + Ok(BankAccount { + name: row + .try_get::<&str, String>("name") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account name".into()))?, + number: row + .try_get::<&str, String>("number") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account number".into()))?, + version: row + .try_get::<&str, i64>("version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account version".into()))? 
as u64, + balance: row + .try_get::<&str, Decimal>("amount") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read account amount".into()))?, + }) + } + + async fn get_state_using_two_queries(&self) -> Result { + let list = self + .database + .query_many( + r#"SELECT ba.* FROM bank_accounts ba WHERE ba."number" = $1 OR ba."number" = $2"#, + &[&self.request.from, &self.request.to], + Self::account_from_row, + ) + .await + .map_err(|e| e.to_string())?; + + if list.len() != 2 { + return Err(format!("Unable to load state of accounts: '{}' and '{}'", self.request.from, self.request.to)); + } + + let snapshot_version = self + .database + .query_one( + r#"SELECT cs."version" AS snapshot_version FROM cohort_snapshot cs WHERE cs.id = $1"#, + &[&"SINGLETON"], + |row| { + let snapshot_version = row + .try_get::<&str, i64>("snapshot_version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read snapshot_version".into()))?; + Ok(snapshot_version as u64) + }, + ) + .await + .map_err(|e| e.to_string())?; + + Ok(CapturedState { + snapshot_version, + items: list + .iter() + .map(|account| CapturedItemState { + id: account.number.clone(), + version: account.version, + }) + .collect(), + }) + } + + async fn get_state_using_one_query(&self) -> Result { + let list = self + .database + .query_many( + // Note: + // We intentioanly left 'cohort_snapshot' table not joined to 'bank_accounts', in that case + // database will multiply its content with 'bank_accounts' and automatically join rows from both tables. + // Given that 'cohort_snapshot' will ever have one single row, we are good here. 
+ r#" + SELECT + ba.*, cs."version" AS snapshot_version + FROM + bank_accounts ba, cohort_snapshot cs + WHERE + ba."number" = $1 OR ba."number" = $2"#, + &[&self.request.from, &self.request.to], + // convert RAW output into tuple (bank account, snap ver) + |row| { + let account = Self::account_from_row(row)?; + let snapshot_version = row + .try_get::<&str, i64>("snapshot_version") + .map_err(|e| DatabaseError::deserialise_payload(e.to_string(), "Cannot read snapshot_version".into()))?; + Ok((account, snapshot_version as u64)) + }, + ) + .await + .map_err(|e| e.to_string())?; + + if list.len() != 2 { + return Err(format!("Unable to load state of accounts: '{}' and '{}'", self.request.from, self.request.to)); + } + + Ok(CapturedState { + snapshot_version: list[0].1, + items: list + .iter() + .map(|tuple| CapturedItemState { + id: tuple.0.number.clone(), + version: tuple.0.version, + }) + .collect(), + }) + } +} + +#[async_trait] +impl ItemStateProvider for StateProviderImpl { + async fn get_state(&self) -> Result { + if self.single_query_strategy { + self.get_state_using_one_query().await + } else { + self.get_state_using_two_queries().await + } + } +} diff --git a/packages/cohort_banking/src/callbacks/statemap_installer.rs b/packages/cohort_banking/src/callbacks/statemap_installer.rs new file mode 100644 index 00000000..a13e89a6 --- /dev/null +++ b/packages/cohort_banking/src/callbacks/statemap_installer.rs @@ -0,0 +1,77 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use cohort_sdk::{model::callbacks::StatemapInstaller, replicator::core::StatemapItem}; +use tokio_postgres::types::ToSql; + +use crate::{model::requests::TransferRequest, state::postgres::database::Database}; + +pub struct StatemapInstallerImpl { + pub database: Arc, +} + +#[async_trait] +impl StatemapInstaller for StatemapInstallerImpl { + async fn install(&self, statemap: Vec, snapshot_version: u64) -> Result<(), String> { + // from = 1 + // to = 2 + // amount = 3 + // new_ver = 4 + + let 
mut cnn = self.database.get().await.map_err(|e| e.to_string())?; + let tx = cnn.transaction().await.map_err(|e| e.to_string())?; + + if !statemap.is_empty() { + let sti = statemap[0].clone(); + + let request: TransferRequest = serde_json::from_value(sti.payload.clone()).map_err(|e| e.to_string())?; + + let sql = r#" + UPDATE bank_accounts ba SET + "amount" = + (CASE + WHEN ba."number" = ($1)::TEXT THEN ba."amount" + ($3)::DECIMAL + WHEN ba."number" = ($2)::TEXT THEN ba."amount" - ($3)::DECIMAL + END), + "version" = ($4)::BIGINT + WHERE ba."number" IN (($1)::TEXT, ($2)::TEXT) + AND ba."version" < ($4)::BIGINT + "#; + + let params: &[&(dyn ToSql + Sync)] = &[&request.from, &request.to, &request.amount, &(sti.version as i64)]; + + let updated_rows = tx.execute(sql, params).await.map_err(|e| e.to_string())?; + + if updated_rows == 0 { + log::debug!("No rows were updated when installing: {:?}. Snapshot will be set to: {}", sti, snapshot_version); + } + + log::info!( + "{} rows were updated when installing: {:?}. Snapshot will be set to: {}", + updated_rows, + sti, + snapshot_version + ); + } + + let params: &[&(dyn ToSql + Sync)] = &[&(snapshot_version as i64), &"SINGLETON"]; + + let sql = r#"UPDATE cohort_snapshot SET "version" = ($1)::BIGINT WHERE id = $2 AND "version" < ($1)::BIGINT"#; + let updated_rows = tx.execute(sql, params).await.map_err(|e| e.to_string())?; + + if updated_rows == 0 { + log::debug!( + "No rows were updated when updating snapshot. Snapshot is already set to {} or higher", + snapshot_version + ); + } + + log::info!("{} rows were updated when updating snapshot to {}", updated_rows, snapshot_version); + + tx.commit() + .await + .map_err(|tx_error| format!("Commit error for statemap.
Error: {}", tx_error))?; + + Ok(()) + } +} diff --git a/packages/cohort_banking/src/examples_support/mod.rs b/packages/cohort_banking/src/examples_support/mod.rs new file mode 100644 index 00000000..fd4e7553 --- /dev/null +++ b/packages/cohort_banking/src/examples_support/mod.rs @@ -0,0 +1 @@ +pub mod queue_processor; diff --git a/packages/cohort_banking/src/examples_support/queue_processor.rs b/packages/cohort_banking/src/examples_support/queue_processor.rs new file mode 100644 index 00000000..2ac44a61 --- /dev/null +++ b/packages/cohort_banking/src/examples_support/queue_processor.rs @@ -0,0 +1,84 @@ +use std::{ + sync::Arc, + time::{Instant, SystemTime, UNIX_EPOCH}, +}; + +use metrics::model::MinMax; +use opentelemetry_api::{ + metrics::{Meter, Unit}, + Context, +}; +use tokio::task::JoinHandle; + +use async_trait::async_trait; + +pub struct QueueProcessor {} + +#[async_trait] +pub trait Handler: Sync + Send { + async fn handle(&self, item: T) -> Result<(), String>; +} + +impl QueueProcessor { + pub async fn process + 'static>( + queue: Arc>, + meter: Arc, + threads: u64, + item_handler: Arc, + ) -> Vec> { + let item_handler = Arc::new(item_handler); + let mut tasks = Vec::>::new(); + + for thread_number in 1..=threads { + let queue_ref = Arc::clone(&queue); + let item_handler = Arc::clone(&item_handler); + let meter = Arc::clone(&meter); + let task_h: JoinHandle = tokio::spawn(async move { + let mut timeline = MinMax::default(); + let histogram = Arc::new(meter.f64_histogram("metric_duration").with_unit(Unit::new("ms")).init()); + let counter = Arc::new(meter.u64_counter("metric_count").with_unit(Unit::new("tx")).init()); + + let mut handled_count = 0; + + loop { + let histogram_ref = Arc::clone(&histogram); + match queue_ref.recv().await { + Err(_) => break, + Ok(item) => { + timeline.add(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos() as i128); + handled_count += 1; + let span_1 = Instant::now(); + let result = 
item_handler.handle(item).await; + let span_1_val = span_1.elapsed().as_nanos() as f64 / 1_000_000_f64; + tokio::spawn(async move { + histogram_ref.record(&Context::current(), span_1_val, &[]); + }); + + if let Err(e) = result { + log::warn!( + "Thread {} cannot process more requests. Error handling item: {}. Processed items: {}", + thread_number, + e, + handled_count + ); + break; + } + } + } + } + + timeline.add(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos() as i128); + + tokio::spawn(async move { + counter.add(&Context::current(), handled_count, &[]); + }); + log::debug!("Thread {:>2} stopped. Processed items: {}.", thread_number, handled_count); + + timeline + }); + tasks.push(task_h); + } + + tasks + } +} diff --git a/packages/cohort_banking/src/lib.rs b/packages/cohort_banking/src/lib.rs new file mode 100644 index 00000000..64cf5b0f --- /dev/null +++ b/packages/cohort_banking/src/lib.rs @@ -0,0 +1,6 @@ +pub mod app; +pub mod callbacks; +pub mod examples_support; +pub mod metrics; +pub mod model; +pub mod state; diff --git a/packages/cohort_banking/src/metrics.rs b/packages/cohort_banking/src/metrics.rs new file mode 100644 index 00000000..8ad8a1ce --- /dev/null +++ b/packages/cohort_banking/src/metrics.rs @@ -0,0 +1,240 @@ +use std::{collections::HashMap, time::Duration}; + +use metrics::model::MinMax; + +#[derive(Debug, Clone)] +pub struct Span { + pub started: i128, + pub ended: i128, +} + +impl Span { + pub fn new(started: i128, ended: i128) -> Self { + Self { started, ended } + } + + pub fn duration(&self) -> i128 { + self.ended - self.started + } +} + +pub struct TxExecSpans { + pub span1_get_accounts: Span, + pub span2_get_snap_ver: Span, + pub span3_certify: Span, + pub span4_wait_for_safepoint: Span, + pub span5_install: Span, +} + +impl Default for TxExecSpans { + fn default() -> Self { + TxExecSpans { + span1_get_accounts: Span::new(0, 0), + span2_get_snap_ver: Span::new(0, 0), + span3_certify: Span::new(0, 0), + 
span4_wait_for_safepoint: Span::new(0, 0), + span5_install: Span::new(0, 0), + } + } +} + +#[derive(Debug, Clone)] +pub struct Stats { + pub min_started_at: i128, + pub max_finished_at: i128, + pub aborts: u64, + pub isolation_errors: u64, + pub retry_min_max: MinMax, + pub retry_count: u64, + pub validation_errors: u64, + pub sleep_time: u128, + + pub getaccounts: MinMax, + pub getsnap: MinMax, + pub certify: MinMax, + pub waiting: MinMax, + pub installing: MinMax, + + pub duration_min_max: MinMax, + + pub giveup_count: u128, + pub total_count: u128, + + pub exceptions: u128, + pub threading_errors: u128, + + pub durations: Vec, +} + +impl Default for Stats { + fn default() -> Self { + Self::new() + } +} + +impl Stats { + pub fn new() -> Stats { + Stats { + min_started_at: i128::MAX, + max_finished_at: 0, + aborts: 0, + isolation_errors: 0, + retry_count: 0, + retry_min_max: MinMax::default(), + validation_errors: 0, + sleep_time: 0, + duration_min_max: MinMax::default(), + giveup_count: 0, + total_count: 0, + getaccounts: MinMax::default(), + getsnap: MinMax::default(), + certify: MinMax::default(), + waiting: MinMax::default(), + installing: MinMax::default(), + + exceptions: 0, + threading_errors: 0, + + durations: Vec::new(), + } + } + + pub fn inc_retry_count(&mut self) { + self.retry_count += 1; + self.retry_min_max.add(self.retry_count as i128); + } + + pub fn on_tx_finished(&mut self, started_at: i128, finished_at: i128) { + if self.min_started_at > started_at { + self.min_started_at = started_at; + } + if self.max_finished_at < finished_at { + self.max_finished_at = finished_at; + } + self.on_tx_completed(finished_at - started_at); + } + + fn on_tx_completed(&mut self, duration_nanos: i128) { + self.duration_min_max.add(duration_nanos); + self.durations.push(duration_nanos); + } + + pub fn merge(&mut self, stats: Stats) { + if self.min_started_at > stats.min_started_at { + self.min_started_at = stats.min_started_at; + } + if self.max_finished_at < 
stats.max_finished_at { + self.max_finished_at = stats.max_finished_at; + } + + self.aborts += stats.aborts; + self.isolation_errors += stats.isolation_errors; + self.retry_count += stats.retry_count; + self.retry_min_max.add(stats.retry_min_max.min); + self.retry_min_max.add(stats.retry_min_max.max); + + self.validation_errors += stats.validation_errors; + self.sleep_time += stats.sleep_time; + self.getaccounts.merge(stats.getaccounts); + self.getsnap.merge(stats.getsnap); + self.certify.merge(stats.certify); + self.waiting.merge(stats.waiting); + self.installing.merge(stats.installing); + + self.duration_min_max.add(stats.duration_min_max.min); + self.duration_min_max.add(stats.duration_min_max.max); + + self.giveup_count += stats.giveup_count; + self.total_count += stats.total_count; + for d in stats.durations { + self.durations.push(d); + } + } + + pub fn generate_report(&mut self, threads: u64, max_retry: u64) -> String { + let p_lables = vec![50, 75, 90, 95, 99]; + let p_durations = Self::compute_percentiles::(&p_lables, &mut self.durations); + + let duration_sec = Duration::from_nanos((self.max_finished_at - self.min_started_at) as u64).as_secs_f32(); + let mut report: String = "".into(); + report += "\n------------------------------------------"; + report += &format!("\nSet size : {}", (self.total_count / threads as u128) as u64); + report += " - candidates per thread"; + report += &format!("\nThreads : {}", threads); + report += &format!("\nTotal time : {:.3} (sec)", duration_sec); + report += &format!("\nTransactions: {}", self.total_count); + report += &format!("\nRetries : {}", self.retry_count); + report += &format!("\n min, max : {}, {}", self.retry_min_max.min, self.retry_min_max.max); + report += &format!("\nGiven up : {}", self.giveup_count); + report += &format!(" - used up all retry attempts ({})", max_retry); + report += "\n\nThroughput (tps)"; + report += &format!("\n Client view : {:.3}", (self.total_count as f32 / duration_sec)); + report += 
" - observed by end user, this excludes retries"; + report += &format!( + "\n System : {:.3}", + ((self.retry_count as u128 + self.total_count) as f32 / duration_sec) + ); + report += " - produced by system, this includes retries"; + report += &format!( + "\n System projected: {:.3}", + ((self.retry_count as u128 + self.total_count) as f32 / (duration_sec - (self.sleep_time as f32 / threads as f32 / 1000.0))) + ); + report += " - produced by system excluding time spent sleeping"; + report += "\n\nDurations"; + report += &format!( + "\n Tx min : {:.3} (sec)", + Duration::from_nanos(self.duration_min_max.min as u64).as_secs_f64() + ); + report += &format!( + "\n Tx max : {:.3} (sec)", + Duration::from_nanos(self.duration_min_max.max as u64).as_secs_f64() + ); + report += " - candidate roundtrip\n"; + for p_label in p_lables { + report += &format!( + "\n p{} : {}", + p_label, + p_durations + .get(&p_label) + .map_or("".to_string(), |v| format!("{:.3} (sec)", (*v) as f32 / 1_000_000_000.0)) + ); + } + + report += &format!( + "\n\n Sleeps : {:.3} (sec avg. per retry)", + self.sleep_time as f32 / 1000.0 / self.retry_count as f32 + ); + report += " - time wasted on sleeps before retrying"; + report += &format!( + "\n Waited : {:.3} (sec avg. 
per thread) - initiator waited for safepoint", + (Duration::from_nanos(self.waiting.sum as u64).as_secs_f64()) / threads as f64 + ); + report += &format!("\n : {:.3} (sec min)", (Duration::from_nanos(self.waiting.min as u64).as_secs_f64())); + report += &format!("\n : {:.3} (sec max)", (Duration::from_nanos(self.waiting.max as u64).as_secs_f64())); + report += "\n\nErrors"; + + report += &format!("\n Talos aborts: {}", self.aborts); + report += &format!("\n Validations : {}", self.validation_errors); + report += &format!("\n DB isolation: {}", self.isolation_errors); + report += " - DB rollbacks caused by tx isolation conflicts"; + report += &format!("\n Exceptions : {}", self.exceptions); + report += &format!("\n Threading : {}", self.threading_errors); + report += " - Tokio threading errors"; + + report += "\n------------------------------------------"; + + report + } + + pub fn compute_percentiles(p_list: &Vec, values: &mut Vec) -> HashMap { + let mut result = HashMap::::new(); + values.sort(); + let count = values.len() as f32; + for p_ref in p_list { + let p = *p_ref; + let p_index = ((count * p as f32 / 100.0) as usize).saturating_sub(1); + if let Some(v) = values.get(p_index) { result.insert(p, v.clone()); } + } + result + } +} diff --git a/packages/cohort_banking/src/model/bank_account.rs b/packages/cohort_banking/src/model/bank_account.rs new file mode 100644 index 00000000..bf1c1b76 --- /dev/null +++ b/packages/cohort_banking/src/model/bank_account.rs @@ -0,0 +1,66 @@ +use std::fmt; +use std::fmt::{Display, Formatter}; + +use rust_decimal::prelude::FromPrimitive; +use rust_decimal::Decimal; +use serde::Deserialize; + +#[derive(Debug, Clone, PartialEq, Deserialize)] +pub struct BankAccount { + pub name: String, + pub number: String, + #[serde(with = "rust_decimal::serde::float")] + pub balance: Decimal, + pub version: u64, +} + +impl BankAccount { + pub fn new(name: String, number: String, balance: f32, version: u64) -> Self { + BankAccount { + name, + number, + balance:
Decimal::from_f32(balance).unwrap(), + version, + } + } + + pub fn increment(&mut self, amount: f32) { + self.balance += Decimal::from_f32(amount).unwrap() + } +} + +impl Display for BankAccount { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "BankAccount: [name: {}, number: {}, balance: {}, version: {}]", + self.name, self.number, self.balance, self.version + ) + } +} + +// $coverage:ignore-start +#[cfg(test)] +mod tests { + use rust_decimal::Decimal; + + use crate::model::bank_account::BankAccount; + + #[test] + fn test_model() { + assert_eq!( + format!( + "{}", + BankAccount::new("TestBankAccount123456".to_string(), "123456".to_string(), 123.45, 111_u64,) + ), + "BankAccount: [name: TestBankAccount123456, number: 123456, balance: 123.45, version: 111]", + ); + } + + #[test] + fn should_increment_amount() { + let mut a = BankAccount::new("TestBankAccount123456".to_string(), "123456".to_string(), 123.45, 111_u64); + a.increment(100.0); + assert_eq!(a.balance, Decimal::from_str_exact("223.45").unwrap()); + } +} diff --git a/packages/cohort_banking/src/model/mod.rs b/packages/cohort_banking/src/model/mod.rs new file mode 100644 index 00000000..e20c76c9 --- /dev/null +++ b/packages/cohort_banking/src/model/mod.rs @@ -0,0 +1,3 @@ +pub mod bank_account; +pub mod requests; +pub mod snapshot; diff --git a/packages/cohort_banking/src/model/requests.rs b/packages/cohort_banking/src/model/requests.rs new file mode 100644 index 00000000..a465ab17 --- /dev/null +++ b/packages/cohort_banking/src/model/requests.rs @@ -0,0 +1,78 @@ +// $coverage:ignore-start +use rust_decimal::Decimal; +use serde::{Deserialize, Serialize}; +// $coverage:ignore-end +use serde_json::Value; + +use strum::{Display, EnumString}; + +#[derive(Display, Debug, Deserialize, EnumString, PartialEq, Eq)] +pub enum BusinessActionType { + TRANSFER, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct TransferRequest { + pub from: String, + pub to: String, + #[serde(with 
= "rust_decimal::serde::float")] + pub amount: Decimal, +} + +impl TransferRequest { + pub fn new(from: String, to: String, amount: Decimal) -> Self { + Self { from, to, amount } + } + + pub fn json(&self) -> Value { + serde_json::to_value(self).unwrap() + } +} + +// $coverage:ignore-start +#[cfg(test)] +mod tests { + use super::*; + use rust_decimal::{prelude::FromPrimitive, Decimal}; + use std::str::FromStr; + + #[test] + fn models() { + assert_eq!( + format!("{:?}", TransferRequest::new("a1".into(), "a2".into(), Decimal::from_f32(10.0).unwrap())), + r#"TransferRequest { from: "a1", to: "a2", amount: 10 }"#.to_string(), + ); + + assert_eq!(format!("{:?}", BusinessActionType::TRANSFER), "TRANSFER".to_string()); + assert_eq!(BusinessActionType::from_str("TRANSFER").unwrap(), BusinessActionType::TRANSFER); + } + + #[test] + fn should_deserialize_transfer_request() { + let rslt = serde_json::from_str::(r#"{ "from": "a1", "to": "a2", "amount": 10.0 }"#).unwrap(); + assert_eq!(rslt.from, "a1"); + assert_eq!(rslt.to, "a2"); + assert_eq!(rslt.amount, Decimal::from_f32(10.0).unwrap()); + } + + #[test] + fn json_for_transfer_request() { + let json = TransferRequest::new("a1".into(), "a2".into(), Decimal::from_f32(10.0).unwrap()).json(); + + assert!(json.get("from").is_some()); + assert_eq!(json.get("from").unwrap(), "a1"); + + assert!(json.get("to").is_some()); + assert_eq!(json.get("to").unwrap(), "a2"); + + assert!(json.get("amount").is_some()); + assert_eq!(json.get("amount").unwrap(), 10.0); + } + + #[test] + fn should_serialize_transfer_request() { + let rslt = serde_json::to_string(&TransferRequest::new("a1".into(), "a2".into(), Decimal::from_f32(10.0).unwrap())).unwrap(); + assert_eq!(rslt, r#"{"from":"a1","to":"a2","amount":10.0}"#); + } +} +// $coverage:ignore-end diff --git a/packages/cohort_banking/src/model/snapshot.rs b/packages/cohort_banking/src/model/snapshot.rs new file mode 100644 index 00000000..296caa24 --- /dev/null +++ 
b/packages/cohort_banking/src/model/snapshot.rs @@ -0,0 +1,56 @@ +use std::fmt; +use std::fmt::{Display, Formatter}; + +use serde::Deserialize; + +#[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Clone, Deserialize)] +pub struct Snapshot { + pub version: u64, +} + +impl Snapshot { + pub fn is_safe_for(snapshot: Snapshot, safepoint: u64) -> bool { + snapshot.version >= safepoint + } +} + +impl Display for Snapshot { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "Snapshot: [version: {:?}]", self.version) + } +} + +impl From for Snapshot { + fn from(value: u64) -> Self { + Snapshot { version: value } + } +} + +// $coverage:ignore-start +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn models() { + assert_eq!(Snapshot { version: 1 }, Snapshot::from(1)); + assert_ne!(Snapshot { version: 2 }, Snapshot::from(1)); + } + + #[test] + fn should_deserialize_snapshot() { + let rslt_snapshot = serde_json::from_str::(r#"{ "version": 123456 }"#); + let snapshot = rslt_snapshot.unwrap(); + assert_eq!(snapshot.version, 123456); + } + + #[test] + #[allow(clippy::bool_assert_comparison)] + fn when_snapshot_is_behind_safepoint_ist_not_safe() { + let safepoint = 2; + assert_eq!(Snapshot::is_safe_for(Snapshot { version: 1 }, safepoint), false); + assert_eq!(Snapshot::is_safe_for(Snapshot { version: 2 }, safepoint), true); + assert_eq!(Snapshot::is_safe_for(Snapshot { version: 3 }, safepoint), true); + } +} +// $coverage:ignore-end diff --git a/packages/cohort_banking/src/state/mod.rs b/packages/cohort_banking/src/state/mod.rs new file mode 100644 index 00000000..26e9103c --- /dev/null +++ b/packages/cohort_banking/src/state/mod.rs @@ -0,0 +1 @@ +pub mod postgres; diff --git a/packages/cohort_banking/src/state/postgres/database.rs b/packages/cohort_banking/src/state/postgres/database.rs new file mode 100644 index 00000000..6661ed86 --- /dev/null +++ b/packages/cohort_banking/src/state/postgres/database.rs @@ -0,0 +1,226 @@ +use std::fmt::{self, Display, 
Formatter}; +// $coverage:ignore-start +use std::sync::Arc; + +use strum::Display; +use tokio_postgres::types::ToSql; +use tokio_postgres::{NoTls, Row}; + +use deadpool_postgres::{Config, CreatePoolError, GenericClient, ManagerConfig, Object, Pool, PoolConfig, Runtime}; + +use crate::state::postgres::database_config::DatabaseConfig; + +pub static SNAPSHOT_SINGLETON_ROW_ID: &str = "SINGLETON"; + +pub struct Database { + pub pool: Pool, +} + +#[derive(Display, Debug)] +pub enum DatabaseErrorKind { + PoolInit, + BorrowConnection, + QueryOrExecute, + PrepareStatement, + Deserialise, +} + +#[derive(Debug)] +pub struct DatabaseError { + kind: DatabaseErrorKind, + pub reason: String, + pub cause: Option, +} + +impl DatabaseError { + pub fn cannot_borrow(cause: String) -> Self { + Self { + kind: DatabaseErrorKind::BorrowConnection, + reason: "Cannot get client from DB pool.".into(), + cause: Some(cause), + } + } + + pub fn query(cause: String, query: String) -> Self { + Self { + kind: DatabaseErrorKind::QueryOrExecute, + cause: Some(cause), + reason: format!("Error executing: '{}'", query), + } + } + + pub fn prepare(cause: String, query: String) -> Self { + Self { + kind: DatabaseErrorKind::PrepareStatement, + cause: Some(cause), + reason: format!("Error preparing statement for: '{}'", query), + } + } + + pub fn deserialise_payload(cause: String, message: String) -> Self { + Self { + kind: DatabaseErrorKind::Deserialise, + cause: Some(cause), + reason: format!("Resultset parsing error. 
Details: '{}'", message), + } + } +} + +impl From for DatabaseError { + fn from(value: CreatePoolError) -> Self { + Self { + kind: DatabaseErrorKind::PoolInit, + reason: "Cannot create DB pool".into(), + cause: Some(value.to_string()), + } + } +} + +impl Display for DatabaseError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "DatabaseError: [kind: {}, reason: {}, cause: {}]", + self.kind, + self.reason, + self.cause.clone().unwrap_or("".into()) + ) + } +} + +impl Database { + pub async fn get(&self) -> Result { + self.pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string())) + } + + pub async fn init_db(cfg: DatabaseConfig) -> Result, DatabaseError> { + let mut config = Config::new(); + config.dbname = Some(cfg.database); + config.user = Some(cfg.user); + config.password = Some(cfg.password); + config.host = Some(cfg.host); + config.port = Some(cfg.port.parse::().expect("Failed to parse port to u16")); + config.manager = Some(ManagerConfig { + recycling_method: deadpool_postgres::RecyclingMethod::Fast, + }); + let pc = PoolConfig { + max_size: cfg.pool_size, + ..PoolConfig::default() + }; + config.pool = Some(pc); + + let pool = config + .create_pool(Some(Runtime::Tokio1), NoTls) + .map_err(|e| format!("Cannot connect to database. 
Error: {}", e)) + .unwrap(); + + { + let mut tmp_list: Vec = Vec::new(); + for _ in 1..=pc.max_size { + let client = pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string()))?; + + let stm = "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL REPEATABLE READ;"; + client.execute(stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), stm.into()))?; + tmp_list.push(client); + } + } + + for _ in 1..=pc.max_size { + let client = pool.get().await.map_err(|e| DatabaseError::cannot_borrow(e.to_string()))?; + + let stm = "show transaction_isolation"; + let rs = client.query_one(stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), stm.into()))?; + let value: String = rs.get(0); + log::debug!("init: db-isolation-level: {}", value); + } + + Ok(Arc::new(Database { pool })) + } + + pub async fn query_one( + &self, + sql: &str, + params: &[&(dyn ToSql + Sync)], + fn_converter: fn(&Row) -> Result, + ) -> Result { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + fn_converter( + &client + .query_one(&stm, params) + .await + .map_err(|e| DatabaseError::query(e.to_string(), sql.into()))?, + ) + } + + pub async fn query_opt( + &self, + sql: &str, + params: &[&(dyn ToSql + Sync)], + fn_converter: fn(&Row) -> Result, + ) -> Result, DatabaseError> { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + let result = client + .query_opt(&stm, params) + .await + .map_err(|e| DatabaseError::query(e.to_string(), sql.to_string()))?; + + if let Some(row) = result { + fn_converter(&row).map(|v| Some(v)) + } else { + Ok(None) + } + } + + pub async fn query(&self, sql: &str, fn_converter: fn(&Row) -> Result) -> Result, DatabaseError> { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| 
DatabaseError::prepare(e.to_string(), sql.to_string()))?; + let result = client.query(&stm, &[]).await.map_err(|e| DatabaseError::query(e.to_string(), sql.into()))?; + + let mut items: Vec = Vec::new(); + for row in result.iter() { + items.push(fn_converter(row)?); + } + Ok(items) + } + + pub async fn query_many( + &self, + sql: &str, + params: &[&(dyn ToSql + Sync)], + fn_converter: fn(&Row) -> Result, + ) -> Result, DatabaseError> { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + let result = client.query(&stm, params).await.map_err(|e| DatabaseError::query(e.to_string(), sql.into()))?; + + let mut items: Vec = Vec::new(); + for row in result.iter() { + items.push(fn_converter(row)?); + } + Ok(items) + } + + pub async fn execute(&self, sql: &str, params: &[&(dyn ToSql + Sync)]) -> Result { + let client = self.get().await?; + let stm = client + .prepare_cached(sql) + .await + .map_err(|e| DatabaseError::prepare(e.to_string(), sql.to_string()))?; + client.execute(&stm, params).await.map_err(|e| DatabaseError::query(e.to_string(), sql.into())) + } +} +// $coverage:ignore-end diff --git a/packages/cohort_banking/src/state/postgres/database_config.rs b/packages/cohort_banking/src/state/postgres/database_config.rs new file mode 100644 index 00000000..2b8b25a4 --- /dev/null +++ b/packages/cohort_banking/src/state/postgres/database_config.rs @@ -0,0 +1,59 @@ +#[derive(Clone, Debug)] +pub struct DatabaseConfig { + pub pool_size: usize, + pub user: String, + pub password: String, + pub host: String, + pub port: String, + pub database: String, +} + +impl DatabaseConfig { + pub fn get_connection_string(&self, database: &str) -> String { + format!("postgres://{}:{}@{}:{}/{}", self.user, self.password, self.host, self.port, database) + } + + pub fn get_public_connection_string(&self, database: &str) -> String { + format!("postgres://{}:***@{}:{}/{}", self.user, 
self.host, self.port, database) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[allow(clippy::redundant_clone)] + fn test_model() { + let cfg = DatabaseConfig { + pool_size: 1, + user: "USER1".into(), + password: "1234".into(), + host: "localhost".into(), + port: "1010".into(), + database: "db_admin".into(), + }; + assert_eq!( + format!("{:?}", cfg.clone()), + r#"DatabaseConfig { pool_size: 1, user: "USER1", password: "1234", host: "localhost", port: "1010", database: "db_admin" }"#, + ); + } + + #[test] + fn should_generate_valid_connection_string() { + let cfg = DatabaseConfig { + pool_size: 1, + user: "USER1".into(), + password: "1234".into(), + host: "localhost".into(), + port: "1010".into(), + database: "db_admin".into(), + }; + + assert_eq!(cfg.get_connection_string("db_app").as_str(), "postgres://USER1:1234@localhost:1010/db_app"); + assert_eq!( + cfg.get_public_connection_string("db_app").as_str(), + "postgres://USER1:***@localhost:1010/db_app", + ); + } +} diff --git a/packages/cohort_banking/src/state/postgres/mod.rs b/packages/cohort_banking/src/state/postgres/mod.rs new file mode 100644 index 00000000..f2f908a6 --- /dev/null +++ b/packages/cohort_banking/src/state/postgres/mod.rs @@ -0,0 +1,2 @@ +pub mod database; +pub mod database_config; diff --git a/packages/cohort_sdk/Cargo.toml b/packages/cohort_sdk/Cargo.toml new file mode 100644 index 00000000..5e9f3b43 --- /dev/null +++ b/packages/cohort_sdk/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "cohort_sdk" +version = "0.0.1" +edition = "2021" + +[dependencies] + +async-trait = { workspace = true } +env_logger = { workspace = true } +log = { workspace = true } +futures = { version = "0.3.28" } +opentelemetry_api = { version = "0.19.0" } +opentelemetry_sdk = { version = "0.19.0", features = ["metrics", "rt-tokio"] } +opentelemetry = { version = "0.19.0" } +rand = { version = "0.8.5" } +rdkafka = { version = "0.33.0", features = ["sasl"] } +rdkafka-sys = { version = "4.3.0" } +serde = 
{ workspace = true } +serde_json = { workspace = true } +strum = { version = "0.25", features = ["derive"] } +talos_agent = { path = "../talos_agent" } +talos_suffix = { path = "../talos_suffix" } +talos_certifier = { path = "../talos_certifier" } +talos_certifier_adapters = { path = "../talos_certifier_adapters" } +uuid = { version = "1.2.2", features = ["v4"] } + +tokio = { workspace = true, features = ["full"] } diff --git a/packages/cohort_sdk/src/cohort.rs b/packages/cohort_sdk/src/cohort.rs new file mode 100644 index 00000000..84fff416 --- /dev/null +++ b/packages/cohort_sdk/src/cohort.rs @@ -0,0 +1,485 @@ +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + +use opentelemetry_api::{ + global, + metrics::{Counter, Histogram, Unit}, + Context, +}; +use talos_agent::{ + agent::{ + core::{AgentServices, TalosAgentImpl}, + model::{CancelRequestChannelMessage, CertifyRequestChannelMessage}, + }, + api::{AgentConfig, CandidateData, CertificationRequest, KafkaConfig, TalosAgent}, + messaging::{ + api::{Decision, DecisionMessage}, + kafka::KafkaInitializer, + }, + metrics::{client::MetricsClient, model::Signal}, + mpsc::core::{ReceiverWrapper, SenderWrapper}, +}; + +use talos_certifier_adapters::{kafka::config::KafkaConfig as TalosKafkaConfig, KafkaConsumer}; +use tokio::sync::mpsc; + +use crate::{ + delay_controller::DelayController, + installer_callback::ReplicatorInstallerImpl, + model::{ + self, + callbacks::{ItemStateProvider, OutOfOrderInstallOutcome, OutOfOrderInstaller, StatemapInstaller}, + internal::CertificationAttemptOutcome, + CertificationResponse, ClientError, Config, ReplicatorServices, ResponseMetadata, + }, + replicator2::{cohort_replicator::CohortReplicator, cohort_suffix::CohortSuffix, service::ReplicatorService2}, +}; + +use talos_certifier::ports::MessageReciever; + +use talos_agent::api::CertificationResponse as InternalCertificationResponse; + +// #[napi] +pub struct Cohort { + config: Config, + talos_agent: Box, + agent_services: 
AgentServices, + replicator_services: ReplicatorServices, + oo_retry_counter: Arc>, + oo_giveups_counter: Arc>, + oo_not_safe_counter: Arc>, + oo_install_histogram: Arc>, + oo_attempts_histogram: Arc>, + oo_install_and_wait_histogram: Arc>, + oo_wait_histogram: Arc>, + talos_histogram: Arc>, + talos_aborts_counter: Arc>, + agent_retries_counter: Arc>, + agent_errors_counter: Arc>, + db_errors_counter: Arc>, +} + +// #[napi] +impl Cohort { + // #[napi] + pub async fn create( + config: Config, + // Param1: The list of statemap items. + // Param2: Version to install. + // Returns error description. If the string is empty, it means there was no error installing + statemap_installer: S, + ) -> Result + where + S: StatemapInstaller + Sync + Send + 'static, + { + let agent_config: AgentConfig = config.clone().into(); + let kafka_config: KafkaConfig = config.clone().into(); + let talos_kafka_config: TalosKafkaConfig = config.clone().into(); + + // + // Create instance of Agent + // + let (tx_certify_ch, rx_certify_ch) = tokio::sync::mpsc::channel::(agent_config.buffer_size); + let tx_certify = SenderWrapper:: { tx: tx_certify_ch }; + let rx_certify = ReceiverWrapper:: { rx: rx_certify_ch }; + + let (tx_cancel_ch, rx_cancel_ch) = tokio::sync::mpsc::channel::(agent_config.buffer_size); + let tx_cancel = SenderWrapper:: { tx: tx_cancel_ch }; + let rx_cancel = ReceiverWrapper:: { rx: rx_cancel_ch }; + + let (tx_decision_ch, rx_decision_ch) = tokio::sync::mpsc::channel::(agent_config.buffer_size); + let tx_decision = SenderWrapper:: { tx: tx_decision_ch }; + let rx_decision = ReceiverWrapper:: { rx: rx_decision_ch }; + + let metrics_client: Option>>> = None; + + let agent = TalosAgentImpl::new( + agent_config.clone(), + Arc::new(Box::new(tx_certify)), + tx_cancel, + None, + Arc::new(metrics_client), + || { + let (tx_ch, rx_ch) = tokio::sync::mpsc::channel::(1); + (SenderWrapper { tx: tx_ch }, ReceiverWrapper { rx: rx_ch }) + }, + ); + + let (publisher, consumer) = 
KafkaInitializer::connect(agent_config.agent.clone(), kafka_config) + .await + .map_err(|me| ClientError { + kind: model::ClientErrorKind::Messaging, + reason: "Error connecting Talos agent to Kafka.".into(), + cause: Some(me.reason), + })?; + + let agent_services = agent.start(rx_certify, rx_cancel, tx_decision, rx_decision, publisher, consumer); + + // + // Code below is to start replicator from master branch... + // + + // // + // // start replicator + // // + + let suffix = CohortSuffix::with_config(config.clone().into()); + let kafka_consumer = KafkaConsumer::new(&talos_kafka_config); + kafka_consumer.subscribe().await.unwrap(); + let replicator = CohortReplicator::new(kafka_consumer, suffix); + + let (tx_install_req, rx_statemaps_ch) = mpsc::channel(config.replicator_buffer_size); + let (tx_install_result_ch, rx_install_result) = tokio::sync::mpsc::channel(config.replicator_buffer_size); + let replicator_handle = tokio::spawn(ReplicatorService2::start_replicator(replicator, tx_install_req, rx_install_result)); + let replicator_impl = ReplicatorInstallerImpl { + installer_impl: statemap_installer, + }; + let installer_handle = tokio::spawn(ReplicatorService2::start_installer(rx_statemaps_ch, tx_install_result_ch, replicator_impl)); + + let meter = global::meter("cohort_sdk"); + let oo_install_histogram = meter.f64_histogram("metric_oo_install_duration").with_unit(Unit::new("ms")).init(); + let oo_attempts_histogram = meter.u64_histogram("metric_oo_attempts").with_unit(Unit::new("tx")).init(); + let oo_install_and_wait_histogram = meter.f64_histogram("metric_oo_install_and_wait_duration").with_unit(Unit::new("ms")).init(); + let oo_wait_histogram = meter.f64_histogram("metric_oo_wait_duration").with_unit(Unit::new("ms")).init(); + let talos_histogram = meter.f64_histogram("metric_talos").with_unit(Unit::new("ms")).init(); + let oo_retry_counter = meter.u64_counter("metric_oo_retry_count").with_unit(Unit::new("tx")).init(); + let oo_giveups_counter = 
meter.u64_counter("metric_oo_giveups_count").with_unit(Unit::new("tx")).init(); + let oo_not_safe_counter = meter.u64_counter("metric_oo_not_safe_count").with_unit(Unit::new("tx")).init(); + let talos_aborts_counter = meter.u64_counter("metric_talos_aborts_count").with_unit(Unit::new("tx")).init(); + let agent_errors_counter = meter.u64_counter("metric_agent_errors_count").with_unit(Unit::new("tx")).init(); + let agent_retries_counter = meter.u64_counter("metric_agent_retries_count").with_unit(Unit::new("tx")).init(); + let db_errors_counter = meter.u64_counter("metric_db_errors_counter").with_unit(Unit::new("tx")).init(); + + Ok(Self { + config, + talos_agent: Box::new(agent), + agent_services, + replicator_services: ReplicatorServices { + replicator_handle, + installer_handle, + }, + oo_install_histogram: Arc::new(oo_install_histogram), + oo_install_and_wait_histogram: Arc::new(oo_install_and_wait_histogram), + oo_wait_histogram: Arc::new(oo_wait_histogram), + oo_retry_counter: Arc::new(oo_retry_counter), + oo_giveups_counter: Arc::new(oo_giveups_counter), + oo_not_safe_counter: Arc::new(oo_not_safe_counter), + oo_attempts_histogram: Arc::new(oo_attempts_histogram), + talos_histogram: Arc::new(talos_histogram), + talos_aborts_counter: Arc::new(talos_aborts_counter), + agent_retries_counter: Arc::new(agent_retries_counter), + agent_errors_counter: Arc::new(agent_errors_counter), + db_errors_counter: Arc::new(db_errors_counter), + }) + } + + pub async fn certify( + &self, + request: model::CertificationRequest, + state_provider: &S, + oo_installer: &O, + ) -> Result + where + S: ItemStateProvider, + O: OutOfOrderInstaller, + { + let span_1 = Instant::now(); + let response = self.send_to_talos(request, state_provider).await?; + let span_1_val = span_1.elapsed().as_nanos() as f64 / 1_000_000_f64; + + let h_talos = Arc::clone(&self.talos_histogram); + tokio::spawn(async move { + h_talos.record(&Context::current(), span_1_val, &[]); + }); + + if response.decision == 
Decision::Aborted { + return Ok(response); + } + + // system error if we have Commit decision but no safepoint is given + let safepoint = response.safepoint.unwrap(); + let new_version = response.version; + + let mut controller = DelayController::new(20, self.config.retry_oo_backoff_max_ms); + let mut attempt = 0; + let span_2 = Instant::now(); + + let mut is_not_save = 0_u64; + let mut giveups = 0_u64; + + let result = loop { + attempt += 1; + + let span_3 = Instant::now(); + let install_result = oo_installer.install(response.xid.clone(), safepoint, new_version, attempt).await; + let span_3_val = span_3.elapsed().as_nanos() as f64 / 1_000_000_f64; + + let h_install = Arc::clone(&self.oo_install_histogram); + + tokio::spawn(async move { + let ctx = &Context::current(); + h_install.record(ctx, span_3_val, &[]); + }); + + let error = match install_result { + Ok(OutOfOrderInstallOutcome::Installed) => None, + Ok(OutOfOrderInstallOutcome::InstalledAlready) => None, + Ok(OutOfOrderInstallOutcome::SafepointCondition) => { + is_not_save += 1; + // We create this error as "safepoint timeout" in advance. The error is erased if a further attempt is successful, or replaced with another error. 
+ Some(ClientError { + kind: model::ClientErrorKind::OutOfOrderSnapshotTimeout, + reason: format!("Timeout waitig for safepoint: {}", safepoint), + cause: None, + }) + } + Err(error) => Some(ClientError { + kind: model::ClientErrorKind::OutOfOrderCallbackFailed, + reason: error, + cause: None, + }), + }; + + if let Some(client_error) = error { + if attempt >= self.config.retry_oo_attempts_max { + giveups += 1; + break Err(client_error); + } + + // try again + controller.sleep().await; + } else { + break Ok(response); + } + }; + + let span_2_val = span_2.elapsed().as_nanos() as f64 / 1_000_000_f64; + let total_sleep = controller.total_sleep_time; + + let c_not_safe = Arc::clone(&self.oo_not_safe_counter); + let h_total_sleep = Arc::clone(&self.oo_wait_histogram); + let h_attempts = Arc::clone(&self.oo_attempts_histogram); + let h_span_2 = Arc::clone(&self.oo_install_and_wait_histogram); + let c_giveups = Arc::clone(&self.oo_giveups_counter); + let c_retry = Arc::clone(&self.oo_retry_counter); + + tokio::spawn(async move { + let ctx = &Context::current(); + + if is_not_save > 0 { + c_not_safe.add(ctx, is_not_save, &[]); + } + if total_sleep > 0 { + h_total_sleep.record(ctx, total_sleep as f64, &[]); + } + if giveups > 0 { + c_giveups.add(ctx, giveups, &[]); + } + if attempt > 1 { + c_retry.add(ctx, attempt - 1, &[]); + } + + h_attempts.record(ctx, attempt, &[]); + h_span_2.record(ctx, span_2_val, &[]); + }); + result + } + + async fn send_to_talos(&self, request: model::CertificationRequest, state_provider: &S) -> Result + where + S: ItemStateProvider, + { + let started_at = Instant::now(); + let mut attempts = 0; + + // let mut delay_controller = Box::new(DelayController::new(20, self.config.retry_backoff_max_ms)); + let mut delay_controller = DelayController::new(20, self.config.retry_backoff_max_ms); + let mut talos_aborts = 0_u64; + let mut agent_errors = 0_u64; + let mut db_errors = 0_u64; + + let result = loop { + // One of these will be sent to client if we 
failed + let recent_error: Option; + let recent_response: Option; + + attempts += 1; + let is_success = match self.send_to_talos_attempt(request.clone(), state_provider).await { + CertificationAttemptOutcome::Success { mut response } => { + response.metadata.duration_ms = started_at.elapsed().as_millis() as u64; + response.metadata.attempts = attempts; + recent_error = None; + recent_response = Some(response); + true + } + CertificationAttemptOutcome::Aborted { mut response } => { + talos_aborts += 1; + response.metadata.duration_ms = started_at.elapsed().as_millis() as u64; + response.metadata.attempts = attempts; + recent_error = None; + recent_response = Some(response); + false + } + CertificationAttemptOutcome::AgentError { error } => { + recent_error = Some(ClientError::from(error)); + recent_response = None; + agent_errors += 1; + false + } + CertificationAttemptOutcome::DataError { reason } => { + recent_error = Some(ClientError { + kind: model::ClientErrorKind::Persistence, + reason, + cause: None, + }); + recent_response = None; + db_errors += 1; + false + } + }; + + if is_success { + break Ok(recent_response.unwrap()); + } + + if self.config.retry_attempts_max <= attempts { + if let Some(response) = recent_response { + break Ok(response); + } else if let Some(error) = recent_error { + break Err(error); + } + } else if let Some(response) = recent_response { + log::debug!( + "Unsuccessful transaction: {:?}. Response: {:?} This might retry. Attempts: {}", + request.candidate.statemap, + response.decision, + attempts + ); + } else if let Some(error) = recent_error { + log::debug!( + "Unsuccessful transaction with error: {:?}. {} This might retry. 
Attempts: {}", + request.candidate.statemap, + error, + attempts + ); + } + + delay_controller.sleep().await; + }; + + let c_talos_aborts = Arc::clone(&self.talos_aborts_counter); + let c_agent_retries = Arc::clone(&self.agent_retries_counter); + let c_agent_errors = Arc::clone(&self.agent_errors_counter); + let c_db_errors = Arc::clone(&self.db_errors_counter); + + if agent_errors > 0 || db_errors > 0 || attempts > 1 || talos_aborts > 0 { + tokio::spawn(async move { + let ctx = &Context::current(); + c_talos_aborts.add(ctx, talos_aborts, &[]); + c_agent_retries.add(ctx, attempts, &[]); + c_agent_errors.add(ctx, agent_errors, &[]); + c_db_errors.add(ctx, db_errors, &[]); + }); + } + + result + } + + async fn send_to_talos_attempt(&self, request: model::CertificationRequest, state_provider: &S) -> CertificationAttemptOutcome + where + S: ItemStateProvider, + { + let result_local_state = state_provider.get_state().await; + if let Err(reason) = result_local_state { + return CertificationAttemptOutcome::DataError { reason }; + } + + let local_state = result_local_state.unwrap(); + + log::debug!("loaded state: {}, {:?}", local_state.snapshot_version, local_state.items); + + let (snapshot, readvers) = Self::select_snapshot_and_readvers(local_state.snapshot_version, local_state.items.iter().map(|i| i.version).collect()); + + let xid = uuid::Uuid::new_v4().to_string(); + let agent_request = CertificationRequest { + message_key: xid.clone(), + candidate: CandidateData { + xid: xid.clone(), + statemap: request.candidate.statemap, + readset: request.candidate.readset, + writeset: request.candidate.writeset, + readvers, + snapshot, + }, + timeout: if request.timeout_ms > 0 { + Some(Duration::from_millis(request.timeout_ms)) + } else { + None + }, + }; + + match self.talos_agent.certify(agent_request).await { + Ok(agent_response) => { + let response = CertificationResponse { + xid: agent_response.xid, + decision: agent_response.decision, + safepoint: agent_response.safepoint, + 
version: agent_response.version, + metadata: ResponseMetadata { duration_ms: 0, attempts: 0 }, + }; + + if response.decision == Decision::Aborted { + CertificationAttemptOutcome::Aborted { response } + } else { + CertificationAttemptOutcome::Success { response } + } + } + Err(error) => CertificationAttemptOutcome::AgentError { error }, + } + } + + fn select_snapshot_and_readvers(cpt_snapshot: u64, cpt_versions: Vec) -> (u64, Vec) { + if cpt_versions.is_empty() { + log::debug!( + "select_snapshot_and_readvers({}, {:?}): {:?}", + cpt_snapshot, + cpt_versions, + (cpt_snapshot, Vec::::new()) + ); + return (cpt_snapshot, vec![]); + } + + let mut cpt_version_min: u64 = u64::MAX; + for v in cpt_versions.iter() { + if cpt_version_min > *v { + cpt_version_min = *v; + } + } + let snapshot_version = std::cmp::max(cpt_snapshot, cpt_version_min); + let mut read_vers = Vec::::new(); + for v in cpt_versions.iter() { + if snapshot_version < *v { + read_vers.push(*v); + } + } + + log::debug!( + "select_snapshot_and_readvers({}, {:?}): {:?}", + cpt_snapshot, + cpt_versions, + (snapshot_version, read_vers.clone()) + ); + (snapshot_version, read_vers) + } + + pub async fn shutdown(&self) { + // TODO implement graceful shutdown with timeout? Wait for channels to be drained and then exit. 
+ // while self.channel_tx_certify.capacity() != MAX { wait() } + self.agent_services.decision_reader.abort(); + self.agent_services.state_manager.abort(); + self.replicator_services.replicator_handle.abort(); + self.replicator_services.installer_handle.abort(); + } +} diff --git a/packages/cohort_sdk/src/delay_controller.rs b/packages/cohort_sdk/src/delay_controller.rs new file mode 100644 index 00000000..4749c756 --- /dev/null +++ b/packages/cohort_sdk/src/delay_controller.rs @@ -0,0 +1,46 @@ +use rand::Rng; +use std::time::Duration; + +#[derive(Clone)] +pub struct DelayController { + pub total_sleep_time: u128, + multiplier: u64, + min_sleep_ms: u64, + max_sleep_ms: u64, +} + +// TODO: move me into cohort_sdk package +impl DelayController { + pub fn new(min_sleep_ms: u64, max_sleep_ms: u64) -> Self { + Self { + multiplier: 1, + min_sleep_ms, + max_sleep_ms, + total_sleep_time: 0, + } + } + + pub async fn sleep(&mut self) { + let step_ms = self.min_sleep_ms; + if self.multiplier > 64 { + self.multiplier = 1; + } + + let m = if self.multiplier == 1 { + self.multiplier * step_ms + } else { + self.multiplier * 2 * step_ms + }; + + self.multiplier *= 2; + + let add = { + let mut rnd = rand::thread_rng(); + rnd.gen_range(m..=m * 2) + }; + + let delay_ms = std::cmp::min(self.max_sleep_ms, m + add); + tokio::time::sleep(Duration::from_millis(delay_ms)).await; + self.total_sleep_time += delay_ms as u128; + } +} diff --git a/packages/cohort_sdk/src/installer_callback.rs b/packages/cohort_sdk/src/installer_callback.rs new file mode 100644 index 00000000..139bf260 --- /dev/null +++ b/packages/cohort_sdk/src/installer_callback.rs @@ -0,0 +1,17 @@ +use crate::{ + model::callbacks::StatemapInstaller, + replicator::core::{ReplicatorInstaller, StatemapItem}, +}; +use async_trait::async_trait; + +pub struct ReplicatorInstallerImpl { + pub installer_impl: S, +} + +#[async_trait] +impl ReplicatorInstaller for ReplicatorInstallerImpl { + async fn install(&mut self, sm: Vec, version: 
Option) -> Result { + let _ = self.installer_impl.install(sm, version.unwrap_or(0)).await?; + Ok(true) + } +} diff --git a/packages/cohort_sdk/src/lib.rs b/packages/cohort_sdk/src/lib.rs new file mode 100644 index 00000000..d3f435b5 --- /dev/null +++ b/packages/cohort_sdk/src/lib.rs @@ -0,0 +1,6 @@ +pub mod cohort; +pub mod delay_controller; +pub mod installer_callback; +pub mod model; +pub mod replicator; +pub mod replicator2; diff --git a/packages/cohort_sdk/src/model/callbacks.rs b/packages/cohort_sdk/src/model/callbacks.rs new file mode 100644 index 00000000..cd24648f --- /dev/null +++ b/packages/cohort_sdk/src/model/callbacks.rs @@ -0,0 +1,34 @@ +use crate::replicator::core::StatemapItem; +use async_trait::async_trait; + +pub struct CapturedState { + pub snapshot_version: u64, + pub items: Vec, +} + +#[derive(Debug)] +pub struct CapturedItemState { + pub id: String, + pub version: u64, +} + +#[async_trait] +pub trait ItemStateProvider { + async fn get_state(&self) -> Result; +} + +#[async_trait] +pub trait OutOfOrderInstaller { + async fn install(&self, xid: String, safepoint: u64, new_version: u64, attempt_nr: u64) -> Result; +} + +#[async_trait] +pub trait StatemapInstaller { + async fn install(&self, statemap: Vec, snapshot_version: u64) -> Result<(), String>; +} + +pub enum OutOfOrderInstallOutcome { + Installed, + InstalledAlready, + SafepointCondition, +} diff --git a/packages/cohort_sdk/src/model/internal.rs b/packages/cohort_sdk/src/model/internal.rs new file mode 100644 index 00000000..838563af --- /dev/null +++ b/packages/cohort_sdk/src/model/internal.rs @@ -0,0 +1,10 @@ +use talos_agent::agent::errors::AgentError; + +use super::CertificationResponse; + +pub(crate) enum CertificationAttemptOutcome { + Success { response: CertificationResponse }, + Aborted { response: CertificationResponse }, + AgentError { error: AgentError }, + DataError { reason: String }, +} diff --git a/packages/cohort_sdk/src/model/mod.rs b/packages/cohort_sdk/src/model/mod.rs 
new file mode 100644 index 00000000..5e19fbc0 --- /dev/null +++ b/packages/cohort_sdk/src/model/mod.rs @@ -0,0 +1,233 @@ +pub mod callbacks; +pub mod internal; + +use std::{collections::HashMap, fmt::Display}; + +use serde_json::Value; +use talos_agent::{ + agent::errors::{AgentError, AgentErrorKind}, + api::{AgentConfig, KafkaConfig, TalosType}, + messaging::api::Decision, +}; +use talos_certifier_adapters::kafka::config::KafkaConfig as TalosKafkaConfig; +use talos_suffix::core::SuffixConfig; +use tokio::task::JoinHandle; + +// #[napi] +#[derive(Clone)] +pub struct CandidateData { + pub readset: Vec, + pub writeset: Vec, + pub statemap: Option>>, + // The "snapshot" is intentionally messing here. We will compute it ourselves before feeding this data to Talos +} + +// #[napi] +#[derive(Clone)] +pub struct CertificationRequest { + pub candidate: CandidateData, + pub timeout_ms: u64, +} + +// #[napi] +pub struct CertificationResponse { + pub xid: String, + pub decision: Decision, + pub version: u64, + pub safepoint: Option, + pub metadata: ResponseMetadata, +} + +pub struct ResponseMetadata { + pub attempts: u64, + pub duration_ms: u64, +} + +#[derive(strum::Display)] +// #[napi] +// this is napi friendly copy of talos_agent::agent::errors::AgentErrorKind +pub enum ClientErrorKind { + Certification, + CertificationTimeout, + Messaging, + Persistence, + Internal, + OutOfOrderCallbackFailed, + OutOfOrderSnapshotTimeout, +} + +// #[napi] +pub struct ClientError { + pub kind: ClientErrorKind, + pub reason: String, + pub cause: Option, +} + +#[derive(Clone)] +// #[napi] +pub struct Config { + // + // cohort configs + // + pub retry_attempts_max: u64, + pub retry_backoff_max_ms: u64, + pub retry_oo_backoff_max_ms: u64, + pub retry_oo_attempts_max: u64, + + // + // agent config values + // + pub agent: String, + pub cohort: String, + // The size of internal buffer for candidates + pub buffer_size: usize, + pub timeout_ms: u64, + + // + // Common to kafka configs values + // 
+ pub brokers: String, + pub topic: String, + pub sasl_mechanisms: Option, + pub kafka_username: Option, + pub kafka_password: Option, + + // + // Kafka configs for Agent + // + // Must be unique for each agent instance. Can be the same as AgentConfig.agent_id + pub agent_group_id: String, + pub agent_fetch_wait_max_ms: u64, + // The maximum time librdkafka may use to deliver a message (including retries) + pub agent_message_timeout_ms: u64, + // Controls how long to wait until message is successfully placed on the librdkafka producer queue (including retries). + pub agent_enqueue_timeout_ms: u64, + // should be mapped to rdkafka::config::RDKafkaLogLevel + pub agent_log_level: u64, + + // + // Kafka configs for Replicator + // + pub replicator_client_id: String, + pub replicator_group_id: String, + pub producer_config_overrides: HashMap<&'static str, &'static str>, + pub consumer_config_overrides: HashMap<&'static str, &'static str>, + + // + // Suffix config values + // + /// Initial capacity of the suffix + pub suffix_size_max: usize, + /// - The suffix prune threshold from when we start checking if the suffix + /// should prune. + /// - Set to None if pruning is not required. + /// - Defaults to None. + pub suffix_prune_at_size: Option, + /// Minimum size of suffix after prune. + /// - Defaults to None. 
+ pub suffix_size_min: Option, + + // + // Replicator config values + // + pub replicator_buffer_size: usize, + + // + // Database config + // + pub db_pool_size: usize, + pub db_user: String, + pub db_password: String, + pub db_host: String, + pub db_port: String, + pub db_database: String, +} +pub struct ReplicatorServices { + pub replicator_handle: JoinHandle>, + pub installer_handle: JoinHandle>, +} + +impl From for AgentConfig { + fn from(val: Config) -> Self { + AgentConfig { + agent: val.agent, + cohort: val.cohort, + buffer_size: val.buffer_size, + timeout_ms: val.timeout_ms, + } + } +} + +impl From for SuffixConfig { + fn from(val: Config) -> Self { + SuffixConfig { + capacity: val.suffix_size_max, + prune_start_threshold: val.suffix_prune_at_size, + min_size_after_prune: val.suffix_size_min, + } + } +} + +impl From for KafkaConfig { + fn from(val: Config) -> Self { + KafkaConfig { + brokers: val.brokers, + certification_topic: val.topic, + sasl_mechanisms: val.sasl_mechanisms, + username: val.kafka_username, + password: val.kafka_password, + group_id: val.agent_group_id, + fetch_wait_max_ms: val.agent_fetch_wait_max_ms, + message_timeout_ms: val.agent_message_timeout_ms, + enqueue_timeout_ms: val.agent_enqueue_timeout_ms, + log_level: KafkaConfig::map_log_level(val.agent_log_level), + talos_type: TalosType::External, + } + } +} + +impl From for TalosKafkaConfig { + fn from(val: Config) -> Self { + TalosKafkaConfig { + brokers: val.brokers.split(',').map(|i| i.to_string()).collect(), + topic: val.topic, + // TODO: not sure how napi will handle Option<> fields, if it can process them then we dont need to use this mapping. + username: val.kafka_username.unwrap_or_else(|| "".into()), + // TODO: not sure how napi will handle Option<> fields, if it can process them then we dont need to use this mapping. 
+ password: val.kafka_password.unwrap_or_else(|| "".into()), + client_id: val.replicator_client_id, + group_id: val.replicator_group_id, + producer_config_overrides: val.producer_config_overrides, + consumer_config_overrides: val.consumer_config_overrides, + } + } +} + +impl From for ClientError { + fn from(agent_error: AgentError) -> Self { + let (kind, reason) = match agent_error.kind { + AgentErrorKind::CertificationTimeout { xid, elapsed_ms } => ( + ClientErrorKind::CertificationTimeout, + format!("Transaction {} timedout after {}ms", xid, elapsed_ms), + ), + + AgentErrorKind::Messaging => (ClientErrorKind::Messaging, agent_error.reason), + + // error during cerification attempt, typically indicates early closure of internal buffers while some + // transaction is not yet concluded + AgentErrorKind::Certification { xid: _xid } => (ClientErrorKind::Internal, agent_error.reason), + }; + + Self { + kind, + reason, + cause: agent_error.cause, + } + } +} + +impl Display for ClientError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ClientError: [kind: {}, reason: {}, cause: {:?}]", self.kind, self.reason, self.cause) + } +} diff --git a/packages/cohort_sdk/src/replicator/core.rs b/packages/cohort_sdk/src/replicator/core.rs new file mode 100644 index 00000000..d9cb4545 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/core.rs @@ -0,0 +1,196 @@ +use async_trait::async_trait; +use log::warn; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{collections::HashMap, marker::PhantomData}; +use talos_certifier::{ + model::{CandidateMessage, DecisionMessageTrait}, + ports::MessageReciever, + ChannelMessage, +}; + +use super::{ + suffix::{ReplicatorSuffixItemTrait, ReplicatorSuffixTrait}, + utils::{get_filtered_batch, get_statemap_from_suffix_items}, +}; + +#[derive(Debug)] +pub enum ReplicatorChannel { + InstallationSuccess(Vec), + // InstallationFailure(String), +} + +#[derive(Clone, Debug, Deserialize, Eq, 
PartialEq, Serialize)] +pub enum CandidateDecisionOutcome { + Committed, + Aborted, + Timedout, + Undecided, +} + +#[derive(Debug, Clone, Eq, PartialEq, Deserialize)] +pub struct StatemapItem { + pub action: String, + pub version: u64, + pub payload: Value, +} + +impl StatemapItem { + pub fn new(action: String, version: u64, payload: Value) -> Self { + StatemapItem { action, version, payload } + } +} + +#[async_trait] +pub trait ReplicatorInstaller { + async fn install(&mut self, sm: Vec, version: Option) -> Result; +} + +#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)] +pub struct ReplicatorCandidate { + pub candidate: CandidateMessage, + + #[serde(skip_deserializing)] + pub safepoint: Option, + + #[serde(skip_deserializing)] + pub decision_outcome: Option, + + #[serde(skip_deserializing)] + pub is_installed: bool, +} + +impl From for ReplicatorCandidate { + fn from(value: CandidateMessage) -> Self { + ReplicatorCandidate { + candidate: value, + safepoint: None, + decision_outcome: None, + is_installed: false, + } + } +} + +impl ReplicatorSuffixItemTrait for ReplicatorCandidate { + fn get_safepoint(&self) -> &Option { + &self.safepoint + } + fn get_statemap(&self) -> &Option>> { + &self.candidate.statemap + } + fn set_safepoint(&mut self, safepoint: Option) { + self.safepoint = safepoint + } + fn set_decision_outcome(&mut self, decision_outcome: Option) { + self.decision_outcome = decision_outcome + } + fn set_suffix_item_installed(&mut self) { + self.is_installed = true + } + fn is_installed(&self) -> bool { + self.is_installed + } +} + +pub struct Replicator +where + T: ReplicatorSuffixItemTrait, + S: ReplicatorSuffixTrait + std::fmt::Debug, + M: MessageReciever + Send + Sync, +{ + pub receiver: M, + pub suffix: S, + pub last_installing: u64, + _phantom: PhantomData, +} + +impl Replicator +where + T: ReplicatorSuffixItemTrait, + S: ReplicatorSuffixTrait + std::fmt::Debug, + M: MessageReciever + Send + Sync, +{ + pub fn new(receiver: M, suffix: S) 
-> Self { + Replicator { + receiver, + suffix, + last_installing: 0, + _phantom: PhantomData, + } + } + + pub(crate) async fn process_consumer_message(&mut self, version: u64, message: T) { + if version > 0 { + self.suffix.insert(version, message).unwrap(); + } else { + warn!("Version 0 will not be inserted into suffix.") + } + } + + pub(crate) async fn process_decision_message(&mut self, decision_version: u64, decision_message: D) { + let version = decision_message.get_candidate_version(); + + let decision_outcome = match decision_message.get_decision() { + talos_certifier::model::Decision::Committed => Some(CandidateDecisionOutcome::Committed), + talos_certifier::model::Decision::Aborted => Some(CandidateDecisionOutcome::Aborted), + }; + self.suffix.update_suffix_item_decision(version, decision_version).unwrap(); + self.suffix.set_decision_outcome(version, decision_outcome); + self.suffix.set_safepoint(version, decision_message.get_safepoint()); + + // If this is a duplicate, we mark it as installed (assuming the original version always comes first and therefore that will be installed.) + if decision_message.is_duplicate() { + self.suffix.set_item_installed(version); + } + } + + pub(crate) fn generate_statemap_batch(&mut self) -> Vec<(u64, Vec)> { + // let instance = OffsetDateTime::now_utc().unix_timestamp_nanos(); + // let msg_batch_instance = Instant::now(); + // get batch of items from suffix to install. 
+ let items_option = self.suffix.get_message_batch_from_version(self.last_installing, None); + // let msg_batch_instance_elapsed = msg_batch_instance.elapsed(); + + let mut statemaps_batch = vec![]; + + // #[allow(unused_assignments)] + // let mut msg_statemap_create_elapsed = Duration::from_nanos(0); + + if let Some(items) = items_option { + // let msg_batch_instance_filter = Instant::now(); + let filtered_message_batch = get_filtered_batch(items.iter().copied()); + // let msg_batch_instance_filter_elapsed = msg_batch_instance_filter.elapsed(); + + // let msg_statemap_create = Instant::now(); + // generate the statemap from each item in batch. + statemaps_batch = get_statemap_from_suffix_items(filtered_message_batch); + + // msg_statemap_create_elapsed = msg_statemap_create.elapsed(); + + // let elapsed = OffsetDateTime::now_utc().unix_timestamp_nanos() - instance; + + if let Some(last_item) = items.last() { + self.last_installing = last_item.item_ver; + } + + // TODO: Remove TEMP_CODE and replace with proper metrics from feature/cohort-db-mock + // if !items.is_empty() { + // let first_version = items.first().unwrap().item_ver; + // let last_version = items.last().unwrap().item_ver; + // debug!("[CREATE_STATEMAP] Processed total of count={} from_version={first_version:?} to_version={last_version:?} with batch_create_time={:?}, filter_time={msg_batch_instance_filter_elapsed:?} statemap_create_time={msg_statemap_create_elapsed:?} and total_time={elapsed:?} {}", items.len(), msg_batch_instance_elapsed, elapsed); + // }; + } + statemaps_batch + } + + pub(crate) async fn commit_till_last_installed(&mut self) { + if self.last_installing > 0 { + if let Some(last_installed) = self.suffix.get_last_installed(Some(self.last_installing)) { + let version = last_installed.decision_ver.unwrap(); + self.receiver.update_savepoint(version as i64).await.unwrap(); + + self.receiver.commit().await.unwrap(); + } + } + } +} diff --git a/packages/cohort_sdk/src/replicator/mod.rs 
b/packages/cohort_sdk/src/replicator/mod.rs new file mode 100644 index 00000000..94cd5e8a --- /dev/null +++ b/packages/cohort_sdk/src/replicator/mod.rs @@ -0,0 +1,7 @@ +pub mod core; +pub mod services; +pub mod suffix; +pub mod utils; + +#[cfg(test)] +pub mod tests; diff --git a/packages/cohort_sdk/src/replicator/services/mod.rs b/packages/cohort_sdk/src/replicator/services/mod.rs new file mode 100644 index 00000000..cc63915a --- /dev/null +++ b/packages/cohort_sdk/src/replicator/services/mod.rs @@ -0,0 +1,2 @@ +pub mod replicator_service; +pub mod statemap_installer_service; diff --git a/packages/cohort_sdk/src/replicator/services/replicator_service.rs b/packages/cohort_sdk/src/replicator/services/replicator_service.rs new file mode 100644 index 00000000..b6374b82 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/services/replicator_service.rs @@ -0,0 +1,88 @@ +// $coverage:ignore-start +use std::{fmt::Debug, time::Duration}; + +use crate::replicator::{ + core::{Replicator, ReplicatorCandidate, ReplicatorChannel, StatemapItem}, + suffix::ReplicatorSuffixTrait, +}; + +use log::{debug, info}; +use talos_certifier::{ports::MessageReciever, ChannelMessage}; +use tokio::sync::mpsc; + +pub async fn replicator_service( + statemaps_tx: mpsc::Sender)>>, + mut replicator_rx: mpsc::Receiver, + mut replicator: Replicator, +) -> Result<(), String> +where + S: ReplicatorSuffixTrait + Debug, + M: MessageReciever + Send + Sync, +{ + info!("Starting Replicator Service.... "); + let mut interval = tokio::time::interval(Duration::from_millis(10_000)); + + loop { + tokio::select! { + // 1. Consume message. + res = replicator.receiver.consume_message() => { + if let Ok(Some(msg)) = res { + + // 2. Add/update to suffix. 
+ match msg { + // 2.1 For CM - Install messages on the version + ChannelMessage::Candidate( message) => { + let version = message.version; + replicator.process_consumer_message(version, message.into()).await; + }, + // 2.2 For DM - Update the decision with outcome + safepoint. + ChannelMessage::Decision(decision_version, decision_message) => { + replicator.process_decision_message(decision_version, decision_message).await; + + + // Get a batch of remaining versions with their statemaps to install. + let statemaps_batch = replicator.generate_statemap_batch(); + + if !statemaps_batch.is_empty() { + statemaps_tx.send(statemaps_batch).await.unwrap(); + } + + }, + } + } + } + // Commit offsets at interval. + _ = interval.tick() => { + replicator.commit_till_last_installed().await; + } + // Receive feedback from installer. + res = replicator_rx.recv() => { + if let Some(result) = res { + match result { + // 4. Remove the versions if installations are complete. + ReplicatorChannel::InstallationSuccess(vers) => { + + let version = vers.last().unwrap().to_owned(); + debug!("Installated successfully till version={version:?}"); + // Mark the suffix item as installed. + replicator.suffix.set_item_installed(version); + + // if all prior items are installed, then update the prune vers + replicator.suffix.update_prune_index(version); + + + // Prune suffix and update suffix head. 
+ if replicator.suffix.get_suffix_meta().prune_index >= replicator.suffix.get_suffix_meta().prune_start_threshold { + replicator.suffix.prune_till_version(version).unwrap(); + } + + } + } + } + } + + } + } +} + +// $coverage:ignore-end diff --git a/packages/cohort_sdk/src/replicator/services/statemap_installer_service.rs b/packages/cohort_sdk/src/replicator/services/statemap_installer_service.rs new file mode 100644 index 00000000..8ec9023d --- /dev/null +++ b/packages/cohort_sdk/src/replicator/services/statemap_installer_service.rs @@ -0,0 +1,35 @@ +// $coverage:ignore-start + +use crate::replicator::core::{ReplicatorChannel, ReplicatorInstaller, StatemapItem}; + +use log::{debug, info}; +use tokio::sync::mpsc; + +pub async fn installer_service( + mut statemaps_rx: mpsc::Receiver)>>, + replicator_tx: mpsc::Sender, + mut statemap_installer: T, +) -> Result<(), String> +where + T: ReplicatorInstaller, +{ + info!("Starting Installer Service.... "); + + loop { + if let Some(batch) = statemaps_rx.recv().await { + for (ver, statemap_batch) in batch { + debug!("[Statemap Installer Service] Received statemap batch ={statemap_batch:?} and version={ver:?}"); + match statemap_installer.install(statemap_batch, Some(ver)).await { + Ok(true) => { + replicator_tx.send(ReplicatorChannel::InstallationSuccess(vec![ver])).await.unwrap(); + } + Ok(false) => { + // Do nothing if result is false. 
+ } + Err(err) => return Err(err), + } + } + } + } +} +// $coverage:ignore-end diff --git a/packages/cohort_sdk/src/replicator/suffix.rs b/packages/cohort_sdk/src/replicator/suffix.rs new file mode 100644 index 00000000..a41de4dc --- /dev/null +++ b/packages/cohort_sdk/src/replicator/suffix.rs @@ -0,0 +1,113 @@ +use std::{collections::HashMap, fmt::Debug}; + +use log::warn; +use serde_json::Value; +use talos_suffix::{ + core::{SuffixMeta, SuffixResult}, + get_nonempty_suffix_items, Suffix, SuffixItem, SuffixTrait, +}; + +use super::core::CandidateDecisionOutcome; + +pub trait ReplicatorSuffixItemTrait { + fn get_safepoint(&self) -> &Option; + fn get_statemap(&self) -> &Option>>; + fn set_safepoint(&mut self, safepoint: Option); + fn set_decision_outcome(&mut self, decision_outcome: Option); + fn set_suffix_item_installed(&mut self); + fn is_installed(&self) -> bool; +} + +pub trait ReplicatorSuffixTrait: SuffixTrait { + fn set_decision_outcome(&mut self, version: u64, decision_outcome: Option); + fn set_safepoint(&mut self, version: u64, safepoint: Option); + fn set_item_installed(&mut self, version: u64); + fn get_last_installed(&self, to_version: Option) -> Option<&SuffixItem>; + fn update_suffix_item_decision(&mut self, version: u64, decision_ver: u64) -> SuffixResult<()>; + fn update_prune_index(&mut self, version: u64); + /// Returns the items from suffix + fn get_suffix_meta(&self) -> &SuffixMeta; + fn get_message_batch_from_version(&self, from: u64, count: Option) -> Option>>; +} + +impl ReplicatorSuffixTrait for Suffix +where + T: ReplicatorSuffixItemTrait + Debug + Clone, +{ + fn set_decision_outcome(&mut self, version: u64, decision_outcome: Option) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.item.set_decision_outcome(decision_outcome); + } else { + warn!("Unable to update decision as message with version={version} not 
found"); + } + } + } + + fn set_safepoint(&mut self, version: u64, safepoint: Option) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.item.set_safepoint(safepoint); + } else { + warn!("Unable to update safepoint as message with version={version} not found"); + } + } + } + + fn set_item_installed(&mut self, version: u64) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.item.set_suffix_item_installed(); + } else { + warn!("Unable to update is_installed flag as message with version={version} not found"); + // info!("All some items on suffix.... {:?}", self.retrieve_all_some_vec_items()); + } + } + } + + fn update_prune_index(&mut self, version: u64) { + if self.are_prior_items_decided(version) { + let index = self.index_from_head(version).unwrap(); + self.update_prune_index(Some(index)); + } + } + + fn get_message_batch_from_version(&self, from: u64, count: Option) -> Option>> { + // let mut batch = vec![]; + let batch_size = match count { + Some(c) => c as usize, + None => self.messages.len(), + }; + + let from_index = if from != 0 { self.index_from_head(from).unwrap() + 1 } else { 0 }; + + let items = get_nonempty_suffix_items(self.messages.range(from_index..)) // take only some items in suffix + .take_while(|m| m.is_decided) // take items till we find a not decided item. + .filter(|m| !m.item.is_installed()) // remove already installed items. 
+ .take(batch_size) + .collect::>>(); + + if !items.is_empty() { + Some(items) + } else { + None + } + } + + fn update_suffix_item_decision(&mut self, version: u64, decision_ver: u64) -> SuffixResult<()> { + self.update_decision_suffix_item(version, decision_ver) + } + + fn get_suffix_meta(&self) -> &SuffixMeta { + &self.meta + } + + fn get_last_installed(&self, to_version: Option) -> Option<&SuffixItem> { + let version = to_version?; + let to_index = self.index_from_head(version)?; + self.messages.range(..to_index).flatten().rev().find(|&i| i.is_decided && i.item.is_installed()) + } +} diff --git a/packages/cohort_sdk/src/replicator/tests/mod.rs b/packages/cohort_sdk/src/replicator/tests/mod.rs new file mode 100644 index 00000000..d2581c78 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/tests/mod.rs @@ -0,0 +1,3 @@ +pub mod suffix; +pub mod test_utils; +pub mod utils; diff --git a/packages/cohort_sdk/src/replicator/tests/suffix.rs b/packages/cohort_sdk/src/replicator/tests/suffix.rs new file mode 100644 index 00000000..8fd6b525 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/tests/suffix.rs @@ -0,0 +1,181 @@ +use std::collections::{HashMap, VecDeque}; + +use serde_json::Value; +use talos_suffix::{core::SuffixMeta, Suffix, SuffixTrait}; + +use crate::replicator::{ + core::CandidateDecisionOutcome, + suffix::{ReplicatorSuffixItemTrait, ReplicatorSuffixTrait}, +}; + +#[derive(Debug, Default, PartialEq, Clone)] +struct TestReplicatorSuffixItem { + safepoint: Option, + decision: Option, + statemap: Option>>, + is_installed: bool, +} + +impl ReplicatorSuffixItemTrait for TestReplicatorSuffixItem { + fn get_safepoint(&self) -> &Option { + &self.safepoint + } + + fn get_statemap(&self) -> &Option>> { + &self.statemap + } + + fn set_safepoint(&mut self, safepoint: Option) { + self.safepoint = safepoint + } + + fn set_decision_outcome(&mut self, decision_outcome: Option) { + self.decision = decision_outcome + } + fn set_suffix_item_installed(&mut self) { + 
self.is_installed = true + } + + fn is_installed(&self) -> bool { + self.is_installed + } +} + +#[test] +fn test_replicator_suffix_item() { + let mut suffix_item = TestReplicatorSuffixItem::default(); + + // test - safepoint + assert!(suffix_item.get_safepoint().is_none()); + suffix_item.set_safepoint(Some(120)); + assert_eq!(suffix_item.get_safepoint(), &Some(120)); + + // test - statemap + assert!(suffix_item.get_statemap().is_none()); + + let mut statemap_item = HashMap::new(); + statemap_item.insert("k".to_owned(), Value::Bool(true)); + suffix_item.statemap = Some(vec![statemap_item]); + + assert!(suffix_item.get_statemap().is_some()); + + // test - decision_outcome + assert!(suffix_item.decision.is_none()); + suffix_item.set_decision_outcome(Some(CandidateDecisionOutcome::Committed)); +} + +#[test] +fn test_replicator_suffix() { + let suffix_messages = VecDeque::new(); + + let mut suffix: Suffix = Suffix { + meta: SuffixMeta::default(), + messages: suffix_messages, + }; + + assert_eq!(suffix.messages.len(), 0); + suffix.insert(3, TestReplicatorSuffixItem::default()).unwrap(); + assert_eq!(suffix.messages.len(), 1); + + suffix.insert(5, TestReplicatorSuffixItem::default()).unwrap(); + suffix.insert(8, TestReplicatorSuffixItem::default()).unwrap(); + + // Message batch is empty as the decision is not added. + assert_eq!(suffix.get_message_batch_from_version(5, Some(5)), None); + + // Nothing happens for version 50 updates as the item doesn't exist. 
+ suffix.set_safepoint(50, Some(2)); + suffix.set_decision_outcome(50, Some(CandidateDecisionOutcome::Committed)); + + //add safepoint and decision for version 3 + suffix.set_safepoint(3, Some(2)); + suffix.set_decision_outcome(3, Some(CandidateDecisionOutcome::Committed)); + suffix.update_decision(3, 10).unwrap(); + + let item_at_version3 = suffix.get(3).unwrap().unwrap(); + assert_eq!(item_at_version3.item.safepoint.unwrap(), 2); + assert_eq!(item_at_version3.item.decision.unwrap(), CandidateDecisionOutcome::Committed); + assert!(!item_at_version3.item.is_installed); + // Message batch will be one as only version 3's decision is recorded.. + assert_eq!(suffix.get_message_batch_from_version(0, None).unwrap().len(), 1); + + suffix.update_decision(4, 12).unwrap(); + // Message batch will still be 1 as there was no version 1 inserted. + // So the decision will be discarded + assert_eq!(suffix.get_message_batch_from_version(0, Some(4)).unwrap().len(), 1); + + suffix.update_decision(5, 19).unwrap(); + // Message batch will be 2 as safepoint is not set a decision is made, therefore version 3 and 4 are picked. + // version 3 is considered as commited as the safepoint and decision_outcome is set. + // version 4 is considered as aborted at this point as safepoint is not set. + assert_eq!(suffix.get_message_batch_from_version(0, Some(10)).unwrap().len(), 2); + assert_eq!(suffix.get_message_batch_from_version(3, Some(10)).unwrap().len(), 1); + + //add safepoint and decision for version 8 + suffix.update_decision(8, 19).unwrap(); + suffix.set_safepoint(8, Some(2)); + suffix.set_decision_outcome(8, Some(CandidateDecisionOutcome::Committed)); + // Message batch will be 3, as version 3,5, and 8 are not installed. 
+ assert_eq!(suffix.get_message_batch_from_version(0, Some(10)).unwrap().len(), 3); + + //add safepoint and decision for version 5 + suffix.set_safepoint(5, Some(2)); + suffix.set_decision_outcome(5, Some(CandidateDecisionOutcome::Committed)); + // Message batch will be 3 as version 3, 4 and 5 has Some safepoint value + assert_eq!(suffix.get_message_batch_from_version(0, Some(10)).unwrap().len(), 3); +} + +#[test] +fn test_replicator_suffix_installed() { + let suffix_messages = VecDeque::new(); + + let mut suffix: Suffix = Suffix { + meta: SuffixMeta::default(), + messages: suffix_messages, + }; + + assert_eq!(suffix.messages.len(), 0); + suffix.insert(3, TestReplicatorSuffixItem::default()).unwrap(); + suffix.insert(6, TestReplicatorSuffixItem::default()).unwrap(); + suffix.insert(9, TestReplicatorSuffixItem::default()).unwrap(); + + // update decision for version 3 + suffix.update_suffix_item_decision(3, 19).unwrap(); + suffix.set_safepoint(3, Some(2)); + suffix.set_decision_outcome(3, Some(CandidateDecisionOutcome::Committed)); + + // Batch returns one item as only version 3 is decided, others haven't got the decisions yet. + assert_eq!(suffix.get_message_batch_from_version(0, Some(1)).unwrap().len(), 1); + suffix.set_item_installed(3); + // Batch returns 0 items as version 3 is already installed, others haven't got the decisions yet. + assert!(suffix.get_message_batch_from_version(0, Some(1)).is_none()); + + let suffix_item_3 = suffix.get(3).unwrap().unwrap(); + // confirm version 3 is marked as installed. + assert!(suffix_item_3.item.is_installed()); + + // update decision for version 9 + suffix.update_suffix_item_decision(9, 23).unwrap(); + suffix.set_safepoint(9, Some(2)); + suffix.set_decision_outcome(9, Some(CandidateDecisionOutcome::Committed)); + // Batch returns 0, because there is a version in between which is not decided. 
+ assert!(suffix.get_message_batch_from_version(0, Some(1)).is_none()); + + // update decision for version 6 + suffix.update_suffix_item_decision(6, 23).unwrap(); + suffix.set_safepoint(6, None); + suffix.set_decision_outcome(6, Some(CandidateDecisionOutcome::Aborted)); + // Batch returns 2 items (version 6 & 9). + let batch = suffix.get_message_batch_from_version(0, None).unwrap(); + assert_eq!(batch.len(), 2); + + // Confirm the batch returned the correct item. + assert_eq!(batch.first().unwrap().item_ver, 6); + + // Mark version 9 as installed. + suffix.set_item_installed(9); + // Although version 9 is installed, version 6 is not, therefore it is picked up here. + assert_eq!(suffix.get_message_batch_from_version(3, Some(1)).unwrap().len(), 1); + + assert_eq!(suffix.get_suffix_meta().head, 3); +} diff --git a/packages/cohort_sdk/src/replicator/tests/test_utils.rs b/packages/cohort_sdk/src/replicator/tests/test_utils.rs new file mode 100644 index 00000000..20d9d816 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/tests/test_utils.rs @@ -0,0 +1,119 @@ +// $coverage:ignore-start +// helper functions for testing. Not required for coverage. 
+ +use rand::{seq::SliceRandom, thread_rng}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use talos_suffix::SuffixItem; + +use crate::replicator::{core::CandidateDecisionOutcome, suffix::ReplicatorSuffixItemTrait}; + +fn generate_bank_transfer_statemap_value() -> Value { + let accounts_vec = (0..10).collect::>(); + let accounts_slice = accounts_vec.as_slice(); + + let amounts_slice: &[u32] = &[100, 120, 200, 300, 450]; + + let mut rng = thread_rng(); + let first_account_suffix = accounts_slice.choose(&mut rng).unwrap(); + + let second_account_suffix = loop { + let random_suffix = accounts_slice.choose(&mut rng).unwrap(); + if random_suffix != first_account_suffix { + break random_suffix; + } + }; + + let amount = amounts_slice.choose(&mut rng).unwrap(); + + json!(format!( + r#" + {{ + from: account-{}, + to: account-{}, + amount: {} + }} + "#, + first_account_suffix, second_account_suffix, amount + )) +} + +pub(crate) fn generate_test_statemap(action: &str, value_generator_fn: F) -> HashMap +where + F: Fn() -> Value, +{ + let mut statemap_item = HashMap::new(); + statemap_item.insert(action.to_owned(), value_generator_fn()); + statemap_item +} + +#[derive(Debug, Default, Serialize, Deserialize, Clone, Eq, PartialEq)] +pub(crate) struct BankStatemapTestCandidate { + pub safepoint: Option, + pub decision_outcome: Option, + pub statemap: Option>>, + pub is_installed: bool, +} + +impl BankStatemapTestCandidate { + pub(crate) fn create_with_statemap(statemap_count: u32) -> Self { + let item = Self { + safepoint: Default::default(), + decision_outcome: Default::default(), + statemap: Default::default(), + is_installed: false, + }; + + item.generate_bank_transfers_statemap(statemap_count) + } + + pub(crate) fn set_safepoint(mut self, safepoint: Option) -> Self { + ReplicatorSuffixItemTrait::set_safepoint(&mut self, safepoint); + self + } + + pub(crate) fn generate_bank_transfers_statemap(mut self, count: u32) -> 
Self { + let statemap = (0..count).map(|_| generate_test_statemap("transfer", generate_bank_transfer_statemap_value)); + + self.statemap = if count > 0 { Some(statemap.collect()) } else { None }; + self + } +} + +impl ReplicatorSuffixItemTrait for BankStatemapTestCandidate { + fn get_safepoint(&self) -> &Option { + &self.safepoint + } + + fn get_statemap(&self) -> &Option>> { + &self.statemap + } + + fn set_safepoint(&mut self, safepoint: Option) { + self.safepoint = safepoint + } + + fn set_decision_outcome(&mut self, decision_outcome: Option) { + self.decision_outcome = decision_outcome + } + + fn set_suffix_item_installed(&mut self) { + self.is_installed = true + } + + fn is_installed(&self) -> bool { + self.is_installed + } +} + +pub(crate) fn build_test_suffix_item(version: u64, decision_ver: Option, item: T) -> SuffixItem { + SuffixItem { + item, + item_ver: version, + decision_ver, + is_decided: decision_ver.is_some(), + } +} + +// $coverage:ignore-end diff --git a/packages/cohort_sdk/src/replicator/tests/utils.rs b/packages/cohort_sdk/src/replicator/tests/utils.rs new file mode 100644 index 00000000..7021de80 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/tests/utils.rs @@ -0,0 +1,102 @@ +use crate::replicator::{ + tests::test_utils::{build_test_suffix_item, BankStatemapTestCandidate}, + utils::{get_filtered_batch, get_statemap_from_suffix_items}, +}; + +#[test] +fn test_get_filtered_batch_all_pass() { + //Test data + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(1))); + let item2 = build_test_suffix_item(12, Some(15), BankStatemapTestCandidate::create_with_statemap(2).set_safepoint(Some(1))); + let item3 = build_test_suffix_item(13, Some(14), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let item4 = build_test_suffix_item(16, Some(18), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let item5 = 
build_test_suffix_item(17, Some(20), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let suffix_item = vec![&item1, &item2, &item3, &item4, &item5]; + + let result = get_filtered_batch(suffix_item.into_iter()); + + assert_eq!(result.count(), 5); +} + +#[test] +fn test_get_filtered_batch_stop_on_undecided() { + //Test data + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(1))); + // Undecided item. + let item2 = build_test_suffix_item(12, None, BankStatemapTestCandidate::create_with_statemap(2).set_safepoint(Some(1))); + let item3 = build_test_suffix_item(13, Some(14), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let suffix_item = vec![&item1, &item2, &item3]; + + let result = get_filtered_batch(suffix_item.into_iter()); + + assert_eq!(result.count(), 1); +} + +#[test] +fn test_get_filtered_batch_remove_items_no_safepoint() { + //Test data + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(1))); + let item2 = build_test_suffix_item(12, Some(15), BankStatemapTestCandidate::create_with_statemap(1)); // This item should be removed as safepoint is None + let item3 = build_test_suffix_item(13, Some(14), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let suffix_item = vec![&item1, &item2, &item3]; + + let mut result = get_filtered_batch(suffix_item.into_iter()); + + assert_eq!(result.next().unwrap().item_ver, 10); + assert_eq!(result.next().unwrap().item_ver, 13); + assert!(result.next().is_none()); +} + +#[test] +fn test_get_filtered_batch_remove_items_no_statemap() { + //Test data + + // item1 doesn't have statemap, and therefore shouldn't be in the result + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::default().set_safepoint(Some(1))); + let item2 = build_test_suffix_item(12, Some(15), 
BankStatemapTestCandidate::create_with_statemap(2).set_safepoint(Some(1))); + let item3 = build_test_suffix_item(13, Some(14), BankStatemapTestCandidate::create_with_statemap(3).set_safepoint(Some(2))); + let item4 = build_test_suffix_item(16, Some(18), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let suffix_item = vec![&item1, &item2, &item3, &item4]; + + let mut result = get_filtered_batch(suffix_item.into_iter()); + + assert_eq!(result.next().unwrap().item_ver, 12); + assert_eq!(result.last().unwrap().item_ver, 16); +} +#[test] +fn test_get_all_statemap_from_suffix_items() { + //Test data + + // item1 doesn't have statemap, and therefore shouldn't be in the result + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::create_with_statemap(3).set_safepoint(Some(1))); + let item2 = build_test_suffix_item(12, Some(15), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(1))); + let item3 = build_test_suffix_item(13, Some(14), BankStatemapTestCandidate::create_with_statemap(5).set_safepoint(Some(2))); + let item4 = build_test_suffix_item(16, Some(18), BankStatemapTestCandidate::create_with_statemap(1).set_safepoint(Some(2))); + let suffix_item = vec![&item1, &item2, &item3, &item4]; + + let result = get_filtered_batch(suffix_item.into_iter()); + + let state_map_batch = get_statemap_from_suffix_items(result); + assert_eq!(state_map_batch[0].1.len(), 3); // three items in statemap for version 10 + assert_eq!(state_map_batch[0].0, 10); + assert_eq!(state_map_batch[2].0, 13); + assert_eq!(state_map_batch[3].0, 16); + assert_eq!(state_map_batch.last().unwrap().0, 16); +} + +#[test] +fn test_get_statemap_from_suffix_items_no_statemaps() { + //Test data + + // item1 doesn't have statemap, and therefore shouldn't be in the result + let item1 = build_test_suffix_item(10, Some(11), BankStatemapTestCandidate::default().set_safepoint(Some(1))); + let item2 = build_test_suffix_item(12, Some(15), 
BankStatemapTestCandidate::default().set_safepoint(Some(1))); + let suffix_item = vec![&item1, &item2]; + + // let result = get_filtered_batch(suffix_item.into_iter()); + + let state_map_batch = get_statemap_from_suffix_items(suffix_item.into_iter()); + assert_eq!(state_map_batch.len(), 2); + assert!(state_map_batch[0].1.is_empty()); + assert!(state_map_batch[1].1.is_empty()); +} diff --git a/packages/cohort_sdk/src/replicator/utils.rs b/packages/cohort_sdk/src/replicator/utils.rs new file mode 100644 index 00000000..5a9734f0 --- /dev/null +++ b/packages/cohort_sdk/src/replicator/utils.rs @@ -0,0 +1,36 @@ +use talos_suffix::SuffixItem; + +use super::{core::StatemapItem, suffix::ReplicatorSuffixItemTrait}; + +pub fn get_filtered_batch<'a, T: ReplicatorSuffixItemTrait + 'a>(messages: impl Iterator>) -> impl Iterator> { + messages + .into_iter() + .take_while(|&m| m.is_decided) + // select only the messages that have safepoint i.e committed messages and select only the messages that have statemap. + .filter(|&m| m.item.get_safepoint().is_some() && m.item.get_statemap().is_some()) + // .filter(|&m| m.item.get_statemap().is_some()) // select only the messages that have statemap. 
+} + +pub fn get_statemap_from_suffix_items<'a, T: ReplicatorSuffixItemTrait + 'a>( + messages: impl Iterator>, +) -> Vec<(u64, Vec)> { + messages.into_iter().fold(vec![], |mut acc, m| match m.item.get_statemap().as_ref() { + Some(sm_items) => { + let state_maps_to_append = sm_items.iter().map(|sm| { + let key = sm.keys().next().unwrap().to_string(); + let payload = sm.get(&key).unwrap().clone(); + StatemapItem { + action: key, + payload, + version: m.item_ver, + } + }); + acc.push((m.item_ver, state_maps_to_append.collect::>())); + acc + } + None => { + acc.push((m.item_ver, vec![])); + acc + } + }) +} diff --git a/packages/cohort_sdk/src/replicator2/cohort_replicator.rs b/packages/cohort_sdk/src/replicator2/cohort_replicator.rs new file mode 100644 index 00000000..b0086387 --- /dev/null +++ b/packages/cohort_sdk/src/replicator2/cohort_replicator.rs @@ -0,0 +1,130 @@ +use talos_certifier::{ + model::{CandidateMessage, DecisionMessageTrait}, + ports::MessageReciever, + ChannelMessage, +}; + +use crate::replicator::core::StatemapItem; + +use super::cohort_suffix::CohortSuffix; + +pub struct CohortReplicator +where + M: MessageReciever + Send + Sync, +{ + latest_in_flight: Option, + receiver: M, + suffix: CohortSuffix, +} + +impl CohortReplicator +where + M: MessageReciever + Send + Sync, +{ + pub fn new(receiver: M, suffix: CohortSuffix) -> CohortReplicator { + CohortReplicator { + receiver, + suffix, + latest_in_flight: None, + } + } + + fn process_consumer_message(&mut self, version: u64, message: CandidateMessage) { + if version > 0 { + if let Err(e) = self.suffix.insert(version, message.into()) { + log::error!("Unable to insert version {} into suffix. 
Error: {}", version, e) + } + } else { + log::warn!("Version 0 will not be inserted into suffix.") + } + } + + fn process_decision_message(&mut self, decision_version: u64, decision_message: D) { + let version = decision_message.get_candidate_version(); + + let decision_outcome = decision_message.get_decision().clone(); + + self.suffix + .update_decision(version, decision_version, decision_message.get_decided_at()) + .unwrap(); + self.suffix.set_decision_outcome(version, decision_outcome); + self.suffix.set_safepoint(version, decision_message.get_safepoint()); + + // If this is a duplicate, we mark it as installed (assuming the original version always comes first and therefore that will be installed.) + if decision_message.is_duplicate() { + self.suffix.set_item_installed(version); + } + } + + /// Return "true" if decision was received + pub(crate) async fn receive(&mut self) -> bool { + if let Ok(Some(message)) = self.receiver.consume_message().await { + match message { + ChannelMessage::Candidate(msg_candidate) => { + self.process_consumer_message(msg_candidate.version, msg_candidate); + false + } + + ChannelMessage::Decision(version, msg_decision) => { + self.process_decision_message(version, msg_decision); + true + } + } + } else { + false + } + } + + pub async fn commit(&mut self) { + if let Some(offset) = self.suffix.find_commit_offset(self.latest_in_flight) { + if let Err(error) = self.receiver.commit().await { + log::warn!("Unable to commit offset: {}. 
Error: {}", offset, error); + } + } + } + + pub(crate) fn get_next_statemap(&mut self) -> Option<(Vec, u64, Option)> { + if let Some(decided) = self.suffix.find_new_decided(self.latest_in_flight, true) { + let has_statemap = decided.item.candidate.statemap.is_some(); + let is_committed = decided.item.safepoint.is_some(); + + let statemaps = if !has_statemap || !is_committed { + Vec::new() + } else { + let raw_statemaps = decided.item.candidate.statemap.unwrap(); + raw_statemaps + .iter() + .map(|map| { + let key = map.keys().next().unwrap().to_string(); + let payload = map.get(&key).unwrap().clone(); + StatemapItem { + action: key, + payload, + version: decided.item_ver, + } + }) + .collect::>() + }; + + self.suffix.set_item_in_flight(decided.item_ver); + self.latest_in_flight = Some(decided.item_ver); + + Some((statemaps, decided.item_ver, decided.decided_at)) + } else { + None + } + } + + pub(crate) fn update_suffix(&mut self, version: u64) -> Result { + self.suffix.set_item_installed(version); + self.latest_in_flight = None; + self.suffix.update_prune_index(version); + let meta = self.suffix.get_suffix_meta(); + if meta.prune_index >= meta.prune_start_threshold { + let _ = self.suffix.prune_till_version(version).map_err(|suffix_error| format!("{}", suffix_error))?; + Ok(true) + } else { + Ok(false) + } + } +} diff --git a/packages/cohort_sdk/src/replicator2/cohort_suffix.rs b/packages/cohort_sdk/src/replicator2/cohort_suffix.rs new file mode 100644 index 00000000..ad47ca6d --- /dev/null +++ b/packages/cohort_sdk/src/replicator2/cohort_suffix.rs @@ -0,0 +1,368 @@ +use std::collections::VecDeque; + +use talos_certifier::model::Decision; +use talos_suffix::{ + core::{SuffixConfig, SuffixMeta}, + errors::SuffixError, +}; + +use super::model::ReplicatorCandidate2; + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct SuffixItem { + /// Data received from Kafka Consumer + pub item: ReplicatorCandidate2, + pub item_ver: u64, + pub decision_ver: Option, + /// Flag to 
denote if this item has been decided (is_decided is set to true irrespective of the outcome). + pub is_decided: bool, + pub is_in_flight: bool, + pub is_installed: bool, + pub decided_at: Option, +} + +pub struct CohortSuffix { + pub meta: SuffixMeta, + pub messages: VecDeque>, +} + +impl CohortSuffix { + /// Creates a new suffix using the config passed. + /// + /// The config can be used to control + /// - Required: + /// - `capacity` - The initial capacity of the suffix. + /// - Optional: + /// - `prune_start_threshold` - The threshold index beyond which pruning starts. + /// - If `None`, no pruning will occur. + /// - If `Some()`, attempts to prune suffix if suffix length crosses the threshold. + /// + pub fn with_config(config: SuffixConfig) -> CohortSuffix { + let SuffixConfig { + capacity, + prune_start_threshold, + min_size_after_prune, + } = config; + + let messages = VecDeque::with_capacity(capacity); + + assert!( + min_size_after_prune <= prune_start_threshold, + "The config min_size_after={:?} is greater than prune_start_threshold={:?}", + min_size_after_prune, + prune_start_threshold + ); + + let meta = SuffixMeta { + head: 0, + last_insert_vers: 0, + prune_index: None, + prune_start_threshold, + min_size_after_prune, + }; + + CohortSuffix { meta, messages } + } + + pub fn print(&self) { + log::warn!("<<<<<<....."); + log::warn!("{:?}", self.meta); + for (i, d) in self.messages.iter().enumerate() { + if let Some(entry) = d { + if !entry.is_installed { + log::warn!( + "{}: {}, d={}, fl={}, ins={}, {:?}", + i, + entry.item_ver, + entry.is_decided, + entry.is_in_flight, + entry.is_installed, + entry.item.decision + ) + } + } else { + //log::warn!("{}: {:?}", i, d); + } + } + log::warn!(".....>>>>>>"); + } + + fn get(&mut self, version: u64) -> Result, SuffixError> { + let index = self.index_from_head(version).ok_or(SuffixError::VersionToIndexConversionError(version))?; + let suffix_item = self.messages.get(index).and_then(|x| x.as_ref()).cloned(); + + 
Ok(suffix_item) + } + + fn update_head(&mut self, version: u64) { + self.meta.head = version; + } + + /// Reserve space when the version we are inserting is + /// outside the upper bounds of suffix. + /// + /// Reserves space and defaults them with None. + fn reserve_space_if_required(&mut self, version: u64) -> Result<(), SuffixError> { + let ver_diff: usize = (version - self.meta.head) as usize + 1; + + if ver_diff > self.messages.len() { + // let resize_len = if ver_diff < self.meta.min_size { self.meta.min_size + 1 } else { ver_diff }; + self.messages.reserve(ver_diff + 1); + } + + Ok(()) + } + + fn index_from_head(&self, version: u64) -> Option { + let head = self.meta.head; + if version < head { + None + } else { + Some((version - head) as usize) + } + } + + pub fn insert(&mut self, version: u64, data: ReplicatorCandidate2) -> Result<(), SuffixError> { + // he very first item inserted on the suffix will automatically be made head of the suffix. + if self.meta.head == 0 { + self.update_head(version); + } + + if self.meta.head <= version { + self.reserve_space_if_required(version)?; + let index = self.index_from_head(version).ok_or(SuffixError::ItemNotFound(version, None))?; + + if index > 0 { + let last_item_index = self.index_from_head(self.meta.last_insert_vers).unwrap_or(0); + for _ in (last_item_index + 1)..index { + self.messages.push_back(None); + } + } + + self.messages.push_back(Some(SuffixItem { + item: data, + item_ver: version, + decision_ver: None, + is_decided: false, + is_in_flight: false, + is_installed: false, + decided_at: None, + })); + + self.meta.last_insert_vers = version; + } + + Ok(()) + } + + pub fn update_decision(&mut self, version: u64, decision_ver: u64, decided_at: i128) -> Result<(), SuffixError> { + // When Certifier is catching up with messages ignore the messages which are prior to the head + if version < self.meta.head { + log::info!("Returned due to version < self.meta.head for version={version} and decision 
version={decision_ver}"); + return Ok(()); + } + + let Some(sfx_item) = self.get(version)? else { + log::info!("Returned due item not found in suffix for version={version} with index={:?} and decision version={decision_ver}", self.index_from_head(version)); + return Ok(()); + }; + + let new_sfx_item = SuffixItem { + decision_ver: Some(decision_ver), + is_decided: true, + decided_at: Some(decided_at), + ..sfx_item + }; + + let index = self + .index_from_head(version) + .ok_or(SuffixError::IndexCalculationError(self.meta.head, version))?; + + log::debug!("Updating version={version} with index={index:?} and decision version={decision_ver}"); + self.messages[index] = Some(new_sfx_item); + Ok(()) + } + + pub fn set_decision_outcome(&mut self, version: u64, decision_outcome: Decision) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.item.decision = Some(decision_outcome); + } else { + log::warn!( + "Unable to update decision as message with version={version} not found. 
len: {}, index: {}, head: {}", + self.messages.len(), + index, + self.meta.head + ); + } + } + } + + pub fn set_safepoint(&mut self, version: u64, safepoint: Option) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.item.safepoint = safepoint; + } else { + log::warn!("Unable to update safepoint as message with version={version} not found"); + } + } + } + + pub fn set_item_installed(&mut self, version: u64) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.is_installed = true; + item_to_update.is_in_flight = false; + } else { + log::warn!("Unable to update is_installed flag as message with version={version} not found"); + } + } + } + + pub fn set_item_in_flight(&mut self, version: u64) { + if version >= self.meta.head { + let index = self.index_from_head(version).unwrap(); + if let Some(Some(item_to_update)) = self.messages.get_mut(index) { + item_to_update.is_in_flight = true; + } else { + log::warn!("Unable to update is_in_flight flag as message with version={version} not found"); + } + } + } + + pub fn find_new_decided(&mut self, last_in_flight: Option, skip: bool) -> Option { + let skip_count = if skip { + if let Some(last_in_flight) = last_in_flight { + match self.index_from_head(last_in_flight) { + None => 0, + Some(index) => index + 1, + } + } else { + 0 + } + } else { + 0 + }; + + self.messages + .iter() + .skip(skip_count) + .flatten() + .take_while(|i| i.is_decided) + .filter(|i| !i.is_installed && !i.is_in_flight) + .take(1) + .last() + .cloned() + } + + pub fn find_commit_offset(&mut self, last_in_flight: Option) -> Option { + let skip_count = if let Some(last_in_flight) = last_in_flight { + match self.index_from_head(last_in_flight) { + None => 0, + Some(index) => index + 1, + } + } else { + 0 + }; + + 
self.messages + .iter() + .skip(skip_count) + .flatten() + .take_while(|i| i.is_decided && i.is_installed) + .take(1) + .map(|i| i.item_ver) + .last() + } + + pub fn update_prune_index(&mut self, version: u64) { + if self.are_prior_items_decided(version) { + let index = self.index_from_head(version).unwrap(); + self.meta.prune_index = Some(index); + } + } + + /// Find prior versions are all decided. + /// + /// Returns true, if the versions prior to the current version has either been decided or + /// if suffix item is empty (None). + pub fn are_prior_items_decided(&mut self, version: u64) -> bool { + let Some(index) = self.index_from_head(version) else { + return false; + }; + + // If prune index is `None` assumption is this is the first item. + let prune_index = self.meta.prune_index.unwrap_or(0); + + let range = if index > prune_index { prune_index..index } else { 0..index }; + self.messages.range(range).flatten().all(|k| k.is_decided) + } + + pub fn get_suffix_meta(&self) -> &SuffixMeta { + &self.meta + } + + pub fn prune_till_version(&mut self, version: u64) -> Result>, SuffixError> { + log::info!("Suffix before prune.... {}", self.suffix_length()); + if let Some(index) = self.index_from_head(version) { + log::info!("Index send for pruning is {index} for version={version}"); + let prune_index = self.find_prune_till_index(index); + let prune_result = self.prune_till_index(prune_index); + log::info!("Suffix items pruned.... {prune_result:?}"); + log::info!("Suffix after prune.... 
{}", self.suffix_length()); + log::info!("Items on suffix after pruning = {:#?}", self.retrieve_all_some_vec_items()); + return prune_result; + } else { + log::warn!("Unable to prune as index not found for version {version}.") + } + Ok(vec![]) + } + + fn suffix_length(&self) -> usize { + self.messages.len() + } + + fn find_prune_till_index(&mut self, prune_till_index: usize) -> usize { + let prune_till_index = self + .messages + .range(..prune_till_index + 1) + .enumerate() + .rev() + .find_map(|(i, x)| x.is_some().then_some(i)) + .unwrap(); + + prune_till_index + } + + /// Prune the suffix + /// + /// Looks at the meta to find the prune ready version. + /// + /// Prune is allowed when + /// 1. The meta has a valid prune version. + /// 2. And there is atleast one suffix item remaining, which can be the new head. + /// This enables to move the head to the appropiate location. + fn prune_till_index(&mut self, index: usize) -> Result>, SuffixError> { + log::info!("Suffix message length BEFORE pruning={} and head={}!!!", self.messages.len(), self.meta.head); + + let drained_entries = self.messages.drain(..index).collect(); + self.meta.prune_index = None; + if let Some(Some(s_item)) = self.messages.iter().find(|m| m.is_some()) { + self.update_head(s_item.item_ver); + } else { + self.update_head(0) + } + Ok(drained_entries) + } + + fn retrieve_all_some_vec_items(&self) -> Vec<(usize, u64, Option)> { + self.messages + .iter() + .enumerate() + .filter_map(|(i, x)| x.is_some().then(|| (i, x.as_ref().unwrap().item_ver, x.as_ref().unwrap().decision_ver))) + .collect() + } +} diff --git a/packages/cohort_sdk/src/replicator2/mod.rs b/packages/cohort_sdk/src/replicator2/mod.rs new file mode 100644 index 00000000..67f913ad --- /dev/null +++ b/packages/cohort_sdk/src/replicator2/mod.rs @@ -0,0 +1,4 @@ +pub mod cohort_replicator; +pub mod cohort_suffix; +pub mod model; +pub mod service; diff --git a/packages/cohort_sdk/src/replicator2/model.rs 
b/packages/cohort_sdk/src/replicator2/model.rs new file mode 100644 index 00000000..c3404eea --- /dev/null +++ b/packages/cohort_sdk/src/replicator2/model.rs @@ -0,0 +1,46 @@ +use serde::{Deserialize, Serialize}; +use talos_certifier::model::{CandidateMessage, Decision}; + +use crate::replicator::core::StatemapItem; + +#[derive(Debug)] +pub struct StateMapWithVersion { + pub statemap: Vec, + pub version: u64, +} + +#[derive(Debug, Clone)] +pub enum InstallOutcome { + Success { + version: u64, + started_at: i128, + finished_at: i128, + }, + Error { + version: u64, + started_at: i128, + finished_at: i128, + error: String, + }, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)] +pub struct ReplicatorCandidate2 { + pub candidate: CandidateMessage, + + #[serde(skip_deserializing)] + pub safepoint: Option, + + #[serde(skip_deserializing)] + pub decision: Option, +} + +impl From for ReplicatorCandidate2 { + fn from(value: CandidateMessage) -> Self { + ReplicatorCandidate2 { + candidate: value, + safepoint: None, + decision: None, + } + } +} diff --git a/packages/cohort_sdk/src/replicator2/service.rs b/packages/cohort_sdk/src/replicator2/service.rs new file mode 100644 index 00000000..c0f5bb35 --- /dev/null +++ b/packages/cohort_sdk/src/replicator2/service.rs @@ -0,0 +1,106 @@ +use talos_certifier::{ports::MessageReciever, ChannelMessage}; + +use crate::replicator::core::ReplicatorInstaller; + +use super::cohort_replicator::CohortReplicator; +use super::model::{InstallOutcome, StateMapWithVersion}; + +pub struct ReplicatorService2 {} + +impl ReplicatorService2 { + /// Infinite loop which receives StateMaps through mpsc channel and + /// passes them to "installer". 
+ pub async fn start_installer( + mut rx: tokio::sync::mpsc::Receiver<(StateMapWithVersion, i128)>, + tx_response: tokio::sync::mpsc::Sender, + mut statemap_installer: T, + ) -> Result<(), String> + where + T: ReplicatorInstaller, + { + loop { + let received = rx.recv().await; + if let Some((item, _decided_at)) = received { + let result = statemap_installer.install(item.statemap, Some(item.version)).await; + let response = if let Err(error) = result { + log::error!("installer service: install error: {:?}, {}", error, item.version); + + InstallOutcome::Error { + version: item.version, + started_at: 0, + finished_at: 0, + error, + } + } else { + InstallOutcome::Success { + version: item.version, + started_at: 0, + finished_at: 0, + } + }; + + let _ = tx_response.send(response).await; + } + } + } + + pub async fn start_replicator( + mut replicator: CohortReplicator, + statemaps_tx: tokio::sync::mpsc::Sender<(StateMapWithVersion, i128)>, + mut rx_install_response: tokio::sync::mpsc::Receiver, + ) -> Result<(), String> + where + M: MessageReciever + Send + Sync, + { + loop { + tokio::select! { + is_decision = replicator.receive() => { + if !is_decision { + continue + } + + let mut recent_version = 0; + loop { + let next = replicator.get_next_statemap(); + if next.is_none() { + break; + } + + let (statemap, decided_at) = next + .map(|(statemap, version, decided_at)| (StateMapWithVersion { statemap, version }, decided_at)) + .unwrap(); + + let v = statemap.version; + if recent_version == statemap.version { + log::warn!(" will not schedule the same statemap ver({})", v); + break; + } else { + recent_version = statemap.version; + } + + // m_f_send.clock_start(); + let rslt_send = statemaps_tx.send((statemap, decided_at.unwrap_or(0))).await.map_err(|e| format!("Error: {}", e)); + // m_f_send.clock_end(); + + if let Err(e) = rslt_send { + log::warn!("Unable to send statemap to installer. 
{}", e); + } else { + log::debug!(" scheduled statemap to install ver({})", v); + } + } + } + + opt_install_resp = rx_install_response.recv() => { + if let Some(InstallOutcome::Success { version, started_at: _started_at, finished_at: _finished_at }) = opt_install_resp { + // ack += 1; + if let Err(e) = replicator.update_suffix(version) { + log::warn!("Error updating suffix for version: {}. Error: {}", version, e); + } else { + // let _ = replicator.receiver.commit(version).await; + } + } + } + } + } + } +} diff --git a/packages/examples_support/src/cohort/mod.rs b/packages/examples_support/src/cohort/mod.rs deleted file mode 100644 index 5c15d028..00000000 --- a/packages/examples_support/src/cohort/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod queue_workers; diff --git a/packages/examples_support/src/lib.rs b/packages/examples_support/src/lib.rs index b5a37ee9..825247d7 100644 --- a/packages/examples_support/src/lib.rs +++ b/packages/examples_support/src/lib.rs @@ -1,2 +1 @@ -pub mod cohort; pub mod load_generator; diff --git a/packages/examples_support/src/load_generator/generator.rs b/packages/examples_support/src/load_generator/generator.rs index d4aa516a..059a37bd 100644 --- a/packages/examples_support/src/load_generator/generator.rs +++ b/packages/examples_support/src/load_generator/generator.rs @@ -5,10 +5,15 @@ use time::OffsetDateTime; use crate::load_generator::models::{Progress, StopType}; +use super::models::Generator; + pub struct ControlledRateLoadGenerator {} impl ControlledRateLoadGenerator { - pub async fn generate(stop_type: StopType, target_rate: f32, fn_item_factory: &impl Fn() -> T, tx_output: Arc>) -> Result<(), String> { + pub async fn generate(stop_type: StopType, target_rate: f32, mut generator_impl: G, tx_output: Arc>) -> Result<(), String> + where + G: Generator + Sized + 'static, + { let started_at = OffsetDateTime::now_utc().unix_timestamp_nanos(); if let StopType::LimitExecutionDuration { run_duration } = stop_type { let stop_at = 
started_at + run_duration.as_nanos() as i128; @@ -69,7 +74,7 @@ impl ControlledRateLoadGenerator { } } - let new_item: T = fn_item_factory(); + let new_item: T = generator_impl.generate(); let _ = tx_output.send(new_item).await; generated_count += 1; diff --git a/packages/examples_support/src/load_generator/models.rs b/packages/examples_support/src/load_generator/models.rs index aa460c03..602fb9f6 100644 --- a/packages/examples_support/src/load_generator/models.rs +++ b/packages/examples_support/src/load_generator/models.rs @@ -26,3 +26,7 @@ pub enum StopType { LimitExecutionDuration { run_duration: Duration }, LimitGeneratedTransactions { count: u64 }, } + +pub trait Generator { + fn generate(&mut self) -> T; +} diff --git a/packages/talos_agent/src/agent/core.rs b/packages/talos_agent/src/agent/core.rs index c5b8f02d..2ebe7fbf 100644 --- a/packages/talos_agent/src/agent/core.rs +++ b/packages/talos_agent/src/agent/core.rs @@ -12,6 +12,7 @@ use crate::mpsc::core::{Receiver, Sender}; use async_trait::async_trait; use std::sync::Arc; use std::time::Duration; +use tokio::task::JoinHandle; use tokio::time::error::Elapsed; use tokio::time::timeout; @@ -67,7 +68,7 @@ where rx_decision: TDecisionRx, publisher: Arc>, consumer: Arc>, - ) -> Result<(), AgentError> + ) -> AgentServices where TCertifyRx: Receiver + 'static, TCancelRx: Receiver + 'static, @@ -78,22 +79,30 @@ where log::info!("Publisher and Consumer are ready."); let metrics_client = Arc::clone(&self.metrics_client); - tokio::spawn(async move { + let handle_to_manager_task = tokio::spawn(async move { StateManager::new(agent_config, metrics_client) .run(rx_certify, rx_cancel, rx_decision, publisher) .await; }); let consumer_ref = Arc::clone(&consumer); - tokio::spawn(async move { + let handle_to_decision_reader_task = tokio::spawn(async move { DecisionReaderService::new(consumer_ref, tx_decision).run().await; }); - Ok(()) + AgentServices { + state_manager: handle_to_manager_task, + decision_reader: 
handle_to_decision_reader_task, + } } // $coverage:ignore-end } +pub struct AgentServices { + pub state_manager: JoinHandle<()>, + pub decision_reader: JoinHandle<()>, +} + #[async_trait] impl TalosAgent for TalosAgentImpl where @@ -112,7 +121,7 @@ where let m = CertifyRequestChannelMessage::new(&request, Arc::new(Box::new(tx))); let to_state_manager = Arc::clone(&self.tx_certify); - let max_wait: Duration = request.timeout.unwrap_or_else(|| Duration::from_millis(self.agent_config.timout_ms)); + let max_wait: Duration = request.timeout.unwrap_or_else(|| Duration::from_millis(self.agent_config.timeout_ms)); let result: Result, Elapsed> = timeout(max_wait, async { match to_state_manager.send(m).await { @@ -225,7 +234,7 @@ mod tests { agent: String::from("agent-1"), cohort: String::from("cohort-1"), buffer_size: 10_000, - timout_ms: 1, + timeout_ms: 1, } } diff --git a/packages/talos_agent/src/agent/errors.rs b/packages/talos_agent/src/agent/errors.rs index 97d16991..23959bca 100644 --- a/packages/talos_agent/src/agent/errors.rs +++ b/packages/talos_agent/src/agent/errors.rs @@ -10,7 +10,6 @@ pub enum AgentErrorKind { Certification { xid: String }, CertificationTimeout { xid: String, elapsed_ms: u128 }, Messaging, - Internal, } #[derive(Debug, ThisError)] diff --git a/packages/talos_agent/src/agent/state_manager.rs b/packages/talos_agent/src/agent/state_manager.rs index 730a882b..5b746cf5 100644 --- a/packages/talos_agent/src/agent/state_manager.rs +++ b/packages/talos_agent/src/agent/state_manager.rs @@ -309,7 +309,7 @@ mod tests { agent: String::from("agent-1"), cohort: String::from("cohort-1"), buffer_size: 10_000, - timout_ms: 1, + timeout_ms: 1, } } diff --git a/packages/talos_agent/src/api.rs b/packages/talos_agent/src/api.rs index 9fd33dde..82515eab 100644 --- a/packages/talos_agent/src/api.rs +++ b/packages/talos_agent/src/api.rs @@ -48,7 +48,7 @@ pub struct AgentConfig { pub cohort: String, // The size of internal buffer for candidates pub buffer_size: usize, 
- pub timout_ms: u64, + pub timeout_ms: u64, } #[derive(Clone, Debug)] @@ -77,6 +77,20 @@ pub struct KafkaConfig { pub password: Option, } +impl KafkaConfig { + pub fn map_log_level(level: u64) -> RDKafkaLogLevel { + match level { + 0 => RDKafkaLogLevel::Emerg, + 1 => RDKafkaLogLevel::Alert, + 2 => RDKafkaLogLevel::Critical, + 3 => RDKafkaLogLevel::Error, + 4 => RDKafkaLogLevel::Warning, + 5 => RDKafkaLogLevel::Notice, + 6 => RDKafkaLogLevel::Info, + _ => RDKafkaLogLevel::Debug, + } + } +} /// The agent interface exposed to the client #[async_trait] pub trait TalosAgent {