diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 0000000..7684e17 --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,6 @@ +[[profile.default.overrides]] +filter = 'package(kora-e2e)' +test-group = 'e2e' + +[test-groups.e2e] +max-threads = 1 diff --git a/.dockerignore b/.dockerignore index 38a8060..bdcb788 100644 --- a/.dockerignore +++ b/.dockerignore @@ -11,3 +11,10 @@ node_modules/ *~ .DS_Store Thumbs.db + +# Documentation - exclude markdown but keep README files needed by crate docs +*.md +!README.md +!bin/**/README.md +!crates/**/README.md +!docker/README.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b7ae1e..e54ad03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Build - run: cargo build --all-targets + run: cargo build --workspace --all-targets --locked test: name: Test @@ -33,7 +33,28 @@ jobs: - uses: Swatinem/rust-cache@v2 - uses: taiki-e/install-action@nextest - name: Test - run: cargo nextest run --all-features --no-tests=pass + run: cargo nextest run --workspace --all-features --exclude kora-e2e --no-tests=pass + + e2e: + name: E2E Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@nextest + - name: E2E tests + run: cargo nextest run -p kora-e2e --all-features --run-ignored all -j1 --no-tests=fail + + doctest: + name: Doc Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Doc tests + run: cargo test --workspace --all-features --doc fmt: name: Format @@ -56,7 +77,7 @@ jobs: components: clippy - uses: Swatinem/rust-cache@v2 - name: Clippy - run: cargo clippy --all-targets --all-features -- -D warnings + run: cargo clippy --workspace --all-targets 
--all-features -- -D warnings deny: name: Deny diff --git a/.gitignore b/.gitignore index 8d1ef8a..ec264d9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ target/ *.swp *.swo .env + +# Ansible inventory contains real server IPs — use hosts.yml.example as template +ansible/inventory/hosts.yml diff --git a/Cargo.lock b/Cargo.lock index 8dc7718..2e3bf95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,7 +190,7 @@ dependencies = [ "either", "serde", "serde_with", - "sha2 0.10.9", + "sha2", ] [[package]] @@ -213,7 +213,7 @@ dependencies = [ "either", "serde", "serde_with", - "sha2 0.10.9", + "sha2", ] [[package]] @@ -608,6 +608,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ark-ed-on-bls12-381-bandersnatch" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1786b2e3832f6f0f7c8d62d5d5a282f6952a1ab99981c54cd52b6ac1d8f02df5" +dependencies = [ + "ark-bls12-381", + "ark-ec", + "ark-ff 0.5.0", + "ark-r1cs-std", + "ark-std 0.5.0", +] + [[package]] name = "ark-ff" version = "0.3.0" @@ -1069,15 +1082,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "block-buffer" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" -dependencies = [ - "generic-array", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -1330,28 +1334,42 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "commonware-actor" +version = "2026.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10915b384ab9478721f5ff63eec55096bbce48a6ccaf695065875d392c021e92" +dependencies = [ + "cfg-if", + "commonware-macros", + "commonware-runtime", + "crossbeam-queue", + "futures-util", + "parking_lot", +] + [[package]] name = "commonware-broadcast" -version = "2026.4.0" 
+version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afe7362c8942f20f0eab11756932b7d1c41f4cc99e142cb563e17a04b40095d5" +checksum = "5ca9f35723f84c7f18e7832da263b86249aaa42e035f5b34d61896392fcc3a64" dependencies = [ + "commonware-actor", "commonware-codec", "commonware-cryptography", "commonware-macros", "commonware-p2p", "commonware-runtime", "commonware-utils", - "prometheus-client", "thiserror 2.0.18", "tracing", ] [[package]] name = "commonware-codec" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06e32817f35fb517ceb6102d984f9a85fde85666c96f053638e323b8597f2f7" +checksum = "a771439216c7b5813e743937cb9b8dd700bce435c47fc73cd9aae1492f8696ce" dependencies = [ "bytes", "cfg-if", @@ -1364,9 +1382,9 @@ dependencies = [ [[package]] name = "commonware-coding" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e60b2b324de47773c3d4af4d83bfc76d2c287ba7f2d6eb8c2aa5068f877b4bb" +checksum = "5d0f4083138dd8c873165a2c0b4ae46a7530a6b2a49a8544153e61d12bbc215a" dependencies = [ "bytes", "commonware-codec", @@ -1386,16 +1404,18 @@ dependencies = [ [[package]] name = "commonware-consensus" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a67374d82c69e870105f010b895f1768952df5d0fa0d0550dedf162de16f44e" +checksum = "2170a9a7f6fd97e102d17f4fa02306821a5b6b4a6707652b0bbeeb5e878348cd" dependencies = [ "bytes", "cfg-if", + "commonware-actor", "commonware-broadcast", "commonware-codec", "commonware-coding", "commonware-cryptography", + "commonware-formatting", "commonware-macros", "commonware-math", "commonware-p2p", @@ -1406,7 +1426,6 @@ dependencies = [ "commonware-utils", "futures", "pin-project", - "prometheus-client", "rand 0.8.6", "rand_core 0.6.4", "rand_distr", @@ -1417,11 +1436,17 @@ dependencies = [ [[package]] name = 
"commonware-cryptography" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f09b55dd5510c3b7613a573606a41961c2788709ffde053d4e644bec0bff2c" +checksum = "8b13a9a7f8870ed9b65f387aedd56c3859a487317871cdb5d0baf8b0f7f99d37" dependencies = [ "anyhow", + "ark-ec", + "ark-ed-on-bls12-381-bandersnatch", + "ark-ff 0.5.0", + "ark-r1cs-std", + "ark-relations", + "ark-serialize 0.5.0", "aws-lc-rs", "blake3", "blst", @@ -1429,14 +1454,15 @@ dependencies = [ "cfg-if", "chacha20poly1305", "commonware-codec", + "commonware-formatting", "commonware-macros", "commonware-math", "commonware-parallel", "commonware-utils", "crc-fast", "ctutils", + "curve25519-dalek", "ecdsa", - "ed25519-consensus", "getrandom 0.2.17", "num-rational", "num-traits", @@ -1444,17 +1470,27 @@ dependencies = [ "rand 0.8.6", "rand_chacha 0.3.1", "rand_core 0.6.4", - "sha2 0.10.9", + "sha2", "thiserror 2.0.18", "x25519-dalek", "zeroize", ] +[[package]] +name = "commonware-formatting" +version = "2026.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c134e31411b32f337a60bb19e5bb0397fafa0a92b7c156cb0643b729e7135b49" +dependencies = [ + "commonware-macros", + "const-hex", +] + [[package]] name = "commonware-macros" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd313d9299e13bf995999c7a0ed8cc570eef6cd0972fcffc6e2c682cfba6663" +checksum = "5419e6eb2c4c9e56517cfc07062a984b882dabf573d53191a73b98358cd9782a" dependencies = [ "commonware-macros-impl", "tokio", @@ -1462,9 +1498,9 @@ dependencies = [ [[package]] name = "commonware-macros-impl" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc385e646d91b5397c93816985421878d627839834f7cf85a8da2ac9f8b98b7" +checksum = "82dd7062336fc7d2107e9a63312ef1b9811d06bfe56dfd56f97e6ce5d4aa0565" dependencies = [ "proc-macro-crate", 
"proc-macro2", @@ -1475,9 +1511,9 @@ dependencies = [ [[package]] name = "commonware-math" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d834ed8bf601e113b9cd2ba284dd0e95adf558933dc727f52f8879434cb286" +checksum = "d94e682199bad2c4b18a6704711b3e8fd7e6dbd5b7b87224882d8be350e3f7a2" dependencies = [ "bytes", "commonware-codec", @@ -1489,10 +1525,11 @@ dependencies = [ [[package]] name = "commonware-p2p" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c93f730bf4aaeadffb589eb50e431f7a5f8495c158dda1127c61f6e74c597ab" +checksum = "24ae6f28f844da58482233d6b140b63b79e5a124c111d1e999cceb660af2ede1" dependencies = [ + "commonware-actor", "commonware-codec", "commonware-cryptography", "commonware-macros", @@ -1506,7 +1543,6 @@ dependencies = [ "num-integer", "num-rational", "num-traits", - "prometheus-client", "rand 0.8.6", "rand_core 0.6.4", "rand_distr", @@ -1516,9 +1552,9 @@ dependencies = [ [[package]] name = "commonware-parallel" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db29306a40279ad54d06b42c623a05fbb5333546b5003c921796bc856b423106" +checksum = "9d6a412b4c868174963b38ff2dad6686c573ec56834d9b39d20b73437c6d9726" dependencies = [ "cfg-if", "commonware-macros", @@ -1527,11 +1563,12 @@ dependencies = [ [[package]] name = "commonware-resolver" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00dfe9932b33cc31a04b7c68bf543eef7e6d04b70cf6a53880d03407d60a01e6" +checksum = "4ac5a7f3bb4b4f6478d1bf3630b7dabad1e7f960b6f54189bc508572f55cdb5d" dependencies = [ "bytes", + "commonware-actor", "commonware-codec", "commonware-cryptography", "commonware-macros", @@ -1540,7 +1577,6 @@ dependencies = [ "commonware-stream", "commonware-utils", "futures", - "prometheus-client", "rand 0.8.6", 
"thiserror 2.0.18", "tracing", @@ -1548,20 +1584,22 @@ dependencies = [ [[package]] name = "commonware-runtime" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d4ae4c804d0d9c1df615b1c7846e4e5e64fdb4228685487cb67803e67388411" +checksum = "f1a177e596a023fe1aca8d1b6dc202598322fce3c49797cad5f5c21b6c0a3bcd" dependencies = [ "axum", "bytes", "cfg-if", "commonware-codec", "commonware-cryptography", + "commonware-formatting", "commonware-macros", "commonware-parallel", + "commonware-runtime-macros", "commonware-utils", "criterion", - "crossbeam-queue", + "crossbeam-utils", "futures", "getrandom 0.2.17", "governor", @@ -1573,7 +1611,7 @@ dependencies = [ "rand 0.8.6", "rand_core 0.6.4", "rayon", - "sha2 0.10.9", + "sha2", "sysinfo", "thiserror 2.0.18", "tokio", @@ -1582,11 +1620,23 @@ dependencies = [ "tracing-subscriber 0.3.23", ] +[[package]] +name = "commonware-runtime-macros" +version = "2026.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a79b930d67e8c12dc653bdcc907fa60df07e00220026b83341d5e4e7df0592" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "commonware-storage" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca1c42cf37aa27c3f83c31591cad4f1d96317eff81c1eb442e17191adcf9b413" +checksum = "e295ccb2e7af312c82d3f2852e532f08c83cd848c96c0eb96e87fd30f863256a" dependencies = [ "ahash", "anyhow", @@ -1594,14 +1644,13 @@ dependencies = [ "cfg-if", "commonware-codec", "commonware-cryptography", + "commonware-formatting", "commonware-macros", "commonware-parallel", "commonware-runtime", "commonware-utils", "futures", "futures-util", - "prometheus-client", - "rayon", "thiserror 2.0.18", "tracing", "zstd", @@ -1609,13 +1658,14 @@ dependencies = [ [[package]] name = "commonware-stream" -version = "2026.4.0" +version = "2026.5.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c15b328d5f05fff750368a71e2307c380cce52df9712a5b30199b8af4e700c" +checksum = "18038fa443164afdfbfe0ab6a38ddc71327f5da18376fd42d6184ebd5d93d8d2" dependencies = [ "chacha20poly1305", "commonware-codec", "commonware-cryptography", + "commonware-formatting", "commonware-macros", "commonware-runtime", "commonware-utils", @@ -1629,13 +1679,14 @@ dependencies = [ [[package]] name = "commonware-utils" -version = "2026.4.0" +version = "2026.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf66d7b5c89489d71b0669bda2e014e7c9ffcdf65629ae31886efe5361b1179" +checksum = "dfad44dd2c8e97d55dbe271802740e919d2ce8839277ea53d958381caab92a44" dependencies = [ "bytes", "cfg-if", "commonware-codec", + "commonware-formatting", "commonware-macros", "futures", "getrandom 0.2.17", @@ -1889,6 +1940,7 @@ dependencies = [ "cfg-if", "cpufeatures 0.2.17", "curve25519-dalek-derive", + "digest 0.10.7", "fiat-crypto", "rustc_version 0.4.1", "subtle", @@ -1906,19 +1958,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "curve25519-dalek-ng" -version = "4.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c359b7249347e46fb28804470d071c921156ad62b3eef5d34e2ba867533dec8" -dependencies = [ - "byteorder", - "digest 0.9.0", - "rand_core 0.6.4", - "subtle-ng", - "zeroize", -] - [[package]] name = "darling" version = "0.23.0" @@ -2048,7 +2087,7 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer 0.10.4", + "block-buffer", "const-oid", "crypto-common", "subtle", @@ -2098,21 +2137,6 @@ dependencies = [ "spki", ] -[[package]] -name = "ed25519-consensus" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8465edc8ee7436ffea81d21a019b16676ee3db267aa8d5a8d729581ecf998b" 
-dependencies = [ - "curve25519-dalek-ng", - "hex", - "rand_core 0.6.4", - "serde", - "sha2 0.9.9", - "thiserror 1.0.69", - "zeroize", -] - [[package]] name = "educe" version = "0.6.0" @@ -3120,7 +3144,7 @@ dependencies = [ "elliptic-curve", "once_cell", "serdect", - "sha2 0.10.9", + "sha2", "signature", ] @@ -3147,13 +3171,14 @@ dependencies = [ name = "keygen" version = "0.1.0" dependencies = [ + "alloy-primitives", "clap", "commonware-codec", "commonware-cryptography", "commonware-utils", - "ed25519-consensus", "eyre", "hex", + "k256", "kora-config", "kora-dkg", "kora-domain", @@ -3184,6 +3209,7 @@ checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" name = "kora" version = "0.1.0" dependencies = [ + "axum", "clap", "commonware-codec", "commonware-cryptography", @@ -3191,7 +3217,6 @@ dependencies = [ "commonware-runtime", "commonware-utils", "eyre", - "futures", "hex", "kora-cli", "kora-config", @@ -3216,6 +3241,7 @@ dependencies = [ "bytes", "commonware-codec", "commonware-cryptography", + "commonware-parallel", "commonware-runtime", "commonware-storage", "commonware-utils", @@ -3223,6 +3249,7 @@ dependencies = [ "kora-qmdb", "tempfile", "thiserror 2.0.18", + "tracing", ] [[package]] @@ -3249,7 +3276,6 @@ dependencies = [ "alloy-primitives", "commonware-codec", "commonware-cryptography", - "ed25519-consensus", "rand 0.8.6", "rstest", "serde", @@ -3264,9 +3290,11 @@ name = "kora-consensus" version = "0.1.0" dependencies = [ "alloy-consensus 1.8.3", + "alloy-eips 1.8.3", "alloy-primitives", "commonware-cryptography", "futures", + "k256", "kora-config", "kora-domain", "kora-executor", @@ -3276,6 +3304,7 @@ dependencies = [ "rstest", "thiserror 2.0.18", "tokio", + "tracing", ] [[package]] @@ -3346,6 +3375,7 @@ dependencies = [ "commonware-utils", "futures", "k256", + "kora-config", "kora-consensus", "kora-crypto", "kora-domain", @@ -3371,14 +3401,16 @@ dependencies = [ "alloy-consensus 1.8.3", "alloy-eips 1.8.3", "alloy-primitives", - 
"alloy-rlp", "futures", + "k256", "kora-qmdb", "kora-traits", "revm", "rstest", + "sha3", "thiserror 2.0.18", "tokio", + "tracing", ] [[package]] @@ -3393,6 +3425,7 @@ dependencies = [ "revm", "thiserror 2.0.18", "tokio", + "tracing", ] [[package]] @@ -3412,11 +3445,13 @@ dependencies = [ "alloy-consensus 1.8.3", "alloy-evm", "alloy-primitives", + "commonware-consensus", "commonware-cryptography", "commonware-runtime", "commonware-utils", "futures", "k256", + "kora-config", "kora-consensus", "kora-domain", "kora-executor", @@ -3424,7 +3459,10 @@ dependencies = [ "kora-qmdb", "kora-qmdb-ledger", "kora-traits", + "kora-txpool", "thiserror 2.0.18", + "tokio", + "tracing", ] [[package]] @@ -3432,6 +3470,7 @@ name = "kora-marshal" version = "0.1.0" dependencies = [ "bytes", + "commonware-actor", "commonware-broadcast", "commonware-codec", "commonware-consensus", @@ -3439,7 +3478,6 @@ dependencies = [ "commonware-macros", "commonware-p2p", "commonware-parallel", - "commonware-resolver", "commonware-runtime", "commonware-storage", "commonware-utils", @@ -3449,6 +3487,13 @@ dependencies = [ "tracing-subscriber 0.3.23", ] +[[package]] +name = "kora-metrics" +version = "0.1.0" +dependencies = [ + "prometheus-client", +] + [[package]] name = "kora-overlay" version = "0.1.0" @@ -3482,6 +3527,7 @@ dependencies = [ "kora-traits", "thiserror 2.0.18", "tokio", + "tracing", ] [[package]] @@ -3491,19 +3537,26 @@ dependencies = [ "alloy-consensus 1.8.3", "alloy-eips 1.8.3", "alloy-primitives", + "commonware-actor", "commonware-codec", "commonware-consensus", "commonware-cryptography", "commonware-runtime", "commonware-utils", + "k256", "kora-consensus", "kora-domain", "kora-executor", "kora-indexer", "kora-ledger", + "kora-metrics", "kora-overlay", "kora-qmdb-ledger", "kora-rpc", + "sha3", + "tempfile", + "thiserror 2.0.18", + "tokio", "tracing", ] @@ -3518,15 +3571,19 @@ dependencies = [ "axum", "jsonrpsee", "k256", + "kora-domain", "kora-executor", "kora-indexer", "kora-traits", + 
"kora-txpool", "parking_lot", + "prometheus-client", "serde", "serde_json", "sha3", "thiserror 2.0.18", "tokio", + "tower 0.4.13", "tower 0.5.3", "tower-http", "tracing", @@ -3539,14 +3596,19 @@ dependencies = [ "alloy-consensus 1.8.3", "alloy-primitives", "anyhow", + "axum", + "bytes", + "commonware-actor", "commonware-codec", "commonware-consensus", "commonware-cryptography", "commonware-p2p", - "commonware-parallel", "commonware-runtime", + "commonware-storage", "commonware-utils", "futures", + "governor", + "hex", "kora-config", "kora-consensus", "kora-dkg", @@ -3555,6 +3617,7 @@ dependencies = [ "kora-indexer", "kora-ledger", "kora-marshal", + "kora-metrics", "kora-overlay", "kora-qmdb-ledger", "kora-reporters", @@ -3563,7 +3626,11 @@ dependencies = [ "kora-simplex", "kora-transport", "kora-txpool", + "parking_lot", + "prometheus-client", "rand 0.8.6", + "tempfile", + "tokio", "tracing", ] @@ -3571,6 +3638,7 @@ dependencies = [ name = "kora-service" version = "0.1.0" dependencies = [ + "commonware-actor", "commonware-consensus", "commonware-cryptography", "commonware-p2p", @@ -3641,7 +3709,6 @@ dependencies = [ "kora-config", "kora-service", "kora-transport", - "prometheus-client", "rand 0.8.6", "thiserror 2.0.18", ] @@ -3656,6 +3723,7 @@ dependencies = [ "alloy-rlp", "k256", "kora-domain", + "kora-metrics", "kora-traits", "parking_lot", "rand 0.8.6", @@ -3711,7 +3779,6 @@ dependencies = [ "alloy-primitives", "clap", "eyre", - "futures", "hex", "k256", "rand 0.8.6", @@ -4125,7 +4192,7 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "sha2 0.10.9", + "sha2", ] [[package]] @@ -4484,7 +4551,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.117", @@ -4945,7 +5012,7 @@ dependencies = [ "revm-primitives", "ripemd", "secp256k1 0.31.1", - "sha2 0.10.9", + 
"sha2", ] [[package]] @@ -5474,19 +5541,6 @@ dependencies = [ "digest 0.10.7", ] -[[package]] -name = "sha2" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" -dependencies = [ - "block-buffer 0.9.0", - "cfg-if", - "cpufeatures 0.2.17", - "digest 0.9.0", - "opaque-debug", -] - [[package]] name = "sha2" version = "0.10.9" @@ -5670,12 +5724,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "subtle-ng" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "734676eb262c623cec13c3155096e08d1f8f29adce39ba17948b18dad1e54142" - [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 3361f0f..3172aef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ codegen-units = 1 # Local crates kora-builder = { path = "crates/node/builder" } kora-domain = { path = "crates/node/domain" } +kora-metrics = { path = "crates/node/metrics" } kora-cli = { path = "crates/utilities/cli" } kora-crypto = { path = "crates/utilities/crypto" } kora-backend = { path = "crates/storage/backend" } @@ -74,18 +75,19 @@ kora-rpc = { path = "crates/node/rpc" } kora-txpool = { path = "crates/node/txpool" } kora-e2e = { path = "crates/e2e" } -commonware-p2p = "2026.4.0" -commonware-utils = "2026.4.0" -commonware-codec = "2026.4.0" -commonware-stream = "2026.4.0" -commonware-macros = "2026.4.0" -commonware-storage = "2026.4.0" -commonware-runtime = "2026.4.0" -commonware-resolver = "2026.4.0" -commonware-parallel = "2026.4.0" -commonware-broadcast = "2026.4.0" -commonware-consensus = "2026.4.0" -commonware-cryptography = "2026.4.0" +commonware-p2p = "2026.5.0" +commonware-actor = "2026.5.0" +commonware-utils = "2026.5.0" +commonware-codec = "2026.5.0" +commonware-stream = "2026.5.0" 
+commonware-macros = "2026.5.0" +commonware-storage = "2026.5.0" +commonware-runtime = "2026.5.0" +commonware-resolver = "2026.5.0" +commonware-parallel = "2026.5.0" +commonware-broadcast = "2026.5.0" +commonware-consensus = "2026.5.0" +commonware-cryptography = "2026.5.0" # Alloy alloy-primitives = "1.0" @@ -94,7 +96,7 @@ alloy-eips = "1.0" alloy-rlp = "0.3" # Execution -revm = { version = "38.0.0", default-features = false } +revm = { version = "38.0.0", default-features = false, features = ["optional_balance_check", "optional_no_base_fee"] } alloy-evm = { version = "0.34.0", default-features = false } # Async @@ -111,7 +113,7 @@ clap = { version = "4", features = ["derive"] } # Tracing tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } # Error handling thiserror = "2" @@ -134,6 +136,7 @@ k256 = "0.13" sha3 = "0.10" # HTTP +axum = "0.8" reqwest = { version = "0.12", features = ["json"] } # Testing diff --git a/Justfile b/Justfile index f029142..e8f6009 100644 --- a/Justfile +++ b/Justfile @@ -3,14 +3,22 @@ default: @just --list # Run the full CI suite -ci: fmt clippy test deny +ci: fmt build-all-locked clippy test test-e2e test-doc deny # Run all checks -check: fmt clippy test +check: fmt build-all-locked clippy test test-e2e test-doc -# Run tests +# Run non-e2e tests test: - cargo nextest run --workspace --all-features + cargo nextest run --workspace --all-features --exclude kora-e2e --no-tests=pass + +# Run e2e tests serially +test-e2e: + cargo nextest run -p kora-e2e --all-features --run-ignored all -j1 --no-tests=fail + +# Run doc tests +test-doc: + cargo test --workspace --all-features --doc # Build in release mode build: @@ -18,7 +26,11 @@ build: # Build all targets build-all: - cargo build --all-targets + cargo build --workspace --all-targets + +# Build all targets with the checked-in lockfile +build-all-locked: + cargo build --workspace 
--all-targets --locked # Check formatting fmt: @@ -69,18 +81,38 @@ devnet-status: devnet-stats: cd docker && just stats +# Devnet health diagnostics report +devnet-health: + cd docker && just health + # Build docker images docker-build: cd docker && just build # Run load generator against devnet loadgen *args: - cargo run --release --bin loadgen -- {{args}} + cargo run --release -p loadgen --bin loadgen -- {{args}} # Quick load test (1000 txs) loadtest: - cargo run --release --bin loadgen -- --total-txs 1000 + cargo run --release -p loadgen --bin loadgen -- --total-txs 1000 --broadcast-rpc-urls http://127.0.0.1:8546,http://127.0.0.1:8547,http://127.0.0.1:8548 # Stress test (10000 txs with 50 accounts) stresstest: - cargo run --release --bin loadgen -- --total-txs 10000 --accounts 50 + cargo run --release -p loadgen --bin loadgen -- --total-txs 10000 --accounts 50 --broadcast-rpc-urls http://127.0.0.1:8546,http://127.0.0.1:8547,http://127.0.0.1:8548 + +# Provision the remote server (one-time) +remote-provision: + cd ansible && ansible-playbook playbooks/provision.yml + +# Deploy to remote server +remote-deploy *args: + cd ansible && ansible-playbook playbooks/deploy.yml {{args}} + +# Reset remote devnet (clean slate) +remote-reset: + cd ansible && ansible-playbook playbooks/reset.yml + +# Start observability on remote +remote-observe: + cd ansible && ansible-playbook playbooks/observe.yml diff --git a/README.md b/README.md index 8e7bdb5..11a840c 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,9 @@ peers are authenticated Commonware P2P nodes that follow validator traffic witho participating in consensus. See the [Docker devnet README](./docker/README.md#secondary-peers) for how a secondary peer joins the network. +For a multi-host public testnet with validator public IP addresses, see the +[public testnet standup runbook](./docs/public-testnet.md). + > [!TIP] > See the [Justfile](./Justfile) for other useful commands. 
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000..39c46f4 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/hosts.yml +roles_path = roles +# Disabled for first-time provisioning of fresh servers. Re-enable or use +# ANSIBLE_HOST_KEY_CHECKING=True once host keys are trusted. +host_key_checking = False +timeout = 30 +stdout_callback = default +result_format = yaml + +[ssh_connection] +pipelining = True +ssh_args = -o ControlMaster=auto -o ControlPersist=60s diff --git a/ansible/inventory/group_vars/devnet.yml b/ansible/inventory/group_vars/devnet.yml new file mode 100644 index 0000000..9309d9c --- /dev/null +++ b/ansible/inventory/group_vars/devnet.yml @@ -0,0 +1,39 @@ +# Remote paths +remote_project_dir: /opt/kora +compose_file: "{{ remote_project_dir }}/docker/compose/devnet.yaml" +bake_file: "{{ remote_project_dir }}/docker/docker-bake.hcl" + +# Docker +docker_image: "kora:local" +compose_project_name: kora-devnet +docker_dns_servers: [] # Set to e.g. ["185.12.64.1", "185.12.64.2"] for Hetzner + +# Devnet config +num_validators: 4 +dkg_mode: trusted # 'trusted' or 'interactive' +rust_log: info +chain_id: 1337 + +# Build +build_timeout: 3600 # 1 hour +build_poll: 30 # poll every 30s + +# Health check +health_retries: 24 +health_delay: 5 + +# Ports +ssh_port: 22 +p2p_ports: "30400:30403" +secondary_p2p_port: 30500 +rpc_ports: "8545:8548" +metrics_ports: "9000:9003" +prometheus_port: 9090 +loki_port: 3100 +grafana_port: 3000 +grafana_admin_password: admin # CHANGEME: override in host_vars or vault for production + +# Trusted IPs allowed to access RPC, metrics, Prometheus, and Grafana. +# CHANGEME: restrict to your operator/monitoring IPs for production. +trusted_ips: + - "0.0.0.0/0" # WARNING: allows all traffic. Replace with specific IPs. 
diff --git a/ansible/inventory/hosts.yml.example b/ansible/inventory/hosts.yml.example new file mode 100644 index 0000000..3fd2294 --- /dev/null +++ b/ansible/inventory/hosts.yml.example @@ -0,0 +1,17 @@ +# Kora devnet Ansible inventory +# +# Copy this file to hosts.yml and fill in your values: +# cp hosts.yml.example hosts.yml +# +# Requirements: +# - Target must be Arch Linux (playbooks use pacman) +# - Root SSH access via key-based authentication +# - hosts.yml is gitignored to protect server IPs +all: + children: + devnet: + hosts: + hetzner-devnet: + ansible_host: YOUR_SERVER_IP + ansible_user: root + ansible_python_interpreter: /usr/bin/python3 diff --git a/ansible/playbooks/chaos-node-failure.yml b/ansible/playbooks/chaos-node-failure.yml new file mode 100644 index 0000000..96b6ac7 --- /dev/null +++ b/ansible/playbooks/chaos-node-failure.yml @@ -0,0 +1,125 @@ +--- +# Chaos test: stop a validator, measure impact, restart, verify recovery +# +# Usage: +# ansible-playbook playbooks/chaos-node-failure.yml -i inventory/hosts.yml \ +# -e target_node=2 -e stop_duration=45 -e recovery_wait=60 +# +# Variables: +# target_node: validator index to stop (0-3, default: 2) +# stop_duration: seconds to keep the node stopped (default: 45) +# recovery_wait: seconds to wait after restart before checking (default: 60) +# +- name: "Chaos: Single Node Failure Test" + hosts: devnet + become: true + vars: # Plain literal defaults: a play var templated from itself recurses; -e extra vars still override these. + target_node: 2 + stop_duration: 45 + recovery_wait: 60 + compose_dir: "{{ remote_project_dir }}/docker/compose" + + tasks: + - name: "Phase 0: Capture baseline block rate" + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: baseline_query + + - name: Show baseline + ansible.builtin.debug: + msg: "Baseline blocks/sec: {{ 
(baseline_query.json.data.result[0].value[1] | float) | round(1) }}" + when: baseline_query.json.data.result | length > 0 + + - name: "Phase 1: Stop validator-node{{ target_node }}" + ansible.builtin.command: + cmd: "docker compose -f devnet.yaml stop validator-node{{ target_node }}" + chdir: "{{ compose_dir }}" + + - name: "Wait {{ stop_duration }}s during outage" + ansible.builtin.pause: + seconds: "{{ stop_duration | int }}" + + - name: "Phase 2: Measure block rate during outage" + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: outage_query + + - name: Show outage impact + ansible.builtin.debug: + msg: "Outage blocks/sec: {{ (outage_query.json.data.result[0].value[1] | float) | round(1) }}" + when: outage_query.json.data.result | length > 0 + + - name: "Phase 3: Restart validator-node{{ target_node }}" + ansible.builtin.command: + cmd: "docker compose -f devnet.yaml start validator-node{{ target_node }}" + chdir: "{{ compose_dir }}" + + - name: "Wait {{ recovery_wait }}s for recovery" + ansible.builtin.pause: + seconds: "{{ recovery_wait | int }}" + + - name: "Phase 4: Measure post-recovery block rate" + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: recovery_query + + - name: "Phase 4: Check heights across all validators" + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=finalized_height" + return_content: true + register: heights_query + + - name: "Phase 4: Check resolver blocked peers" + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: 
"query=engine_resolver_resolver_peers_blocked" + return_content: true + register: blocked_query + + - name: "Phase 4: Check restarted node logs" + ansible.builtin.command: + cmd: "docker logs {{ compose_project_name }}-validator-node{{ target_node }}-1 --tail 15" + register: node_logs + + - name: "=== TEST RESULTS ===" + ansible.builtin.debug: + msg: | + === Node Failure Test Results === + Target: validator-node{{ target_node }} + Stop duration: {{ stop_duration }}s + Recovery wait: {{ recovery_wait }}s + + Block rate: + Baseline: {{ (baseline_query.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + During outage: {{ (outage_query.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + After recovery: {{ (recovery_query.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + + Heights: + {% for r in heights_query.json.data.result %} + {{ r.metric.instance }}: {{ r.value[1] }} + {% endfor %} + + Resolver blocked peers: + {% for r in blocked_query.json.data.result %} + {{ r.metric.instance }}: {{ r.value[1] }} + {% endfor %} + + Node{{ target_node }} logs (last 15 lines): + {{ node_logs.stderr | default(node_logs.stdout, true) }} diff --git a/ansible/playbooks/chaos-rolling-restart.yml b/ansible/playbooks/chaos-rolling-restart.yml new file mode 100644 index 0000000..076d943 --- /dev/null +++ b/ansible/playbooks/chaos-rolling-restart.yml @@ -0,0 +1,72 @@ +--- +# Chaos test: rolling restart of all validators one at a time +# +# Usage: +# ansible-playbook playbooks/chaos-rolling-restart.yml -i inventory/hosts.yml \ +# -e stop_duration=30 -e recovery_wait=60 +# +# Simulates a rolling upgrade: stop each node, wait, restart, verify health, +# then move to the next node. Reports whether each node recovers and whether +# the network survives the full rolling restart. 
+# +- name: "Chaos: Rolling Restart Test" + hosts: devnet + become: true + vars: + stop_duration: "{{ stop_duration | default(30) }}" + recovery_wait: "{{ recovery_wait | default(60) }}" + compose_dir: "{{ remote_project_dir }}/docker/compose" + prom: "http://localhost:{{ prometheus_port }}" + + tasks: + - name: "Baseline: capture block rate" + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: baseline + + - name: Show baseline + ansible.builtin.debug: + msg: "Baseline: {{ (baseline.json.data.result[0].value[1] | float) | round(1) }} blocks/sec" + + - name: "Rolling restart: iterate through nodes 0-3" + ansible.builtin.include_tasks: + file: ../roles/chaos/tasks/restart-one-node.yml + loop: "{{ range(num_validators | int) | list }}" + loop_control: + loop_var: node_idx + + - name: "Final: check all heights" + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=finalized_height" + return_content: true + register: final_heights + + - name: "Final: check block rate" + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: final_rate + + - name: "=== ROLLING RESTART RESULTS ===" + ansible.builtin.debug: + msg: | + Rolling restart complete. 
+ Baseline: {{ (baseline.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + Final: {{ (final_rate.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + + Final heights: + {% for r in final_heights.json.data.result %} + {{ r.metric.instance }}: {{ r.value[1] }} + {% endfor %} + + VERDICT: {% if (final_rate.json.data.result[0].value[1] | float) > 1.0 %}PASS - network survived{% else %}FAIL - network degraded or stalled{% endif %} diff --git a/ansible/playbooks/collect-logs.yml b/ansible/playbooks/collect-logs.yml new file mode 100644 index 0000000..a56dc37 --- /dev/null +++ b/ansible/playbooks/collect-logs.yml @@ -0,0 +1,71 @@ +--- +# Collect and archive validator logs for debugging +# +# Usage: +# ansible-playbook playbooks/collect-logs.yml -i inventory/hosts.yml +# ansible-playbook playbooks/collect-logs.yml -i inventory/hosts.yml -e since="10m" +# ansible-playbook playbooks/collect-logs.yml -i inventory/hosts.yml -e level="ERROR" +# +- name: Collect validator logs + hosts: devnet + become: true + vars: + log_since: "{{ since | default('30m') }}" + log_level: "{{ level | default('WARN') }}" + output_dir: "/tmp/kora-logs-{{ ansible_date_time.iso8601_basic_short }}" + + tasks: + - name: Create output directory + ansible.builtin.file: + path: "{{ output_dir }}" + state: directory + mode: "0755" + + - name: Export full logs per validator + ansible.builtin.shell: | + docker logs {{ compose_project_name }}-validator-node{{ item }}-1 \ + --since {{ log_since }} > {{ output_dir }}/validator-node{{ item }}.log 2>&1 + loop: "{{ range(num_validators | int) | list }}" + + - name: Export secondary logs + ansible.builtin.shell: | + docker logs {{ compose_project_name }}-secondary-node0-1 \ + --since {{ log_since }} > {{ output_dir }}/secondary-node0.log 2>&1 + + - name: Extract warnings/errors summary + ansible.builtin.shell: | + echo "=== {{ log_level }}+ Messages (last {{ log_since }}) ===" > {{ output_dir }}/summary.txt + echo "" >> {{ output_dir 
}}/summary.txt + for n in $(seq 0 {{ (num_validators | int) - 1 }}); do + echo "--- validator-node$n ---" >> {{ output_dir }}/summary.txt + grep -oP '(?:{{ log_level }}|ERROR)\s+\S+.*' {{ output_dir }}/validator-node${n}.log \ + | sort | uniq -c | sort -rn | head -20 >> {{ output_dir }}/summary.txt + echo "" >> {{ output_dir }}/summary.txt + done + + - name: Create tar archive + ansible.builtin.command: + cmd: "tar czf {{ output_dir }}.tar.gz -C /tmp {{ output_dir | basename }}" + + - name: Fetch archive to local machine + ansible.builtin.fetch: + src: "{{ output_dir }}.tar.gz" + dest: "{{ playbook_dir }}/../tmp/logs/" + flat: true + + - name: Show summary + ansible.builtin.command: + cmd: "cat {{ output_dir }}/summary.txt" + register: summary + + - name: Display summary + ansible.builtin.debug: + msg: "{{ summary.stdout }}" + + - name: Cleanup remote + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - "{{ output_dir }}" + - "{{ output_dir }}.tar.gz" diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000..811ee71 --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,12 @@ +--- +# Repeatable deploy: sync code → build image → start devnet +- name: Deploy devnet + hosts: devnet + become: true + roles: + - role: sync + tags: [sync] + - role: build + tags: [build] + - role: devnet + tags: [devnet] diff --git a/ansible/playbooks/diagnose.yml b/ansible/playbooks/diagnose.yml new file mode 100644 index 0000000..9163188 --- /dev/null +++ b/ansible/playbooks/diagnose.yml @@ -0,0 +1,165 @@ +--- +# Comprehensive diagnostic snapshot of the devnet +# +# Usage: +# ansible-playbook playbooks/diagnose.yml -i inventory/hosts.yml +# +# Collects: block rate, heights, peer counts, resolver state, resource usage, +# recent warnings/errors, txpool state, and alert status. 
+# +- name: Devnet diagnostic snapshot + hosts: devnet + become: true + vars: + prom: "http://localhost:{{ prometheus_port }}" + + tasks: + # -- Consensus health -- + - name: Query block rate + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: block_rate + ignore_errors: true + + - name: Query finalized heights + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=finalized_height" + return_content: true + register: heights + ignore_errors: true + + - name: Query skip rate + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=1 - avg(rate(finalized_height[1m])) / avg(rate(current_view[1m]))" + return_content: true + register: skip_rate + ignore_errors: true + + # -- Resolver & P2P -- + - name: Query resolver blocked peers + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=engine_resolver_resolver_peers_blocked" + return_content: true + register: blocked_peers + ignore_errors: true + + - name: Query P2P dropped messages + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=sum(rate(network_router_messages_dropped_total[1m]))" + return_content: true + register: dropped_msgs + ignore_errors: true + + # -- Resources -- + - name: Query memory usage + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=process_resident_memory_bytes" + return_content: true + register: memory + ignore_errors: true + + # -- Container status -- + - name: Check container status + ansible.builtin.command: + cmd: docker ps --format "table {{'{{'}}.Names{{'}}'}}\t{{'{{'}}.Status{{'}}'}}" + register: docker_status + + # -- Recent warnings -- + - name: 
Collect unique warnings per node + ansible.builtin.shell: | + for n in $(seq 0 {{ (num_validators | int) - 1 }}); do + echo "--- node$n ---" + docker logs {{ compose_project_name }}-validator-node${n}-1 --since 5m 2>&1 \ + | grep -oP 'WARN\s+\S+::\S+' | sort | uniq -c | sort -rn | head -10 + done + register: warnings + + # -- Firing alerts -- + - name: Check firing alerts + ansible.builtin.uri: + url: "{{ prom }}/api/v1/alerts" + return_content: true + register: alerts + ignore_errors: true + + # -- Txpool state -- + - name: Query txpool status on node0 + ansible.builtin.uri: + url: "http://localhost:8545" + method: POST + body_format: json + body: + jsonrpc: "2.0" + method: txpool_status + params: [] + id: 1 + return_content: true + register: txpool + ignore_errors: true + + # -- Report -- + - name: "=== DIAGNOSTIC REPORT ===" + ansible.builtin.debug: + msg: | + ╔══════════════════════════════════════════════════╗ + ║ DEVNET DIAGNOSTIC SNAPSHOT ║ + ╚══════════════════════════════════════════════════╝ + + ── Consensus ── + Block rate: {{ (block_rate.json.data.result[0].value[1] | default('N/A') | float) | round(1) }} blocks/sec + Skip rate: {{ ((skip_rate.json.data.result[0].value[1] | default('0') | float) * 100) | round(1) }}% + Heights: + {% for r in heights.json.data.result | default([]) %} + {{ r.metric.instance }}: {{ r.value[1] }} + {% endfor %} + + ── Resolver & P2P ── + Blocked peers: + {% for r in blocked_peers.json.data.result | default([]) %} + {{ r.metric.instance }}: {{ r.value[1] }} + {% endfor %} + Dropped msgs/sec: {{ (dropped_msgs.json.data.result[0].value[1] | default('0') | float) | round(1) }} + + ── Resources ── + Memory (RSS): + {% for r in memory.json.data.result | default([]) %} + {{ r.metric.instance }}: {{ (r.value[1] | float / 1048576) | round(0) }} MB + {% endfor %} + + ── Containers ── + {{ docker_status.stdout }} + + ── Txpool (node0) ── + {{ txpool.json.result | default('unavailable') }} + + ── Firing Alerts ── + {% for a in 
alerts.json.data.alerts | default([]) %} + {% if a.state == 'firing' %} + [{{ a.labels.severity | default('?') }}] {{ a.labels.alertname }}: {{ a.annotations.summary | default('') }} + {% endif %} + {% endfor %} + {% if alerts.json.data.alerts | default([]) | selectattr('state', 'eq', 'firing') | list | length == 0 %} + None firing + {% endif %} + + ── Recent Warnings (last 5m) ── + {{ warnings.stdout }} diff --git a/ansible/playbooks/observe.yml b/ansible/playbooks/observe.yml new file mode 100644 index 0000000..2126a3b --- /dev/null +++ b/ansible/playbooks/observe.yml @@ -0,0 +1,7 @@ +--- +# Start/restart Prometheus + Grafana observability stack +- name: Start observability + hosts: devnet + become: true + roles: + - observe diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000..d2c6a38 --- /dev/null +++ b/ansible/playbooks/provision.yml @@ -0,0 +1,12 @@ +--- +# One-time server provisioning: install system deps, firewall, Docker +- name: Provision devnet server + hosts: devnet + become: true + roles: + - role: base + tags: [base] + - role: firewall + tags: [firewall] + - role: docker + tags: [docker] diff --git a/ansible/playbooks/query-metrics.yml b/ansible/playbooks/query-metrics.yml new file mode 100644 index 0000000..94c9421 --- /dev/null +++ b/ansible/playbooks/query-metrics.yml @@ -0,0 +1,78 @@ +--- +# Quick Prometheus metric queries for debugging +# +# Usage: +# # Default: run all queries +# ansible-playbook playbooks/query-metrics.yml -i inventory/hosts.yml +# +# # Specific query: +# ansible-playbook playbooks/query-metrics.yml -i inventory/hosts.yml \ +# -e promql="rate(finalized_height[1m])" +# +- name: Query Prometheus metrics + hosts: devnet + become: true + vars: + prom: "http://localhost:{{ prometheus_port }}" + # Default queries to run (override with -e promql="...") + default_queries: + - name: "Block rate (blocks/sec)" + query: "rate(finalized_height[1m])" + - name: "Skip rate" + query: 
"1 - rate(finalized_height[1m]) / rate(current_view[1m])" + - name: "Nullifications/sec" + query: "rate(simplex_voter_nullifications_total[1m])" + - name: "Resolver blocked peers" + query: "engine_resolver_resolver_peers_blocked" + - name: "Memory (MB)" + query: "process_resident_memory_bytes / 1048576" + - name: "P2P dropped msgs/sec" + query: "sum by (instance) (rate(network_router_messages_dropped_total[1m]))" + - name: "Height drift (max - min)" + query: "max(finalized_height) - min(finalized_height)" + + tasks: + # Single custom query mode + - name: "Run custom query: {{ promql }}" + when: promql is defined + block: + - name: Execute custom query + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query={{ promql }}" + return_content: true + register: custom_result + + - name: Show custom result + ansible.builtin.debug: + msg: | + Query: {{ promql }} + {% for r in custom_result.json.data.result %} + {{ r.metric | default({}) }}: {{ r.value[1] }} + {% endfor %} + + # Default multi-query mode + - name: Run default diagnostic queries + when: promql is not defined + block: + - name: Execute queries + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query={{ item.query }}" + return_content: true + loop: "{{ default_queries }}" + register: query_results + + - name: "=== METRICS SNAPSHOT ===" + ansible.builtin.debug: + msg: | + {% for result in query_results.results %} + ── {{ result.item.name }} ── + {% for r in result.json.data.result %} + {{ r.metric.instance | default('aggregate') }}: {{ (r.value[1] | float) | round(2) }} + {% endfor %} + {% endfor %} diff --git a/ansible/playbooks/reset.yml b/ansible/playbooks/reset.yml new file mode 100644 index 0000000..c5f9be8 --- /dev/null +++ b/ansible/playbooks/reset.yml @@ -0,0 +1,7 @@ +--- +# Wipe devnet clean: stop containers, remove volumes, optionally remove image +- name: Reset devnet + hosts: devnet 
+ become: true + roles: + - reset diff --git a/ansible/playbooks/stop.yml b/ansible/playbooks/stop.yml new file mode 100644 index 0000000..d89a2fc --- /dev/null +++ b/ansible/playbooks/stop.yml @@ -0,0 +1,30 @@ +--- +# Stop all devnet containers without wiping data or volumes +# +# Usage: +# ansible-playbook playbooks/stop.yml -i inventory/hosts.yml +# +- name: Stop devnet + hosts: devnet + become: true + + tasks: + - name: Stop all containers + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} + --profile observability --profile interactive-dkg + stop + changed_when: true + failed_when: false + + - name: Show container status + ansible.builtin.command: + cmd: docker ps --format "table {{'{{'}}.Names{{'}}'}}\t{{'{{'}}.Status{{'}}'}}" + register: docker_status + + - name: Print status + ansible.builtin.debug: + msg: | + Devnet stopped (data volumes preserved). + {{ docker_status.stdout }} diff --git a/ansible/requirements.yml b/ansible/requirements.yml new file mode 100644 index 0000000..982d633 --- /dev/null +++ b/ansible/requirements.yml @@ -0,0 +1,7 @@ +collections: + - name: ansible.posix + version: ">=1.5.0" + - name: community.docker + version: ">=3.0.0" + - name: community.general + version: ">=7.0.0" diff --git a/ansible/roles/base/tasks/main.yml b/ansible/roles/base/tasks/main.yml new file mode 100644 index 0000000..e56dc03 --- /dev/null +++ b/ansible/roles/base/tasks/main.yml @@ -0,0 +1,33 @@ +--- +- name: Update pacman cache + community.general.pacman: + update_cache: true + +- name: Install base packages + community.general.pacman: + name: + - base-devel + - git + - rsync + - curl + - jq + - htop + - python + - nftables + state: present + +- name: Set timezone to UTC + community.general.timezone: + name: UTC + +- name: Enable and start systemd-timesyncd (NTP) + ansible.builtin.systemd: + name: systemd-timesyncd + enabled: true + state: started + +- name: Create project directory + ansible.builtin.file: + path: "{{ 
remote_project_dir }}" + state: directory + mode: "0755" diff --git a/ansible/roles/build/tasks/main.yml b/ansible/roles/build/tasks/main.yml new file mode 100644 index 0000000..8ab1438 --- /dev/null +++ b/ansible/roles/build/tasks/main.yml @@ -0,0 +1,8 @@ +--- +- name: Build kora:local image with buildx bake + ansible.builtin.command: + cmd: docker buildx bake --allow=fs.read=.. --load -f docker-bake.hcl kora-local + chdir: "{{ remote_project_dir }}/docker" + async: "{{ build_timeout }}" + poll: "{{ build_poll }}" + changed_when: true diff --git a/ansible/roles/chaos/tasks/restart-one-node.yml b/ansible/roles/chaos/tasks/restart-one-node.yml new file mode 100644 index 0000000..2df8519 --- /dev/null +++ b/ansible/roles/chaos/tasks/restart-one-node.yml @@ -0,0 +1,48 @@ +--- +# Included by chaos-rolling-restart.yml for each node_idx +- name: "Node {{ node_idx }}: Stop" + ansible.builtin.command: + cmd: "docker compose -f devnet.yaml stop validator-node{{ node_idx }}" + chdir: "{{ compose_dir }}" + +- name: "Node {{ node_idx }}: Wait {{ stop_duration }}s" + ansible.builtin.pause: + seconds: "{{ stop_duration | int }}" + +- name: "Node {{ node_idx }}: Measure outage block rate" + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: outage_rate + +- name: "Node {{ node_idx }}: Restart" + ansible.builtin.command: + cmd: "docker compose -f devnet.yaml start validator-node{{ node_idx }}" + chdir: "{{ compose_dir }}" + +- name: "Node {{ node_idx }}: Wait {{ recovery_wait }}s for recovery" + ansible.builtin.pause: + seconds: "{{ recovery_wait | int }}" + +- name: "Node {{ node_idx }}: Measure recovery block rate" + ansible.builtin.uri: + url: "{{ prom }}/api/v1/query" + method: POST + body_format: form-urlencoded + body: "query=avg(rate(finalized_height[30s]))" + return_content: true + register: recovery_rate + +- name: "Node {{ node_idx }}: 
Check catch-up logs" + ansible.builtin.command: + cmd: "docker logs {{ compose_project_name }}-validator-node{{ node_idx }}-1 --tail 5" + register: node_log + +- name: "Node {{ node_idx }}: Results" + ansible.builtin.debug: + msg: | + Node {{ node_idx }}: outage={{ (outage_rate.json.data.result[0].value[1] | float) | round(1) }} blocks/sec → recovery={{ (recovery_rate.json.data.result[0].value[1] | float) | round(1) }} blocks/sec + Logs: {{ node_log.stderr | default(node_log.stdout, true) | regex_replace('\x1b\\[[0-9;]*m', '') }} diff --git a/ansible/roles/devnet/tasks/main.yml b/ansible/roles/devnet/tasks/main.yml new file mode 100644 index 0000000..06f7bd1 --- /dev/null +++ b/ansible/roles/devnet/tasks/main.yml @@ -0,0 +1,134 @@ +--- +- name: Stop existing validators + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} stop + validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 + changed_when: true + failed_when: false + +- name: Check if DKG shares exist + ansible.builtin.shell: | + for i in 0 1 2 3; do + volume="{{ compose_project_name }}_data_node${i}" + docker volume inspect "$volume" >/dev/null 2>&1 || exit 1 + docker run --rm -v "${volume}:/data" alpine \ + test -f /data/share.key -a -f /data/output.json || exit 1 + done + register: dkg_check + changed_when: false + failed_when: false + +- name: Run init-config (trusted dealer DKG) + ansible.builtin.command: + cmd: docker compose -f {{ compose_file }} run --rm init-config + environment: + RUST_LOG: "{{ rust_log }}" + CHAIN_ID: "{{ chain_id }}" + when: dkg_check.rc != 0 and dkg_mode == "trusted" + changed_when: true + +- name: Run init-setup (interactive DKG - setup only) + ansible.builtin.command: + cmd: docker compose -f {{ compose_file }} run --rm init-setup + environment: + RUST_LOG: "{{ rust_log }}" + CHAIN_ID: "{{ chain_id }}" + when: dkg_check.rc != 0 and dkg_mode == "interactive" + changed_when: true + +- name: Run interactive DKG ceremony + when: 
dkg_check.rc != 0 and dkg_mode == "interactive" + block: + - name: Start DKG nodes + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} --profile interactive-dkg up -d + dkg-node0 dkg-node1 dkg-node2 dkg-node3 + changed_when: true + + - name: Wait for DKG ceremony to complete + ansible.builtin.shell: | + EXITED=$(docker compose -f {{ compose_file }} ps -a --format json 2>/dev/null | \ + jq -r 'select(.Service | startswith("dkg-")) | select(.State == "exited") | select(.ExitCode == 0) | .Service' 2>/dev/null | wc -l | tr -d ' ') + FAILED=$(docker compose -f {{ compose_file }} ps -a --format json 2>/dev/null | \ + jq -r 'select(.Service | startswith("dkg-")) | select(.State == "exited") | select(.ExitCode != 0) | .Service' 2>/dev/null | wc -l | tr -d ' ') + [[ "$FAILED" -gt 0 ]] && exit 1 + [[ "$EXITED" -ge 4 ]] && exit 0 + exit 2 + register: dkg_result + until: dkg_result.rc == 0 + retries: 60 + delay: 5 + changed_when: false + failed_when: dkg_result.rc == 1 + + - name: Stop DKG containers + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} --profile interactive-dkg stop + dkg-node0 dkg-node1 dkg-node2 dkg-node3 + changed_when: true + failed_when: false + +- name: Clear runtime state from data volumes + ansible.builtin.shell: | + for volume in \ + {{ compose_project_name }}_data_node0 \ + {{ compose_project_name }}_data_node1 \ + {{ compose_project_name }}_data_node2 \ + {{ compose_project_name }}_data_node3 \ + {{ compose_project_name }}_data_secondary0; do + docker volume inspect "$volume" >/dev/null 2>&1 || continue + docker run --rm -v "${volume}:/data" alpine rm -rf /data/runtime 2>/dev/null || true + done + changed_when: true + +- name: Clear startup barrier + ansible.builtin.shell: | + volume="{{ compose_project_name }}_startup_barrier" + docker volume inspect "$volume" >/dev/null 2>&1 || exit 0 + docker run --rm -v "${volume}:/barrier" alpine sh -c 'rm -f /barrier/*.ready' + changed_when: true + +- name: Start 
validators and secondary + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} up -d + validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 + environment: + RUST_LOG: "{{ rust_log }}" + CHAIN_ID: "{{ chain_id }}" + changed_when: true + +- name: Wait for validators to become healthy + ansible.builtin.shell: | + HEALTHY=$(docker compose -f {{ compose_file }} ps --format json 2>/dev/null | \ + jq -r 'select(.Service | startswith("validator-")) | select(.Health == "healthy") | .Service' 2>/dev/null | wc -l | tr -d ' ') + [[ "$HEALTHY" -ge {{ num_validators }} ]] + register: health_result + until: health_result.rc == 0 + retries: "{{ health_retries }}" + delay: "{{ health_delay }}" + changed_when: false + +- name: Wait for secondary peer to become healthy + ansible.builtin.shell: | + HEALTH=$(docker compose -f {{ compose_file }} ps --format json 2>/dev/null | \ + jq -r 'select(.Service == "secondary-node0") | .Health' 2>/dev/null) + [[ "$HEALTH" == "healthy" ]] + register: secondary_health + until: secondary_health.rc == 0 + retries: "{{ health_retries }}" + delay: "{{ health_delay }}" + changed_when: false + +- name: Print devnet status + ansible.builtin.debug: + msg: | + Devnet is healthy! 
+ Validators: {{ num_validators }}/{{ num_validators }} healthy + Secondary: healthy + RPC: http://{{ ansible_host }}:8545-8548 + P2P: {{ ansible_host }}:30400-30403 + Secondary: {{ ansible_host }}:30500 diff --git a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000..4c92b03 --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.systemd: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000..994ecd3 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,41 @@ +--- +- name: Install Docker packages + community.general.pacman: + name: + - docker + - docker-compose + - docker-buildx + state: present + +- name: Configure Docker daemon + ansible.builtin.copy: + content: "{{ docker_daemon_config | to_nice_json }}\n" + dest: /etc/docker/daemon.json + mode: "0644" + when: docker_dns_servers | default([]) | length > 0 + notify: restart docker + vars: + docker_daemon_config: + dns: "{{ docker_dns_servers }}" + +- name: Enable and start Docker + ansible.builtin.systemd: + name: docker + enabled: true + state: started + +- name: Flush handlers to ensure Docker is restarted if needed + ansible.builtin.meta: flush_handlers + +- name: Check if kora-builder exists + ansible.builtin.command: + cmd: docker buildx inspect kora-builder + register: builder_check + changed_when: false + failed_when: false + +- name: Create buildx builder with host networking + ansible.builtin.command: + cmd: docker buildx create --name kora-builder --use --driver docker-container --driver-opt network=host + when: builder_check.rc != 0 + changed_when: true diff --git a/ansible/roles/firewall/handlers/main.yml b/ansible/roles/firewall/handlers/main.yml new file mode 100644 index 0000000..0786a10 --- /dev/null +++ b/ansible/roles/firewall/handlers/main.yml @@ -0,0 
+1,5 @@ +--- +- name: restart nftables + ansible.builtin.systemd: + name: nftables + state: restarted diff --git a/ansible/roles/firewall/tasks/main.yml b/ansible/roles/firewall/tasks/main.yml new file mode 100644 index 0000000..88824c9 --- /dev/null +++ b/ansible/roles/firewall/tasks/main.yml @@ -0,0 +1,14 @@ +--- +- name: Deploy nftables configuration + ansible.builtin.template: + src: nftables.conf.j2 + dest: /etc/nftables.conf + mode: "0644" + validate: 'nft -c -f %s' + notify: restart nftables + +- name: Enable and start nftables + ansible.builtin.systemd: + name: nftables + enabled: true + state: started diff --git a/ansible/roles/firewall/templates/nftables.conf.j2 b/ansible/roles/firewall/templates/nftables.conf.j2 new file mode 100644 index 0000000..b204bf3 --- /dev/null +++ b/ansible/roles/firewall/templates/nftables.conf.j2 @@ -0,0 +1,86 @@ +#!/usr/sbin/nft -f + +flush ruleset + +table inet filter { + chain input { + type filter hook input priority 0; policy drop; + + # Loopback + iif "lo" accept + + # Established/related connections + ct state established,related accept + + # ICMP / ICMPv6 + ip protocol icmp accept + ip6 nexthdr icmpv6 accept + + # SSH (rate-limited to mitigate brute-force) + tcp dport {{ ssh_port }} ct state new limit rate 10/minute accept + tcp dport {{ ssh_port }} ct state established accept + + # Kora P2P (validators) + tcp dport { {{ p2p_ports | replace(':', '-') }} } accept + udp dport { {{ p2p_ports | replace(':', '-') }} } accept + + # Kora P2P (secondary) + tcp dport {{ secondary_p2p_port }} accept + udp dport {{ secondary_p2p_port }} accept + + # Kora RPC (restricted to trusted IPs) +{% if trusted_ips | default([]) | length > 0 %} +{% for ip in trusted_ips %} + ip saddr {{ ip }} tcp dport { {{ rpc_ports | replace(':', '-') }} } accept +{% endfor %} +{% else %} + # WARNING: RPC is open to the world. Set 'trusted_ips' to restrict access. 
+ tcp dport { {{ rpc_ports | replace(':', '-') }} } accept +{% endif %} + + # Metrics (restricted to trusted IPs) +{% if trusted_ips | default([]) | length > 0 %} +{% for ip in trusted_ips %} + ip saddr {{ ip }} tcp dport { {{ metrics_ports | replace(':', '-') }} } accept +{% endfor %} +{% else %} + # WARNING: Metrics are open to the world. Set 'trusted_ips' to restrict access. + tcp dport { {{ metrics_ports | replace(':', '-') }} } accept +{% endif %} + + # Prometheus (restricted to trusted IPs) +{% if trusted_ips | default([]) | length > 0 %} +{% for ip in trusted_ips %} + ip saddr {{ ip }} tcp dport {{ prometheus_port }} accept +{% endfor %} +{% else %} + # WARNING: Prometheus is open to the world. Set 'trusted_ips' to restrict access. + tcp dport {{ prometheus_port }} accept +{% endif %} + + # Grafana (restricted to trusted IPs) +{% if trusted_ips | default([]) | length > 0 %} +{% for ip in trusted_ips %} + ip saddr {{ ip }} tcp dport {{ grafana_port }} accept +{% endfor %} +{% else %} + # WARNING: Grafana is open to the world. Set 'trusted_ips' to restrict access. 
+ tcp dport {{ grafana_port }} accept +{% endif %} + + # Log and drop + log prefix "[nftables drop] " flags all counter drop + } + + chain forward { + type filter hook forward priority 0; policy drop; + + # Allow Docker bridge traffic + iifname "docker0" oifname "docker0" accept + ct state established,related accept + } + + chain output { + type filter hook output priority 0; policy accept; + } +} diff --git a/ansible/roles/observe/tasks/main.yml b/ansible/roles/observe/tasks/main.yml new file mode 100644 index 0000000..e744040 --- /dev/null +++ b/ansible/roles/observe/tasks/main.yml @@ -0,0 +1,52 @@ +--- +- name: Start observability stack + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} --profile observability up -d + prometheus loki promtail grafana + environment: + GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_admin_password }}" + changed_when: true + +- name: Wait for Prometheus to be ready + ansible.builtin.uri: + url: "http://localhost:{{ prometheus_port }}/-/ready" + method: GET + status_code: 200 + register: prom_health + until: prom_health.status == 200 + retries: 12 + delay: 5 + +- name: Wait for Loki to be ready + ansible.builtin.uri: + url: "http://localhost:{{ loki_port }}/ready" + method: GET + status_code: 200 + register: loki_health + until: loki_health.status == 200 + retries: 12 + delay: 5 + +- name: Wait for Grafana to be ready + ansible.builtin.uri: + url: "http://localhost:{{ grafana_port }}/api/health" + method: GET + status_code: 200 + register: grafana_health + until: grafana_health.status == 200 + retries: 12 + delay: 5 + +- name: Print observability endpoints + ansible.builtin.debug: + msg: | + Observability stack is ready! 
+ Prometheus: http://{{ ansible_host }}:{{ prometheus_port }} + Loki: http://{{ ansible_host }}:{{ loki_port }} + Grafana: http://{{ ansible_host }}:{{ grafana_port }} (user: admin, password set via grafana_admin_password) + Dashboards: + Overview: http://{{ ansible_host }}:{{ grafana_port }}/d/kora-overview + Performance: http://{{ ansible_host }}:{{ grafana_port }}/d/kora-performance + Stall Diagnostics: http://{{ ansible_host }}:{{ grafana_port }}/d/kora-stall-diagnostics + Logs Explorer: http://{{ ansible_host }}:{{ grafana_port }}/d/kora-logs diff --git a/ansible/roles/reset/tasks/main.yml b/ansible/roles/reset/tasks/main.yml new file mode 100644 index 0000000..a6cf273 --- /dev/null +++ b/ansible/roles/reset/tasks/main.yml @@ -0,0 +1,37 @@ +--- +- name: Stop and remove all containers and volumes + ansible.builtin.command: + cmd: > + docker compose -f {{ compose_file }} + --profile observability --profile interactive-dkg + down -v --remove-orphans + changed_when: true + failed_when: false + +- name: Remove Docker image + ansible.builtin.command: + cmd: docker rmi {{ docker_image }} + when: reset_remove_image | default(false) | bool + changed_when: true + failed_when: false + +- name: Prune dangling images + ansible.builtin.command: + cmd: docker image prune -f + changed_when: true + +- name: Remove project volumes + ansible.builtin.shell: | + docker volume ls --quiet --filter "name={{ compose_project_name }}" | xargs -r docker volume rm + changed_when: true + failed_when: false + +- name: Prune all dangling volumes + ansible.builtin.command: + cmd: docker volume prune -f + changed_when: true + when: reset_prune_all_volumes | default(false) | bool + +- name: Print reset status + ansible.builtin.debug: + msg: "Devnet reset complete. Run deploy.yml for a fresh start." 
diff --git a/ansible/roles/sync/tasks/main.yml b/ansible/roles/sync/tasks/main.yml new file mode 100644 index 0000000..0160692 --- /dev/null +++ b/ansible/roles/sync/tasks/main.yml @@ -0,0 +1,12 @@ +--- +- name: Sync project to remote server + ansible.posix.synchronize: + src: "{{ playbook_dir }}/../../" + dest: "{{ remote_project_dir }}/" + delete: true + rsync_opts: + - "--exclude=.git" + - "--exclude=target/" + - "--exclude=.DS_Store" + - "--exclude=testnet-artifacts/" + - "--exclude=ansible/" diff --git a/bin/keygen/Cargo.toml b/bin/keygen/Cargo.toml index dd3b72f..0a021b2 100644 --- a/bin/keygen/Cargo.toml +++ b/bin/keygen/Cargo.toml @@ -16,7 +16,8 @@ commonware-cryptography.workspace = true commonware-codec.workspace = true commonware-utils.workspace = true -ed25519-consensus = "2" +alloy-primitives.workspace = true +k256.workspace = true clap.workspace = true serde.workspace = true diff --git a/bin/keygen/src/dkg_deal.rs b/bin/keygen/src/dkg_deal.rs index e98df5a..9b1a061 100644 --- a/bin/keygen/src/dkg_deal.rs +++ b/bin/keygen/src/dkg_deal.rs @@ -3,15 +3,15 @@ //! Generates all BLS12-381 threshold shares using a single trusted dealer. //! This is NOT secure for production but allows testing the validator workflow. 
-use std::{fs, path::PathBuf}; +use std::{fs, io::Write as _, path::PathBuf}; use clap::Args; use commonware_codec::{ReadExt, Write as _}; use commonware_cryptography::bls12381::{ - dkg, + dkg::feldman_desmedt as dkg, primitives::{sharing::Mode, variant::MinSig}, }; -use commonware_utils::{N3f1, TryCollect, ordered::Set}; +use commonware_utils::{Faults, N3f1, TryCollect, ordered::Set}; use eyre::{Result, WrapErr}; use serde::{Deserialize, Serialize}; @@ -20,9 +20,6 @@ pub(crate) struct DkgDealArgs { #[arg(long, default_value = "4")] pub validators: usize, - #[arg(long, default_value = "3")] - pub threshold: u32, - #[arg(long, default_value = "/shared")] pub output_dir: PathBuf, } @@ -43,10 +40,14 @@ struct ShareJson { } pub(crate) fn run(args: DkgDealArgs) -> Result<()> { + let quorum = N3f1::quorum(args.validators); tracing::info!( validators = args.validators, - threshold = args.threshold, - "Running trusted dealer DKG" + quorum = quorum, + max_faulty = args.validators as u32 - quorum, + "Running trusted dealer DKG (quorum determined by N3f1: need {} of {} validators)", + quorum, + args.validators ); let mut participants = Vec::with_capacity(args.validators); @@ -119,7 +120,7 @@ pub(crate) fn run(args: DkgDealArgs) -> Result<()> { let output_json = OutputJson { group_public_key: hex::encode(&group_key_bytes), public_polynomial: hex::encode(&public_polynomial_bytes), - threshold: args.threshold, + threshold: quorum, participants: args.validators, participant_keys: participant_keys.clone(), }; @@ -128,14 +129,27 @@ pub(crate) fn run(args: DkgDealArgs) -> Result<()> { let share_json = ShareJson { index: share.index.get(), secret: hex::encode(&share_bytes) }; let share_path = node_dir.join("share.key"); - fs::write(&share_path, serde_json::to_string_pretty(&share_json)?)?; + write_secret_file(&share_path, serde_json::to_string_pretty(&share_json)?.as_bytes())?; tracing::info!(node = i, "Wrote DKG output and share"); } tracing::info!("Trusted dealer DKG complete"); 
tracing::info!(" Validators: {}", args.validators); - tracing::info!(" Threshold: {}", args.threshold); + tracing::info!(" Quorum (N3f1): {}", quorum); Ok(()) } + +/// Write `data` to `path` with mode `0600` so key material is never world-readable. +fn write_secret_file(path: &std::path::Path, data: &[u8]) -> Result<()> { + use std::os::unix::fs::OpenOptionsExt; + let mut f = fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(path) + .wrap_err_with(|| format!("Failed to create secret file {}", path.display()))?; + f.write_all(data).wrap_err_with(|| format!("Failed to write secret file {}", path.display())) +} diff --git a/bin/keygen/src/setup.rs b/bin/keygen/src/setup.rs index 0f3f286..cd56c24 100644 --- a/bin/keygen/src/setup.rs +++ b/bin/keygen/src/setup.rs @@ -1,14 +1,20 @@ //! Generates initial configuration for a Kora devnet. -use std::{collections::BTreeMap, fs, path::PathBuf}; +use std::{collections::BTreeMap, fs, io::Write as _, path::PathBuf}; +use alloy_primitives::{Address, keccak256}; use clap::Args; -use commonware_codec::Encode; +use commonware_codec::{Encode, ReadExt as _}; use commonware_cryptography::{Signer, ed25519}; +use commonware_utils::{Faults, N3f1}; use eyre::{Result, WrapErr}; +use k256::ecdsa::SigningKey; use rand::RngCore; use serde::{Deserialize, Serialize}; +const GENESIS_BALANCE: &str = "1000000000000000000000000"; +const LOADGEN_ACCOUNT_COUNT: u8 = 50; + #[derive(Args, Debug)] pub(crate) struct SetupArgs { #[arg(long, default_value = "4")] @@ -17,9 +23,6 @@ pub(crate) struct SetupArgs { #[arg(long, default_value = "0")] pub secondary_peers: usize, - #[arg(long, default_value = "3")] - pub threshold: u32, - #[arg(long, default_value = "1337")] pub chain_id: u64, @@ -33,7 +36,10 @@ pub(crate) struct SetupArgs { #[derive(Serialize, Deserialize)] struct PeersConfig { validators: usize, - threshold: u32, + /// Minimum active validators required for consensus (N3f1 quorum). 
+ /// This value is computed automatically from the validator count and + /// cannot be overridden -- it is persisted here for operator reference. + quorum: u32, participants: Vec, secondary_participants: Vec, bootstrappers: BTreeMap, @@ -59,12 +65,39 @@ struct NodeSetupConfig { port: u16, } +fn funded_allocation(address: impl Into) -> GenesisAllocation { + GenesisAllocation { address: address.into(), balance: GENESIS_BALANCE.to_string() } +} + +fn loadgen_address(seed: u8) -> Address { + let mut secret = [0u8; 32]; + secret[31] = seed; + let key = SigningKey::from_bytes((&secret).into()) + .expect("loadgen seed should produce valid secp256k1 key"); + let encoded = key.verifying_key().to_encoded_point(false); + let pubkey = encoded.as_bytes(); + let hash = keccak256(&pubkey[1..]); + Address::from_slice(&hash[12..]) +} + +fn funded_loadgen_allocations() -> impl Iterator { + (1..=LOADGEN_ACCOUNT_COUNT).map(|seed| funded_allocation(loadgen_address(seed).to_string())) +} + +fn private_key_from_seed(seed: [u8; 32]) -> ed25519::PrivateKey { + ed25519::PrivateKey::read(&mut seed.as_slice()).expect("32-byte ed25519 seed should decode") +} + pub(crate) fn run(args: SetupArgs) -> Result<()> { + let quorum = N3f1::quorum(args.validators); tracing::info!( validators = args.validators, - threshold = args.threshold, + quorum = quorum, + max_faulty = args.validators as u32 - quorum, chain_id = args.chain_id, - "Generating devnet configuration" + "Generating devnet configuration (quorum determined by N3f1: need {} of {} validators)", + quorum, + args.validators ); fs::create_dir_all(&args.output_dir).wrap_err("Failed to create output directory")?; @@ -84,13 +117,13 @@ pub(crate) fn run(args: SetupArgs) -> Result<()> { let bytes = fs::read(&key_path)?; let mut seed = [0u8; 32]; seed.copy_from_slice(&bytes); - ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from(seed)) + private_key_from_seed(seed) } else { tracing::info!(node = i, "Generating new identity key"); let mut 
seed = [0u8; 32]; rand::rngs::OsRng.fill_bytes(&mut seed); - fs::write(&key_path, seed)?; - ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from(seed)) + write_secret_file(&key_path, &seed)?; + private_key_from_seed(seed) }; let public_key = key.public_key(); @@ -120,13 +153,13 @@ pub(crate) fn run(args: SetupArgs) -> Result<()> { let bytes = fs::read(&key_path)?; let mut seed = [0u8; 32]; seed.copy_from_slice(&bytes); - ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from(seed)) + private_key_from_seed(seed) } else { tracing::info!(node = i, "Generating new secondary identity key"); let mut seed = [0u8; 32]; rand::rngs::OsRng.fill_bytes(&mut seed); - fs::write(&key_path, seed)?; - ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from(seed)) + write_secret_file(&key_path, &seed)?; + private_key_from_seed(seed) }; let public_key = key.public_key(); @@ -143,7 +176,7 @@ pub(crate) fn run(args: SetupArgs) -> Result<()> { let peers = PeersConfig { validators: args.validators, - threshold: args.threshold, + quorum, participants, secondary_participants, bootstrappers, @@ -152,26 +185,76 @@ pub(crate) fn run(args: SetupArgs) -> Result<()> { fs::write(&peers_path, serde_json::to_string_pretty(&peers)?)?; tracing::info!(path = ?peers_path, "Wrote peers configuration"); - let genesis = GenesisConfig { - chain_id: args.chain_id, - timestamp: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs(), - allocations: vec![GenesisAllocation { - address: "0x0000000000000000000000000000000000000001".to_string(), - balance: "1000000000000000000000000".to_string(), - }], - }; + let mut allocations = vec![ + funded_allocation("0x0000000000000000000000000000000000000001"), + funded_allocation("0xEb1Ba7Fc58b3416361a0EE07d140c91410c0AA8c"), + funded_allocation("0xa883208a74152107475a3Fa6b0c21121894B647F"), + funded_allocation("0x105be5081ceba05be11976150abc277ee365fc3f"), + 
funded_allocation("0x30b68d56AE9173566055a69ee7cCB0E755B6a201"), + funded_allocation("0xDdE169289B51C512268D0b11EE2b15160b1e1793"), + funded_allocation("0xde738C4084dDE5083A7959235Fd230e27eAFC63B"), + ]; + allocations.extend(funded_loadgen_allocations()); + + let genesis = GenesisConfig { chain_id: args.chain_id, timestamp: 0, allocations }; let genesis_path = args.output_dir.join("genesis.json"); fs::write(&genesis_path, serde_json::to_string_pretty(&genesis)?)?; tracing::info!(path = ?genesis_path, "Wrote genesis configuration"); tracing::info!("Setup complete"); - tracing::info!(" Validators: {}", args.validators); - tracing::info!(" Secondary peers: {}", args.secondary_peers); - tracing::info!(" Threshold: {}", args.threshold); - tracing::info!(" Chain ID: {}", args.chain_id); + tracing::info!( + " Validators: {} | Quorum (N3f1): {} (tolerates {} faults)", + args.validators, + quorum, + args.validators as u32 - quorum + ); + tracing::info!(" Secondary: {}", args.secondary_peers); + tracing::info!(" Chain ID: {}", args.chain_id); Ok(()) } + +/// Write `data` to `path` with mode `0600` so key material is never world-readable. 
+fn write_secret_file(path: &std::path::Path, data: &[u8]) -> Result<()> { + use std::os::unix::fs::OpenOptionsExt; + let mut f = fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(path) + .wrap_err_with(|| format!("Failed to create secret file {}", path.display()))?; + f.write_all(data).wrap_err_with(|| format!("Failed to write secret file {}", path.display())) +} + +#[cfg(test)] +mod tests { + use super::*; + + const LOADGEN_ADDRESS_FIXTURES: &[(u8, &str)] = &[ + (1, "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf"), + (2, "0x2B5AD5c4795c026514f8317c7a215E218DcCD6cF"), + (3, "0x6813Eb9362372EEF6200f3b1dbC3f819671cBA69"), + ]; + + #[test] + fn loadgen_address_matches_seed_fixtures() { + for &(seed, expected) in LOADGEN_ADDRESS_FIXTURES { + assert_eq!(loadgen_address(seed).to_string(), expected); + } + } + + #[test] + fn funded_loadgen_allocations_include_expected_seed_addresses() { + let allocations: Vec<_> = funded_loadgen_allocations().collect(); + + assert_eq!(allocations.len(), usize::from(LOADGEN_ACCOUNT_COUNT)); + for &(_, expected) in LOADGEN_ADDRESS_FIXTURES { + let allocation = allocations + .iter() + .find(|allocation| allocation.address == expected) + .expect("expected loadgen seed address to be funded"); + assert_eq!(allocation.balance, GENESIS_BALANCE); + } + } +} diff --git a/bin/kora/Cargo.toml b/bin/kora/Cargo.toml index 229df56..f0a131b 100644 --- a/bin/kora/Cargo.toml +++ b/bin/kora/Cargo.toml @@ -22,8 +22,8 @@ commonware-p2p.workspace = true commonware-runtime.workspace = true commonware-utils.workspace = true +axum.workspace = true clap.workspace = true -futures.workspace = true tokio.workspace = true tracing.workspace = true tracing-subscriber.workspace = true diff --git a/bin/kora/README.md b/bin/kora/README.md index 493f1a0..04aee16 100644 --- a/bin/kora/README.md +++ b/bin/kora/README.md @@ -35,7 +35,7 @@ Run as a validator (requires completed DKG): kora validator --peers peers.json ``` -The 
`--chain-id` and `--data-dir` flags can override configuration values. Set `RUST_LOG` to control log level (e.g., `info`, `debug`, `kora=trace`). +The `--chain-id` and `--data-dir` flags can override configuration values. Set `RUST_LOG` to control log level (e.g., `info`, `debug`, `kora=trace`). Set `KORA_RUNTIME_DIR` to override the Commonware runtime storage directory. ## Configuration diff --git a/bin/kora/src/cli.rs b/bin/kora/src/cli.rs index e168afb..cb68110 100644 --- a/bin/kora/src/cli.rs +++ b/bin/kora/src/cli.rs @@ -1,10 +1,12 @@ -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; use clap::{Parser, Subcommand}; +use commonware_runtime::Supervisor as _; +use commonware_utils::{Faults, N3f1}; use kora_config::NodeConfig; use kora_domain::BootstrapConfig; use kora_rpc::NodeState; -use kora_runner::{ProductionRunner, load_threshold_scheme}; +use kora_runner::{ProductionRunner, load_threshold_scheme, runtime_storage_directory}; use kora_service::LegacyNodeService; #[derive(Parser, Debug)] @@ -49,6 +51,14 @@ pub(crate) struct DkgArgs { pub(crate) struct ValidatorArgs { #[arg(long)] pub peers: Option, + + /// Prometheus metrics server bind address. + #[arg(long, default_value = "0.0.0.0:9002")] + pub metrics_addr: String, + + /// Enable P2P transaction gossip between validators. + #[arg(long, default_value = "false")] + pub tx_gossip: bool, } #[derive(clap::Args, Debug)] @@ -56,6 +66,14 @@ pub(crate) struct SecondaryArgs { /// Path to peers.json file containing primary and secondary peer information. #[arg(long)] pub peers: PathBuf, + + /// JSON-RPC server bind address (reserved for future read-only RPC). + #[arg(long, default_value = "0.0.0.0:8545")] + pub rpc_addr: String, + + /// Prometheus metrics server bind address. 
+ #[arg(long, default_value = "0.0.0.0:9002")] + pub metrics_addr: String, } impl Cli { @@ -97,11 +115,21 @@ impl Cli { .position(|pk| *pk == my_pk) .ok_or_else(|| eyre::eyre!("Our public key not found in participants list"))?; + let n = peers.participants.len(); + let quorum = N3f1::quorum(n); + tracing::info!( + n = n, + quorum = quorum, + max_faulty = n as u32 - quorum, + "Consensus quorum determined by N3f1: need {} of {} validators active", + quorum, + n + ); + let dkg_config = DkgConfig { identity_key, validator_index, participants: peers.participants, - threshold: peers.threshold, chain_id: node_config.chain_id, data_dir: node_config.data_dir.clone(), listen_addr: node_config.network.listen_addr.parse()?, @@ -126,6 +154,10 @@ impl Cli { fn run_validator(&self, args: &ValidatorArgs) -> eyre::Result<()> { let mut config = self.load_config()?; + if args.tx_gossip { + config.network.tx_gossip = true; + } + tracing::info!(chain_id = config.chain_id, "Starting validator"); if !kora_dkg::DkgOutput::exists(&config.data_dir) { @@ -158,24 +190,57 @@ impl Cli { .map_err(|e| eyre::eyre!("Failed to load genesis: {}", e))?; tracing::info!(allocations = bootstrap.genesis_alloc.len(), "Loaded genesis configuration"); - let rpc_addr: std::net::SocketAddr = "0.0.0.0:8545".parse()?; - let node_state = NodeState::new(config.chain_id, dkg_output.share_index); + if bootstrap.chain_id != config.chain_id { + return Err(eyre::eyre!( + "genesis.json chain_id ({}) does not match node chain_id ({})", + bootstrap.chain_id, + config.chain_id + )); + } + + let rpc_addr: std::net::SocketAddr = config.rpc.http_addr.parse().map_err(|err| { + eyre::eyre!("invalid rpc.http_addr '{}': {}", config.rpc.http_addr, err) + })?; + let validator_count = u32::try_from(dkg_output.participants).map_err(|_| { + eyre::eyre!("DKG participant count {} exceeds u32::MAX", dkg_output.participants) + })?; + if validator_count == 0 { + return Err(eyre::eyre!("DKG participant count must be non-zero")); + } + let 
validator_index = dkg_output.share_index; + if validator_index >= validator_count { + return Err(eyre::eyre!( + "DKG share_index ({validator_index}) must be less than participant count ({validator_count})" + )); + } + + let quorum = N3f1::quorum(validator_count as usize); + tracing::info!( + validator_count = validator_count, + quorum = quorum, + max_faulty = validator_count - quorum, + "Consensus requires {} of {} validators active (N3f1 BFT)", + quorum, + validator_count + ); - let runner = ProductionRunner::new( - scheme, - config.chain_id, - kora_config::DEFAULT_GAS_LIMIT, - bootstrap, - ) - .with_rpc(node_state, rpc_addr) - .with_secondary_peers(secondary_participants); + let node_state = + NodeState::with_validator_count(config.chain_id, validator_index, validator_count); + + let metrics_addr: std::net::SocketAddr = args.metrics_addr.parse().map_err(|err| { + eyre::eyre!("invalid --metrics-addr '{}': {}", args.metrics_addr, err) + })?; + let runner = ProductionRunner::new(scheme, config.chain_id, bootstrap) + .with_rpc(node_state, rpc_addr) + .with_metrics_addr(metrics_addr) + .with_secondary_peers(secondary_participants); runner.run_standalone(config).map_err(|e| eyre::eyre!("Runner failed: {}", e.0)) } fn run_secondary(&self, args: &SecondaryArgs) -> eyre::Result<()> { use commonware_p2p::{Manager, TrackedPeers}; - use commonware_runtime::Runner; + use commonware_runtime::{Clock as _, Metrics as _, Runner, Spawner}; use commonware_utils::ordered::Set; use kora_transport::NetworkConfigExt; @@ -191,21 +256,41 @@ impl Cli { )); } + let validator_count = peers.participants.len(); + let secondary_count = peers.secondary_participants.len(); + + // Parse and validate addresses early so we fail before starting the runtime. 
+ let metrics_addr: std::net::SocketAddr = args.metrics_addr.parse().map_err(|err| { + eyre::eyre!("invalid --metrics-addr '{}': {}", args.metrics_addr, err) + })?; + let _rpc_addr: std::net::SocketAddr = args + .rpc_addr + .parse() + .map_err(|err| eyre::eyre!("invalid --rpc-addr '{}': {}", args.rpc_addr, err))?; + tracing::info!( chain_id = config.chain_id, bootstrap_peers = config.network.bootstrap_peers.len(), - secondary_peers = peers.secondary_participants.len(), + secondary_peers = secondary_count, "Starting secondary peer" ); + tracing::warn!("Secondary node is in follower mode - read-only RPC not yet implemented"); + let runtime_dir = runtime_storage_directory(&config.data_dir); + tracing::info!( + runtime_dir = %runtime_dir.display(), + worker_threads = config.worker_threads, + "Starting Commonware runtime" + ); let executor = commonware_runtime::tokio::Runner::new( commonware_runtime::tokio::Config::default() - .with_storage_directory(config.data_dir.join("runtime")), + .with_storage_directory(runtime_dir) + .with_worker_threads(config.worker_threads), ); executor.start(|context| async move { let mut transport = config .network - .build_local_transport(identity_key, context.clone()) + .build_local_transport(identity_key, context.child("transport")) .map_err(|e| eyre::eyre!("failed to build transport: {}", e))?; transport @@ -216,12 +301,67 @@ impl Cli { Set::from_iter_dedup(peers.participants), Set::from_iter_dedup(peers.secondary_participants), ), - ) - .await; + ); tracing::info!("secondary peer joined network"); - futures::future::pending::<()>().await; - #[allow(unreachable_code)] + + // Spawn a metrics server so Prometheus can scrape this node. 
+ let metrics_context = Arc::new(context.child("metrics_endpoint")); + context.child("metrics").shared(true).spawn(move |_| async move { + let app = axum::Router::new().route( + "/metrics", + axum::routing::get(move || { + let metrics_context = metrics_context.clone(); + async move { + let body = metrics_context.encode(); + ( + axum::http::StatusCode::OK, + [( + axum::http::header::CONTENT_TYPE, + "application/openmetrics-text; version=1.0.0; charset=utf-8", + )], + body, + ) + } + }), + ); + + let listener = match tokio::net::TcpListener::bind(metrics_addr).await { + Ok(l) => l, + Err(e) => { + tracing::error!(addr = %metrics_addr, error = %e, "Failed to bind metrics server"); + return; + } + }; + + tracing::info!(addr = %metrics_addr, "Starting metrics server"); + if let Err(e) = axum::serve(listener, app).await { + tracing::error!(error = %e, "Metrics server error"); + } + }); + + // Spawn periodic health logging. + context.child("health").shared(true).spawn(move |ctx| async move { + let interval = std::time::Duration::from_secs(30); + loop { + ctx.sleep(interval).await; + tracing::info!( + validators = validator_count, + secondary_peers = secondary_count, + "Secondary node health: connected to P2P network" + ); + } + }); + + // Block until shutdown signal (SIGTERM / SIGINT / Ctrl-C). + let mut sigterm = + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to register SIGTERM handler"); + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + _ = sigterm.recv() => {}, + } + tracing::info!("Received shutdown signal, stopping secondary node..."); Ok::<(), eyre::Error>(()) }) } @@ -240,7 +380,6 @@ impl Cli { struct PeersInfo { participants: Vec, secondary_participants: Vec, - threshold: u32, bootstrappers: Vec<(commonware_cryptography::ed25519::PublicKey, String)>, } @@ -253,15 +392,17 @@ fn format_bootstrappers( .collect() } +/// Load peers configuration from a JSON file. 
+/// +/// Accepts peers.json files with either "quorum" (new format) or "threshold" +/// (legacy format) key -- both are ignored at runtime since the quorum is +/// always computed from the validator count via N3f1. fn load_peers(path: &PathBuf) -> eyre::Result { use commonware_codec::ReadExt; let content = std::fs::read_to_string(path)?; let json: serde_json::Value = serde_json::from_str(&content)?; - let threshold = - json["threshold"].as_u64().ok_or_else(|| eyre::eyre!("missing threshold"))? as u32; - let participants_hex: Vec = json["participants"] .as_array() .ok_or_else(|| eyre::eyre!("missing participants"))? @@ -290,7 +431,7 @@ fn load_peers(path: &PathBuf) -> eyre::Result { bootstrappers.push((pk, addr_str.to_string())); } - Ok(PeersInfo { participants, secondary_participants, threshold, bootstrappers }) + Ok(PeersInfo { participants, secondary_participants, bootstrappers }) } fn parse_public_keys( diff --git a/bin/kora/src/main.rs b/bin/kora/src/main.rs index a96926d..0211386 100644 --- a/bin/kora/src/main.rs +++ b/bin/kora/src/main.rs @@ -7,15 +7,29 @@ mod cli; fn main() -> eyre::Result<()> { use clap::Parser; - use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + use tracing_subscriber::{EnvFilter, Layer, layer::SubscriberExt, util::SubscriberInitExt}; kora_cli::Backtracing::enable(); kora_cli::SigsegvHandler::install(); - tracing_subscriber::registry() - .with(tracing_subscriber::fmt::layer()) - .with(tracing_subscriber::EnvFilter::from_default_env()) - .init(); + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { + EnvFilter::new( + "info,kora_runner=info,kora_rpc=info,kora_executor=info,commonware_consensus=info,commonware_p2p=warn", + ) + }); + + let json_format = std::env::var("LOG_FORMAT").map(|v| v == "json").unwrap_or(false); + if json_format { + tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer().json().boxed()) + .with(filter) + .init(); + } else { + tracing_subscriber::registry() + 
.with(tracing_subscriber::fmt::layer().boxed()) + .with(filter) + .init(); + } cli::Cli::parse().run() } diff --git a/bin/loadgen/Cargo.toml b/bin/loadgen/Cargo.toml index ed2e736..cf389b4 100644 --- a/bin/loadgen/Cargo.toml +++ b/bin/loadgen/Cargo.toml @@ -17,7 +17,6 @@ alloy-consensus.workspace = true alloy-eips.workspace = true tokio.workspace = true -futures.workspace = true clap.workspace = true serde.workspace = true diff --git a/bin/loadgen/README.md b/bin/loadgen/README.md index 4ae1fe9..1bb53a4 100644 --- a/bin/loadgen/README.md +++ b/bin/loadgen/README.md @@ -19,6 +19,12 @@ cargo run --release --bin loadgen -- --total-txs 10000 --concurrency 100 --accou # Target specific RPC endpoint cargo run --release --bin loadgen -- --rpc-url http://localhost:8546 --total-txs 5000 +# Broadcast each transaction to all validator RPCs in a multi-validator devnet +cargo run --release --bin loadgen -- \ + --rpc-url http://localhost:8545 \ + --broadcast-rpc-urls http://localhost:8546,http://localhost:8547,http://localhost:8548 \ + --total-txs 10000 --accounts 50 + # Dry run (test tx signing performance only) cargo run --release --bin loadgen -- --total-txs 10000 --dry-run ``` @@ -28,28 +34,47 @@ cargo run --release --bin loadgen -- --total-txs 10000 --dry-run | Flag | Default | Description | |------|---------|-------------| | `--rpc-url` | `http://127.0.0.1:8545` | RPC endpoint URL | -| `--accounts` | `10` | Number of sender accounts | +| `--broadcast-rpc-urls` | none | Additional comma-separated RPC endpoint URLs to broadcast each transaction to | +| `--accounts` | `10` | Number of sender accounts, from 1 to 255 | | `--total-txs` | `1000` | Total transactions to send | | `--concurrency` | `50` | Maximum concurrent in-flight requests | | `--chain-id` | `1337` | Chain ID for transactions | | `--dry-run` | `false` | Sign transactions without sending | | `--verbose` | `false` | Print each transaction hash | +| `--timeout-secs` | `0` | Overall timeout in seconds (0 = no 
timeout) | ## Notes -The generated accounts need to be funded in the genesis configuration for transactions to succeed. For testing RPC connectivity and mempool acceptance, transactions will be accepted even without funds (they will fail during execution). +Standard `keygen setup` devnet genesis output funds the default loadgen seed range, currently accounts 1 through 50. The default `--accounts 10` and the common `--accounts 50` stress-test configuration work against a fresh trusted devnet without manually funding sender accounts. + +If you run with non-standard accounts above the funded default range, such as `--accounts 75`, the additional seed accounts need to be included in genesis with sufficient balance or funded manually before loadgen transactions can execute successfully. + +In multi-validator devnets, pass every validator RPC endpoint through `--rpc-url` and `--broadcast-rpc-urls`. Devnet mempools are validator-local, so broadcasting gives the active proposer a copy of each transaction. Sender addresses are deterministically generated from seed bytes: - Account 1: seed `[0,0,...,0,1]` - Account 2: seed `[0,0,...,0,2]` - etc. -The loadgen outputs the sender addresses at startup so you can fund them in your genesis configuration. +The loadgen outputs the sender addresses at startup so you can verify which genesis allocations or manual transfers are needed for custom account ranges. 
+ +## Resilience + +The loadgen handles nonce desynchronization with the chain automatically: + +- **Nonce gap** (loadgen ahead of chain): waits, re-queries the on-chain nonce, and resumes from the correct nonce +- **Nonce too low** (transaction already included): treats as success and resyncs the local counter +- **Already in pool** (duplicate nonce in mempool): treats as success and moves on +- **Transient errors** (timeouts, connection refused): retries with exponential backoff up to 10 attempts +- **Transport-only fallback**: only falls back to other RPC endpoints on connection errors, not semantic rejections + +Progress is reported every 5 seconds with success/failed/TPS counters. After all transactions are submitted, an inclusion verification step compares expected nonces against on-chain state to detect silently dropped transactions. ## Performance The loadgen uses: -- `FuturesUnordered` for concurrent request handling +- Per-account sequential sends with cross-account parallelism - Connection pooling via `reqwest` -- Atomic nonce tracking for parallel account access -- Arc-wrapped accounts for thread-safe sharing +- Semaphore-bounded concurrency for in-flight HTTP requests +- Atomic nonce tracking for thread-safe access +- Arc-wrapped accounts for zero-copy sharing across tasks diff --git a/bin/loadgen/src/main.rs b/bin/loadgen/src/main.rs index ef49822..6c66ef3 100644 --- a/bin/loadgen/src/main.rs +++ b/bin/loadgen/src/main.rs @@ -15,12 +15,46 @@ use alloy_consensus::{SignableTransaction as _, TxEip1559, TxEnvelope}; use alloy_eips::eip2718::Encodable2718; use alloy_primitives::{Address, Bytes, Signature, TxKind, U256, keccak256}; use clap::Parser; -use eyre::Result; -use futures::stream::{FuturesUnordered, StreamExt}; +use eyre::{Result, WrapErr as _}; use k256::ecdsa::SigningKey; use sha3::{Digest as _, Keccak256}; +use tokio::sync::Semaphore; use tracing::{error, info, warn}; +const MIN_LOADGEN_ACCOUNTS: usize = 1; +const MAX_LOADGEN_ACCOUNTS: usize = 
u8::MAX as usize; + +/// Intrinsic gas for a simple ETH transfer (21,000). +const TRANSFER_GAS_LIMIT: u64 = 21_000; + +/// Max fee per gas for load-generated transactions (10 gwei). +/// +/// Must exceed the chain's base fee (currently `INITIAL_BASE_FEE` = 1 gwei) +/// plus any priority fee. 10 gwei gives ample headroom for base-fee +/// fluctuations during sustained load. +const MAX_FEE_PER_GAS: u128 = 10_000_000_000; + +/// Max priority fee (tip) per gas for load-generated transactions (1 gwei). +const MAX_PRIORITY_FEE_PER_GAS: u128 = 1_000_000_000; + +/// Maximum retry attempts before giving up on a transaction. +const MAX_RETRY_ATTEMPTS: u64 = 10; + +/// Base delay between retries; grows exponentially (base * 2^attempt). +const RETRY_BASE_DELAY: Duration = Duration::from_millis(100); + +/// Delay before retrying after a nonce gap (chain is behind). +const NONCE_GAP_DELAY: Duration = Duration::from_secs(1); + +/// Interval between periodic progress reports. +const PROGRESS_INTERVAL: Duration = Duration::from_secs(5); + +/// HTTP request timeout for RPC calls. +const RPC_TIMEOUT: Duration = Duration::from_secs(30); + +/// Maximum idle connections per host in the HTTP connection pool. +const RPC_POOL_MAX_IDLE: usize = 100; + /// Load generator CLI. #[derive(Parser, Debug)] #[command(name = "loadgen", about = "Load generator for Kora devnet")] @@ -29,6 +63,14 @@ struct Args { #[arg(long, default_value = "http://127.0.0.1:8545")] rpc_url: String, + /// Additional RPC endpoint URLs to broadcast each transaction to. + /// + /// Kora's current devnet mempools are validator-local, so devnet load tests + /// should submit to all validator RPCs to ensure the active proposer has the + /// transaction in its local mempool. + #[arg(long, value_delimiter = ',')] + broadcast_rpc_urls: Vec, + /// Number of accounts to use for sending transactions. #[arg(long, default_value = "10")] accounts: usize, @@ -52,6 +94,11 @@ struct Args { /// Print each transaction hash. 
#[arg(long)] verbose: bool, + + /// Overall timeout in seconds. The load test aborts if it exceeds this duration. + /// Defaults to 0 (no timeout). + #[arg(long, default_value = "0")] + timeout_secs: u64, } /// Account with signing key and nonce tracker. @@ -59,6 +106,9 @@ struct Account { key: SigningKey, address: Address, nonce: AtomicU64, + /// The on-chain nonce when this run started. Used to compute per-run + /// confirmed counts during post-run verification. + starting_nonce: AtomicU64, } impl Account { @@ -67,12 +117,38 @@ impl Account { secret[31] = seed; let key = SigningKey::from_bytes((&secret).into()).expect("valid key"); let address = address_from_key(&key); - Self { key, nonce: AtomicU64::new(0), address } + Self { key, nonce: AtomicU64::new(0), starting_nonce: AtomicU64::new(0), address } } fn next_nonce(&self) -> u64 { self.nonce.fetch_add(1, Ordering::Relaxed) } + + fn set_nonce(&self, nonce: u64) { + self.nonce.store(nonce, Ordering::Relaxed); + } + + fn set_starting_nonce(&self, nonce: u64) { + self.starting_nonce.store(nonce, Ordering::Relaxed); + } + + fn get_starting_nonce(&self) -> u64 { + self.starting_nonce.load(Ordering::Relaxed) + } +} + +fn loadgen_seeds(accounts: usize) -> Result> { + if !(MIN_LOADGEN_ACCOUNTS..=MAX_LOADGEN_ACCOUNTS).contains(&accounts) { + eyre::bail!( + "loadgen accounts must be between {} and {}, got {}", + MIN_LOADGEN_ACCOUNTS, + MAX_LOADGEN_ACCOUNTS, + accounts + ); + } + + let accounts = u8::try_from(accounts).expect("loadgen account count was validated"); + Ok((1..=accounts).collect()) } fn address_from_key(key: &SigningKey) -> Address { @@ -82,6 +158,7 @@ fn address_from_key(key: &SigningKey) -> Address { Address::from_slice(&hash[12..]) } +#[allow(clippy::too_many_arguments)] fn sign_eip1559_transfer( key: &SigningKey, chain_id: u64, @@ -89,13 +166,15 @@ fn sign_eip1559_transfer( value: U256, nonce: u64, gas_limit: u64, + max_fee_per_gas: u128, + max_priority_fee_per_gas: u128, ) -> Bytes { let tx = TxEip1559 { 
chain_id, nonce, gas_limit, - max_fee_per_gas: 0, - max_priority_fee_per_gas: 0, + max_fee_per_gas, + max_priority_fee_per_gas, to: TxKind::Call(to), value, access_list: Default::default(), @@ -112,7 +191,35 @@ fn sign_eip1559_transfer( Bytes::from(raw_bytes) } +fn parse_json_rpc_quantity(quantity: &str) -> Result { + let value = quantity + .strip_prefix("0x") + .ok_or_else(|| eyre::eyre!("JSON-RPC quantity missing 0x prefix: {quantity}"))?; + if value.is_empty() { + eyre::bail!("JSON-RPC quantity has no digits: {quantity}"); + } + + u64::from_str_radix(value, 16) + .wrap_err_with(|| format!("invalid JSON-RPC quantity: {quantity}")) +} + +/// Returns `true` if the error message indicates a transport-level failure +/// (connection refused, timeout, etc.) rather than a semantic rejection +/// (nonce error, pool error, etc.). +fn is_transport_error(err: &str) -> bool { + err.contains("error sending request") + || err.contains("Connection refused") + || err.contains("connection refused") + || err.contains("timed out") + || err.contains("connection closed") + || err.contains("broken pipe") + || err.contains("reset by peer") +} + /// HTTP client for RPC calls. +/// +/// Multiple `RpcClient`s share a single underlying `reqwest::Client` connection +/// pool, which is more efficient than creating separate pools per endpoint. 
#[derive(Clone)] struct RpcClient { client: reqwest::Client, @@ -120,12 +227,7 @@ struct RpcClient { } impl RpcClient { - fn new(url: String) -> Self { - let client = reqwest::Client::builder() - .timeout(Duration::from_secs(30)) - .pool_max_idle_per_host(100) - .build() - .expect("build http client"); + const fn new(url: String, client: reqwest::Client) -> Self { Self { client, url } } @@ -149,6 +251,78 @@ impl RpcClient { Ok(json["result"].as_str().unwrap_or("").to_string()) } + + async fn get_transaction_count(&self, address: Address) -> Result { + let body = serde_json::json!({ + "jsonrpc": "2.0", + "method": "eth_getTransactionCount", + "params": [address.to_string(), "latest"], + "id": 1 + }); + + let resp = self.client.post(&self.url).json(&body).send().await?; + let json: serde_json::Value = resp.json().await?; + + if let Some(error) = json.get("error") { + eyre::bail!("RPC error: {}", error); + } + + let nonce_hex = + json["result"].as_str().ok_or_else(|| eyre::eyre!("missing nonce result"))?; + parse_json_rpc_quantity(nonce_hex) + } +} + +/// Query `eth_getTransactionCount` from any available RPC client, trying each +/// in order until one succeeds. +async fn get_nonce_from_any(clients: &[RpcClient], address: Address) -> Result { + let mut last_err = None; + for client in clients { + match client.get_transaction_count(address).await { + Ok(nonce) => return Ok(nonce), + Err(e) => last_err = Some(e), + } + } + Err(last_err.unwrap_or_else(|| eyre::eyre!("no RPC clients configured"))) +} + +/// Send a transaction to a specific client (by index). Falls back to trying +/// other clients only on transport-level errors (timeouts, connection refused). +/// Semantic rejections (nonce errors, pool errors) are returned immediately +/// since they would fail identically on every validator. 
+async fn send_raw_transaction_to( + clients: &[RpcClient], + raw_tx: Bytes, + target_idx: usize, +) -> Result { + let idx = target_idx % clients.len(); + + // Try the target client first + match clients[idx].send_raw_transaction(&raw_tx).await { + Ok(hash) => Ok(hash), + Err(e) => { + let err_str = e.to_string(); + + // Semantic rejections (nonce errors, pool errors) will fail on all + // validators identically. Only fall back for transport errors. + if !is_transport_error(&err_str) { + return Err(e); + } + + // Transport error: try other clients + let mut errors = vec![err_str]; + for (i, client) in clients.iter().enumerate() { + if i == idx { + continue; + } + match client.send_raw_transaction(&raw_tx).await { + Ok(hash) => return Ok(hash), + Err(e) => errors.push(e.to_string()), + } + } + eyre::bail!("all RPC endpoints failed: {}", errors.join("; ")) + } + } } #[tokio::main] @@ -160,36 +334,68 @@ async fn main() -> Result<()> { .init(); let args = Args::parse(); + let mut rpc_urls = Vec::with_capacity(args.broadcast_rpc_urls.len() + 1); + rpc_urls.push(args.rpc_url.clone()); + rpc_urls.extend(args.broadcast_rpc_urls.iter().cloned()); info!( rpc_url = %args.rpc_url, + broadcast_rpc_urls = ?args.broadcast_rpc_urls, accounts = args.accounts, total_txs = args.total_txs, concurrency = args.concurrency, chain_id = args.chain_id, dry_run = args.dry_run, + timeout_secs = args.timeout_secs, "Starting load generator" ); + let account_seeds = loadgen_seeds(args.accounts)?; let accounts: Vec> = - (1..=args.accounts).map(|i| Arc::new(Account::new(i as u8))).collect(); + account_seeds.into_iter().map(|seed| Arc::new(Account::new(seed))).collect(); - info!("Sender addresses (fund these with ETH):"); + info!("Sender addresses:"); for acc in &accounts { info!(" {}", acc.address); } let receiver = Address::repeat_byte(0xBB); let transfer_amount = U256::from(1u64); - let gas_limit = 21_000u64; - let client = RpcClient::new(args.rpc_url.clone()); + let http_client = 
reqwest::Client::builder() + .timeout(RPC_TIMEOUT) + .pool_max_idle_per_host(RPC_POOL_MAX_IDLE) + .build() + .expect("build http client"); + let clients: Arc> = Arc::new( + rpc_urls.into_iter().map(|url| RpcClient::new(url, http_client.clone())).collect(), + ); + + // Initialize nonces from chain state, with fallback across all RPC endpoints + if !args.dry_run { + for account in &accounts { + let nonce = + get_nonce_from_any(&clients, account.address).await.wrap_err_with(|| { + format!("failed to query nonce for {} from any RPC endpoint", account.address) + })?; + account.set_starting_nonce(nonce); + account.set_nonce(nonce); + } + } let success_count = Arc::new(AtomicU64::new(0)); let failure_count = Arc::new(AtomicU64::new(0)); + let nonce_resync_count = Arc::new(AtomicU64::new(0)); let start = Instant::now(); + // Derive optional deadline from --timeout-secs + let deadline = if args.timeout_secs > 0 { + Some(start + Duration::from_secs(args.timeout_secs)) + } else { + None + }; + if args.dry_run { for i in 0..args.total_txs { let account = &accounts[i as usize % accounts.len()]; @@ -200,7 +406,9 @@ async fn main() -> Result<()> { receiver, transfer_amount, nonce, - gas_limit, + TRANSFER_GAS_LIMIT, + MAX_FEE_PER_GAS, + MAX_PRIORITY_FEE_PER_GAS, ); success_count.fetch_add(1, Ordering::Relaxed); if (i + 1) % 1000 == 0 { @@ -208,55 +416,235 @@ async fn main() -> Result<()> { } } } else { - let mut futures = FuturesUnordered::new(); + // Per-account sequential sends with cross-account parallelism. + // Each account sends its transactions one at a time (ensuring nonce ordering), + // but all accounts run in parallel. A semaphore limits total in-flight requests. 
+ let num_accounts = accounts.len(); + let txs_per_account = args.total_txs / num_accounts as u64; + let remainder = args.total_txs % num_accounts as u64; + + // Global concurrency limiter -- bounds total in-flight HTTP requests + if args.concurrency == 0 { + eyre::bail!("--concurrency must be >= 1"); + } + let semaphore = Arc::new(Semaphore::new(args.concurrency)); + + // Spawn periodic progress reporter + let progress_success = success_count.clone(); + let progress_failure = failure_count.clone(); + let progress_resyncs = nonce_resync_count.clone(); + let progress_total = args.total_txs; + let progress_start = start; + let progress_handle = tokio::spawn(async move { + let mut interval = tokio::time::interval(PROGRESS_INTERVAL); + interval.tick().await; // skip first immediate tick + loop { + interval.tick().await; + let s = progress_success.load(Ordering::Relaxed); + let f = progress_failure.load(Ordering::Relaxed); + let r = progress_resyncs.load(Ordering::Relaxed); + let completed = s + f; + let elapsed = progress_start.elapsed().as_secs_f64(); + let tps = if elapsed > 0.0 { s as f64 / elapsed } else { 0.0 }; + info!( + success = s, + failed = f, + total = progress_total, + nonce_resyncs = r, + elapsed_secs = format!("{:.1}", elapsed), + tps = format!("{:.1}", tps), + pct = format!("{:.1}%", completed as f64 / progress_total as f64 * 100.0), + "progress" + ); + if completed >= progress_total { + break; + } + } + }); - for i in 0..args.total_txs { - let account = accounts[i as usize % accounts.len()].clone(); - let client = client.clone(); + let mut handles = Vec::with_capacity(num_accounts); + + for (idx, account) in accounts.iter().enumerate() { + let account = account.clone(); + let clients = clients.clone(); let success = success_count.clone(); let failure = failure_count.clone(); + let resyncs = nonce_resync_count.clone(); + let semaphore = semaphore.clone(); let verbose = args.verbose; + let chain_id = args.chain_id; + + // Each account is pinned to one 
validator (avoids stale copies in other mempools) + let target_validator = idx; + + // First `remainder` accounts send one extra tx + let count = txs_per_account + if (idx as u64) < remainder { 1 } else { 0 }; + + let handle = tokio::spawn(async move { + // Use a while loop that tracks transactions completed (sent or + // permanently failed), not nonces attempted. A nonce resync does + // not consume a "send slot" -- the outer loop re-acquires a fresh + // nonce and re-signs a new transaction. + let mut sent = 0u64; + while sent < count { + // Check deadline before each transaction + if let Some(dl) = deadline + && Instant::now() >= dl + { + warn!( + account = %account.address, + completed = sent, + target = count, + "timeout reached, stopping account" + ); + break; + } - let nonce = account.next_nonce(); - let tx = sign_eip1559_transfer( - &account.key, - args.chain_id, - receiver, - transfer_amount, - nonce, - gas_limit, - ); - - let fut = async move { - match client.send_raw_transaction(&tx).await { - Ok(hash) => { - success.fetch_add(1, Ordering::Relaxed); - if verbose { - info!(nonce, hash = %hash, "tx sent"); + let nonce = account.next_nonce(); + let tx = sign_eip1559_transfer( + &account.key, + chain_id, + receiver, + transfer_amount, + nonce, + TRANSFER_GAS_LIMIT, + MAX_FEE_PER_GAS, + MAX_PRIORITY_FEE_PER_GAS, + ); + + // Retry with exponential backoff on transient errors. Nonce + // errors trigger resync instead of blind retries. The semaphore + // permit is acquired per-attempt and dropped after the HTTP call + // completes, so backoff sleeps do not consume concurrency slots. 
+ let mut attempts = 0u32; + let mut needs_resync = false; + loop { + let _permit = semaphore.acquire().await.expect("semaphore closed"); + let result = + send_raw_transaction_to(&clients, tx.clone(), target_validator).await; + drop(_permit); + + match result { + Ok(hash) => { + success.fetch_add(1, Ordering::Relaxed); + if verbose { + info!(nonce, hash = %hash, account = %account.address, "tx sent"); + } + sent += 1; + break; + } + Err(e) => { + let err_msg = e.to_string(); + attempts += 1; + + if err_msg.contains("nonce too low") { + // Transaction was already included on-chain + // (e.g. via broadcast copy). Re-query chain + // nonce and advance local counter. + match get_nonce_from_any(&clients, account.address).await { + Ok(chain_nonce) => { + account.set_nonce(chain_nonce); + resyncs.fetch_add(1, Ordering::Relaxed); + } + Err(resync_err) => { + warn!( + account = %account.address, + error = %resync_err, + "nonce resync failed after nonce-too-low, \ + keeping local nonce" + ); + } + } + // The nonce was consumed on-chain; count as success. + success.fetch_add(1, Ordering::Relaxed); + sent += 1; + break; + } else if err_msg.contains("already in pool") { + // Transaction with this nonce is already pending + // in the pool. The nonce is covered. + success.fetch_add(1, Ordering::Relaxed); + sent += 1; + break; + } else if err_msg.contains("nonce gap") { + // We are ahead of the chain. Wait, resync nonce, + // and restart the outer loop with a fresh nonce + // and re-signed transaction. 
+ warn!( + nonce, + error = %e, + account = %account.address, + "nonce gap detected, resyncing" + ); + tokio::time::sleep(NONCE_GAP_DELAY).await; + match get_nonce_from_any(&clients, account.address).await { + Ok(chain_nonce) => { + account.set_nonce(chain_nonce); + resyncs.fetch_add(1, Ordering::Relaxed); + } + Err(resync_err) => { + warn!( + account = %account.address, + error = %resync_err, + "nonce resync failed during gap recovery, \ + will retry on next iteration" + ); + // Brief backoff before the outer loop retries + tokio::time::sleep(NONCE_GAP_DELAY).await; + } + } + // Do NOT increment `sent` -- this nonce was never + // consumed. Break inner loop and let the outer + // while-loop re-acquire a correct nonce. + needs_resync = true; + break; + } else { + // Transient error -- exponential backoff + if u64::from(attempts) >= MAX_RETRY_ATTEMPTS { + warn!( + nonce, + error = %e, + account = %account.address, + "tx failed after retries" + ); + failure.fetch_add(1, Ordering::Relaxed); + sent += 1; + break; + } + // Exponential backoff: 100ms, 200ms, 400ms, ... + let delay = + RETRY_BASE_DELAY * 2u32.saturating_pow(attempts - 1); + tokio::time::sleep(delay).await; + } + } } } - Err(e) => { - failure.fetch_add(1, Ordering::Relaxed); - warn!(nonce, error = %e, "tx failed"); + + // After a nonce resync, the pre-signed tx is stale. The outer + // while-loop will re-acquire a fresh nonce on the next iteration. + // No nonce rewind is needed -- nonce management is handled + // exclusively inside the error handlers above. 
+ if needs_resync { + continue; } } - }; + }); - futures.push(fut); + handles.push(handle); + } - // Limit concurrency by waiting when we hit the limit - if futures.len() >= args.concurrency { - futures.next().await; - } + // Wait for all account tasks to finish + for handle in handles { + handle.await?; } - // Drain remaining futures - while futures.next().await.is_some() {} + // Stop the progress reporter + progress_handle.abort(); } let elapsed = start.elapsed(); let success = success_count.load(Ordering::Relaxed); let failure = failure_count.load(Ordering::Relaxed); + let resyncs = nonce_resync_count.load(Ordering::Relaxed); let tps = if elapsed.as_secs_f64() > 0.0 { success as f64 / elapsed.as_secs_f64() } else { 0.0 }; @@ -264,14 +652,166 @@ async fn main() -> Result<()> { sent = success + failure, success, failed = failure, + nonce_resyncs = resyncs, elapsed_secs = format!("{:.2}", elapsed.as_secs_f64()), tps = format!("{:.2}", tps), "Load generation complete" ); + // Post-run inclusion verification: compare expected nonces against on-chain + // state to detect silently dropped transactions. 
+ if !args.dry_run { + info!("Verifying on-chain inclusion..."); + let mut total_confirmed = 0u64; + let mut total_pending = 0u64; + + for account in &accounts { + let expected_nonce = account.nonce.load(Ordering::Relaxed); + let starting_nonce = account.get_starting_nonce(); + match get_nonce_from_any(&clients, account.address).await { + Ok(chain_nonce) => { + let gap = expected_nonce.saturating_sub(chain_nonce); + let confirmed_this_run = chain_nonce.saturating_sub(starting_nonce); + if gap > 0 { + warn!( + account = %account.address, + expected = expected_nonce, + confirmed = chain_nonce, + pending = gap, + "account has unconfirmed transactions" + ); + } + total_confirmed += confirmed_this_run; + total_pending += gap; + } + Err(e) => { + warn!( + account = %account.address, + error = %e, + "failed to verify on-chain nonce" + ); + } + } + } + + info!(total_confirmed, total_pending, "Inclusion verification complete"); + } + if failure > 0 { error!(failed = failure, "Some transactions failed"); } Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + const LOADGEN_ADDRESS_FIXTURES: &[(u8, &str)] = &[ + (1, "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf"), + (2, "0x2B5AD5c4795c026514f8317c7a215E218DcCD6cF"), + (3, "0x6813Eb9362372EEF6200f3b1dbC3f819671cBA69"), + ]; + + #[test] + fn account_addresses_match_seed_fixtures() { + for &(seed, expected) in LOADGEN_ADDRESS_FIXTURES { + let account = Account::new(seed); + assert_eq!(account.address.to_string(), expected); + } + } + + #[test] + fn loadgen_seeds_accepts_supported_range() { + assert_eq!(loadgen_seeds(1).unwrap(), vec![1]); + assert_eq!(loadgen_seeds(3).unwrap(), vec![1, 2, 3]); + + let seeds = loadgen_seeds(255).unwrap(); + assert_eq!(seeds.len(), 255); + assert_eq!(seeds.first(), Some(&1)); + assert_eq!(seeds.last(), Some(&255)); + } + + #[test] + fn loadgen_seeds_rejects_unsupported_counts() { + for accounts in [0, 256, usize::MAX] { + let error = loadgen_seeds(accounts).unwrap_err().to_string(); + 
assert!(error.contains("between 1 and 255")); + assert!(error.contains(&accounts.to_string())); + } + } + + #[test] + fn parse_json_rpc_quantity_accepts_hex_quantities() { + assert_eq!(parse_json_rpc_quantity("0x0").unwrap(), 0); + assert_eq!(parse_json_rpc_quantity("0xa").unwrap(), 10); + assert_eq!(parse_json_rpc_quantity("0x10").unwrap(), 16); + assert_eq!(parse_json_rpc_quantity("0xFF").unwrap(), 255); + } + + #[test] + fn parse_json_rpc_quantity_rejects_invalid_quantities() { + for quantity in ["", "10", "0x", "0xzz"] { + assert!(parse_json_rpc_quantity(quantity).is_err()); + } + } + + #[test] + fn sign_eip1559_transfer_produces_valid_envelope() { + let account = Account::new(1); + let to = Address::repeat_byte(0xBB); + let raw = sign_eip1559_transfer( + &account.key, + 1337, + to, + U256::from(1), + 0, + TRANSFER_GAS_LIMIT, + MAX_FEE_PER_GAS, + MAX_PRIORITY_FEE_PER_GAS, + ); + // EIP-2718 type-2 envelope starts with 0x02 + assert!(!raw.is_empty()); + assert_eq!(raw[0], 0x02, "expected EIP-1559 type prefix"); + } + + #[test] + fn retry_backoff_is_exponential() { + let delays: Vec = + (1..=5).map(|attempt| RETRY_BASE_DELAY * 2u32.saturating_pow(attempt - 1)).collect(); + assert_eq!(delays[0], Duration::from_millis(100)); + assert_eq!(delays[1], Duration::from_millis(200)); + assert_eq!(delays[2], Duration::from_millis(400)); + assert_eq!(delays[3], Duration::from_millis(800)); + assert_eq!(delays[4], Duration::from_millis(1600)); + } + + #[test] + fn nonce_increments_sequentially() { + let account = Account::new(1); + assert_eq!(account.next_nonce(), 0); + assert_eq!(account.next_nonce(), 1); + assert_eq!(account.next_nonce(), 2); + account.set_nonce(42); + assert_eq!(account.next_nonce(), 42); + } + + #[test] + fn is_transport_error_classifies_correctly() { + // Transport errors should return true + assert!(is_transport_error("error sending request for url")); + assert!(is_transport_error("Connection refused (os error 111)")); + 
assert!(is_transport_error("connection refused")); + assert!(is_transport_error("request timed out")); + assert!(is_transport_error("connection closed before message completed")); + assert!(is_transport_error("broken pipe")); + assert!(is_transport_error("reset by peer")); + + // Semantic errors should return false + assert!(!is_transport_error("RPC error: nonce too low")); + assert!(!is_transport_error("RPC error: nonce gap: got 339, expected 57")); + assert!(!is_transport_error("nonce 42 already in pool for sender 0x1234")); + assert!(!is_transport_error("transaction rejected by mempool")); + } +} diff --git a/changelogs/pr-69-indexed-response-fidelity.md b/changelogs/pr-69-indexed-response-fidelity.md new file mode 100644 index 0000000..e0fbcc5 --- /dev/null +++ b/changelogs/pr-69-indexed-response-fidelity.md @@ -0,0 +1,141 @@ +# PR #69: Preserve Indexed Transaction and Log Metadata for JSON-RPC Response Fidelity + +## Problem + +When the JSON-RPC server returned transactions, receipts, and logs fetched from the +block index, several fields were either zeroed out or hardcoded to placeholder +values. Specifically: + +- **Transaction responses** were missing `tx_type`, `chain_id`, `max_fee_per_gas`, + `max_priority_fee_per_gas`, and the signature components (`v`, `r`, `s`). + All of these were returned as zero/null regardless of the actual transaction. + +- **Receipt responses** were missing `logs_bloom`, `tx_type`, and + `effective_gas_price`. The logs bloom was always empty bytes, the type was + always `0x0`, and the effective gas price was always zero. + +- **Log responses** (both within receipts and from `eth_getLogs`) were missing + `block_number`, `block_hash`, `transaction_hash`, and `transaction_index`. + These were returned as zero values, breaking any client that relies on log + context to correlate events with their originating transactions and blocks. 
+ +- **Signature `v` field** was typed as `U64` in the RPC transaction struct, + which cannot represent legacy EIP-155 `v` values (which can be `chain_id * 2 + 35` + and exceed `u64::MAX` for very large chain IDs). The `v` value was also + computed as raw y-parity (0 or 1) instead of the EIP-155 encoded value for + legacy transactions. + +These gaps caused Ethereum tooling (ethers.js, viem, Foundry cast, block +explorers) to reject or misinterpret responses, since the missing fields are +required by the Ethereum JSON-RPC specification. + +## Solution + +The fix enriches the indexer's data model and the RPC conversion layer so that +every field mandated by the Ethereum JSON-RPC specification is captured at +indexing time and faithfully reproduced in responses. + +### How it works + +1. **At indexing time** (when a finalized block is processed), the transaction + envelope is decoded to extract the full set of metadata: transaction type, + chain ID, EIP-1559 fee parameters, and the cryptographic signature (v, r, s). + The `v` component is computed using `to_eip155_value` for legacy transactions + to produce the correct EIP-155 encoded value. + +2. **For receipts**, the logs bloom filter is computed from the receipt's logs + using `alloy_primitives::logs_bloom`, the transaction type is carried through + from the transaction metadata, and the effective gas price is calculated + using the standard formula: + `min(max_fee_per_gas, base_fee_per_gas + max_priority_fee_per_gas)`. + +3. **For logs**, block-level and transaction-level context (block number, block + hash, transaction hash, transaction index) is attached to each log entry at + index time, so it is available when logs are returned individually via + `eth_getLogs` or embedded in receipt responses. + +4. **The `v` field type** in `RpcTransaction` was widened from `U64` to `U256` + to accommodate the full EIP-155 value range. + +5. 
**The pending transaction path** (`raw_tx_to_pending_rpc` in `eth.rs`) was + updated to compute `v` using the same EIP-155 logic, ensuring consistency + between pending and indexed transaction responses. + +## Files Modified + +- **`crates/storage/indexer/src/types.rs`** -- Added fields to `IndexedTransaction` + (`tx_type`, `chain_id`, `max_fee_per_gas`, `max_priority_fee_per_gas`, `v`, + `r`, `s`), `IndexedReceipt` (`logs_bloom`, `tx_type`, `effective_gas_price`), + and `IndexedLog` (`block_number`, `block_hash`, `transaction_hash`, + `transaction_index`). + +- **`crates/storage/indexer/src/store.rs`** -- Updated all test helpers to + populate the new fields. + +- **`crates/node/reporters/src/lib.rs`** -- Extended `TxMetadata` and the + `index_finalized_block` function to extract and propagate the new fields. + Added helper functions: `signature_v`, `transaction_type`, + `transaction_gas_price` (renamed from `effective_gas_price`), + `max_fee_per_gas`, `max_priority_fee_per_gas`, and + `receipt_effective_gas_price`. Added an integration test that constructs a + real signed EIP-1559 transaction, indexes it, and verifies all fields. + +- **`crates/node/reporters/Cargo.toml`** -- Added `k256` and `sha3` as + dev-dependencies for the integration test's transaction signing. + +- **`crates/node/rpc/src/types.rs`** -- Widened `RpcTransaction::v` from `U64` + to `U256`. + +- **`crates/node/rpc/src/eth.rs`** -- Updated `raw_tx_to_pending_rpc` to + compute `v` using EIP-155 encoding for legacy transactions (matching the + indexed path). Added a `signature_v` helper function. + +- **`crates/node/rpc/src/indexed_provider.rs`** -- Updated `indexed_tx_to_rpc` + and `indexed_receipt_to_rpc` to propagate the new fields instead of returning + zeros. Updated `get_logs` to use per-log block/transaction metadata instead + of block-level placeholders. Added tests for EIP-1559 field preservation, + receipt metadata, and `get_logs` metadata. 
+ +- **`Cargo.lock`** -- Updated to reflect the new dev-dependencies. + +## Breaking Changes + +- **`IndexedTransaction`**, **`IndexedReceipt`**, and **`IndexedLog`** have new + required fields. Any code that constructs these types directly (e.g., in tests + or alternative indexer implementations) must be updated to provide the + additional fields. + +- **`RpcTransaction::v`** changed from `U64` to `U256`. Any code that reads + this field and expects a `U64` type must be updated. The JSON serialization + format is unchanged (both serialize as hex-encoded integers), so downstream + JSON-RPC clients are not affected. + +## Testing + +The following tests cover these changes: + +- **`kora-reporters::tests::finalized_index_preserves_transaction_receipt_and_log_metadata`** -- + End-to-end test that signs a real EIP-1559 transaction with k256, constructs + a block and execution outcome, runs `index_finalized_block`, and verifies + that the indexed transaction has correct `tx_type`, `chain_id`, gas fields, + and non-zero signature components; that the receipt has the correct + `effective_gas_price` (13 = min(20, 10+3)) and a non-zero logs bloom; and + that logs carry the correct block and transaction metadata. + +- **`kora-rpc::indexed_provider::tests::indexed_tx_preserves_eip1559_fields`** -- + Unit test verifying that `indexed_tx_to_rpc` correctly converts all new + `IndexedTransaction` fields into the corresponding `RpcTransaction` fields. + +- **`kora-rpc::indexed_provider::tests::indexed_receipt_preserves_fee_type_bloom_and_log_metadata`** -- + Unit test verifying that `indexed_receipt_to_rpc` correctly converts receipt + type, effective gas price, logs bloom bytes, and per-log metadata. 
+ +- **`kora-rpc::indexed_provider::tests::get_logs_returns_indexed_block_and_transaction_metadata`** -- + Integration test that inserts a block with a receipt containing a log, + queries via `get_logs`, and verifies that the returned RPC log carries the + correct block number, block hash, transaction hash, transaction index, and + log index. + +- All pre-existing tests in `kora-indexer::store` and + `kora-rpc::indexed_provider` have been updated to construct the enriched + types and continue to pass. diff --git a/changelogs/pr-70-gas-price-oracle.md b/changelogs/pr-70-gas-price-oracle.md new file mode 100644 index 0000000..4fe6b52 --- /dev/null +++ b/changelogs/pr-70-gas-price-oracle.md @@ -0,0 +1,134 @@ +# PR #70: Estimate gas fees from recent blocks + +## Problem + +The Ethereum JSON-RPC fee endpoints (`eth_gasPrice`, `eth_maxPriorityFeePerGas`, +and `eth_feeHistory`) previously returned hardcoded values of 1 gwei regardless +of actual on-chain activity. Wallets and client libraries rely on these endpoints +to choose transaction fees, so static responses led to poor fee suggestions, +all-zero reward percentiles, and a misleading fee market signal. + +## Solution + +This PR introduces a recent-block fee oracle that samples indexed block and +transaction data to produce dynamic fee estimates. The oracle: + +- Scans a configurable window of recent blocks (default: 20) to collect + transaction gas prices and priority fees. +- Computes a percentile-based estimate (default: 60th percentile) from the + sampled values. +- Enforces configurable minimum and maximum bounds to prevent extreme values. +- Caches results by head block number so repeated fee queries within the same + block do not rescan history. +- Falls back to safe defaults (1 gwei base + 1 gwei tip) when there are no + transactions or no blocks available. 
+ +For `eth_feeHistory`, the implementation now returns real per-block data: +- Actual base fees from indexed blocks (with carry-forward for missing blocks). +- Computed gas used ratios from each block's gas_used / gas_limit. +- EIP-1559 next-block base fee prediction using the standard elasticity formula. +- Gas-weighted reward percentiles derived from transaction priority fees. + +### Key design decisions + +- **EIP-1559 effective price**: For type-2 (and later) transactions, the oracle + uses `min(max_fee, base_fee + tip)` instead of the raw `gas_price` field, which + for EIP-1559 transactions represents `max_fee_per_gas` and would inflate + estimates. +- **Indexed EIP-1559 transactions without fee fields**: When a type-2+ transaction + is missing `max_fee_per_gas` / `max_priority_fee_per_gas` (possible depending on + the indexer), the oracle returns zero priority fee rather than computing a + misleading value from `gas_price - base_fee`. +- **Max price bypass**: When the chain's base fee alone exceeds the configured + `max_price`, the oracle still returns a usable price (base_fee + tip) instead of + clamping to a value that would make transactions un-submittable. + +## Files modified + +### `crates/node/rpc/src/eth.rs` + +- Added `GasOracleConfig` (public, configurable), `GasOracleEstimate`, and + `CachedGasOracleEstimate` types. +- Added constants: `DEFAULT_GAS_ORACLE_BLOCKS`, `DEFAULT_GAS_ORACLE_PERCENTILE`, + `GWEI`, `DEFAULT_MAX_GAS_PRICE`. +- Added `gas_oracle_config` and `gas_oracle_cache` fields to `EthApiImpl`. +- Added builder method `with_gas_oracle_config()` and internal constructor + `from_parts()` to support oracle configuration. +- Added `recent_fee_estimate()` method that drives `eth_gasPrice` and + `eth_maxPriorityFeePerGas`. +- Replaced hardcoded `eth_gasPrice` and `eth_maxPriorityFeePerGas` with + oracle-derived values. +- Rewrote `eth_feeHistory` to return real block data instead of static values. 
+- Added helper functions: `estimate_recent_fees`, `block_by_number_or_none`, + `resolve_fee_history_newest`, `default_base_fee`, `percentile_value`, + `block_gas_used_ratio`, `compute_reward_percentiles`, + `weighted_percentile_reward`, `percentile_threshold`, `effective_priority_fee`, + `is_dynamic_fee_type`, `effective_gas_price_for_sampling`, + `calculate_next_base_fee`. +- Added `MockFeeStateProvider` and test helpers (`make_fee_block`, + `make_eip1559_fee_block`, `Eip1559TxParams`, `gwei`) for fee oracle testing. +- Added tests covering: oracle with recent transactions, empty-chain fallback, + fee history base fees and gas ratios, fee history rewards (non-empty and empty + blocks), EIP-1559 effective priority fee (normal, capped-at-headroom, missing + fields), EIP-1559 effective gas price sampling, max_price enforcement, + base-fee-above-cap bypass, calculate_next_base_fee (at/above/below target, zero + limit), percentile edge cases, resolve_fee_history_newest with Earliest tag, + multi-block fee_history structure, legacy tx gas price sampling, and + block_gas_used_ratio edge cases. + +### `crates/node/rpc/src/lib.rs` + +- Added `GasOracleConfig` to the public re-exports so downstream crates can + configure the gas oracle. + +## Breaking changes + +None. The public API is additive: +- `GasOracleConfig` is a new public type. +- `EthApiImpl::with_gas_oracle_config()` is a new builder method. +- Existing constructors (`new`, `with_tx_submit`) continue to work with default + oracle settings. + +The only behavioral change is that `eth_gasPrice`, `eth_maxPriorityFeePerGas`, +and `eth_feeHistory` now return dynamic values instead of hardcoded 1 gwei. This +is the intended fix and should not break any correctly-written clients (clients +that hardcoded expectations around 1 gwei responses were already working around +a bug). + +## Migration considerations + +No code changes required for existing consumers. 
The default oracle configuration +(20-block window, 60th percentile, 1 gwei min, 500 gwei max) is suitable for +most deployments. Operators who need different bounds can use +`EthApiImpl::with_gas_oracle_config()`. + +## Testing + +The test suite covers: +- **Happy path**: Gas price and priority fee derived from recent transaction data + across multiple blocks. +- **Empty chain**: Fallback to base_fee + min_priority_fee when no transactions + exist. +- **Fee history structure**: Base fees, gas used ratios, and next-block prediction + from indexed blocks. +- **Fee history rewards**: Non-zero reward percentiles for blocks with + transactions, zero rewards for empty blocks. +- **EIP-1559 correctness**: Effective priority fee uses min(tip, headroom), caps + at headroom when tip exceeds it, and returns zero for indexed transactions + missing fee fields. +- **Gas price sampling**: EIP-1559 transactions use effective gas price, not + max_fee_per_gas. +- **Max price enforcement**: Gas price is clamped to max_price when base fee is + below the cap. +- **Base fee above cap**: Oracle returns a usable price when base fee alone + exceeds max_price. +- **EIP-1559 base fee calculation**: Next-block base fee increases above target, + decreases below target, stays flat at target, and handles zero gas limit. +- **Percentile edge cases**: 0th and 100th percentile return min/max values; + empty input returns None. +- **Block tag resolution**: Earliest tag resolves to block 0. +- **Multi-block fee history**: Correct array lengths across a 3-block window. +- **Legacy transactions**: Gas price sampling uses raw gas_price field. +- **Gas used ratio**: Handles zero gas limit and full blocks correctly. 
+ +Run with: `cargo test -p kora-rpc` diff --git a/changelogs/pr-71-pending-mempool-subscriptions.md b/changelogs/pr-71-pending-mempool-subscriptions.md new file mode 100644 index 0000000..6bfae93 --- /dev/null +++ b/changelogs/pr-71-pending-mempool-subscriptions.md @@ -0,0 +1,204 @@ +# PR #71: Pending Transaction & Mempool Subscription Support + +## Problem + +Before this change, Kora nodes had no way for external clients to receive +real-time notifications about transaction lifecycle events. Wallets, block +explorers, and monitoring tools had to poll `eth_getTransactionByHash` or +`eth_getTransactionReceipt` in a loop to discover when a transaction was +accepted, included in a block, or evicted from the mempool. This created +unnecessary RPC load and introduced latency between events and their +observation. + +## Solution + +This PR adds two WebSocket/SSE subscription endpoints that push transaction +lifecycle events to connected clients: + +1. **`eth_subscribe("newPendingTransactions")`** -- standard Ethereum + subscription that notifies clients whenever a new transaction enters the + mempool. Supports an optional `{ "fullTx": true }` parameter to receive + the full `RpcTransaction` object instead of just the hash. + +2. **`kora_subscribe("mempool")`** -- Kora-specific subscription that streams + the full mempool lifecycle for every transaction: `TxAdded` (accepted into + the pool), `TxIncluded` (finalized in a block), and `TxEvicted` (removed + without inclusion, with a human-readable reason). + +Both subscriptions use `tokio::sync::broadcast` channels so that multiple +WebSocket clients can subscribe independently without blocking the main +transaction processing pipeline. 
+ +## How It Works + +### Event Flow + +``` +eth_sendRawTransaction + --> EthApiImpl::broadcast_pending_tx() + --> PendingTxEvent::Added (eth_subscribe consumers) + --> MempoolEvent::TxAdded (kora_subscribe consumers) + +Block finalized + --> FinalizedReporter::report() + --> publish_mempool_inclusions() + --> MempoolEvent::TxIncluded (kora_subscribe consumers) + +Transaction replaced / removed from pool + --> TransactionPool::remove_with_reason() + --> MempoolEvent::TxEvicted (kora_subscribe consumers) +``` + +### Channel Architecture + +- `PendingTxEventSender` (`broadcast::Sender<PendingTxEvent>`) -- carries + Ethereum-standard pending transaction notifications (hash or full tx). +- `MempoolEventSender` (`broadcast::Sender<MempoolEvent>`) -- carries + Kora-specific mempool lifecycle events with richer metadata. +- Both channels are created in the runner, wired into the RPC server and the + `FinalizedReporter`, and passed through to the subscription module. + +## Breaking Changes + +None. All new types and endpoints are additive. Existing RPC methods and +behavior are unchanged. The `TransactionPool` API gains a new +`remove_with_reason()` method while the original `remove()` continues to work +unchanged (it delegates to `remove_with_reason` with reason `"removed"`). + +## Migration Notes + +- Node operators do not need to change configuration. Subscriptions are + available automatically when RPC is enabled. +- If the RPC is not configured, the broadcast channels are `None` and no events + are emitted (zero overhead). + +## Files Modified + +### `crates/node/domain/Cargo.toml` +- Added `serde` feature to `alloy-primitives` for serializing `Address`, `B256`, + and `U256` inside `MempoolEvent`. + +### `crates/node/domain/src/events.rs` +- Added `MempoolEvent` enum with three variants: `TxAdded`, `TxIncluded`, and + `TxEvicted`. +- `MempoolEvent` derives `Serialize`/`Deserialize` with `camelCase` serde + renaming for JSON-RPC compatibility. +- Added `mempool_event_serde_roundtrip` unit test.
+ +### `crates/node/domain/src/lib.rs` +- Re-exported `MempoolEvent` from the crate root. + +### `crates/node/rpc/Cargo.toml` +- Added `kora-domain` dependency (needed for `MempoolEvent` type in + subscriptions). + +### `crates/node/rpc/src/subscription.rs` (new file) +- `PendingTxEvent` / `PendingTxInfo` -- types for Ethereum-standard pending + transaction notifications. +- `subscription_module()` -- builds the `RpcModule` with `eth_subscribe` and + `kora_subscribe` handlers. +- `pending_tx_channel()` / `mempool_event_channel()` -- factory functions for + broadcast channels with default capacities (2048 / 4096). +- `recv_broadcast()` -- helper that handles `Lagged` errors by skipping missed + events and logging a warning. +- Tests covering hash-only subscriptions, full-tx subscriptions, Kora mempool + subscriptions, and lagged-receiver recovery. + +### `crates/node/rpc/src/eth.rs` +- `EthApiImpl` gains optional `pending_tx_broadcast` and `mempool_broadcast` + fields, set via builder methods `with_pending_tx_broadcast()` and + `with_mempool_broadcast()`. +- `broadcast_pending_tx()` sends both `PendingTxEvent::Added` and + `MempoolEvent::TxAdded` after a transaction is accepted via + `eth_sendRawTransaction`. +- Tests verify that broadcasts fire on acceptance and do not fire when the raw + transaction fails to decode. + +### `crates/node/rpc/src/server.rs` +- `RpcServer` and `JsonRpcServer` gain `pending_tx_broadcast` and + `mempool_broadcast` fields with corresponding builder methods. +- The subscription module is merged into the RPC module at startup. +- Debug impl updated to show broadcast channel presence. + +### `crates/node/rpc/src/lib.rs` +- Re-exports the new subscription types and channel factory functions from the + crate root. + +### `crates/node/reporters/src/lib.rs` +- `FinalizedReporter` gains an optional `mempool_broadcast` field set via + `with_mempool_broadcast()`. 
+- `publish_mempool_inclusions()` iterates finalized block transactions and sends + `MempoolEvent::TxIncluded` for each one. +- Unit test verifies `TxIncluded` events are emitted with correct block + number and hash. + +### `crates/node/runner/src/runner.rs` +- Creates `pending_tx_broadcast` and `mempool_broadcast` channels when RPC is + configured. +- Wires both channels into the `RpcServer` and the `FinalizedReporter`. + +### `crates/node/txpool/Cargo.toml` +- Added `tokio` dependency with `sync` feature for `broadcast::Sender`. + +### `crates/node/txpool/src/config.rs` +- No functional change; a blank line was added for consistency. + +### `crates/node/txpool/src/pool.rs` +- `TransactionPool` gains an optional + `events: Option<broadcast::Sender<MempoolEvent>>` field and a + `new_with_events()` constructor. +- `add()` emits `MempoolEvent::TxEvicted` (reason: `"replaced"`) when a + transaction at the same nonce is displaced, followed by `MempoolEvent::TxAdded` + for the new transaction. +- `remove_with_reason()` emits `MempoolEvent::TxEvicted` with a caller-supplied + reason string. +- `remove()` delegates to `remove_with_reason()` with reason `"removed"`. +- `tx_added_event()` helper constructs `MempoolEvent::TxAdded` from an + `OrderedTransaction`. +- Tests cover: `TxAdded` on insert, `TxEvicted` on replacement, `TxEvicted` on + remove, and custom eviction reasons. + +### `Cargo.lock` +- Updated to reflect the new `kora-domain` dependency from `kora-rpc`. + +## Testing + +The following test cases cover the subscription functionality: + +**Domain events (`kora-domain`)** +- `mempool_event_serde_roundtrip` -- verifies `MempoolEvent::TxAdded` + serializes to JSON with camelCase field names and deserializes back + identically. + +**RPC subscriptions (`kora-rpc`)** +- `eth_pending_subscription_receives_hash` -- subscribes to + `newPendingTransactions` and verifies the hash is received.
+- `eth_pending_subscription_receives_full_tx` -- subscribes with `fullTx: true` + and verifies the full `RpcTransaction` object is received. +- `kora_mempool_subscription_receives_event` -- subscribes to `kora_subscribe("mempool")` + and verifies a `MempoolEvent::TxIncluded` event is received. +- `broadcast_receiver_skips_lagged_events` -- verifies the `recv_broadcast` + helper correctly recovers from a lagged receiver by skipping to the latest + available message. + +**RPC broadcast integration (`kora-rpc`)** +- `eth_send_raw_transaction_broadcasts_after_acceptance` -- verifies that + `eth_sendRawTransaction` emits both `PendingTxEvent` and `MempoolEvent` + after successful validation. +- `invalid_raw_transaction_does_not_broadcast` -- verifies that a malformed + transaction does not emit any broadcast events. + +**Transaction pool events (`kora-txpool`)** +- `pool_broadcasts_tx_added_on_insert` -- verifies `MempoolEvent::TxAdded` is + emitted when a transaction is added to the pool. +- `pool_broadcasts_replaced_transaction_as_evicted` -- verifies that replacing + a transaction emits `TxEvicted` for the old transaction followed by `TxAdded` + for the new one. +- `pool_remove_broadcasts_tx_evicted` -- verifies `remove()` emits `TxEvicted` + with reason `"removed"`. +- `pool_remove_with_reason_broadcasts_custom_reason` -- verifies + `remove_with_reason()` emits `TxEvicted` with the caller-supplied reason. + +**Reporter integration (`kora-reporters`)** +- `publish_mempool_inclusions_broadcasts_tx_included` -- verifies that + `publish_mempool_inclusions()` emits `TxIncluded` with the correct block + number and block hash for each transaction in a finalized block. 
diff --git a/changelogs/pr-73-eth-filter-api.md b/changelogs/pr-73-eth-filter-api.md new file mode 100644 index 0000000..ed1ea66 --- /dev/null +++ b/changelogs/pr-73-eth-filter-api.md @@ -0,0 +1,168 @@ +# PR #73: Implement Ethereum HTTP Filter API + +## Overview + +This PR adds server-side support for the Ethereum JSON-RPC filter methods +used by HTTP clients. These methods allow callers to register interest in +new blocks, pending transactions, or log events and then poll for changes +rather than holding a persistent WebSocket connection. + +Prior to this change, the node's RPC layer had no filter support. Clients +that relied on `eth_newFilter`, `eth_newBlockFilter`, +`eth_newPendingTransactionFilter`, `eth_getFilterChanges`, +`eth_getFilterLogs`, or `eth_uninstallFilter` would receive +"method not found" errors. + +## New RPC Methods + +| Method | Description | +|---|---| +| `eth_newFilter` | Create a log filter with address/topic criteria and an optional block range. Returns a filter ID. | +| `eth_newBlockFilter` | Create a filter that tracks new block hashes. Returns a filter ID. | +| `eth_newPendingTransactionFilter` | Create a filter that tracks new pending transaction hashes. Returns a filter ID. | +| `eth_getFilterChanges` | Poll a filter for changes since the last call. Returns logs (for log filters) or hashes (for block/pending-tx filters). | +| `eth_getFilterLogs` | Return all logs matching a log filter's original criteria (does not advance the cursor). Only valid for log filters. | +| `eth_uninstallFilter` | Remove a filter by ID. Returns `true` if the filter existed. | + +## How It Works + +### Filter Store + +A bounded, in-memory `FilterStore` holds active filters keyed by +monotonically increasing `u64` IDs. Each filter entry tracks: + +- The filter variant (log, block, or pending transaction) and its matching + criteria. 
+- A cursor recording what has already been reported (last polled block + number, or last seen index into the pending-tx insertion-order vector). +- A last-poll timestamp used for TTL-based expiry. + +Filters expire after 5 minutes of inactivity (configurable). When the +store reaches its maximum capacity (default 1024), the oldest filter is +evicted to make room. + +### Cursor Initialization + +When a log filter is created: + +- If `from_block` is an explicit block number, the cursor is set so the + first poll starts at that block (inclusive). +- If `from_block` is "earliest", the cursor starts at genesis. +- If `from_block` is omitted, "latest", or another tag, the cursor starts + at the current head so only future events are returned. +- If `block_hash` is provided, the filter is treated as a single-block + query: the first poll returns matching logs and all subsequent polls + return empty. + +### Polling Semantics + +- **Log filters**: Each poll queries logs from `last_poll_block + 1` to the + current head (or the original `to_block` bound, whichever is lower), + then advances the cursor to the head. The original address, topic, and + `to_block`/`block_hash` criteria are preserved across polls. +- **Block filters**: Each poll iterates from `last_poll_block + 1` to the + current head, collecting block hashes. The cursor advances only to the + highest block actually observed (tolerating gaps). +- **Pending transaction filters**: Each poll returns new transaction hashes + in insertion order by scanning the shared `pending_tx_order` vector from + the last seen index. Already-known hashes are skipped. + +### Concurrency + +The filter's internal `Mutex` is held only to snapshot the cursor state, +then released before performing any async I/O (state provider queries). +After the query completes, the mutex is re-acquired to update the cursor. +This avoids holding a lock across `.await` points. 
+ +## Files Modified + +### `crates/node/rpc/src/filters.rs` (new file, 232 lines) + +Defines the filter data model and storage layer: + +- `FilterChanges` -- response enum (`Logs` or `Hashes`) serialized with + `#[serde(untagged)]`. +- `Filter` -- cursor enum with `Log`, `Block`, and `PendingTransaction` + variants. +- `FilterEntry` -- wrapper pairing a `Mutex`-guarded `Filter` cursor with a + `RwLock`-guarded last-poll timestamp for TTL tracking. +- `FilterStore` -- bounded `HashMap` with monotonic ID generation, + TTL-based expiry, and oldest-entry eviction. +- Unit tests for create/get/remove, expiry cleanup, and bounded eviction. + +### `crates/node/rpc/src/eth.rs` (modified, +200 lines in implementation, +190 lines in tests) + +- Added six new trait methods to `EthApi` and their implementations on + `EthApiImpl`. +- Added a shared `pending_tx_order` field (an `Arc`-wrapped, lock-guarded + vector of transaction hashes) to track pending transaction insertion order. +- Added `filter_store: Arc` field. +- Extracted `current_block_number()` helper (also simplifies + `block_number()` RPC method). +- Added `filter_id_to_u64()` as a `const fn` to safely convert `U256` + filter IDs to `u64`. +- Added `TestStateProvider` for integration-style tests with controllable + block/log state. +- Added comprehensive test suite: block filter lifecycle, log filter + lifecycle, pending transaction filter lifecycle, block-hash log filter + single-return semantics, `getFilterLogs` rejection for non-log filters, + and `getFilterChanges` with invalid/overflow IDs. + +### `crates/node/rpc/src/error.rs` (modified, +19 lines) + +- Added `RpcError::FilterNotFound` variant mapped to `SERVER_ERROR` + (-32000), matching Geth's behavior. +- Added display and error-object conversion tests. + +### `crates/node/rpc/src/lib.rs` (modified, +3 lines) + +- Added `mod filters` and re-exported `FilterChanges`. + +### `crates/node/rpc/src/types.rs` (modified, +1 derive) + +- Added `PartialEq` and `Eq` derives to `RpcLog` so `FilterChanges` can + derive equality comparison (useful for tests and downstream consumers).
+ +## Breaking Changes + +- `RpcLog` now derives `PartialEq` and `Eq`. This is additive and does not + break existing code: `PartialEq` is opt-in, so downstream types that embed + `RpcLog` without deriving `PartialEq` themselves are unaffected. +- The `EthApi` trait gains six new methods. Any custom implementations of + `EthApiServer` (outside this crate) will need to implement them. + +## Testing + +The following test cases cover the new functionality (all in +`crates/node/rpc/src/eth.rs` and `crates/node/rpc/src/filters.rs`): + +- **`filter_store_create_and_get`** -- Verifies filter creation returns + valid IDs and lookup works. +- **`filter_store_remove`** -- Verifies removal returns true once and + false on double-remove. +- **`filter_store_cleanup_expired`** -- Verifies TTL-based expiry removes + stale entries while keeping fresh ones. +- **`filter_store_evicts_oldest_when_bounded`** -- Verifies oldest filter + is evicted when the store is full. +- **`eth_block_filter_lifecycle`** -- Creates a block filter, inserts new + blocks, polls for changes (expecting new hashes), polls again (expecting + empty), then uninstalls. +- **`eth_log_filter_lifecycle`** -- Creates a log filter with address and + topic criteria, inserts matching and non-matching logs, polls for + changes (expecting only matching logs), polls again (expecting empty), + then calls `getFilterLogs` for the full history. +- **`eth_pending_transaction_filter_lifecycle`** -- Submits a transaction + before filter creation and one after, verifies only the post-creation + transaction is returned by the filter. +- **`eth_log_filter_block_hash_returns_once`** -- Verifies that a log + filter created with `block_hash` returns matching logs on the first poll + and empty results on all subsequent polls, even as new blocks arrive. +- **`eth_get_filter_logs_rejects_non_log_filter`** -- Verifies that + calling `getFilterLogs` on a block filter returns an error.
+- **`eth_get_filter_changes_invalid_id`** -- Verifies that + `getFilterChanges` returns an error for non-existent and overflowing + filter IDs. +- **`filter_id_to_u64_edge_cases`** -- Verifies the `const fn` conversion + handles zero, valid values, `u64::MAX`, overflow, and `U256::MAX`. +- **Error tests** in `error.rs` -- Display and error-object conversion + for `FilterNotFound`. diff --git a/changelogs/pr-74-rpc-rate-limiting.md b/changelogs/pr-74-rpc-rate-limiting.md new file mode 100644 index 0000000..07380d7 --- /dev/null +++ b/changelogs/pr-74-rpc-rate-limiting.md @@ -0,0 +1,140 @@ +# PR #74: RPC Rate Limiting + +## Problem + +The RPC server had no mechanism to limit the rate of incoming requests. A single +client (or a bot) could send an unlimited number of requests per second, which +risked overwhelming the node, exhausting resources, and degrading service for all +other clients. This applied to both the HTTP status endpoints (`/status`, +`/health`) and the JSON-RPC transport layer (Ethereum API calls over WebSocket or +HTTP). + +## Solution + +This PR introduces a global token-bucket rate limiter that enforces a +configurable requests-per-second cap with burst tolerance. The limiter is applied +at two layers: + +1. **HTTP middleware** -- an Axum middleware intercepts every request to the HTTP + status endpoints. If the bucket is exhausted, the server returns + `429 Too Many Requests` before the handler runs. + +2. **JSON-RPC middleware** -- a `jsonrpsee` RPC service wrapper intercepts every + JSON-RPC call. If the bucket is exhausted, the server returns a standard + Ethereum JSON-RPC error with code `-32005` (`LIMIT_EXCEEDED`) and the message + `"rate limit exceeded"`. + +Each layer maintains its own independent token bucket so that HTTP and RPC +traffic are rate-limited separately. + +### Token Bucket Algorithm + +- The bucket starts full at `burst_size` tokens. +- Each request consumes one token. 
+- Tokens are replenished at `requests_per_second` per second, up to the + `burst_size` cap. +- If no tokens are available, the request is rejected immediately (no queuing). +- A `burst_size` of 0 is automatically clamped to 1 so that an enabled limiter + can always admit at least one request. Without this, a burst of 0 would start + with 0 tokens and never refill, permanently blocking all traffic. +- A `requests_per_second` of 0 means "reject everything" -- the bucket is + initialized empty and never refills. + +### Configuration + +Rate limiting is configured through `RateLimitConfig`: + +- **Default**: 100 requests/second, burst size of 200. +- **Disabled**: `RateLimitConfig::disabled()` sets both values to `u64::MAX`, + which causes `SharedRateLimiter::new` to return `None`, bypassing the limiter + entirely with zero overhead. + +Additionally, this PR surfaces the `max_subscriptions_per_connection` setting +(defaulting to 32) through `RpcServerConfig`, `RpcServer`, and `JsonRpcServer` +builder APIs, and passes it to the `jsonrpsee` server builder. + +## Files Modified + +### `crates/node/rpc/Cargo.toml` +- Added `"util"` to the `tower` dependency features. This is needed for the + `ServiceExt::oneshot` method used in the HTTP rate-limiting tests. + +### `crates/node/rpc/src/config.rs` +- Added `max_subscriptions_per_connection` field to `RpcServerConfig` (default + 32). +- Added `with_rate_limit_burst(requests_per_second, burst_size)` builder method + to `RpcServerConfig` for configuring both rate and burst together. +- Added `with_max_subscriptions_per_connection` builder method. +- Updated `RateLimitConfig` doc comment to clarify the rate limit is + server-wide, not per-client. +- Added `RateLimitConfig::is_disabled()` helper. +- Added tests for all new builder methods, the chained builder, and the + `is_disabled` predicate. 
+ +### `crates/node/rpc/src/server.rs` +- Added `SharedRateLimiter` -- a thread-safe wrapper around an `Arc`-shared, + lock-guarded `TokenBucket`; its constructor returns `None` when rate limiting + is disabled. +- Added `TokenBucket` -- the core rate-limiting state machine with `const fn` + construction, deterministic `try_acquire_at(Instant)` for testability, and + internal refill logic. +- Added `rate_limit_allows()` helper that treats `None` as "always allow." +- Added `rate_limited_rpc_response()` to build the standard JSON-RPC error. +- Added `enforce_http_rate_limit` Axum middleware function. +- Added `RateLimitedRpcService` implementing `jsonrpsee::RpcServiceT`. +- Extracted `build_http_router()` to a standalone function (makes both + production code and tests cleaner). +- Threaded `rate_limit_config` and `max_subscriptions_per_connection` through + `RpcServer` and `JsonRpcServer` constructors, builders, `from_config`, and + `start` methods. +- Added `Debug` fields for the new config values. +- Added comprehensive unit tests: + - Token bucket burst and refill behavior + - `burst_size=0` clamping + - `requests_per_second=0` rejection + - Burst cap enforcement + - Disabled limiter produces `None` + - `rate_limit_allows` with no limiter + - Config threading through `RpcServer::from_config` + - Config threading through `JsonRpcServer` builders + - RPC-layer rate limiting with mock service + - HTTP-layer rate limiting with `tower::ServiceExt::oneshot` + +## Breaking Changes + +- `RpcServerConfig` has a new public field `max_subscriptions_per_connection`. + Code that constructs `RpcServerConfig` with struct literal syntax (rather than + the builder methods) will need to add this field. +- `RpcServer` and `JsonRpcServer` now carry `rate_limit_config` and + `max_subscriptions_per_connection` fields internally. This does not affect + public API since these structs are constructed via methods, not struct + literals.
+ +## Migration + +- No changes required for existing callers that use the builder API or + `Default` -- the defaults (100 rps, burst 200, 32 subscriptions) are applied + automatically. +- To opt out of rate limiting, call `.with_rate_limit_config(RateLimitConfig::disabled())`. +- To tune the limits, use `.with_rate_limit_burst(rps, burst)` on the config + or `.with_rate_limit_config(...)` on the server. + +## Testing + +The following test cases cover the rate-limiting implementation: + +| Test | What it verifies | +|------|-----------------| +| `token_bucket_honors_burst_and_refill` | Burst consumption and time-based refill | +| `token_bucket_clamps_zero_burst_to_one` | `burst_size=0` is clamped to 1; first request succeeds | +| `token_bucket_zero_rps_rejects_all` | `requests_per_second=0` rejects everything, even after time passes | +| `token_bucket_does_not_exceed_burst` | Tokens never accumulate beyond `burst_size` | +| `disabled_rate_limit_does_not_build_limiter` | `RateLimitConfig::disabled()` produces `None` | +| `rate_limit_allows_with_no_limiter` | `rate_limit_allows(&None)` returns `true` | +| `rate_limit_config_default_is_not_disabled` | Default config is not considered disabled | +| `rpc_server_from_config_threads_limits` | `RpcServer::from_config` propagates all limit fields | +| `json_rpc_server_builders_thread_limits` | `JsonRpcServer` builder methods propagate all limit fields | +| `rpc_rate_limiter_rejects_after_burst` | JSON-RPC middleware returns `-32005` after burst is exhausted | +| `http_status_rate_limiter_returns_too_many_requests` | HTTP middleware returns `429` after burst is exhausted | +| `rpc_server_config_with_rate_limit_burst` | Config builder sets both rps and burst | +| `rpc_server_config_with_max_subscriptions_per_connection` | Config builder sets subscription limit | +| `rpc_server_config_chained_builder` | Full builder chain applies all settings | diff --git a/changelogs/pr-75-configurable-node-parameters.md 
b/changelogs/pr-75-configurable-node-parameters.md new file mode 100644 index 0000000..b7b0926 --- /dev/null +++ b/changelogs/pr-75-configurable-node-parameters.md @@ -0,0 +1,181 @@ +# PR #75: Make validator runtime parameters configurable + +## Problem + +Several critical node runtime parameters were hardcoded across the codebase, +making it impossible to tune them without recompiling: + +- **RPC bind address**: Always bound to `0.0.0.0:8545`, with no way to change + it from configuration. +- **Gas limit**: Passed as a constructor argument to `ProductionRunner` even + though it already lived in `config.execution.gas_limit`, creating a redundant + source of truth that could drift. +- **Consensus tuning (Simplex)**: Buffer sizes, timeouts, leader/certification + deadlines, and fetch concurrency were all compile-time constants + (`NZUsize!(16 * 1024 * 1024)`, `Duration::from_secs(5)`, etc.). +- **Block codec limits**: Maximum transactions per block and maximum bytes per + transaction were module-level constants in the runner. +- **Leader election**: Hardcoded `view % 4` assumed exactly four validators, + producing incorrect leader rotation for any other validator set size. +- **Validator indexing**: The DKG ceremony produces 0-indexed share indices, + and leader election also expects 0-indexed values. + +## Solution + +All previously-hardcoded parameters are now expressed as configuration fields +with sensible defaults, so existing config files continue to work unchanged. 
+ +### Configuration additions + +Two new nested config sections live under `[consensus]`: + +```toml +[consensus.block_codec] +max_txs = 10000 # maximum transactions decoded per block +max_tx_bytes = 8388608 # maximum bytes per transaction (8 MiB) + +[consensus.simplex] +replay_buffer_bytes = 16777216 +write_buffer_bytes = 16777216 +leader_timeout_secs = 5 +certification_timeout_secs = 10 +timeout_retry_secs = 2 +fetch_timeout_secs = 5 +activity_timeout_views = 20 +skip_timeout_views = 10 +fetch_concurrent = 8 +``` + +Every field uses `NonZeroUsize` or `NonZeroU64` so that zero values are +rejected at deserialization time rather than causing division-by-zero or +silent misconfiguration at runtime. + +### RPC bind address + +The runner now reads `config.rpc.http_addr` (which already defaulted to +`0.0.0.0:8545`) instead of hardcoding the address. Invalid addresses produce +a clear error message at startup. + +### Gas limit deduplication + +`ProductionRunner::new()` no longer accepts a `gas_limit` parameter. The gas +limit is read from `config.execution.gas_limit` at runtime, eliminating the +duplicate source of truth. + +### Leader election fix + +`NodeState::with_validator_count()` replaces the old `view % 4` leader +calculation with `view % validator_count`, and the constructor validates that +`validator_index < validator_count`. + +## Files modified + +### `bin/kora/src/cli.rs` + +- Reads `config.rpc.http_addr` and parses it into a `SocketAddr` with a + descriptive error on failure. +- Converts `dkg_output.participants` (a `usize`) to `u32` with overflow + checking, and rejects zero. +- Uses `dkg_output.share_index` directly as the validator index. +- Calls `NodeState::with_validator_count()` instead of `NodeState::new()`. +- Removes the `gas_limit` argument from `ProductionRunner::new()`. + +### `crates/node/config/src/consensus.rs` + +- Adds `ConsensusBlockCodecConfig` struct with `max_txs` and `max_tx_bytes` + (`NonZeroUsize` fields with serde defaults). 
+- Adds `ConsensusSimplexConfig` struct with nine tuning parameters (buffer + sizes, timeouts, concurrency) using `NonZeroUsize` and `NonZeroU64`. +- Adds `block_codec` and `simplex` fields to `ConsensusConfig` (both + `#[serde(default)]`). +- Adds 11 `const fn` default constructors (one per NonZero field). +- Adds 11 `pub const` default values for use in downstream assertions. +- Adds tests: default value coverage, partial deserialization of both + sub-configs, zero-value rejection for `NonZero` fields. + +### `crates/node/config/src/lib.rs` + +- Re-exports the two new config structs and all 11 default constants. + +### `crates/node/rpc/src/state.rs` + +- Adds `with_validator_count(chain_id, validator_index, validator_count)` + constructor that stores a `NonZeroU32` validator count. +- `set_view()` now uses `view % validator_count` instead of `view % 4`. +- `NodeState::new()` is preserved for backward compatibility, delegating to + `with_validator_count` with `DEFAULT_VALIDATOR_COUNT = 4`. +- Adds panics with descriptive messages when `validator_count == 0` or + `validator_index >= validator_count`. +- Adds tests for non-four-validator leadership, zero-count rejection, and + out-of-range index rejection. + +### `crates/node/runner/src/runner.rs` + +- Removes module-level `BLOCK_CODEC_MAX_TXS` and `BLOCK_CODEC_MAX_TX_BYTES` + constants (now sourced from config). +- Changes `block_codec_cfg()` from a no-arg `const fn` to one that accepts + `&ConsensusBlockCodecConfig`. +- Removes `gas_limit` field from `ProductionRunner`; reads it from + `config.execution.gas_limit` in `run()`. +- Reads `config.consensus.simplex` for all Simplex engine parameters. +- Removes the unused `NZUsize` import. +- Adds a unit test verifying `block_codec_cfg()` correctly maps config values. + +### `crates/node/config/README.md` + +- Adds the `[consensus.block_codec]` and `[consensus.simplex]` sections to + the example configuration schema. 
+ +### `crates/node/runner/README.md` + +- Updates code examples to remove the `gas_limit` argument from + `ProductionRunner::new()`. +- Removes `gas_limit` from the configuration parameters table. +- Adds a note that gas limit comes from `config.execution.gas_limit` at + runtime. + +## Breaking changes + +- `ProductionRunner::new()` no longer accepts a `gas_limit` parameter. Callers + that previously passed `gas_limit` must remove that argument; the runner will + read it from the supplied `NodeConfig` at runtime. +- `NodeState::with_validator_count()` panics if `validator_count` is zero or + if `validator_index >= validator_count`. Code that previously constructed + `NodeState` with out-of-range indices will now panic at construction instead + of silently producing incorrect leader rotation. + +## Migration considerations + +- **Config files**: No changes required. All new fields have `#[serde(default)]` + with the same values that were previously hardcoded, so existing TOML/JSON + configs continue to work identically. +- **Downstream callers of `ProductionRunner::new()`**: Remove the third + (`gas_limit`) argument. The gas limit is now exclusively sourced from + `config.execution.gas_limit`. +- **Tests using `NodeState::new()`**: The legacy constructor still works with + the default four-validator assumption. Tests that need a different validator + count should use `NodeState::with_validator_count()`. + +## Testing + +The test suite covers: + +- **Default config values**: All 11 new `NonZero` fields match their declared + default constants. +- **Serde round-trip**: JSON and TOML serialization/deserialization preserves + all consensus config fields. +- **Partial deserialization**: Omitted `block_codec` or `simplex` sub-objects + fall back to defaults; specifying only some fields within a sub-object leaves + the rest at defaults. +- **Zero rejection**: `NonZeroUsize` and `NonZeroU64` fields correctly reject + zero values during deserialization. 
+- **Leader election with variable validator counts**: Verifies correct leader + rotation for 3-validator and 5-validator sets. +- **Validator index boundary**: `with_validator_count` panics when + `validator_index >= validator_count` (e.g., index 5 with count 4). +- **Zero validator count**: `with_validator_count` panics when + `validator_count == 0`. +- **Block codec config mapping**: The `block_codec_cfg()` function in the runner + correctly converts `ConsensusBlockCodecConfig` to the domain `BlockCfg`. + +Run with: `cargo test -p kora-config -p kora-rpc -p kora-runner` diff --git a/changelogs/pr-76-docker-build-runtime.md b/changelogs/pr-76-docker-build-runtime.md new file mode 100644 index 0000000..315306a --- /dev/null +++ b/changelogs/pr-76-docker-build-runtime.md @@ -0,0 +1,127 @@ +# PR #76: Docker Build and Runtime Healthcheck Configuration + +## Summary + +This PR adds a `HEALTHCHECK` instruction to the Dockerfile and extends the +root `.dockerignore` to exclude documentation files that inflate the Docker +build context. Together these changes ensure the Docker image carries its own +healthcheck contract and that builds transfer only the files needed to compile +the Rust binaries and package the runtime scripts. + +## Problem + +Before this change, health checking was defined only in the Compose file +(`docker/compose/devnet.yaml`). The Dockerfile itself had no `HEALTHCHECK` +instruction. This meant: + +- Running the image with plain `docker run` (outside Compose) produced a + container with no healthcheck -- orchestrators could not tell whether the + node was ready. +- The healthcheck contract (which script to call, intervals, retries) was + defined only in the Compose file instead of being declared at the image level and + optionally overridden by Compose. +- The root `.dockerignore` did not exclude markdown documentation, so every + `*.md` file in the repository was copied into the build context, wasting + time and bandwidth on files the build never uses.
+ +## Solution + +### Dockerfile HEALTHCHECK + +A `HEALTHCHECK` instruction was added to the runtime stage of the Dockerfile: + +```dockerfile +HEALTHCHECK --interval=10s --timeout=5s --retries=3 --start-period=30s \ + CMD /scripts/healthcheck.sh +``` + +- The timing parameters (`interval`, `timeout`, `retries`, `start_period`) + match the values already used in the Compose `x-validator-common` anchor, + so the two configurations stay in sync. +- The default healthcheck mode is `p2p` (set inside `healthcheck.sh` via + `HEALTHCHECK_MODE:-p2p`), which checks that TCP port 30303 is listening. +- Compose services override this to `HEALTHCHECK_MODE=ready`, which checks + both the `.ready` sentinel file and the P2P port. +- The `HEALTHCHECK_MODE` environment variable supports three modes: + - `dkg` -- succeeds when `/data/share.key` and `/data/output.json` both + exist (DKG ceremony completed). + - `p2p` -- succeeds when port 30303 accepts TCP connections. + - `ready` -- succeeds when `/data/.ready` exists AND port 30303 is up. + +### .dockerignore extensions + +The root `.dockerignore` now excludes markdown files while preserving the +`README.md` files that Rust crates embed via `include_str!("../README.md")`: + +``` +*.md +!README.md +!bin/**/README.md +!crates/**/README.md +!docker/README.md +``` + +This keeps the build context small without breaking `cargo doc` or crate-level +documentation. + +## Files Modified + +| File | Change | +|------|--------| +| `docker/Dockerfile` | Added `HEALTHCHECK` instruction with timing parameters and a comment documenting all three healthcheck modes. | +| `.dockerignore` | Added rules to exclude `*.md` files from the Docker build context while keeping `README.md` files needed by crate documentation. | + +## Breaking Changes + +None. Existing Compose-based workflows are unaffected because the Compose +healthcheck definition takes precedence over the Dockerfile `HEALTHCHECK`. 
+Users running the image directly with `docker run` will now get automatic +healthchecks (previously there were none), which is additive, not breaking. + +## Migration + +No migration steps are required. The change is fully backward-compatible. + +## Testing + +1. **Validate Compose configuration:** + ```bash + docker compose -f docker/compose/devnet.yaml config --quiet + ``` + Should exit 0 with no output. + +2. **Check Dockerfile syntax:** + ```bash + docker build --check -f docker/Dockerfile . + ``` + Should report no warnings. + +3. **Build the image locally:** + ```bash + cd docker && just build + ``` + Confirm the build succeeds and does not copy unnecessary markdown files + into the context (watch the "transferring context" line for size). + +4. **Verify the healthcheck is embedded in the image:** + ```bash + docker inspect kora:local | jq '.[0].Config.Healthcheck' + ``` + Should show the `HEALTHCHECK` configuration with the correct interval, + timeout, retries, and start period. + +5. **Run a standalone container and observe health status:** + ```bash + docker run -d --name kora-test kora:local + # Wait ~40 seconds for start_period + interval + docker inspect --format='{{.State.Health.Status}}' kora-test + ``` + The status should transition from `starting` to `healthy` or `unhealthy` + depending on whether the node is actually running. + +6. **Run the devnet and verify validators become healthy:** + ```bash + cd docker && just trusted-devnet + docker compose -f compose/devnet.yaml ps + ``` + All validator and secondary nodes should show `healthy` status. 
diff --git a/changelogs/pr-77-compact-persisted-overlays.md b/changelogs/pr-77-compact-persisted-overlays.md new file mode 100644 index 0000000..536466e --- /dev/null +++ b/changelogs/pr-77-compact-persisted-overlays.md @@ -0,0 +1,103 @@ +# PR #77: Compact Persisted Ledger Snapshot Overlays + +## Problem + +When the ledger persisted a chain of snapshots to QMDB, only the **tip** snapshot +(the most recently committed block) had its in-memory overlay state compacted. +All intermediate ancestor snapshots in the chain retained their full overlay +change sets even though those changes had already been flushed to disk. + +Over time this caused **unbounded memory growth**: every persisted-but-not-compacted +snapshot kept a copy of its `OverlayState` changes and the corresponding +`ChangeSet` inside the `Snapshot` struct. On long-running nodes processing many +blocks, memory usage grew proportionally to the total number of persisted blocks +rather than only the number of *unpersisted* blocks. + +Additionally, when a snapshot was missing during the compaction loop, the code +silently continued (`continue`), masking a bug that should never occur in normal +operation. + +## Solution + +### Compact all snapshots in the persisted chain (not just the tip) + +The `persist_snapshot` method in `LedgerView` now iterates over **every** digest +in the persisted chain and replaces each snapshot with a compacted version. The +compacted snapshot: + +- Repoints its `state` field to a fresh `OverlayState` backed by the current + QMDB state with an empty change set. +- Clears its `changes` field to `QmdbChangeSet::default()`. +- Preserves `parent`, `state_root`, and `tx_ids` unchanged. + +This ensures that once data is flushed to QMDB, no snapshot retains a redundant +in-memory copy of the same state changes. 
+ +### Return errors for missing snapshots instead of silently continuing + +If a snapshot in the persisted chain cannot be found during compaction, the code +now returns `ConsensusError::SnapshotNotFound` instead of silently skipping it. +This makes the failure observable and debuggable rather than hiding a +potentially serious internal inconsistency. + +### Add overlay state inspection helpers + +Two new methods on `OverlayState` allow callers (and tests) to inspect the +overlay change set without accessing private fields: + +- `changes_is_empty()` -- returns `true` when the change set has no entries. +- `change_len()` -- returns the number of accounts in the change set. + +Both methods are annotated with `#[must_use]`. + +## Files Modified + +### `crates/node/ledger/src/lib.rs` + +- **`persist_snapshot`**: Changed from compacting only the chain tip to + compacting every snapshot in the chain. Replaced the `if let` guard that + operated on `chain.last()` with a `for digest in &chain` loop. Each iteration + fetches the snapshot, builds a compact replacement, and reinserts it. +- **Error handling**: The `.get(digest)` call now uses + `.ok_or(ConsensusError::SnapshotNotFound(*digest))?` instead of a silent + `continue`, surfacing unexpected missing snapshots as errors. +- **New test `persist_snapshot_compacts_all_persisted_chain_snapshots`**: Builds + a two-block chain, persists it, and asserts that *both* snapshots have empty + `changes` and empty overlay change sets afterward, while preserving `parent`, + `state_root`, and `tx_ids`. +- **Formatting**: Several `setup_ledger(...)` call sites were reformatted by + `rustfmt` to use trailing-comma style (no semantic change). + +### `crates/storage/overlay/src/overlay.rs` + +- **New method `change_len(&self) -> usize`**: Returns the number of accounts + in the overlay change set. Annotated `#[must_use]`. +- **New method `changes_is_empty(&self) -> bool`**: Returns whether the overlay + change set is empty. 
Annotated `#[must_use]`. +- **New test `test_changes_is_empty_and_change_len`**: Exercises both helpers + on empty and non-empty overlays. +- **Formatting**: `AccountUpdate` struct literals in tests reformatted by + `rustfmt` to use trailing-comma style (no semantic change). + +## Breaking Changes + +None. The public API is only *expanded* (two new methods). The compaction +behavioral change is internal and does not alter any external-facing contract. + +## Migration Considerations + +No migration is needed. Existing persisted data is unaffected; the change only +alters how in-memory snapshots are handled after a successful QMDB commit. + +## Testing + +| Test | What it covers | +|------|----------------| +| `persist_snapshot_compacts_all_persisted_chain_snapshots` | Verifies that every snapshot in a two-block persisted chain has its overlay and change set emptied, while metadata (`parent`, `state_root`, `tx_ids`) is preserved. | +| `persist_snapshot_merges_unpersisted_ancestors` | Existing test ensuring multi-block changes merge correctly and the QMDB balance reflects the combined transfers. | +| `persist_snapshot_duplicate_is_noop` | Existing test confirming that persisting the same digest twice is idempotent (`Ok(false)`). | +| `persist_snapshot_merges_overlays` | Existing test with five independent senders verifying overlay merge correctness. | +| `persist_snapshot_unrelated_merges` | Existing test for two independent fork chains persisted sequentially. | +| `persist_snapshot_updates_snapshot_state` | Existing test confirming the state root is preserved after persistence. | +| `empty_child_inherits_parent_state_root_after_persist` | Existing test for empty-block root inheritance. | +| `test_changes_is_empty_and_change_len` | New unit test exercising the `changes_is_empty()` and `change_len()` accessors on empty and populated overlays. 
| diff --git a/changelogs/pr-78-real-block-timestamps.md b/changelogs/pr-78-real-block-timestamps.md new file mode 100644 index 0000000..780de19 --- /dev/null +++ b/changelogs/pr-78-real-block-timestamps.md @@ -0,0 +1,127 @@ +# PR #78: Real Block Timestamps + +## Problem + +Previously, blocks used their **height** (block number) as the EVM `timestamp` field. +This meant `block.timestamp` was `0`, `1`, `2`, ... instead of a real Unix epoch value. +Any Solidity contract that called `block.timestamp` received a meaningless +value that bore no relation to wall-clock time, breaking time-dependent +logic such as timelocks, vesting schedules, and oracle freshness checks. + +## Solution + +Blocks now carry an explicit `timestamp: u64` field that records a real Unix +timestamp (seconds since the epoch). The timestamp is chosen at proposal time +by reading the system clock and ensuring the value is **strictly greater** +than the parent block's timestamp, which is a standard monotonicity invariant. + +### Timestamp selection logic (`Block::next_timestamp`) + +``` +let timestamp = max(now_secs, parent_timestamp + 1) +``` + +- If the wall clock is ahead of the parent, the current time is used. +- If the clock lags (e.g. fast block production or clock skew), the parent + timestamp is incremented by one second to guarantee monotonicity. +- If `parent_timestamp` is `u64::MAX`, the function returns `None` to signal + that no valid timestamp can be produced (overflow protection). + +### Genesis timestamp + +The genesis block timestamp is now configurable via +`BootstrapConfig::genesis_timestamp` and is read from the `"timestamp"` field +in the genesis JSON file. When constructing a `BootstrapConfig` programmatically, +the default genesis timestamp is `0`. + +## Files Modified + +### `crates/node/domain/src/block.rs` +- Added `timestamp: u64` field to `Block`. +- Added `Block::next_timestamp(now_secs, parent_timestamp) -> Option<u64>`.
+- Updated codec `Write`/`Read`/`EncodeSize` implementations to include `timestamp`. +- Added tests for timestamp-dependent block ID uniqueness and `next_timestamp` edge cases. + +### `crates/node/domain/src/bootstrap.rs` +- Added `genesis_timestamp: u64` to `BootstrapConfig`. +- Added builder method `with_genesis_timestamp`. +- `BootstrapConfig::load` now reads and preserves the `"timestamp"` field from the genesis JSON. +- Added unit tests for default and loaded genesis timestamps. + +### `crates/node/domain/src/idents.rs` +- Updated test block construction to include `timestamp`. + +### `crates/node/consensus/src/error.rs` +- Added `ConsensusError::TimestampOverflow` variant for when a valid next + timestamp cannot be produced. +- Added test for the error's `Display` implementation. + +### `crates/node/consensus/src/proposal.rs` +- `build_proposal` and `build_proposal_async` now accept a `now_secs: u64` + parameter and use `Block::next_timestamp` to derive the block timestamp. +- `block_context` helper now takes `timestamp` instead of using `height`. +- All test call sites updated to pass `now_secs`. + +### `crates/node/consensus/src/application.rs` +- Updated mock block construction in tests to include `timestamp: 0`. + +### `crates/node/runner/src/app.rs` +- `RevmApplication::propose` now reads the system clock via `unix_timestamp_secs` + and passes the timestamp to `Block::next_timestamp`. +- Timestamp overflow is logged at `error` level and the proposal is skipped. +- Block build and proposal logging now includes the `timestamp` field. + +### `crates/node/runner/src/runner.rs` +- `RevmContextProvider::context` uses `block.timestamp` instead of `block.height`. +- `ProductionRunner::run` initialises the ledger with + `LedgerView::init_with_genesis_timestamp`, passing through the bootstrap + genesis timestamp. + +### `crates/node/ledger/src/lib.rs` +- Added `LedgerView::init_with_genesis_timestamp` and + `init_with_config_and_genesis_timestamp`. 
+- The genesis block is constructed with the configured `genesis_timestamp`. +- The original `init` and `init_with_config` methods delegate with + `genesis_timestamp = 0` for backward compatibility. +- Added `init_uses_configured_genesis_timestamp` test. + +### `crates/node/reporters/src/lib.rs` +- `index_finalized_block` now sets `IndexedBlock.timestamp` from + `block.timestamp` rather than `block_context.header.timestamp`. + +### `crates/e2e/src/harness.rs` +- All block construction and context methods updated to thread `timestamp`. +- `TestApplication::propose` reads the clock and derives the timestamp + identically to production. +- `TestContextProvider::context` uses `block.timestamp`. +- Ledger initialization uses `init_with_genesis_timestamp`. + +## Breaking Changes + +- **Block codec**: The on-wire encoding of `Block` now includes the + `timestamp` field between `height` and `prevrandao`. Nodes running the old + codec will fail to decode blocks from nodes running the new codec and vice + versa. All nodes must be upgraded simultaneously. +- **`ProposalBuilder::build_proposal` / `build_proposal_async`**: These + methods now require an additional `now_secs: u64` parameter. +- **`LedgerView::init`**: Still works, but callers that need a non-zero + genesis timestamp must switch to `init_with_genesis_timestamp`. + +## Testing + +- **`block.rs`**: `next_timestamp_uses_clock_when_ahead`, + `next_timestamp_advances_parent_when_clock_lags`, + `next_timestamp_returns_none_at_u64_max`, `block_id_differs_by_timestamp` -- + cover the core timestamp selection logic and block identity. +- **`bootstrap.rs`**: `new_defaults_genesis_timestamp_to_zero`, + `load_preserves_genesis_timestamp` -- verify the genesis timestamp flows + through configuration. +- **`error.rs`**: `test_timestamp_overflow_display` -- verifies the new error + variant renders correctly. 
+- **`proposal.rs`**: All existing proposal tests updated to pass `now_secs`, + ensuring backward compatibility of the proposal builder. +- **`ledger/src/lib.rs`**: `init_uses_configured_genesis_timestamp` -- + confirms the ledger honours the configured genesis timestamp. +- **E2E harness**: The full e2e test suite exercises real-timestamp proposals + end-to-end across multiple simulated nodes. +- All proposal tests pass `now_secs` to validate the timestamp threading. diff --git a/crates/e2e/Cargo.toml b/crates/e2e/Cargo.toml index 186babd..3116856 100644 --- a/crates/e2e/Cargo.toml +++ b/crates/e2e/Cargo.toml @@ -12,6 +12,7 @@ workspace = true [dependencies] # Local crates +kora-config.workspace = true kora-consensus.workspace = true kora-crypto = { workspace = true, features = ["test-utils"] } kora-domain = { workspace = true, features = ["evm"] } diff --git a/crates/e2e/src/harness.rs b/crates/e2e/src/harness.rs index 9b71a1a..6f46561 100644 --- a/crates/e2e/src/harness.rs +++ b/crates/e2e/src/harness.rs @@ -2,7 +2,7 @@ use std::{ sync::{Arc, Mutex}, - time::Duration, + time::{Duration, UNIX_EPOCH}, }; use alloy_consensus::Header; @@ -17,9 +17,12 @@ use commonware_consensus::{ use commonware_cryptography::{bls12381::primitives::variant::MinSig, ed25519}; use commonware_p2p::{Manager as _, simulated}; use commonware_parallel::Sequential; -use commonware_runtime::{Clock, Metrics, Runner as _, Spawner, buffer::paged::CacheRef, tokio}; +use commonware_runtime::{ + Clock, Metrics, Runner as _, Spawner, Supervisor as _, buffer::paged::CacheRef, tokio, +}; use commonware_utils::{NZU64, NZUsize, TryCollect as _, ordered::Set}; use futures::{StreamExt as _, channel::mpsc}; +use kora_config::INITIAL_BASE_FEE; use kora_crypto::{ThresholdScheme, threshold_schemes}; use kora_domain::{ Block, BlockCfg, ConsensusDigest, FinalizationEvent, LedgerEvent, PublicKey, StateRoot, TxCfg, @@ -102,8 +105,20 @@ pub struct TestHarness; impl TestHarness { /// Run a test with the given 
configuration and setup. pub fn run(config: TestConfig, setup: TestSetup) -> Result { - let executor = tokio::Runner::default(); - executor.start(|context| async move { Self::run_inner(context, config, setup).await }) + let handle = std::thread::Builder::new() + .name("kora-e2e-harness".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + let executor = tokio::Runner::default(); + executor + .start(|context| async move { Self::run_inner(context, config, setup).await }) + }) + .expect("failed to spawn e2e harness thread"); + + match handle.join() { + Ok(result) => result, + Err(panic) => std::panic::resume_unwind(panic), + } } async fn run_inner( @@ -150,7 +165,7 @@ impl TestHarness { let sim_control = Arc::new(Mutex::new(sim_control)); // Start all nodes - let bootstrap = setup.to_bootstrap(); + let bootstrap = setup.to_bootstrap(config.chain_id); let (nodes, mut finalized_rx) = start_all_nodes( &context, &sim_control, @@ -205,7 +220,7 @@ async fn start_network( participants: Set, ) -> SimControl { let (network, oracle) = simulated::Network::new( - SimContext::new(context.with_label("network")), + SimContext::new(context.child("network")), simulated::Config { max_size: MAX_MSG_SIZE as u32, disconnect_on_block: true, @@ -215,7 +230,7 @@ async fn start_network( network.start(); let control = SimControl::new(oracle); - control.manager().track(0, participants).await; + control.manager().track(0, participants); control } @@ -232,16 +247,17 @@ impl BlockContextProvider for TestContextProvider { fn context(&self, block: &Block) -> BlockContext { let header = Header { number: block.height, - timestamp: block.height, + timestamp: block.timestamp, gas_limit: self.gas_limit, beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + base_fee_per_gas: Some(INITIAL_BASE_FEE), ..Default::default() }; BlockContext::new(header, B256::ZERO, block.prevrandao) } } +#[allow(clippy::too_many_arguments)] async fn start_all_nodes( context: &tokio::Context, sim_control: 
&Arc>>, @@ -310,26 +326,36 @@ async fn start_single_node( .map_err(|e| anyhow::anyhow!("channel registration failed: {e}"))?; // Initialize ledger - let state = LedgerView::init( - context.with_label(&format!("state_{index}")), + let state = LedgerView::init_with_genesis_timestamp( + context.child("state").with_attribute("node", index), format!("{partition_prefix}-qmdb-{index}"), bootstrap.genesis_alloc.clone(), + bootstrap.genesis_timestamp, ) .await .context("init qmdb")?; let ledger = LedgerService::new(state.clone()); - spawn_ledger_observers(ledger.clone(), context.clone(), index, finalized_tx); + spawn_ledger_observers(ledger.clone(), context.child("ledger_observers"), index, finalized_tx); let test_node = TestNode::new(index, ledger.clone()); // Create application - let app = TestApplication::::new(block_cfg.max_txs, state.clone()); + let app = TestApplication::::new( + block_cfg.max_txs, + state.clone(), + chain_id, + gas_limit, + ); // Create finalized reporter let executor = RevmExecutor::new(chain_id); let context_provider = TestContextProvider { gas_limit }; - let finalized_reporter = - FinalizedReporter::new(ledger.clone(), context.clone(), executor, context_provider); + let finalized_reporter = FinalizedReporter::new( + ledger.clone(), + context.child("finalized_reporter"), + executor, + context_provider, + ); // Start marshal let marshal_mailbox = start_marshal( @@ -344,6 +370,7 @@ async fn start_single_node( channels.marshal.blocks, channels.marshal.backfill, finalized_reporter, + ledger.genesis_block(), partition_prefix, ) .await?; @@ -351,7 +378,7 @@ async fn start_single_node( // Create marshaled application let epocher = FixedEpocher::new(NZU64!(EPOCH_LENGTH)); let marshaled = Inline::new( - context.with_label(&format!("marshaled_{index}")), + context.child("marshaled").with_attribute("node", index), app, marshal_mailbox.clone(), epocher, @@ -368,7 +395,7 @@ async fn start_single_node( // Start consensus engine let engine = simplex::Engine::new( 
- context.with_label(&format!("engine_{index}")), + context.child("engine").with_attribute("node", index), simplex::Config { scheme, elector: Random, @@ -378,8 +405,9 @@ async fn start_single_node( reporter, strategy: Sequential, partition: format!("{partition_prefix}-{index}"), - mailbox_size: MAILBOX_SIZE, + mailbox_size: NZUsize!(MAILBOX_SIZE), epoch: Epoch::zero(), + floor: simplex::Floor::Genesis(ledger.genesis_block().commitment()), replay_buffer: NZUsize!(1024 * 1024), write_buffer: NZUsize!(1024 * 1024), leader_timeout: Duration::from_secs(1), @@ -388,7 +416,7 @@ async fn start_single_node( fetch_timeout: Duration::from_secs(1), activity_timeout: ViewDelta::new(20), skip_timeout: ViewDelta::new(10), - fetch_concurrent: 8, + fetch_concurrent: NZUsize!(8), page_cache, forwarding: simplex::ForwardingPolicy::Disabled, }, @@ -437,6 +465,7 @@ async fn start_marshal( blocks: (simulated::Sender, simulated::Receiver), backfill: (simulated::Sender, simulated::Receiver), application: R, + genesis: Block, partition_prefix: &str, ) -> anyhow::Result>> where @@ -448,7 +477,7 @@ where use commonware_cryptography::certificate::Scheme as _; use commonware_utils::acknowledgement::Exact; - let ctx = context.with_label(&format!("marshal_{index}")); + let ctx = context.child("marshal").with_attribute("node", index); let marshal_partition = format!("{partition_prefix}-marshal-{index}"); #[derive(Clone)] @@ -470,7 +499,7 @@ where let scheme_provider = ConstantSchemeProvider(Arc::new(scheme)); let resolver = PeerInitializer::init::<_, _, _, Block, _, _, _>( - &ctx, + ctx.child("resolver"), public_key.clone(), manager.clone(), control, @@ -478,7 +507,7 @@ where ); let (broadcast_engine, buffer) = BroadcastInitializer::init::<_, PublicKey, Block, M>( - ctx.with_label("broadcast"), + ctx.child("broadcast"), public_key, manager, block_codec_config, @@ -486,16 +515,17 @@ where broadcast_engine.start(blocks); ThresholdScheme::certificate_codec_config_unbounded(); - let 
finalizations_by_height = ArchiveInitializer::init::<_, ConsensusDigest, CertArchive>( - ctx.with_label("finalizations_by_height"), - format!("{marshal_partition}-finalizations-by-height"), - (), - ) - .await - .context("init finalizations archive")?; + let finalizations_by_height = + ArchiveInitializer::init_prunable::<_, ConsensusDigest, CertArchive>( + ctx.child("finalizations_by_height"), + format!("{marshal_partition}-finalizations-by-height"), + (), + ) + .await + .context("init finalizations archive")?; - let finalized_blocks = ArchiveInitializer::init::<_, ConsensusDigest, Block>( - ctx.with_label("finalized_blocks"), + let finalized_blocks = ArchiveInitializer::init_prunable::<_, ConsensusDigest, Block>( + ctx.child("finalized_blocks"), format!("{marshal_partition}-finalized-blocks"), block_codec_config, ) @@ -504,10 +534,11 @@ where let (actor, mailbox, _last_processed_height) = kora_marshal::ActorInitializer::init_with_partition::<_, Block, _, _, _, Exact>( - ctx.clone(), + ctx.child("actor"), finalizations_by_height, finalized_blocks, scheme_provider, + commonware_consensus::marshal::Start::Genesis(genesis), buffer_pool, block_codec_config, format!("{marshal_partition}-actor"), @@ -598,20 +629,21 @@ async fn verify_state_convergence( } }; - let node_seed = node.query_seed(head).await.ok_or_else(|| { - HarnessError::MissingState(format!("node {} missing seed", node.index)) - })?; - - seed = match seed { - None => Some(node_seed), - Some(prev) if prev == node_seed => Some(prev), - Some(prev) => { - return Err(HarnessError::StateDivergence { - digest: head, - message: format!("seed mismatch: {:?} vs {:?}", prev, node_seed), - }); - } - }; + // SeedReporter only fires on nodes that independently construct the + // finalization certificate, so not all nodes will have seeds. Only + // verify consistency across nodes that do have them. 
+ if let Some(node_seed) = node.query_seed(head).await { + seed = match seed { + None => Some(node_seed), + Some(prev) if prev == node_seed => Some(prev), + Some(prev) => { + return Err(HarnessError::StateDivergence { + digest: head, + message: format!("seed mismatch: {:?} vs {:?}", prev, node_seed), + }); + } + }; + } } let state_root = @@ -648,9 +680,7 @@ use std::collections::BTreeSet; use alloy_primitives::Bytes; use commonware_consensus::{ - Application, Block as _, VerifyingApplication, - marshal::ancestry::{AncestorStream, BlockProvider}, - simplex::types::Context, + Application, Block as _, marshal::ancestry::Ancestry, simplex::types::Context, }; use commonware_cryptography::{Committable as _, certificate::Scheme as CertScheme}; use kora_consensus::{ @@ -677,23 +707,23 @@ impl std::fmt::Debug for TestApplication { } impl TestApplication { - const fn new(max_txs: usize, ledger: LedgerView) -> Self { + const fn new(max_txs: usize, ledger: LedgerView, chain_id: u64, gas_limit: u64) -> Self { Self { ledger, - executor: RevmExecutor::new(1337), + executor: RevmExecutor::new(chain_id), max_txs, - gas_limit: 30_000_000, + gas_limit, _scheme: std::marker::PhantomData, } } - fn block_context(&self, height: u64, prevrandao: B256) -> BlockContext { + fn block_context(&self, height: u64, timestamp: u64, prevrandao: B256) -> BlockContext { let header = Header { number: height, - timestamp: height, + timestamp, gas_limit: self.gas_limit, beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + base_fee_per_gas: Some(INITIAL_BASE_FEE), ..Default::default() }; BlockContext::new(header, B256::ZERO, prevrandao) @@ -703,7 +733,7 @@ impl TestApplication { self.ledger.seed_for_parent(parent_digest).await.unwrap_or(B256::ZERO) } - async fn build_block(&self, parent: &Block) -> Option { + async fn build_block(&self, parent: &Block, timestamp: u64) -> Option { let parent_digest = parent.commitment(); let parent_snapshot = self.ledger.parent_snapshot(parent_digest).await?; @@ 
-713,18 +743,15 @@ impl TestApplication { let prevrandao = self.get_prevrandao(parent_digest).await; let height = parent.height + 1; - let context = self.block_context(height, prevrandao); + let context = self.block_context(height, timestamp, prevrandao); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); let outcome = self.executor.execute(&parent_snapshot.state, &context, &txs_bytes).ok()?; - let state_root = self - .ledger - .compute_root_from_store(parent_digest, outcome.changes.clone()) - .await - .ok()?; + let state_root = + self.ledger.compute_root_from_store(parent_digest, &outcome.changes).await.ok()?; - let block = Block { parent: parent.id(), height, prevrandao, state_root, txs }; + let block = Block::new(parent.id(), height, timestamp, prevrandao, state_root, txs); let merged_changes = parent_snapshot.state.merge_changes(outcome.changes.clone()); let next_state = OverlayState::new(parent_snapshot.state.base(), merged_changes); @@ -756,7 +783,7 @@ impl TestApplication { return false; }; - let context = self.block_context(block.height, block.prevrandao); + let context = self.block_context(block.height, block.timestamp, block.prevrandao); let execution = match BlockExecution::execute(&parent_snapshot, &self.executor, &context, &block.txs) .await @@ -767,7 +794,7 @@ impl TestApplication { let state_root = match self .ledger - .compute_root_from_store(parent_digest, execution.outcome.changes.clone()) + .compute_root_from_store(parent_digest, &execution.outcome.changes) .await { Ok(root) => root, @@ -827,49 +854,41 @@ where type Context = Context; type Block = Block; - fn genesis(&mut self) -> impl std::future::Future + Send { - async move { self.ledger.genesis_block() } - } - - fn propose>( + fn propose( &mut self, - _context: (Env, Self::Context), - mut ancestry: AncestorStream, + context: (Env, Self::Context), + mut ancestry: impl Ancestry, ) -> impl std::future::Future> + Send { + let env = context.0; async move { let parent = 
ancestry.next().await?; - self.build_block(&parent).await + let now_secs = + env.current().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0); + let timestamp = Block::next_timestamp(now_secs, parent.timestamp)?; + self.build_block(&parent, timestamp).await } } -} -impl VerifyingApplication for TestApplication -where - Env: Rng + Spawner + Metrics + Clock, - S: CertScheme + Send + Sync + 'static, -{ - fn verify>( + async fn verify( &mut self, _context: (Env, Self::Context), - mut ancestry: AncestorStream, - ) -> impl std::future::Future + Send { - async move { - let mut blocks_to_verify = Vec::new(); - while let Some(block) = ancestry.next().await { - let digest = block.commitment(); - if self.ledger.query_state_root(digest).await.is_some() { - break; - } - blocks_to_verify.push(block); + mut ancestry: impl Ancestry, + ) -> bool { + let mut blocks_to_verify = Vec::new(); + while let Some(block) = ancestry.next().await { + let digest = block.commitment(); + if self.ledger.query_state_root(digest).await.is_some() { + break; } + blocks_to_verify.push(block); + } - for block in blocks_to_verify.into_iter().rev() { - if !self.verify_block(&block).await { - return false; - } + for block in blocks_to_verify.into_iter().rev() { + if !self.verify_block(&block).await { + return false; } - - true } + + true } } diff --git a/crates/e2e/src/setup.rs b/crates/e2e/src/setup.rs index d3e1b1a..0725056 100644 --- a/crates/e2e/src/setup.rs +++ b/crates/e2e/src/setup.rs @@ -4,9 +4,19 @@ use std::time::Duration; use alloy_primitives::{Address, U256}; use k256::ecdsa::SigningKey; +use kora_config::INITIAL_BASE_FEE; use kora_domain::{BootstrapConfig, Tx, evm::Evm}; use kora_transport_sim::SimLinkConfig; +const TEST_INITIAL_BALANCE: u64 = 1_000_000_000_000_000_000; +const TRANSFER_GAS_LIMIT: u64 = 21_000; +const TRANSFER_MAX_FEE_PER_GAS: u128 = INITIAL_BASE_FEE as u128; +const TRANSFER_MAX_PRIORITY_FEE_PER_GAS: u128 = 0; + +fn transfer_gas_cost(tx_count: usize) -> U256 { + 
U256::from(TRANSFER_GAS_LIMIT) * U256::from(INITIAL_BASE_FEE) * U256::from(tx_count) +} + /// Configuration for an e2e test run. #[derive(Clone, Debug)] pub struct TestConfig { @@ -111,17 +121,25 @@ impl TestSetup { let sender = Evm::address_from_key(&sender_key); let receiver = Evm::address_from_key(&receiver_key); - let initial_balance = U256::from(1_000_000u64); + let initial_balance = U256::from(TEST_INITIAL_BALANCE); let transfer_amount = U256::from(100u64); - let tx = - Evm::sign_eip1559_transfer(&sender_key, chain_id, receiver, transfer_amount, 0, 21_000); + let tx = Evm::sign_eip1559_transfer( + &sender_key, + chain_id, + receiver, + transfer_amount, + 0, + TRANSFER_GAS_LIMIT, + TRANSFER_MAX_FEE_PER_GAS, + TRANSFER_MAX_PRIORITY_FEE_PER_GAS, + ); Self { genesis_alloc: vec![(sender, initial_balance), (receiver, U256::ZERO)], bootstrap_txs: vec![tx], expected_balances: vec![ - (sender, initial_balance - transfer_amount), + (sender, initial_balance - transfer_amount - transfer_gas_cost(1)), (receiver, transfer_amount), ], } @@ -133,7 +151,7 @@ impl TestSetup { let mut bootstrap_txs = Vec::with_capacity(count); let mut expected_balances = Vec::with_capacity(count * 2); - let initial_balance = U256::from(1_000_000u64); + let initial_balance = U256::from(TEST_INITIAL_BALANCE); let transfer_amount = U256::from(100u64); for i in 0..count { @@ -153,11 +171,14 @@ impl TestSetup { receiver, transfer_amount, 0, - 21_000, + TRANSFER_GAS_LIMIT, + TRANSFER_MAX_FEE_PER_GAS, + TRANSFER_MAX_PRIORITY_FEE_PER_GAS, ); bootstrap_txs.push(tx); - expected_balances.push((sender, initial_balance - transfer_amount)); + expected_balances + .push((sender, initial_balance - transfer_amount - transfer_gas_cost(1))); expected_balances.push((receiver, transfer_amount)); } @@ -171,7 +192,7 @@ impl TestSetup { let sender = Evm::address_from_key(&sender_key); let receiver = Evm::address_from_key(&receiver_key); - let initial_balance = U256::from(10_000_000u64); + let initial_balance = 
U256::from(TEST_INITIAL_BALANCE); let transfer_amount = U256::from(100u64); let mut bootstrap_txs = Vec::with_capacity(tx_count); @@ -182,25 +203,28 @@ impl TestSetup { receiver, transfer_amount, nonce as u64, - 21_000, + TRANSFER_GAS_LIMIT, + TRANSFER_MAX_FEE_PER_GAS, + TRANSFER_MAX_PRIORITY_FEE_PER_GAS, ); bootstrap_txs.push(tx); } let total_transferred = transfer_amount * U256::from(tx_count); + let total_gas_cost = transfer_gas_cost(tx_count); Self { genesis_alloc: vec![(sender, initial_balance), (receiver, U256::ZERO)], bootstrap_txs, expected_balances: vec![ - (sender, initial_balance - total_transferred), + (sender, initial_balance - total_transferred - total_gas_cost), (receiver, total_transferred), ], } } /// Convert to bootstrap config. - pub fn to_bootstrap(&self) -> BootstrapConfig { - BootstrapConfig::new(self.genesis_alloc.clone(), self.bootstrap_txs.clone()) + pub fn to_bootstrap(&self, chain_id: u64) -> BootstrapConfig { + BootstrapConfig::new(chain_id, self.genesis_alloc.clone(), self.bootstrap_txs.clone()) } } diff --git a/crates/e2e/src/tests/consensus.rs b/crates/e2e/src/tests/consensus.rs index b4195d1..4ee9ae8 100644 --- a/crates/e2e/src/tests/consensus.rs +++ b/crates/e2e/src/tests/consensus.rs @@ -19,7 +19,6 @@ fn test_four_validators_reach_consensus() { /// Test that a 7-validator network can finalize blocks (larger quorum). #[test] -#[ignore = "requires investigation - larger quorums time out"] fn test_seven_validators_reach_consensus() { let config = TestConfig::default() .with_validators(7) @@ -75,7 +74,6 @@ fn test_sequential_block_production() { /// Test with different random seeds for reproducibility. 
#[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_deterministic_with_seed() { let config = TestConfig::default().with_validators(4).with_max_blocks(3).with_seed(42); let setup = TestSetup::simple_transfer(config.chain_id); @@ -100,7 +98,6 @@ fn test_empty_blocks() { /// Test minimum viable network (4 validators, threshold 3). #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_minimum_quorum() { // 4 validators with threshold 3 is the minimum for BFT let config = TestConfig::default().with_validators(4).with_max_blocks(3); @@ -114,7 +111,6 @@ fn test_minimum_quorum() { /// Test that transactions affect balances correctly after finalization. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_balance_updates_after_finalization() { let config = TestConfig::default().with_validators(4).with_max_blocks(3); let setup = TestSetup::simple_transfer(config.chain_id); diff --git a/crates/e2e/src/tests/execution.rs b/crates/e2e/src/tests/execution.rs index a02fe16..7ffee44 100644 --- a/crates/e2e/src/tests/execution.rs +++ b/crates/e2e/src/tests/execution.rs @@ -7,7 +7,6 @@ use crate::{TestConfig, TestHarness, TestSetup}; /// Test a simple ETH transfer between two accounts. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_simple_transfer() { let config = TestConfig::default().with_validators(4).with_max_blocks(3); let setup = TestSetup::simple_transfer(config.chain_id); @@ -19,7 +18,6 @@ fn test_simple_transfer() { /// Test multiple independent transfers in a single block. 
#[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_multiple_transfers_single_block() { let config = TestConfig::default().with_validators(4).with_max_blocks(3); let setup = TestSetup::multi_transfer(config.chain_id, 5); @@ -31,7 +29,6 @@ fn test_multiple_transfers_single_block() { /// Test multiple transactions from the same sender with sequential nonces. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_sequential_nonces() { let config = TestConfig::default().with_validators(4).with_max_blocks(3); let setup = TestSetup::sequential_nonces(config.chain_id, 3); @@ -43,7 +40,6 @@ fn test_sequential_nonces() { /// Test that larger transfer counts work correctly. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_many_transfers() { let config = TestConfig::default().with_validators(4).with_max_blocks(5); let setup = TestSetup::multi_transfer(config.chain_id, 10); @@ -55,7 +51,6 @@ fn test_many_transfers() { /// Test that state is correctly accumulated across multiple blocks. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_state_accumulation() { // This test uses sequential nonces to ensure state accumulates correctly let config = TestConfig::default().with_validators(4).with_max_blocks(5); @@ -66,20 +61,45 @@ fn test_state_accumulation() { assert_eq!(outcome.blocks_finalized, 5); } -/// Test with different chain IDs. +fn run_chain_id(chain_id: u64) { + let mut config = TestConfig::default().with_validators(4).with_max_blocks(2); + config.chain_id = chain_id; + let setup = TestSetup::simple_transfer(chain_id); + + let outcome = TestHarness::run(config, setup) + .unwrap_or_else(|e| panic!("chain_id {chain_id} failed: {e}")); + + assert_eq!(outcome.blocks_finalized, 2); +} + +/// Test execution with chain ID 1. +#[test] +fn test_chain_id_1() { + run_chain_id(1); +} + +/// Test execution with chain ID 5. 
+#[test] +fn test_chain_id_5() { + run_chain_id(5); +} + +/// Test execution with chain ID 1337. +#[test] +fn test_chain_id_1337() { + run_chain_id(1337); +} + +/// Test execution with chain ID 31337. +#[test] +fn test_chain_id_31337() { + run_chain_id(31337); +} + +/// Test execution with chain ID 42161. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] -fn test_different_chain_ids() { - for chain_id in [1, 5, 1337, 31337, 42161] { - let mut config = TestConfig::default().with_validators(4).with_max_blocks(2); - config.chain_id = chain_id; - let setup = TestSetup::simple_transfer(chain_id); - - let outcome = TestHarness::run(config, setup) - .unwrap_or_else(|e| panic!("chain_id {chain_id} failed: {e}")); - - assert_eq!(outcome.blocks_finalized, 2); - } +fn test_chain_id_42161() { + run_chain_id(42161); } /// Test that gas limits are respected. @@ -96,7 +116,6 @@ fn test_gas_limit_enforcement() { /// Test maximum transactions per block. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_max_transactions_per_block() { let config = TestConfig::default().with_validators(4).with_max_blocks(3); // BLOCK_CODEC_MAX_TXS is 64, so test with fewer @@ -109,7 +128,6 @@ fn test_max_transactions_per_block() { /// Test that execution is deterministic across validators. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_deterministic_execution() { let config = TestConfig::default() .with_validators(4) diff --git a/crates/e2e/src/tests/resilience.rs b/crates/e2e/src/tests/resilience.rs index 4b66402..fddfee6 100644 --- a/crates/e2e/src/tests/resilience.rs +++ b/crates/e2e/src/tests/resilience.rs @@ -11,7 +11,6 @@ use crate::{TestConfig, TestHarness, TestSetup}; /// Test with high network latency. 
#[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_high_latency_network() { let high_latency_link = SimLinkConfig { latency: Duration::from_millis(100), @@ -54,26 +53,43 @@ fn test_network_jitter() { assert_eq!(outcome.blocks_finalized, 5); } -/// Test that consensus works with varying validator counts. +fn run_validator_count(n: usize) { + let config = TestConfig::default().with_validators(n).with_max_blocks(3).with_seed(n as u64); + + let setup = TestSetup::simple_transfer(config.chain_id); + + let outcome = + TestHarness::run(config, setup).unwrap_or_else(|e| panic!("{n} validators failed: {e}")); + + assert_eq!(outcome.blocks_finalized, 3, "Failed with {n} validators"); +} + +/// Test that consensus works with four validators. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] -fn test_varying_validator_counts() { - for n in [4, 5, 6, 7] { - let config = - TestConfig::default().with_validators(n).with_max_blocks(3).with_seed(n as u64); +fn test_four_validator_count() { + run_validator_count(4); +} - let setup = TestSetup::simple_transfer(config.chain_id); +/// Test that consensus works with five validators. +#[test] +fn test_five_validator_count() { + run_validator_count(5); +} - let outcome = TestHarness::run(config.clone(), setup) - .unwrap_or_else(|e| panic!("{n} validators failed: {e}")); +/// Test that consensus works with six validators. +#[test] +fn test_six_validator_count() { + run_validator_count(6); +} - assert_eq!(outcome.blocks_finalized, 3, "Failed with {n} validators"); - } +/// Test that consensus works with seven validators. +#[test] +fn test_seven_validator_count() { + run_validator_count(7); } /// Test longer chains to detect state accumulation issues. 
#[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_longer_chain() { let config = TestConfig::default() .with_validators(4) @@ -105,7 +121,6 @@ fn test_sustained_throughput() { /// Test that different seeds produce different (but valid) outcomes. #[test] -#[ignore = "flaky when run in parallel - run with --test-threads=1"] fn test_different_seeds_different_paths() { let setup = TestSetup::simple_transfer(1337); let timeout = std::time::Duration::from_secs(45); @@ -134,7 +149,6 @@ fn test_different_seeds_different_paths() { /// Stress test with maximum transactions. #[test] -#[ignore = "slow stress test"] fn test_stress_max_transactions() { let config = TestConfig::default() .with_validators(4) @@ -151,7 +165,6 @@ fn test_stress_max_transactions() { /// Stress test with many blocks. #[test] -#[ignore = "slow stress test"] fn test_stress_many_blocks() { let config = TestConfig::default() .with_validators(4) diff --git a/crates/network/marshal/Cargo.toml b/crates/network/marshal/Cargo.toml index 557dae1..4bb2154 100644 --- a/crates/network/marshal/Cargo.toml +++ b/crates/network/marshal/Cargo.toml @@ -17,17 +17,17 @@ commonware-consensus.workspace = true commonware-cryptography.workspace = true commonware-p2p.workspace = true commonware-parallel.workspace = true -commonware-resolver.workspace = true commonware-runtime.workspace = true commonware-storage.workspace = true commonware-utils.workspace = true rand.workspace = true rand_core.workspace = true +tracing.workspace = true [dev-dependencies] bytes.workspace = true +commonware-actor.workspace = true commonware-consensus = { workspace = true, features = ["mocks"] } commonware-cryptography = { workspace = true, features = ["mocks"] } commonware-macros.workspace = true -tracing.workspace = true tracing-subscriber.workspace = true diff --git a/crates/network/marshal/src/actor.rs b/crates/network/marshal/src/actor.rs index 675df92..d1cd47b 100644 --- 
a/crates/network/marshal/src/actor.rs +++ b/crates/network/marshal/src/actor.rs @@ -8,7 +8,7 @@ use std::num::{NonZeroU64, NonZeroUsize}; use commonware_consensus::{ Block, marshal::{ - Config, + Config, Start, core::{Actor, Mailbox}, standard::Standard, store::{Blocks, Certificates}, @@ -17,7 +17,7 @@ use commonware_consensus::{ types::{Epoch, FixedEpocher, Height, ViewDelta}, }; use commonware_cryptography::certificate::Provider; -use commonware_parallel::Sequential; +use commonware_parallel::{Sequential, Strategy}; use commonware_runtime::{BufferPooler, Clock, Metrics, Spawner, Storage, buffer::paged::CacheRef}; use commonware_utils::{Acknowledgement, NZU64, NZUsize}; use rand_core::CryptoRngCore; @@ -45,26 +45,34 @@ impl ActorInitializer { /// The default mailbox size. pub const DEFAULT_MAILBOX_SIZE: usize = 1024; - /// The default view retention timeout (10 views). - pub const DEFAULT_VIEW_RETENTION_TIMEOUT: ViewDelta = ViewDelta::new(10); + /// The default view retention timeout. + /// + /// 256 views provides ~2.7 seconds of catch-up history at 93 blocks/s, + /// which is sufficient for consensus. The previous value of 2560 retained + /// ~27 seconds of cache data across 4 cache types, wasting ~10x more memory. + pub const DEFAULT_VIEW_RETENTION_TIMEOUT: ViewDelta = ViewDelta::new(256); /// The default maximum number of blocks to repair at once. - pub const DEFAULT_MAX_REPAIR: NonZeroUsize = NZUsize!(10); + pub const DEFAULT_MAX_REPAIR: NonZeroUsize = NZUsize!(128); /// The default prunable items per section. - pub const DEFAULT_PRUNABLE_ITEMS_PER_SECTION: NonZeroU64 = NZU64!(10); + /// + /// Pruning operates at section granularity -- items are only freed when an + /// entire section falls below the retention window. A smaller section size + /// (256 vs 4096) makes pruning more responsive and reduces peak memory. + pub const DEFAULT_PRUNABLE_ITEMS_PER_SECTION: NonZeroU64 = NZU64!(256); /// The default replay buffer size. 
- pub const DEFAULT_REPLAY_BUFFER: NonZeroUsize = NZUsize!(1024); + pub const DEFAULT_REPLAY_BUFFER: NonZeroUsize = NZUsize!(8 * 1024 * 1024); /// The default key write buffer size. - pub const DEFAULT_KEY_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024); + pub const DEFAULT_KEY_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024 * 1024); /// The default value write buffer size. - pub const DEFAULT_VALUE_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024); + pub const DEFAULT_VALUE_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024 * 1024); /// The default blocks per epoch. - pub const DEFAULT_BLOCKS_PER_EPOCH: NonZeroU64 = NZU64!(20); + pub const DEFAULT_BLOCKS_PER_EPOCH: NonZeroU64 = NZU64!(u64::MAX); /// The default partition prefix. pub const DEFAULT_PARTITION_PREFIX: &'static str = "marshal"; @@ -99,6 +107,7 @@ impl ActorInitializer { finalizations_by_height: FC, finalized_blocks: FB, provider: P, + start: Start, page_cache: CacheRef, block_codec_config: B::Cfg, ) -> ( @@ -113,12 +122,51 @@ impl ActorInitializer { FC: Certificates, FB: Blocks, A: Acknowledgement, + { + Self::init_with_strategy( + context, + finalizations_by_height, + finalized_blocks, + provider, + start, + page_cache, + block_codec_config, + Sequential, + ) + .await + } + + /// Initializes the marshal actor with a custom verification strategy. 
+ #[allow(clippy::too_many_arguments, clippy::type_complexity)] + pub async fn init_with_strategy( + context: E, + finalizations_by_height: FC, + finalized_blocks: FB, + provider: P, + start: Start, + page_cache: CacheRef, + block_codec_config: B::Cfg, + strategy: S, + ) -> ( + Actor, P, FC, FB, FixedEpocher, S, A>, + Mailbox>, + Height, + ) + where + E: BufferPooler + CryptoRngCore + Spawner + Metrics + Clock + Storage, + B: Block, + P: Provider>, + FC: Certificates, + FB: Blocks, + A: Acknowledgement, + S: Strategy, { let config = Config { provider, + start, epocher: FixedEpocher::new(Self::DEFAULT_BLOCKS_PER_EPOCH), partition_prefix: Self::DEFAULT_PARTITION_PREFIX.to_string(), - mailbox_size: Self::DEFAULT_MAILBOX_SIZE, + mailbox_size: NZUsize!(Self::DEFAULT_MAILBOX_SIZE), view_retention_timeout: Self::DEFAULT_VIEW_RETENTION_TIMEOUT, prunable_items_per_section: Self::DEFAULT_PRUNABLE_ITEMS_PER_SECTION, page_cache, @@ -128,22 +176,25 @@ impl ActorInitializer { block_codec_config, max_repair: Self::DEFAULT_MAX_REPAIR, max_pending_acks: NZUsize!(1024), - strategy: Sequential, + strategy, }; - Actor::init(context, finalizations_by_height, finalized_blocks, config).await + let (actor, mailbox, processed_height) = + Actor::init(context, finalizations_by_height, finalized_blocks, config).await; + (actor, mailbox, processed_height.unwrap_or_else(Height::zero)) } /// Initializes the marshal actor with a custom partition prefix. /// /// This is the same as [`init`](Self::init) but allows specifying a custom partition prefix /// for storage isolation. Useful for testing multiple nodes in the same process. 
- #[allow(clippy::type_complexity)] + #[allow(clippy::too_many_arguments, clippy::type_complexity)] pub async fn init_with_partition( context: E, finalizations_by_height: FC, finalized_blocks: FB, provider: P, + start: Start, page_cache: CacheRef, block_codec_config: B::Cfg, partition_prefix: impl Into, @@ -162,9 +213,10 @@ impl ActorInitializer { { let config = Config { provider, + start, epocher: FixedEpocher::new(Self::DEFAULT_BLOCKS_PER_EPOCH), partition_prefix: partition_prefix.into(), - mailbox_size: Self::DEFAULT_MAILBOX_SIZE, + mailbox_size: NZUsize!(Self::DEFAULT_MAILBOX_SIZE), view_retention_timeout: Self::DEFAULT_VIEW_RETENTION_TIMEOUT, prunable_items_per_section: Self::DEFAULT_PRUNABLE_ITEMS_PER_SECTION, page_cache, @@ -177,7 +229,9 @@ impl ActorInitializer { strategy: Sequential, }; - Actor::init(context, finalizations_by_height, finalized_blocks, config).await + let (actor, mailbox, processed_height) = + Actor::init(context, finalizations_by_height, finalized_blocks, config).await; + (actor, mailbox, processed_height.unwrap_or_else(Height::zero)) } } @@ -188,13 +242,13 @@ mod tests { #[test] fn test_defaults() { assert_eq!(ActorInitializer::DEFAULT_MAILBOX_SIZE, 1024); - assert_eq!(ActorInitializer::DEFAULT_VIEW_RETENTION_TIMEOUT, ViewDelta::new(10)); - assert_eq!(ActorInitializer::DEFAULT_MAX_REPAIR.get(), 10); - assert_eq!(ActorInitializer::DEFAULT_PRUNABLE_ITEMS_PER_SECTION.get(), 10); - assert_eq!(ActorInitializer::DEFAULT_REPLAY_BUFFER.get(), 1024); - assert_eq!(ActorInitializer::DEFAULT_KEY_WRITE_BUFFER.get(), 1024); - assert_eq!(ActorInitializer::DEFAULT_VALUE_WRITE_BUFFER.get(), 1024); - assert_eq!(ActorInitializer::DEFAULT_BLOCKS_PER_EPOCH.get(), 20); + assert_eq!(ActorInitializer::DEFAULT_VIEW_RETENTION_TIMEOUT, ViewDelta::new(256)); + assert_eq!(ActorInitializer::DEFAULT_MAX_REPAIR.get(), 128); + assert_eq!(ActorInitializer::DEFAULT_PRUNABLE_ITEMS_PER_SECTION.get(), 256); + assert_eq!(ActorInitializer::DEFAULT_REPLAY_BUFFER.get(), 8 * 1024 
* 1024); + assert_eq!(ActorInitializer::DEFAULT_KEY_WRITE_BUFFER.get(), 1024 * 1024); + assert_eq!(ActorInitializer::DEFAULT_VALUE_WRITE_BUFFER.get(), 1024 * 1024); + assert_eq!(ActorInitializer::DEFAULT_BLOCKS_PER_EPOCH.get(), u64::MAX); assert_eq!(ActorInitializer::DEFAULT_PARTITION_PREFIX, "marshal"); } } diff --git a/crates/network/marshal/src/archive.rs b/crates/network/marshal/src/archive.rs index 4250713..31825a4 100644 --- a/crates/network/marshal/src/archive.rs +++ b/crates/network/marshal/src/archive.rs @@ -1,46 +1,337 @@ -//! Contains the [`ArchiveInitializer`] which initializes immutable archive storage. +//! Contains the [`ArchiveInitializer`] which initializes archive storage, and +//! the [`CheckpointedArchive`] wrapper that batches syncs to checkpoint boundaries. use std::num::{NonZeroU16, NonZeroU64, NonZeroUsize}; use commonware_codec::Codec; +use commonware_consensus::{ + Block, + marshal::store::{Blocks, Certificates}, + simplex::types::Finalization, + types::Height, +}; +use commonware_cryptography::{Digest, Digestible, certificate::Scheme}; use commonware_runtime::{BufferPooler, Clock, Metrics, Spawner, Storage, buffer::paged::CacheRef}; -use commonware_storage::archive::immutable::{Archive, Config}; +use commonware_storage::{ + archive::{ + Archive as ArchiveTrait, Error as ArchiveError, Identifier, + immutable::{Archive, Config}, + prunable::{Archive as PrunableArchive, Config as PrunableConfig}, + }, + translator::{EightCap, Translator}, +}; use commonware_utils::{NZU16, NZU64, NZUsize, sequence::Array}; +use tracing::warn; -/// Initializes immutable archive storage with sensible defaults. +/// Trait for archive backends that support pruning old entries. +/// +/// This enables [`CheckpointedArchive`] to forward `prune` calls from the +/// marshal's [`Blocks`] and [`Certificates`] stores to the underlying archive. +pub trait Prunable { + /// Remove all entries with index strictly below `min`. 
+ fn prune( + &mut self, + min: u64, + ) -> impl std::future::Future> + Send; +} + +impl Prunable for PrunableArchive +where + T: Translator, + E: BufferPooler + Storage + Metrics + Send, + K: Array, + V: Codec + Send + Sync, +{ + async fn prune(&mut self, min: u64) -> Result<(), ArchiveError> { + Self::prune(self, min).await + } +} + +/// Immutable archive wrapper that only durably syncs on checkpoint boundaries. +/// +/// `put` still updates the in-memory archive immediately, so marshal can serve +/// and query freshly finalized blocks. `sync` is forwarded to disk only when the +/// highest dirty height is divisible by `checkpoint_interval`. +#[derive(Debug)] +pub struct CheckpointedArchive { + inner: A, + checkpoint_interval: u64, + highest_dirty: Option, +} + +impl CheckpointedArchive { + /// Create a checkpointed archive around an existing archive. + /// + /// A `checkpoint_interval` of 0 is clamped to 1 to prevent + /// division-by-zero in [`should_sync`]. This matches the guards in + /// `NoSyncStorage::new()` (`.max(1)`) and + /// `FinalizedReporter::with_checkpoint_interval()` (`if 0 then 1`). + pub const fn new(inner: A, checkpoint_interval: u64) -> Self { + let interval = if checkpoint_interval == 0 { 1 } else { checkpoint_interval }; + Self { inner, checkpoint_interval: interval, highest_dirty: None } + } + + fn mark_dirty(&mut self, height: u64) { + self.highest_dirty = + Some(self.highest_dirty.map_or(height, |existing| existing.max(height))); + } + + fn should_sync(&self) -> bool + where + A: ArchiveTrait, + { + match self.highest_dirty { + Some(height) if self.checkpoint_interval <= 1 => self.is_contiguous_through(height), + Some(height) => { + // Compute the highest checkpoint boundary at or below the + // dirty height. This handles out-of-order insertion: even if + // highest_dirty overshoots a boundary (e.g. 65 with interval + // 64), we recognise that the boundary at 64 has been reached + // and sync when the archive is contiguous through it. 
The + // inner archive's sync() flushes ALL in-memory data, so + // blocks above the boundary are also persisted. + let boundary = (height / self.checkpoint_interval) * self.checkpoint_interval; + boundary > 0 && self.is_contiguous_through(boundary) + } + None => false, + } + } + + fn is_contiguous_through(&self, target: u64) -> bool + where + A: ArchiveTrait, + { + let mut expected_start = None; + + for (start, end) in self.inner.ranges() { + let Some(expected) = expected_start else { + if start > target { + return false; + } + if end >= target { + return true; + } + expected_start = end.checked_add(1); + continue; + }; + + if start > expected { + return false; + } + if end >= target { + return true; + } + expected_start = end.checked_add(1); + } + + false + } +} + +impl ArchiveTrait for CheckpointedArchive +where + A: ArchiveTrait + Sync, +{ + type Key = A::Key; + type Value = A::Value; + + async fn put( + &mut self, + index: u64, + key: Self::Key, + value: Self::Value, + ) -> Result<(), ArchiveError> { + self.inner.put(index, key, value).await?; + self.mark_dirty(index); + Ok(()) + } + + async fn get<'a>( + &'a self, + identifier: Identifier<'a, Self::Key>, + ) -> Result, ArchiveError> { + self.inner.get(identifier).await + } + + async fn has<'a>( + &'a self, + identifier: Identifier<'a, Self::Key>, + ) -> Result { + self.inner.has(identifier).await + } + + fn next_gap(&self, index: u64) -> (Option, Option) { + self.inner.next_gap(index) + } + + fn missing_items(&self, index: u64, max: usize) -> Vec { + self.inner.missing_items(index, max) + } + + fn ranges(&self) -> impl Iterator { + self.inner.ranges() + } + + fn ranges_from(&self, from: u64) -> impl Iterator { + self.inner.ranges_from(from) + } + + fn first_index(&self) -> Option { + self.inner.first_index() + } + + fn last_index(&self) -> Option { + self.inner.last_index() + } + + async fn sync(&mut self) -> Result<(), ArchiveError> { + if self.should_sync() { + self.inner.sync().await?; + self.highest_dirty 
= None; + } + Ok(()) + } + + async fn destroy(self) -> Result<(), ArchiveError> { + self.inner.destroy().await + } +} + +impl Certificates for CheckpointedArchive +where + A: ArchiveTrait> + Prunable + Send + Sync + 'static, + B: Digest, + C: Digest, + S: Scheme, +{ + type BlockDigest = B; + type Commitment = C; + type Scheme = S; + type Error = ArchiveError; + + async fn put( + &mut self, + height: Height, + digest: Self::BlockDigest, + finalization: Finalization, + ) -> Result<(), Self::Error> { + ArchiveTrait::put(self, height.get(), digest, finalization).await + } + + async fn sync(&mut self) -> Result<(), Self::Error> { + ArchiveTrait::sync(self).await + } + + async fn get( + &self, + id: Identifier<'_, Self::BlockDigest>, + ) -> Result>, Self::Error> { + ArchiveTrait::get(self, id).await + } + + async fn prune(&mut self, min: Height) -> Result<(), Self::Error> { + self.inner.prune(min.get()).await + } + + fn last_index(&self) -> Option { + ArchiveTrait::last_index(self).map(Height::new) + } + + fn ranges_from(&self, from: Height) -> impl Iterator { + ArchiveTrait::ranges_from(self, from.get()) + .map(|(start, end)| (Height::new(start), Height::new(end))) + } +} + +impl Blocks for CheckpointedArchive +where + A: ArchiveTrait + Prunable + Send + Sync + 'static, + B: Block, +{ + type Block = B; + type Error = ArchiveError; + + async fn put(&mut self, block: Self::Block) -> Result<(), Self::Error> { + ArchiveTrait::put(self, block.height().get(), block.digest(), block).await + } + + async fn sync(&mut self) -> Result<(), Self::Error> { + ArchiveTrait::sync(self).await + } + + async fn get( + &self, + id: Identifier<'_, ::Digest>, + ) -> Result, Self::Error> { + ArchiveTrait::get(self, id).await + } + + async fn prune(&mut self, min: Height) -> Result<(), Self::Error> { + self.inner.prune(min.get()).await + } + + fn missing_items(&self, start: Height, max: usize) -> Vec { + ArchiveTrait::missing_items(self, start.get(), max).into_iter().map(Height::new).collect() 
+ } + + fn next_gap(&self, value: Height) -> (Option, Option) { + let (current, next) = ArchiveTrait::next_gap(self, value.get()); + (current.map(Height::new), next.map(Height::new)) + } + + fn last_index(&self) -> Option { + ArchiveTrait::last_index(self).map(Height::new) + } +} + +/// Initializes archive storage with sensible defaults. +/// +/// Provides both immutable (append-only) and prunable archive backends. +/// Production deployments should use the prunable variants +/// ([`init_prunable`](Self::init_prunable), +/// [`init_prunable_checkpointed`](Self::init_prunable_checkpointed)) +/// so the marshal can reclaim disk space for old finalized blocks and +/// certificates via the [`Prunable`] trait. #[derive(Debug, Clone, Copy)] pub struct ArchiveInitializer; impl ArchiveInitializer { /// The default freezer table initial size. - pub const DEFAULT_FREEZER_TABLE_INITIAL_SIZE: u32 = 65_536; + pub const DEFAULT_FREEZER_TABLE_INITIAL_SIZE: u32 = 2_097_152; /// The default freezer table resize frequency. pub const DEFAULT_FREEZER_TABLE_RESIZE_FREQUENCY: u8 = 4; /// The default freezer table resize chunk size. - pub const DEFAULT_FREEZER_TABLE_RESIZE_CHUNK_SIZE: u32 = 16_384; + pub const DEFAULT_FREEZER_TABLE_RESIZE_CHUNK_SIZE: u32 = 65_536; /// The default freezer value target size. - pub const DEFAULT_FREEZER_VALUE_TARGET_SIZE: u64 = 1024; + pub const DEFAULT_FREEZER_VALUE_TARGET_SIZE: u64 = 1024 * 1024 * 1024; /// The default compression level (zstd level 3). pub const DEFAULT_COMPRESSION_LEVEL: Option = Some(3); /// The default items per section. - pub const DEFAULT_ITEMS_PER_SECTION: NonZeroU64 = NZU64!(1024); + pub const DEFAULT_ITEMS_PER_SECTION: NonZeroU64 = NZU64!(262_144); + + /// The default prunable items per section. + /// + /// Pruning operates at section granularity -- items are only freed when an + /// entire section falls below the retention window. A smaller section size + /// (256) makes pruning more responsive and reduces peak disk usage. 
+ pub const DEFAULT_PRUNABLE_ITEMS_PER_SECTION: NonZeroU64 = NZU64!(256); /// The default write buffer size. - pub const DEFAULT_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024); + pub const DEFAULT_WRITE_BUFFER: NonZeroUsize = NZUsize!(1024 * 1024); /// The default replay buffer size. - pub const DEFAULT_REPLAY_BUFFER: NonZeroUsize = NZUsize!(1024); + pub const DEFAULT_REPLAY_BUFFER: NonZeroUsize = NZUsize!(8 * 1024 * 1024); /// The default page size. - pub const DEFAULT_PAGE_SIZE: NonZeroU16 = NZU16!(1024); + pub const DEFAULT_PAGE_SIZE: NonZeroU16 = NZU16!(4_096); /// The default page cache size. - pub const DEFAULT_PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(10); + pub const DEFAULT_PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(8_192); /// The default partition prefix for finalizations archive. pub const DEFAULT_FINALIZATIONS_PREFIX: &'static str = "finalizations"; @@ -65,7 +356,7 @@ impl ArchiveInitializer { codec_config: V::Cfg, ) -> Result, commonware_storage::archive::Error> where - E: BufferPooler + Spawner + Storage + Metrics + Clock + Clone, + E: BufferPooler + Spawner + Storage + Metrics + Clock, K: Array, V: Codec + Send + Sync, { @@ -96,6 +387,22 @@ impl ArchiveInitializer { Archive::init(ctx, config).await } + /// Initializes an immutable archive wrapped with checkpointed sync behavior. + pub async fn init_checkpointed( + ctx: E, + partition_prefix: impl Into, + codec_config: V::Cfg, + checkpoint_interval: u64, + ) -> Result>, commonware_storage::archive::Error> + where + E: BufferPooler + Spawner + Storage + Metrics + Clock, + K: Array, + V: Codec + Send + Sync, + { + let archive = Self::init(ctx, partition_prefix, codec_config).await?; + Ok(CheckpointedArchive::new(archive, checkpoint_interval)) + } + /// Initializes a finalizations archive with the default prefix. /// /// Uses [`DEFAULT_FINALIZATIONS_PREFIX`](Self::DEFAULT_FINALIZATIONS_PREFIX) as the partition prefix. 
@@ -104,7 +411,7 @@ impl ArchiveInitializer { codec_config: V::Cfg, ) -> Result, commonware_storage::archive::Error> where - E: BufferPooler + Spawner + Storage + Metrics + Clock + Clone, + E: BufferPooler + Spawner + Storage + Metrics + Clock, K: Array, V: Codec + Send + Sync, { @@ -119,31 +426,297 @@ impl ArchiveInitializer { codec_config: V::Cfg, ) -> Result, commonware_storage::archive::Error> where - E: BufferPooler + Spawner + Storage + Metrics + Clock + Clone, + E: BufferPooler + Spawner + Storage + Metrics + Clock, K: Array, V: Codec + Send + Sync, { Self::init(ctx, Self::DEFAULT_BLOCKS_PREFIX, codec_config).await } + + /// Initializes a prunable archive with a custom partition prefix. + /// + /// Unlike [`init`](Self::init), this creates a [`prunable::Archive`] that + /// supports removing old entries via [`Prunable::prune`]. Uses [`EightCap`] + /// as the key translator, which takes the first 8 bytes of each key digest + /// for hash-table indexing. + /// + /// [`prunable::Archive`]: commonware_storage::archive::prunable::Archive + pub async fn init_prunable( + ctx: E, + partition_prefix: impl Into, + codec_config: V::Cfg, + ) -> Result, commonware_storage::archive::Error> + where + E: BufferPooler + Spawner + Storage + Metrics + Clock, + K: Array, + V: Codec + Send + Sync, + { + let prefix = partition_prefix.into(); + let config = PrunableConfig { + translator: EightCap, + key_partition: format!("{prefix}-key"), + key_page_cache: CacheRef::from_pooler( + &ctx, + Self::DEFAULT_PAGE_SIZE, + Self::DEFAULT_PAGE_CACHE_SIZE, + ), + value_partition: format!("{prefix}-value"), + compression: Self::DEFAULT_COMPRESSION_LEVEL, + codec_config, + items_per_section: Self::DEFAULT_PRUNABLE_ITEMS_PER_SECTION, + key_write_buffer: Self::DEFAULT_WRITE_BUFFER, + value_write_buffer: Self::DEFAULT_WRITE_BUFFER, + replay_buffer: Self::DEFAULT_REPLAY_BUFFER, + }; + PrunableArchive::init(ctx, config).await + } + + /// Initializes a prunable archive wrapped with checkpointed sync 
behavior. + /// + /// Combines [`init_prunable`](Self::init_prunable) with + /// [`CheckpointedArchive`] so that syncs are batched to `checkpoint_interval` + /// boundaries while pruning remains fully functional. + pub async fn init_prunable_checkpointed( + ctx: E, + partition_prefix: impl Into, + codec_config: V::Cfg, + checkpoint_interval: u64, + ) -> Result< + CheckpointedArchive>, + commonware_storage::archive::Error, + > + where + E: BufferPooler + Spawner + Storage + Metrics + Clock, + K: Array, + V: Codec + Send + Sync, + { + let archive = Self::init_prunable(ctx, partition_prefix, codec_config).await?; + Ok(CheckpointedArchive::new(archive, checkpoint_interval)) + } + + /// Partition suffixes used by the old `immutable::Archive` backend. + /// + /// When migrating from immutable to prunable archives, these partitions + /// contain orphaned data that will never be read by the new backend. + const LEGACY_IMMUTABLE_SUFFIXES: &'static [&'static str] = + &["-metadata", "-freezer-table", "-freezer-key", "-freezer-value", "-ordinal"]; + + /// Detect and remove legacy immutable archive partitions for a given prefix. + /// + /// The old `immutable::Archive` backend used five partitions per archive + /// (`{prefix}-metadata`, `{prefix}-freezer-table`, `{prefix}-freezer-key`, + /// `{prefix}-freezer-value`, `{prefix}-ordinal`). The new `prunable::Archive` + /// backend uses different partition names (`{prefix}-key`, `{prefix}-value`), + /// so upgrading silently orphans the old data on disk. + /// + /// This method scans for legacy partitions and removes any that contain + /// data, logging a warning for each one removed. Call this before + /// [`init_prunable`](Self::init_prunable) or + /// [`init_prunable_checkpointed`](Self::init_prunable_checkpointed) to + /// ensure a clean migration. + /// + /// Returns the number of legacy partitions that were detected and removed. 
+ pub async fn migrate_from_immutable(ctx: &E, partition_prefix: &str) -> usize + where + E: Storage, + { + let mut removed = 0; + for suffix in Self::LEGACY_IMMUTABLE_SUFFIXES { + let partition_name = format!("{partition_prefix}{suffix}"); + match ctx.scan(&partition_name).await { + Ok(blobs) if !blobs.is_empty() => { + warn!( + partition = %partition_name, + blobs = blobs.len(), + "removing legacy immutable archive partition \ + (replaced by prunable backend)" + ); + if let Err(e) = ctx.remove(&partition_name, None).await { + warn!( + partition = %partition_name, + error = %e, + "failed to remove legacy immutable archive partition" + ); + } else { + removed += 1; + } + } + Ok(_) => { + // Partition exists but is empty, or doesn't exist -- nothing to do. + } + Err(e) => { + warn!( + partition = %partition_name, + error = %e, + "failed to scan for legacy immutable archive partition" + ); + } + } + } + if removed > 0 { + warn!( + prefix = %partition_prefix, + removed, + "cleaned up legacy immutable archive partitions; \ + archive history has been reset with the new prunable backend" + ); + } + removed + } } #[cfg(test)] mod tests { + use commonware_utils::sequence::Unit; + use super::*; + #[derive(Debug)] + struct FakeArchive { + ranges: Vec<(u64, u64)>, + } + + impl ArchiveTrait for FakeArchive { + type Key = Unit; + type Value = u64; + + async fn put( + &mut self, + index: u64, + _: Self::Key, + _: Self::Value, + ) -> Result<(), ArchiveError> { + self.ranges.push((index, index)); + Ok(()) + } + + async fn get<'a>( + &'a self, + _: Identifier<'a, Self::Key>, + ) -> Result, ArchiveError> { + Ok(None) + } + + async fn has<'a>(&'a self, _: Identifier<'a, Self::Key>) -> Result { + Ok(false) + } + + fn next_gap(&self, _: u64) -> (Option, Option) { + (None, None) + } + + fn missing_items(&self, _: u64, _: usize) -> Vec { + Vec::new() + } + + fn ranges(&self) -> impl Iterator { + self.ranges.clone().into_iter() + } + + fn ranges_from(&self, from: u64) -> impl Iterator { + 
self.ranges.clone().into_iter().filter(move |(_, end)| *end >= from) + } + + fn first_index(&self) -> Option { + self.ranges.first().map(|(start, _)| *start) + } + + fn last_index(&self) -> Option { + self.ranges.last().map(|(_, end)| *end) + } + + async fn sync(&mut self) -> Result<(), ArchiveError> { + Ok(()) + } + + async fn destroy(self) -> Result<(), ArchiveError> { + Ok(()) + } + } + #[test] fn test_defaults() { - assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_TABLE_INITIAL_SIZE, 65_536); + assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_TABLE_INITIAL_SIZE, 2_097_152); assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_TABLE_RESIZE_FREQUENCY, 4); - assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_TABLE_RESIZE_CHUNK_SIZE, 16_384); - assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_VALUE_TARGET_SIZE, 1024); + assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_TABLE_RESIZE_CHUNK_SIZE, 65_536); + assert_eq!(ArchiveInitializer::DEFAULT_FREEZER_VALUE_TARGET_SIZE, 1024 * 1024 * 1024); assert_eq!(ArchiveInitializer::DEFAULT_COMPRESSION_LEVEL, Some(3)); - assert_eq!(ArchiveInitializer::DEFAULT_ITEMS_PER_SECTION.get(), 1024); - assert_eq!(ArchiveInitializer::DEFAULT_WRITE_BUFFER.get(), 1024); - assert_eq!(ArchiveInitializer::DEFAULT_REPLAY_BUFFER.get(), 1024); - assert_eq!(ArchiveInitializer::DEFAULT_PAGE_SIZE.get(), 1024); - assert_eq!(ArchiveInitializer::DEFAULT_PAGE_CACHE_SIZE.get(), 10); + assert_eq!(ArchiveInitializer::DEFAULT_ITEMS_PER_SECTION.get(), 262_144); + assert_eq!(ArchiveInitializer::DEFAULT_PRUNABLE_ITEMS_PER_SECTION.get(), 256); + assert_eq!(ArchiveInitializer::DEFAULT_WRITE_BUFFER.get(), 1024 * 1024); + assert_eq!(ArchiveInitializer::DEFAULT_REPLAY_BUFFER.get(), 8 * 1024 * 1024); + assert_eq!(ArchiveInitializer::DEFAULT_PAGE_SIZE.get(), 4_096); + assert_eq!(ArchiveInitializer::DEFAULT_PAGE_CACHE_SIZE.get(), 8_192); assert_eq!(ArchiveInitializer::DEFAULT_FINALIZATIONS_PREFIX, "finalizations"); assert_eq!(ArchiveInitializer::DEFAULT_BLOCKS_PREFIX, "blocks"); } + + 
#[test] + fn checkpointed_archive_syncs_only_on_boundary() { + let inner = FakeArchive { ranges: vec![(1, 64)] }; + let mut archive = CheckpointedArchive::new(inner, 64); + + assert!(!archive.should_sync()); + + archive.mark_dirty(63); + assert!(!archive.should_sync()); + + archive.mark_dirty(64); + assert!(archive.should_sync()); + } + + #[test] + fn checkpointed_archive_interval_one_preserves_default_sync_behavior() { + let inner = FakeArchive { ranges: vec![(1, 7)] }; + let mut archive = CheckpointedArchive::new(inner, 1); + + assert!(!archive.should_sync()); + + archive.mark_dirty(7); + assert!(archive.should_sync()); + } + + #[test] + fn checkpointed_archive_does_not_sync_sparse_boundary() { + let inner = FakeArchive { ranges: vec![(1, 32), (34, 64)] }; + let mut archive = CheckpointedArchive::new(inner, 64); + + archive.mark_dirty(64); + assert!(!archive.should_sync()); + } + + #[test] + fn checkpointed_archive_syncs_when_dirty_past_boundary() { + // Simulate out-of-order: block 65 arrives, then 64. + // highest_dirty = 65, but the boundary at 64 should still trigger sync. 
+ let inner = FakeArchive { ranges: vec![(1, 65)] }; + let mut archive = CheckpointedArchive::new(inner, 64); + + archive.mark_dirty(65); + // 65 is past the boundary at 64, and archive is contiguous through 64 + assert!(archive.should_sync()); + } + + #[test] + fn checkpointed_archive_no_sync_before_first_boundary() { + let inner = FakeArchive { ranges: vec![(1, 63)] }; + let mut archive = CheckpointedArchive::new(inner, 64); + + archive.mark_dirty(63); + // 63 / 64 = 0, boundary = 0, which is not > 0 + assert!(!archive.should_sync()); + } + + #[test] + fn checkpointed_archive_zero_interval_behaves_as_one() { + let inner = FakeArchive { ranges: vec![(1, 3)] }; + let mut archive_zero = CheckpointedArchive::new(inner, 0); + archive_zero.mark_dirty(3); + assert!(archive_zero.should_sync()); + + let inner = FakeArchive { ranges: vec![(1, 3)] }; + let mut archive_one = CheckpointedArchive::new(inner, 1); + archive_one.mark_dirty(3); + assert!(archive_one.should_sync()); + } } diff --git a/crates/network/marshal/src/broadcast.rs b/crates/network/marshal/src/broadcast.rs index 2b07737..03ab6de 100644 --- a/crates/network/marshal/src/broadcast.rs +++ b/crates/network/marshal/src/broadcast.rs @@ -5,6 +5,7 @@ use commonware_codec::Codec; use commonware_cryptography::{Committable, Digestible, PublicKey}; use commonware_p2p::Provider; use commonware_runtime::{BufferPooler, Clock, Metrics, Spawner}; +use commonware_utils::NZUsize; /// Initializes the buffered broadcast engine with sensible defaults. #[derive(Debug, Clone, Copy)] @@ -18,7 +19,7 @@ impl BroadcastInitializer { pub const DEFAULT_DEQUE_SIZE: usize = 256; /// Whether messages are sent with priority by default. 
- pub const DEFAULT_PRIORITY: bool = false; + pub const DEFAULT_PRIORITY: bool = true; } impl BroadcastInitializer { @@ -39,7 +40,7 @@ impl BroadcastInitializer { { let config = Config { public_key, - mailbox_size: Self::DEFAULT_MAILBOX_SIZE, + mailbox_size: NZUsize!(Self::DEFAULT_MAILBOX_SIZE), deque_size: Self::DEFAULT_DEQUE_SIZE, priority: Self::DEFAULT_PRIORITY, codec_config, @@ -57,6 +58,6 @@ mod tests { fn test_defaults() { assert_eq!(BroadcastInitializer::DEFAULT_MAILBOX_SIZE, 1024); assert_eq!(BroadcastInitializer::DEFAULT_DEQUE_SIZE, 256); - assert!(!BroadcastInitializer::DEFAULT_PRIORITY); + const { assert!(BroadcastInitializer::DEFAULT_PRIORITY) }; } } diff --git a/crates/network/marshal/src/lib.rs b/crates/network/marshal/src/lib.rs index 6ed6346..e68d9b1 100644 --- a/crates/network/marshal/src/lib.rs +++ b/crates/network/marshal/src/lib.rs @@ -9,7 +9,7 @@ mod actor; pub use actor::ActorInitializer; mod archive; -pub use archive::ArchiveInitializer; +pub use archive::{ArchiveInitializer, CheckpointedArchive, Prunable}; mod broadcast; pub use broadcast::BroadcastInitializer; diff --git a/crates/network/marshal/src/peers.rs b/crates/network/marshal/src/peers.rs index d38150a..ae19f3f 100644 --- a/crates/network/marshal/src/peers.rs +++ b/crates/network/marshal/src/peers.rs @@ -5,22 +5,20 @@ use std::time::Duration; use commonware_consensus::{ Block, marshal::resolver::{ - handler::{Message, Request}, - p2p::Config, + handler::Receiver as HandlerReceiver, + p2p::{Config, Mailbox as P2pMailbox}, }, }; use commonware_cryptography::{Digestible, PublicKey}; use commonware_p2p::{Blocker, Provider, Receiver, Sender}; -use commonware_resolver::p2p; use commonware_runtime::{BufferPooler, Clock, Metrics, Spawner}; -use commonware_utils::channel::mpsc; use rand::Rng; /// Receiver for inbound resolver messages. -pub type ResolverReceiver = mpsc::Receiver::Digest>>; +pub type ResolverReceiver = HandlerReceiver<::Digest>; /// Mailbox used to submit resolver requests. 
-pub type ResolverMailbox = p2p::Mailbox::Digest>, P>; +pub type ResolverMailbox = P2pMailbox<::Digest, P>; /// Resolver channels returned by peer initialization. pub type ResolverChannels = (ResolverReceiver, ResolverMailbox); @@ -43,16 +41,16 @@ impl PeerInitializer { pub const DEFAULT_FETCH_RETRY_TIMEOUT: Duration = Duration::from_millis(100); /// Whether there are priority requests. - pub const PRIORITY_REQUESTS: bool = false; + pub const PRIORITY_REQUESTS: bool = true; /// Whether there are priority responses. - pub const PRIORITY_RESPONSES: bool = false; + pub const PRIORITY_RESPONSES: bool = true; } impl PeerInitializer { /// Initializes the p2p resolver. pub fn init( - ctx: &E, + ctx: E, public_key: P, peer_provider: C, blocker: Bl, @@ -71,7 +69,7 @@ impl PeerInitializer { public_key, peer_provider, blocker, - mailbox_size: Self::DEFAULT_MAILBOX_SIZE, + mailbox_size: commonware_utils::NZUsize!(Self::DEFAULT_MAILBOX_SIZE), initial: Self::DEFAULT_INITIAL_DELAY, timeout: Self::DEFAULT_TIMEOUT, fetch_retry_timeout: Self::DEFAULT_FETCH_RETRY_TIMEOUT, @@ -92,7 +90,7 @@ mod tests { assert_eq!(PeerInitializer::DEFAULT_INITIAL_DELAY, Duration::from_millis(200)); assert_eq!(PeerInitializer::DEFAULT_TIMEOUT, Duration::from_millis(200)); assert_eq!(PeerInitializer::DEFAULT_FETCH_RETRY_TIMEOUT, Duration::from_millis(100)); - assert!(!PeerInitializer::PRIORITY_REQUESTS); - assert!(!PeerInitializer::PRIORITY_RESPONSES); + const { assert!(PeerInitializer::PRIORITY_REQUESTS) }; + const { assert!(PeerInitializer::PRIORITY_RESPONSES) }; } } diff --git a/crates/network/marshal/tests/integration.rs b/crates/network/marshal/tests/integration.rs index 0514daa..68e3c36 100644 --- a/crates/network/marshal/tests/integration.rs +++ b/crates/network/marshal/tests/integration.rs @@ -11,15 +11,15 @@ mod common; use std::{ collections::BTreeMap, - future::Future, num::NonZeroU32, sync::{Arc, Mutex}, time::Duration, }; +use commonware_actor::Feedback; use commonware_consensus::{ 
Heightable, Reporter, - marshal::{Update, core::Mailbox, standard::Standard}, + marshal::{Start, Update, core::Mailbox, standard::Standard}, simplex::{ scheme::bls12381_threshold::standard as bls12381_threshold, types::{Activity, Finalization, Finalize, Notarization, Notarize, Proposal}, @@ -39,7 +39,7 @@ use commonware_p2p::{ simulated::{self, Link, Network, Oracle}, }; use commonware_parallel::Sequential; -use commonware_runtime::{Clock, Metrics, Quota, Runner, deterministic}; +use commonware_runtime::{Clock, Quota, Runner, Supervisor as _, deterministic}; use commonware_utils::{Acknowledgement, NZU16, NZUsize, ordered::Set}; use kora_marshal::{ActorInitializer, ArchiveInitializer, BroadcastInitializer, PeerInitializer}; @@ -63,6 +63,10 @@ const LINK: Link = Link { }; const TEST_QUOTA: Quota = Quota::per_second(NonZeroU32::MAX); +fn genesis_block() -> Block { + Block::new(Sha256::hash(b"genesis-parent"), Height::zero(), 0) +} + /// Mock application that tracks received blocks. #[derive(Clone, Default)] struct MockApplication { @@ -79,7 +83,7 @@ impl MockApplication { impl Reporter for MockApplication { type Activity = Update; - fn report(&mut self, activity: Self::Activity) -> impl Future + Send { + fn report(&mut self, activity: Self::Activity) -> Feedback { match activity { Update::Block(block, ack) => { let height = block.height(); @@ -90,7 +94,7 @@ impl Reporter for MockApplication { *self.tip.lock().unwrap() = Some((height, commitment)); } } - async {} + Feedback::Ok } } @@ -126,7 +130,7 @@ async fn setup_validator( let backfill = control.register(1, TEST_QUOTA).await.unwrap(); let resolver = PeerInitializer::init::<_, _, _, B, _, _, _>( - &context, + context.child("resolver"), validator.clone(), oracle.manager(), control.clone(), @@ -135,7 +139,7 @@ async fn setup_validator( // 2. 
Use BroadcastInitializer::init() for the broadcast engine let (broadcast_engine, buffer) = BroadcastInitializer::init::<_, _, B, _>( - context.clone(), + context.child("broadcast"), validator.clone(), oracle.manager(), (), @@ -143,26 +147,28 @@ async fn setup_validator( let network = control.register(2, TEST_QUOTA).await.unwrap(); broadcast_engine.start(network); - // 3. Use ArchiveInitializer::init_finalizations() for finalizations archive - let finalizations_by_height = ArchiveInitializer::init_finalizations( - context.with_label("finalizations_by_height"), + // 3. Use ArchiveInitializer::init_prunable() for finalizations archive + let finalizations_by_height = ArchiveInitializer::init_prunable( + context.child("finalizations_by_height"), + "finalizations", S::certificate_codec_config_unbounded(), ) .await .expect("failed to init finalizations archive"); - // 4. Use ArchiveInitializer::init_blocks() for blocks archive + // 4. Use ArchiveInitializer::init_prunable() for blocks archive let finalized_blocks = - ArchiveInitializer::init_blocks(context.with_label("finalized_blocks"), ()) + ArchiveInitializer::init_prunable(context.child("finalized_blocks"), "blocks", ()) .await .expect("failed to init blocks archive"); // 5. 
Use ActorInitializer::init() for the actor let (actor, mailbox, processed_height) = ActorInitializer::init( - context.clone(), + context.child("actor"), finalizations_by_height, finalized_blocks, provider, + Start::Genesis(genesis_block()), commonware_runtime::buffer::paged::CacheRef::from_pooler( &context, NZU16!(1024), @@ -202,7 +208,7 @@ fn test_start_marshal_and_finalize_block() { runner.start(|mut context| async move { // Setup network let (network, mut oracle) = Network::new( - context.with_label("network"), + context.child("network"), simulated::Config { max_size: 1024 * 1024, disconnect_on_block: true, @@ -218,7 +224,7 @@ fn test_start_marshal_and_finalize_block() { // Setup a single validator using all initializers let validator = participants[0].clone(); let (application, mut mailbox, processed_height) = setup_validator( - context.with_label("validator_0"), + context.child("validator"), &mut oracle, validator.clone(), ConstantProvider::new(schemes[0].clone()), @@ -230,34 +236,33 @@ fn test_start_marshal_and_finalize_block() { assert!(application.blocks().is_empty()); // Create a block - let parent = Sha256::hash(b"genesis"); + let parent = genesis_block().digest(); let block = Block::new(parent, Height::new(1), 1); let round = Round::new(Epoch::new(0), View::new(1)); // Submit verified block - mailbox.verified(round, block.clone()).await; + let _ = mailbox.verified(round, block.clone()).await; // Create proposal let proposal = Proposal { round, parent: View::new(0), payload: block.digest() }; // Notarize the block let notarization = make_notarization(proposal.clone(), &schemes, QUORUM); - mailbox.report(Activity::Notarization(notarization)).await; + mailbox.report(Activity::Notarization(notarization)); // Finalize the block let finalization = make_finalization(proposal, &schemes, QUORUM); - mailbox.report(Activity::Finalization(finalization)).await; + mailbox.report(Activity::Finalization(finalization)); // Wait for block to be delivered to application 
let mut attempts = 0; - while application.blocks().is_empty() && attempts < 100 { + while !application.blocks().contains_key(&Height::new(1)) && attempts < 100 { context.sleep(Duration::from_millis(10)).await; attempts += 1; } // Verify block was delivered let blocks = application.blocks(); - assert_eq!(blocks.len(), 1, "Expected 1 block to be finalized"); assert!(blocks.contains_key(&Height::new(1))); // Verify block can be retrieved from mailbox @@ -281,7 +286,7 @@ fn test_start_marshal_multiple_validators() { runner.start(|mut context| async move { // Setup network let (network, mut oracle) = Network::new( - context.with_label("network"), + context.child("network"), simulated::Config { max_size: 1024 * 1024, disconnect_on_block: true, @@ -296,7 +301,7 @@ fn test_start_marshal_multiple_validators() { // Register peer set let mut manager = oracle.manager(); - manager.track(0, Set::from_iter_dedup(participants.clone())).await; + manager.track(0, Set::from_iter_dedup(participants.clone())); // Setup multiple validators let mut applications = Vec::new(); @@ -304,7 +309,7 @@ fn test_start_marshal_multiple_validators() { for (i, validator) in participants.iter().take(2).enumerate() { let (app, mailbox, _) = setup_validator( - context.with_label(&format!("validator_{i}")), + context.child("validator").with_attribute("index", i), &mut oracle, validator.clone(), ConstantProvider::new(schemes[i].clone()), @@ -318,13 +323,13 @@ fn test_start_marshal_multiple_validators() { setup_network_links(&mut oracle, &participants[..2], LINK).await; // Create and finalize a block - both validators verify it locally - let parent = Sha256::hash(b"genesis"); + let parent = genesis_block().digest(); let block = Block::new(parent, Height::new(1), 42); let round = Round::new(Epoch::new(0), View::new(1)); // Both validators verify the block locally for mailbox in &mut mailboxes { - mailbox.verified(round, block.clone()).await; + let _ = mailbox.verified(round, block.clone()).await; } let 
proposal = Proposal { round, parent: View::new(0), payload: block.digest() }; @@ -334,13 +339,14 @@ fn test_start_marshal_multiple_validators() { let finalization = make_finalization(proposal, &schemes, QUORUM); for mailbox in &mut mailboxes { - mailbox.report(Activity::Notarization(notarization.clone())).await; - mailbox.report(Activity::Finalization(finalization.clone())).await; + mailbox.report(Activity::Notarization(notarization.clone())); + mailbox.report(Activity::Finalization(finalization.clone())); } // Wait for blocks to be delivered let mut attempts = 0; - while (applications[0].blocks().is_empty() || applications[1].blocks().is_empty()) + while (!applications[0].blocks().contains_key(&Height::new(1)) + || !applications[1].blocks().contains_key(&Height::new(1))) && attempts < 100 { context.sleep(Duration::from_millis(10)).await; @@ -348,7 +354,7 @@ fn test_start_marshal_multiple_validators() { } // Verify both validators received the block - assert_eq!(applications[0].blocks().len(), 1); - assert_eq!(applications[1].blocks().len(), 1); + assert!(applications[0].blocks().contains_key(&Height::new(1))); + assert!(applications[1].blocks().contains_key(&Height::new(1))); }); } diff --git a/crates/network/transport-sim/Cargo.toml b/crates/network/transport-sim/Cargo.toml index 9700667..8759413 100644 --- a/crates/network/transport-sim/Cargo.toml +++ b/crates/network/transport-sim/Cargo.toml @@ -21,6 +21,5 @@ commonware-cryptography.workspace = true commonware-utils.workspace = true governor.workspace = true -prometheus-client.workspace = true rand.workspace = true thiserror.workspace = true diff --git a/crates/network/transport-sim/src/channels.rs b/crates/network/transport-sim/src/channels.rs index d21e4f6..834fef9 100644 --- a/crates/network/transport-sim/src/channels.rs +++ b/crates/network/transport-sim/src/channels.rs @@ -42,3 +42,15 @@ impl fmt::Debug for SimMarshalChannels

{ f.debug_struct("SimMarshalChannels").finish_non_exhaustive() } } + +/// Transaction gossip channel for simulated transport. +pub struct SimTxGossipChannel { + /// Transaction gossip channel. + pub channel: (Sender

, Receiver

), +} + +impl fmt::Debug for SimTxGossipChannel

{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SimTxGossipChannel").finish_non_exhaustive() + } +} diff --git a/crates/network/transport-sim/src/context.rs b/crates/network/transport-sim/src/context.rs index 165379c..d986e68 100644 --- a/crates/network/transport-sim/src/context.rs +++ b/crates/network/transport-sim/src/context.rs @@ -8,13 +8,12 @@ use std::{ use commonware_runtime::{self, tokio}; use governor::clock::{Clock as GovernorClock, ReasonablyRealtime}; -use prometheus_client::registry::Metric; use rand::{RngCore, rngs::OsRng}; const PORT_BASE_MIN: u16 = 40_000; const PORT_BASE_MAX: u16 = 65_535 - 1_024; -fn remap_socket(socket: SocketAddr, port_offset: u16) -> SocketAddr { +const fn remap_socket(socket: SocketAddr, port_offset: u16) -> SocketAddr { let port = socket.port(); if port >= 1024 { return socket; @@ -33,12 +32,14 @@ fn remap_socket(socket: SocketAddr, port_offset: u16) -> SocketAddr { pub struct SimContext { inner: tokio::Context, force_base_addr: bool, + base_addr: Ipv4Addr, port_offset: u16, } impl fmt::Debug for SimContext { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SimContext") + .field("base_addr", &self.base_addr) .field("port_offset", &self.port_offset) .field("force_base_addr", &self.force_base_addr) .finish_non_exhaustive() @@ -51,13 +52,20 @@ impl SimContext { let mut rng = OsRng; let span = u32::from(PORT_BASE_MAX - PORT_BASE_MIN + 1); let base = PORT_BASE_MIN + (rng.next_u32() % span) as u16; - Self { inner, force_base_addr: true, port_offset: base } + let seed = rng.next_u32() ^ std::process::id(); + let base_addr = Ipv4Addr::new(127, (seed >> 16) as u8, (seed >> 8) as u8, seed as u8); + Self { inner, force_base_addr: true, base_addr, port_offset: base } } } impl Clone for SimContext { fn clone(&self) -> Self { - Self { inner: self.inner.clone(), force_base_addr: false, port_offset: self.port_offset } + Self { + inner: 
commonware_runtime::Supervisor::child(&self.inner, "sim_context"), + force_base_addr: false, + base_addr: self.base_addr, + port_offset: self.port_offset, + } } } @@ -88,45 +96,54 @@ impl commonware_runtime::Clock for SimContext { } } -impl commonware_runtime::Metrics for SimContext { - fn label(&self) -> String { - self.inner.label() +impl commonware_runtime::Supervisor for SimContext { + fn name(&self) -> commonware_runtime::Name { + self.inner.name() } - fn with_label(&self, label: &str) -> Self { + fn child(&self, label: &'static str) -> Self { Self { - inner: self.inner.with_label(label), + inner: self.inner.child(label), force_base_addr: false, + base_addr: self.base_addr, port_offset: self.port_offset, } } - fn with_attribute(&self, key: &str, value: impl fmt::Display) -> Self { + fn with_attribute(self, key: &'static str, value: impl fmt::Display) -> Self { Self { inner: self.inner.with_attribute(key, value), force_base_addr: false, + base_addr: self.base_addr, port_offset: self.port_offset, } } +} - fn with_scope(&self) -> Self { - Self { - inner: self.inner.with_scope(), - force_base_addr: false, - port_offset: self.port_offset, - } - } - - fn with_span(&self) -> Self { +impl commonware_runtime::Tracing for SimContext { + fn with_span(self) -> Self { Self { inner: self.inner.with_span(), force_base_addr: false, + base_addr: self.base_addr, port_offset: self.port_offset, } } +} - fn register, H: Into>(&self, name: N, help: H, metric: impl Metric) { - self.inner.register(name, help, metric); +impl commonware_runtime::Metrics for SimContext { + fn register( + &self, + name: N, + help: H, + metric: M, + ) -> commonware_runtime::telemetry::metrics::Registered + where + N: Into, + H: Into, + M: commonware_runtime::telemetry::metrics::Metric, + { + self.inner.register(name, help, metric) } fn encode(&self) -> String { @@ -152,8 +169,9 @@ impl commonware_runtime::Spawner for SimContext { T: Send + 'static, { let port_offset = self.port_offset; + let base_addr = 
self.base_addr; self.inner.spawn(move |context| { - let context = SimContext { inner: context, force_base_addr: false, port_offset }; + let context = Self { inner: context, force_base_addr: false, base_addr, port_offset }; f(context) }) } @@ -199,7 +217,7 @@ impl RngCore for SimContext { fn next_u32(&mut self) -> u32 { if self.force_base_addr { self.force_base_addr = false; - return u32::from(Ipv4Addr::LOCALHOST); + return self.base_addr.to_bits(); } let mut rng = OsRng; RngCore::next_u32(&mut rng) diff --git a/crates/network/transport-sim/src/lib.rs b/crates/network/transport-sim/src/lib.rs index be37b57..297764a 100644 --- a/crates/network/transport-sim/src/lib.rs +++ b/crates/network/transport-sim/src/lib.rs @@ -6,7 +6,7 @@ #![cfg_attr(not(test), warn(unused_crate_dependencies))] mod channels; -pub use channels::{Receiver, Sender, SimMarshalChannels, SimSimplexChannels}; +pub use channels::{Receiver, Sender, SimMarshalChannels, SimSimplexChannels, SimTxGossipChannel}; mod context; pub use context::SimContext; diff --git a/crates/network/transport-sim/src/provider.rs b/crates/network/transport-sim/src/provider.rs index f02c936..edb4e70 100644 --- a/crates/network/transport-sim/src/provider.rs +++ b/crates/network/transport-sim/src/provider.rs @@ -13,12 +13,13 @@ use commonware_utils::NZUsize; use kora_config::NodeConfig; use kora_service::TransportProvider; use kora_transport::{ - CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_VOTES, + CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_TX_GOSSIP, + CHANNEL_VOTES, }; use crate::{ SimContext, SimTransportError, - channels::{SimMarshalChannels, SimSimplexChannels}, + channels::{SimMarshalChannels, SimSimplexChannels, SimTxGossipChannel}, }; /// Configuration for simulated network links. @@ -78,7 +79,7 @@ impl SimControl

{ epoch: u64, validators: commonware_utils::ordered::Set

, ) { - self.manager().track(epoch, validators).await; + self.manager().track(epoch, validators); } /// Returns a peer control handle for channel registration. @@ -115,6 +116,8 @@ pub struct SimChannels { pub simplex: SimSimplexChannels

, /// Marshal block dissemination channels. pub marshal: SimMarshalChannels

, + /// Transaction gossip channel. + pub tx_gossip: SimTxGossipChannel

, } impl fmt::Debug for SimChannels

{ @@ -148,10 +151,15 @@ pub async fn register_node_channels( .register(CHANNEL_BACKFILL, quota) .await .map_err(|e| SimTransportError::ChannelRegistration(format!("backfill: {e}")))?; + let tx_gossip = control + .register(CHANNEL_TX_GOSSIP, quota) + .await + .map_err(|e| SimTransportError::ChannelRegistration(format!("tx_gossip: {e}")))?; Ok(SimChannels { simplex: SimSimplexChannels { votes, certs, resolver }, marshal: SimMarshalChannels { blocks, backfill }, + tx_gossip: SimTxGossipChannel { channel: tx_gossip }, }) } @@ -189,7 +197,7 @@ impl fmt::Debug for SimTransportProvider

{ impl SimTransportProvider

{ /// Create a new provider for a specific peer. - pub fn new(oracle: Arc>>, peer_id: P) -> Self { + pub const fn new(oracle: Arc>>, peer_id: P) -> Self { Self { oracle, peer_id } } } diff --git a/crates/network/transport/README.md b/crates/network/transport/README.md index 27e1b97..f8bab01 100644 --- a/crates/network/transport/README.md +++ b/crates/network/transport/README.md @@ -38,7 +38,7 @@ let (cert_sender, cert_receiver) = transport.simplex.certs; let (block_sender, block_receiver) = transport.marshal.blocks; // Register validator set -transport.oracle.track(0, validators).await; +transport.oracle.track(0, validators); ``` ## License diff --git a/crates/network/transport/src/builder.rs b/crates/network/transport/src/builder.rs index ac597ee..b54c111 100644 --- a/crates/network/transport/src/builder.rs +++ b/crates/network/transport/src/builder.rs @@ -11,8 +11,8 @@ use rand_core::CryptoRngCore; use crate::{ channels::{ - CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_VOTES, - MarshalChannels, SimplexChannels, + CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_TX_GOSSIP, + CHANNEL_VOTES, MarshalChannels, SimplexChannels, TxGossipChannel, }, config::TransportConfig, transport::NetworkTransport, @@ -47,7 +47,7 @@ impl TransportConfig { /// let transport = config.build(context)?; /// /// // Register validators with oracle - /// transport.oracle.track(0, validators).await; + /// transport.oracle.track(0, validators); /// /// // Pass channels to consumers /// engine.start( @@ -71,31 +71,37 @@ impl TransportConfig { where E: Spawner + BufferPooler + Clock + CryptoRngCore + RNetwork + Resolver + Metrics, { - let backlog = self.backlog; + let consensus_backlog = self.consensus_backlog; + let block_backlog = self.block_backlog; + let resolver_backlog = self.resolver_backlog; + let gossip_backlog = self.gossip_backlog; // Create network and oracle - let (mut network, oracle) = - 
discovery::Network::new(context.with_label("network"), self.inner); + let (mut network, oracle) = discovery::Network::new(context.child("network"), self.inner); - // Register simplex channels - let votes = network.register(CHANNEL_VOTES, quota, backlog); - let certs = network.register(CHANNEL_CERTS, quota, backlog); - let resolver = network.register(CHANNEL_RESOLVER, quota, backlog); + // Register simplex channels (consensus: high frequency, small messages) + let votes = network.register(CHANNEL_VOTES, quota, consensus_backlog); + let certs = network.register(CHANNEL_CERTS, quota, consensus_backlog); + let resolver = network.register(CHANNEL_RESOLVER, quota, resolver_backlog); - // Register marshal channels - let blocks = network.register(CHANNEL_BLOCKS, quota, backlog); - let backfill = network.register(CHANNEL_BACKFILL, quota, backlog); + // Register marshal channels (blocks: large messages, backfill: burst-heavy) + let blocks = network.register(CHANNEL_BLOCKS, quota, block_backlog); + let backfill = network.register(CHANNEL_BACKFILL, quota, resolver_backlog); + + // Register transaction gossip channel + let tx_gossip_channel = network.register(CHANNEL_TX_GOSSIP, quota, gossip_backlog); // Start the network let handle = network.start(); - tracing::info!("network transport started with 5 channels"); + tracing::info!("network transport started with 6 channels"); NetworkTransport { oracle, handle, simplex: SimplexChannels { votes, certs, resolver }, marshal: MarshalChannels { blocks, backfill }, + tx_gossip: TxGossipChannel { channel: tx_gossip_channel }, } } } diff --git a/crates/network/transport/src/bundle.rs b/crates/network/transport/src/bundle.rs index 8befb88..6136007 100644 --- a/crates/network/transport/src/bundle.rs +++ b/crates/network/transport/src/bundle.rs @@ -5,7 +5,7 @@ use std::fmt; use commonware_cryptography::PublicKey; use commonware_runtime::{Clock, Handle}; -use crate::channels::{MarshalChannels, SimplexChannels}; +use 
crate::channels::{MarshalChannels, SimplexChannels, TxGossipChannel}; /// Bundle of registered transport channels ready for node use. /// @@ -18,6 +18,9 @@ pub struct TransportBundle { /// Channels for block dissemination and backfill (marshal). pub marshal: MarshalChannels, + /// Channel for transaction gossip. + pub tx_gossip: TxGossipChannel, + /// Network handle to keep the transport alive. pub handle: Handle<()>, } @@ -36,8 +39,9 @@ impl TransportBundle { pub const fn new( simplex: SimplexChannels, marshal: MarshalChannels, + tx_gossip: TxGossipChannel, handle: Handle<()>, ) -> Self { - Self { simplex, marshal, handle } + Self { simplex, marshal, tx_gossip, handle } } } diff --git a/crates/network/transport/src/channels.rs b/crates/network/transport/src/channels.rs index 59120ed..a676413 100644 --- a/crates/network/transport/src/channels.rs +++ b/crates/network/transport/src/channels.rs @@ -21,6 +21,9 @@ pub const CHANNEL_BLOCKS: u64 = 3; /// Channel ID for backfill messages. pub const CHANNEL_BACKFILL: u64 = 4; +/// Channel ID for transaction gossip messages. +pub const CHANNEL_TX_GOSSIP: u64 = 5; + /// Type alias for channel sender. pub type Sender = discovery::Sender; @@ -63,3 +66,18 @@ impl fmt::Debug for MarshalChannels { f.debug_struct("MarshalChannels").finish_non_exhaustive() } } + +/// Channel for transaction gossip. +/// +/// This channel handles broadcasting new transactions to peers and receiving +/// gossiped transactions from peers. +pub struct TxGossipChannel { + /// Sender/receiver pair for transaction gossip. + pub channel: (Sender, Receiver

), +} + +impl fmt::Debug for TxGossipChannel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TxGossipChannel").finish_non_exhaustive() + } +} diff --git a/crates/network/transport/src/config.rs b/crates/network/transport/src/config.rs index ed50155..e2565e8 100644 --- a/crates/network/transport/src/config.rs +++ b/crates/network/transport/src/config.rs @@ -12,7 +12,22 @@ use crate::error::TransportError; pub const DEFAULT_MAX_MESSAGE_SIZE: u32 = 1024 * 1024; /// Default channel backlog size. -pub const DEFAULT_BACKLOG: usize = 256; +pub const DEFAULT_BACKLOG: usize = 1024; + +/// Default backlog for consensus channels (votes/certs): high frequency, small messages. +pub const DEFAULT_CONSENSUS_BACKLOG: usize = 2048; + +/// Default backlog for block dissemination channel: lower frequency, large messages. +/// Increased from 512 to 2048: devnet testing showed ~10% block broadcast drops at 512. +pub const DEFAULT_BLOCK_BACKLOG: usize = 2048; + +/// Default backlog for resolver/backfill channels: burst-heavy during catch-up. +/// Increased from 1024 to 2048: resolver traffic is critical for node recovery +/// and catch-up, matching the block backlog to prevent message drops. +pub const DEFAULT_RESOLVER_BACKLOG: usize = 2048; + +/// Default backlog for transaction gossip channel: high-volume, small messages. +pub const DEFAULT_GOSSIP_BACKLOG: usize = 1024; /// Default namespace for kora network messages. pub const DEFAULT_NAMESPACE: &[u8] = b"_COMMONWARE_KORA_NETWORK"; @@ -26,13 +41,27 @@ pub struct TransportConfig { /// Inner discovery config. pub(crate) inner: discovery::Config, - /// Channel backlog size. - pub(crate) backlog: usize, + /// Backlog size for consensus channels (votes, certs). + pub(crate) consensus_backlog: usize, + + /// Backlog size for block dissemination channel. + pub(crate) block_backlog: usize, + + /// Backlog size for resolver and backfill channels. 
+ pub(crate) resolver_backlog: usize, + + /// Backlog size for transaction gossip channel. + pub(crate) gossip_backlog: usize, } impl fmt::Debug for TransportConfig { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TransportConfig").field("backlog", &self.backlog).finish_non_exhaustive() + f.debug_struct("TransportConfig") + .field("consensus_backlog", &self.consensus_backlog) + .field("block_backlog", &self.block_backlog) + .field("resolver_backlog", &self.resolver_backlog) + .field("gossip_backlog", &self.gossip_backlog) + .finish_non_exhaustive() } } @@ -61,7 +90,10 @@ impl TransportConfig { bootstrappers, max_message_size, ), - backlog: DEFAULT_BACKLOG, + consensus_backlog: DEFAULT_CONSENSUS_BACKLOG, + block_backlog: DEFAULT_BLOCK_BACKLOG, + resolver_backlog: DEFAULT_RESOLVER_BACKLOG, + gossip_backlog: DEFAULT_GOSSIP_BACKLOG, } } @@ -85,14 +117,48 @@ impl TransportConfig { bootstrappers, max_message_size, ), - backlog: DEFAULT_BACKLOG, + consensus_backlog: DEFAULT_CONSENSUS_BACKLOG, + block_backlog: DEFAULT_BLOCK_BACKLOG, + resolver_backlog: DEFAULT_RESOLVER_BACKLOG, + gossip_backlog: DEFAULT_GOSSIP_BACKLOG, } } - /// Set the channel backlog size. + /// Set the backlog size for all channels uniformly. #[must_use] pub const fn with_backlog(mut self, backlog: usize) -> Self { - self.backlog = backlog; + self.consensus_backlog = backlog; + self.block_backlog = backlog; + self.resolver_backlog = backlog; + self.gossip_backlog = backlog; + self + } + + /// Set the backlog size for consensus channels (votes, certs). + #[must_use] + pub const fn with_consensus_backlog(mut self, backlog: usize) -> Self { + self.consensus_backlog = backlog; + self + } + + /// Set the backlog size for the block dissemination channel. + #[must_use] + pub const fn with_block_backlog(mut self, backlog: usize) -> Self { + self.block_backlog = backlog; + self + } + + /// Set the backlog size for resolver and backfill channels. 
+ #[must_use] + pub const fn with_resolver_backlog(mut self, backlog: usize) -> Self { + self.resolver_backlog = backlog; + self + } + + /// Set the backlog size for the transaction gossip channel. + #[must_use] + pub const fn with_gossip_backlog(mut self, backlog: usize) -> Self { + self.gossip_backlog = backlog; self } @@ -236,7 +302,11 @@ mod tests { #[test] fn constants_values() { assert_eq!(DEFAULT_MAX_MESSAGE_SIZE, 1024 * 1024); - assert_eq!(DEFAULT_BACKLOG, 256); + assert_eq!(DEFAULT_BACKLOG, 1024); + assert_eq!(DEFAULT_CONSENSUS_BACKLOG, 2048); + assert_eq!(DEFAULT_BLOCK_BACKLOG, 2048); + assert_eq!(DEFAULT_RESOLVER_BACKLOG, 2048); + assert_eq!(DEFAULT_GOSSIP_BACKLOG, 1024); assert_eq!(DEFAULT_NAMESPACE, b"_COMMONWARE_KORA_NETWORK"); } } diff --git a/crates/network/transport/src/lib.rs b/crates/network/transport/src/lib.rs index 3be7e2d..d2894b4 100644 --- a/crates/network/transport/src/lib.rs +++ b/crates/network/transport/src/lib.rs @@ -12,13 +12,15 @@ pub use bundle::TransportBundle; mod channels; pub use channels::{ - CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_VOTES, - MarshalChannels, Receiver, Sender, SimplexChannels, + CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_TX_GOSSIP, + CHANNEL_VOTES, MarshalChannels, Receiver, Sender, SimplexChannels, TxGossipChannel, }; mod config; pub use config::{ - DEFAULT_BACKLOG, DEFAULT_MAX_MESSAGE_SIZE, DEFAULT_NAMESPACE, TransportConfig, TransportParsing, + DEFAULT_BACKLOG, DEFAULT_BLOCK_BACKLOG, DEFAULT_CONSENSUS_BACKLOG, DEFAULT_GOSSIP_BACKLOG, + DEFAULT_MAX_MESSAGE_SIZE, DEFAULT_NAMESPACE, DEFAULT_RESOLVER_BACKLOG, TransportConfig, + TransportParsing, }; mod error; diff --git a/crates/network/transport/src/network_provider.rs b/crates/network/transport/src/network_provider.rs index 5e29c4c..37dc59f 100644 --- a/crates/network/transport/src/network_provider.rs +++ b/crates/network/transport/src/network_provider.rs @@ -12,8 +12,8 @@ use 
rand_core::CryptoRngCore; use crate::{ TransportBundle, TransportConfig, TransportError, TransportProvider, channels::{ - CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_VOTES, - MarshalChannels, SimplexChannels, + CHANNEL_BACKFILL, CHANNEL_BLOCKS, CHANNEL_CERTS, CHANNEL_RESOLVER, CHANNEL_TX_GOSSIP, + CHANNEL_VOTES, MarshalChannels, SimplexChannels, TxGossipChannel, }, }; @@ -68,24 +68,29 @@ where self, context: E, ) -> Result<(TransportBundle, Self::Control), Self::Error> { - let backlog = self.config.backlog; + let consensus_backlog = self.config.consensus_backlog; + let block_backlog = self.config.block_backlog; + let resolver_backlog = self.config.resolver_backlog; + let gossip_backlog = self.config.gossip_backlog; let (mut network, oracle) = - discovery::Network::new(context.with_label("network"), self.config.inner); + discovery::Network::new(context.child("network"), self.config.inner); - let votes = network.register(CHANNEL_VOTES, self.quota, backlog); - let certs = network.register(CHANNEL_CERTS, self.quota, backlog); - let resolver = network.register(CHANNEL_RESOLVER, self.quota, backlog); - let blocks = network.register(CHANNEL_BLOCKS, self.quota, backlog); - let backfill = network.register(CHANNEL_BACKFILL, self.quota, backlog); + let votes = network.register(CHANNEL_VOTES, self.quota, consensus_backlog); + let certs = network.register(CHANNEL_CERTS, self.quota, consensus_backlog); + let resolver = network.register(CHANNEL_RESOLVER, self.quota, resolver_backlog); + let blocks = network.register(CHANNEL_BLOCKS, self.quota, block_backlog); + let backfill = network.register(CHANNEL_BACKFILL, self.quota, resolver_backlog); + let tx_gossip_channel = network.register(CHANNEL_TX_GOSSIP, self.quota, gossip_backlog); let handle = network.start(); - tracing::info!("network transport started with 5 channels"); + tracing::info!("network transport started with 6 channels"); let bundle = TransportBundle::new( SimplexChannels { votes, certs, 
resolver }, MarshalChannels { blocks, backfill }, + TxGossipChannel { channel: tx_gossip_channel }, handle, ); diff --git a/crates/network/transport/src/transport.rs b/crates/network/transport/src/transport.rs index 52c2d21..81cd8e8 100644 --- a/crates/network/transport/src/transport.rs +++ b/crates/network/transport/src/transport.rs @@ -6,13 +6,13 @@ use commonware_cryptography::PublicKey; use commonware_p2p::authenticated::discovery; use commonware_runtime::{Clock, Handle}; -use crate::channels::{MarshalChannels, SimplexChannels}; +use crate::channels::{MarshalChannels, SimplexChannels, TxGossipChannel}; /// Complete network transport bundle. /// /// Contains everything needed to wire up consensus and application layers: /// - The oracle for peer management and blocking -/// - All 5 channel pairs grouped by consumer +/// - All 6 channel pairs grouped by consumer /// - The network handle to keep it alive /// /// # Channel Groups @@ -20,6 +20,7 @@ use crate::channels::{MarshalChannels, SimplexChannels}; /// Channels are grouped by their consumer: /// - [`SimplexChannels`]: For consensus engine (votes, certs, resolver) /// - [`MarshalChannels`]: For block dissemination (blocks, backfill) +/// - [`TxGossipChannel`]: For transaction gossip between validators pub struct NetworkTransport { /// Oracle for peer management and Byzantine blocking. /// @@ -37,6 +38,9 @@ pub struct NetworkTransport { /// Channels for block dissemination and backfill (marshal). pub marshal: MarshalChannels, + + /// Channel for transaction gossip. 
+ pub tx_gossip: TxGossipChannel, } impl fmt::Debug for NetworkTransport { @@ -44,6 +48,7 @@ impl fmt::Debug for NetworkTransport { f.debug_struct("NetworkTransport") .field("simplex", &self.simplex) .field("marshal", &self.marshal) + .field("tx_gossip", &self.tx_gossip) .finish_non_exhaustive() } } diff --git a/crates/node/config/Cargo.toml b/crates/node/config/Cargo.toml index e7f05e9..ec087b3 100644 --- a/crates/node/config/Cargo.toml +++ b/crates/node/config/Cargo.toml @@ -22,7 +22,6 @@ thiserror.workspace = true # Cryptography commonware-codec.workspace = true commonware-cryptography.workspace = true -ed25519-consensus = "2" rand.workspace = true # Misc diff --git a/crates/node/config/README.md b/crates/node/config/README.md index e518378..d92aa07 100644 --- a/crates/node/config/README.md +++ b/crates/node/config/README.md @@ -20,13 +20,27 @@ validator_key = "path/to/key" threshold = 2 participants = ["pk1", "pk2", "pk3"] +[consensus.block_codec] +max_txs = 10000 +max_tx_bytes = 8388608 + +[consensus.simplex] +replay_buffer_bytes = 16777216 +write_buffer_bytes = 16777216 +leader_timeout_secs = 1 +certification_timeout_secs = 2 +timeout_retry_secs = 1 +fetch_timeout_secs = 5 +activity_timeout_views = 20 +skip_timeout_views = 10 +fetch_concurrent = 8 + [network] listen_addr = "0.0.0.0:30303" bootstrap_peers = ["peer1:30303", "peer2:30303"] [execution] -gas_limit = 30000000 -block_time = 2 +gas_limit = 250000000 [rpc] http_addr = "0.0.0.0:8545" diff --git a/crates/node/config/src/consensus.rs b/crates/node/config/src/consensus.rs index f41b1a0..2e06294 100644 --- a/crates/node/config/src/consensus.rs +++ b/crates/node/config/src/consensus.rs @@ -1,6 +1,9 @@ //! Consensus configuration. -use std::path::PathBuf; +use std::{ + num::{NonZeroU64, NonZeroUsize}, + path::PathBuf, +}; use alloy_primitives::hex; use commonware_codec::{FixedSize, ReadExt}; @@ -12,6 +15,129 @@ use crate::ConfigError; /// Default validator threshold.
pub const DEFAULT_THRESHOLD: u32 = 2; +/// Default maximum transactions decoded per block. +pub const DEFAULT_BLOCK_CODEC_MAX_TXS: usize = 10_000; + +/// Default maximum bytes decoded per transaction in a block. +pub const DEFAULT_BLOCK_CODEC_MAX_TX_BYTES: usize = 8 * 1024 * 1024; + +/// Default Simplex replay buffer size in bytes. +pub const DEFAULT_SIMPLEX_REPLAY_BUFFER_BYTES: usize = 16 * 1024 * 1024; + +/// Default Simplex write buffer size in bytes. +pub const DEFAULT_SIMPLEX_WRITE_BUFFER_BYTES: usize = 16 * 1024 * 1024; + +/// Default Simplex leader timeout in seconds. +/// +/// Healthy views complete in ~7ms, so even 1 second provides ample margin. +/// A lower timeout limits the throughput penalty when a dead leader's turn +/// is reached in the round-robin schedule. +pub const DEFAULT_SIMPLEX_LEADER_TIMEOUT_SECS: u64 = 1; + +/// Default Simplex certification timeout in seconds. +/// +/// Healthy views complete in ~7ms, so 2 seconds provides a generous margin +/// for stragglers while avoiding long stalls when certification fails. +/// This matches the underlying simplex crate default +/// ([`DEFAULT_NOTARIZATION_TIMEOUT`]). +pub const DEFAULT_SIMPLEX_CERTIFICATION_TIMEOUT_SECS: u64 = 2; + +/// Default Simplex nullification retry timeout in seconds. +/// +/// After a view is nullified, this controls how long the validator waits +/// before retrying. Reducing from 2 s to 1 s allows faster recovery +/// from transient snapshot misses under CPU contention. +pub const DEFAULT_SIMPLEX_TIMEOUT_RETRY_SECS: u64 = 1; + +/// Default Simplex fetch timeout in seconds. +pub const DEFAULT_SIMPLEX_FETCH_TIMEOUT_SECS: u64 = 5; + +/// Default Simplex activity timeout in views. +pub const DEFAULT_SIMPLEX_ACTIVITY_TIMEOUT_VIEWS: u64 = 20; + +/// Default Simplex skip timeout in views. +pub const DEFAULT_SIMPLEX_SKIP_TIMEOUT_VIEWS: u64 = 10; + +/// Default number of concurrent Simplex fetch requests. 
+pub const DEFAULT_SIMPLEX_FETCH_CONCURRENT: usize = 8; + +/// Block codec limits used by consensus networking and storage. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub struct ConsensusBlockCodecConfig { + /// Maximum number of transactions decoded per block. + #[serde(default = "default_block_codec_max_txs")] + pub max_txs: NonZeroUsize, + + /// Maximum bytes decoded per transaction in a block. + #[serde(default = "default_block_codec_max_tx_bytes")] + pub max_tx_bytes: NonZeroUsize, +} + +impl Default for ConsensusBlockCodecConfig { + fn default() -> Self { + Self { + max_txs: default_block_codec_max_txs(), + max_tx_bytes: default_block_codec_max_tx_bytes(), + } + } +} + +/// Simplex consensus tuning parameters. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub struct ConsensusSimplexConfig { + /// Replay buffer size in bytes. + #[serde(default = "default_simplex_replay_buffer_bytes")] + pub replay_buffer_bytes: NonZeroUsize, + + /// Write buffer size in bytes. + #[serde(default = "default_simplex_write_buffer_bytes")] + pub write_buffer_bytes: NonZeroUsize, + + /// Leader timeout in seconds. + #[serde(default = "default_simplex_leader_timeout_secs")] + pub leader_timeout_secs: NonZeroU64, + + /// Certification timeout in seconds. + #[serde(default = "default_simplex_certification_timeout_secs")] + pub certification_timeout_secs: NonZeroU64, + + /// Retry timeout after nullification in seconds. + #[serde(default = "default_simplex_timeout_retry_secs")] + pub timeout_retry_secs: NonZeroU64, + + /// Fetch timeout in seconds. + #[serde(default = "default_simplex_fetch_timeout_secs")] + pub fetch_timeout_secs: NonZeroU64, + + /// Activity timeout in views. + #[serde(default = "default_simplex_activity_timeout_views")] + pub activity_timeout_views: NonZeroU64, + + /// Skip timeout in views. 
+ #[serde(default = "default_simplex_skip_timeout_views")] + pub skip_timeout_views: NonZeroU64, + + /// Maximum concurrent fetch requests. + #[serde(default = "default_simplex_fetch_concurrent")] + pub fetch_concurrent: NonZeroUsize, +} + +impl Default for ConsensusSimplexConfig { + fn default() -> Self { + Self { + replay_buffer_bytes: default_simplex_replay_buffer_bytes(), + write_buffer_bytes: default_simplex_write_buffer_bytes(), + leader_timeout_secs: default_simplex_leader_timeout_secs(), + certification_timeout_secs: default_simplex_certification_timeout_secs(), + timeout_retry_secs: default_simplex_timeout_retry_secs(), + fetch_timeout_secs: default_simplex_fetch_timeout_secs(), + activity_timeout_views: default_simplex_activity_timeout_views(), + skip_timeout_views: default_simplex_skip_timeout_views(), + fetch_concurrent: default_simplex_fetch_concurrent(), + } + } +} + /// Consensus layer configuration. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ConsensusConfig { @@ -30,11 +156,25 @@ pub struct ConsensusConfig { deserialize_with = "deserialize_participants" )] pub participants: Vec>, + + /// Block codec limits used by consensus. + #[serde(default)] + pub block_codec: ConsensusBlockCodecConfig, + + /// Simplex consensus tuning parameters. 
+ #[serde(default)] + pub simplex: ConsensusSimplexConfig, } impl Default for ConsensusConfig { fn default() -> Self { - Self { validator_key: None, threshold: DEFAULT_THRESHOLD, participants: Vec::new() } + Self { + validator_key: None, + threshold: DEFAULT_THRESHOLD, + participants: Vec::new(), + block_codec: ConsensusBlockCodecConfig::default(), + simplex: ConsensusSimplexConfig::default(), + } } } @@ -61,6 +201,60 @@ const fn default_threshold() -> u32 { DEFAULT_THRESHOLD } +const fn default_block_codec_max_txs() -> NonZeroUsize { + NonZeroUsize::new(DEFAULT_BLOCK_CODEC_MAX_TXS).expect("default block codec max txs is non-zero") +} + +const fn default_block_codec_max_tx_bytes() -> NonZeroUsize { + NonZeroUsize::new(DEFAULT_BLOCK_CODEC_MAX_TX_BYTES) + .expect("default block codec max tx bytes is non-zero") +} + +const fn default_simplex_replay_buffer_bytes() -> NonZeroUsize { + NonZeroUsize::new(DEFAULT_SIMPLEX_REPLAY_BUFFER_BYTES) + .expect("default simplex replay buffer is non-zero") +} + +const fn default_simplex_write_buffer_bytes() -> NonZeroUsize { + NonZeroUsize::new(DEFAULT_SIMPLEX_WRITE_BUFFER_BYTES) + .expect("default simplex write buffer is non-zero") +} + +const fn default_simplex_leader_timeout_secs() -> NonZeroU64 { + NonZeroU64::new(DEFAULT_SIMPLEX_LEADER_TIMEOUT_SECS) + .expect("default simplex leader timeout is non-zero") +} + +const fn default_simplex_certification_timeout_secs() -> NonZeroU64 { + NonZeroU64::new(DEFAULT_SIMPLEX_CERTIFICATION_TIMEOUT_SECS) + .expect("default simplex certification timeout is non-zero") +} + +const fn default_simplex_timeout_retry_secs() -> NonZeroU64 { + NonZeroU64::new(DEFAULT_SIMPLEX_TIMEOUT_RETRY_SECS) + .expect("default simplex retry timeout is non-zero") +} + +const fn default_simplex_fetch_timeout_secs() -> NonZeroU64 { + NonZeroU64::new(DEFAULT_SIMPLEX_FETCH_TIMEOUT_SECS) + .expect("default simplex fetch timeout is non-zero") +} + +const fn default_simplex_activity_timeout_views() -> NonZeroU64 { + 
NonZeroU64::new(DEFAULT_SIMPLEX_ACTIVITY_TIMEOUT_VIEWS) + .expect("default simplex activity timeout is non-zero") +} + +const fn default_simplex_skip_timeout_views() -> NonZeroU64 { + NonZeroU64::new(DEFAULT_SIMPLEX_SKIP_TIMEOUT_VIEWS) + .expect("default simplex skip timeout is non-zero") +} + +const fn default_simplex_fetch_concurrent() -> NonZeroUsize { + NonZeroUsize::new(DEFAULT_SIMPLEX_FETCH_CONCURRENT) + .expect("default simplex fetch concurrency is non-zero") +} + fn serialize_participants(participants: &[Vec], serializer: S) -> Result where S: serde::Serializer, @@ -92,20 +286,40 @@ mod tests { use super::*; fn create_valid_public_key_bytes() -> Vec { - let private_key = - ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from([42u8; 32])); + let private_key = private_key_from_seed([42u8; 32]); let public_key = private_key.public_key(); let mut bytes = Vec::new(); public_key.write(&mut bytes); bytes } + fn private_key_from_seed(seed: [u8; 32]) -> ed25519::PrivateKey { + ed25519::PrivateKey::read(&mut seed.as_slice()).expect("32-byte ed25519 seed should decode") + } + #[test] fn default_consensus_config() { let config = ConsensusConfig::default(); assert!(config.validator_key.is_none()); assert_eq!(config.threshold, DEFAULT_THRESHOLD); assert!(config.participants.is_empty()); + assert_eq!(config.block_codec.max_txs.get(), DEFAULT_BLOCK_CODEC_MAX_TXS); + assert_eq!(config.block_codec.max_tx_bytes.get(), DEFAULT_BLOCK_CODEC_MAX_TX_BYTES); + assert_eq!(config.simplex.replay_buffer_bytes.get(), DEFAULT_SIMPLEX_REPLAY_BUFFER_BYTES); + assert_eq!(config.simplex.write_buffer_bytes.get(), DEFAULT_SIMPLEX_WRITE_BUFFER_BYTES); + assert_eq!(config.simplex.leader_timeout_secs.get(), DEFAULT_SIMPLEX_LEADER_TIMEOUT_SECS); + assert_eq!( + config.simplex.certification_timeout_secs.get(), + DEFAULT_SIMPLEX_CERTIFICATION_TIMEOUT_SECS + ); + assert_eq!(config.simplex.timeout_retry_secs.get(), DEFAULT_SIMPLEX_TIMEOUT_RETRY_SECS); + 
assert_eq!(config.simplex.fetch_timeout_secs.get(), DEFAULT_SIMPLEX_FETCH_TIMEOUT_SECS); + assert_eq!( + config.simplex.activity_timeout_views.get(), + DEFAULT_SIMPLEX_ACTIVITY_TIMEOUT_VIEWS + ); + assert_eq!(config.simplex.skip_timeout_views.get(), DEFAULT_SIMPLEX_SKIP_TIMEOUT_VIEWS); + assert_eq!(config.simplex.fetch_concurrent.get(), DEFAULT_SIMPLEX_FETCH_CONCURRENT); } #[test] @@ -121,6 +335,7 @@ mod tests { validator_key: Some(PathBuf::from("/path/to/key")), threshold: 3, participants: vec![pk_bytes], + ..Default::default() }; let serialized = serde_json::to_string(&config).expect("serialize"); let deserialized: ConsensusConfig = serde_json::from_str(&serialized).expect("deserialize"); @@ -142,6 +357,8 @@ mod tests { assert!(config.validator_key.is_none()); assert_eq!(config.threshold, DEFAULT_THRESHOLD); assert!(config.participants.is_empty()); + assert_eq!(config.block_codec, ConsensusBlockCodecConfig::default()); + assert_eq!(config.simplex, ConsensusSimplexConfig::default()); } #[test] @@ -151,6 +368,8 @@ mod tests { assert_eq!(config.threshold, 7); assert!(config.validator_key.is_none()); assert!(config.participants.is_empty()); + assert_eq!(config.block_codec, ConsensusBlockCodecConfig::default()); + assert_eq!(config.simplex, ConsensusSimplexConfig::default()); } #[test] @@ -159,6 +378,52 @@ mod tests { serde_json::from_str(r#"{"validator_key": "/etc/key"}"#).expect("deserialize"); assert_eq!(config.validator_key, Some(PathBuf::from("/etc/key"))); assert_eq!(config.threshold, DEFAULT_THRESHOLD); + assert_eq!(config.block_codec, ConsensusBlockCodecConfig::default()); + assert_eq!(config.simplex, ConsensusSimplexConfig::default()); + } + + #[test] + fn serde_partial_block_codec_defaults() { + let config: ConsensusConfig = + serde_json::from_str(r#"{"block_codec": {"max_txs": 2048}}"#).expect("deserialize"); + + assert_eq!(config.block_codec.max_txs.get(), 2048); + assert_eq!(config.block_codec.max_tx_bytes.get(), DEFAULT_BLOCK_CODEC_MAX_TX_BYTES); + 
assert_eq!(config.simplex, ConsensusSimplexConfig::default()); + } + + #[test] + fn serde_partial_simplex_defaults() { + let config: ConsensusConfig = serde_json::from_str( + r#"{ + "simplex": { + "leader_timeout_secs": 7, + "fetch_concurrent": 3, + "activity_timeout_views": 30 + } + }"#, + ) + .expect("deserialize"); + + assert_eq!(config.simplex.leader_timeout_secs.get(), 7); + assert_eq!(config.simplex.fetch_concurrent.get(), 3); + assert_eq!(config.simplex.activity_timeout_views.get(), 30); + assert_eq!( + config.simplex.certification_timeout_secs.get(), + DEFAULT_SIMPLEX_CERTIFICATION_TIMEOUT_SECS + ); + assert_eq!(config.block_codec, ConsensusBlockCodecConfig::default()); + } + + #[test] + fn serde_rejects_zero_nonzero_fields() { + let block_codec = + serde_json::from_str::(r#"{"block_codec": {"max_tx_bytes": 0}}"#); + assert!(block_codec.is_err()); + + let simplex = + serde_json::from_str::(r#"{"simplex": {"fetch_concurrent": 0}}"#); + assert!(simplex.is_err()); } #[test] @@ -202,7 +467,7 @@ mod tests { fn build_validator_set_multiple_keys() { let keys: Vec<_> = (1..=3u8) .map(|i| { - let pk = ed25519::PrivateKey::from(ed25519_consensus::SigningKey::from([i; 32])); + let pk = private_key_from_seed([i; 32]); let mut bytes = Vec::new(); pk.public_key().write(&mut bytes); bytes @@ -240,6 +505,7 @@ mod tests { validator_key: Some(PathBuf::from("/custom/path")), threshold: 10, participants: vec![pk_bytes], + ..Default::default() }; assert_eq!(config, config.clone()); assert_ne!(config, ConsensusConfig::default()); diff --git a/crates/node/config/src/error.rs b/crates/node/config/src/error.rs index 24ff08d..40426bb 100644 --- a/crates/node/config/src/error.rs +++ b/crates/node/config/src/error.rs @@ -55,6 +55,10 @@ pub enum ConfigError { /// Failed to parse participant public key. #[error("invalid participant public key bytes")] InvalidParticipantKey, + + /// Invalid configuration value. 
+ #[error("invalid config value: {0}")] + InvalidValue(String), } #[cfg(test)] @@ -148,4 +152,10 @@ mod tests { assert!(debug.contains("InvalidKeyLength")); assert!(debug.contains("24")); } + + #[test] + fn test_invalid_value_display() { + let err = ConfigError::InvalidValue("worker_threads must be >= 1".to_string()); + assert_eq!(err.to_string(), "invalid config value: worker_threads must be >= 1"); + } } diff --git a/crates/node/config/src/execution.rs b/crates/node/config/src/execution.rs index 17efdfb..3b7dac8 100644 --- a/crates/node/config/src/execution.rs +++ b/crates/node/config/src/execution.rs @@ -1,12 +1,18 @@ //! Execution configuration. +use alloy_primitives::Address; use serde::{Deserialize, Serialize}; /// Default gas limit per block. -pub const DEFAULT_GAS_LIMIT: u64 = 30_000_000; +pub const DEFAULT_GAS_LIMIT: u64 = 250_000_000; -/// Default block time in seconds. -pub const DEFAULT_BLOCK_TIME: u64 = 2; +/// Initial base fee per gas (1 gwei). +/// +/// EIP-1559 base-fee accounting requires a non-zero seed value; starting +/// from zero means `calculate_base_fee` can never increase the fee because +/// `0 * anything = 0`. One gwei is the Ethereum-mainnet genesis value and +/// a reasonable default for devnets. +pub const INITIAL_BASE_FEE: u64 = 1_000_000_000; /// Execution layer configuration. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -15,14 +21,24 @@ pub struct ExecutionConfig { #[serde(default = "default_gas_limit")] pub gas_limit: u64, - /// Target block time in seconds. - #[serde(default = "default_block_time")] - pub block_time: u64, + /// Address that receives priority fees (tips) from transactions. + /// + /// When set, this address is used as the `beneficiary` in the block + /// header, causing EIP-1559 priority fees to be credited to it. + /// When `None` (the default), `Address::ZERO` is used, which + /// effectively burns all priority fees. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "serialize_optional_address", + deserialize_with = "deserialize_optional_address" + )] + pub fee_recipient: Option

, } impl Default for ExecutionConfig { fn default() -> Self { - Self { gas_limit: DEFAULT_GAS_LIMIT, block_time: DEFAULT_BLOCK_TIME } + Self { gas_limit: DEFAULT_GAS_LIMIT, fee_recipient: None } } } @@ -30,8 +46,28 @@ const fn default_gas_limit() -> u64 { DEFAULT_GAS_LIMIT } -const fn default_block_time() -> u64 { - DEFAULT_BLOCK_TIME +fn serialize_optional_address(addr: &Option
, serializer: S) -> Result +where + S: serde::Serializer, +{ + match addr { + Some(a) => serializer.serialize_str(&format!("{a:#x}")), + None => serializer.serialize_none(), + } +} + +fn deserialize_optional_address<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let opt: Option = Option::deserialize(deserializer)?; + opt.map_or_else( + || Ok(None), + |s| { + let s = s.trim(); + s.parse::
().map(Some).map_err(serde::de::Error::custom) + }, + ) } #[cfg(test)] @@ -42,12 +78,12 @@ mod tests { fn test_default_execution_config() { let config = ExecutionConfig::default(); assert_eq!(config.gas_limit, DEFAULT_GAS_LIMIT); - assert_eq!(config.block_time, DEFAULT_BLOCK_TIME); + assert_eq!(config.fee_recipient, None); } #[test] fn test_execution_config_serde_roundtrip() { - let config = ExecutionConfig { gas_limit: 50_000_000, block_time: 5 }; + let config = ExecutionConfig { gas_limit: 300_000_000, fee_recipient: None }; let serialized = serde_json::to_string(&config).expect("serialize"); let deserialized: ExecutionConfig = serde_json::from_str(&serialized).expect("deserialize"); assert_eq!(config, deserialized); @@ -55,7 +91,7 @@ mod tests { #[test] fn test_execution_config_toml_roundtrip() { - let config = ExecutionConfig { gas_limit: 15_000_000, block_time: 1 }; + let config = ExecutionConfig { gas_limit: 150_000_000, fee_recipient: None }; let serialized = toml::to_string(&config).expect("serialize toml"); let deserialized: ExecutionConfig = toml::from_str(&serialized).expect("deserialize toml"); assert_eq!(config, deserialized); @@ -65,7 +101,7 @@ mod tests { fn test_execution_config_serde_defaults() { let config: ExecutionConfig = serde_json::from_str("{}").expect("deserialize"); assert_eq!(config.gas_limit, DEFAULT_GAS_LIMIT); - assert_eq!(config.block_time, DEFAULT_BLOCK_TIME); + assert_eq!(config.fee_recipient, None); } #[test] @@ -73,18 +109,44 @@ mod tests { let config: ExecutionConfig = serde_json::from_str(r#"{"gas_limit": 10000000}"#).expect("deserialize"); assert_eq!(config.gas_limit, 10_000_000); - assert_eq!(config.block_time, DEFAULT_BLOCK_TIME); + assert_eq!(config.fee_recipient, None); + } - let config: ExecutionConfig = - serde_json::from_str(r#"{"block_time": 10}"#).expect("deserialize"); - assert_eq!(config.gas_limit, DEFAULT_GAS_LIMIT); - assert_eq!(config.block_time, 10); + #[test] + fn initial_base_fee_is_one_gwei() { + 
assert_eq!(INITIAL_BASE_FEE, 1_000_000_000); } #[test] fn test_execution_config_clone_and_eq() { - let config = ExecutionConfig { gas_limit: 999, block_time: 42 }; + let config = ExecutionConfig { gas_limit: 999, fee_recipient: None }; assert_eq!(config, config.clone()); assert_ne!(config, ExecutionConfig::default()); } + + #[test] + fn test_fee_recipient_json_roundtrip() { + let addr = "0xdead000000000000000000000000000000000001".parse::
().unwrap(); + let config = ExecutionConfig { gas_limit: DEFAULT_GAS_LIMIT, fee_recipient: Some(addr) }; + let serialized = serde_json::to_string(&config).expect("serialize"); + assert!(serialized.contains("0xdead")); + let deserialized: ExecutionConfig = serde_json::from_str(&serialized).expect("deserialize"); + assert_eq!(config, deserialized); + } + + #[test] + fn test_fee_recipient_toml_roundtrip() { + let addr = "0xdead000000000000000000000000000000000001".parse::
().unwrap(); + let config = ExecutionConfig { gas_limit: DEFAULT_GAS_LIMIT, fee_recipient: Some(addr) }; + let serialized = toml::to_string(&config).expect("serialize toml"); + let deserialized: ExecutionConfig = toml::from_str(&serialized).expect("deserialize toml"); + assert_eq!(config, deserialized); + } + + #[test] + fn test_fee_recipient_none_omitted_from_json() { + let config = ExecutionConfig::default(); + let serialized = serde_json::to_string(&config).expect("serialize"); + assert!(!serialized.contains("fee_recipient")); + } } diff --git a/crates/node/config/src/lib.rs b/crates/node/config/src/lib.rs index 65742fa..8e7c5b6 100644 --- a/crates/node/config/src/lib.rs +++ b/crates/node/config/src/lib.rs @@ -5,19 +5,27 @@ #![cfg_attr(not(test), warn(unused_crate_dependencies))] mod consensus; -pub use consensus::{ConsensusConfig, DEFAULT_THRESHOLD}; +pub use consensus::{ + ConsensusBlockCodecConfig, ConsensusConfig, ConsensusSimplexConfig, + DEFAULT_BLOCK_CODEC_MAX_TX_BYTES, DEFAULT_BLOCK_CODEC_MAX_TXS, + DEFAULT_SIMPLEX_ACTIVITY_TIMEOUT_VIEWS, DEFAULT_SIMPLEX_CERTIFICATION_TIMEOUT_SECS, + DEFAULT_SIMPLEX_FETCH_CONCURRENT, DEFAULT_SIMPLEX_FETCH_TIMEOUT_SECS, + DEFAULT_SIMPLEX_LEADER_TIMEOUT_SECS, DEFAULT_SIMPLEX_REPLAY_BUFFER_BYTES, + DEFAULT_SIMPLEX_SKIP_TIMEOUT_VIEWS, DEFAULT_SIMPLEX_TIMEOUT_RETRY_SECS, + DEFAULT_SIMPLEX_WRITE_BUFFER_BYTES, DEFAULT_THRESHOLD, +}; mod error; pub use error::ConfigError; mod execution; -pub use execution::{DEFAULT_BLOCK_TIME, DEFAULT_GAS_LIMIT, ExecutionConfig}; +pub use execution::{DEFAULT_GAS_LIMIT, ExecutionConfig, INITIAL_BASE_FEE}; mod network; pub use network::{DEFAULT_LISTEN_ADDR, NetworkConfig}; mod node; -pub use node::{DEFAULT_CHAIN_ID, DEFAULT_DATA_DIR, NodeConfig}; +pub use node::{DEFAULT_CHAIN_ID, DEFAULT_DATA_DIR, DEFAULT_WORKER_THREADS_CAP, NodeConfig}; mod rpc; pub use rpc::{DEFAULT_HTTP_ADDR, DEFAULT_WS_ADDR, RpcConfig}; diff --git a/crates/node/config/src/network.rs b/crates/node/config/src/network.rs index 
497b165..bff029e 100644 --- a/crates/node/config/src/network.rs +++ b/crates/node/config/src/network.rs @@ -20,6 +20,12 @@ pub struct NetworkConfig { /// Bootstrap peers to connect to on startup. #[serde(default)] pub bootstrap_peers: Vec, + + /// Enable transaction gossip between validators. + /// When enabled, transactions received via RPC are broadcast to peers, + /// and transactions from peers are validated and inserted into the local mempool. + #[serde(default)] + pub tx_gossip: bool, } impl Default for NetworkConfig { @@ -28,6 +34,7 @@ impl Default for NetworkConfig { listen_addr: DEFAULT_LISTEN_ADDR.to_string(), dialable_addr: None, bootstrap_peers: Vec::new(), + tx_gossip: false, } } } @@ -54,6 +61,7 @@ mod tests { listen_addr: "127.0.0.1:9000".to_string(), dialable_addr: Some("1.2.3.4:9000".to_string()), bootstrap_peers: vec!["peer1:30303".to_string()], + tx_gossip: false, }; let serialized = serde_json::to_string(&config).expect("serialize"); let deserialized: NetworkConfig = serde_json::from_str(&serialized).expect("deserialize"); @@ -66,6 +74,7 @@ mod tests { listen_addr: "0.0.0.0:8080".to_string(), dialable_addr: None, bootstrap_peers: vec!["node1.example.com:30303".to_string()], + tx_gossip: false, }; let serialized = toml::to_string(&config).expect("serialize toml"); let deserialized: NetworkConfig = toml::from_str(&serialized).expect("deserialize toml"); @@ -103,6 +112,7 @@ mod tests { listen_addr: "10.0.0.1:5555".to_string(), dialable_addr: Some("external.host:5555".to_string()), bootstrap_peers: vec!["a".to_string()], + tx_gossip: false, }; assert_eq!(config, config.clone()); assert_ne!(config, NetworkConfig::default()); diff --git a/crates/node/config/src/node.rs b/crates/node/config/src/node.rs index cc6ea78..28ff047 100644 --- a/crates/node/config/src/node.rs +++ b/crates/node/config/src/node.rs @@ -2,6 +2,7 @@ use std::path::{Path, PathBuf}; +use commonware_codec::ReadExt as _; use serde::{Deserialize, Serialize}; use crate::{ConfigError, 
ConsensusConfig, ExecutionConfig, NetworkConfig, RpcConfig}; @@ -12,6 +13,9 @@ pub const DEFAULT_CHAIN_ID: u64 = 1; /// Default data directory. pub const DEFAULT_DATA_DIR: &str = "/var/lib/kora"; +/// Default cap for worker threads. +pub const DEFAULT_WORKER_THREADS_CAP: usize = 8; + /// Complete node configuration. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct NodeConfig { @@ -23,6 +27,13 @@ pub struct NodeConfig { #[serde(default = "default_data_dir")] pub data_dir: PathBuf, + /// Number of tokio async worker threads for the commonware runtime. + /// + /// Defaults to the number of available CPU cores, capped at 8. + /// Set explicitly in config to override. + #[serde(default = "default_worker_threads")] + pub worker_threads: usize, + /// Consensus configuration. #[serde(default)] pub consensus: ConsensusConfig, @@ -45,6 +56,7 @@ impl Default for NodeConfig { Self { chain_id: DEFAULT_CHAIN_ID, data_dir: PathBuf::from(DEFAULT_DATA_DIR), + worker_threads: default_worker_threads(), consensus: ConsensusConfig::default(), network: NetworkConfig::default(), execution: ExecutionConfig::default(), @@ -54,12 +66,22 @@ impl Default for NodeConfig { } impl NodeConfig { + /// Validate configuration values. + /// + /// Returns an error if any value is out of range. + pub fn validate(&self) -> Result<(), ConfigError> { + if self.worker_threads == 0 { + return Err(ConfigError::InvalidValue("worker_threads must be >= 1".to_string())); + } + Ok(()) + } + /// Load configuration from a file path, auto-detecting format by extension. /// /// If the path is `None`, returns the default configuration. /// Supported extensions: `.json` for JSON, all others default to TOML. 
pub fn load(path: Option<&Path>) -> Result { - path.map_or_else( + let config = path.map_or_else( || Ok(Self::default()), |p| { let ext = p.extension().and_then(|e| e.to_str()).unwrap_or("toml"); @@ -68,7 +90,9 @@ impl NodeConfig { _ => Self::from_toml_file(p), } }, - ) + )?; + config.validate()?; + Ok(config) } /// Load configuration from a TOML file. @@ -123,9 +147,7 @@ impl NodeConfig { } let mut seed = [0u8; 32]; seed.copy_from_slice(&key_bytes); - Ok(commonware_cryptography::ed25519::PrivateKey::from( - ed25519_consensus::SigningKey::from(seed), - )) + Ok(private_key_from_seed(seed)) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => { // Generate new key @@ -140,13 +162,21 @@ impl NodeConfig { })?; } - // Write key to disk - std::fs::write(&key_path, seed) - .map_err(|e| ConfigError::Write { path: key_path.clone(), source: e })?; + // Write key to disk with restrictive permissions (0600) + { + use std::os::unix::fs::OpenOptionsExt; + let mut f = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(&key_path) + .map_err(|e| ConfigError::Write { path: key_path.clone(), source: e })?; + std::io::Write::write_all(&mut f, &seed) + .map_err(|e| ConfigError::Write { path: key_path.clone(), source: e })?; + } - Ok(commonware_cryptography::ed25519::PrivateKey::from( - ed25519_consensus::SigningKey::from(seed), - )) + Ok(private_key_from_seed(seed)) } Err(e) => Err(ConfigError::Read { path: key_path, source: e }), } @@ -161,6 +191,11 @@ impl NodeConfig { } } +fn private_key_from_seed(seed: [u8; 32]) -> commonware_cryptography::ed25519::PrivateKey { + commonware_cryptography::ed25519::PrivateKey::read(&mut seed.as_slice()) + .expect("32-byte ed25519 seed should decode") +} + const fn default_chain_id() -> u64 { DEFAULT_CHAIN_ID } @@ -169,6 +204,12 @@ fn default_data_dir() -> PathBuf { PathBuf::from(DEFAULT_DATA_DIR) } +fn default_worker_threads() -> usize { + std::thread::available_parallelism() + .map(|n| 
n.get().min(DEFAULT_WORKER_THREADS_CAP)) + .unwrap_or(4) +} + #[cfg(test)] mod tests { use super::*; @@ -178,6 +219,38 @@ mod tests { let config = NodeConfig::default(); assert_eq!(config.chain_id, DEFAULT_CHAIN_ID); assert_eq!(config.data_dir, PathBuf::from(DEFAULT_DATA_DIR)); + assert!(config.worker_threads >= 1); + assert!(config.worker_threads <= DEFAULT_WORKER_THREADS_CAP); + } + + #[test] + fn test_worker_threads_default_from_toml() { + // A TOML config without worker_threads should get the default. + let config = NodeConfig::from_toml("chain_id = 1\n").unwrap(); + assert!(config.worker_threads >= 1); + assert!(config.worker_threads <= DEFAULT_WORKER_THREADS_CAP); + } + + #[test] + fn test_worker_threads_explicit() { + let config = NodeConfig::from_toml("worker_threads = 6\n").unwrap(); + assert_eq!(config.worker_threads, 6); + } + + #[test] + fn test_worker_threads_zero_rejected() { + let config = NodeConfig::from_toml("worker_threads = 0\n").unwrap(); + let err = config.validate(); + assert!(err.is_err()); + assert!(err.unwrap_err().to_string().contains("worker_threads")); + } + + #[test] + fn test_load_rejects_zero_worker_threads() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("config.toml"); + std::fs::write(&path, "worker_threads = 0\n").unwrap(); + assert!(NodeConfig::load(Some(&path)).is_err()); } #[test] diff --git a/crates/node/consensus/Cargo.toml b/crates/node/consensus/Cargo.toml index 8822b7e..9739f15 100644 --- a/crates/node/consensus/Cargo.toml +++ b/crates/node/consensus/Cargo.toml @@ -18,6 +18,7 @@ kora-traits = { path = "../../storage/traits" } # Alloy alloy-primitives.workspace = true alloy-consensus.workspace = true +alloy-eips.workspace = true # Commonware commonware-cryptography.workspace = true @@ -25,13 +26,19 @@ commonware-cryptography.workspace = true # Synchronization parking_lot.workspace = true +# Logging +tracing.workspace = true + # Error handling thiserror.workspace = true # Async futures.workspace = 
true +tokio = { workspace = true, features = ["rt"] } [dev-dependencies] +k256.workspace = true +kora-domain = { path = "../domain", features = ["evm"] } rstest = "0.24" tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/node/consensus/src/application.rs b/crates/node/consensus/src/application.rs index 5732731..5d69b1e 100644 --- a/crates/node/consensus/src/application.rs +++ b/crates/node/consensus/src/application.rs @@ -134,13 +134,14 @@ mod tests { impl ConsensusApplication for MockApp { fn propose(&self, _parent: Digest) -> Result { - Ok(Block { - parent: kora_domain::BlockId(alloy_primitives::B256::ZERO), - height: 0, - prevrandao: alloy_primitives::B256::ZERO, - state_root: kora_domain::StateRoot(alloy_primitives::B256::ZERO), - txs: Vec::new(), - }) + Ok(Block::new( + kora_domain::BlockId(alloy_primitives::B256::ZERO), + 0, + 0, + alloy_primitives::B256::ZERO, + kora_domain::StateRoot(alloy_primitives::B256::ZERO), + Vec::new(), + )) } fn verify(&self, block: &Block) -> Result { @@ -164,13 +165,14 @@ mod tests { #[test] fn mock_app_verify() { let app = MockApp; - let block = Block { - parent: kora_domain::BlockId(alloy_primitives::B256::ZERO), - height: 0, - prevrandao: alloy_primitives::B256::ZERO, - state_root: kora_domain::StateRoot(alloy_primitives::B256::ZERO), - txs: Vec::new(), - }; + let block = Block::new( + kora_domain::BlockId(alloy_primitives::B256::ZERO), + 0, + 0, + alloy_primitives::B256::ZERO, + kora_domain::StateRoot(alloy_primitives::B256::ZERO), + Vec::new(), + ); let digest = app.verify(&block).unwrap(); assert_eq!(digest, block.commitment()); } diff --git a/crates/node/consensus/src/components/mempool.rs b/crates/node/consensus/src/components/mempool.rs index a3f34e6..c4989ac 100644 --- a/crates/node/consensus/src/components/mempool.rs +++ b/crates/node/consensus/src/components/mempool.rs @@ -2,6 +2,9 @@ use std::{collections::BTreeMap, sync::Arc}; +use alloy_consensus::{Transaction as _, TxEnvelope, 
transaction::SignerRecoverable as _}; +use alloy_eips::eip2718::Decodable2718 as _; +use alloy_primitives::Address; use kora_domain::Tx; use parking_lot::RwLock; @@ -27,6 +30,16 @@ impl Default for InMemoryMempool { } } +fn tx_order_key(tx: &Tx) -> (u8, Address, u64) { + let Ok(envelope) = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()) else { + return (1, Address::ZERO, u64::MAX); + }; + let Ok(sender) = envelope.recover_signer() else { + return (1, Address::ZERO, u64::MAX); + }; + (0, sender, envelope.nonce()) +} + impl Mempool for InMemoryMempool { fn insert(&self, tx: Tx) -> bool { let id = tx.id(); @@ -36,12 +49,13 @@ impl Mempool for InMemoryMempool { fn build(&self, max_txs: usize, excluded: &std::collections::BTreeSet) -> Vec { let inner = self.inner.read(); - inner + let mut candidates: Vec<_> = inner .iter() .filter(|(id, _)| !excluded.contains(id)) - .take(max_txs) - .map(|(_, tx)| tx.clone()) - .collect() + .map(|(id, tx)| (tx_order_key(tx), *id, tx.clone())) + .collect(); + candidates.sort_by_key(|(order, id, _)| (*order, *id)); + candidates.into_iter().take(max_txs).map(|(_, _, tx)| tx).collect() } fn prune(&self, tx_ids: &[TxId]) { @@ -58,8 +72,43 @@ impl Mempool for InMemoryMempool { #[cfg(test)] mod tests { + use alloy_consensus::{Transaction as _, TxEnvelope, transaction::SignerRecoverable as _}; + use alloy_eips::eip2718::Decodable2718 as _; + use alloy_primitives::{Address, U256}; + use k256::ecdsa::SigningKey; + use kora_domain::evm::Evm; + use super::*; + fn signing_key_from_seed(seed: u8) -> SigningKey { + let mut secret = [0u8; 32]; + secret[31] = seed; + SigningKey::from_bytes((&secret).into()).expect("valid key") + } + + fn signed_transfer(sender_seed: u8, recipient_seed: u8, nonce: u64, value: u64) -> Tx { + let sender_key = signing_key_from_seed(sender_seed); + let recipient_key = signing_key_from_seed(recipient_seed); + let recipient = Evm::address_from_key(&recipient_key); + Evm::sign_eip1559_transfer( + &sender_key, + 1, + recipient, 
+ U256::from(value), + nonce, + 21_000, + 0, + 0, + ) + } + + fn signed_order_key(tx: &Tx) -> (Address, u64, TxId) { + let mut data = tx.bytes.as_ref(); + let envelope = TxEnvelope::decode_2718(&mut data).expect("signed tx"); + let sender = envelope.recover_signer().expect("recover signer"); + (sender, envelope.nonce(), tx.id()) + } + #[test] fn mempool_insert_and_build() { let mempool = InMemoryMempool::new(); @@ -109,4 +158,30 @@ mod tests { assert_eq!(txs.len(), 1); assert_eq!(txs[0], tx2); } + + #[test] + fn mempool_build_orders_signed_txs_by_sender_nonce_and_id() { + let mempool = InMemoryMempool::new(); + let txs = vec![ + signed_transfer(2, 9, 1, 10), + signed_transfer(1, 9, 0, 20), + signed_transfer(2, 8, 0, 30), + signed_transfer(1, 8, 0, 40), + signed_transfer(1, 7, 1, 50), + signed_transfer(2, 7, 0, 60), + ]; + + for tx in txs.iter().rev() { + assert!(mempool.insert(tx.clone())); + } + + let mut expected = txs; + expected.sort_by_key(signed_order_key); + + let built = mempool.build(10, &std::collections::BTreeSet::new()); + let built_ids: Vec<_> = built.iter().map(Tx::id).collect(); + let expected_ids: Vec<_> = expected.iter().map(Tx::id).collect(); + + assert_eq!(built_ids, expected_ids); + } } diff --git a/crates/node/consensus/src/components/snapshot.rs b/crates/node/consensus/src/components/snapshot.rs index c683ce0..4a7c486 100644 --- a/crates/node/consensus/src/components/snapshot.rs +++ b/crates/node/consensus/src/components/snapshot.rs @@ -1,25 +1,43 @@ //! In-memory snapshot store implementation. use std::{ - collections::{BTreeMap, BTreeSet}, + collections::{BTreeMap, BTreeSet, VecDeque}, sync::Arc, }; use kora_qmdb::ChangeSet; use kora_traits::StateDb; use parking_lot::RwLock; +use tracing::debug; use crate::{ ConsensusError, traits::{Digest, Snapshot, SnapshotStore}, }; -/// In-memory snapshot store. +/// Default maximum number of persisted snapshots to retain in memory. 
+/// +/// Once more than this many snapshots have been persisted, the oldest are +/// evicted from the in-memory store. The `persisted` marker is kept so that +/// ancestor chain-walking terminates correctly, but the heavy snapshot data +/// (state overlay, change set, tx IDs) is freed. +const DEFAULT_MAX_PERSISTED_RETAINED: usize = 256; + +/// In-memory snapshot store with bounded retention of persisted snapshots. +/// +/// Snapshots that have been persisted to the underlying state database are +/// evicted (oldest-first) once the number of retained persisted entries +/// exceeds `max_persisted_retained`. This prevents unbounded memory growth +/// on long-running nodes. #[derive(Debug)] pub struct InMemorySnapshotStore { snapshots: Arc>>>, persisted: Arc>>, persisting: Arc>>, + /// Insertion-ordered queue of persisted digests, used for oldest-first eviction. + persisted_order: Arc>>, + /// Maximum number of persisted snapshots to retain in memory. + max_persisted_retained: usize, } impl Clone for InMemorySnapshotStore { @@ -28,20 +46,57 @@ impl Clone for InMemorySnapshotStore { snapshots: Arc::clone(&self.snapshots), persisted: Arc::clone(&self.persisted), persisting: Arc::clone(&self.persisting), + persisted_order: Arc::clone(&self.persisted_order), + max_persisted_retained: self.max_persisted_retained, } } } impl InMemorySnapshotStore { - /// Create a new empty snapshot store. + /// Create a new empty snapshot store with the default retention limit. #[must_use] pub fn new() -> Self { + Self::with_max_persisted_retained(DEFAULT_MAX_PERSISTED_RETAINED) + } + + /// Create a new empty snapshot store that retains at most + /// `max_persisted_retained` persisted snapshots in memory. 
+ #[must_use] + pub fn with_max_persisted_retained(max_persisted_retained: usize) -> Self { Self { snapshots: Arc::new(RwLock::new(BTreeMap::new())), persisted: Arc::new(RwLock::new(BTreeSet::new())), persisting: Arc::new(RwLock::new(BTreeSet::new())), + persisted_order: Arc::new(RwLock::new(VecDeque::new())), + max_persisted_retained, } } + + /// Return the number of snapshots currently held in memory. + pub fn len(&self) -> usize { + self.snapshots.read().len() + } + + /// Return true if the store contains no snapshots. + pub fn is_empty(&self) -> bool { + self.snapshots.read().is_empty() + } + + /// Return the number of digests currently marked as persisted. + pub fn persisted_count(&self) -> usize { + self.persisted.read().len() + } + + /// Return the number of snapshots that have not yet been persisted. + /// + /// This is the count of entries in the snapshot map whose digest is not + /// in the persisted set. A rising value under steady-state operation + /// indicates the persistence pipeline is falling behind block production. + pub fn unpersisted_count(&self) -> usize { + let snapshots = self.snapshots.read(); + let persisted = self.persisted.read(); + snapshots.keys().filter(|d| !persisted.contains(d)).count() + } } impl InMemorySnapshotStore { @@ -67,6 +122,53 @@ impl InMemorySnapshotStore { persisting.remove(digest); } } + + /// Evict the oldest persisted snapshots that exceed the retention limit. + /// + /// After a successful `persist_snapshot` call, this method should be invoked + /// to free memory held by snapshots whose state has already been committed + /// to the persistent store (QMDB). + /// + /// The `persisted` marker is intentionally **kept** for evicted digests so + /// that ancestor chain-walking (`merged_changes`, `changes_for_persist`, + /// `collect_pending_tx_ids`) still terminates correctly at persisted + /// boundaries. + /// + /// Returns the number of snapshots evicted. 
+ pub fn evict_persisted(&self) -> usize { + // Fast path: check with a read lock to avoid write-lock contention + // when no eviction is needed (the common case). + if self.persisted_order.read().len() <= self.max_persisted_retained { + return 0; + } + + let mut snapshots = self.snapshots.write(); + let persisted = self.persisted.read(); + let mut order = self.persisted_order.write(); + + let mut evicted = 0usize; + while order.len() > self.max_persisted_retained { + let Some(oldest) = order.pop_front() else { + break; + }; + // Only remove snapshot data if it is actually persisted. + // (Guards against stale entries in the order queue.) + if persisted.contains(&oldest) && snapshots.remove(&oldest).is_some() { + evicted += 1; + } + } + + if evicted > 0 { + debug!( + evicted, + retained = snapshots.len(), + persisted = persisted.len(), + "evicted persisted snapshots" + ); + } + + evicted + } } impl Default for InMemorySnapshotStore { @@ -90,8 +192,11 @@ impl SnapshotStore for InMemorySnapshotStore { fn mark_persisted(&self, digests: &[Digest]) { let mut persisted = self.persisted.write(); + let mut order = self.persisted_order.write(); for digest in digests { - persisted.insert(*digest); + if persisted.insert(*digest) { + order.push_back(*digest); + } } } @@ -286,4 +391,188 @@ mod tests { store.mark_persisted(&[digest]); assert!(!store.can_persist_chain(&[digest])); } + + fn make_digest(byte: u8) -> Digest { + Digest::from([byte; 32]) + } + + fn make_snapshot(parent: Option) -> Snapshot { + Snapshot::new(parent, MockStateDb, StateRoot(B256::ZERO), ChangeSet::new(), BTreeSet::new()) + } + + #[test] + fn evict_persisted_removes_oldest_snapshots() { + // Retain at most 2 persisted snapshots. 
+ let store = InMemorySnapshotStore::::with_max_persisted_retained(2); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + let d3 = make_digest(0x03); + let d4 = make_digest(0x04); + + store.insert(d1, make_snapshot(None)); + store.insert(d2, make_snapshot(Some(d1))); + store.insert(d3, make_snapshot(Some(d2))); + store.insert(d4, make_snapshot(Some(d3))); + + // Persist d1 and d2, then evict -- both are within the limit. + store.mark_persisted(&[d1, d2]); + assert_eq!(store.evict_persisted(), 0); + assert_eq!(store.len(), 4); + + // Persist d3 -- now 3 persisted, limit is 2, so d1 should be evicted. + store.mark_persisted(&[d3]); + assert_eq!(store.evict_persisted(), 1); + assert!(store.get(&d1).is_none(), "d1 should have been evicted"); + assert!(store.get(&d2).is_some(), "d2 should still be retained"); + assert!(store.get(&d3).is_some(), "d3 should still be retained"); + assert!(store.get(&d4).is_some(), "d4 is not persisted, should be retained"); + + // The persisted marker for d1 should still be present (for chain-walking). + assert!(store.is_persisted(&d1)); + } + + #[test] + fn evict_persisted_does_not_remove_unpersisted() { + let store = InMemorySnapshotStore::::with_max_persisted_retained(1); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + let d3 = make_digest(0x03); + + store.insert(d1, make_snapshot(None)); + store.insert(d2, make_snapshot(Some(d1))); + store.insert(d3, make_snapshot(Some(d2))); + + // Only persist d1 -- within limit, no eviction. + store.mark_persisted(&[d1]); + assert_eq!(store.evict_persisted(), 0); + + // Persist d2 -- now 2 persisted, limit is 1, evict d1. + store.mark_persisted(&[d2]); + assert_eq!(store.evict_persisted(), 1); + assert!(store.get(&d1).is_none()); + assert!(store.get(&d2).is_some()); + // d3 is not persisted, must not be evicted. 
+ assert!(store.get(&d3).is_some()); + } + + #[test] + fn evict_persisted_with_zero_retention_evicts_all() { + let store = InMemorySnapshotStore::::with_max_persisted_retained(0); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + + store.insert(d1, make_snapshot(None)); + store.insert(d2, make_snapshot(Some(d1))); + + store.mark_persisted(&[d1, d2]); + let evicted = store.evict_persisted(); + assert_eq!(evicted, 2); + assert!(store.get(&d1).is_none()); + assert!(store.get(&d2).is_none()); + // Persisted markers are kept. + assert!(store.is_persisted(&d1)); + assert!(store.is_persisted(&d2)); + } + + #[test] + fn len_and_persisted_count_track_correctly() { + let store = InMemorySnapshotStore::::with_max_persisted_retained(1); + + assert!(store.is_empty()); + assert_eq!(store.len(), 0); + assert_eq!(store.persisted_count(), 0); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + + store.insert(d1, make_snapshot(None)); + assert_eq!(store.len(), 1); + + store.insert(d2, make_snapshot(Some(d1))); + assert_eq!(store.len(), 2); + + store.mark_persisted(&[d1, d2]); + assert_eq!(store.persisted_count(), 2); + + store.evict_persisted(); + // d1 evicted from snapshots, d2 retained. + assert_eq!(store.len(), 1); + // Both remain in persisted set. + assert_eq!(store.persisted_count(), 2); + } + + #[test] + fn evict_persisted_is_noop_within_limit() { + // Retention limit of 4, persist exactly 4 -- no eviction should happen. + let store = InMemorySnapshotStore::::with_max_persisted_retained(4); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + let d3 = make_digest(0x03); + let d4 = make_digest(0x04); + + store.insert(d1, make_snapshot(None)); + store.insert(d2, make_snapshot(Some(d1))); + store.insert(d3, make_snapshot(Some(d2))); + store.insert(d4, make_snapshot(Some(d3))); + + store.mark_persisted(&[d1, d2, d3, d4]); + assert_eq!(store.persisted_count(), 4); + + // Eviction should be a no-op: exactly at the limit. 
+ assert_eq!(store.evict_persisted(), 0); + + // All snapshots remain in memory. + assert_eq!(store.len(), 4); + assert!(store.get(&d1).is_some()); + assert!(store.get(&d2).is_some()); + assert!(store.get(&d3).is_some()); + assert!(store.get(&d4).is_some()); + } + + #[test] + fn mark_persisted_is_idempotent_for_order_tracking() { + let store = InMemorySnapshotStore::::with_max_persisted_retained(1); + + let d1 = make_digest(0x01); + store.insert(d1, make_snapshot(None)); + + // Mark persisted twice -- should not duplicate in the order queue. + store.mark_persisted(&[d1]); + store.mark_persisted(&[d1]); + + assert_eq!(store.persisted_count(), 1); + // Eviction with only 1 persisted and limit 1 should evict nothing. + assert_eq!(store.evict_persisted(), 0); + } + + #[test] + fn unpersisted_count_tracks_correctly() { + let store = InMemorySnapshotStore::::with_max_persisted_retained(4); + + let d1 = make_digest(0x01); + let d2 = make_digest(0x02); + let d3 = make_digest(0x03); + + // Empty store has zero unpersisted. + assert_eq!(store.unpersisted_count(), 0); + + // Insert three snapshots -- all unpersisted. + store.insert(d1, make_snapshot(None)); + store.insert(d2, make_snapshot(Some(d1))); + store.insert(d3, make_snapshot(Some(d2))); + assert_eq!(store.unpersisted_count(), 3); + assert_eq!(store.len(), 3); + + // Persist d1 -- two unpersisted remain. + store.mark_persisted(&[d1]); + assert_eq!(store.unpersisted_count(), 2); + + // Persist all -- zero unpersisted. + store.mark_persisted(&[d2, d3]); + assert_eq!(store.unpersisted_count(), 0); + } } diff --git a/crates/node/consensus/src/error.rs b/crates/node/consensus/src/error.rs index 6c1f8dd..bc393a1 100644 --- a/crates/node/consensus/src/error.rs +++ b/crates/node/consensus/src/error.rs @@ -34,6 +34,13 @@ pub enum ConsensusError { /// Actual state root. actual: StateRoot, }, + + /// Timestamp overflow (parent timestamp is `u64::MAX`). 
+ #[error("timestamp overflow: cannot produce a timestamp after {parent_timestamp}")] + TimestampOverflow { + /// The parent block's timestamp that caused the overflow. + parent_timestamp: u64, + }, } #[cfg(test)] @@ -92,6 +99,14 @@ mod tests { assert!(msg.contains("got")); } + #[test] + fn test_timestamp_overflow_display() { + let err = ConsensusError::TimestampOverflow { parent_timestamp: u64::MAX }; + let msg = err.to_string(); + assert!(msg.contains("timestamp overflow")); + assert!(msg.contains(&u64::MAX.to_string())); + } + #[test] fn test_error_is_send_sync() { fn assert_send_sync() {} diff --git a/crates/node/consensus/src/execution.rs b/crates/node/consensus/src/execution.rs index 0ec7f1d..947762f 100644 --- a/crates/node/consensus/src/execution.rs +++ b/crates/node/consensus/src/execution.rs @@ -17,8 +17,11 @@ pub struct BlockExecution { impl BlockExecution { /// Execute a block's transactions against a parent snapshot. /// - /// This helper runs the executor and returns the execution outcome for callers to - /// compute deterministic consensus roots, persist state, or cache snapshots. + /// This helper runs the executor on a dedicated blocking thread via + /// [`tokio::task::spawn_blocking`] so that the synchronous EVM execution + /// does not occupy an async worker thread. The executor, state, context, + /// and transactions are cloned into the blocking closure (all clones are + /// cheap -- Arc bumps or small structs). 
pub async fn execute( parent_snapshot: &Snapshot, executor: &E, @@ -29,10 +32,17 @@ impl BlockExecution { S: StateDb, E: BlockExecutor, { + let executor = executor.clone(); + let state = parent_snapshot.state.clone(); + let context = context.clone(); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); - let outcome = executor - .execute(&parent_snapshot.state, context, &txs_bytes) - .map_err(|e| ConsensusError::Execution(e.to_string()))?; + + let outcome = + tokio::task::spawn_blocking(move || executor.execute(&state, &context, &txs_bytes)) + .await + .map_err(|e| ConsensusError::Execution(format!("spawn_blocking join error: {e}")))? + .map_err(|e| ConsensusError::Execution(e.to_string()))?; + Ok(Self { outcome }) } } diff --git a/crates/node/consensus/src/proposal.rs b/crates/node/consensus/src/proposal.rs index 96d692c..4873420 100644 --- a/crates/node/consensus/src/proposal.rs +++ b/crates/node/consensus/src/proposal.rs @@ -8,16 +8,22 @@ use commonware_cryptography::Committable as _; use kora_domain::{Block, StateRoot, Tx}; use kora_executor::{BlockContext, BlockExecutor}; use kora_traits::StateDb; +use tracing::warn; use crate::{ConsensusError, Digest, Mempool, Snapshot, SnapshotStore, TxId}; -fn block_context(height: u64, prevrandao: B256) -> BlockContext { +fn block_context( + height: u64, + timestamp: u64, + prevrandao: B256, + fee_recipient: Address, +) -> BlockContext { let header = Header { number: height, - timestamp: height, + timestamp, gas_limit: kora_config::DEFAULT_GAS_LIMIT, - beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + beneficiary: fee_recipient, + base_fee_per_gas: Some(kora_config::INITIAL_BASE_FEE), ..Default::default() }; BlockContext::new(header, B256::ZERO, prevrandao) @@ -39,6 +45,8 @@ pub struct ProposalBuilder { executor: E, /// Maximum transactions per block. max_txs: usize, + /// Address that receives priority fees (tips). 
+ fee_recipient: Address, } impl ProposalBuilder @@ -60,7 +68,23 @@ where /// * `snapshots` - Snapshot store for parent state lookup. /// * `executor` - Block executor for transaction execution. pub const fn new(state: S, mempool: M, snapshots: SS, executor: E) -> Self { - Self { state, mempool, snapshots, executor, max_txs: Self::DEFAULT_MAX_TXS } + Self { + state, + mempool, + snapshots, + executor, + max_txs: Self::DEFAULT_MAX_TXS, + fee_recipient: Address::ZERO, + } + } + + /// Set the fee recipient address. + /// + /// Defaults to [`Address::ZERO`] (burns priority fees). + #[must_use] + pub const fn with_fee_recipient(mut self, fee_recipient: Address) -> Self { + self.fee_recipient = fee_recipient; + self } /// Set the maximum number of transactions per block. @@ -80,10 +104,15 @@ where /// 3. Executes the batch against the parent state. /// 4. Computes the new state root from the execution outcome. /// 5. Constructs and returns the new block and its snapshot. + /// + /// NOTE: This synchronous method calls `executor.execute()` on the calling + /// thread. It is only used in tests. Production code should use + /// `build_proposal_async` which offloads execution to a blocking thread. 
pub fn build_proposal( &self, parent: &Block, prevrandao: B256, + now_secs: u64, ) -> Result<(Block, Snapshot), ConsensusError> { let parent_digest = parent.commitment(); let parent_snapshot = self @@ -95,7 +124,9 @@ where let txs = self.mempool.build(self.max_txs, &excluded); let height = parent.height + 1; - let context = block_context(height, prevrandao); + let timestamp = Block::next_timestamp(now_secs, parent.timestamp) + .ok_or(ConsensusError::TimestampOverflow { parent_timestamp: parent.timestamp })?; + let context = block_context(height, timestamp, prevrandao, self.fee_recipient); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); let outcome = self .executor @@ -108,7 +139,7 @@ where .map_err(ConsensusError::StateDb)?; let state_root = StateRoot(state_root); - let block = Block { parent: parent.id(), height, prevrandao, state_root, txs }; + let block = Block::new(parent.id(), height, timestamp, prevrandao, state_root, txs); let tx_ids = self.tx_ids_from_block(&block); let snapshot = Snapshot::new( Some(parent_digest), @@ -122,10 +153,14 @@ where } /// Async variant of [`Self::build_proposal`] that awaits state root computation. + /// + /// Offloads the synchronous EVM execution to a blocking thread via + /// [`tokio::task::spawn_blocking`] to avoid starving async worker threads. 
pub async fn build_proposal_async( &self, parent: &Block, prevrandao: B256, + now_secs: u64, ) -> Result<(Block, Snapshot), ConsensusError> { let parent_digest = parent.commitment(); let parent_snapshot = self @@ -137,12 +172,18 @@ where let txs = self.mempool.build(self.max_txs, &excluded); let height = parent.height + 1; - let context = block_context(height, prevrandao); + let timestamp = Block::next_timestamp(now_secs, parent.timestamp) + .ok_or(ConsensusError::TimestampOverflow { parent_timestamp: parent.timestamp })?; + let context = block_context(height, timestamp, prevrandao, self.fee_recipient); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); - let outcome = self - .executor - .execute(&parent_snapshot.state, &context, &txs_bytes) - .map_err(|e| ConsensusError::Execution(e.to_string()))?; + + let executor = self.executor.clone(); + let state = parent_snapshot.state.clone(); + let outcome = + tokio::task::spawn_blocking(move || executor.execute(&state, &context, &txs_bytes)) + .await + .map_err(|e| ConsensusError::Execution(format!("spawn_blocking join error: {e}")))? 
+ .map_err(|e| ConsensusError::Execution(e.to_string()))?; let merged_changes = self.snapshots.merged_changes(parent_digest, outcome.changes.clone())?; @@ -150,7 +191,7 @@ where self.state.compute_root(&merged_changes).await.map_err(ConsensusError::StateDb)?; let state_root = StateRoot(state_root); - let block = Block { parent: parent.id(), height, prevrandao, state_root, txs }; + let block = Block::new(parent.id(), height, timestamp, prevrandao, state_root, txs); let tx_ids = self.tx_ids_from_block(&block); let snapshot = Snapshot::new( Some(parent_digest), @@ -176,8 +217,15 @@ where break; } - let snapshot = - self.snapshots.get(&digest).ok_or(ConsensusError::SnapshotNotFound(digest))?; + let Some(snapshot) = self.snapshots.get(&digest) else { + warn!( + ?digest, + collected_so_far = excluded.len(), + "snapshot chain gap during tx exclusion collection — \ + aborting proposal to prevent duplicate transactions" + ); + return Err(ConsensusError::SnapshotNotFound(digest)); + }; excluded.extend(snapshot.tx_ids.iter().copied()); current = snapshot.parent; } @@ -188,14 +236,12 @@ where #[cfg(test)] mod tests { - use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, - }; + use std::{collections::BTreeMap, sync::Arc}; use alloy_primitives::{Address, Bytes, U256}; use kora_executor::ExecutionOutcome; use kora_qmdb::ChangeSet; + use parking_lot::RwLock; use super::*; @@ -274,20 +320,19 @@ mod tests { fn add(&self, tx: Tx) { let id = tx.id(); - self.txs.write().unwrap().insert(id, tx); + self.txs.write().insert(id, tx); } } impl Mempool for MockMempool { fn insert(&self, tx: Tx) -> bool { let id = tx.id(); - self.txs.write().unwrap().insert(id, tx).is_none() + self.txs.write().insert(id, tx).is_none() } fn build(&self, max_txs: usize, excluded: &BTreeSet) -> Vec { self.txs .read() - .unwrap() .iter() .filter(|(id, _)| !excluded.contains(id)) .take(max_txs) @@ -296,14 +341,14 @@ mod tests { } fn prune(&self, tx_ids: &[TxId]) { - let mut txs = self.txs.write().unwrap(); + 
let mut txs = self.txs.write(); for id in tx_ids { txs.remove(id); } } fn len(&self) -> usize { - self.txs.read().unwrap().len() + self.txs.read().len() } } @@ -324,19 +369,19 @@ mod tests { impl SnapshotStore for MockSnapshotStore { fn get(&self, digest: &Digest) -> Option> { - self.snapshots.read().unwrap().get(digest).cloned() + self.snapshots.read().get(digest).cloned() } fn insert(&self, digest: Digest, snapshot: Snapshot) { - self.snapshots.write().unwrap().insert(digest, snapshot); + self.snapshots.write().insert(digest, snapshot); } fn is_persisted(&self, digest: &Digest) -> bool { - self.persisted.read().unwrap().contains(digest) + self.persisted.read().contains(digest) } fn mark_persisted(&self, digests: &[Digest]) { - let mut persisted = self.persisted.write().unwrap(); + let mut persisted = self.persisted.write(); for digest in digests { persisted.insert(*digest); } @@ -357,7 +402,6 @@ mod tests { let snapshot = self .snapshots .read() - .unwrap() .get(&digest) .cloned() .ok_or(ConsensusError::SnapshotNotFound(digest))?; @@ -381,6 +425,7 @@ mod tests { changes: ChangeSet::new(), receipts: Vec::new(), gas_used: txs.len() as u64 * 21000, + selfdestructed_addresses: Vec::new(), }) } @@ -394,13 +439,14 @@ mod tests { } fn parent_block() -> Block { - Block { - parent: kora_domain::BlockId(B256::ZERO), - height: 0, - prevrandao: B256::ZERO, - state_root: StateRoot(B256::ZERO), - txs: Vec::new(), - } + Block::new( + kora_domain::BlockId(B256::ZERO), + 0, + 0, + B256::ZERO, + StateRoot(B256::ZERO), + Vec::new(), + ) } #[test] @@ -435,7 +481,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor); let parent = parent_block(); - let result = builder.build_proposal(&parent, B256::ZERO); + let result = builder.build_proposal(&parent, B256::ZERO, 0); assert!(matches!(result, Err(ConsensusError::SnapshotNotFound(_)))); } @@ -461,7 +507,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor); - let 
result = builder.build_proposal(&parent, B256::ZERO); + let result = builder.build_proposal(&parent, B256::ZERO, 0); assert!(result.is_ok()); let (block, snapshot) = result.unwrap(); @@ -495,7 +541,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor); - let result = builder.build_proposal(&parent, B256::repeat_byte(0xAB)); + let result = builder.build_proposal(&parent, B256::repeat_byte(0xAB), 0); assert!(result.is_ok()); let (block, snapshot) = result.unwrap(); @@ -530,7 +576,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor).with_max_txs(10); - let result = builder.build_proposal(&parent, B256::ZERO); + let result = builder.build_proposal(&parent, B256::ZERO, 0); assert!(result.is_ok()); let (block, _) = result.unwrap(); @@ -565,7 +611,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor); - let (block, snapshot) = builder.build_proposal(&parent, B256::ZERO).unwrap(); + let (block, snapshot) = builder.build_proposal(&parent, B256::ZERO, 0).unwrap(); // MockStateDb::compute_root returns B256::repeat_byte(0x42) let expected_root = StateRoot(B256::repeat_byte(0x42)); @@ -599,7 +645,7 @@ mod tests { let builder = ProposalBuilder::new(state, mempool, snapshots, executor); - let (block, _) = builder.build_proposal(&parent, B256::ZERO).unwrap(); + let (block, _) = builder.build_proposal(&parent, B256::ZERO, 0).unwrap(); assert_eq!(block.txs.len(), 3); } @@ -612,13 +658,14 @@ mod tests { let executor = MockExecutor; let tx = Tx::new(vec![9].into()); - let parent = Block { - parent: kora_domain::BlockId(B256::ZERO), - height: 0, - prevrandao: B256::ZERO, - state_root: StateRoot(B256::ZERO), - txs: vec![tx.clone()], - }; + let parent = Block::new( + kora_domain::BlockId(B256::ZERO), + 0, + 0, + B256::ZERO, + StateRoot(B256::ZERO), + vec![tx.clone()], + ); let parent_digest = parent.commitment(); let parent_snapshot = Snapshot::new( None, @@ -632,7 +679,7 @@ mod 
tests { mempool.add(tx); let builder = ProposalBuilder::new(state, mempool, snapshots, executor); - let result = builder.build_proposal(&parent, B256::ZERO).unwrap(); + let result = builder.build_proposal(&parent, B256::ZERO, 0).unwrap(); assert!(result.0.txs.is_empty()); } diff --git a/crates/node/dkg/src/ceremony.rs b/crates/node/dkg/src/ceremony.rs index 4bbf9b6..e561332 100644 --- a/crates/node/dkg/src/ceremony.rs +++ b/crates/node/dkg/src/ceremony.rs @@ -4,7 +4,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use tracing::{debug, info, warn}; +use tracing::{error, info, warn}; use crate::{ DkgConfig, DkgError, DkgOutput, DkgPhase, PersistedDkgState, @@ -48,10 +48,10 @@ impl DkgCeremony { info!( validator_index = self.config.validator_index, n = self.config.n(), - t = self.config.t(), + quorum = self.config.t(), is_leader = self.is_leader(), force_restart = self.force_restart, - "Starting interactive DKG ceremony" + "Starting interactive DKG ceremony (quorum determined by N3f1)" ); // Check if we already have output @@ -227,6 +227,19 @@ impl DkgCeremony { tokio::time::sleep(backoff.next_delay()).await; } + let received = participant.received_dealer_count(); + let acks_sent = participant.acks_sent_count(); + let required = participant.required_dealer_logs(); + let total = participant.total_participants(); + error!( + received, + acks_sent, + required, + total, + timeout_secs = PHASE2_MAX_TIMEOUT_SECS, + "Phase 2 TIMEOUT: failed to collect all dealer messages within deadline. \ + This typically indicates network connectivity issues between DKG participants." + ); Err(DkgError::Timeout) } @@ -274,10 +287,18 @@ impl DkgCeremony { tokio::time::sleep(backoff.next_delay()).await; } + let ready = participant.ready_count(); + let total = participant.total_participants(); + error!( + ready, + total, + timeout_secs = PHASE2_MAX_TIMEOUT_SECS, + "Phase 2.5 TIMEOUT: not all participants signaled ready within deadline. 
\ + Some nodes may have failed to receive or send acks." + ); Err(DkgError::CeremonyFailed(format!( "Phase 2.5 timeout: only {}/{} participants ready", - participant.ready_count(), - participant.total_participants() + ready, total ))) } @@ -338,22 +359,32 @@ impl DkgCeremony { && last_request_time.elapsed() >= Duration::from_secs(5) && let Some(leader_pk) = self.config.participants.first() { - debug!(logs, required, "Requesting logs from leader"); + info!(logs, required, "Requesting dealer logs from leader"); let request_msg = ProtocolMessage::new( participant.ceremony_id(), ProtocolMessageKind::RequestLogs, ); - let _ = network.send_to(leader_pk, &request_msg); + if let Err(e) = network.send_to(leader_pk, &request_msg) { + warn!(?e, "Failed to send log request to leader"); + } last_request_time = Instant::now(); } tokio::time::sleep(backoff.next_delay()).await; } + let logs = participant.dealer_log_count(); + let required = participant.required_dealer_logs(); + error!( + logs, + required, + timeout_secs = PHASE4_MAX_TIMEOUT_SECS, + "Phase 4 TIMEOUT: failed to collect enough dealer logs within deadline. \ + Some dealers may have failed to finalize or broadcast their logs." 
+ ); Err(DkgError::CeremonyFailed(format!( "Phase 4 timeout: only collected {}/{} dealer logs", - participant.dealer_log_count(), - participant.required_dealer_logs() + logs, required ))) } @@ -386,12 +417,16 @@ impl DkgCeremony { match target { Some(pk) => { if let Err(e) = network.send_to(&pk, &msg) { - debug!(?pk, ?e, "Failed to send to peer"); + warn!( + ?pk, + ?e, + "Failed to send DKG message to peer (will retry on next cycle)" + ); } } None => { if let Err(e) = network.broadcast(&msg) { - debug!(?e, "Failed to broadcast"); + warn!(?e, "Failed to broadcast DKG message (will retry on next cycle)"); } } } diff --git a/crates/node/dkg/src/config.rs b/crates/node/dkg/src/config.rs index ef6ddc7..0cc17bd 100644 --- a/crates/node/dkg/src/config.rs +++ b/crates/node/dkg/src/config.rs @@ -1,6 +1,7 @@ use std::{path::PathBuf, time::Duration}; use commonware_cryptography::ed25519; +use commonware_utils::{Faults, N3f1}; /// Configuration for a Distributed Key Generation (DKG) ceremony. #[derive(Debug, Clone)] @@ -11,8 +12,6 @@ pub struct DkgConfig { pub validator_index: usize, /// Public keys of all validators participating in the DKG ceremony. pub participants: Vec, - /// Minimum number of participants required to reconstruct the secret (t-of-n). - pub threshold: u32, /// Chain identifier for domain separation. pub chain_id: u64, /// Directory for persisting DKG state and key shares. @@ -31,9 +30,14 @@ impl DkgConfig { self.participants.len() } - /// Returns the threshold value (t). - pub const fn t(&self) -> u32 { - self.threshold + /// Returns the quorum / threshold value (t) as determined by N3f1. + /// + /// This is `n - f` where `f = (n-1)/3`. For example: + /// - n=4: t=3 (tolerates 1 fault) + /// - n=7: t=5 (tolerates 2 faults) + /// - n=15: t=11 (tolerates 4 faults) + pub fn t(&self) -> u32 { + N3f1::quorum(self.participants.len()) } /// Returns this validator's public key derived from the identity key. 
@@ -65,7 +69,6 @@ mod tests { identity_key, validator_index: 0, participants, - threshold: 3, chain_id: 1337, data_dir: PathBuf::from("/tmp/dkg-test"), listen_addr: "127.0.0.1:8000".parse::().unwrap(), @@ -81,83 +84,30 @@ mod tests { } #[test] - fn test_n_with_single_participant() { - let identity_key = ed25519::PrivateKey::from_seed(42); - let config = DkgConfig { - identity_key, - validator_index: 0, - participants: vec![ed25519::PrivateKey::from_seed(42).public_key()], - threshold: 1, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test"), - listen_addr: "127.0.0.1:8000".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - assert_eq!(config.n(), 1); - } - - #[test] - fn test_n_with_many_participants() { - let identity_key = ed25519::PrivateKey::from_seed(42); - let participants: Vec<_> = - (0..100).map(|i| ed25519::PrivateKey::from_seed(i as u64).public_key()).collect(); - - let config = DkgConfig { - identity_key, - validator_index: 0, - participants, - threshold: 67, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test"), - listen_addr: "127.0.0.1:8000".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - assert_eq!(config.n(), 100); - } - - #[test] - fn test_t_returns_threshold() { + fn test_t_returns_n3f1_quorum() { let config = test_config(); + // n=4: f=(4-1)/3=1, quorum=4-1=3 assert_eq!(config.t(), 3); } #[test] - fn test_t_with_threshold_one() { - let identity_key = ed25519::PrivateKey::from_seed(42); - let config = DkgConfig { - identity_key, - validator_index: 0, - participants: vec![ed25519::PrivateKey::from_seed(42).public_key()], - threshold: 1, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test"), - listen_addr: "127.0.0.1:8000".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - assert_eq!(config.t(), 1); - } - - #[test] - fn test_t_with_large_threshold() { + fn test_t_with_fifteen_validators() { let identity_key = 
ed25519::PrivateKey::from_seed(42); let participants: Vec<_> = - (0..100).map(|i| ed25519::PrivateKey::from_seed(i as u64).public_key()).collect(); + (0..15).map(|i| ed25519::PrivateKey::from_seed(i as u64).public_key()).collect(); let config = DkgConfig { identity_key, validator_index: 0, participants, - threshold: 67, chain_id: 1337, data_dir: PathBuf::from("/tmp/dkg-test"), listen_addr: "127.0.0.1:8000".parse::().unwrap(), bootstrap_peers: vec![], timeout: Duration::from_secs(60), }; - assert_eq!(config.t(), 67); + // n=15: f=(15-1)/3=4, quorum=15-4=11 (NOT 10!) + assert_eq!(config.t(), 11); } #[test] @@ -168,60 +118,6 @@ mod tests { assert_eq!(actual_public_key, expected_public_key); } - #[test] - fn test_my_public_key_consistent() { - let config = test_config(); - let first_call = config.my_public_key(); - let second_call = config.my_public_key(); - assert_eq!(first_call, second_call); - } - - #[test] - fn test_my_public_key_different_identities() { - let identity_key1 = ed25519::PrivateKey::from_seed(42); - let identity_key2 = ed25519::PrivateKey::from_seed(43); - - let config1 = DkgConfig { - identity_key: identity_key1, - validator_index: 0, - participants: vec![ - ed25519::PrivateKey::from_seed(42).public_key(), - ed25519::PrivateKey::from_seed(43).public_key(), - ], - threshold: 2, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test-1"), - listen_addr: "127.0.0.1:8001".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - - let config2 = DkgConfig { - identity_key: identity_key2, - validator_index: 1, - participants: vec![ - ed25519::PrivateKey::from_seed(42).public_key(), - ed25519::PrivateKey::from_seed(43).public_key(), - ], - threshold: 2, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test-2"), - listen_addr: "127.0.0.1:8002".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - - assert_ne!(config1.my_public_key(), config2.my_public_key()); - } - - #[test] - fn 
test_dkg_config_debug_implementation() { - let config = test_config(); - let debug_str = format!("{:?}", config); - assert!(!debug_str.is_empty()); - assert!(debug_str.contains("DkgConfig")); - } - #[test] fn test_dkg_config_clone() { let config = test_config(); @@ -233,32 +129,4 @@ mod tests { assert_eq!(config.chain_id, cloned.chain_id); assert_eq!(config.validator_index, cloned.validator_index); } - - #[test] - fn test_dkg_config_participants_matches_public_keys() { - let config = test_config(); - assert_eq!(config.participants.len(), 4); - assert_eq!(config.participants.len(), config.n()); - } - - #[test] - fn test_dkg_config_threshold_boundary() { - let identity_key = ed25519::PrivateKey::from_seed(42); - let participants: Vec<_> = - (0..4).map(|i| ed25519::PrivateKey::from_seed(i as u64).public_key()).collect(); - - let config = DkgConfig { - identity_key, - validator_index: 0, - participants, - threshold: 4, - chain_id: 1337, - data_dir: PathBuf::from("/tmp/dkg-test"), - listen_addr: "127.0.0.1:8000".parse::().unwrap(), - bootstrap_peers: vec![], - timeout: Duration::from_secs(60), - }; - - assert_eq!(config.t(), config.n() as u32); - } } diff --git a/crates/node/dkg/src/network.rs b/crates/node/dkg/src/network.rs index 23b14cb..d685d5c 100644 --- a/crates/node/dkg/src/network.rs +++ b/crates/node/dkg/src/network.rs @@ -121,7 +121,9 @@ impl DkgNetwork { Ok((mut stream, addr)) => { debug!(%addr, "Accepted connection"); - stream.set_read_timeout(Some(Duration::from_secs(5))).ok(); + if let Err(e) = stream.set_read_timeout(Some(Duration::from_secs(5))) { + warn!(%addr, %e, "Failed to set read timeout on incoming connection"); + } // Read public key (32 bytes for ed25519) let mut pk_bytes = [0u8; 32]; diff --git a/crates/node/dkg/src/output.rs b/crates/node/dkg/src/output.rs index b10c3ae..e317cba 100644 --- a/crates/node/dkg/src/output.rs +++ b/crates/node/dkg/src/output.rs @@ -1,5 +1,6 @@ -use std::path::Path; +use std::{io::Write as _, path::Path}; +use 
commonware_utils::{Faults, N3f1}; use serde::{Deserialize, Serialize}; use crate::DkgError; @@ -11,11 +12,15 @@ pub struct DkgOutput { pub group_public_key: Vec, /// Coefficients of the public polynomial used for share verification. pub public_polynomial: Vec, - /// Minimum number of participants required to reconstruct the secret. + /// Quorum size (minimum active validators for consensus), computed from N3f1. + /// + /// This is always `n - (n-1)/3` where n is the participant count. The value + /// stored in output.json may be stale if it was generated before this fix; + /// on load, we recompute it from the participant count. pub threshold: u32, /// Total number of participants in the DKG ceremony. pub participants: usize, - /// This participant's index in the DKG ceremony (1-indexed). + /// This participant's index in the DKG ceremony (0-indexed). pub share_index: u32, /// This participant's secret share of the distributed key. pub share_secret: Vec, @@ -27,6 +32,8 @@ pub struct DkgOutput { struct OutputJson { group_public_key: String, public_polynomial: String, + /// Persisted as "threshold" in JSON for backward compatibility, but the + /// authoritative value is always recomputed from `participants` via N3f1. threshold: u32, participants: usize, #[serde(default)] @@ -57,12 +64,15 @@ impl DkgOutput { ShareJson { index: self.share_index, secret: hex::encode(&self.share_secret) }; let share_path = data_dir.join("share.key"); - std::fs::write(&share_path, serde_json::to_string_pretty(&share_json)?)?; + write_secret_file(&share_path, serde_json::to_string_pretty(&share_json)?.as_bytes())?; Ok(()) } /// Loads a DKG output from `output.json` and `share.key` in `data_dir`. + /// + /// The `threshold` field is always recomputed from `participants` using N3f1 + /// to ensure correctness regardless of what value was persisted in the JSON. 
pub fn load(data_dir: &Path) -> Result { let output_path = data_dir.join("output.json"); let output_str = std::fs::read_to_string(&output_path)?; @@ -80,12 +90,16 @@ impl DkgOutput { .map(|k| hex::decode(k).map_err(|e| DkgError::Serialization(e.to_string()))) .collect::, _>>()?; + // Always compute the correct quorum from N3f1 rather than trusting + // the persisted threshold value, which may be wrong in old output files. + let correct_threshold = N3f1::quorum(output.participants); + Ok(Self { group_public_key: hex::decode(&output.group_public_key) .map_err(|e| DkgError::Serialization(e.to_string()))?, public_polynomial: hex::decode(&output.public_polynomial) .map_err(|e| DkgError::Serialization(e.to_string()))?, - threshold: output.threshold, + threshold: correct_threshold, participants: output.participants, share_index: share.index, share_secret: hex::decode(&share.secret) @@ -100,6 +114,19 @@ impl DkgOutput { } } +/// Write `data` to `path` with mode `0600` so key material is never world-readable. 
+fn write_secret_file(path: &Path, data: &[u8]) -> Result<(), DkgError> { + use std::os::unix::fs::OpenOptionsExt; + let mut f = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(path)?; + f.write_all(data)?; + Ok(()) +} + impl From for DkgError { fn from(e: serde_json::Error) -> Self { Self::Serialization(e.to_string()) diff --git a/crates/node/dkg/src/protocol.rs b/crates/node/dkg/src/protocol.rs index 2fa8904..3da6d46 100644 --- a/crates/node/dkg/src/protocol.rs +++ b/crates/node/dkg/src/protocol.rs @@ -5,13 +5,13 @@ use std::collections::{BTreeMap, HashSet}; -use commonware_codec::{Read as CodecRead, ReadExt, Write}; +use commonware_codec::{Read as _, ReadExt, Write}; use commonware_cryptography::{ Hasher as _, Sha256, bls12381::{ - dkg::{ + dkg::feldman_desmedt::{ Dealer, DealerLog, DealerPrivMsg, DealerPubMsg, Info, Logs, Player, PlayerAck, - SignedDealerLog, + SignedDealerLog, observe, }, primitives::{sharing::Mode, variant::MinSig}, }, @@ -816,7 +816,6 @@ impl DkgParticipant { let mut rng = rand::rngs::OsRng; // Debug: try to observe the logs first to understand what's failing - use commonware_cryptography::bls12381::dkg::observe; match observe::( &mut rng, self.logs_for_verification(), @@ -1024,30 +1023,65 @@ impl DkgParticipant { { let max_degree = config.t(); let mut reader = log_bytes.as_slice(); - if let Ok(log) = SignedDealerLog::::read_cfg( + match SignedDealerLog::::read_cfg( &mut reader, &core::num::NonZeroU32::new(max_degree).unwrap(), - ) && let Some((dealer_pk, dealer_log)) = log.clone().check(&participant.info) - { - participant.dealer_logs.insert(dealer_pk.clone(), dealer_log); - participant.signed_logs.insert(dealer_pk, log.clone()); - participant.our_signed_log = Some(log); + ) { + Ok(log) => { + if let Some((dealer_pk, dealer_log)) = log.clone().check(&participant.info) { + participant.dealer_logs.insert(dealer_pk.clone(), dealer_log); + participant.signed_logs.insert(dealer_pk, 
log.clone()); + participant.our_signed_log = Some(log); + } else { + warn!( + "Failed to verify our own persisted dealer log during state restoration" + ); + } + } + Err(e) => { + warn!( + ?e, + "Failed to deserialize our own persisted dealer log during state restoration" + ); + } } } - for (pk_hex, log_bytes) in state.get_received_logs() { + let mut restored_log_count = 0usize; + let received_logs = state.get_received_logs(); + let total_persisted_logs = received_logs.len(); + for (pk_hex, log_bytes) in received_logs { let max_degree = config.t(); let mut reader = log_bytes.as_slice(); - if let Ok(log) = SignedDealerLog::::read_cfg( + match SignedDealerLog::::read_cfg( &mut reader, &core::num::NonZeroU32::new(max_degree).unwrap(), - ) && let Some((dealer_pk, dealer_log)) = log.clone().check(&participant.info) - { - let _ = pk_hex; - participant.dealer_logs.insert(dealer_pk.clone(), dealer_log); - participant.signed_logs.insert(dealer_pk, log); + ) { + Ok(log) => { + if let Some((dealer_pk, dealer_log)) = log.clone().check(&participant.info) { + participant.dealer_logs.insert(dealer_pk.clone(), dealer_log); + participant.signed_logs.insert(dealer_pk, log); + restored_log_count += 1; + } else { + warn!( + pk_hex, + "Failed to verify persisted dealer log during state restoration" + ); + } + } + Err(e) => { + warn!( + pk_hex, + ?e, + "Failed to deserialize persisted dealer log during state restoration" + ); + } } } + info!( + restored_log_count, + total_persisted_logs, "Restored dealer logs from persisted state" + ); Ok(Some(participant)) } diff --git a/crates/node/dkg/src/state.rs b/crates/node/dkg/src/state.rs index e2f0f7c..3a0fdc7 100644 --- a/crates/node/dkg/src/state.rs +++ b/crates/node/dkg/src/state.rs @@ -3,6 +3,7 @@ use std::{collections::BTreeMap, path::Path}; use serde::{Deserialize, Serialize}; +use tracing::warn; use crate::{CeremonySession, DkgError}; @@ -155,7 +156,13 @@ impl PersistedDkgState { /// Get our signed dealer log bytes. 
pub fn get_our_signed_log(&self) -> Option> { - self.our_signed_log.as_ref().and_then(|s| hex::decode(s).ok()) + self.our_signed_log.as_ref().and_then(|s| match hex::decode(s) { + Ok(bytes) => Some(bytes), + Err(e) => { + warn!(%e, "Failed to hex-decode persisted dealer log (our_signed_log)"); + None + } + }) } /// Add a received dealer log. @@ -167,7 +174,13 @@ impl PersistedDkgState { pub fn get_received_logs(&self) -> BTreeMap> { self.received_logs .iter() - .filter_map(|(k, v)| hex::decode(v).ok().map(|bytes| (k.clone(), bytes))) + .filter_map(|(k, v)| match hex::decode(v) { + Ok(bytes) => Some((k.clone(), bytes)), + Err(e) => { + warn!(pk_hex = %k, %e, "Failed to hex-decode persisted dealer log (received_logs)"); + None + } + }) .collect() } } diff --git a/crates/node/dkg/src/tests.rs b/crates/node/dkg/src/tests.rs index 1ac83b0..e8eb41b 100644 --- a/crates/node/dkg/src/tests.rs +++ b/crates/node/dkg/src/tests.rs @@ -14,9 +14,6 @@ fn generate_test_keys(n: usize, seed: u64) -> Vec { fn make_test_config(keys: &[ed25519::PrivateKey], index: usize, base_port: u16) -> DkgConfig { let participants: Vec<_> = keys.iter().map(|k| k.public_key()).collect(); - let n = participants.len(); - let f = (n - 1) / 3; - let threshold = (n - f) as u32; let bootstrap_peers: Vec<_> = participants .iter() @@ -29,7 +26,6 @@ fn make_test_config(keys: &[ed25519::PrivateKey], index: usize, base_port: u16) identity_key: keys[index].clone(), validator_index: index, participants, - threshold, chain_id: 1337, data_dir: PathBuf::from(format!("/tmp/dkg-test-{}", index)), listen_addr: format!("127.0.0.1:{}", base_port + index as u16).parse().unwrap(), diff --git a/crates/node/dkg/src/transport.rs b/crates/node/dkg/src/transport.rs index a9ef26d..08a5a7a 100644 --- a/crates/node/dkg/src/transport.rs +++ b/crates/node/dkg/src/transport.rs @@ -199,7 +199,7 @@ impl DkgTransportConfig { E: Spawner + BufferPooler + Clock + CryptoRngCore + Network + Resolver + Metrics, { let (mut network, oracle) = - 
discovery::Network::new(context.with_label("dkg-network"), self.inner); + discovery::Network::new(context.child("dkg_network"), self.inner); let (sender, receiver) = network.register(CHANNEL_DKG, self.quota, self.backlog); @@ -216,7 +216,7 @@ impl DkgTransport { /// /// This should be called with the DKG ceremony participants before starting. pub async fn set_participants(&mut self, participants: Set) { - self.oracle.track(0, participants).await; + self.oracle.track(0, participants); } /// Send a message to a specific peer. @@ -224,11 +224,12 @@ impl DkgTransport { where E: Spawner + Clock + CryptoRngCore + Network, { - self.sender - .send(Recipients::One(to.clone()), msg, false) - .await - .map(|_| ()) - .map_err(|e| DkgError::Network(format!("Failed to send to peer: {}", e))) + let recipients = self.sender.send(Recipients::One(to.clone()), msg, false); + if recipients.iter().any(|pk| pk == to) { + Ok(()) + } else { + Err(DkgError::Network("Failed to enqueue message for peer".into())) + } } /// Broadcast a message to all connected peers. @@ -236,18 +237,25 @@ impl DkgTransport { where E: Spawner + Clock + CryptoRngCore + Network, { - self.sender - .send(Recipients::All, msg, false) - .await - .map(|_| ()) - .map_err(|e| DkgError::Network(format!("Failed to broadcast: {}", e))) + let recipients = self.sender.send(Recipients::All, msg, false); + if recipients.is_empty() { + Err(DkgError::Network("Failed to enqueue broadcast for any peer".into())) + } else { + Ok(()) + } } /// Receive the next message. /// /// Returns the sender's public key and the message bytes. 
pub async fn recv(&mut self) -> Option<(ed25519::PublicKey, Bytes)> { - self.receiver.recv().await.ok().map(|(sender, message)| (sender, Bytes::from(message))) + match self.receiver.recv().await { + Ok((sender, message)) => Some((sender, Bytes::from(message))), + Err(e) => { + tracing::warn!(%e, "DKG transport receive error"); + None + } + } } } diff --git a/crates/node/domain/Cargo.toml b/crates/node/domain/Cargo.toml index aa64f09..8aeb4c5 100644 --- a/crates/node/domain/Cargo.toml +++ b/crates/node/domain/Cargo.toml @@ -18,7 +18,7 @@ commonware-cryptography.workspace = true # Execution alloy-evm = { workspace = true, features = ["std"] } -alloy-primitives.workspace = true +alloy-primitives = { workspace = true, features = ["serde"] } alloy-consensus = { workspace = true, optional = true } alloy-eips = { workspace = true, optional = true } k256 = { workspace = true, optional = true } diff --git a/crates/node/domain/src/block.rs b/crates/node/domain/src/block.rs index acd923e..5540544 100644 --- a/crates/node/domain/src/block.rs +++ b/crates/node/domain/src/block.rs @@ -1,5 +1,7 @@ //! Block types +use std::sync::OnceLock; + use alloy_evm::revm::primitives::{B256, keccak256}; use bytes::{Buf, BufMut}; use commonware_codec::{Encode, EncodeSize, Error as CodecError, RangeCfg, Read, ReadExt, Write}; @@ -16,25 +18,111 @@ pub struct BlockCfg { pub tx: TxCfg, } -#[derive(Clone, Debug, PartialEq, Eq)] -/// Example block type agreed on by consensus (via its digest). +/// Block type agreed on by consensus (via its digest). +/// +/// The block identifier (keccak256 of the encoded block) is cached on first +/// access via [`OnceLock`] to avoid redundant serialization and hashing on +/// the hot path where `id()`, `digest()`, and `commitment()` are called +/// multiple times per consensus round. pub struct Block { /// Identifier of the parent block. pub parent: BlockId, /// Block height (number of committed ancestors). 
pub height: u64, + /// Unix timestamp for this block, in seconds. + pub timestamp: u64, /// Seed-derived randomness used for future prevrandao. pub prevrandao: B256, /// State commitment resulting from this block (pre-commit QMDB root). pub state_root: StateRoot, /// Transactions included in the block. pub txs: Vec, + + /// Cached block identifier, computed lazily on first call to [`Self::id`]. + /// + /// Excluded from equality comparisons, debug output, and codec encoding. + cached_id: OnceLock, +} + +impl Clone for Block { + fn clone(&self) -> Self { + Self { + parent: self.parent, + height: self.height, + timestamp: self.timestamp, + prevrandao: self.prevrandao, + state_root: self.state_root, + txs: self.txs.clone(), + // Propagate the cached ID if already computed. + cached_id: self.cached_id.get().map_or_else(OnceLock::new, |id| { + let lock = OnceLock::new(); + let _ = lock.set(*id); + lock + }), + } + } +} + +impl std::fmt::Debug for Block { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Block") + .field("parent", &self.parent) + .field("height", &self.height) + .field("timestamp", &self.timestamp) + .field("prevrandao", &self.prevrandao) + .field("state_root", &self.state_root) + .field("txs", &self.txs) + .finish() + } } +impl PartialEq for Block { + fn eq(&self, other: &Self) -> bool { + self.parent == other.parent + && self.height == other.height + && self.timestamp == other.timestamp + && self.prevrandao == other.prevrandao + && self.state_root == other.state_root + && self.txs == other.txs + } +} + +impl Eq for Block {} + impl Block { + /// Construct a new block. + /// + /// Prefer this over struct-literal syntax; it properly initializes the + /// internal [`OnceLock`] cache (lazily populated on first call to + /// [`Self::id`]). 
+ #[must_use] + pub const fn new( + parent: BlockId, + height: u64, + timestamp: u64, + prevrandao: B256, + state_root: StateRoot, + txs: Vec, + ) -> Self { + Self { parent, height, timestamp, prevrandao, state_root, txs, cached_id: OnceLock::new() } + } + /// Compute the block identifier from its encoded contents. + /// + /// The result is cached internally so that repeated calls (e.g. from + /// [`Digestible::digest`] and [`Committable::commitment`]) do not + /// re-serialize and re-hash the block. pub fn id(&self) -> BlockId { - BlockId(keccak256(self.encode())) + *self.cached_id.get_or_init(|| BlockId(keccak256(self.encode()))) + } + + /// Choose a block timestamp that tracks wall-clock time without going backwards. + /// + /// `now_secs` is the current wall-clock time in seconds since the Unix + /// epoch. When blocks are produced faster than one per second, multiple + /// consecutive blocks may share the same timestamp. + pub const fn next_timestamp(now_secs: u64, parent_timestamp: u64) -> Option { + if now_secs > parent_timestamp { Some(now_secs) } else { Some(parent_timestamp) } } } @@ -76,6 +164,7 @@ impl Write for Block { fn write(&self, buf: &mut impl BufMut) { self.parent.write(buf); self.height.write(buf); + self.timestamp.write(buf); Idents::write_b256(&self.prevrandao, buf); self.state_root.write(buf); self.txs.write(buf); @@ -86,6 +175,7 @@ impl EncodeSize for Block { fn encode_size(&self) -> usize { self.parent.encode_size() + self.height.encode_size() + + self.timestamp.encode_size() + 32 + self.state_root.encode_size() + self.txs.encode_size() @@ -98,10 +188,11 @@ impl Read for Block { fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result { let parent = BlockId::read(buf)?; let height = u64::read(buf)?; + let timestamp = u64::read(buf)?; let prevrandao = Idents::read_b256(buf)?; let state_root = StateRoot::read(buf)?; let txs = Vec::::read_cfg(buf, &(RangeCfg::new(0..=cfg.max_txs), cfg.tx))?; - Ok(Self { parent, height, prevrandao, 
state_root, txs }) + Ok(Self::new(parent, height, timestamp, prevrandao, state_root, txs)) } } @@ -118,13 +209,14 @@ mod tests { } fn sample_block() -> Block { - Block { - parent: BlockId(B256::repeat_byte(0x01)), - height: 42, - prevrandao: B256::repeat_byte(0xab), - state_root: StateRoot(B256::repeat_byte(0xcd)), - txs: vec![Tx::new(Bytes::from_static(&[0xde, 0xad, 0xbe, 0xef]))], - } + Block::new( + BlockId(B256::repeat_byte(0x01)), + 42, + 1_700_000_042, + B256::repeat_byte(0xab), + StateRoot(B256::repeat_byte(0xcd)), + vec![Tx::new(Bytes::from_static(&[0xde, 0xad, 0xbe, 0xef]))], + ) } #[test] @@ -143,6 +235,15 @@ mod tests { assert_ne!(block1.id(), block2.id()); } + #[test] + fn block_id_differs_by_timestamp() { + let block1 = sample_block(); + let mut block2 = sample_block(); + block2.timestamp += 1; + assert_ne!(block1.id(), block2.id()); + assert_ne!(block1.commitment(), block2.commitment()); + } + #[test] fn block_id_differs_by_parent() { let block1 = sample_block(); @@ -181,13 +282,8 @@ mod tests { #[test] fn empty_block_roundtrip() { - let block = Block { - parent: BlockId(B256::ZERO), - height: 0, - prevrandao: B256::ZERO, - state_root: StateRoot(B256::ZERO), - txs: vec![], - }; + let block = + Block::new(BlockId(B256::ZERO), 0, 0, B256::ZERO, StateRoot(B256::ZERO), vec![]); let encoded = block.encode(); let decoded = Block::decode_cfg(encoded, &default_block_cfg()).expect("decode"); assert_eq!(block, decoded); @@ -200,6 +296,23 @@ mod tests { assert_eq!(block.height().get(), 42); } + #[test] + fn next_timestamp_uses_clock_when_ahead() { + assert_eq!(Block::next_timestamp(1_700_000_100, 1_700_000_042), Some(1_700_000_100)); + } + + #[test] + fn next_timestamp_allows_same_second_blocks_when_clock_lags() { + assert_eq!(Block::next_timestamp(1_700_000_042, 1_700_000_042), Some(1_700_000_042)); + assert_eq!(Block::next_timestamp(1_700_000_000, 1_700_000_042), Some(1_700_000_042)); + } + + #[test] + fn next_timestamp_handles_u64_max() { + 
assert_eq!(Block::next_timestamp(0, u64::MAX), Some(u64::MAX)); + assert_eq!(Block::next_timestamp(u64::MAX, u64::MAX), Some(u64::MAX)); + } + #[test] fn block_parent_commitment() { use commonware_consensus::Block as _; diff --git a/crates/node/domain/src/bootstrap.rs b/crates/node/domain/src/bootstrap.rs index a7aa7ab..7fd4a49 100644 --- a/crates/node/domain/src/bootstrap.rs +++ b/crates/node/domain/src/bootstrap.rs @@ -1,4 +1,4 @@ -//! Boostrap configuration. +//! Bootstrap configuration. use std::{path::Path, str::FromStr}; @@ -10,10 +10,14 @@ use crate::Tx; /// Bootstrap configuration for genesis state and initial transactions. #[derive(Clone, Debug)] pub struct BootstrapConfig { + /// Chain ID declared in the genesis file. + pub chain_id: u64, /// Initial account allocations (address, balance) for genesis. pub genesis_alloc: Vec<(Address, U256)>, /// Transactions to execute during bootstrap. pub bootstrap_txs: Vec, + /// Genesis block Unix timestamp, in seconds. + pub genesis_timestamp: u64, } #[derive(Serialize, Deserialize)] @@ -32,17 +36,29 @@ struct AllocationJson { impl BootstrapConfig { /// Create a new bootstrap configuration. #[must_use] - pub const fn new(genesis_alloc: Vec<(Address, U256)>, bootstrap_txs: Vec) -> Self { - Self { genesis_alloc, bootstrap_txs } + pub const fn new( + chain_id: u64, + genesis_alloc: Vec<(Address, U256)>, + bootstrap_txs: Vec, + ) -> Self { + Self { chain_id, genesis_alloc, bootstrap_txs, genesis_timestamp: 0 } + } + + /// Set the genesis block timestamp. + #[must_use] + pub const fn with_genesis_timestamp(mut self, genesis_timestamp: u64) -> Self { + self.genesis_timestamp = genesis_timestamp; + self } /// Load bootstrap configuration from a genesis JSON file. 
pub fn load(genesis_path: &Path) -> Result { let content = std::fs::read_to_string(genesis_path)?; let genesis: GenesisJson = serde_json::from_str(&content)?; + let GenesisJson { chain_id, timestamp, allocations } = genesis; - let mut genesis_alloc = Vec::with_capacity(genesis.allocations.len()); - for alloc in genesis.allocations { + let mut genesis_alloc = Vec::with_capacity(allocations.len()); + for alloc in allocations { let address = Address::from_str(&alloc.address) .map_err(|e| BootstrapError::Parse(format!("invalid address: {}", e)))?; let balance = U256::from_str(&alloc.balance) @@ -50,7 +66,12 @@ impl BootstrapConfig { genesis_alloc.push((address, balance)); } - Ok(Self { genesis_alloc, bootstrap_txs: Vec::new() }) + Ok(Self { + chain_id, + genesis_alloc, + bootstrap_txs: Vec::new(), + genesis_timestamp: timestamp, + }) } } @@ -88,3 +109,48 @@ impl From for BootstrapError { Self::Json(e) } } + +#[cfg(test)] +mod tests { + use std::{fs, path::PathBuf}; + + use super::*; + + fn temp_genesis_path() -> PathBuf { + std::env::temp_dir().join(format!( + "kora-genesis-{}-{}.json", + std::process::id(), + std::thread::current().name().unwrap_or("test") + )) + } + + #[test] + fn new_defaults_genesis_timestamp_to_zero() { + let bootstrap = BootstrapConfig::new(1337, Vec::new(), Vec::new()); + assert_eq!(bootstrap.chain_id, 1337); + assert_eq!(bootstrap.genesis_timestamp, 0); + } + + #[test] + fn load_preserves_genesis_timestamp() { + let path = temp_genesis_path(); + let json = r#"{ + "chain_id": 1337, + "timestamp": 1700000000, + "allocations": [ + { + "address": "0x0000000000000000000000000000000000000001", + "balance": "42" + } + ] + }"#; + + fs::write(&path, json).expect("write genesis"); + let bootstrap = BootstrapConfig::load(&path).expect("load genesis"); + fs::remove_file(path).expect("remove genesis"); + + assert_eq!(bootstrap.chain_id, 1337); + assert_eq!(bootstrap.genesis_timestamp, 1_700_000_000); + assert_eq!(bootstrap.genesis_alloc.len(), 1); + } +} 
diff --git a/crates/node/domain/src/events.rs b/crates/node/domain/src/events.rs index 2458f65..1716796 100644 --- a/crates/node/domain/src/events.rs +++ b/crates/node/domain/src/events.rs @@ -2,9 +2,10 @@ use std::sync::Arc; -use alloy_evm::revm::primitives::B256; +use alloy_primitives::{Address, B256, U256}; use futures::channel::mpsc::{UnboundedReceiver, UnboundedSender, unbounded}; use parking_lot::Mutex; +use serde::{Deserialize, Serialize}; use super::TxId; use crate::ConsensusDigest; @@ -20,6 +21,43 @@ pub enum LedgerEvent { SeedUpdated(ConsensusDigest, B256), } +/// Transaction lifecycle events emitted by the mempool. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "camelCase", rename_all_fields = "camelCase")] +pub enum MempoolEvent { + /// Transaction accepted into the mempool. + TxAdded { + /// Transaction hash. + hash: B256, + /// Sender address recovered from the transaction signature. + from: Address, + /// Recipient address, or `None` for contract creation. + to: Option
, + /// Value transferred by the transaction. + value: U256, + /// Effective gas price used for ordering. + gas_price: U256, + /// Transaction nonce. + nonce: u64, + }, + /// Transaction included in a finalized block. + TxIncluded { + /// Transaction hash. + hash: B256, + /// Finalized block number. + block_number: u64, + /// Finalized block hash. + block_hash: B256, + }, + /// Transaction removed from the mempool without inclusion. + TxEvicted { + /// Transaction hash. + hash: B256, + /// Human-readable eviction reason. + reason: String, + }, +} + /// Pub-sub registry for ledger events. #[derive(Clone, Debug)] pub struct LedgerEvents { @@ -55,7 +93,7 @@ impl Default for LedgerEvents { #[cfg(test)] mod tests { - use alloy_primitives::B256; + use alloy_primitives::{Address, B256, U256}; use commonware_cryptography::sha256::Digest; use super::*; @@ -96,7 +134,7 @@ mod tests { let tx_id = TxId(B256::repeat_byte(0x42)); events.publish(LedgerEvent::TransactionSubmitted(tx_id)); - let received = receiver.try_next().expect("channel open").expect("should receive event"); + let received = receiver.try_recv().expect("should receive event"); if let LedgerEvent::TransactionSubmitted(id) = received { assert_eq!(id.0, B256::repeat_byte(0x42)); } else { @@ -113,8 +151,8 @@ mod tests { let tx_id = TxId(B256::repeat_byte(0x01)); events.publish(LedgerEvent::TransactionSubmitted(tx_id)); - let e1 = r1.try_next().expect("channel open").expect("r1 should receive"); - let e2 = r2.try_next().expect("channel open").expect("r2 should receive"); + let e1 = r1.try_recv().expect("r1 should receive"); + let e2 = r2.try_recv().expect("r2 should receive"); assert!(matches!(e1, LedgerEvent::TransactionSubmitted(_))); assert!(matches!(e2, LedgerEvent::TransactionSubmitted(_))); @@ -132,4 +170,22 @@ mod tests { events.publish(LedgerEvent::SnapshotPersisted(digest)); assert_eq!(events.listeners.lock().len(), 0); } + + #[test] + fn mempool_event_serde_roundtrip() { + let event = MempoolEvent::TxAdded 
{ + hash: B256::repeat_byte(0x01), + from: Address::repeat_byte(0x02), + to: Some(Address::repeat_byte(0x03)), + value: U256::from(1_000), + gas_price: U256::from(1_000_000_000u64), + nonce: 42, + }; + + let json = serde_json::to_string(&event).expect("serialize mempool event"); + assert!(json.contains("\"type\":\"txAdded\"")); + assert!(json.contains("\"gasPrice\"")); + let parsed: MempoolEvent = serde_json::from_str(&json).expect("deserialize mempool event"); + assert_eq!(parsed, event); + } } diff --git a/crates/node/domain/src/evm.rs b/crates/node/domain/src/evm.rs index ee61098..6f7c57f 100644 --- a/crates/node/domain/src/evm.rs +++ b/crates/node/domain/src/evm.rs @@ -22,6 +22,10 @@ impl Evm { } /// Sign a simple EIP-1559 transfer transaction and return its encoded bytes. + /// + /// `max_fee_per_gas` must be at least as large as the block's `base_fee_per_gas` + /// for the transaction to be included by the EVM. Pass `0` when the block + /// context has no base fee (e.g. in unit tests that use `base_fee_per_gas: None`). 
#[allow(clippy::too_many_arguments)] pub fn sign_eip1559_transfer( key: &SigningKey, @@ -30,13 +34,15 @@ impl Evm { value: U256, nonce: u64, gas_limit: u64, + max_fee_per_gas: u128, + max_priority_fee_per_gas: u128, ) -> Tx { let tx = TxEip1559 { chain_id, nonce, gas_limit, - max_fee_per_gas: 0, - max_priority_fee_per_gas: 0, + max_fee_per_gas, + max_priority_fee_per_gas, to: TxKind::Call(to), value, access_list: Default::default(), @@ -97,7 +103,7 @@ mod tests { let to = Address::repeat_byte(0xab); let value = U256::from(1000); - let tx = Evm::sign_eip1559_transfer(&key, 1, to, value, 0, 21000); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, value, 0, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -111,7 +117,7 @@ mod tests { let to = Address::repeat_byte(0xab); let chain_id = 42u64; - let tx = Evm::sign_eip1559_transfer(&key, chain_id, to, U256::ZERO, 0, 21000); + let tx = Evm::sign_eip1559_transfer(&key, chain_id, to, U256::ZERO, 0, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -124,7 +130,7 @@ mod tests { let to = Address::repeat_byte(0xab); let nonce = 123u64; - let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, nonce, 21000); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, nonce, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -137,7 +143,7 @@ mod tests { let to = Address::repeat_byte(0xab); let gas_limit = 50000u64; - let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, gas_limit); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, gas_limit, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -150,7 +156,7 @@ mod tests { let to = Address::repeat_byte(0xab); let value = U256::from(999_999); - let tx = Evm::sign_eip1559_transfer(&key, 1, to, value, 
0, 21000); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, value, 0, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -162,7 +168,7 @@ mod tests { let key = signing_key_from_seed(1); let to = Address::repeat_byte(0xcd); - let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, 21000); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); @@ -174,8 +180,8 @@ mod tests { let key = signing_key_from_seed(1); let to = Address::repeat_byte(0xab); - let tx1 = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(100), 0, 21000); - let tx2 = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(200), 0, 21000); + let tx1 = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(100), 0, 21000, 0, 0); + let tx2 = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(200), 0, 21000, 0, 0); assert_ne!(tx1.bytes, tx2.bytes); } @@ -185,8 +191,8 @@ mod tests { let key = signing_key_from_seed(1); let to = Address::repeat_byte(0xab); - let tx1 = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, 21000); - let tx2 = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 1, 21000); + let tx1 = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 0, 21000, 0, 0); + let tx2 = Evm::sign_eip1559_transfer(&key, 1, to, U256::ZERO, 1, 21000, 0, 0); assert_ne!(tx1.bytes, tx2.bytes); } @@ -197,7 +203,7 @@ mod tests { let to = Address::repeat_byte(0xef); let sender = Evm::address_from_key(&key); - let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(500), 0, 21000); + let tx = Evm::sign_eip1559_transfer(&key, 1, to, U256::from(500), 0, 21000, 0, 0); let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).expect("valid envelope encoding"); diff --git a/crates/node/domain/src/idents.rs b/crates/node/domain/src/idents.rs index bc25f82..1a7d2ef 100644 --- a/crates/node/domain/src/idents.rs +++ 
b/crates/node/domain/src/idents.rs @@ -117,13 +117,14 @@ mod tests { #[test] fn test_block_roundtrip_and_id_stable() { let txs = vec![Tx { bytes: Bytes::new() }, Tx { bytes: Bytes::from(vec![9, 9, 9]) }]; - let block = Block { - parent: BlockId(B256::from([0xAAu8; 32])), - height: 7, - prevrandao: B256::from([0x55u8; 32]), - state_root: StateRoot(B256::from([0xBBu8; 32])), + let block = Block::new( + BlockId(B256::from([0xAAu8; 32])), + 7, + 1_700_000_007, + B256::from([0x55u8; 32]), + StateRoot(B256::from([0xBBu8; 32])), txs, - }; + ); let encoded = block.encode(); let decoded = Block::decode_cfg(encoded.clone(), &cfg()).expect("decode block"); assert_eq!(block, decoded); diff --git a/crates/node/domain/src/lib.rs b/crates/node/domain/src/lib.rs index 102dc2a..ceaf4cd 100644 --- a/crates/node/domain/src/lib.rs +++ b/crates/node/domain/src/lib.rs @@ -11,7 +11,7 @@ mod commitment; pub use commitment::{AccountChange, StateChanges, StateChangesCfg}; mod events; -pub use events::{LedgerEvent, LedgerEvents}; +pub use events::{LedgerEvent, LedgerEvents, MempoolEvent}; mod bootstrap; pub use bootstrap::{BootstrapConfig, BootstrapError}; diff --git a/crates/node/executor/Cargo.toml b/crates/node/executor/Cargo.toml index c477320..bd69e02 100644 --- a/crates/node/executor/Cargo.toml +++ b/crates/node/executor/Cargo.toml @@ -11,16 +11,20 @@ description = "Block execution abstractions and REVM implementation for Kora" alloy-consensus.workspace = true alloy-eips.workspace = true alloy-primitives.workspace = true -alloy-rlp.workspace = true futures.workspace = true kora-qmdb = { path = "../../storage/qmdb" } kora-traits = { path = "../../storage/traits" } revm.workspace = true thiserror.workspace = true +tracing.workspace = true tokio = { workspace = true, features = ["rt"] } [dev-dependencies] +alloy-consensus.workspace = true +alloy-eips.workspace = true +k256.workspace = true rstest.workspace = true +sha3.workspace = true tokio = { workspace = true, features = ["macros"] } 
[lints] diff --git a/crates/node/executor/src/adapter.rs b/crates/node/executor/src/adapter.rs index 9d5cc8e..ead0d2b 100644 --- a/crates/node/executor/src/adapter.rs +++ b/crates/node/executor/src/adapter.rs @@ -1,8 +1,15 @@ //! State database adapter for REVM. //! //! Note: REVM's `DatabaseRef` trait is synchronous, so we bridge async StateDb traits into -//! the sync REVM interface. When executing inside a Tokio runtime, we use `block_in_place` -//! so async storage can continue making progress on runtime workers. +//! the sync REVM interface. +//! +//! Callers are expected to run the entire EVM execution inside +//! `tokio::task::spawn_blocking` so that async worker threads remain free for +//! consensus, networking, and RPC. Inside a `spawn_blocking` thread, +//! `block_in_place` is a no-op (tokio 1.28+) and `Handle::block_on` drives +//! the state DB futures without starving any async workers. + +use std::collections::HashMap; use alloy_primitives::{Address, B256, KECCAK256_EMPTY, U256}; use kora_traits::{StateDbError, StateDbRead}; @@ -12,6 +19,16 @@ use tokio::runtime::RuntimeFlavor; use crate::ExecutionError; /// Wrapper for blocking async operations in sync contexts. +/// +/// When a tokio multi-thread runtime is available (the normal production +/// case -- either from a `spawn_blocking` thread or an async worker), +/// `block_in_place` + `handle.block_on` is used. On a `spawn_blocking` +/// thread (the expected production path), `block_in_place` is a no-op +/// (tokio >= 1.28) and `handle.block_on` safely drives the future without +/// starving async workers. +/// +/// When no tokio runtime is present (e.g. synchronous unit tests), we fall +/// back to `futures::executor::block_on`. 
fn block_on(f: F) -> F::Output { if let Ok(handle) = tokio::runtime::Handle::try_current() && handle.runtime_flavor() == RuntimeFlavor::MultiThread @@ -26,13 +43,15 @@ fn block_on(f: F) -> F::Output { #[derive(Clone, Debug)] pub struct StateDbAdapter { state: S, + /// Recent block hashes keyed by block number, used by the BLOCKHASH opcode. + block_hashes: HashMap, } impl StateDbAdapter { - /// Create a new adapter wrapping the given state. + /// Create a new adapter wrapping the given state and recent block hashes. #[must_use] - pub const fn new(state: S) -> Self { - Self { state } + pub const fn new(state: S, block_hashes: HashMap) -> Self { + Self { state, block_hashes } } /// Get the underlying state reference. @@ -46,10 +65,15 @@ impl DatabaseRef for StateDbAdapter { type Error = ExecutionError; fn basic_ref(&self, address: Address) -> Result, Self::Error> { - match block_on(self.state.nonce(&address)) { - Ok(nonce) => { - let balance = block_on(self.state.balance(&address))?; - let code_hash = block_on(self.state.code_hash(&address))?; + // Batch all three reads into a single block_on call to reduce the + // overhead of the async-to-sync bridge (block_in_place + handle.block_on). 
+ match block_on(async { + let nonce = self.state.nonce(&address).await?; + let balance = self.state.balance(&address).await?; + let code_hash = self.state.code_hash(&address).await?; + Ok::<_, StateDbError>((nonce, balance, code_hash)) + }) { + Ok((nonce, balance, code_hash)) => { Ok(Some(AccountInfo { nonce, balance, code_hash, code: None, account_id: None })) } Err(StateDbError::AccountNotFound(_)) => Ok(None), @@ -73,19 +97,85 @@ impl DatabaseRef for StateDbAdapter { } } - fn block_hash_ref(&self, _number: u64) -> Result { - // Block hash lookups not supported yet - Ok(B256::ZERO) + fn block_hash_ref(&self, number: u64) -> Result { + Ok(self.block_hashes.get(&number).copied().unwrap_or(B256::ZERO)) } } #[cfg(test)] mod tests { + use alloy_primitives::Bytes; + use kora_traits::StateDbError; + use super::*; + /// Minimal mock that satisfies `StateDbRead` for tests that only exercise + /// the block-hash lookup path and never actually call the state methods. + #[derive(Clone)] + struct NoopState; + + impl StateDbRead for NoopState { + async fn nonce(&self, _: &Address) -> Result { + Ok(0) + } + + async fn balance(&self, _: &Address) -> Result { + Ok(U256::ZERO) + } + + async fn code_hash(&self, _: &Address) -> Result { + Ok(B256::ZERO) + } + + async fn code(&self, _: &B256) -> Result { + Ok(Bytes::new()) + } + + async fn storage(&self, _: &Address, _: &U256) -> Result { + Ok(U256::ZERO) + } + } + #[test] fn adapter_new() { - let adapter = StateDbAdapter::new(()); - assert_eq!(adapter.state(), &()); + let adapter = StateDbAdapter::new(NoopState, HashMap::new()); + // Verify the adapter is created successfully; state() returns a reference. 
+ let _ = adapter.state(); + } + + #[test] + fn block_hash_ref_returns_known_hash() { + let mut hashes = HashMap::new(); + let expected = B256::repeat_byte(0xab); + hashes.insert(42, expected); + let adapter = StateDbAdapter::new(NoopState, hashes); + + let result = DatabaseRef::block_hash_ref(&adapter, 42).unwrap(); + assert_eq!(result, expected); + } + + #[test] + fn block_hash_ref_returns_zero_for_unknown() { + let adapter = StateDbAdapter::new(NoopState, HashMap::new()); + + let result = DatabaseRef::block_hash_ref(&adapter, 999).unwrap(); + assert_eq!(result, B256::ZERO); + } + + #[test] + fn block_hash_ref_multiple_entries() { + let mut hashes = HashMap::new(); + let hash_10 = B256::repeat_byte(0x10); + let hash_11 = B256::repeat_byte(0x11); + let hash_12 = B256::repeat_byte(0x12); + hashes.insert(10, hash_10); + hashes.insert(11, hash_11); + hashes.insert(12, hash_12); + let adapter = StateDbAdapter::new(NoopState, hashes); + + assert_eq!(DatabaseRef::block_hash_ref(&adapter, 10).unwrap(), hash_10); + assert_eq!(DatabaseRef::block_hash_ref(&adapter, 11).unwrap(), hash_11); + assert_eq!(DatabaseRef::block_hash_ref(&adapter, 12).unwrap(), hash_12); + assert_eq!(DatabaseRef::block_hash_ref(&adapter, 13).unwrap(), B256::ZERO); } } diff --git a/crates/node/executor/src/config.rs b/crates/node/executor/src/config.rs index 9ea1620..9e48dbf 100644 --- a/crates/node/executor/src/config.rs +++ b/crates/node/executor/src/config.rs @@ -82,13 +82,6 @@ impl ExecutionConfig { self.gas_limit_bounds = bounds; self } - - /// Set the base fee parameters. - #[must_use] - pub const fn with_base_fee_params(mut self, params: BaseFeeParams) -> Self { - self.base_fee_params = params; - self - } } impl Default for ExecutionConfig { diff --git a/crates/node/executor/src/context.rs b/crates/node/executor/src/context.rs index b71b697..08221ba 100644 --- a/crates/node/executor/src/context.rs +++ b/crates/node/executor/src/context.rs @@ -1,8 +1,13 @@ //! Block execution context. 
+use std::collections::HashMap; + use alloy_consensus::Header; use alloy_primitives::B256; +/// Maximum number of recent block hashes retained for the BLOCKHASH opcode. +const MAX_BLOCK_HASHES: usize = 256; + /// Context for block execution. /// /// Contains the block header and additional execution parameters. @@ -16,13 +21,22 @@ pub struct BlockContext { pub prevrandao: B256, /// Blob base fee for Cancun+ (EIP-4844). pub blob_base_fee: Option, + /// Recent block hashes keyed by block number for the BLOCKHASH opcode. + /// Contains up to the last 256 block hashes. + pub recent_block_hashes: HashMap, } impl BlockContext { /// Create a new block context. #[must_use] - pub const fn new(header: Header, parent_hash: B256, prevrandao: B256) -> Self { - Self { header, parent_hash, prevrandao, blob_base_fee: None } + pub fn new(header: Header, parent_hash: B256, prevrandao: B256) -> Self { + Self { + header, + parent_hash, + prevrandao, + blob_base_fee: None, + recent_block_hashes: HashMap::new(), + } } /// Set the blob base fee. @@ -32,9 +46,17 @@ impl BlockContext { self } - /// Get the base fee from the header. - pub fn base_fee(&self) -> u64 { - self.header.base_fee_per_gas.unwrap_or_default() + /// Set the recent block hashes for BLOCKHASH opcode support. + /// + /// Retains at most 256 entries (the EVM BLOCKHASH depth limit). 
+ #[must_use] + pub fn with_recent_block_hashes(mut self, hashes: HashMap) -> Self { + if hashes.len() > MAX_BLOCK_HASHES { + self.recent_block_hashes = hashes.into_iter().take(MAX_BLOCK_HASHES).collect(); + } else { + self.recent_block_hashes = hashes; + } + self } } @@ -82,6 +104,7 @@ mod tests { assert_eq!(context.prevrandao, B256::ZERO); assert_eq!(context.parent_hash, parent_hash); assert!(context.blob_base_fee.is_none()); + assert!(context.recent_block_hashes.is_empty()); } #[test] @@ -91,6 +114,29 @@ mod tests { assert_eq!(context.blob_base_fee, Some(1000)); } + #[test] + fn block_context_with_recent_block_hashes() { + let header = Header::default(); + let mut hashes = HashMap::new(); + hashes.insert(10, B256::repeat_byte(0x10)); + hashes.insert(11, B256::repeat_byte(0x11)); + let context = + BlockContext::new(header, B256::ZERO, B256::ZERO).with_recent_block_hashes(hashes); + assert_eq!(context.recent_block_hashes.len(), 2); + assert_eq!(context.recent_block_hashes[&10], B256::repeat_byte(0x10)); + } + + #[test] + fn block_context_with_recent_block_hashes_truncates() { + let header = Header::default(); + let hashes: HashMap = + (0..300).map(|i| (i, B256::repeat_byte(i as u8))).collect(); + assert_eq!(hashes.len(), 300); + let context = + BlockContext::new(header, B256::ZERO, B256::ZERO).with_recent_block_hashes(hashes); + assert_eq!(context.recent_block_hashes.len(), MAX_BLOCK_HASHES); + } + #[test] fn parent_block_from_header() { let header = Header { diff --git a/crates/node/executor/src/error.rs b/crates/node/executor/src/error.rs index e17a047..bd741bb 100644 --- a/crates/node/executor/src/error.rs +++ b/crates/node/executor/src/error.rs @@ -37,6 +37,17 @@ pub enum ExecutionError { /// Code not found for hash. #[error("code not found: {0}")] CodeNotFound(B256), + + /// QMDB commit failed during block execution. 
+ /// + /// The REVM `DatabaseCommit` trait is infallible, so a QMDB write failure + /// during inter-transaction state commit cannot be propagated through the + /// return type. Instead, the storage layer sets an atomic flag that the + /// executor checks after the transaction loop. When this error is returned, + /// one or more transactions in the block may have executed against stale + /// state and the block's results must be discarded. + #[error("QMDB commit failed during block execution — results are unreliable")] + StateCommit, } impl DBErrorMarker for ExecutionError {} diff --git a/crates/node/executor/src/lib.rs b/crates/node/executor/src/lib.rs index 0ffc67d..ebd5b0b 100644 --- a/crates/node/executor/src/lib.rs +++ b/crates/node/executor/src/lib.rs @@ -25,9 +25,3 @@ pub use revm::{CallParams, RevmExecutor, calculate_base_fee}; mod traits; pub use traits::BlockExecutor; - -mod validation; -pub use validation::{ - ACCESS_LIST_ADDRESS_GAS, ACCESS_LIST_STORAGE_KEY_GAS, MAX_BLOBS_PER_TX, TX_BASE_GAS, - TX_CREATE_GAS, TX_DATA_NON_ZERO_GAS, TX_DATA_ZERO_GAS, TxValidator, ValidatedTx, -}; diff --git a/crates/node/executor/src/outcome.rs b/crates/node/executor/src/outcome.rs index 38eaef2..c9ff4ea 100644 --- a/crates/node/executor/src/outcome.rs +++ b/crates/node/executor/src/outcome.rs @@ -13,13 +13,25 @@ pub struct ExecutionOutcome { pub receipts: Vec, /// Total gas used by all transactions. pub gas_used: u64, + /// Addresses that were selfdestructed during block execution. + /// + /// These addresses had their code and balance removed, but their storage + /// entries in QMDB become orphaned (keyed by the old generation). A + /// future garbage collector can use this list to reclaim dead storage + /// once Commonware supports prefix scanning. + pub selfdestructed_addresses: Vec
, } impl ExecutionOutcome { /// Create a new empty execution outcome. #[must_use] pub fn new() -> Self { - Self { changes: ChangeSet::new(), receipts: Vec::new(), gas_used: 0 } + Self { + changes: ChangeSet::new(), + receipts: Vec::new(), + gas_used: 0, + selfdestructed_addresses: Vec::new(), + } } } @@ -83,5 +95,6 @@ mod tests { assert!(outcome.changes.is_empty()); assert!(outcome.receipts.is_empty()); assert_eq!(outcome.gas_used, 0); + assert!(outcome.selfdestructed_addresses.is_empty()); } } diff --git a/crates/node/executor/src/revm.rs b/crates/node/executor/src/revm.rs index 4e61f97..8d28ad6 100644 --- a/crates/node/executor/src/revm.rs +++ b/crates/node/executor/src/revm.rs @@ -7,7 +7,7 @@ use alloy_primitives::{B256, Bytes, U256, keccak256}; use kora_qmdb::{AccountUpdate, ChangeSet}; use kora_traits::StateDb; use revm::{ - Context, ExecuteEvm, Journal, MainBuilder, + Context, DatabaseCommit as _, ExecuteEvm, Journal, MainBuilder, bytecode::Bytecode, context::{ block::BlockEnv, @@ -15,12 +15,14 @@ use revm::{ }, context_interface::{ ContextSetters, + block::BlobExcessGasAndPrice, transaction::{AccessList, AccessListItem}, }, database::State, primitives::{TxKind, hardfork::SpecId}, state::{EvmState, EvmStorageSlot}, }; +use tracing::{debug, warn}; use crate::{ BlockContext, BlockExecutor, ExecutionConfig, ExecutionError, ExecutionOutcome, @@ -55,11 +57,6 @@ impl RevmExecutor { self.config.chain_id } - /// Get the execution configuration. - pub const fn config(&self) -> &ExecutionConfig { - &self.config - } - /// Get the spec ID. 
pub const fn spec_id(&self) -> SpecId { self.config.spec_id @@ -86,9 +83,9 @@ impl RevmExecutor { ))); } - if header.timestamp <= parent.timestamp { + if header.timestamp < parent.timestamp { return Err(ExecutionError::BlockValidation(format!( - "timestamp not increasing: parent {}, current {}", + "timestamp moved backwards: parent {}, current {}", parent.timestamp, header.timestamp ))); } @@ -212,7 +209,7 @@ impl RevmExecutor { params: CallParams, context: &BlockContext, ) -> Result { - let adapter = StateDbAdapter::new(state.clone()); + let adapter = StateDbAdapter::new(state.clone(), context.recent_block_hashes.clone()); let db = State::builder().with_database_ref(adapter).build(); type Db = State>>; @@ -221,6 +218,9 @@ impl RevmExecutor { let ctx = ctx .modify_cfg_chained(|cfg| { cfg.chain_id = self.config.chain_id; + cfg.disable_nonce_check = true; + cfg.disable_balance_check = true; + cfg.disable_base_fee = true; }) .modify_block_chained(|blk: &mut BlockEnv| { blk.number = U256::from(context.header.number); @@ -229,6 +229,12 @@ impl RevmExecutor { blk.gas_limit = context.header.gas_limit; blk.basefee = context.header.base_fee_per_gas.unwrap_or_default(); blk.prevrandao = Some(context.prevrandao); + if let Some(blob_base_fee) = context.blob_base_fee { + blk.blob_excess_gas_and_price = Some(BlobExcessGasAndPrice { + excess_blob_gas: 0, + blob_gasprice: blob_base_fee, + }); + } }); let mut evm = ctx.build_mainnet(); @@ -360,52 +366,117 @@ impl BlockExecutor for RevmExecutor { context: &BlockContext, txs: &[Self::Tx], ) -> Result { - let adapter = StateDbAdapter::new(state.clone()); - - let db = State::builder().with_database_ref(adapter).build(); - - type Db = State>>; - let ctx: Context, Journal>, ()> = - Context::new(db, self.config.spec_id); - let ctx = ctx - .modify_cfg_chained(|cfg| { - cfg.chain_id = self.config.chain_id; - }) - .modify_block_chained(|blk: &mut BlockEnv| { - blk.number = U256::from(context.header.number); - blk.timestamp = 
U256::from(context.header.timestamp); - blk.beneficiary = context.header.beneficiary; - blk.gas_limit = context.header.gas_limit; - blk.basefee = context.header.base_fee_per_gas.unwrap_or_default(); - blk.prevrandao = Some(context.prevrandao); - }); - - let mut evm = ctx.build_mainnet(); + // --- pre-execution hook --- + let pre_changes = self.pre_execute(context, state)?; let mut outcome = ExecutionOutcome::new(); - let mut cumulative_gas = 0u64; - - for tx_bytes in txs { - let tx_hash = keccak256(tx_bytes); - - let tx_env = decode_tx_env(tx_bytes, self.config.chain_id)?; - evm.set_tx(tx_env); - - let result_and_state = - evm.replay().map_err(|e| ExecutionError::TxExecution(format!("{:?}", e)))?; + outcome.changes.merge(pre_changes); + + // Empty-block short circuit: skip EVM context construction, + // state-db adapter cloning, and journal allocation when there + // are no transactions to execute. This is the common case on + // low-load networks and avoids measurable setup overhead per + // empty block. 
+ if !txs.is_empty() { + let adapter = StateDbAdapter::new(state.clone(), context.recent_block_hashes.clone()); + + let db = State::builder().with_database_ref(adapter).build(); + + type Db = State>>; + let ctx: Context, Journal>, ()> = + Context::new(db, self.config.spec_id); + let ctx = ctx + .modify_cfg_chained(|cfg| { + cfg.chain_id = self.config.chain_id; + }) + .modify_block_chained(|blk: &mut BlockEnv| { + blk.number = U256::from(context.header.number); + blk.timestamp = U256::from(context.header.timestamp); + blk.beneficiary = context.header.beneficiary; + blk.gas_limit = context.header.gas_limit; + blk.basefee = context.header.base_fee_per_gas.unwrap_or_default(); + blk.prevrandao = Some(context.prevrandao); + if let Some(blob_base_fee) = context.blob_base_fee { + blk.blob_excess_gas_and_price = Some(BlobExcessGasAndPrice { + excess_blob_gas: 0, + blob_gasprice: blob_base_fee, + }); + } + }); + + let mut evm = ctx.build_mainnet(); + let mut cumulative_gas = 0u64; + + for tx_bytes in txs { + let tx_hash = keccak256(tx_bytes); + + let tx_env = match decode_tx_env(tx_bytes, self.config.chain_id) { + Ok(env) => env, + Err(e) => { + warn!(hash = ?tx_hash, error = %e, "skipping undecodable transaction"); + outcome.receipts.push(build_skipped_receipt(tx_hash, cumulative_gas)); + continue; + } + }; + + // Enforce block gas limit: we `break` (not `continue`) because Ethereum + // semantics stop inclusion at the gas limit — remaining txs are simply not + // included. Unlike decode failures above, gas-limited txs get no placeholder + // receipts, so `receipts.len()` may be less than `txs.len()`. 
+ let tx_gas_limit = tx_env.gas_limit; + if cumulative_gas.saturating_add(tx_gas_limit) > context.header.gas_limit { + break; + } + evm.set_tx(tx_env); + + let result_and_state = match evm.replay() { + Ok(result) => result, + Err(e) => { + debug!(hash = ?tx_hash, error = ?e, "skipping unexecutable transaction"); + outcome.receipts.push(build_skipped_receipt(tx_hash, cumulative_gas)); + continue; + } + }; + + let gas_used = result_and_state.result.tx_gas_used(); + cumulative_gas = cumulative_gas.saturating_add(gas_used); + + let receipt = + build_receipt(&result_and_state.result, tx_hash, gas_used, cumulative_gas); + outcome.receipts.push(receipt); + + let evm_state = result_and_state.state; + + // Collect addresses that were selfdestructed in this transaction. + // Their storage entries in QMDB become orphaned and need future GC. + for (address, account) in &evm_state { + if account.is_selfdestructed() { + outcome.selfdestructed_addresses.push(*address); + } + } - let gas_used = result_and_state.result.tx_gas_used(); - cumulative_gas = cumulative_gas.saturating_add(gas_used); + // Extract changes by reference to avoid cloning the entire + // EvmState HashMap. The original is then moved into + // `db.commit()` which consumes it. + let changes = extract_changes(&evm_state); + evm.ctx.modify_db(|db| db.commit(evm_state)); + outcome.changes.merge(changes); + } - let receipt = - build_receipt(&result_and_state.result, tx_hash, gas_used, cumulative_gas); - outcome.receipts.push(receipt); + outcome.gas_used = cumulative_gas; + } - let changes = extract_changes(result_and_state.state); - outcome.changes.merge(changes); + // Check the side-channel flag for DatabaseCommit failures. + // REVM's DatabaseCommit::commit() is infallible, so QMDB write errors + // are recorded via an atomic flag on the state handle and checked here. 
+ if state.take_commit_failure() { + return Err(ExecutionError::StateCommit); } - outcome.gas_used = cumulative_gas; + // --- post-execution hook --- + let post_changes = self.post_execute(context, state, &outcome.receipts)?; + outcome.changes.merge(post_changes); + Ok(outcome) } @@ -433,10 +504,10 @@ impl BlockExecutor for RevmExecutor { /// Currently supports basic transaction decoding for all Ethereum transaction types. fn decode_tx_env(tx_bytes: &Bytes, _chain_id: u64) -> Result { use alloy_consensus::TxEnvelope; - use alloy_rlp::Decodable; + use alloy_eips::eip2718::Decodable2718 as _; - // Decode the transaction envelope - let envelope = TxEnvelope::decode(&mut tx_bytes.as_ref()) + // Decode both legacy RLP transactions and typed EIP-2718 envelopes. + let envelope = TxEnvelope::decode_2718(&mut tx_bytes.as_ref()) .map_err(|e| ExecutionError::TxDecode(format!("{}", e)))?; // Build TxEnv using the builder pattern @@ -597,6 +668,15 @@ fn convert_authorization_list( .collect() } +/// Build a placeholder failed receipt for a skipped transaction. +/// +/// This preserves index alignment between transactions and receipts so that +/// downstream code (e.g. reporters) can use the receipt index as the +/// transaction index. +const fn build_skipped_receipt(tx_hash: B256, cumulative_gas_used: u64) -> ExecutionReceipt { + ExecutionReceipt::new(tx_hash, false, 0, cumulative_gas_used, Vec::new(), None) +} + /// Build a transaction receipt from execution result. fn build_receipt( result: &ExecutionResult, @@ -621,7 +701,13 @@ fn build_receipt( } /// Extract state changes from REVM execution state. -fn extract_changes(state: EvmState) -> ChangeSet { +/// +/// Takes the state by reference to avoid a full `HashMap` clone on the +/// hot path: the caller needs the original `EvmState` for `db.commit()`, +/// and the previous code cloned it before extracting changes. 
Iterating +/// by reference copies only the individual field values we need, which is +/// dramatically cheaper than cloning the entire nested structure. +fn extract_changes(state: &EvmState) -> ChangeSet { let mut changes = ChangeSet::new(); for (address, account) in state { @@ -630,10 +716,11 @@ fn extract_changes(state: EvmState) -> ChangeSet { continue; } - // Extract storage changes + // Extract storage changes (skip read-only SLOAD slots) let storage: BTreeMap = account .storage .iter() + .filter(|(_, v)| v.is_changed()) .map(|(k, v): (&U256, &EvmStorageSlot)| (*k, v.present_value())) .collect(); @@ -650,7 +737,7 @@ fn extract_changes(state: EvmState) -> ChangeSet { storage, }; - changes.insert(address, update); + changes.insert(*address, update); } changes @@ -658,10 +745,14 @@ fn extract_changes(state: EvmState) -> ChangeSet { #[cfg(test)] mod tests { - use alloy_primitives::{Address, Bytes, KECCAK256_EMPTY}; + use alloy_consensus::{SignableTransaction as _, TxEip1559, TxEnvelope}; + use alloy_eips::eip2718::Encodable2718; + use alloy_primitives::{Address, Bytes, KECCAK256_EMPTY, Signature, TxKind as AlTxKind, U256}; + use k256::ecdsa::SigningKey; use kora_qmdb::ChangeSet; use kora_traits::{StateDb, StateDbError, StateDbRead, StateDbWrite}; use revm::state::Account; + use sha3::{Digest as _, Keccak256}; use super::*; use crate::GasLimitBounds; @@ -705,6 +796,42 @@ mod tests { } } + /// Helper: build a signed EIP-1559 transfer and return its raw encoded bytes. 
+ fn build_valid_tx(chain_id: u64, nonce: u64) -> Bytes { + let mut secret = [0u8; 32]; + secret[31] = 1; // deterministic key + let key = SigningKey::from_bytes((&secret).into()).expect("valid key"); + + let to = Address::repeat_byte(0xab); + let tx = TxEip1559 { + chain_id, + nonce, + gas_limit: 21_000, + max_fee_per_gas: 0, + max_priority_fee_per_gas: 0, + to: AlTxKind::Call(to), + value: U256::ZERO, + access_list: Default::default(), + input: Bytes::new(), + }; + + let digest = Keccak256::new_with_prefix(tx.encoded_for_signing()); + let (sig, recid) = key.sign_digest_recoverable(digest).expect("sign tx"); + let signature = Signature::from((sig, recid)); + let signed = tx.into_signed(signature); + let envelope = TxEnvelope::from(signed); + let mut raw = Vec::new(); + envelope.encode_2718(&mut raw); + Bytes::from(raw) + } + + /// Helper: create a default block context suitable for tests. + fn test_block_context() -> BlockContext { + let header = + Header { number: 1, timestamp: 1000, gas_limit: 30_000_000, ..Header::default() }; + BlockContext::new(header, B256::ZERO, B256::ZERO) + } + #[test] fn revm_executor_new() { let executor = RevmExecutor::new(1); @@ -790,7 +917,7 @@ mod tests { base_fee_per_gas: None, }; - let header = Header { + let mut header = Header { parent_hash: B256::repeat_byte(1), number: 101, timestamp: 999, @@ -799,6 +926,9 @@ mod tests { }; assert!(executor.validate_header_against_parent(&header, &parent).is_err()); + + header.timestamp = 1000; + assert!(executor.validate_header_against_parent(&header, &parent).is_ok()); } #[test] @@ -894,7 +1024,7 @@ mod tests { #[test] fn extract_changes_empty() { let state = EvmState::default(); - let changes = extract_changes(state); + let changes = extract_changes(&state); assert!(changes.is_empty()); } @@ -917,7 +1047,7 @@ mod tests { state.insert(Address::ZERO, account); - let changes = extract_changes(state); + let changes = extract_changes(&state); assert_eq!(changes.len(), 1); let update = 
changes.accounts.get(&Address::ZERO).unwrap(); @@ -939,7 +1069,7 @@ mod tests { state.insert(Address::ZERO, account); - let changes = extract_changes(state); + let changes = extract_changes(&state); assert!(changes.is_empty()); } @@ -957,7 +1087,7 @@ mod tests { state.insert(Address::ZERO, account); - let changes = extract_changes(state); + let changes = extract_changes(&state); assert_eq!(changes.len(), 1); let update = changes.accounts.get(&Address::ZERO).unwrap(); @@ -978,10 +1108,89 @@ mod tests { state.insert(Address::ZERO, account); - let changes = extract_changes(state); + let changes = extract_changes(&state); assert_eq!(changes.len(), 1); let update = changes.accounts.get(&Address::ZERO).unwrap(); assert!(update.selfdestructed); } + + // --- Tests for invalid transaction skipping --- + + #[test] + fn execute_skips_garbage_bytes() { + // A block containing only garbage bytes should succeed with a placeholder + // failed receipt rather than aborting the entire block. + let executor = RevmExecutor::new(1); + let state = MockStateDb; + let context = test_block_context(); + + let garbage = Bytes::from(vec![0xde, 0xad, 0xbe, 0xef]); + let txs = vec![garbage]; + + let outcome = executor.execute(&state, &context, &txs).expect("block should not fail"); + // Receipt count must equal transaction count to preserve index alignment. + assert_eq!(outcome.receipts.len(), txs.len(), "receipt count must match tx count"); + assert!(!outcome.receipts[0].success(), "skipped tx receipt must be failed"); + assert_eq!(outcome.receipts[0].gas_used, 0, "skipped tx should use no gas"); + assert_eq!(outcome.gas_used, 0, "no gas should be consumed"); + } + + #[test] + fn execute_skips_invalid_but_processes_valid() { + // A block with [garbage, valid_tx] should emit a placeholder receipt for + // the garbage and still execute the valid transaction, preserving indices. 
+ let executor = RevmExecutor::new(1); + let state = MockStateDb; + let context = test_block_context(); + + let garbage = Bytes::from(vec![0xff, 0x01, 0x02, 0x03]); + let valid_tx = build_valid_tx(1, 0); + let txs = vec![garbage, valid_tx]; + + let outcome = executor.execute(&state, &context, &txs).expect("block should not fail"); + + // Receipt count must equal transaction count to preserve index alignment. + assert_eq!(outcome.receipts.len(), txs.len(), "receipt count must match tx count"); + assert!(!outcome.receipts[0].success(), "garbage tx receipt must be failed"); + assert_eq!(outcome.receipts[0].gas_used, 0, "garbage tx should use no gas"); + assert!(outcome.receipts[1].success(), "valid tx receipt must be successful"); + assert!(outcome.gas_used > 0, "valid tx should consume gas"); + } + + #[test] + fn execute_processes_valid_tx_between_invalid() { + // A block with [garbage, valid_tx, more_garbage] should produce a receipt + // for every transaction, preserving index alignment. + let executor = RevmExecutor::new(1); + let state = MockStateDb; + let context = test_block_context(); + + let garbage1 = Bytes::from(vec![0xaa, 0xbb]); + let valid_tx = build_valid_tx(1, 0); + let garbage2 = Bytes::from(vec![0xcc, 0xdd, 0xee]); + let txs = vec![garbage1, valid_tx, garbage2]; + + let outcome = executor.execute(&state, &context, &txs).expect("block should not fail"); + + // Receipt count must equal transaction count to preserve index alignment. + assert_eq!(outcome.receipts.len(), txs.len(), "receipt count must match tx count"); + assert!(!outcome.receipts[0].success(), "first garbage receipt must be failed"); + assert!(outcome.receipts[1].success(), "valid tx receipt must be successful"); + assert!(!outcome.receipts[2].success(), "second garbage receipt must be failed"); + // Cumulative gas in the last receipt should match total gas used. 
+ assert_eq!(outcome.receipts[2].cumulative_gas_used(), outcome.gas_used); + } + + #[test] + fn execute_empty_block_succeeds() { + // An empty transaction list should produce an empty outcome. + let executor = RevmExecutor::new(1); + let state = MockStateDb; + let context = test_block_context(); + + let outcome = executor.execute(&state, &context, &[]).expect("empty block should succeed"); + assert!(outcome.receipts.is_empty()); + assert_eq!(outcome.gas_used, 0); + } } diff --git a/crates/node/executor/src/traits.rs b/crates/node/executor/src/traits.rs index f9023f7..32806ff 100644 --- a/crates/node/executor/src/traits.rs +++ b/crates/node/executor/src/traits.rs @@ -1,9 +1,10 @@ //! Core execution traits. use alloy_consensus::Header; +use kora_qmdb::ChangeSet; use kora_traits::StateDb; -use crate::{BlockContext, ExecutionError, ExecutionOutcome}; +use crate::{BlockContext, ExecutionError, ExecutionOutcome, ExecutionReceipt}; /// Executes transactions against a state database. /// @@ -12,6 +13,20 @@ pub trait BlockExecutor: Clone + Send + Sync + 'static { /// Transaction type accepted for execution. type Tx: Clone + Send + Sync + 'static; + /// Called before transaction execution to apply protocol-level state + /// modifications (e.g. beacon-chain system calls, epoch transitions). + /// + /// Returns any state changes that should be included in the block's + /// changeset. The default implementation is a no-op that returns an + /// empty changeset. + fn pre_execute( + &self, + _context: &BlockContext, + _state: &S, + ) -> Result { + Ok(ChangeSet::new()) + } + /// Execute a batch of transactions against the given state. /// /// Returns the execution outcome containing state changes and receipts. @@ -22,6 +37,22 @@ pub trait BlockExecutor: Clone + Send + Sync + 'static { txs: &[Self::Tx], ) -> Result; + /// Called after transaction execution to apply protocol-level state + /// modifications (e.g. block rewards, fee burns, validator payouts). 
+ /// + /// Receives the block context and the receipts produced by transaction + /// execution so that reward logic can inspect gas usage. Returns any + /// additional state changes. The default implementation is a no-op that + /// returns an empty changeset. + fn post_execute( + &self, + _context: &BlockContext, + _state: &S, + _receipts: &[ExecutionReceipt], + ) -> Result { + Ok(ChangeSet::new()) + } + /// Validate a block header. fn validate_header(&self, header: &Header) -> Result<(), ExecutionError>; } diff --git a/crates/node/executor/src/validation.rs b/crates/node/executor/src/validation.rs deleted file mode 100644 index 0a54f51..0000000 --- a/crates/node/executor/src/validation.rs +++ /dev/null @@ -1,384 +0,0 @@ -//! Transaction pre-validation. - -use alloy_consensus::TxEnvelope; -use alloy_eips::eip2930::AccessList; -use alloy_primitives::{Bytes, TxKind, U256}; -use alloy_rlp::Decodable; -use kora_traits::StateDb; - -use crate::{ExecutionConfig, ExecutionError}; - -/// Maximum number of blobs per transaction (EIP-4844). -pub const MAX_BLOBS_PER_TX: usize = 6; - -/// Gas cost per byte of calldata (zero byte). -pub const TX_DATA_ZERO_GAS: u64 = 4; - -/// Gas cost per byte of calldata (non-zero byte). -pub const TX_DATA_NON_ZERO_GAS: u64 = 16; - -/// Base gas cost for a transaction. -pub const TX_BASE_GAS: u64 = 21000; - -/// Gas cost for contract creation. -pub const TX_CREATE_GAS: u64 = 32000; - -/// Gas cost per access list address. -pub const ACCESS_LIST_ADDRESS_GAS: u64 = 2400; - -/// Gas cost per access list storage key. -pub const ACCESS_LIST_STORAGE_KEY_GAS: u64 = 1900; - -/// Transaction validator for pre-execution checks. -#[derive(Clone, Debug)] -pub struct TxValidator<'a> { - config: &'a ExecutionConfig, - base_fee: u64, - blob_base_fee: Option, -} - -impl<'a> TxValidator<'a> { - /// Create a new transaction validator. 
- pub const fn new(config: &'a ExecutionConfig, base_fee: u64) -> Self { - Self { config, base_fee, blob_base_fee: None } - } - - /// Set the blob base fee for Cancun+ validation. - #[must_use] - pub const fn with_blob_base_fee(mut self, blob_base_fee: u128) -> Self { - self.blob_base_fee = Some(blob_base_fee); - self - } - - /// Validate a transaction before execution. - pub async fn validate( - &self, - tx_bytes: &Bytes, - state: &S, - ) -> Result { - let envelope = TxEnvelope::decode(&mut tx_bytes.as_ref()) - .map_err(|e| ExecutionError::TxDecode(format!("{}", e)))?; - - self.validate_envelope(&envelope, state).await - } - - /// Validate a decoded transaction envelope. - async fn validate_envelope( - &self, - envelope: &TxEnvelope, - state: &S, - ) -> Result { - let ( - sender, - chain_id, - nonce, - gas_limit, - max_fee, - max_priority_fee, - value, - input, - is_create, - access_list, - ) = match envelope { - TxEnvelope::Legacy(signed) => { - let tx = signed.tx(); - let sender = signed.recover_signer().map_err(|e| { - ExecutionError::InvalidTx(format!("failed to recover signer: {}", e)) - })?; - ( - sender, - tx.chain_id, - tx.nonce, - tx.gas_limit, - tx.gas_price, - 0, - tx.value, - &tx.input, - matches!(tx.to, TxKind::Create), - None, - ) - } - TxEnvelope::Eip2930(signed) => { - let tx = signed.tx(); - let sender = signed.recover_signer().map_err(|e| { - ExecutionError::InvalidTx(format!("failed to recover signer: {}", e)) - })?; - ( - sender, - Some(tx.chain_id), - tx.nonce, - tx.gas_limit, - tx.gas_price, - 0, - tx.value, - &tx.input, - matches!(tx.to, TxKind::Create), - Some(&tx.access_list), - ) - } - TxEnvelope::Eip1559(signed) => { - let tx = signed.tx(); - let sender = signed.recover_signer().map_err(|e| { - ExecutionError::InvalidTx(format!("failed to recover signer: {}", e)) - })?; - ( - sender, - Some(tx.chain_id), - tx.nonce, - tx.gas_limit, - tx.max_fee_per_gas, - tx.max_priority_fee_per_gas, - tx.value, - &tx.input, - matches!(tx.to, 
TxKind::Create), - Some(&tx.access_list), - ) - } - TxEnvelope::Eip4844(signed) => { - let tx = signed.tx().tx(); - let sender = signed.recover_signer().map_err(|e| { - ExecutionError::InvalidTx(format!("failed to recover signer: {}", e)) - })?; - - self.validate_blob_tx_fields(&tx.blob_versioned_hashes, tx.max_fee_per_blob_gas)?; - - ( - sender, - Some(tx.chain_id), - tx.nonce, - tx.gas_limit, - tx.max_fee_per_gas, - tx.max_priority_fee_per_gas, - tx.value, - &tx.input, - false, - Some(&tx.access_list), - ) - } - TxEnvelope::Eip7702(signed) => { - let tx = signed.tx(); - let sender = signed.recover_signer().map_err(|e| { - ExecutionError::InvalidTx(format!("failed to recover signer: {}", e)) - })?; - ( - sender, - Some(tx.chain_id), - tx.nonce, - tx.gas_limit, - tx.max_fee_per_gas, - tx.max_priority_fee_per_gas, - tx.value, - &tx.input, - false, - Some(&tx.access_list), - ) - } - }; - - if let Some(tx_chain_id) = chain_id - && tx_chain_id != self.config.chain_id - { - return Err(ExecutionError::InvalidTx(format!( - "chain ID mismatch: expected {}, got {}", - self.config.chain_id, tx_chain_id - ))); - } - - let intrinsic_gas = self.calculate_intrinsic_gas(input, is_create, access_list)?; - if gas_limit < intrinsic_gas { - return Err(ExecutionError::InvalidTx(format!( - "gas limit {} below intrinsic gas {}", - gas_limit, intrinsic_gas - ))); - } - - let account_nonce = state.nonce(&sender).await?; - if account_nonce != nonce { - return Err(ExecutionError::InvalidTx(format!( - "nonce mismatch: expected {}, got {}", - account_nonce, nonce - ))); - } - - let account_balance = state.balance(&sender).await?; - let max_gas_cost = U256::from(gas_limit) * U256::from(max_fee); - let required_balance = max_gas_cost + value; - if account_balance < required_balance { - return Err(ExecutionError::InvalidTx(format!( - "insufficient balance: has {}, needs {}", - account_balance, required_balance - ))); - } - - if max_fee < u128::from(self.base_fee) { - return 
Err(ExecutionError::InvalidTx(format!( - "max fee {} below base fee {}", - max_fee, self.base_fee - ))); - } - - if max_priority_fee > max_fee { - return Err(ExecutionError::InvalidTx("max priority fee exceeds max fee".to_string())); - } - - if let Some(access_list) = access_list { - self.validate_access_list(access_list)?; - } - - Ok(ValidatedTx { sender, nonce, gas_limit, intrinsic_gas }) - } - - /// Calculate intrinsic gas for a transaction. - fn calculate_intrinsic_gas( - &self, - input: &Bytes, - is_create: bool, - access_list: Option<&AccessList>, - ) -> Result { - let mut gas = TX_BASE_GAS; - - if is_create { - gas = gas.saturating_add(TX_CREATE_GAS); - } - - for byte in input.iter() { - if *byte == 0 { - gas = gas.saturating_add(TX_DATA_ZERO_GAS); - } else { - gas = gas.saturating_add(TX_DATA_NON_ZERO_GAS); - } - } - - if let Some(access_list) = access_list { - for item in access_list.iter() { - gas = gas.saturating_add(ACCESS_LIST_ADDRESS_GAS); - gas = gas.saturating_add( - ACCESS_LIST_STORAGE_KEY_GAS.saturating_mul(item.storage_keys.len() as u64), - ); - } - } - - Ok(gas) - } - - /// Validate blob transaction specific fields. 
- fn validate_blob_tx_fields( - &self, - blob_versioned_hashes: &[alloy_primitives::B256], - max_fee_per_blob_gas: u128, - ) -> Result<(), ExecutionError> { - if blob_versioned_hashes.is_empty() { - return Err(ExecutionError::InvalidTx( - "blob transaction must have at least one blob".to_string(), - )); - } - - if blob_versioned_hashes.len() > MAX_BLOBS_PER_TX { - return Err(ExecutionError::InvalidTx(format!( - "blob transaction exceeds max blobs: {} > {}", - blob_versioned_hashes.len(), - MAX_BLOBS_PER_TX - ))); - } - - for hash in blob_versioned_hashes { - if hash[0] != 0x01 { - return Err(ExecutionError::InvalidTx(format!( - "invalid blob version: expected 0x01, got 0x{:02x}", - hash[0] - ))); - } - } - - if let Some(blob_base_fee) = self.blob_base_fee - && max_fee_per_blob_gas < blob_base_fee - { - return Err(ExecutionError::InvalidTx(format!( - "max fee per blob gas {} below blob base fee {}", - max_fee_per_blob_gas, blob_base_fee - ))); - } - - Ok(()) - } - - /// Validate access list entries. - fn validate_access_list(&self, access_list: &AccessList) -> Result<(), ExecutionError> { - for item in access_list.iter() { - if item.address.is_zero() { - return Err(ExecutionError::InvalidTx( - "access list contains zero address".to_string(), - )); - } - } - Ok(()) - } -} - -/// A validated transaction ready for execution. -#[derive(Clone, Debug)] -pub struct ValidatedTx { - /// Transaction sender. - pub sender: alloy_primitives::Address, - /// Transaction nonce. - pub nonce: u64, - /// Gas limit. - pub gas_limit: u64, - /// Intrinsic gas cost. 
- pub intrinsic_gas: u64, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn intrinsic_gas_simple_transfer() { - let config = ExecutionConfig::default(); - let validator = TxValidator::new(&config, 1000); - - let gas = validator.calculate_intrinsic_gas(&Bytes::new(), false, None).unwrap(); - assert_eq!(gas, TX_BASE_GAS); - } - - #[test] - fn intrinsic_gas_with_data() { - let config = ExecutionConfig::default(); - let validator = TxValidator::new(&config, 1000); - - let data = Bytes::from(vec![0, 1, 2, 0, 0, 3]); - let gas = validator.calculate_intrinsic_gas(&data, false, None).unwrap(); - - let expected = TX_BASE_GAS + (3 * TX_DATA_ZERO_GAS) + (3 * TX_DATA_NON_ZERO_GAS); - assert_eq!(gas, expected); - } - - #[test] - fn intrinsic_gas_create() { - let config = ExecutionConfig::default(); - let validator = TxValidator::new(&config, 1000); - - let gas = validator.calculate_intrinsic_gas(&Bytes::new(), true, None).unwrap(); - assert_eq!(gas, TX_BASE_GAS + TX_CREATE_GAS); - } - - #[test] - fn intrinsic_gas_with_access_list() { - use alloy_eips::eip2930::AccessListItem; - use alloy_primitives::Address; - - let config = ExecutionConfig::default(); - let validator = TxValidator::new(&config, 1000); - - let access_list = AccessList(vec![AccessListItem { - address: Address::repeat_byte(1), - storage_keys: vec![Default::default(), Default::default()], - }]); - - let gas = - validator.calculate_intrinsic_gas(&Bytes::new(), false, Some(&access_list)).unwrap(); - - let expected = TX_BASE_GAS + ACCESS_LIST_ADDRESS_GAS + (2 * ACCESS_LIST_STORAGE_KEY_GAS); - assert_eq!(gas, expected); - } -} diff --git a/crates/node/executor/tests/executor.rs b/crates/node/executor/tests/executor.rs index 974a6f8..5946266 100644 --- a/crates/node/executor/tests/executor.rs +++ b/crates/node/executor/tests/executor.rs @@ -5,12 +5,15 @@ use std::{ sync::{Arc, RwLock}, }; -use alloy_consensus::Header; -use alloy_primitives::{Address, B256, Bytes, U256}; +use alloy_consensus::{Header, 
SignableTransaction as _, TxEip1559, TxEnvelope}; +use alloy_eips::eip2718::Encodable2718; +use alloy_primitives::{Address, B256, Bytes, Signature, TxKind, U256, keccak256}; +use k256::ecdsa::SigningKey; use kora_executor::{BlockContext, BlockExecutor, RevmExecutor}; use kora_qmdb::{AccountUpdate, ChangeSet}; use kora_traits::{StateDb, StateDbError, StateDbRead, StateDbWrite}; use rstest::rstest; +use sha3::{Digest as _, Keccak256}; /// Account data stored in the mock state database. #[derive(Clone, Debug, Default)] @@ -585,3 +588,310 @@ fn test_execute_with_populated_state() { assert!(outcome.receipts.is_empty()); assert_eq!(outcome.gas_used, 0); } + +// ---------------------------------------------------------------------------- +// Helpers for creating signed transactions +// ---------------------------------------------------------------------------- + +/// Create a signing key from a deterministic seed byte. +fn signing_key_from_seed(seed: u8) -> SigningKey { + let mut secret = [0u8; 32]; + secret[31] = seed; + SigningKey::from_bytes((&secret).into()).expect("valid key") +} + +/// Derive an Ethereum address from a signing key. +fn address_from_key(key: &SigningKey) -> Address { + let encoded = key.verifying_key().to_encoded_point(false); + let pubkey = encoded.as_bytes(); + let hash = keccak256(&pubkey[1..]); + Address::from_slice(&hash[12..]) +} + +/// Sign an EIP-1559 transfer and return the raw encoded bytes. 
+fn sign_eip1559_transfer( + key: &SigningKey, + chain_id: u64, + to: Address, + value: U256, + nonce: u64, + gas_limit: u64, +) -> Bytes { + let tx = TxEip1559 { + chain_id, + nonce, + gas_limit, + max_fee_per_gas: 0, + max_priority_fee_per_gas: 0, + to: TxKind::Call(to), + value, + access_list: Default::default(), + input: Bytes::new(), + }; + + let digest = Keccak256::new_with_prefix(tx.encoded_for_signing()); + let (sig, recid) = key.sign_digest_recoverable(digest).expect("sign tx"); + let signature = Signature::from((sig, recid)); + let signed = tx.into_signed(signature); + let envelope = TxEnvelope::from(signed); + let mut raw_bytes = Vec::new(); + envelope.encode_2718(&mut raw_bytes); + Bytes::from(raw_bytes) +} + +// ---------------------------------------------------------------------------- +// Tests for block gas limit enforcement +// ---------------------------------------------------------------------------- + +#[test] +fn test_execute_enforces_block_gas_limit() { + let chain_id = 1u64; + let executor = RevmExecutor::new(chain_id); + let state = MockStateDb::new(); + + // Set up a sender with enough balance for transfers. + let sender_key = signing_key_from_seed(1); + let sender = address_from_key(&sender_key); + let receiver = Address::from([0xBB; 20]); + + state.insert_account( + sender, + MockAccount { nonce: 0, balance: U256::from(10_000_000_000u64), ..Default::default() }, + ); + + // Insert receiver as an existing (empty) account to ensure the 21_000 gas + // assumption holds regardless of fork rules for new-account creation. + state.insert_account(receiver, MockAccount::default()); + + // Each basic transfer uses 21_000 gas. + // Create 3 transactions, each requiring 21_000 gas. 
+ let tx1 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 0, 21_000); + let tx2 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 1, 21_000); + let tx3 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 2, 21_000); + + // Set block gas limit to only fit 2 transactions (42_000). + // The third transaction (cumulative would be 63_000 > 42_000) should be skipped. + let header = Header { gas_limit: 42_000, number: 1, timestamp: 1000, ..Default::default() }; + let context = BlockContext::new(header, B256::ZERO, B256::ZERO); + + let outcome = + executor.execute(&state, &context, &[tx1, tx2, tx3]).expect("execution should succeed"); + + // Only 2 transactions should have been executed, and both should succeed. + assert_eq!( + outcome.receipts.len(), + 2, + "only 2 of 3 transactions should execute within gas limit" + ); + assert!( + outcome.receipts.iter().all(|r| r.success()), + "all executed transactions should succeed" + ); + assert_eq!(outcome.gas_used, 42_000, "cumulative gas should equal 2 * 21_000"); +} + +#[test] +fn test_execute_within_gas_limit_processes_all_transactions() { + let chain_id = 1u64; + let executor = RevmExecutor::new(chain_id); + let state = MockStateDb::new(); + + let sender_key = signing_key_from_seed(1); + let sender = address_from_key(&sender_key); + let receiver = Address::from([0xBB; 20]); + + state.insert_account( + sender, + MockAccount { nonce: 0, balance: U256::from(10_000_000_000u64), ..Default::default() }, + ); + + // Insert receiver as an existing (empty) account to ensure the 21_000 gas + // assumption holds regardless of fork rules for new-account creation. + state.insert_account(receiver, MockAccount::default()); + + // Create 3 transactions, each requiring 21_000 gas. 
+ let tx1 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 0, 21_000); + let tx2 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 1, 21_000); + let tx3 = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 2, 21_000); + + // Set block gas limit high enough for all 3 transactions (63_000). + let header = Header { gas_limit: 63_000, number: 1, timestamp: 1000, ..Default::default() }; + let context = BlockContext::new(header, B256::ZERO, B256::ZERO); + + let outcome = + executor.execute(&state, &context, &[tx1, tx2, tx3]).expect("execution should succeed"); + + // All 3 transactions should have been executed and all should succeed. + assert_eq!(outcome.receipts.len(), 3, "all 3 transactions should execute within gas limit"); + assert!( + outcome.receipts.iter().all(|r| r.success()), + "all executed transactions should succeed" + ); + assert_eq!(outcome.gas_used, 63_000, "cumulative gas should equal 3 * 21_000"); +} + +#[test] +fn test_execute_single_tx_exceeding_block_gas_limit_produces_empty_outcome() { + let chain_id = 1u64; + let executor = RevmExecutor::new(chain_id); + let state = MockStateDb::new(); + + let sender_key = signing_key_from_seed(1); + let sender = address_from_key(&sender_key); + let receiver = Address::from([0xBB; 20]); + + state.insert_account( + sender, + MockAccount { nonce: 0, balance: U256::from(10_000_000_000u64), ..Default::default() }, + ); + + // Insert receiver as an existing (empty) account to ensure the 21_000 gas + // assumption holds regardless of fork rules for new-account creation. + state.insert_account(receiver, MockAccount::default()); + + // Transaction requires 21_000 gas but block limit is only 10_000. 
+ let tx = sign_eip1559_transfer(&sender_key, chain_id, receiver, U256::from(1), 0, 21_000); + + let header = Header { gas_limit: 10_000, number: 1, timestamp: 1000, ..Default::default() }; + let context = BlockContext::new(header, B256::ZERO, B256::ZERO); + + let outcome = executor.execute(&state, &context, &[tx]).expect("execution should succeed"); + + // The transaction should not have been executed. + assert!( + outcome.receipts.is_empty(), + "no transactions should execute when gas limit is too low" + ); + assert_eq!(outcome.gas_used, 0); +} + +// ---------------------------------------------------------------------------- +// Tests for real signed EIP-1559 transaction execution with state changes +// ---------------------------------------------------------------------------- + +/// Execute a real signed EIP-1559 transfer and verify that: +/// - The transaction succeeds. +/// - The sender's nonce is incremented. +/// - The receiver's balance increases by the transfer value. +/// - The receipt contains the correct transaction hash. +/// - The total gas used equals the basic transfer cost (21,000). +#[test] +fn test_execute_signed_eip1559_transfer_verifies_state_changes() { + let chain_id = 1u64; + let executor = RevmExecutor::new(chain_id); + let state = MockStateDb::new(); + + let sender_key = signing_key_from_seed(1); + let sender = address_from_key(&sender_key); + let receiver = Address::from([0xBB; 20]); + + let initial_balance = U256::from(10_000_000_000u64); + let transfer_value = U256::from(1_000); + + state.insert_account( + sender, + MockAccount { nonce: 0, balance: initial_balance, ..Default::default() }, + ); + // Insert receiver as existing (empty) account so the 21,000 gas assumption holds. 
+ state.insert_account(receiver, MockAccount::default()); + + let tx_bytes = + sign_eip1559_transfer(&sender_key, chain_id, receiver, transfer_value, 0, 21_000); + let tx_hash = keccak256(&tx_bytes); + + let header = Header { gas_limit: 30_000_000, number: 1, timestamp: 1000, ..Default::default() }; + let context = BlockContext::new(header, B256::ZERO, B256::ZERO); + + let outcome = + executor.execute(&state, &context, &[tx_bytes]).expect("execution should succeed"); + + // Exactly one receipt produced. + assert_eq!(outcome.receipts.len(), 1, "should produce exactly one receipt"); + + // Transaction succeeded. + assert!(outcome.receipts[0].success(), "transfer should succeed"); + + // Receipt hash matches the transaction hash. + assert_eq!(outcome.receipts[0].tx_hash, tx_hash, "receipt must contain correct tx hash"); + + // Gas accounting: a simple transfer costs exactly 21,000 gas. + assert_eq!(outcome.gas_used, 21_000, "total gas used should be 21,000"); + assert_eq!(outcome.receipts[0].gas_used, 21_000, "per-tx gas should be 21,000"); + + // State changes must reflect the transfer. + let sender_update = + outcome.changes.accounts.get(&sender).expect("sender must appear in change set"); + assert_eq!(sender_update.nonce, 1, "sender nonce must increment to 1"); + assert_eq!( + sender_update.balance, + initial_balance - transfer_value, + "sender balance must decrease by transfer value (zero base fee means no gas cost)" + ); + + let receiver_update = + outcome.changes.accounts.get(&receiver).expect("receiver must appear in change set"); + assert_eq!( + receiver_update.balance, transfer_value, + "receiver balance must equal the transfer value" + ); +} + +/// Execute two sequential signed EIP-1559 transfers from the same sender +/// and verify nonce increments and cumulative balance changes. 
+#[test] +fn test_execute_multiple_signed_transfers_sequential_nonces() { + let chain_id = 1u64; + let executor = RevmExecutor::new(chain_id); + let state = MockStateDb::new(); + + let sender_key = signing_key_from_seed(1); + let sender = address_from_key(&sender_key); + let receiver = Address::from([0xCC; 20]); + + let initial_balance = U256::from(10_000_000_000u64); + let value_1 = U256::from(100); + let value_2 = U256::from(200); + + state.insert_account( + sender, + MockAccount { nonce: 0, balance: initial_balance, ..Default::default() }, + ); + state.insert_account(receiver, MockAccount::default()); + + let tx1 = sign_eip1559_transfer(&sender_key, chain_id, receiver, value_1, 0, 21_000); + let tx2 = sign_eip1559_transfer(&sender_key, chain_id, receiver, value_2, 1, 21_000); + + let header = Header { gas_limit: 30_000_000, number: 1, timestamp: 1000, ..Default::default() }; + let context = BlockContext::new(header, B256::ZERO, B256::ZERO); + + let outcome = + executor.execute(&state, &context, &[tx1, tx2]).expect("execution should succeed"); + + // Both transactions should succeed. + assert_eq!(outcome.receipts.len(), 2, "should produce two receipts"); + assert!(outcome.receipts[0].success(), "first transfer should succeed"); + assert!(outcome.receipts[1].success(), "second transfer should succeed"); + + // Gas accounting. + assert_eq!(outcome.gas_used, 42_000, "total gas should be 2 * 21,000"); + + // Cumulative gas in receipts. + assert_eq!(outcome.receipts[0].cumulative_gas_used(), 21_000); + assert_eq!(outcome.receipts[1].cumulative_gas_used(), 42_000); + + // Final state changes reflect both transfers. 
+ let sender_update = outcome.changes.accounts.get(&sender).expect("sender in changes"); + assert_eq!(sender_update.nonce, 2, "sender nonce must be 2 after two transactions"); + assert_eq!( + sender_update.balance, + initial_balance - value_1 - value_2, + "sender balance must decrease by total transferred (zero base fee)" + ); + + let receiver_update = outcome.changes.accounts.get(&receiver).expect("receiver in changes"); + assert_eq!( + receiver_update.balance, + value_1 + value_2, + "receiver must have sum of both transfers" + ); +} diff --git a/crates/node/ledger/Cargo.toml b/crates/node/ledger/Cargo.toml index ac149cd..5a49be9 100644 --- a/crates/node/ledger/Cargo.toml +++ b/crates/node/ledger/Cargo.toml @@ -18,8 +18,10 @@ kora-overlay = { path = "../../storage/overlay" } kora-qmdb = { path = "../../storage/qmdb" } kora-qmdb-ledger = { path = "../../storage/qmdb-ledger" } kora-traits = { path = "../../storage/traits" } +kora-txpool = { path = "../txpool" } # Commonware +commonware-consensus.workspace = true commonware-cryptography.workspace = true commonware-runtime.workspace = true @@ -28,12 +30,17 @@ alloy-primitives.workspace = true # Async futures.workspace = true +tokio.workspace = true # Error handling thiserror.workspace = true +# Logging +tracing.workspace = true + [dev-dependencies] # Local crates +kora-config = { path = "../config" } kora-executor = { path = "../executor" } # Commonware diff --git a/crates/node/ledger/src/lib.rs b/crates/node/ledger/src/lib.rs index d19d44d..37bc5bf 100644 --- a/crates/node/ledger/src/lib.rs +++ b/crates/node/ledger/src/lib.rs @@ -5,15 +5,18 @@ #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![cfg_attr(not(test), warn(unused_crate_dependencies))] -use std::{collections::BTreeSet, fmt, sync::Arc}; +mod live_state; + +use std::{collections::BTreeSet, fmt, sync::Arc, time::Duration}; use alloy_primitives::{Address, B256, U256}; +use commonware_consensus::Block as _; use commonware_cryptography::Committable as _; 
-use commonware_runtime::{Metrics as _, tokio}; +use commonware_runtime::{Supervisor as _, tokio}; use futures::{channel::mpsc::UnboundedReceiver, lock::Mutex}; use kora_consensus::{ ConsensusError, Mempool as _, SeedTracker as _, Snapshot, SnapshotStore as _, - components::{InMemoryMempool, InMemorySeedTracker, InMemorySnapshotStore}, + components::{InMemorySeedTracker, InMemorySnapshotStore}, }; use kora_domain::{ Block, BlockId, ConsensusDigest, LedgerEvent, LedgerEvents, StateRoot, Tx, TxId, @@ -22,11 +25,49 @@ use kora_overlay::OverlayState; use kora_qmdb::StateRoot as QmdbStateRoot; use kora_qmdb_ledger::{Error as QmdbError, QmdbChangeSet, QmdbConfig, QmdbLedger, QmdbState}; use kora_traits::{StateDbError, StateDbRead}; +use kora_txpool::{PoolConfig, TransactionPool}; +pub use live_state::LiveState; use thiserror::Error; /// Snapshot type used by the ledger. pub type LedgerSnapshot = Snapshot>; +/// Ledger mempool adapter backed by the transaction pool. +#[derive(Clone, Debug)] +pub struct LedgerMempool { + pool: TransactionPool, +} + +impl LedgerMempool { + /// Create a new ledger mempool adapter. + pub fn new(config: PoolConfig) -> Self { + Self { pool: TransactionPool::new(config) } + } + + /// Return the underlying transaction pool handle. + pub fn txpool(&self) -> TransactionPool { + self.pool.clone() + } +} + +impl kora_consensus::Mempool for LedgerMempool { + fn insert(&self, tx: Tx) -> bool { + kora_txpool::Mempool::insert(&self.pool, tx) + } + + fn build(&self, max_txs: usize, excluded: &BTreeSet) -> Vec { + kora_txpool::Mempool::build(&self.pool, max_txs, excluded) + } + + fn prune(&self, tx_ids: &[TxId]) { + kora_txpool::Mempool::prune(&self.pool, tx_ids); + } + + fn len(&self) -> usize { + kora_txpool::Mempool::len(&self.pool) + } +} + fn tx_ids(txs: &[Tx]) -> BTreeSet { txs.iter().map(Tx::id).collect() } @@ -55,6 +96,9 @@ pub struct LedgerView { inner: Arc>, /// Genesis block stored so the automaton can replay from height 0. 
genesis_block: Block, + /// Notifier signalled whenever a new snapshot is inserted, allowing + /// waiters to be woken event-driven instead of polling with sleep. + snapshot_notify: Arc<::tokio::sync::Notify>, } impl fmt::Debug for LedgerView { @@ -66,9 +110,11 @@ impl fmt::Debug for LedgerView { /// Internal ledger state guarded by the mutex inside `LedgerView`. struct LedgerState { /// Pending transactions that are not yet included in finalized blocks. - mempool: InMemoryMempool, + mempool: LedgerMempool, /// Execution snapshots indexed by digest so we can replay ancestors. snapshots: InMemorySnapshotStore>, + /// Digest of the latest executed snapshot known to the ledger. + head: ConsensusDigest, /// Cached seeds for each digest used to compute prevrandao. seeds: InMemorySeedTracker, /// Underlying QMDB ledger service for persistence. @@ -81,9 +127,55 @@ impl LedgerView { context: tokio::Context, partition_prefix: String, genesis_alloc: Vec<(Address, U256)>, + ) -> LedgerResult { + Self::init_with_genesis_timestamp(context, partition_prefix, genesis_alloc, 0).await + } + + /// Initialize a ledger view with an explicit genesis block timestamp. + pub async fn init_with_genesis_timestamp( + context: tokio::Context, + partition_prefix: String, + genesis_alloc: Vec<(Address, U256)>, + genesis_timestamp: u64, ) -> LedgerResult { let config = QmdbConfig::new(partition_prefix); - Self::init_with_config(context, config, genesis_alloc).await + Self::init_with_config_and_genesis_timestamp( + context, + config, + genesis_alloc, + genesis_timestamp, + ) + .await + } + + /// Initialize a ledger view, optionally applying the genesis allocation to QMDB. 
+ pub async fn init_with_genesis( + context: tokio::Context, + partition_prefix: String, + genesis_alloc: Vec<(Address, U256)>, + apply_genesis: bool, + ) -> LedgerResult { + let config = QmdbConfig::new(partition_prefix); + Self::init_with_config_and_genesis(context, config, genesis_alloc, apply_genesis).await + } + + /// Initialize a ledger view with explicit timestamp and control over genesis allocation. + pub async fn init_with_genesis_options( + context: tokio::Context, + partition_prefix: String, + genesis_alloc: Vec<(Address, U256)>, + apply_genesis: bool, + genesis_timestamp: u64, + ) -> LedgerResult { + let config = QmdbConfig::new(partition_prefix); + Self::init_with_config_and_genesis_options( + context, + config, + genesis_alloc, + apply_genesis, + genesis_timestamp, + ) + .await } /// Initialize a ledger view with an explicit QMDB configuration. @@ -92,16 +184,62 @@ impl LedgerView { config: QmdbConfig, genesis_alloc: Vec<(Address, U256)>, ) -> LedgerResult { - let qmdb = QmdbLedger::init(context.with_label("qmdb"), config, genesis_alloc).await?; + Self::init_with_config_and_genesis(context, config, genesis_alloc, true).await + } + + /// Initialize a ledger view with explicit QMDB and genesis timestamp configuration. + pub async fn init_with_config_and_genesis_timestamp( + context: tokio::Context, + config: QmdbConfig, + genesis_alloc: Vec<(Address, U256)>, + genesis_timestamp: u64, + ) -> LedgerResult { + Self::init_with_config_and_genesis_options( + context, + config, + genesis_alloc, + true, + genesis_timestamp, + ) + .await + } + + /// Initialize a ledger view with control over whether genesis is applied to QMDB. 
+ pub async fn init_with_config_and_genesis( + context: tokio::Context, + config: QmdbConfig, + genesis_alloc: Vec<(Address, U256)>, + apply_genesis: bool, + ) -> LedgerResult { + Self::init_with_config_and_genesis_options(context, config, genesis_alloc, apply_genesis, 0) + .await + } + + /// Initialize a ledger view with explicit QMDB, apply-genesis and timestamp configuration. + pub async fn init_with_config_and_genesis_options( + context: tokio::Context, + config: QmdbConfig, + genesis_alloc: Vec<(Address, U256)>, + apply_genesis: bool, + genesis_timestamp: u64, + ) -> LedgerResult { + let qmdb = QmdbLedger::init_with_genesis( + context.child("qmdb"), + config, + genesis_alloc, + apply_genesis, + ) + .await?; let genesis_root = qmdb.root().await?; - let genesis_block = Block { - parent: BlockId(B256::ZERO), - height: 0, - prevrandao: B256::ZERO, - state_root: genesis_root, - txs: Vec::new(), - }; + let genesis_block = Block::new( + BlockId(B256::ZERO), + 0, + genesis_timestamp, + B256::ZERO, + genesis_root, + Vec::new(), + ); let genesis_digest = genesis_block.commitment(); let state = OverlayState::new(qmdb.state(), QmdbChangeSet::default()); let snapshots = InMemorySnapshotStore::new(); @@ -117,12 +255,14 @@ impl LedgerView { Ok(Self { inner: Arc::new(Mutex::new(LedgerState { - mempool: InMemoryMempool::new(), + mempool: LedgerMempool::new(PoolConfig::default()), snapshots, + head: genesis_digest, seeds: InMemorySeedTracker::new(genesis_digest), qmdb, })), genesis_block, + snapshot_notify: Arc::new(::tokio::sync::Notify::new()), }) } @@ -147,6 +287,22 @@ impl LedgerView { inner.mempool.insert(tx) } + /// Return a handle to the transaction pool. + pub async fn txpool(&self) -> TransactionPool { + let inner = self.inner.lock().await; + inner.mempool.txpool() + } + + /// Return an overlay for the latest executed state known to the ledger. 
+ pub async fn latest_state(&self) -> OverlayState { + let inner = self.inner.lock().await; + inner + .snapshots + .get(&inner.head) + .map(|snapshot| snapshot.state) + .unwrap_or_else(|| OverlayState::new(inner.qmdb.state(), QmdbChangeSet::default())) + } + /// Query a balance at the given digest. pub async fn query_balance(&self, digest: ConsensusDigest, address: Address) -> Option { let snapshot = { @@ -196,21 +352,77 @@ impl LedgerView { qmdb_changes: QmdbChangeSet, txs: &[Tx], ) { - let inner = self.inner.lock().await; + let mut inner = self.inner.lock().await; let ids = tx_ids(txs); inner.snapshots.insert(digest, Snapshot::new(Some(parent), state, root, qmdb_changes, ids)); + inner.head = digest; + drop(inner); + self.snapshot_notify.notify_waiters(); } /// Cache a snapshot that has already been constructed. pub async fn cache_snapshot(&self, digest: ConsensusDigest, snapshot: LedgerSnapshot) { - let inner = self.inner.lock().await; + let mut inner = self.inner.lock().await; + inner.snapshots.insert(digest, snapshot); + inner.head = digest; + drop(inner); + self.snapshot_notify.notify_waiters(); + } + + /// Restore a finalized block as an already-persisted snapshot over the current QMDB state. + pub async fn restore_persisted_snapshot(&self, block: &Block) { + let mut inner = self.inner.lock().await; + let digest = block.commitment(); + let state = OverlayState::new(inner.qmdb.state(), QmdbChangeSet::default()); + let snapshot = Snapshot::new( + Some(block.parent()), + state, + block.state_root, + QmdbChangeSet::default(), + tx_ids(&block.txs), + ); inner.snapshots.insert(digest, snapshot); + inner.snapshots.mark_persisted(&[digest]); + inner.head = digest; + drop(inner); + self.snapshot_notify.notify_waiters(); + } + + /// Wait for a parent snapshot to become available, with a timeout. 
+ /// + /// Instead of polling with fixed sleep intervals, this method awaits the + /// internal [`Notify`](::tokio::sync::Notify) that fires whenever a new + /// snapshot is inserted. Falls back to the timeout if the snapshot never + /// arrives. + pub async fn wait_for_snapshot( + &self, + parent: ConsensusDigest, + timeout: Duration, + ) -> Option { + let deadline = ::tokio::time::Instant::now() + timeout; + loop { + // Register the notification future BEFORE checking the snapshot. + // This eliminates the race window where `notify_waiters()` fires + // between the check and the wait, which would cause a lost + // wake-up and an unnecessary full-timeout delay. + let notified = self.snapshot_notify.notified(); + if let Some(snap) = self.parent_snapshot(parent).await { + return Some(snap); + } + let remaining = deadline.saturating_duration_since(::tokio::time::Instant::now()); + if remaining.is_zero() { + break; + } + // Wait for any snapshot insertion, or the remaining timeout. + let _ = ::tokio::time::timeout(remaining, notified).await; + } + None } /// Fetch the components needed to build a proposal. pub async fn proposal_components( &self, - ) -> (OverlayState, InMemoryMempool, InMemorySnapshotStore>) + ) -> (OverlayState, LedgerMempool, InMemorySnapshotStore>) { let inner = self.inner.lock().await; let root_state = OverlayState::new(inner.qmdb.state(), QmdbChangeSet::default()); @@ -224,22 +436,25 @@ impl LedgerView { pub async fn compute_root( &self, parent: ConsensusDigest, - changes: QmdbChangeSet, + changes: &QmdbChangeSet, ) -> LedgerResult { self.compute_root_from_store(parent, changes).await } /// Compute the deterministic consensus root for a state transition. + /// + /// Takes `changes` by reference to avoid cloning the entire changeset + /// (which contains BTreeMaps of account updates and storage slots). 
pub async fn compute_root_from_store( &self, parent: ConsensusDigest, - changes: QmdbChangeSet, + changes: &QmdbChangeSet, ) -> LedgerResult { let parent_root = { let inner = self.inner.lock().await; inner.snapshots.get(&parent).ok_or(ConsensusError::SnapshotNotFound(parent))?.state_root }; - Ok(StateRoot(QmdbStateRoot::transition(parent_root.0, &changes))) + Ok(StateRoot(QmdbStateRoot::transition(parent_root.0, changes))) } /// Persist `digest` and any missing ancestors to QMDB. @@ -261,15 +476,41 @@ impl LedgerView { }; let result = qmdb.commit_changes(changes).await; - let inner = self.inner.lock().await; - inner.snapshots.clear_persisting_chain(&chain); - match result { - Ok(_) => { - inner.snapshots.mark_persisted(&chain); - Ok(true) + { + let inner = self.inner.lock().await; + inner.snapshots.clear_persisting_chain(&chain); + match result { + Ok(_) => { + for digest in &chain { + let snapshot = inner + .snapshots + .get(digest) + .ok_or(ConsensusError::SnapshotNotFound(*digest))?; + let compact_state = + OverlayState::new(inner.qmdb.state(), QmdbChangeSet::default()); + inner.snapshots.insert( + *digest, + Snapshot::new( + snapshot.parent, + compact_state, + snapshot.state_root, + QmdbChangeSet::default(), + snapshot.tx_ids, + ), + ); + } + inner.snapshots.mark_persisted(&chain); + // Evict oldest persisted snapshots to bound memory usage. + // Must happen inside the ledger mutex to prevent a TOCTOU + // race where another thread reads a snapshot between + // mark_persisted() and eviction. + inner.snapshots.evict_persisted(); + Ok(()) + } + Err(err) => Err(LedgerError::from(err)), } - Err(err) => Err(err.into()), - } + }?; + Ok(true) } /// Remove transactions that are included in a block from the mempool. @@ -278,6 +519,57 @@ impl LedgerView { let tx_ids: Vec = txs.iter().map(Tx::id).collect(); inner.mempool.prune(&tx_ids); } + + /// Remove transactions with stale nonces from the mempool. 
+ /// + /// For each sender with transactions in the pool, queries the finalized + /// QMDB state for the current account nonce and removes all transactions + /// whose nonce is below that value. This catches stale transactions that + /// were not literally included in the finalized block but whose nonces + /// have been consumed by other transactions in earlier blocks. + pub async fn prune_stale_nonces(&self) { + let (pool, qmdb_state) = { + let inner = self.inner.lock().await; + (inner.mempool.txpool(), inner.qmdb.state()) + }; + + let senders = pool.senders(); + if senders.is_empty() { + return; + } + + for sender in senders { + let finalized_nonce = match qmdb_state.nonce(&sender).await { + Ok(n) => n, + Err(err) => { + tracing::warn!(%sender, error = ?err, "failed to query nonce during stale-nonce pruning"); + continue; + } + }; + + // The finalized nonce is the *next* nonce to be used, so all + // transactions with nonce < finalized_nonce are confirmed/stale. + if finalized_nonce > 0 { + pool.remove_confirmed(&sender, finalized_nonce - 1); + } + } + } + + /// Returns `true` if the snapshot for `digest` has been persisted to QMDB + /// (even if the in-memory snapshot data has since been evicted). + pub async fn is_snapshot_persisted(&self, digest: &ConsensusDigest) -> bool { + let inner = self.inner.lock().await; + inner.snapshots.is_persisted(digest) + } + + /// Return snapshot store statistics: `(total, unpersisted)`. + /// + /// - `total`: number of snapshots currently held in memory. + /// - `unpersisted`: number of snapshots not yet persisted to QMDB. + pub async fn snapshot_store_stats(&self) -> (usize, usize) { + let inner = self.inner.lock().await; + (inner.snapshots.len(), inner.snapshots.unpersisted_count()) + } } /// Domain service that exposes high-level ledger commands. @@ -323,6 +615,16 @@ impl LedgerService { inserted } + /// Return a handle to the transaction pool. 
+ pub async fn txpool(&self) -> TransactionPool { + self.view.txpool().await + } + + /// Return an overlay for the latest executed state known to the ledger. + pub async fn latest_state(&self) -> OverlayState { + self.view.latest_state().await + } + /// Query a balance at the given digest. pub async fn query_balance(&self, digest: ConsensusDigest, address: Address) -> Option { self.view.query_balance(digest, address).await @@ -354,6 +656,18 @@ impl LedgerService { self.view.parent_snapshot(parent).await } + /// Wait for a parent snapshot to become available, with a timeout. + /// + /// Uses event-driven notification rather than polling with sleep. + /// See [`LedgerView::wait_for_snapshot`] for details. + pub async fn wait_for_snapshot( + &self, + parent: ConsensusDigest, + timeout: Duration, + ) -> Option { + self.view.wait_for_snapshot(parent, timeout).await + } + /// Insert a new snapshot. pub async fn insert_snapshot( &self, @@ -372,10 +686,15 @@ impl LedgerService { self.view.cache_snapshot(digest, snapshot).await; } + /// Restore a finalized block as an already-persisted snapshot. + pub async fn restore_persisted_snapshot(&self, block: &Block) { + self.view.restore_persisted_snapshot(block).await; + } + /// Fetch proposal components. 
pub async fn proposal_components( &self, - ) -> (OverlayState, InMemoryMempool, InMemorySnapshotStore>) + ) -> (OverlayState, LedgerMempool, InMemorySnapshotStore>) { self.view.proposal_components().await } @@ -385,7 +704,7 @@ impl LedgerService { pub async fn compute_root( &self, parent: ConsensusDigest, - changes: QmdbChangeSet, + changes: &QmdbChangeSet, ) -> LedgerResult { self.view.compute_root(parent, changes).await } @@ -394,7 +713,7 @@ impl LedgerService { pub async fn compute_root_from_store( &self, parent: ConsensusDigest, - changes: QmdbChangeSet, + changes: &QmdbChangeSet, ) -> LedgerResult { self.view.compute_root_from_store(parent, changes).await } @@ -412,17 +731,42 @@ impl LedgerService { pub async fn prune_mempool(&self, txs: &[Tx]) { self.view.prune_mempool(txs).await; } + + /// Remove transactions with stale nonces from the mempool. + /// + /// Delegates to [`LedgerView::prune_stale_nonces`] which queries the + /// finalized QMDB state for each sender in the pool. + pub async fn prune_stale_nonces(&self) { + self.view.prune_stale_nonces().await; + } + + /// Returns `true` if the snapshot for `digest` has been persisted to QMDB + /// (even if the in-memory snapshot data has since been evicted). + pub async fn is_snapshot_persisted(&self, digest: &ConsensusDigest) -> bool { + self.view.is_snapshot_persisted(digest).await + } + + /// Return snapshot store statistics: `(total, unpersisted)`. + /// + /// Delegates to [`LedgerView::snapshot_store_stats`]. 
+ pub async fn snapshot_store_stats(&self) -> (usize, usize) { + self.view.snapshot_store_stats().await + } } #[cfg(test)] mod tests { - use std::sync::atomic::{AtomicUsize, Ordering}; + use std::{ + future::Future, + sync::atomic::{AtomicUsize, Ordering}, + }; use alloy_consensus::Header; use alloy_primitives::{Address, B256, Bytes, U256}; use commonware_cryptography::Committable as _; use commonware_runtime::{Runner, tokio}; use k256::ecdsa::SigningKey; + use kora_config::INITIAL_BASE_FEE; use kora_domain::{Block, ConsensusDigest, Tx, evm::Evm}; use kora_executor::{BlockContext, BlockExecutor, RevmExecutor}; use kora_overlay::OverlayState; @@ -432,8 +776,8 @@ mod tests { static PARTITION_COUNTER: AtomicUsize = AtomicUsize::new(0); - const GENESIS_BALANCE: u64 = 1_000_000; - const DUPLICATE_BALANCE: u64 = 500_000; + const GENESIS_BALANCE: u64 = 1_000_000_000_000_000_000; // 1 ETH in wei + const DUPLICATE_BALANCE: u64 = 1_000_000_000_000_000_000; // 1 ETH in wei const TRANSFER_ONE: u64 = 10; const TRANSFER_TWO: u64 = 5; const TRANSFER_DUPLICATE: u64 = 1; @@ -459,6 +803,26 @@ mod tests { digest: ConsensusDigest, } + fn run_ledger_test(f: F) + where + F: FnOnce(tokio::Context) -> Fut + Send + 'static, + Fut: Future + 'static, + { + let handle = std::thread::Builder::new() + .name("kora-ledger-test".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + let executor = tokio::Runner::default(); + executor.start(f); + }) + .expect("failed to spawn ledger test thread"); + + match handle.join() { + Ok(()) => (), + Err(panic) => std::panic::resume_unwind(panic), + } + } + fn key_from_byte(byte: u8) -> SigningKey { let mut bytes = [0u8; 32]; bytes[0] = byte.max(1); @@ -478,16 +842,18 @@ mod tests { U256::from(value), nonce, GAS_LIMIT_TRANSFER, + INITIAL_BASE_FEE as u128, + 0, ) } - fn block_context(height: u64, prevrandao: B256) -> BlockContext { + fn block_context(height: u64, timestamp: u64, prevrandao: B256) -> BlockContext { let header = Header { number: 
height, - timestamp: height, + timestamp, gas_limit: 30_000_000, beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + base_fee_per_gas: Some(INITIAL_BASE_FEE), ..Default::default() }; BlockContext::new(header, B256::ZERO, prevrandao) @@ -507,6 +873,22 @@ mod tests { LedgerSetup { ledger, service, genesis, genesis_digest } } + #[test] + fn init_uses_configured_genesis_timestamp() { + run_ledger_test(|context| async move { + let ledger = LedgerView::init_with_genesis_timestamp( + context, + next_partition("revm-ledger-genesis-timestamp"), + Vec::new(), + 1_700_000_000, + ) + .await + .expect("init ledger"); + + assert_eq!(ledger.genesis_block().timestamp, 1_700_000_000); + }); + } + async fn build_block_snapshot( service: &LedgerService, parent: &Block, @@ -515,18 +897,16 @@ mod tests { txs: Vec, ) -> BuiltBlock { let executor = RevmExecutor::new(CHAIN_ID); - let context = block_context(height, PREVRANDAO); + let timestamp = Block::next_timestamp(0, parent.timestamp).expect("timestamp overflow"); + let context = block_context(height, timestamp, PREVRANDAO); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); let outcome = executor.execute(&parent_snapshot.state, &context, &txs_bytes).expect("execute txs"); let merged_changes = parent_snapshot.state.merge_changes(outcome.changes.clone()); let parent_digest = parent.commitment(); - let root = service - .compute_root(parent_digest, outcome.changes.clone()) - .await - .expect("compute root"); - let block = - Block { parent: parent.id(), height, prevrandao: PREVRANDAO, state_root: root, txs }; + let root = + service.compute_root(parent_digest, &outcome.changes).await.expect("compute root"); + let block = Block::new(parent.id(), height, timestamp, PREVRANDAO, root, txs); let digest = block.commitment(); let next_state = OverlayState::new(parent_snapshot.state.base(), merged_changes); service @@ -538,8 +918,7 @@ mod tests { #[test] fn persist_snapshot_merges_unpersisted_ancestors() { // Tokio runtime 
required for WrapDatabaseAsync in the QMDB adapter. - let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange let from_key = key_from_byte(FROM_BYTE_A); let to_key = key_from_byte(TO_BYTE_A); @@ -590,11 +969,93 @@ mod tests { }); } + #[test] + fn persist_snapshot_compacts_all_persisted_chain_snapshots() { + // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. + run_ledger_test(|context| async move { + // Arrange + let from_key = key_from_byte(FROM_BYTE_A); + let to_key = key_from_byte(TO_BYTE_A); + let from = Evm::address_from_key(&from_key); + let to = Evm::address_from_key(&to_key); + let setup = setup_ledger( + context, + "revm-ledger-compact-chain", + vec![(from, U256::from(GENESIS_BALANCE)), (to, U256::ZERO)], + ) + .await; + let parent_snapshot = setup + .service + .parent_snapshot(setup.genesis_digest) + .await + .expect("genesis snapshot"); + let block1 = build_block_snapshot( + &setup.service, + &setup.genesis, + parent_snapshot, + HEIGHT_ONE, + vec![transfer_tx(&from_key, to, TRANSFER_ONE, 0)], + ) + .await; + let parent_snapshot = + setup.service.parent_snapshot(block1.digest).await.expect("block1 snapshot"); + let block2 = build_block_snapshot( + &setup.service, + &block1.block, + parent_snapshot, + HEIGHT_TWO, + vec![transfer_tx(&from_key, to, TRANSFER_TWO, 1)], + ) + .await; + + let block1_before = + setup.service.parent_snapshot(block1.digest).await.expect("block1 snapshot"); + let block2_before = + setup.service.parent_snapshot(block2.digest).await.expect("block2 snapshot"); + assert!(!block1_before.changes.is_empty()); + assert!(!block2_before.changes.is_empty()); + + let block1_parent = block1_before.parent; + let block1_state_root = block1_before.state_root; + let block1_tx_ids = block1_before.tx_ids.clone(); + let block2_parent = block2_before.parent; + let block2_state_root = block2_before.state_root; + let block2_tx_ids = block2_before.tx_ids.clone(); 
+ + // Act + let persisted = + setup.ledger.persist_snapshot(block2.digest).await.expect("persist snapshot"); + + // Assert + assert!(persisted); + let block1_after = + setup.service.parent_snapshot(block1.digest).await.expect("block1 snapshot"); + let block2_after = + setup.service.parent_snapshot(block2.digest).await.expect("block2 snapshot"); + + assert!(block1_after.changes.is_empty()); + assert!(block2_after.changes.is_empty()); + assert!( + block1_after.state.changes_is_empty(), + "block1 overlay change set should be empty after compaction" + ); + assert!( + block2_after.state.changes_is_empty(), + "block2 overlay change set should be empty after compaction" + ); + assert_eq!(block1_after.parent, block1_parent); + assert_eq!(block1_after.state_root, block1_state_root); + assert_eq!(block1_after.tx_ids, block1_tx_ids); + assert_eq!(block2_after.parent, block2_parent); + assert_eq!(block2_after.state_root, block2_state_root); + assert_eq!(block2_after.tx_ids, block2_tx_ids); + }); + } + #[test] fn empty_child_inherits_parent_state_root_after_persist() { // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. - let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange: create and persist a non-empty parent, matching the timing that can differ // across validators during consensus. let from_key = key_from_byte(FROM_BYTE_A); @@ -626,7 +1087,7 @@ mod tests { // from local persistence metadata. let empty_root = setup .service - .compute_root(parent.digest, Default::default()) + .compute_root(parent.digest, &Default::default()) .await .expect("compute empty child root"); @@ -638,8 +1099,7 @@ mod tests { #[test] fn persist_snapshot_duplicate_is_noop() { // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. 
- let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange let from_key = key_from_byte(FROM_BYTE_A); let to_key = key_from_byte(TO_BYTE_A); @@ -681,8 +1141,7 @@ mod tests { #[test] fn persist_snapshot_merges_overlays() { // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. - let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange let sender_bytes = [0x11, 0x12, 0x13, 0x14, 0x15]; let recipient_bytes = [0x21, 0x22, 0x23, 0x24, 0x25]; @@ -735,8 +1194,7 @@ mod tests { #[test] fn persist_snapshot_unrelated_merges() { // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. - let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange let from_key_a = key_from_byte(FROM_BYTE_A); let to_key_a = key_from_byte(TO_BYTE_A); @@ -808,8 +1266,7 @@ mod tests { #[test] fn persist_snapshot_updates_snapshot_state() { // Tokio runtime required for WrapDatabaseAsync in the QMDB adapter. - let executor = tokio::Runner::default(); - executor.start(|context| async move { + run_ledger_test(|context| async move { // Arrange let from_key = key_from_byte(FROM_BYTE_A); let to_key = key_from_byte(TO_BYTE_A); diff --git a/crates/node/ledger/src/live_state.rs b/crates/node/ledger/src/live_state.rs new file mode 100644 index 0000000..ae4ad17 --- /dev/null +++ b/crates/node/ledger/src/live_state.rs @@ -0,0 +1,97 @@ +//! Live state adapter for RPC. +//! +//! Wraps [`LedgerService`] to implement [`StateDbRead`] against the latest +//! in-memory overlay state rather than the persisted QMDB checkpoint. +//! +//! Without this, RPC state queries (balance, nonce, code, storage) read from +//! the QMDB persisted store which can lag up to 256 blocks behind the current +//! head. By delegating every read through [`LedgerService::latest_state()`], +//! 
queries always reflect the most recently executed block. + +use alloy_primitives::{Address, B256, Bytes, U256}; +use kora_traits::{StateDbError, StateDbRead}; + +use crate::LedgerService; + +/// A [`StateDbRead`] implementation backed by the live overlay state. +/// +/// On every read, this adapter fetches the latest overlay from the ledger +/// (which includes all in-memory changes since the last QMDB checkpoint) +/// and queries it. This ensures RPC responses reflect the most recent +/// executed block rather than a potentially stale persisted snapshot. +#[derive(Clone, Debug)] +pub struct LiveState { + ledger: LedgerService, +} + +impl LiveState { + /// Create a new live state adapter from a ledger service handle. + #[must_use] + pub const fn new(ledger: LedgerService) -> Self { + Self { ledger } + } +} + +impl StateDbRead for LiveState { + fn nonce( + &self, + address: &Address, + ) -> impl std::future::Future> + Send { + let ledger = self.ledger.clone(); + let address = *address; + async move { + let state = ledger.latest_state().await; + state.nonce(&address).await + } + } + + fn balance( + &self, + address: &Address, + ) -> impl std::future::Future> + Send { + let ledger = self.ledger.clone(); + let address = *address; + async move { + let state = ledger.latest_state().await; + state.balance(&address).await + } + } + + fn code_hash( + &self, + address: &Address, + ) -> impl std::future::Future> + Send { + let ledger = self.ledger.clone(); + let address = *address; + async move { + let state = ledger.latest_state().await; + state.code_hash(&address).await + } + } + + fn code( + &self, + code_hash: &B256, + ) -> impl std::future::Future> + Send { + let ledger = self.ledger.clone(); + let code_hash = *code_hash; + async move { + let state = ledger.latest_state().await; + state.code(&code_hash).await + } + } + + fn storage( + &self, + address: &Address, + slot: &U256, + ) -> impl std::future::Future> + Send { + let ledger = self.ledger.clone(); + let address = 
*address; + let slot = *slot; + async move { + let state = ledger.latest_state().await; + state.storage(&address, &slot).await + } + } +} diff --git a/crates/node/metrics/Cargo.toml b/crates/node/metrics/Cargo.toml new file mode 100644 index 0000000..6b8e150 --- /dev/null +++ b/crates/node/metrics/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "kora-metrics" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +description = "Application-level Prometheus metrics for Kora nodes" + +[lints] +workspace = true + +[dependencies] +prometheus-client.workspace = true diff --git a/crates/node/metrics/src/lib.rs b/crates/node/metrics/src/lib.rs new file mode 100644 index 0000000..4f67d43 --- /dev/null +++ b/crates/node/metrics/src/lib.rs @@ -0,0 +1,288 @@ +//! Application-level Prometheus metrics for Kora nodes. +//! +//! Provides counters, gauges, and histograms for txpool, block building, +//! finalization, and RPC instrumentation. All metrics are registered with +//! the commonware runtime's `Metrics` registry so they appear on the +//! existing `/metrics` endpoint alongside SDK metrics. +#![doc(issue_tracker_base_url = "https://github.com/refcell/kora/issues/")] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![cfg_attr(not(test), warn(unused_crate_dependencies))] + +use prometheus_client::metrics::{ + counter::Counter, family::Family, gauge::Gauge, histogram::Histogram, +}; + +/// Default histogram buckets for block build time (seconds). +const BLOCK_BUILD_BUCKETS: [f64; 9] = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]; + +/// Default histogram buckets for EVM execution time (seconds). +/// +/// Captures the time spent in the EVM executor (`BlockExecutor::execute`) +/// excluding proposal overhead (snapshot lookup, tx selection, state root +/// computation). 
Most executions complete in under 10 ms; the higher +/// buckets detect pathological transactions or state-cache misses. +const EVM_EXEC_BUCKETS: [f64; 9] = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]; + +/// Default histogram buckets for snapshot poll wait time (seconds). +/// +/// Captures the delay between "leader needs parent snapshot" and "snapshot +/// available". Most waits resolve in under 5 ms; the higher buckets detect +/// CPU-contention-related stalls. +const SNAPSHOT_POLL_BUCKETS: [f64; 8] = [0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.15]; + +/// Application-level metrics for a Kora node. +/// +/// Create with [`AppMetrics::new`] and register with +/// [`AppMetrics::register`] against any `commonware_runtime::Metrics` +/// implementor. +#[derive(Debug, Clone)] +pub struct AppMetrics { + // -- Transaction Pool -- + /// Current total number of transactions in the pool. + pub txpool_size: Gauge, + /// Current number of pending (executable) transactions. + pub txpool_pending: Gauge, + /// Current number of queued (future-nonce) transactions. + pub txpool_queued: Gauge, + /// Total rejected transactions, labelled by reason. + pub txpool_rejected: Family, + + // -- Block Building -- + /// Histogram of block build durations in seconds. + pub block_build_time: Histogram, + /// Number of transactions included in the most recently built block. + pub block_txs_included: Gauge, + + // -- Proposal health -- + /// Total proposals skipped because the parent snapshot was not ready + /// after the full poll window. A rising count indicates the execution + /// layer is consistently slower than the consensus layer. + pub proposal_snapshot_misses: Counter, + /// Total proposals skipped because the tip was too far ahead of the + /// last finalized height (proposal lag guard). A rising count means + /// finalization is not keeping up with block production. 
+ pub proposal_lag_skips: Counter, + /// Histogram of time spent waiting for the parent snapshot to become + /// available during `build_block`, in seconds. Only recorded when at + /// least one poll attempt was needed (i.e. the snapshot was not + /// immediately available). + pub snapshot_poll_wait: Histogram, + + // -- Finalization -- + /// Total number of finalization failures. + pub finalization_failures: Counter, + /// Total number of blocks successfully finalized. + pub blocks_finalized: Counter, + + // -- EVM Execution -- + /// Histogram of EVM execution time in seconds (excluding proposal + /// overhead such as snapshot lookup, tx selection, and state root + /// computation). Recorded in both `build_block` and `verify_block`. + pub evm_execution_seconds: Histogram, + + // -- RPC -- + /// Total number of JSON-RPC requests received (including rate-limited). + pub rpc_requests_total: Counter, + + // -- Snapshot Store -- + /// Number of snapshots that have not yet been persisted to QMDB. + /// + /// A rising value under steady-state operation indicates the persistence + /// pipeline is falling behind block production, which leads to unbounded + /// memory growth and increasingly expensive chain walks. + pub unpersisted_snapshot_depth: Gauge, + /// Total number of snapshots currently held in the in-memory store + /// (both persisted and unpersisted). + pub snapshot_store_total: Gauge, + + // -- Transaction Gossip -- + /// Total transactions broadcast to peers via gossip. + pub gossip_tx_broadcast: Counter, + /// Total transactions received from peers via gossip. + pub gossip_tx_received: Counter, + /// Total gossip broadcast failures (send errors). + pub gossip_tx_broadcast_failed: Counter, + /// Total gossip transactions that failed validation. + pub gossip_tx_invalid: Counter, + + // -- Equivocation -- + /// Total equivocation events detected, labelled by type + /// (`conflicting_notarize`, `conflicting_finalize`, `nullify_finalize`). 
+ pub equivocations: Family, +} + +/// Label set for metrics that carry a `reason` dimension. +#[derive(Clone, Debug, Hash, PartialEq, Eq, prometheus_client::encoding::EncodeLabelSet)] +pub struct ReasonLabel { + /// The rejection / error reason. + pub reason: String, +} + +/// Label set for equivocation metrics, distinguishing the type of Byzantine fault. +#[derive(Clone, Debug, Hash, PartialEq, Eq, prometheus_client::encoding::EncodeLabelSet)] +pub struct EquivocationTypeLabel { + /// The equivocation type (`conflicting_notarize`, `conflicting_finalize`, + /// `nullify_finalize`). + pub r#type: String, +} + +impl AppMetrics { + /// Create a new set of application metrics (unregistered). + #[must_use] + pub fn new() -> Self { + Self { + txpool_size: Gauge::default(), + txpool_pending: Gauge::default(), + txpool_queued: Gauge::default(), + txpool_rejected: Family::default(), + block_build_time: Histogram::new(BLOCK_BUILD_BUCKETS), + block_txs_included: Gauge::default(), + proposal_snapshot_misses: Counter::default(), + proposal_lag_skips: Counter::default(), + snapshot_poll_wait: Histogram::new(SNAPSHOT_POLL_BUCKETS), + finalization_failures: Counter::default(), + blocks_finalized: Counter::default(), + evm_execution_seconds: Histogram::new(EVM_EXEC_BUCKETS), + rpc_requests_total: Counter::default(), + unpersisted_snapshot_depth: Gauge::default(), + snapshot_store_total: Gauge::default(), + gossip_tx_broadcast: Counter::default(), + gossip_tx_received: Counter::default(), + gossip_tx_broadcast_failed: Counter::default(), + gossip_tx_invalid: Counter::default(), + equivocations: Family::default(), + } + } + + /// Register all metrics with a commonware runtime `Metrics` provider. + /// + /// Call this once during node startup so that the metrics appear on the + /// `/metrics` endpoint. 
+ pub fn register(&self, registry: &M) { + registry.register( + "kora_txpool_size", + "Current number of transactions in the pool", + self.txpool_size.clone(), + ); + registry.register( + "kora_txpool_pending", + "Current number of pending (executable) transactions", + self.txpool_pending.clone(), + ); + registry.register( + "kora_txpool_queued", + "Current number of queued (future-nonce) transactions", + self.txpool_queued.clone(), + ); + // NOTE: Do not add a `_total` suffix to counter names here. + // The prometheus_client crate automatically appends `_total` to + // counters per the OpenMetrics specification. + registry.register( + "kora_txpool_rejected", + "Total rejected transactions by reason", + self.txpool_rejected.clone(), + ); + registry.register( + "kora_block_build_time_seconds", + "Block build duration in seconds", + self.block_build_time.clone(), + ); + registry.register( + "kora_block_txs_included", + "Transactions in the most recently built block", + self.block_txs_included.clone(), + ); + registry.register( + "kora_proposal_snapshot_misses", + "Proposals skipped due to missing parent snapshot", + self.proposal_snapshot_misses.clone(), + ); + registry.register( + "kora_proposal_lag_skips", + "Proposals skipped due to finalization lag guard", + self.proposal_lag_skips.clone(), + ); + registry.register( + "kora_snapshot_poll_wait_seconds", + "Time waiting for parent snapshot during block build", + self.snapshot_poll_wait.clone(), + ); + registry.register( + "kora_finalization_failures", + "Total finalization failures", + self.finalization_failures.clone(), + ); + registry.register( + "kora_blocks_finalized", + "Total blocks successfully finalized", + self.blocks_finalized.clone(), + ); + registry.register( + "kora_evm_execution_seconds", + "EVM execution time per block in seconds", + self.evm_execution_seconds.clone(), + ); + registry.register( + "kora_rpc_requests", + "Total JSON-RPC requests received", + self.rpc_requests_total.clone(), + ); + 
registry.register( + "kora_unpersisted_snapshot_depth", + "Number of in-memory snapshots not yet persisted to QMDB", + self.unpersisted_snapshot_depth.clone(), + ); + registry.register( + "kora_snapshot_store_total", + "Total snapshots currently held in the in-memory store", + self.snapshot_store_total.clone(), + ); + registry.register( + "kora_gossip_tx_broadcast", + "Total transactions broadcast to peers via gossip", + self.gossip_tx_broadcast.clone(), + ); + registry.register( + "kora_gossip_tx_received", + "Total transactions received from peers via gossip", + self.gossip_tx_received.clone(), + ); + registry.register( + "kora_gossip_tx_broadcast_failed", + "Total gossip broadcast failures", + self.gossip_tx_broadcast_failed.clone(), + ); + registry.register( + "kora_gossip_tx_invalid", + "Total gossip transactions that failed validation", + self.gossip_tx_invalid.clone(), + ); + registry.register( + "kora_equivocations", + "Total equivocation events detected by type", + self.equivocations.clone(), + ); + } +} + +impl Default for AppMetrics { + fn default() -> Self { + Self::new() + } +} + +/// Trait abstracting the `register` method from `commonware_runtime::Metrics`. +/// +/// This avoids pulling the entire commonware-runtime dependency into this +/// leaf crate. The runtime context already implements this via the `Metrics` +/// trait; callers just need to provide a thin adapter that forwards each +/// call to it (this crate does not ship an implementation). +pub trait MetricsRegister { + /// Register a single metric.
+ fn register, H: Into>( + &self, + name: N, + help: H, + metric: impl prometheus_client::registry::Metric, + ); +} diff --git a/crates/node/reporters/Cargo.toml b/crates/node/reporters/Cargo.toml index 82288ae..653b35f 100644 --- a/crates/node/reporters/Cargo.toml +++ b/crates/node/reporters/Cargo.toml @@ -15,6 +15,7 @@ workspace = true kora-consensus = { path = "../consensus" } kora-domain = { path = "../domain" } kora-executor = { path = "../executor" } +kora-metrics = { path = "../metrics" } kora-indexer = { path = "../../storage/indexer" } kora-ledger = { path = "../ledger" } kora-overlay = { path = "../../storage/overlay" } @@ -22,6 +23,7 @@ kora-qmdb-ledger = { path = "../../storage/qmdb-ledger" } kora-rpc = { path = "../rpc" } # Commonware +commonware-actor.workspace = true commonware-codec.workspace = true commonware-consensus.workspace = true commonware-cryptography.workspace = true @@ -33,5 +35,16 @@ alloy-consensus.workspace = true alloy-eips.workspace = true alloy-primitives.workspace = true +# Error handling +thiserror.workspace = true + +# Async runtime +tokio.workspace = true + # Tracing tracing.workspace = true + +[dev-dependencies] +k256.workspace = true +sha3.workspace = true +tempfile.workspace = true diff --git a/crates/node/reporters/src/gc_log.rs b/crates/node/reporters/src/gc_log.rs new file mode 100644 index 0000000..a8abdfb --- /dev/null +++ b/crates/node/reporters/src/gc_log.rs @@ -0,0 +1,167 @@ +//! Append-only GC log for selfdestructed contract addresses. +//! +//! When a contract selfdestructs, its account entry in QMDB is deleted and the +//! generation counter is incremented so new storage writes use a fresh +//! namespace. However, the old storage entries (keyed by the previous +//! generation) remain on disk indefinitely because Commonware does not yet +//! support prefix-based key scanning or bulk deletion. +//! +//! This module records every selfdestructed address together with the block +//! height at which it was finalized. 
A future garbage collector can read this +//! log and reclaim the orphaned storage entries once the upstream storage layer +//! adds the necessary primitives. +//! +//! The log format is newline-delimited text, one entry per line: +//! +//! ```text +//! {block_height},{address} +//! ``` +//! +//! This format is intentionally simple and human-readable to aid debugging and +//! operational tooling. Each batch of entries is flushed as soon as it has been +//! written so the log survives crashes. + +use std::{ + fs::{File, OpenOptions}, + io::{BufWriter, Write as _}, + path::{Path, PathBuf}, + sync::Mutex, +}; + +use alloy_primitives::Address; +use tracing::{info, warn}; + +/// Default filename for the GC log within the data directory. +const GC_LOG_FILENAME: &str = "selfdestruct-gc.log"; + +/// Append-only log tracking selfdestructed addresses for future garbage +/// collection. +/// +/// Each entry records the finalized block height and the selfdestructed +/// contract address. The log is safe to truncate or delete -- the worst case +/// is that some orphaned storage is never reclaimed. +pub struct SelfdestructGcLog { + writer: Mutex>, + path: PathBuf, +} + +impl std::fmt::Debug for SelfdestructGcLog { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SelfdestructGcLog").field("path", &self.path).finish() + } +} + +impl SelfdestructGcLog { + /// Open or create the GC log at `dir/selfdestruct-gc.log`. + /// + /// The file is opened in append mode. If the directory does not exist it + /// is created. + /// + /// # Errors + /// + /// Returns an I/O error if the file cannot be opened or the directory + /// cannot be created. + pub fn open(dir: &Path) -> std::io::Result { + std::fs::create_dir_all(dir)?; + let path = dir.join(GC_LOG_FILENAME); + let file = OpenOptions::new().create(true).append(true).open(&path)?; + Ok(Self { writer: Mutex::new(BufWriter::new(file)), path }) + } + + /// Record one or more selfdestructed addresses from a finalized block.
+ /// + /// Each address is written as a separate line. The buffer is flushed after + /// all addresses in the batch are written so that the log is durable even + /// if the process crashes shortly after. + pub fn record(&self, block_height: u64, addresses: &[Address]) { + if addresses.is_empty() { + return; + } + + let mut writer = match self.writer.lock() { + Ok(w) => w, + Err(e) => { + warn!(error = %e, "GC log mutex poisoned; skipping write"); + return; + } + }; + + for address in addresses { + if let Err(e) = writeln!(writer, "{},{}", block_height, address) { + warn!( + block_height, + address = ?address, + error = %e, + "failed to write selfdestruct GC entry" + ); + return; + } + } + + if let Err(e) = writer.flush() { + warn!(block_height, error = %e, "failed to flush selfdestruct GC log"); + } else { + info!( + block_height, + count = addresses.len(), + path = %self.path.display(), + "recorded selfdestructed addresses for GC" + ); + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Read as _; + + use super::*; + + #[test] + fn record_writes_entries_and_flushes() { + let dir = tempfile::tempdir().expect("create tempdir"); + let gc_log = SelfdestructGcLog::open(dir.path()).expect("open gc log"); + + let addr1 = Address::repeat_byte(0x11); + let addr2 = Address::repeat_byte(0x22); + + gc_log.record(42, &[addr1, addr2]); + gc_log.record(43, &[addr1]); + + let mut contents = String::new(); + File::open(dir.path().join(GC_LOG_FILENAME)) + .expect("open log file") + .read_to_string(&mut contents) + .expect("read log file"); + + let lines: Vec<&str> = contents.lines().collect(); + assert_eq!(lines.len(), 3); + assert!(lines[0].starts_with("42,0x"), "expected 0x prefix: {}", lines[0]); + assert!(lines[0].to_lowercase().contains("1111111111111111111111111111111111111111")); + assert!(lines[1].starts_with("42,0x"), "expected 0x prefix: {}", lines[1]); + assert!(lines[1].to_lowercase().contains("2222222222222222222222222222222222222222")); + 
assert!(lines[2].starts_with("43,0x"), "expected 0x prefix: {}", lines[2]); + } + + #[test] + fn record_empty_is_noop() { + let dir = tempfile::tempdir().expect("create tempdir"); + let gc_log = SelfdestructGcLog::open(dir.path()).expect("open gc log"); + + gc_log.record(1, &[]); + + let metadata = std::fs::metadata(dir.path().join(GC_LOG_FILENAME)).expect("metadata"); + assert_eq!(metadata.len(), 0); + } + + #[test] + fn open_creates_directory() { + let dir = tempfile::tempdir().expect("create tempdir"); + let nested = dir.path().join("deeply").join("nested"); + let gc_log = SelfdestructGcLog::open(&nested).expect("open gc log"); + + gc_log.record(1, &[Address::ZERO]); + + assert!(nested.join(GC_LOG_FILENAME).exists()); + } +} diff --git a/crates/node/reporters/src/lib.rs b/crates/node/reporters/src/lib.rs index d998599..5c554a2 100644 --- a/crates/node/reporters/src/lib.rs +++ b/crates/node/reporters/src/lib.rs @@ -4,31 +4,69 @@ #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![cfg_attr(not(test), warn(unused_crate_dependencies))] -use std::{fmt, marker::PhantomData, sync::Arc}; +mod gc_log; -use alloy_consensus::{Transaction as _, TxEnvelope, transaction::SignerRecoverable as _}; +use std::{ + fmt, + marker::PhantomData, + sync::{Arc, Mutex}, + time::Duration, +}; + +use alloy_consensus::{ + ReceiptEnvelope, ReceiptWithBloom, Transaction as _, TxEnvelope, + proofs::{calculate_receipt_root, calculate_transaction_root}, + transaction::{SignerRecoverable as _, to_eip155_value}, +}; use alloy_eips::eip2718::Decodable2718 as _; -use alloy_primitives::{B256, Bytes, keccak256}; +use alloy_primitives::{B256, Bloom, Bytes, U256, keccak256, logs_bloom}; +use commonware_actor::Feedback; use commonware_consensus::{ - Block as _, Reporter, + Block as _, Reporter, Viewable as _, marshal::Update, simplex::{ scheme::bls12381_threshold::vrf::{Scheme, Seedable as _}, - types::Activity, + types::{Activity, Attributable as _}, }, }; use commonware_cryptography::{Committable 
as _, bls12381::primitives::variant::Variant}; -use commonware_runtime::{Spawner as _, tokio}; -use commonware_utils::acknowledgement::Acknowledgement as _; +use commonware_runtime::{Spawner as _, Supervisor as _, tokio}; +use commonware_utils::acknowledgement::{Acknowledgement as _, Exact}; +pub use gc_log::SelfdestructGcLog; use kora_consensus::BlockExecution; -use kora_domain::{Block, ConsensusDigest, PublicKey}; +use kora_domain::{Block, ConsensusDigest, MempoolEvent, PublicKey, StateRoot}; use kora_executor::{BlockContext, BlockExecutor, ExecutionOutcome}; use kora_indexer::{BlockIndex, IndexedBlock, IndexedLog, IndexedReceipt, IndexedTransaction}; -use kora_ledger::LedgerService; +use kora_ledger::{LedgerError, LedgerService}; +use kora_metrics::{AppMetrics, EquivocationTypeLabel}; use kora_overlay::OverlayState; use kora_qmdb_ledger::QmdbState; -use kora_rpc::NodeState; -use tracing::{error, trace, warn}; +use kora_rpc::{MempoolEventSender, NodeState}; +use thiserror::Error; +use tracing::{error, info, trace, warn}; + +#[cfg(test)] +fn run_reporter_test(f: F) +where + F: FnOnce(tokio::Context) -> Fut + Send + 'static, + Fut: std::future::Future + 'static, +{ + let handle = std::thread::Builder::new() + .name("kora-reporters-test".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + use commonware_runtime::Runner as _; + + let runner = tokio::Runner::default(); + runner.start(f); + }) + .expect("failed to spawn reporters test thread"); + + match handle.join() { + Ok(()) => (), + Err(panic) => std::panic::resume_unwind(panic), + } +} /// Provides block execution context for finalized block verification. pub trait BlockContextProvider: Clone + Send + Sync + 'static { @@ -36,6 +74,71 @@ pub trait BlockContextProvider: Clone + Send + Sync + 'static { fn context(&self, block: &Block) -> BlockContext; } +/// Maximum number of attempts for transient finalization failures. 
+const MAX_FINALIZATION_ATTEMPTS: u32 = 3; + +/// Base delay between retry attempts (doubles after each failed attempt: 100ms, then 200ms; no delay follows the final attempt). +const FINALIZATION_RETRY_BASE: Duration = Duration::from_millis(100); + +/// Default QMDB checkpoint cadence. A value of 1 preserves per-block persistence. +const DEFAULT_CHECKPOINT_INTERVAL: u64 = 1; + +/// Errors that can occur during block finalization. +/// +/// Each variant corresponds to a specific failure mode so callers can +/// distinguish transient errors (worth retrying) from permanent ones +/// (indicating state divergence or eviction). +#[derive(Debug, Error)] +enum FinalizationError { + /// Block execution failed during finalization replay. + #[error("execution failed: {0}")] + ExecutionFailed(#[source] Box), + + /// QMDB root computation failed. + #[error("root computation failed: {0}")] + RootComputationFailed(#[source] LedgerError), + + /// Computed state root does not match the block's declared root. + /// This is a deterministic mismatch and is NOT retryable. + #[error("state root mismatch: expected {expected:?}, computed {computed:?}")] + StateRootMismatch { expected: StateRoot, computed: StateRoot }, + + /// The spawned persistence task panicked or was cancelled. + #[error("persist task failed: {0}")] + PersistTaskFailed(String), + + /// QMDB persistence returned an error. + #[error("persist failed: {0}")] + PersistFailed(#[source] LedgerError), +} + +impl FinalizationError { + /// Returns `true` if this error is potentially transient and the operation + /// should be retried. + const fn is_retryable(&self) -> bool { + match self { + // Deterministic: local state has diverged, retry produces the same mismatch. + Self::StateRootMismatch { .. } => false, + // All other failures may be transient (I/O, OOM, race condition).
+ Self::ExecutionFailed(_) + | Self::RootComputationFailed(_) + | Self::PersistTaskFailed(_) + | Self::PersistFailed(_) => true, + } + } + + /// Returns a static label suitable for Prometheus metric labels. + const fn metric_label(&self) -> &'static str { + match self { + Self::ExecutionFailed(_) => "execution_failed", + Self::RootComputationFailed(_) => "root_computation_failed", + Self::StateRootMismatch { .. } => "state_root_mismatch", + Self::PersistTaskFailed(_) => "persist_task_failed", + Self::PersistFailed(_) => "persist_failed", + } + } +} + /// Helper function for SeedReporter::report that owns all its inputs. async fn seed_report_inner( state: LedgerService, @@ -58,7 +161,33 @@ async fn seed_report_inner( ) .await; } - _ => {} + Activity::ConflictingNotarize(ref proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: conflicting notarize detected" + ); + } + Activity::ConflictingFinalize(ref proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: conflicting finalize detected" + ); + } + Activity::NullifyFinalize(ref proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: nullify-finalize conflict detected" + ); + } + // Normal per-vote and aggregate events that don't affect seed state. 
+ Activity::Notarize(_) + | Activity::Certification(_) + | Activity::Nullify(_) + | Activity::Nullification(_) + | Activity::Finalize(_) => {} } } @@ -94,20 +223,28 @@ where { type Activity = Activity, ConsensusDigest>; - fn report(&mut self, activity: Self::Activity) -> impl std::future::Future + Send { + fn report(&mut self, activity: Self::Activity) -> Feedback { let state = self.state.clone(); - async move { + ::tokio::spawn(async move { seed_report_inner(state, activity).await; - } + }); + Feedback::Ok } } +#[allow(clippy::too_many_arguments)] async fn handle_finalized_update( state: LedgerService, context: tokio::Context, executor: E, provider: P, block_index: Option>, + mempool_broadcast: Option, + gc_log: Option>, + metrics: Option, + checkpoint_interval: u64, + pending_acks: Arc>>, + node_state: Option, update: Update, ) where E: BlockExecutor, Tx = Bytes>, @@ -116,119 +253,777 @@ async fn handle_finalized_update( match update { Update::Tip(..) => {} Update::Block(block, ack) => { - let digest = block.commitment(); - let snapshot_exists = state.query_state_root(digest).await.is_some(); - let mut execution_outcome = None; - let mut execution_context = None; - - if !snapshot_exists || block_index.is_some() { - if snapshot_exists { - trace!(?digest, "re-executing finalized block for RPC indexing"); + if let Some(ref ns) = node_state { + ns.set_finalized_height(block.height); + } + let persist_checkpoint = + checkpoint_interval <= 1 || block.height.is_multiple_of(checkpoint_interval); + let result = finalize_with_retry( + &state, + &context, + &executor, + &provider, + block_index.as_ref(), + &block, + persist_checkpoint, + ) + .await; + + // Record finalization result in metrics. 
+ if let Some(ref m) = metrics { + if result.is_ok() { + m.blocks_finalized.inc(); } else { - trace!(?digest, "missing snapshot for finalized block; re-executing"); + m.finalization_failures.inc(); } - let parent_digest = block.parent(); - if let Some(parent_snapshot) = state.parent_snapshot(parent_digest).await { - let block_context = provider.context(&block); - let execution = match BlockExecution::execute( - &parent_snapshot, - &executor, - &block_context, - &block.txs, - ) - .await - { - Ok(result) => result, - Err(err) => { - error!(?digest, error = ?err, "failed to execute finalized block"); - ack.acknowledge(); - return; - } - }; - - let state_root = match state - .compute_root_from_store(parent_digest, execution.outcome.changes.clone()) - .await - { - Ok(root) => root, - Err(err) => { - error!(?digest, error = ?err, "failed to compute qmdb root"); - ack.acknowledge(); - return; - } - }; - if state_root != block.state_root { - warn!( - ?digest, - expected = ?block.state_root, - computed = ?state_root, - "state root mismatch for finalized block" - ); - ack.acknowledge(); - return; - } - if !snapshot_exists { - let merged_changes = - parent_snapshot.state.merge_changes(execution.outcome.changes.clone()); - let next_state = - OverlayState::new(parent_snapshot.state.base(), merged_changes); - state - .insert_snapshot( - digest, - parent_digest, - next_state, - state_root, - execution.outcome.changes.clone(), - &block.txs, - ) - .await; + // Update snapshot store depth gauges so operators can detect + // when the persistence pipeline falls behind block production. + let (total, unpersisted) = state.snapshot_store_stats().await; + m.snapshot_store_total.set(total as i64); + m.unpersisted_snapshot_depth.set(unpersisted as i64); + } + + // If finalization permanently failed, the node's QMDB state has + // diverged from the consensus chain. 
Continuing would produce + // incorrect state roots for all subsequent blocks, cause failed + // proposals when this node is leader, and vote against valid blocks + // from other validators. + // + // We deliberately do NOT acknowledge the checkpoint to the marshal + // so it does not garbage-collect data that was never persisted. + // Then we abort the process to prevent silent state divergence. + // + // See: https://github.com/Nunchi-trade/daeji/issues/269 + if let Err(ref e) = result { + error!( + block_height = block.height, + error = %e, + error_kind = e.metric_label(), + "FATAL: finalization permanently failed -- \ + aborting to prevent state divergence. \ + The node must be restarted after investigating the root cause." + ); + // Prune mempool before halting so a restart does not re-propose + // transactions from the finalized block. + state.prune_mempool(&block.txs).await; + // Allow a brief window for log buffers to flush. + ::tokio::time::sleep(Duration::from_millis(200)).await; + std::process::abort(); + } + + if let Ok((Some(outcome), Some(block_context))) = result.as_ref() { + if let Some(index) = block_index.as_ref() { + index_finalized_block(index, &block, block_context, outcome); + // Prune old blocks to bound memory usage (see issue #262). + let min_height = block.height.saturating_sub(BlockIndex::MAX_RETAINED_BLOCKS); + if min_height > 0 { + index.prune_before(min_height); } + } - execution_outcome = Some(execution.outcome); - execution_context = Some(block_context); - } else if snapshot_exists { - warn!( - ?digest, - ?parent_digest, - "missing parent snapshot for cached finalized block; skipping RPC indexing replay" - ); - } else { - error!(?digest, ?parent_digest, "missing parent snapshot for finalized block"); - ack.acknowledge(); - return; + // Record selfdestructed addresses for future GC. 
+ if !outcome.selfdestructed_addresses.is_empty() + && let Some(ref log) = gc_log + { + log.record(block.height, &outcome.selfdestructed_addresses); } - } else { - trace!(?digest, "using cached snapshot for finalized block"); } - let persist_state = state.clone(); - let persist_handle = context - .shared(true) - .spawn(move |_| async move { persist_state.persist_snapshot(digest).await }); - let persist_result = match persist_handle.await { - Ok(result) => result, - Err(err) => { - error!(?digest, error = ?err, "persist task failed"); - ack.acknowledge(); - return; + + acknowledge_checkpoint(pending_acks, block.height, checkpoint_interval, ack).await; + + // Prune the mempool -- the block is consensus-finalized, so its + // transactions must never be re-proposed. + state.prune_mempool(&block.txs).await; + + // Evict any remaining transactions whose nonces are now stale + // relative to finalized state. + state.prune_stale_nonces().await; + + publish_mempool_inclusions(mempool_broadcast.as_ref(), &block); + } + } +} + +async fn acknowledge_checkpoint( + pending_acks: Arc>>, + height: u64, + checkpoint_interval: u64, + ack: Exact, +) { + let is_checkpoint = checkpoint_interval <= 1 || height.is_multiple_of(checkpoint_interval); + if is_checkpoint { + // Checkpoint boundary reached: acknowledge this block and all pending + // blocks from previous non-checkpoint heights. This tells the marshal + // that all blocks up through this checkpoint are durably persisted + // (QMDB has been fsynced and the archive has been fsynced). + let pending = { + let mut guard = pending_acks.lock().expect("pending_acks mutex poisoned"); + std::mem::take(&mut *guard) + }; + for pending_ack in pending { + pending_ack.acknowledge(); + } + ack.acknowledge(); + } else { + // Between checkpoints: defer acknowledgment until the next boundary. 
+ let mut guard = pending_acks.lock().expect("pending_acks mutex poisoned"); + guard.push(ack); + } +} + +/// Retry wrapper around [`finalize_block`] that retries transient failures +/// with exponential backoff. +/// +/// Non-retryable errors (state root mismatch, evicted parent snapshot) are +/// returned immediately. Transient errors are retried up to +/// [`MAX_FINALIZATION_ATTEMPTS`] times with delays of 100ms, 200ms, 400ms, etc. +async fn finalize_with_retry( + state: &LedgerService, + context: &tokio::Context, + executor: &E, + provider: &P, + block_index: Option<&Arc>, + block: &Block, + persist_checkpoint: bool, +) -> Result<(Option, Option), FinalizationError> +where + E: BlockExecutor, Tx = Bytes>, + P: BlockContextProvider, +{ + let digest = block.commitment(); + let mut last_err = None; + + for attempt in 0..MAX_FINALIZATION_ATTEMPTS { + match finalize_block( + state, + context, + executor, + provider, + block_index, + block, + persist_checkpoint, + ) + .await + { + Ok(result) => { + if attempt > 0 { + info!(?digest, attempt, "finalization succeeded after retry"); } - }; - if let Err(err) = persist_result { - error!(?digest, error = ?err, "failed to persist finalized block"); - ack.acknowledge(); - return; + return Ok(result); } - if let (Some(index), Some(outcome), Some(block_context)) = - (block_index.as_ref(), execution_outcome.as_ref(), execution_context.as_ref()) - { - index_finalized_block(index, &block, block_context, outcome); + Err(e) if e.is_retryable() && attempt < MAX_FINALIZATION_ATTEMPTS - 1 => { + let delay = FINALIZATION_RETRY_BASE * 2u32.pow(attempt); + warn!( + ?digest, + attempt = attempt + 1, + max_attempts = MAX_FINALIZATION_ATTEMPTS, + delay_ms = delay.as_millis() as u64, + error = %e, + error_kind = e.metric_label(), + "finalization failed with transient error, retrying" + ); + ::tokio::time::sleep(delay).await; + last_err = Some(e); + } + Err(e) => { + // Either non-retryable or final attempt exhausted. 
+ error!( + ?digest, + attempt = attempt + 1, + max_attempts = MAX_FINALIZATION_ATTEMPTS, + error = %e, + error_kind = e.metric_label(), + retryable = e.is_retryable(), + block_height = block.height, + parent = ?block.parent(), + state_root = ?block.state_root, + tx_count = block.txs.len(), + "CRITICAL: finalization failed permanently -- \ + consensus-agreed block will NOT be persisted to QMDB, \ + node state may diverge from the network" + ); + return Err(e); } - state.prune_mempool(&block.txs).await; - // Marshal waits for the application to acknowledge processing before advancing the - // delivery floor. Without this, the node can stall on finalized block delivery. - ack.acknowledge(); } } + + // Defensive fallthrough: with MAX_FINALIZATION_ATTEMPTS >= 1 the final loop iteration + // always returns, so this runs only when it is 0 and the `expect` below then panics. + let e = last_err.expect("at least one attempt was made"); + error!( + ?digest, + attempts = MAX_FINALIZATION_ATTEMPTS, + error = %e, + error_kind = e.metric_label(), + block_height = block.height, + parent = ?block.parent(), + state_root = ?block.state_root, + tx_count = block.txs.len(), + "CRITICAL: finalization retries exhausted -- \ + consensus-agreed block will NOT be persisted to QMDB, \ + node state may diverge from the network" + ); + Err(e) +} + +/// Inner helper that performs the fallible finalization work for a single block. +/// +/// Returns `Ok((execution_outcome, execution_context))` on success, where the +/// inner `Option`s may be `None` when a cached snapshot was reused without +/// re-execution. Returns a typed [`FinalizationError`] on failure so the +/// caller can decide whether to retry.
+async fn finalize_block( + state: &LedgerService, + context: &tokio::Context, + executor: &E, + provider: &P, + block_index: Option<&Arc>, + block: &Block, + persist_checkpoint: bool, +) -> Result<(Option, Option), FinalizationError> +where + E: BlockExecutor, Tx = Bytes>, + P: BlockContextProvider, +{ + let digest = block.commitment(); + let snapshot_exists = state.query_state_root(digest).await.is_some(); + let mut execution_outcome = None; + let mut execution_context = None; + + if !snapshot_exists || block_index.is_some() { + if snapshot_exists { + trace!(?digest, "re-executing finalized block for RPC indexing"); + } else { + trace!(?digest, "missing snapshot for finalized block; re-executing"); + } + let parent_digest = block.parent(); + + // Retry parent snapshot lookup with exponential backoff. A concurrent + // persist_snapshot() call may be evicting or replacing snapshots; a + // brief retry window avoids spurious "missing parent" failures that + // would otherwise nullify the view. 
+ const MAX_PARENT_RETRIES: u32 = 3; + const PARENT_RETRY_BASE_MS: u64 = 10; + + let mut parent_snapshot = state.parent_snapshot(parent_digest).await; + if parent_snapshot.is_none() && !snapshot_exists { + for attempt in 1..=MAX_PARENT_RETRIES { + let delay = Duration::from_millis(PARENT_RETRY_BASE_MS << (attempt - 1)); + warn!( + ?digest, + ?parent_digest, + attempt, + ?delay, + "parent snapshot not found, retrying" + ); + ::tokio::time::sleep(delay).await; + parent_snapshot = state.parent_snapshot(parent_digest).await; + if parent_snapshot.is_some() { + break; + } + } + } + + if let Some(parent_snapshot) = parent_snapshot { + let block_context = provider.context(block); + let execution = + BlockExecution::execute(&parent_snapshot, executor, &block_context, &block.txs) + .await + .map_err(|err| FinalizationError::ExecutionFailed(Box::new(err)))?; + + let state_root = state + .compute_root_from_store(parent_digest, &execution.outcome.changes) + .await + .map_err(FinalizationError::RootComputationFailed)?; + + if state_root != block.state_root { + return Err(FinalizationError::StateRootMismatch { + expected: block.state_root, + computed: state_root, + }); + } + + if !snapshot_exists { + let merged_changes = + parent_snapshot.state.merge_changes(execution.outcome.changes.clone()); + let next_state = OverlayState::new(parent_snapshot.state.base(), merged_changes); + state + .insert_snapshot( + digest, + parent_digest, + next_state, + state_root, + execution.outcome.changes.clone(), + &block.txs, + ) + .await; + } + + execution_outcome = Some(execution.outcome); + execution_context = Some(block_context); + } else if snapshot_exists { + warn!( + ?digest, + ?parent_digest, + "missing parent snapshot for cached finalized block; skipping RPC indexing replay" + ); + } else { + // Parent snapshot is missing and the block's own snapshot is also + // missing. 
This can happen during catch-up when blocks arrive + // faster than they can be verified, or after a restart when + // eviction races with finalization. + // + // Rather than permanently failing (which stalls the finalization + // pipeline), restore the block as a persisted snapshot over the + // current QMDB state. The snapshot won't have correct overlay + // changes, but the block is consensus-finalized so the state + // root is authoritative. The QMDB commit path uses the + // declared state root, not the overlay, so persistence is safe. + let is_evicted = state.is_snapshot_persisted(&parent_digest).await; + warn!( + ?digest, + ?parent_digest, + parent_evicted = is_evicted, + height = block.height, + "finalize_block: parent snapshot unavailable; restoring block as \ + trusted persisted snapshot to unblock finalization pipeline" + ); + state.restore_persisted_snapshot(block).await; + // After restoring, the snapshot exists so persistence can + // proceed. We do not have execution results for RPC indexing, + // but that is acceptable: the alternative was permanent failure. 
+ } + } else { + trace!(?digest, "using cached snapshot for finalized block"); + } + if persist_checkpoint { + let persist_state = state.clone(); + let persist_handle = context + .child("persist") + .shared(true) + .spawn(move |_| async move { persist_state.persist_snapshot(digest).await }); + let persist_result = persist_handle + .await + .map_err(|err| FinalizationError::PersistTaskFailed(format!("{err}")))?; + if let Err(err) = persist_result { + return Err(FinalizationError::PersistFailed(err)); + } + } + + Ok((execution_outcome, execution_context)) +} + +fn publish_mempool_inclusions(mempool_broadcast: Option<&MempoolEventSender>, block: &Block) { + let Some(sender) = mempool_broadcast else { + return; + }; + + let block_hash = block.id().0; + for tx in &block.txs { + let _ = sender.send(MempoolEvent::TxIncluded { + hash: keccak256(&tx.bytes), + block_number: block.height, + block_hash, + }); + } +} + +#[cfg(test)] +mod mempool_tests { + use alloy_primitives::{B256, Bytes, keccak256}; + use kora_domain::{BlockId, StateRoot, Tx}; + + use super::*; + + #[test] + fn publish_mempool_inclusions_broadcasts_tx_included() { + let (sender, mut receiver) = kora_rpc::mempool_event_channel(); + let tx = Tx::new(Bytes::from_static(&[0x01, 0x02, 0x03])); + let block = Block::new( + BlockId(B256::ZERO), + 7, + 0, + B256::ZERO, + StateRoot(B256::ZERO), + vec![tx.clone()], + ); + let block_hash = block.id().0; + + publish_mempool_inclusions(Some(&sender), &block); + + assert_eq!( + receiver.try_recv().unwrap(), + MempoolEvent::TxIncluded { + hash: keccak256(&tx.bytes), + block_number: block.height, + block_hash, + } + ); + } +} + +#[cfg(test)] +mod finalize_error_tests { + use std::sync::atomic::{AtomicUsize, Ordering}; + + use alloy_consensus::Header; + use alloy_primitives::{B256, Bytes}; + use kora_domain::StateRoot; + use kora_executor::ExecutionError; + use kora_ledger::LedgerView; + + use super::*; + + static PARTITION_COUNTER: AtomicUsize = AtomicUsize::new(10_000); + + 
fn next_partition(prefix: &str) -> String { + let id = PARTITION_COUNTER.fetch_add(1, Ordering::Relaxed); + format!("{prefix}-{id}") + } + + /// A block executor that always returns an error. + /// + /// Used to force `finalize_with_retry` into an error path so the caller + /// can verify that permanent failures are surfaced correctly. + #[derive(Clone)] + struct FailingExecutor; + + impl BlockExecutor> for FailingExecutor { + type Tx = Bytes; + + fn execute( + &self, + _state: &OverlayState, + _context: &BlockContext, + _txs: &[Bytes], + ) -> Result { + Err(ExecutionError::TxExecution("injected test failure".into())) + } + + fn validate_header(&self, _header: &Header) -> Result<(), ExecutionError> { + Ok(()) + } + } + + /// A trivial block-context provider for tests. + #[derive(Clone)] + struct StubProvider; + + impl BlockContextProvider for StubProvider { + fn context(&self, block: &Block) -> BlockContext { + BlockContext::new(Header::default(), block.parent.0, block.prevrandao) + } + } + + /// Verify that `finalize_with_retry` returns an error when the executor + /// permanently fails, which causes `handle_finalized_update` to abort the + /// process (preventing silent state divergence). + /// + /// We cannot test `handle_finalized_update` end-to-end with a failing + /// executor because it calls `std::process::abort()` on permanent + /// finalization failure (see #269). Instead, we test the inner retry + /// logic directly and verify it surfaces the expected error. + /// + /// Note: with retry logic, execution failures are retried up to 3 times + /// before the error is considered permanent. 
+ #[test] + fn finalize_with_retry_returns_error_on_permanent_failure() { + run_reporter_test(|context| async move { + // -- set up ledger with an empty genesis -- + let ledger = LedgerView::init( + context.child("ledger"), + next_partition("reporters-finalize-err"), + Vec::new(), + ) + .await + .expect("init ledger"); + let service = LedgerService::new(ledger); + let genesis = service.genesis_block(); + + // -- build a block that references genesis as parent -- + // The block's own snapshot does NOT exist in the store, so + // `finalize_block` will attempt execution (and our FailingExecutor + // will cause it to return Err(FinalizationError::ExecutionFailed)). + let block = Block::new(genesis.id(), 1, 1, B256::ZERO, StateRoot(B256::ZERO), vec![]); + + // -- invoke finalize_with_retry directly -- + let result = finalize_with_retry( + &service, + &context, + &FailingExecutor, + &StubProvider, + None, + &block, + true, + ) + .await; + + // -- assert: finalization failed with execution error -- + assert!(result.is_err(), "finalize_with_retry must return Err on permanent failure"); + let err = result.unwrap_err(); + assert!( + matches!(err, FinalizationError::ExecutionFailed(_)), + "expected ExecutionFailed, got: {err:?}" + ); + }); + } +} + +#[cfg(test)] +mod finalize_success_tests { + use std::sync::atomic::{AtomicUsize, Ordering}; + + use alloy_consensus::Header; + use alloy_primitives::{Address, B256, U256}; + use commonware_utils::acknowledgement::{Acknowledgement as _, Exact}; + use k256::ecdsa::SigningKey; + use kora_domain::evm::Evm; + use kora_executor::ExecutionError; + use kora_ledger::LedgerView; + + use super::*; + + static PARTITION_COUNTER: AtomicUsize = AtomicUsize::new(20_000); + + fn next_partition(prefix: &str) -> String { + let id = PARTITION_COUNTER.fetch_add(1, Ordering::Relaxed); + format!("{prefix}-{id}") + } + + /// A block executor that always returns an empty successful outcome. 
+ /// + /// Produces no state changes, so the state root stays the same as the + /// parent. This allows `finalize_block` to succeed with a matching root. + #[derive(Clone)] + struct EmptySuccessExecutor; + + impl BlockExecutor> for EmptySuccessExecutor { + type Tx = Bytes; + + fn execute( + &self, + _state: &OverlayState, + _context: &BlockContext, + _txs: &[Bytes], + ) -> Result { + Ok(ExecutionOutcome::new()) + } + + fn validate_header(&self, _header: &Header) -> Result<(), ExecutionError> { + Ok(()) + } + } + + /// A trivial block-context provider for tests. + #[derive(Clone)] + struct StubProvider; + + impl BlockContextProvider for StubProvider { + fn context(&self, block: &Block) -> BlockContext { + BlockContext::new(Header::default(), block.parent.0, block.prevrandao) + } + } + + /// When finalization succeeds (executor returns Ok, state root matches), + /// the handler must persist the snapshot, prune the mempool, and + /// acknowledge the update. + #[test] + fn successful_finalization_persists_and_acknowledges() { + run_reporter_test(|context| async move { + // -- set up ledger with an empty genesis -- + let ledger = LedgerView::init( + context.child("ledger"), + next_partition("reporters-finalize-ok"), + Vec::new(), + ) + .await + .expect("init ledger"); + let service = LedgerService::new(ledger); + let genesis = service.genesis_block(); + let genesis_digest = genesis.commitment(); + + // Fetch the genesis state root so we can build a matching block. 
+ let genesis_root = + service.query_state_root(genesis_digest).await.expect("genesis state root"); + + // -- insert a dummy tx into the mempool so we can verify pruning -- + let sender_key = SigningKey::from_bytes(&[2u8; 32].into()).expect("valid key"); + let to = Address::repeat_byte(0xcd); + let tx = Evm::sign_eip1559_transfer(&sender_key, 1, to, U256::ZERO, 0, 21_000, 0, 0); + assert!(service.submit_tx(tx.clone()).await, "tx should be accepted"); + let pool = service.txpool().await; + assert_eq!(pool.len(), 1); + + // -- build a block with no real txs but containing the dummy tx -- + // EmptySuccessExecutor ignores transactions and produces an empty + // changeset, so the state root stays at genesis_root. + let block = Block::new(genesis.id(), 1, 1, B256::ZERO, genesis_root, vec![tx]); + + let (ack, waiter) = Exact::handle(); + + handle_finalized_update( + service.clone(), + context, + EmptySuccessExecutor, + StubProvider, + None, + None, + None, + None, + 1, + Arc::new(Mutex::new(Vec::new())), + None, + Update::Block(block.clone(), ack), + ) + .await; + + // -- assert: mempool was pruned -- + assert_eq!(pool.len(), 0, "mempool must be pruned after successful finalization"); + + // -- assert: acknowledgement was delivered -- + waiter.await.expect("ack must be called after successful finalization"); + + // -- assert: snapshot was persisted (state root is queryable) -- + let block_digest = block.commitment(); + let stored_root = service.query_state_root(block_digest).await; + assert!(stored_root.is_some(), "snapshot must exist after successful finalization"); + assert_eq!( + stored_root.unwrap(), + genesis_root, + "persisted root must match the block state root" + ); + }); + } + + /// When a `BlockIndex` is provided, successful finalization must populate + /// the index with the finalized block metadata. 
+ #[test] + fn finalization_updates_block_index() { + run_reporter_test(|context| async move { + let ledger = LedgerView::init( + context.child("ledger"), + next_partition("reporters-finalize-index"), + Vec::new(), + ) + .await + .expect("init ledger"); + let service = LedgerService::new(ledger); + let genesis = service.genesis_block(); + let genesis_digest = genesis.commitment(); + let genesis_root = + service.query_state_root(genesis_digest).await.expect("genesis state root"); + + // Build an empty block whose state root matches genesis (no changes). + let block = Block::new(genesis.id(), 1, 1, B256::ZERO, genesis_root, Vec::new()); + let block_hash = block.id().0; + + let index = Arc::new(BlockIndex::new()); + let (ack, waiter) = Exact::handle(); + + handle_finalized_update( + service.clone(), + context, + EmptySuccessExecutor, + StubProvider, + Some(index.clone()), + None, + None, + None, + 1, + Arc::new(Mutex::new(Vec::new())), + None, + Update::Block(block, ack), + ) + .await; + + waiter.await.expect("ack must be called"); + + // -- assert: the block was indexed -- + let indexed = index.get_block_by_hash(&block_hash); + assert!(indexed.is_some(), "block must be indexed after finalization"); + let indexed_block = indexed.unwrap(); + assert_eq!(indexed_block.number, 1); + assert_eq!(indexed_block.hash, block_hash); + }); + } + + #[test] + fn checkpoint_interval_persists_chain_only_on_boundary() { + run_reporter_test(|context| async move { + let ledger = LedgerView::init( + context.child("ledger"), + next_partition("reporters-finalize-checkpoint"), + Vec::new(), + ) + .await + .expect("init ledger"); + let service = LedgerService::new(ledger); + let genesis = service.genesis_block(); + let genesis_digest = genesis.commitment(); + let genesis_root = + service.query_state_root(genesis_digest).await.expect("genesis state root"); + + let block1 = Block::new(genesis.id(), 1, 1, B256::ZERO, genesis_root, Vec::new()); + let block1_digest = block1.commitment(); + let 
block1_id = block1.id(); + let (ack1, waiter1) = Exact::handle(); + let pending_acks = Arc::new(Mutex::new(Vec::new())); + + handle_finalized_update( + service.clone(), + context.child("finalize_block1"), + EmptySuccessExecutor, + StubProvider, + None, + None, + None, + None, + 2, + pending_acks.clone(), + None, + Update::Block(block1, ack1), + ) + .await; + + assert_eq!(service.query_state_root(block1_digest).await, Some(genesis_root)); + assert!( + !service.is_snapshot_persisted(&block1_digest).await, + "height 1 should remain an in-memory snapshot before the checkpoint boundary" + ); + + let block2 = Block::new(block1_id, 2, 2, B256::ZERO, genesis_root, Vec::new()); + let block2_digest = block2.commitment(); + let (ack2, waiter2) = Exact::handle(); + + handle_finalized_update( + service.clone(), + context, + EmptySuccessExecutor, + StubProvider, + None, + None, + None, + None, + 2, + pending_acks, + None, + Update::Block(block2, ack2), + ) + .await; + waiter1.await.expect("first ack must be called at checkpoint"); + waiter2.await.expect("ack must be called"); + + assert!( + service.is_snapshot_persisted(&block1_digest).await, + "checkpoint should persist unpersisted ancestors" + ); + assert!( + service.is_snapshot_persisted(&block2_digest).await, + "checkpoint boundary should persist the boundary block" + ); + }); + } } #[derive(Clone, Debug)] @@ -238,6 +1033,13 @@ struct TxMetadata { value: alloy_primitives::U256, gas_limit: u64, gas_price: u128, + tx_type: u8, + chain_id: Option, + max_fee_per_gas: Option, + max_priority_fee_per_gas: Option, + v: u128, + r: U256, + s: U256, input: Bytes, nonce: u64, } @@ -252,17 +1054,40 @@ fn index_finalized_block( let transaction_hashes = block.txs.iter().map(|tx| keccak256(&tx.bytes)).collect::>(); let tx_metadata = block.txs.iter().map(|tx| decode_tx_metadata(&tx.bytes)).collect::>(); - let indexed_block = IndexedBlock { - hash: block_hash, - number: block.height, - parent_hash: block.parent.0, - state_root: 
block.state_root.0, - timestamp: block_context.header.timestamp, - gas_limit: block_context.header.gas_limit, - gas_used: outcome.gas_used, - base_fee_per_gas: block_context.header.base_fee_per_gas, - transaction_hashes, - }; + // Approximate block size: fixed header overhead + sum of raw transaction sizes. + // An Ethereum block header is ~508 bytes RLP-encoded; we use 508 as the + // constant and add the raw EIP-2718 envelope bytes for each transaction. + let tx_bytes_total: u64 = block.txs.iter().map(|tx| tx.bytes.len() as u64).sum(); + let block_size = 508 + tx_bytes_total; + + // Compute the transactions trie root from the raw EIP-2718 encoded transactions. + let tx_envelopes: Vec = block + .txs + .iter() + .filter_map(|tx| TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).ok()) + .collect(); + let transactions_root = calculate_transaction_root(&tx_envelopes); + + // Compute the receipts trie root from the execution receipts. + let receipt_envelopes: Vec = outcome + .receipts + .iter() + .zip(tx_metadata.iter()) + .filter_map(|(receipt, metadata)| { + let metadata = metadata.as_ref()?; + let bloom = logs_bloom(receipt.logs()); + let rwb = ReceiptWithBloom::new(receipt.receipt.clone(), bloom); + Some(match metadata.tx_type { + 0 => ReceiptEnvelope::Legacy(rwb), + 1 => ReceiptEnvelope::Eip2930(rwb), + 2 => ReceiptEnvelope::Eip1559(rwb), + 3 => ReceiptEnvelope::Eip4844(rwb), + 4 => ReceiptEnvelope::Eip7702(rwb), + _ => ReceiptEnvelope::Legacy(rwb), + }) + }) + .collect(); + let receipts_root = calculate_receipt_root(&receipt_envelopes); let indexed_txs = tx_metadata .iter() @@ -280,6 +1105,13 @@ fn index_finalized_block( value: metadata.value, gas_limit: metadata.gas_limit, gas_price: metadata.gas_price, + tx_type: metadata.tx_type, + chain_id: metadata.chain_id, + max_fee_per_gas: metadata.max_fee_per_gas, + max_priority_fee_per_gas: metadata.max_priority_fee_per_gas, + v: metadata.v, + r: metadata.r, + s: metadata.s, input: metadata.input.clone(), nonce: 
metadata.nonce, }) @@ -287,12 +1119,15 @@ fn index_finalized_block( .collect(); let mut next_log_index = 0u64; - let indexed_receipts = outcome + let indexed_receipts: Vec = outcome .receipts .iter() .enumerate() .filter_map(|(idx, receipt)| { let metadata = tx_metadata.get(idx)?.as_ref()?; + let transaction_hash = receipt.tx_hash; + let transaction_index = idx as u64; + let receipt_logs_bloom = logs_bloom(receipt.logs()); let logs = receipt .logs() .iter() @@ -300,26 +1135,64 @@ fn index_finalized_block( let (topics, data) = log.data.clone().split(); let log_index = next_log_index; next_log_index += 1; - IndexedLog { address: log.address, topics, data, log_index } + IndexedLog { + address: log.address, + topics, + data, + log_index, + block_number: block.height, + block_hash, + transaction_hash, + transaction_index, + } }) .collect(); Some(IndexedReceipt { - transaction_hash: receipt.tx_hash, + transaction_hash, block_hash, block_number: block.height, - transaction_index: idx as u64, + transaction_index, from: metadata.from, to: metadata.to, cumulative_gas_used: receipt.cumulative_gas_used(), gas_used: receipt.gas_used, contract_address: receipt.contract_address, logs, + logs_bloom: receipt_logs_bloom, + tx_type: metadata.tx_type, + effective_gas_price: receipt_effective_gas_price( + metadata, + block_context.header.base_fee_per_gas, + ), status: receipt.success(), }) }) .collect(); + // Compute block-level Bloom as the bitwise OR of all receipt Blooms. 
+ let mut block_logs_bloom = Bloom::ZERO; + for receipt in &indexed_receipts { + block_logs_bloom |= receipt.logs_bloom; + } + + let indexed_block = IndexedBlock { + hash: block_hash, + number: block.height, + parent_hash: block.parent.0, + state_root: block.state_root.0, + transactions_root, + receipts_root, + timestamp: block.timestamp, + gas_limit: block_context.header.gas_limit, + gas_used: outcome.gas_used, + base_fee_per_gas: block_context.header.base_fee_per_gas, + mix_hash: block.prevrandao, + logs_bloom: block_logs_bloom, + size: block_size, + transaction_hashes, + }; + index.insert_block(indexed_block, indexed_txs, indexed_receipts); } @@ -338,19 +1211,48 @@ fn decode_tx_metadata(tx_bytes: &Bytes) -> Option { return None; } }; + let signature = envelope.signature(); Some(TxMetadata { from, to: envelope.to(), value: envelope.value(), gas_limit: envelope.gas_limit(), - gas_price: effective_gas_price(&envelope), + gas_price: transaction_gas_price(&envelope), + tx_type: transaction_type(&envelope), + chain_id: envelope.chain_id(), + max_fee_per_gas: max_fee_per_gas(&envelope), + max_priority_fee_per_gas: max_priority_fee_per_gas(&envelope), + v: signature_v(&envelope), + r: signature.r(), + s: signature.s(), input: envelope.input().clone(), nonce: envelope.nonce(), }) } -const fn effective_gas_price(envelope: &TxEnvelope) -> u128 { +fn signature_v(envelope: &TxEnvelope) -> u128 { + let y_parity = envelope.signature().v(); + match envelope { + TxEnvelope::Legacy(tx) => to_eip155_value(y_parity, tx.tx().chain_id), + TxEnvelope::Eip2930(_) + | TxEnvelope::Eip1559(_) + | TxEnvelope::Eip4844(_) + | TxEnvelope::Eip7702(_) => u128::from(y_parity), + } +} + +const fn transaction_type(envelope: &TxEnvelope) -> u8 { + match envelope { + TxEnvelope::Legacy(_) => 0, + TxEnvelope::Eip2930(_) => 1, + TxEnvelope::Eip1559(_) => 2, + TxEnvelope::Eip4844(_) => 3, + TxEnvelope::Eip7702(_) => 4, + } +} + +const fn transaction_gas_price(envelope: &TxEnvelope) -> u128 { match 
envelope { TxEnvelope::Legacy(tx) => tx.tx().gas_price, TxEnvelope::Eip2930(tx) => tx.tx().gas_price, @@ -360,7 +1262,36 @@ const fn effective_gas_price(envelope: &TxEnvelope) -> u128 { } } -#[derive(Clone)] +const fn max_fee_per_gas(envelope: &TxEnvelope) -> Option { + match envelope { + TxEnvelope::Legacy(_) | TxEnvelope::Eip2930(_) => None, + TxEnvelope::Eip1559(tx) => Some(tx.tx().max_fee_per_gas), + TxEnvelope::Eip4844(tx) => Some(tx.tx().tx().max_fee_per_gas), + TxEnvelope::Eip7702(tx) => Some(tx.tx().max_fee_per_gas), + } +} + +const fn max_priority_fee_per_gas(envelope: &TxEnvelope) -> Option { + match envelope { + TxEnvelope::Legacy(_) | TxEnvelope::Eip2930(_) => None, + TxEnvelope::Eip1559(tx) => Some(tx.tx().max_priority_fee_per_gas), + TxEnvelope::Eip4844(tx) => Some(tx.tx().tx().max_priority_fee_per_gas), + TxEnvelope::Eip7702(tx) => Some(tx.tx().max_priority_fee_per_gas), + } +} + +fn receipt_effective_gas_price(metadata: &TxMetadata, base_fee_per_gas: Option) -> u128 { + let Some(max_fee_per_gas) = metadata.max_fee_per_gas else { + return metadata.gas_price; + }; + let Some(base_fee_per_gas) = base_fee_per_gas else { + return max_fee_per_gas; + }; + + let priority_fee = metadata.max_priority_fee_per_gas.unwrap_or_default(); + max_fee_per_gas.min(u128::from(base_fee_per_gas).saturating_add(priority_fee)) +} + /// Persists finalized blocks. pub struct FinalizedReporter { /// Ledger service used to verify blocks and persist snapshots. @@ -373,6 +1304,43 @@ pub struct FinalizedReporter { provider: P, /// Optional RPC block index updated after finalized blocks are persisted. block_index: Option>, + /// Optional mempool event channel for RPC subscriptions. + mempool_broadcast: Option, + /// Optional GC log for tracking selfdestructed addresses. + gc_log: Option>, + /// Optional application-level metrics. + metrics: Option, + /// Persist QMDB every N finalized blocks. 
+ checkpoint_interval: u64, + /// Marshal acknowledgements held until the next checkpoint boundary. + pending_acks: Arc>>, + /// Serializes finalized-block persistence so marshal acknowledgements advance in chain order. + finalize_lock: Arc<::tokio::sync::Mutex<()>>, + /// Optional node state for tracking the latest finalized height. + node_state: Option, +} + +impl Clone for FinalizedReporter +where + E: Clone, + P: Clone, +{ + fn clone(&self) -> Self { + Self { + state: self.state.clone(), + context: self.context.child("finalized_reporter"), + executor: self.executor.clone(), + provider: self.provider.clone(), + block_index: self.block_index.clone(), + mempool_broadcast: self.mempool_broadcast.clone(), + gc_log: self.gc_log.clone(), + metrics: self.metrics.clone(), + checkpoint_interval: self.checkpoint_interval, + pending_acks: self.pending_acks.clone(), + finalize_lock: self.finalize_lock.clone(), + node_state: self.node_state.clone(), + } + } } impl fmt::Debug for FinalizedReporter { @@ -387,13 +1355,21 @@ where P: BlockContextProvider, { /// Create a new finalized reporter. - pub const fn new( - state: LedgerService, - context: tokio::Context, - executor: E, - provider: P, - ) -> Self { - Self { state, context, executor, provider, block_index: None } + pub fn new(state: LedgerService, context: tokio::Context, executor: E, provider: P) -> Self { + Self { + state, + context, + executor, + provider, + block_index: None, + mempool_broadcast: None, + gc_log: None, + metrics: None, + checkpoint_interval: DEFAULT_CHECKPOINT_INTERVAL, + pending_acks: Arc::new(Mutex::new(Vec::new())), + finalize_lock: Arc::new(::tokio::sync::Mutex::new(())), + node_state: None, + } } /// Attach the RPC-visible block index to update when blocks finalize. @@ -402,6 +1378,45 @@ where self.block_index = Some(block_index); self } + + /// Attach the mempool event channel used by RPC subscriptions. 
+ #[must_use] + pub fn with_mempool_broadcast(mut self, mempool_broadcast: MempoolEventSender) -> Self { + self.mempool_broadcast = Some(mempool_broadcast); + self + } + + /// Attach a GC log for tracking selfdestructed contract addresses. + /// + /// When a finalized block contains selfdestructed contracts, their + /// addresses are appended to this log for future garbage collection of + /// orphaned QMDB storage entries. + #[must_use] + pub fn with_gc_log(mut self, gc_log: Arc) -> Self { + self.gc_log = Some(gc_log); + self + } + + /// Attach application-level metrics for tracking finalization outcomes. + #[must_use] + pub fn with_metrics(mut self, metrics: AppMetrics) -> Self { + self.metrics = Some(metrics); + self + } + + /// Persist QMDB every `interval` finalized blocks. + #[must_use] + pub const fn with_checkpoint_interval(mut self, interval: u64) -> Self { + self.checkpoint_interval = if interval == 0 { 1 } else { interval }; + self + } + + /// Attach the RPC node state so the reporter can update finalized height. 
+ #[must_use] + pub fn with_node_state(mut self, node_state: NodeState) -> Self { + self.node_state = Some(node_state); + self + } } impl Reporter for FinalizedReporter @@ -411,15 +1426,141 @@ where { type Activity = Update; - fn report(&mut self, update: Self::Activity) -> impl std::future::Future + Send { + fn report(&mut self, update: Self::Activity) -> Feedback { let state = self.state.clone(); - let context = self.context.clone(); + let context = self.context.child("report"); let executor = self.executor.clone(); let provider = self.provider.clone(); let block_index = self.block_index.clone(); - async move { - handle_finalized_update(state, context, executor, provider, block_index, update).await; - } + let mempool_broadcast = self.mempool_broadcast.clone(); + let gc_log = self.gc_log.clone(); + let metrics = self.metrics.clone(); + let checkpoint_interval = self.checkpoint_interval; + let pending_acks = self.pending_acks.clone(); + let finalize_lock = self.finalize_lock.clone(); + let node_state = self.node_state.clone(); + self.context.child("report_task").spawn(move |_| async move { + let _guard = finalize_lock.lock().await; + handle_finalized_update( + state, + context, + executor, + provider, + block_index, + mempool_broadcast, + gc_log, + metrics, + checkpoint_interval, + pending_acks, + node_state, + update, + ) + .await; + }); + Feedback::Ok + } +} + +#[cfg(test)] +mod tests { + use alloy_consensus::{Header, SignableTransaction as _, TxEip1559}; + use alloy_eips::eip2718::Encodable2718 as _; + use alloy_primitives::{ + Address, B256, Bloom, Log, LogData, Signature, TxKind, U256, keccak256, + }; + use k256::ecdsa::SigningKey; + use kora_domain::{BlockId, StateRoot, Tx}; + use kora_executor::ExecutionReceipt; + use sha3::{Digest as _, Keccak256}; + + use super::*; + + fn signed_eip1559_tx( + chain_id: u64, + max_fee_per_gas: u128, + max_priority_fee_per_gas: u128, + ) -> Bytes { + let mut secret = [0u8; 32]; + secret[31] = 1; + let key = 
SigningKey::from_bytes((&secret).into()).expect("valid key"); + let tx = TxEip1559 { + chain_id, + nonce: 7, + gas_limit: 50_000, + max_fee_per_gas, + max_priority_fee_per_gas, + to: TxKind::Call(Address::repeat_byte(0xbb)), + value: U256::from(42), + access_list: Default::default(), + input: Bytes::from_static(&[0xde, 0xad]), + }; + let digest = Keccak256::new_with_prefix(tx.encoded_for_signing()); + let (sig, recid) = key.sign_digest_recoverable(digest).expect("sign tx"); + let signature = Signature::from((sig, recid)); + let envelope = TxEnvelope::from(tx.into_signed(signature)); + let mut raw = Vec::new(); + envelope.encode_2718(&mut raw); + Bytes::from(raw) + } + + #[test] + fn finalized_index_preserves_transaction_receipt_and_log_metadata() { + let tx_bytes = signed_eip1559_tx(1337, 20, 3); + let tx_hash = keccak256(&tx_bytes); + let block = Block::new( + BlockId(B256::repeat_byte(0x10)), + 5, + 1234, + B256::repeat_byte(0x20), + StateRoot(B256::repeat_byte(0x30)), + vec![Tx::new(tx_bytes)], + ); + let block_hash = block.id().0; + let block_context = BlockContext::new( + Header { + timestamp: 1234, + gas_limit: 30_000_000, + base_fee_per_gas: Some(10), + ..Header::default() + }, + block.parent.0, + block.prevrandao, + ); + let log = Log { + address: Address::repeat_byte(0xcc), + data: LogData::new_unchecked( + vec![B256::repeat_byte(0xdd)], + Bytes::from_static(&[0x01, 0x02]), + ), + }; + let mut outcome = ExecutionOutcome::new(); + outcome.gas_used = 21_000; + outcome.receipts = + vec![ExecutionReceipt::new(tx_hash, true, 21_000, 21_000, vec![log], None)]; + + let index = BlockIndex::new(); + index_finalized_block(&index, &block, &block_context, &outcome); + + let indexed_tx = index.get_transaction(&tx_hash).expect("indexed transaction"); + assert_eq!(indexed_tx.hash, tx_hash); + assert_eq!(indexed_tx.block_hash, block_hash); + assert_eq!(indexed_tx.tx_type, 2); + assert_eq!(indexed_tx.chain_id, Some(1337)); + assert_eq!(indexed_tx.gas_price, 20); + 
assert_eq!(indexed_tx.max_fee_per_gas, Some(20)); + assert_eq!(indexed_tx.max_priority_fee_per_gas, Some(3)); + assert_ne!(indexed_tx.r, U256::ZERO); + assert_ne!(indexed_tx.s, U256::ZERO); + + let receipt = index.get_receipt(&tx_hash).expect("indexed receipt"); + assert_eq!(receipt.tx_type, 2); + assert_eq!(receipt.effective_gas_price, 13); + assert_ne!(receipt.logs_bloom, Bloom::ZERO); + assert_eq!(receipt.logs.len(), 1); + assert_eq!(receipt.logs[0].block_number, 5); + assert_eq!(receipt.logs[0].block_hash, block_hash); + assert_eq!(receipt.logs[0].transaction_hash, tx_hash); + assert_eq!(receipt.logs[0].transaction_index, 0); } } @@ -429,10 +1570,13 @@ where /// - Current view number (from notarizations) /// - Finalized block count /// - Nullified round count +/// - Equivocation events (Byzantine behavior) #[derive(Clone)] pub struct NodeStateReporter { /// RPC node state to update. state: NodeState, + /// Optional application-level metrics for Prometheus counters. + metrics: Option, /// Marker for the signing scheme. _scheme: PhantomData, } @@ -446,7 +1590,14 @@ impl fmt::Debug for NodeStateReporter { impl NodeStateReporter { /// Create a new node state reporter. pub const fn new(state: NodeState) -> Self { - Self { state, _scheme: PhantomData } + Self { state, metrics: None, _scheme: PhantomData } + } + + /// Attach application-level metrics for tracking equivocation events. 
+ #[must_use] + pub fn with_metrics(mut self, metrics: AppMetrics) -> Self { + self.metrics = Some(metrics); + self } } @@ -456,7 +1607,7 @@ where { type Activity = Activity; - fn report(&mut self, activity: Self::Activity) -> impl std::future::Future + Send { + fn report(&mut self, activity: Self::Activity) -> Feedback { match &activity { Activity::Notarization(n) => { self.state.set_view(n.proposal.round.view().get()); @@ -468,8 +1619,55 @@ where Activity::Nullification(_) => { self.state.inc_nullified(); } - _ => {} + Activity::ConflictingNotarize(proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: conflicting notarize detected" + ); + self.state.inc_equivocations(); + if let Some(ref m) = self.metrics { + m.equivocations + .get_or_create(&EquivocationTypeLabel { + r#type: "conflicting_notarize".into(), + }) + .inc(); + } + } + Activity::ConflictingFinalize(proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: conflicting finalize detected" + ); + self.state.inc_equivocations(); + if let Some(ref m) = self.metrics { + m.equivocations + .get_or_create(&EquivocationTypeLabel { + r#type: "conflicting_finalize".into(), + }) + .inc(); + } + } + Activity::NullifyFinalize(proof) => { + warn!( + signer = ?proof.signer(), + view = ?proof.view(), + "EQUIVOCATION: nullify-finalize conflict detected" + ); + self.state.inc_equivocations(); + if let Some(ref m) = self.metrics { + m.equivocations + .get_or_create(&EquivocationTypeLabel { r#type: "nullify_finalize".into() }) + .inc(); + } + } + // Normal per-vote and aggregate events that don't affect node state. 
+ Activity::Notarize(_) + | Activity::Certification(_) + | Activity::Nullify(_) + | Activity::Finalize(_) => {} } - async {} + Feedback::Ok } } diff --git a/crates/node/rpc/Cargo.toml b/crates/node/rpc/Cargo.toml index 94eecea..9e9f18a 100644 --- a/crates/node/rpc/Cargo.toml +++ b/crates/node/rpc/Cargo.toml @@ -12,9 +12,12 @@ workspace = true [dependencies] # HTTP server -axum = "0.8" -tower = { version = "0.5", features = ["limit"] } +axum.workspace = true +tower = { version = "0.5", features = ["limit", "util"] } tower-http = { version = "0.6", features = ["cors"] } +# jsonrpsee 0.24 depends on tower 0.4; its `set_http_middleware` expects +# tower 0.4's `ServiceBuilder`, so we keep a renamed 0.4 dependency. +tower_04 = { package = "tower", version = "0.4" } # JSON-RPC jsonrpsee = { version = "0.24", features = ["server", "macros"] } @@ -30,6 +33,7 @@ async-trait.workspace = true # Serialization serde.workspace = true +serde_json.workspace = true # Error handling thiserror.workspace = true @@ -37,16 +41,20 @@ thiserror.workspace = true # Tracing tracing.workspace = true +# Metrics +prometheus-client.workspace = true + # Misc parking_lot = "0.12" # Kora crates +kora-domain = { path = "../domain" } kora-executor = { path = "../executor" } kora-indexer = { path = "../../storage/indexer" } kora-traits = { path = "../../storage/traits" } +kora-txpool = { path = "../txpool" } [dev-dependencies] -tokio = { workspace = true, features = ["rt", "macros"] } -serde_json.workspace = true +tokio = { workspace = true, features = ["rt", "macros", "time"] } k256.workspace = true sha3.workspace = true diff --git a/crates/node/rpc/src/config.rs b/crates/node/rpc/src/config.rs index 0679038..bc24e04 100644 --- a/crates/node/rpc/src/config.rs +++ b/crates/node/rpc/src/config.rs @@ -17,6 +17,11 @@ pub struct RpcServerConfig { pub rate_limit: RateLimitConfig, /// Maximum number of concurrent connections. 
pub max_connections: u32, + /// Maximum number of WebSocket subscriptions per connection. + pub max_subscriptions_per_connection: u32, + /// Maximum number of calls allowed in a single JSON-RPC batch request. + /// `0` disables batch requests entirely. + pub max_batch_size: u32, } impl RpcServerConfig { @@ -29,6 +34,8 @@ impl RpcServerConfig { cors: CorsConfig::default(), rate_limit: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, } } @@ -51,12 +58,42 @@ impl RpcServerConfig { self } + /// Set rate limit including burst size. + #[must_use] + pub const fn with_rate_limit_burst( + mut self, + requests_per_second: u64, + burst_size: u64, + ) -> Self { + self.rate_limit.requests_per_second = requests_per_second; + self.rate_limit.burst_size = burst_size; + self + } + /// Set maximum connections. #[must_use] pub const fn with_max_connections(mut self, max_connections: u32) -> Self { self.max_connections = max_connections; self } + + /// Set the maximum number of WebSocket subscriptions per connection. + #[must_use] + pub const fn with_max_subscriptions_per_connection( + mut self, + max_subscriptions_per_connection: u32, + ) -> Self { + self.max_subscriptions_per_connection = max_subscriptions_per_connection; + self + } + + /// Set the maximum number of calls in a single batch request. + /// `0` disables batch requests entirely. + #[must_use] + pub const fn with_max_batch_size(mut self, max_batch_size: u32) -> Self { + self.max_batch_size = max_batch_size; + self + } } impl Default for RpcServerConfig { @@ -68,6 +105,8 @@ impl Default for RpcServerConfig { cors: CorsConfig::default(), rate_limit: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, } } } @@ -130,7 +169,7 @@ impl CorsConfig { /// Rate limiting configuration. #[derive(Clone, Debug)] pub struct RateLimitConfig { - /// Maximum requests per second per client. 
+ /// Maximum requests per second enforced by the server. pub requests_per_second: u64, /// Burst size for rate limiting. pub burst_size: u64, @@ -147,6 +186,11 @@ impl RateLimitConfig { pub const fn disabled() -> Self { Self { requests_per_second: u64::MAX, burst_size: u64::MAX } } + + /// Return whether rate limiting is disabled. + pub const fn is_disabled(&self) -> bool { + self.requests_per_second == u64::MAX + } } #[cfg(test)] @@ -160,6 +204,8 @@ mod tests { assert_eq!(config.jsonrpc_addr, "127.0.0.1:8545".parse().unwrap()); assert_eq!(config.chain_id, 1); assert_eq!(config.max_connections, 100); + assert_eq!(config.max_subscriptions_per_connection, 32); + assert_eq!(config.max_batch_size, 100); } #[test] @@ -172,6 +218,7 @@ mod tests { assert_eq!(config.jsonrpc_addr, jsonrpc); assert_eq!(config.chain_id, 42); assert_eq!(config.max_connections, 100); + assert_eq!(config.max_subscriptions_per_connection, 32); } #[test] @@ -197,22 +244,46 @@ mod tests { assert_eq!(config.rate_limit.requests_per_second, 500); } + #[test] + fn rpc_server_config_with_rate_limit_burst() { + let config = RpcServerConfig::default().with_rate_limit_burst(500, 750); + assert_eq!(config.rate_limit.requests_per_second, 500); + assert_eq!(config.rate_limit.burst_size, 750); + } + #[test] fn rpc_server_config_with_max_connections() { let config = RpcServerConfig::default().with_max_connections(200); assert_eq!(config.max_connections, 200); } + #[test] + fn rpc_server_config_with_max_subscriptions_per_connection() { + let config = RpcServerConfig::default().with_max_subscriptions_per_connection(16); + assert_eq!(config.max_subscriptions_per_connection, 16); + } + + #[test] + fn rpc_server_config_with_max_batch_size() { + let config = RpcServerConfig::default().with_max_batch_size(50); + assert_eq!(config.max_batch_size, 50); + } + #[test] fn rpc_server_config_chained_builder() { let config = RpcServerConfig::default() .with_cors_origins(vec!["*".to_string()]) - .with_rate_limit(1000) - 
.with_max_connections(50); + .with_rate_limit_burst(1000, 1500) + .with_max_connections(50) + .with_max_subscriptions_per_connection(24) + .with_max_batch_size(200); assert_eq!(config.cors.allowed_origins, vec!["*"]); assert_eq!(config.rate_limit.requests_per_second, 1000); + assert_eq!(config.rate_limit.burst_size, 1500); assert_eq!(config.max_connections, 50); + assert_eq!(config.max_subscriptions_per_connection, 24); + assert_eq!(config.max_batch_size, 200); } #[test] @@ -258,6 +329,12 @@ mod tests { let config = RateLimitConfig::disabled(); assert_eq!(config.requests_per_second, u64::MAX); assert_eq!(config.burst_size, u64::MAX); + assert!(config.is_disabled()); + } + + #[test] + fn rate_limit_config_default_is_not_disabled() { + assert!(!RateLimitConfig::default().is_disabled()); } #[test] diff --git a/crates/node/rpc/src/error.rs b/crates/node/rpc/src/error.rs index 28e38bb..1d901ed 100644 --- a/crates/node/rpc/src/error.rs +++ b/crates/node/rpc/src/error.rs @@ -1,5 +1,6 @@ //! JSON-RPC error types following Ethereum error code conventions. +use alloy_primitives::Bytes; use jsonrpsee::types::ErrorObjectOwned; use thiserror::Error; @@ -28,8 +29,10 @@ pub mod codes { pub const METHOD_NOT_SUPPORTED: i32 = -32004; /// Request limit exceeded. pub const LIMIT_EXCEEDED: i32 = -32005; - /// Execution error (revert, out of gas, etc.). + /// Execution error (out of gas, etc.). pub const EXECUTION_ERROR: i32 = -32015; + /// Execution reverted (EIP-3 standard code). + pub const EXECUTION_REVERTED: i32 = 3; } /// RPC-specific errors that can occur during request handling. @@ -43,6 +46,10 @@ pub enum RpcError { #[error("transaction not found")] TransactionNotFound, + /// Filter not found. + #[error("filter not found")] + FilterNotFound, + /// Account not found. #[error("account not found: {0}")] AccountNotFound(String), @@ -59,6 +66,10 @@ pub enum RpcError { #[error("execution failed: {0}")] ExecutionFailed(String), + /// Execution reverted with optional revert data. 
+ #[error("execution reverted")] + ExecutionReverted(Option), + /// State database error. #[error("state error: {0}")] StateError(String), @@ -70,22 +81,41 @@ pub enum RpcError { /// Method not implemented. #[error("method not implemented")] NotImplemented, + + /// Invalid method parameters. + #[error("invalid params: {0}")] + InvalidParams(String), + + /// Unsupported operation (e.g. historical state queries). + #[error("unsupported: {0}")] + Unsupported(String), } impl From for ErrorObjectOwned { fn from(err: RpcError) -> Self { - let (code, message) = match &err { - RpcError::BlockNotFound => (codes::RESOURCE_NOT_FOUND, err.to_string()), - RpcError::TransactionNotFound => (codes::RESOURCE_NOT_FOUND, err.to_string()), - RpcError::AccountNotFound(_) => (codes::RESOURCE_NOT_FOUND, err.to_string()), - RpcError::InvalidBlockNumber(_) => (codes::INVALID_PARAMS, err.to_string()), - RpcError::InvalidTransaction(_) => (codes::INVALID_PARAMS, err.to_string()), - RpcError::ExecutionFailed(_) => (codes::EXECUTION_ERROR, err.to_string()), - RpcError::StateError(_) => (codes::INTERNAL_ERROR, err.to_string()), - RpcError::Internal(_) => (codes::INTERNAL_ERROR, err.to_string()), - RpcError::NotImplemented => (codes::METHOD_NOT_SUPPORTED, err.to_string()), - }; - ErrorObjectOwned::owned(code, message, None::<()>) + match err { + RpcError::ExecutionReverted(data) => { + ErrorObjectOwned::owned(codes::EXECUTION_REVERTED, "execution reverted", data) + } + other => { + let (code, message) = match &other { + RpcError::BlockNotFound => (codes::RESOURCE_NOT_FOUND, other.to_string()), + RpcError::TransactionNotFound => (codes::RESOURCE_NOT_FOUND, other.to_string()), + RpcError::FilterNotFound => (codes::SERVER_ERROR, other.to_string()), + RpcError::AccountNotFound(_) => (codes::RESOURCE_NOT_FOUND, other.to_string()), + RpcError::InvalidBlockNumber(_) => (codes::INVALID_PARAMS, other.to_string()), + RpcError::InvalidTransaction(_) => (codes::INVALID_PARAMS, other.to_string()), + 
RpcError::ExecutionFailed(_) => (codes::EXECUTION_ERROR, other.to_string()), + RpcError::InvalidParams(_) => (codes::INVALID_PARAMS, other.to_string()), + RpcError::StateError(_) => (codes::INTERNAL_ERROR, other.to_string()), + RpcError::Internal(_) => (codes::INTERNAL_ERROR, other.to_string()), + RpcError::NotImplemented => (codes::METHOD_NOT_SUPPORTED, other.to_string()), + RpcError::Unsupported(_) => (codes::INVALID_PARAMS, other.to_string()), + RpcError::ExecutionReverted(_) => unreachable!(), + }; + ErrorObjectOwned::owned(code, message, None::<()>) + } + } } } @@ -111,6 +141,7 @@ mod tests { assert_eq!(codes::METHOD_NOT_SUPPORTED, -32004); assert_eq!(codes::LIMIT_EXCEEDED, -32005); assert_eq!(codes::EXECUTION_ERROR, -32015); + assert_eq!(codes::EXECUTION_REVERTED, 3); } #[test] @@ -125,6 +156,12 @@ mod tests { assert_eq!(err.to_string(), "transaction not found"); } + #[test] + fn rpc_error_display_filter_not_found() { + let err = RpcError::FilterNotFound; + assert_eq!(err.to_string(), "filter not found"); + } + #[test] fn rpc_error_display_account_not_found() { let err = RpcError::AccountNotFound("0x1234".to_string()); @@ -183,6 +220,14 @@ mod tests { assert_eq!(obj.message(), "transaction not found"); } + #[test] + fn rpc_error_to_error_object_filter_not_found() { + let err = RpcError::FilterNotFound; + let obj: ErrorObjectOwned = err.into(); + assert_eq!(obj.code(), codes::SERVER_ERROR); + assert_eq!(obj.message(), "filter not found"); + } + #[test] fn rpc_error_to_error_object_account_not_found() { let err = RpcError::AccountNotFound("0xabc".to_string()); @@ -233,10 +278,70 @@ mod tests { assert_eq!(obj.code(), codes::METHOD_NOT_SUPPORTED); } + #[test] + fn rpc_error_display_invalid_params() { + let err = RpcError::InvalidParams("block range exceeds maximum".to_string()); + assert_eq!(err.to_string(), "invalid params: block range exceeds maximum"); + } + + #[test] + fn rpc_error_to_error_object_invalid_params() { + let err = RpcError::InvalidParams("too 
wide".to_string()); + let obj: ErrorObjectOwned = err.into(); + assert_eq!(obj.code(), codes::INVALID_PARAMS); + assert!(obj.message().contains("too wide")); + } + + #[test] + fn rpc_error_display_unsupported() { + let err = RpcError::Unsupported("historical state not available".to_string()); + assert_eq!(err.to_string(), "unsupported: historical state not available"); + } + + #[test] + fn rpc_error_to_error_object_unsupported() { + let err = RpcError::Unsupported("historical state".to_string()); + let obj: ErrorObjectOwned = err.into(); + assert_eq!(obj.code(), codes::INVALID_PARAMS); + assert!(obj.message().contains("historical state")); + } + #[test] fn rpc_error_debug() { let err = RpcError::BlockNotFound; - let debug_str = format!("{:?}", err); + let debug_str = format!("{err:?}"); assert!(debug_str.contains("BlockNotFound")); } + + #[test] + fn rpc_error_display_execution_reverted() { + let err = RpcError::ExecutionReverted(Some(Bytes::from_static(&[0x08, 0xc3, 0x79, 0xa0]))); + assert_eq!(err.to_string(), "execution reverted"); + } + + #[test] + fn rpc_error_display_execution_reverted_none() { + let err = RpcError::ExecutionReverted(None); + assert_eq!(err.to_string(), "execution reverted"); + } + + #[test] + fn rpc_error_to_error_object_execution_reverted_with_data() { + let data = Bytes::from_static(&[0x08, 0xc3, 0x79, 0xa0]); + let err = RpcError::ExecutionReverted(Some(data)); + let obj: ErrorObjectOwned = err.into(); + assert_eq!(obj.code(), codes::EXECUTION_REVERTED); + assert_eq!(obj.message(), "execution reverted"); + // data field should be present (not null) + assert!(obj.data().is_some()); + } + + #[test] + fn rpc_error_to_error_object_execution_reverted_without_data() { + let err = RpcError::ExecutionReverted(None); + let obj: ErrorObjectOwned = err.into(); + assert_eq!(obj.code(), codes::EXECUTION_REVERTED); + assert_eq!(obj.message(), "execution reverted"); + assert!(obj.data().is_none()); + } } diff --git a/crates/node/rpc/src/eth.rs 
b/crates/node/rpc/src/eth.rs index 8f85e26..5baf5be 100644 --- a/crates/node/rpc/src/eth.rs +++ b/crates/node/rpc/src/eth.rs @@ -1,22 +1,49 @@ //! Ethereum JSON-RPC API implementation. -use std::{collections::HashMap, future::Future, pin::Pin, sync::Arc}; +use std::{ + collections::{HashMap, HashSet, VecDeque}, + future::Future, + pin::Pin, + sync::Arc, +}; -use alloy_consensus::{Transaction as _, TxEnvelope, transaction::SignerRecoverable as _}; +use alloy_consensus::{ + Transaction as _, TxEnvelope, + transaction::{SignerRecoverable as _, to_eip155_value}, +}; use alloy_eips::eip2718::Decodable2718 as _; use alloy_primitives::{Address, B256, Bytes, U64, U256}; use jsonrpsee::{core::RpcResult, proc_macros::rpc}; +use kora_domain::MempoolEvent; +use kora_txpool::TransactionPool; use tokio::sync::RwLock; +use tracing::warn; use crate::{ error::RpcError, + filters::{Filter, FilterChanges, FilterStore}, + state::NodeState, state_provider::StateProvider, + subscription::{MempoolEventSender, PendingTxEvent, PendingTxEventSender, PendingTxInfo}, types::{ - BlockNumberOrTag, CallRequest, RpcBlock, RpcLog, RpcLogFilter, RpcTransaction, - RpcTransactionReceipt, + BlockNumberOrTag, BlockTag, BlockTransactions, CallRequest, RpcBlock, RpcLog, RpcLogFilter, + RpcTransaction, RpcTransactionReceipt, SyncInfo, SyncStatus, }, }; +const DEFAULT_GAS_ORACLE_BLOCKS: usize = 20; +const DEFAULT_GAS_ORACLE_PERCENTILE: u8 = 60; +const GWEI: u64 = 1_000_000_000; +const DEFAULT_MAX_GAS_PRICE: u64 = 500 * GWEI; + +/// Maximum number of pending transactions to track in memory. +/// +/// When the limit is reached, the oldest entries are evicted on the next +/// `send_raw_transaction` call. This prevents unbounded memory growth +/// under sustained load when transactions are submitted faster than they +/// are finalized and queried. +const MAX_PENDING_TXS: usize = 10_000; + /// Ethereum JSON-RPC API trait. /// /// Defines the core eth_* methods required for Ethereum compatibility. 
@@ -129,11 +156,35 @@ pub trait EthApi { /// Returns syncing status. #[method(name = "syncing")] - async fn syncing(&self) -> RpcResult; + async fn syncing(&self) -> RpcResult; /// Returns logs matching the given filter. #[method(name = "getLogs")] async fn get_logs(&self, filter: RpcLogFilter) -> RpcResult>; + + /// Creates a log filter. + #[method(name = "newFilter")] + async fn new_filter(&self, filter: RpcLogFilter) -> RpcResult; + + /// Creates a block filter. + #[method(name = "newBlockFilter")] + async fn new_block_filter(&self) -> RpcResult; + + /// Creates a pending transaction filter. + #[method(name = "newPendingTransactionFilter")] + async fn new_pending_transaction_filter(&self) -> RpcResult; + + /// Returns changes since the last poll for the given filter. + #[method(name = "getFilterChanges")] + async fn get_filter_changes(&self, filter_id: U256) -> RpcResult; + + /// Returns all logs matching the given log filter. + #[method(name = "getFilterLogs")] + async fn get_filter_logs(&self, filter_id: U256) -> RpcResult>; + + /// Removes a filter. + #[method(name = "uninstallFilter")] + async fn uninstall_filter(&self, filter_id: U256) -> RpcResult; } /// Net namespace API. @@ -188,6 +239,45 @@ pub type TxSubmitFuture = Pin> + Se /// Async transaction submission callback type. pub type TxSubmitCallback = Arc TxSubmitFuture + Send + Sync>; +/// Configuration for recent-block fee estimation. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct GasOracleConfig { + /// Number of recent blocks sampled by the oracle. + pub blocks: usize, + /// Percentile used when selecting sampled gas prices and priority fees. + pub percentile: u8, + /// Minimum total gas price returned by `eth_gasPrice`. + pub min_price: U256, + /// Maximum total gas price returned by `eth_gasPrice`. + pub max_price: U256, + /// Minimum priority fee returned by `eth_maxPriorityFeePerGas`. 
+ pub min_priority_fee: U256, +} + +impl Default for GasOracleConfig { + fn default() -> Self { + Self { + blocks: DEFAULT_GAS_ORACLE_BLOCKS, + percentile: DEFAULT_GAS_ORACLE_PERCENTILE, + min_price: U256::from(GWEI), + max_price: U256::from(DEFAULT_MAX_GAS_PRICE), + min_priority_fee: U256::from(GWEI), + } + } +} + +#[derive(Clone, Copy, Debug)] +struct GasOracleEstimate { + gas_price: U256, + priority_fee: U256, +} + +#[derive(Clone, Copy, Debug)] +struct CachedGasOracleEstimate { + head: u64, + estimate: GasOracleEstimate, +} + /// Ethereum API implementation with state provider. pub struct EthApiImpl { chain_id: u64, @@ -195,6 +285,27 @@ pub struct EthApiImpl { tx_submit: Option, state_provider: Arc>, pending_txs: Arc>>, + pending_tx_broadcast: Option, + mempool_broadcast: Option, + /// Transaction pool used for pending nonce lookups in + /// `eth_getTransactionCount("pending")`. + txpool: Option, + gas_oracle_config: GasOracleConfig, + gas_oracle_cache: Arc>>, + /// Insertion-ordered record of pending transaction hashes so that + /// `eth_getFilterChanges` for pending-tx filters can return hashes + /// in arrival order rather than an arbitrary sorted order. + pending_tx_order: Arc>>, + /// Cumulative count of entries evicted from the front of + /// `pending_tx_order`. Filter cursors store an absolute index; this + /// offset converts it to a position inside the (now shorter) deque. + pending_tx_evicted: Arc, + /// Maximum number of pending transactions to hold in memory before + /// evicting the oldest entries. + max_pending_txs: usize, + filter_store: Arc, + /// Shared node state for sync status reporting. 
+ node_state: Option, } impl std::fmt::Debug for EthApiImpl { @@ -203,6 +314,8 @@ impl std::fmt::Debug for EthApiImpl { .field("chain_id", &self.chain_id) .field("block_height", &self.block_height) .field("tx_submit", &self.tx_submit.is_some()) + .field("txpool", &self.txpool.is_some()) + .field("gas_oracle_config", &self.gas_oracle_config) .finish() } } @@ -210,26 +323,85 @@ impl std::fmt::Debug for EthApiImpl { impl EthApiImpl { /// Create a new Ethereum API implementation with a state provider. pub fn new(chain_id: u64, state_provider: S) -> Self { - Self { - chain_id, - block_height: Arc::new(std::sync::atomic::AtomicU64::new(0)), - tx_submit: None, - state_provider: Arc::new(RwLock::new(state_provider)), - pending_txs: Arc::new(RwLock::new(HashMap::new())), - } + Self::from_parts(chain_id, state_provider, None, GasOracleConfig::default()) } /// Create a new Ethereum API implementation with a transaction submission callback. pub fn with_tx_submit(chain_id: u64, state_provider: S, tx_submit: TxSubmitCallback) -> Self { + Self::from_parts(chain_id, state_provider, Some(tx_submit), GasOracleConfig::default()) + } + + fn from_parts( + chain_id: u64, + state_provider: S, + tx_submit: Option, + gas_oracle_config: GasOracleConfig, + ) -> Self { Self { chain_id, block_height: Arc::new(std::sync::atomic::AtomicU64::new(0)), - tx_submit: Some(tx_submit), + tx_submit, state_provider: Arc::new(RwLock::new(state_provider)), pending_txs: Arc::new(RwLock::new(HashMap::new())), + pending_tx_broadcast: None, + mempool_broadcast: None, + txpool: None, + gas_oracle_config, + gas_oracle_cache: Arc::new(RwLock::new(None)), + pending_tx_order: Arc::new(RwLock::new(VecDeque::new())), + pending_tx_evicted: Arc::new(std::sync::atomic::AtomicUsize::new(0)), + max_pending_txs: MAX_PENDING_TXS, + filter_store: Arc::new(FilterStore::default()), + node_state: None, } } + /// Attach a pending transaction broadcast channel. 
+ #[must_use] + pub fn with_pending_tx_broadcast(mut self, pending_tx_broadcast: PendingTxEventSender) -> Self { + self.pending_tx_broadcast = Some(pending_tx_broadcast); + self + } + + /// Attach a Kora mempool event broadcast channel. + #[must_use] + pub fn with_mempool_broadcast(mut self, mempool_broadcast: MempoolEventSender) -> Self { + self.mempool_broadcast = Some(mempool_broadcast); + self + } + + /// Attach a transaction pool for pending nonce lookups. + /// + /// When set, `eth_getTransactionCount("pending")` will return the + /// next nonce after all pending mempool transactions, rather than + /// the finalized on-chain nonce. + #[must_use] + pub fn with_txpool(mut self, txpool: TransactionPool) -> Self { + self.txpool = Some(txpool); + self + } + + /// Attach shared node state for sync status reporting. + #[must_use] + pub fn with_node_state(mut self, node_state: NodeState) -> Self { + self.node_state = Some(node_state); + self + } + + /// Override the maximum number of pending transactions held in memory. + #[cfg(test)] + const fn with_max_pending_txs(mut self, max_pending_txs: usize) -> Self { + self.max_pending_txs = max_pending_txs; + self + } + + /// Override the default recent-block gas oracle configuration. + pub fn with_gas_oracle_config(mut self, gas_oracle_config: GasOracleConfig) -> Self { + self.gas_oracle_config = gas_oracle_config; + self.gas_oracle_cache = Arc::new(RwLock::new(None)); + self + } + /// Get a handle to update the block height. 
pub fn block_height_handle(&self) -> Arc { self.block_height.clone() @@ -239,6 +411,32 @@ impl EthApiImpl { pub fn set_block_height(&self, height: u64) { self.block_height.store(height, std::sync::atomic::Ordering::Relaxed); } + + async fn current_block_number(&self) -> u64 { + let provider = self.state_provider.read().await; + provider + .block_number() + .await + .unwrap_or_else(|_| self.block_height.load(std::sync::atomic::Ordering::Relaxed)) + } + + async fn recent_fee_estimate(&self) -> RpcResult { + let provider = self.state_provider.read().await; + let head = provider + .block_number() + .await + .unwrap_or_else(|_| self.block_height.load(std::sync::atomic::Ordering::Relaxed)); + + if let Some(cached) = *self.gas_oracle_cache.read().await + && cached.head == head + { + return Ok(cached.estimate); + } + + let estimate = estimate_recent_fees(&*provider, head, self.gas_oracle_config).await; + *self.gas_oracle_cache.write().await = Some(CachedGasOracleEstimate { head, estimate }); + Ok(estimate) + } } #[jsonrpsee::core::async_trait] @@ -248,14 +446,7 @@ impl EthApiServer for EthApiImpl { } async fn block_number(&self) -> RpcResult { - let provider = self.state_provider.read().await; - provider.block_number().await.map_or_else( - |_| { - let height = self.block_height.load(std::sync::atomic::Ordering::Relaxed); - Ok(U64::from(height)) - }, - |height| Ok(U64::from(height)), - ) + Ok(U64::from(self.current_block_number().await)) } async fn get_balance( @@ -272,9 +463,23 @@ impl EthApiServer for EthApiImpl { address: Address, block: Option, ) -> RpcResult { + let is_pending = block.as_ref().is_some_and(BlockNumberOrTag::is_pending); + let provider = self.state_provider.read().await; - let nonce = provider.nonce(address, block).await?; - Ok(U64::from(nonce)) + let finalized_nonce = provider.nonce(address, block).await?; + + // When the caller asks for the "pending" nonce, augment the + // finalized on-chain nonce with the transaction pool's view so + // that 
sequential sends from one account get strictly increasing + // nonces. + if is_pending + && let Some(ref txpool) = self.txpool + && let Some(pool_nonce) = txpool.next_nonce(&address) + { + return Ok(U64::from(pool_nonce.max(finalized_nonce))); + } + + Ok(U64::from(finalized_nonce)) } async fn get_code( @@ -300,11 +505,57 @@ impl EthApiServer for EthApiImpl { let tx_hash = alloy_primitives::keccak256(&data); let pending_tx = raw_tx_to_pending_rpc(&data)?; - if let Some(ref submit) = self.tx_submit { + let accepted = if let Some(ref submit) = self.tx_submit { submit(data).await?; + true + } else { + false + }; + + { + let mut txs = self.pending_txs.write().await; + let mut order = self.pending_tx_order.write().await; + txs.insert(tx_hash, pending_tx.clone()); + order.push_back(tx_hash); + + // Evict oldest entries when either the pending map or the + // order deque exceeds the cap. The deque can accumulate stale + // entries (hashes removed from the map by + // `get_transaction_by_hash` but not from the deque), so we + // must bound both independently. + let cap = self.max_pending_txs; + let needs_eviction = txs.len() > cap || order.len() > cap; + if needs_eviction { + let map_excess = txs.len().saturating_sub(cap); + let deque_excess = order.len().saturating_sub(cap); + let target = map_excess.max(deque_excess); + warn!( + map_excess, + deque_excess, + cap, + "pending transaction cache exceeded limit, evicting oldest entries" + ); + let mut evicted = 0; + let mut drained = 0usize; + // Drain from the front (oldest) of the order deque until + // we have removed enough entries from the map AND trimmed + // the deque back to the cap. + while (evicted < map_excess || drained < target) && !order.is_empty() { + let old_hash = order.pop_front().unwrap(); + drained += 1; + if txs.remove(&old_hash).is_some() { + evicted += 1; + } + } + // Update the cumulative eviction offset so that filter + // cursors (which store absolute indices) remain correct. 
+ self.pending_tx_evicted.fetch_add(drained, std::sync::atomic::Ordering::Relaxed); + } } - self.pending_txs.write().await.insert(tx_hash, pending_tx); + if accepted { + self.broadcast_pending_tx(tx_hash, pending_tx); + } Ok(tx_hash) } @@ -364,11 +615,11 @@ impl EthApiServer for EthApiImpl { } async fn gas_price(&self) -> RpcResult { - Ok(U256::from(1_000_000_000u64)) + Ok(self.recent_fee_estimate().await?.gas_price) } async fn max_priority_fee_per_gas(&self) -> RpcResult { - Ok(U256::from(1_000_000_000u64)) + Ok(self.recent_fee_estimate().await?.priority_fee) } async fn fee_history( @@ -377,28 +628,64 @@ impl EthApiServer for EthApiImpl { newest_block: BlockNumberOrTag, reward_percentiles: Option>, ) -> RpcResult { + // Validate percentile values before doing any work. + if let Some(percentiles) = &reward_percentiles { + validate_reward_percentiles(percentiles)?; + } + let provider = self.state_provider.read().await; let head = provider .block_number() .await .unwrap_or_else(|_| self.block_height.load(std::sync::atomic::Ordering::Relaxed)); - let newest = match newest_block { - BlockNumberOrTag::Number(n) => n.to::().min(head), - BlockNumberOrTag::Tag(_) | BlockNumberOrTag::Latest => head, - }; + let newest = resolve_fee_history_newest(newest_block, head); let requested = block_count.to::().min(1024); let count = requested.min(newest.saturating_add(1)) as usize; let oldest = newest.saturating_add(1).saturating_sub(count as u64); - let base_fee = U256::from(1_000_000_000u64); - - Ok(FeeHistory { - base_fee_per_gas: vec![base_fee; count + 1], - gas_used_ratio: vec![0.0; count], - oldest_block: U64::from(oldest), - reward: reward_percentiles.map(|percentiles| { - vec![vec![U256::from(1_000_000_000u64); percentiles.len()]; count] - }), - }) + + let mut base_fee_per_gas = Vec::with_capacity(count + 1); + let mut gas_used_ratio = Vec::with_capacity(count); + let mut reward = reward_percentiles.as_ref().map(|_| Vec::with_capacity(count)); + let mut last_base_fee = None; 
+ let mut last_gas_used = 0; + let mut last_gas_limit = 0; + + for block_number in oldest..oldest + count as u64 { + let block = block_by_number_or_none(&*provider, block_number, reward.is_some()).await; + let base_fee = block + .as_ref() + .and_then(|block| block.base_fee_per_gas) + .or(last_base_fee) + .unwrap_or_else(default_base_fee); + base_fee_per_gas.push(base_fee); + + if let Some(block) = block { + let gas_used = block.gas_used.to::(); + let gas_limit = block.gas_limit.to::(); + gas_used_ratio.push(block_gas_used_ratio(gas_used, gas_limit)); + + if let (Some(percentiles), Some(rows)) = (&reward_percentiles, reward.as_mut()) { + let tx_gas_used = fetch_tx_gas_used(&*provider, &block).await; + rows.push(compute_reward_percentiles(&block, &tx_gas_used, percentiles)); + } + + last_base_fee = Some(base_fee); + last_gas_used = gas_used; + last_gas_limit = gas_limit; + } else { + gas_used_ratio.push(0.0); + if let (Some(percentiles), Some(rows)) = (&reward_percentiles, reward.as_mut()) { + rows.push(vec![U256::ZERO; percentiles.len()]); + } + } + } + + let next_base_fee = last_base_fee + .map(|base_fee| calculate_next_base_fee(base_fee, last_gas_used, last_gas_limit)) + .unwrap_or_else(default_base_fee); + base_fee_per_gas.push(next_base_fee); + + Ok(FeeHistory { base_fee_per_gas, gas_used_ratio, oldest_block: U64::from(oldest), reward }) } async fn accounts(&self) -> RpcResult> { @@ -409,14 +696,277 @@ impl EthApiServer for EthApiImpl { Ok("0x44".to_string()) } - async fn syncing(&self) -> RpcResult { - Ok(false) + async fn syncing(&self) -> RpcResult { + if let Some(ref state) = self.node_state + && state.is_catching_up() + { + let current_block = self.current_block_number().await; + Ok(SyncStatus::Syncing(SyncInfo { + starting_block: U64::from(state.recovered_height()), + current_block: U64::from(current_block), + highest_block: U64::from(current_block), + })) + } else { + Ok(SyncStatus::NotSyncing(false)) + } } async fn get_logs(&self, filter: RpcLogFilter) 
-> RpcResult> { let provider = self.state_provider.read().await; provider.get_logs(filter).await.map_err(Into::into) } + + async fn new_filter(&self, filter: RpcLogFilter) -> RpcResult { + let head = self.current_block_number().await; + // Initialize the cursor so the first `getFilterChanges` starts at + // `from_block` (inclusive) when explicitly provided, rather than + // always starting from the current head. + let last_poll_block = if filter.block_hash.is_some() { + // block_hash filters are single-block; `None` ensures the + // first poll returns results, and subsequent polls return empty. + None + } else { + match &filter.from_block { + Some(BlockNumberOrTag::Number(n)) => { + let from = n.to::(); + // Cursor is *last included* block, so subtract 1 so the + // first poll begins at `from`. For block 0 we use `None` + // to represent "nothing polled yet". + if from == 0 { None } else { Some(from - 1) } + } + Some(BlockNumberOrTag::Tag(crate::types::BlockTag::Earliest)) => { + // Start from genesis: no blocks polled yet. + None + } + // latest / pending / safe / finalized / default -> current head + _ => Some(head), + } + }; + let id = self.filter_store.create(Filter::Log { criteria: filter, last_poll_block }); + Ok(U256::from(id)) + } + + async fn new_block_filter(&self) -> RpcResult { + let head = self.current_block_number().await; + let id = self.filter_store.create(Filter::Block { last_poll_block: head }); + Ok(U256::from(id)) + } + + async fn new_pending_transaction_filter(&self) -> RpcResult { + let known_hashes = self.pending_txs.read().await.keys().copied().collect(); + // Read `evicted` and `order.len()` under the same lock to avoid a + // race where an eviction between the two reads would shift the + // cursor. This is consistent with `send_raw_transaction`'s lock + // ordering (`pending_txs` then `pending_tx_order`). 
+ let last_seen_index = { + let order = self.pending_tx_order.read().await; + let evicted = self.pending_tx_evicted.load(std::sync::atomic::Ordering::Relaxed); + evicted + order.len() + }; + let id = + self.filter_store.create(Filter::PendingTransaction { known_hashes, last_seen_index }); + Ok(U256::from(id)) + } + + async fn get_filter_changes(&self, filter_id: U256) -> RpcResult { + let id = filter_id_to_u64(filter_id).ok_or(RpcError::FilterNotFound)?; + let entry = self.filter_store.get(id).ok_or(RpcError::FilterNotFound)?; + + // Read filter state under the lock, then release before any async I/O. + enum FilterSnapshot { + Log { criteria: RpcLogFilter, last_poll_block: Option }, + Block { last_poll_block: u64 }, + PendingTx { known_hashes: HashSet, last_seen_index: usize }, + } + + let snapshot = { + let filter = entry.lock().await; + match &*filter { + Filter::Log { criteria, last_poll_block } => FilterSnapshot::Log { + criteria: criteria.clone(), + last_poll_block: *last_poll_block, + }, + Filter::Block { last_poll_block } => { + FilterSnapshot::Block { last_poll_block: *last_poll_block } + } + Filter::PendingTransaction { known_hashes, last_seen_index } => { + FilterSnapshot::PendingTx { + known_hashes: known_hashes.clone(), + last_seen_index: *last_seen_index, + } + } + } + }; + // Lock is released here. + + match snapshot { + FilterSnapshot::Log { criteria, last_poll_block } => { + let head = self.current_block_number().await; + if let Some(lpb) = last_poll_block + && head <= lpb + { + entry.touch(); + return Ok(FilterChanges::Logs(Vec::new())); + } + + // Preserve the original `to_block` / `block_hash`. + // Only override `from_block` to advance the cursor, and + // only cap `to_block` at head when no fixed bound was set. + let changes_filter = if criteria.block_hash.is_some() { + // block_hash filters are single-block and already returned + // their results on the first poll (when last_poll_block was + // None). Subsequent polls always return empty. 
+ if last_poll_block.is_some() { + entry.touch(); + return Ok(FilterChanges::Logs(Vec::new())); + } + criteria.clone() + } else { + let from = last_poll_block.map(|lpb| lpb.saturating_add(1)).unwrap_or(0); + let to = match &criteria.to_block { + // Honour the original fixed upper bound. + Some(BlockNumberOrTag::Number(n)) => n.to::().min(head), + // Open-ended or "latest": cap at current head. + _ => head, + }; + RpcLogFilter { + from_block: Some(BlockNumberOrTag::Number(U64::from(from))), + to_block: Some(BlockNumberOrTag::Number(U64::from(to))), + // Preserve everything else from the original criteria. + address: criteria.address.clone(), + topics: criteria.topics.clone(), + block_hash: None, + } + }; + + let provider = self.state_provider.read().await; + let logs = provider.get_logs(changes_filter).await?; + + // Update the cursor under the lock. + let mut filter = entry.lock().await; + if let Filter::Log { last_poll_block: lpb, .. } = &mut *filter { + *lpb = Some(head); + } + entry.touch(); + Ok(FilterChanges::Logs(logs)) + } + FilterSnapshot::Block { last_poll_block } => { + let head = self.current_block_number().await; + if head <= last_poll_block { + entry.touch(); + return Ok(FilterChanges::Hashes(Vec::new())); + } + + let provider = self.state_provider.read().await; + let mut hashes = Vec::new(); + // Track the highest block that was actually observed + // rather than blindly advancing to `head`. + let mut highest_observed = last_poll_block; + for block_num in last_poll_block.saturating_add(1)..=head { + if let Some(block) = provider + .block_by_number(BlockNumberOrTag::Number(U64::from(block_num)), false) + .await? 
+ { + hashes.push(block.hash); + highest_observed = block_num; + } + } + + let mut filter = entry.lock().await; + if let Filter::Block { last_poll_block: lpb } = &mut *filter { + *lpb = highest_observed; + } + entry.touch(); + Ok(FilterChanges::Hashes(hashes)) + } + FilterSnapshot::PendingTx { known_hashes, last_seen_index } => { + // Return new pending tx hashes in insertion order. + // + // IMPORTANT: We must drop the `pending_tx_order` lock before + // acquiring `pending_txs` to maintain consistent lock ordering + // with `send_raw_transaction` (which takes `pending_txs` then + // `pending_tx_order`). + let (new_hashes, new_index) = { + let tx_order = self.pending_tx_order.read().await; + let evicted = + self.pending_tx_evicted.load(std::sync::atomic::Ordering::Relaxed); + // Convert the absolute cursor to a deque-relative offset. + // If entries were evicted past the cursor, start from the + // front of the deque (relative offset 0). + let relative_skip = last_seen_index.saturating_sub(evicted); + let hashes: Vec = tx_order + .iter() + .skip(relative_skip) + .filter(|h| !known_hashes.contains(*h)) + .copied() + .collect(); + let idx = evicted + tx_order.len(); + (hashes, idx) + // tx_order lock is dropped here + }; + let current_hashes: HashSet = + self.pending_txs.read().await.keys().copied().collect(); + + let mut filter = entry.lock().await; + if let Filter::PendingTransaction { known_hashes: kh, last_seen_index: idx } = + &mut *filter + { + *kh = current_hashes; + *idx = new_index; + } + entry.touch(); + Ok(FilterChanges::Hashes(new_hashes)) + } + } + } + + async fn get_filter_logs(&self, filter_id: U256) -> RpcResult> { + let id = filter_id_to_u64(filter_id).ok_or(RpcError::FilterNotFound)?; + let entry = self.filter_store.get(id).ok_or(RpcError::FilterNotFound)?; + let criteria = { + let filter = entry.lock().await; + match &*filter { + Filter::Log { criteria, .. } => criteria.clone(), + Filter::Block { .. } | Filter::PendingTransaction { .. 
} => { + return Err(RpcError::FilterNotFound.into()); + } + } + }; + + let provider = self.state_provider.read().await; + let logs = provider.get_logs(criteria).await?; + entry.touch(); + Ok(logs) + } + + async fn uninstall_filter(&self, filter_id: U256) -> RpcResult { + let Some(id) = filter_id_to_u64(filter_id) else { + return Ok(false); + }; + Ok(self.filter_store.remove(id)) + } +} + +impl EthApiImpl { + fn broadcast_pending_tx(&self, tx_hash: B256, pending_tx: RpcTransaction) { + if let Some(sender) = &self.pending_tx_broadcast { + let _ = sender.send(PendingTxEvent::Added(PendingTxInfo { + hash: tx_hash, + full_tx: Some(pending_tx.clone()), + })); + } + + if let Some(sender) = &self.mempool_broadcast { + let _ = sender.send(MempoolEvent::TxAdded { + hash: tx_hash, + from: pending_tx.from, + to: pending_tx.to, + value: pending_tx.value, + gas_price: pending_tx.gas_price, + nonce: pending_tx.nonce.to::(), + }); + } + } } /// Net API implementation. @@ -487,6 +1037,272 @@ impl Web3ApiServer for Web3ApiImpl { } } +const fn filter_id_to_u64(filter_id: U256) -> Option { + let limbs = filter_id.as_limbs(); + if limbs[1] != 0 || limbs[2] != 0 || limbs[3] != 0 { + return None; + } + Some(limbs[0]) +} + +async fn estimate_recent_fees( + provider: &S, + head: u64, + config: GasOracleConfig, +) -> GasOracleEstimate { + let block_count = config.blocks.max(1); + let start = head.saturating_sub(block_count.saturating_sub(1) as u64); + let mut gas_prices = Vec::new(); + let mut priority_fees = Vec::new(); + let mut latest_base_fee = None; + + for block_number in start..=head { + let Some(block) = block_by_number_or_none(provider, block_number, true).await else { + continue; + }; + let base_fee = block.base_fee_per_gas.unwrap_or_else(default_base_fee); + latest_base_fee = Some(base_fee); + + if let BlockTransactions::Full(txs) = &block.transactions { + gas_prices.extend(txs.iter().map(|tx| effective_gas_price_for_sampling(tx, base_fee))); + 
priority_fees.extend(txs.iter().map(|tx| effective_priority_fee(tx, base_fee))); + } + } + + let priority_fee = + percentile_value(&mut priority_fees, config.percentile).unwrap_or(config.min_priority_fee); + let priority_fee = priority_fee.max(config.min_priority_fee); + let latest_base_fee = latest_base_fee.unwrap_or_else(default_base_fee); + + // Clamp priority fee so that base_fee + priority_fee does not exceed + // max_price (when the base fee alone is still under the cap). + let priority_fee = if latest_base_fee < config.max_price { + priority_fee.min(config.max_price.saturating_sub(latest_base_fee)) + } else { + priority_fee + }; + + let min_gas_price = config.min_price.max(latest_base_fee.saturating_add(priority_fee)); + let gas_price = percentile_value(&mut gas_prices, config.percentile).unwrap_or(min_gas_price); + let gas_price = gas_price.max(min_gas_price); + + // Always enforce the hard cap unless the base fee alone exceeds it -- + // in that case the chain's base fee is already above the configured + // maximum and we must still return a usable price. 
+ let gas_price = if latest_base_fee <= config.max_price { + gas_price.min(config.max_price) + } else { + gas_price + }; + + GasOracleEstimate { gas_price, priority_fee } +} + +async fn block_by_number_or_none( + provider: &S, + block_number: u64, + full_transactions: bool, +) -> Option { + match provider + .block_by_number(BlockNumberOrTag::Number(U64::from(block_number)), full_transactions) + .await + { + Ok(block) => block, + Err(e) => { + warn!(block_number, error = %e, "failed to fetch block by number"); + None + } + } +} + +fn resolve_fee_history_newest(newest_block: BlockNumberOrTag, head: u64) -> u64 { + match newest_block { + BlockNumberOrTag::Number(n) => n.to::().min(head), + BlockNumberOrTag::Tag(BlockTag::Earliest) => 0, + BlockNumberOrTag::Tag(_) | BlockNumberOrTag::Latest => head, + } +} + +fn default_base_fee() -> U256 { + U256::from(GWEI) +} + +fn percentile_value(values: &mut [U256], percentile: u8) -> Option { + if values.is_empty() { + return None; + } + + values.sort_unstable(); + let percentile = usize::from(percentile.min(100)); + let index = (values.len() * percentile / 100).min(values.len() - 1); + Some(values[index]) +} + +fn block_gas_used_ratio(gas_used: u64, gas_limit: u64) -> f64 { + if gas_limit == 0 { + return 0.0; + } + (gas_used as f64 / gas_limit as f64).clamp(0.0, 1.0) +} + +/// Validates that `reward_percentiles` values are in `[0, 100]` and +/// monotonically non-decreasing, per the Ethereum JSON-RPC specification. 
+fn validate_reward_percentiles(percentiles: &[f64]) -> RpcResult<()> { + for p in percentiles { + if !p.is_finite() || *p < 0.0 || *p > 100.0 { + return Err(RpcError::InvalidTransaction( + "reward percentiles must be in [0, 100]".to_string(), + ) + .into()); + } + } + for w in percentiles.windows(2) { + if w[0] > w[1] { + return Err(RpcError::InvalidTransaction( + "reward percentiles must be monotonically non-decreasing".to_string(), + ) + .into()); + } + } + Ok(()) +} + +/// Fetches per-transaction `gas_used` from receipts for all transactions in +/// the block. Returns a `Vec` parallel to the block's full transaction list. +/// When a receipt cannot be found, falls back to the transaction's gas limit. +async fn fetch_tx_gas_used(provider: &S, block: &RpcBlock) -> Vec { + let BlockTransactions::Full(txs) = &block.transactions else { + return Vec::new(); + }; + let mut gas_used = Vec::with_capacity(txs.len()); + for tx in txs { + let used = match provider.receipt_by_hash(tx.hash).await { + Ok(Some(receipt)) => receipt.gas_used.to::(), + _ => tx.gas.to::(), + }; + gas_used.push(used); + } + gas_used +} + +fn compute_reward_percentiles( + block: &RpcBlock, + tx_gas_used: &[u64], + percentiles: &[f64], +) -> Vec { + let BlockTransactions::Full(txs) = &block.transactions else { + return vec![U256::ZERO; percentiles.len()]; + }; + if txs.is_empty() { + return vec![U256::ZERO; percentiles.len()]; + } + + let base_fee = block.base_fee_per_gas.unwrap_or_default(); + let mut rewards: Vec<(U256, u64)> = txs + .iter() + .enumerate() + .map(|(i, tx)| { + let gas = tx_gas_used.get(i).copied().unwrap_or_else(|| tx.gas.to::()); + (effective_priority_fee(tx, base_fee), gas) + }) + .filter(|(_, gas)| *gas > 0) + .collect(); + if rewards.is_empty() { + return vec![U256::ZERO; percentiles.len()]; + } + + rewards.sort_by_key(|(tip, _)| *tip); + let total_gas = rewards.iter().map(|(_, gas)| u128::from(*gas)).sum(); + + percentiles + .iter() + .map(|percentile| 
weighted_percentile_reward(&rewards, total_gas, *percentile)) + .collect() +} + +fn weighted_percentile_reward(rewards: &[(U256, u64)], total_gas: u128, percentile: f64) -> U256 { + let threshold = percentile_threshold(total_gas, percentile); + let mut cumulative_gas = 0u128; + + for (tip, gas) in rewards { + cumulative_gas = cumulative_gas.saturating_add(u128::from(*gas)); + if cumulative_gas >= threshold { + return *tip; + } + } + + rewards.last().map(|(tip, _)| *tip).unwrap_or_default() +} + +fn percentile_threshold(total_gas: u128, percentile: f64) -> u128 { + if total_gas == 0 { + return 0; + } + + let percentile = if percentile.is_finite() { percentile.clamp(0.0, 100.0) } else { 0.0 }; + ((total_gas as f64 * percentile / 100.0).ceil() as u128).min(total_gas) +} + +fn effective_priority_fee(tx: &RpcTransaction, base_fee: U256) -> U256 { + match (tx.max_fee_per_gas, tx.max_priority_fee_per_gas) { + (Some(max_fee), Some(max_priority_fee)) => { + max_priority_fee.min(max_fee.saturating_sub(base_fee)) + } + _ if is_dynamic_fee_type(tx) => { + // Indexed EIP-1559 (or later) tx without populated EIP-1559 + // fields: `gas_price` may represent `max_fee_per_gas`, so we + // cannot reliably derive the tip. Return zero to avoid + // inflating estimates. + U256::ZERO + } + _ => tx.gas_price.saturating_sub(base_fee), + } +} + +/// Returns `true` when the transaction type uses dynamic-fee semantics +/// (types 2, 3, 4 -- EIP-1559, EIP-4844, EIP-7702). +fn is_dynamic_fee_type(tx: &RpcTransaction) -> bool { + tx.tx_type.to::() >= 2 +} + +/// Derives the effective gas price a transaction actually paid, accounting +/// for the difference between legacy `gas_price` and EIP-1559 +/// `min(max_fee, base_fee + tip)`. 
+fn effective_gas_price_for_sampling(tx: &RpcTransaction, base_fee: U256) -> U256 { + match (tx.max_fee_per_gas, tx.max_priority_fee_per_gas) { + (Some(max_fee), Some(max_priority_fee)) => { + let tip = max_priority_fee.min(max_fee.saturating_sub(base_fee)); + base_fee.saturating_add(tip).min(max_fee) + } + _ => tx.gas_price, + } +} + +fn calculate_next_base_fee( + parent_base_fee: U256, + parent_gas_used: u64, + parent_gas_limit: u64, +) -> U256 { + let parent_gas_target = parent_gas_limit / 2; + if parent_gas_target == 0 || parent_gas_used == parent_gas_target { + return parent_base_fee; + } + + if parent_gas_used > parent_gas_target { + let gas_used_delta = parent_gas_used - parent_gas_target; + let base_fee_delta = parent_base_fee * U256::from(gas_used_delta) + / U256::from(parent_gas_target) + / U256::from(8); + parent_base_fee.saturating_add(base_fee_delta.max(U256::from(1))) + } else { + let gas_used_delta = parent_gas_target - parent_gas_used; + let base_fee_delta = parent_base_fee * U256::from(gas_used_delta) + / U256::from(parent_gas_target) + / U256::from(8); + parent_base_fee.saturating_sub(base_fee_delta) + } +} + fn raw_tx_to_pending_rpc(data: &Bytes) -> Result { let envelope = TxEnvelope::decode_2718(&mut data.as_ref()) .map_err(|err| RpcError::InvalidTransaction(format!("failed to decode: {err}")))?; @@ -512,12 +1328,23 @@ fn raw_tx_to_pending_rpc(data: &Bytes) -> Result { chain_id: envelope.chain_id().map(U64::from), max_fee_per_gas: max_fee_per_gas(&envelope).map(U256::from), max_priority_fee_per_gas: max_priority_fee_per_gas(&envelope).map(U256::from), - v: U64::from(u64::from(signature.v())), + v: U256::from(signature_v(&envelope)), r: signature.r(), s: signature.s(), }) } +fn signature_v(envelope: &TxEnvelope) -> u128 { + let y_parity = envelope.signature().v(); + match envelope { + TxEnvelope::Legacy(tx) => to_eip155_value(y_parity, tx.tx().chain_id), + TxEnvelope::Eip2930(_) + | TxEnvelope::Eip1559(_) + | TxEnvelope::Eip4844(_) + | 
TxEnvelope::Eip7702(_) => u128::from(y_parity), + } +} + const fn transaction_type(envelope: &TxEnvelope) -> u64 { match envelope { TxEnvelope::Legacy(_) => 0, @@ -558,14 +1385,308 @@ const fn max_priority_fee_per_gas(envelope: &TxEnvelope) -> Option { #[cfg(test)] mod tests { + use std::collections::HashMap; + use alloy_consensus::{SignableTransaction as _, TxEip1559}; use alloy_eips::eip2718::Encodable2718 as _; use alloy_primitives::{Signature, TxKind}; + use async_trait::async_trait; use k256::ecdsa::SigningKey; + use kora_domain::MempoolEvent; use sha3::{Digest as _, Keccak256}; use super::*; - use crate::state_provider::NoopStateProvider; + use crate::{ + PendingTxEvent, mempool_event_channel, pending_tx_channel, + state_provider::NoopStateProvider, + types::{AddressFilter, BlockTag, TopicFilter}, + }; + + #[derive(Clone, Debug)] + struct MockFeeStateProvider { + blocks: HashMap, + receipts: HashMap, + head: u64, + } + + impl MockFeeStateProvider { + fn new(blocks: Vec) -> Self { + let head = blocks.iter().map(|block| block.number.to::()).max().unwrap_or(0); + let blocks = + blocks.into_iter().map(|block| (block.number.to::(), block)).collect(); + Self { blocks, receipts: HashMap::new(), head } + } + + fn with_receipts(mut self, receipts: Vec) -> Self { + self.receipts = receipts.into_iter().map(|r| (r.transaction_hash, r)).collect(); + self + } + + fn resolve_block_number(&self, block: BlockNumberOrTag) -> u64 { + match block { + BlockNumberOrTag::Number(number) => number.to::(), + BlockNumberOrTag::Tag(BlockTag::Earliest) => 0, + BlockNumberOrTag::Tag(_) | BlockNumberOrTag::Latest => self.head, + } + } + + fn block_with_transaction_shape( + &self, + number: u64, + full_transactions: bool, + ) -> Option { + let mut block = self.blocks.get(&number).cloned()?; + if !full_transactions && let BlockTransactions::Full(txs) = &block.transactions { + block.transactions = + BlockTransactions::Hashes(txs.iter().map(|tx| tx.hash).collect()); + } + Some(block) + } + } + 
+ #[async_trait] + impl StateProvider for MockFeeStateProvider { + async fn balance( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(U256::ZERO) + } + + async fn nonce( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(0) + } + + async fn code( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(Bytes::new()) + } + + async fn storage( + &self, + _address: Address, + _slot: U256, + _block: Option, + ) -> Result { + Ok(U256::ZERO) + } + + async fn block_by_number( + &self, + block: BlockNumberOrTag, + full_transactions: bool, + ) -> Result, RpcError> { + Ok(self + .block_with_transaction_shape(self.resolve_block_number(block), full_transactions)) + } + + async fn block_by_hash( + &self, + hash: B256, + full_transactions: bool, + ) -> Result, RpcError> { + let number = self + .blocks + .values() + .find(|block| block.hash == hash) + .map(|block| block.number.to::()); + Ok(number + .and_then(|number| self.block_with_transaction_shape(number, full_transactions))) + } + + async fn transaction_by_hash( + &self, + hash: B256, + ) -> Result, RpcError> { + Ok(self.blocks.values().find_map(|block| match &block.transactions { + BlockTransactions::Full(txs) => txs.iter().find(|tx| tx.hash == hash).cloned(), + BlockTransactions::Hashes(_) => None, + })) + } + + async fn receipt_by_hash( + &self, + hash: B256, + ) -> Result, RpcError> { + Ok(self.receipts.get(&hash).cloned()) + } + + async fn block_number(&self) -> Result { + Ok(self.head) + } + } + + fn gwei(value: u64) -> U256 { + U256::from(value * GWEI) + } + + /// EIP-1559 transaction parameters for test block construction. 
+ struct Eip1559TxParams { + max_fee: U256, + max_priority_fee: U256, + } + + fn make_fee_block( + number: u64, + base_fee_per_gas: U256, + gas_used: u64, + gas_limit: u64, + gas_prices: Vec, + ) -> RpcBlock { + let block_hash = B256::repeat_byte(number as u8); + let transactions = gas_prices + .into_iter() + .enumerate() + .map(|(index, gas_price)| RpcTransaction { + hash: B256::repeat_byte((number as u8).wrapping_mul(16).wrapping_add(index as u8)), + nonce: U64::from(index as u64), + block_hash: Some(block_hash), + block_number: Some(U64::from(number)), + transaction_index: Some(U64::from(index as u64)), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + value: U256::ZERO, + gas: U64::from(21_000), + gas_price, + input: Bytes::new(), + tx_type: U64::ZERO, + chain_id: None, + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }) + .collect(); + + RpcBlock { + hash: block_hash, + parent_hash: B256::ZERO, + sha3_uncles: B256::ZERO, + number: U64::from(number), + state_root: B256::ZERO, + transactions_root: B256::ZERO, + receipts_root: B256::ZERO, + logs_bloom: Bytes::new(), + timestamp: U64::from(number), + gas_limit: U64::from(gas_limit), + gas_used: U64::from(gas_used), + extra_data: Bytes::new(), + mix_hash: B256::ZERO, + nonce: Default::default(), + base_fee_per_gas: Some(base_fee_per_gas), + miner: Address::ZERO, + difficulty: U256::ZERO, + total_difficulty: U256::ZERO, + uncles: vec![], + size: U64::ZERO, + transactions: BlockTransactions::Full(transactions), + withdrawals: vec![], + withdrawals_root: B256::ZERO, + } + } + + /// Build a block containing EIP-1559 (type 2) transactions with explicit + /// `max_fee_per_gas` and `max_priority_fee_per_gas` fields. 
+ fn make_eip1559_fee_block( + number: u64, + base_fee_per_gas: U256, + gas_used: u64, + gas_limit: u64, + txs: Vec, + ) -> RpcBlock { + let block_hash = B256::repeat_byte(number as u8); + let transactions = txs + .into_iter() + .enumerate() + .map(|(index, params)| { + // Effective gas price = min(max_fee, base_fee + tip) + let tip = + params.max_priority_fee.min(params.max_fee.saturating_sub(base_fee_per_gas)); + let gas_price = base_fee_per_gas.saturating_add(tip).min(params.max_fee); + RpcTransaction { + hash: B256::repeat_byte( + (number as u8).wrapping_mul(16).wrapping_add(index as u8), + ), + nonce: U64::from(index as u64), + block_hash: Some(block_hash), + block_number: Some(U64::from(number)), + transaction_index: Some(U64::from(index as u64)), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + value: U256::ZERO, + gas: U64::from(21_000), + gas_price, + input: Bytes::new(), + tx_type: U64::from(2), + chain_id: Some(U64::from(1)), + max_fee_per_gas: Some(params.max_fee), + max_priority_fee_per_gas: Some(params.max_priority_fee), + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + } + }) + .collect(); + + RpcBlock { + hash: block_hash, + parent_hash: B256::ZERO, + sha3_uncles: B256::ZERO, + number: U64::from(number), + state_root: B256::ZERO, + transactions_root: B256::ZERO, + receipts_root: B256::ZERO, + logs_bloom: Bytes::new(), + timestamp: U64::from(number), + gas_limit: U64::from(gas_limit), + gas_used: U64::from(gas_used), + extra_data: Bytes::new(), + mix_hash: B256::ZERO, + nonce: Default::default(), + base_fee_per_gas: Some(base_fee_per_gas), + miner: Address::ZERO, + difficulty: U256::ZERO, + total_difficulty: U256::ZERO, + uncles: vec![], + size: U64::ZERO, + transactions: BlockTransactions::Full(transactions), + withdrawals: vec![], + withdrawals_root: B256::ZERO, + } + } + + fn make_test_receipt( + tx_hash: B256, + block_hash: B256, + block_number: u64, + gas_used: u64, + ) -> RpcTransactionReceipt { + 
RpcTransactionReceipt { + transaction_hash: tx_hash, + transaction_index: U64::ZERO, + block_hash, + block_number: U64::from(block_number), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + cumulative_gas_used: U64::from(gas_used), + gas_used: U64::from(gas_used), + contract_address: None, + logs: vec![], + logs_bloom: Bytes::new(), + tx_type: U64::ZERO, + status: U64::from(1), + effective_gas_price: U256::from(GWEI), + } + } fn signed_test_tx(chain_id: u64, nonce: u64) -> Bytes { let mut secret = [0u8; 32]; @@ -591,6 +1712,188 @@ mod tests { Bytes::from(raw) } + #[derive(Clone, Default)] + struct TestStateProvider { + inner: Arc>, + } + + #[derive(Default)] + struct TestState { + head: u64, + blocks: HashMap, + logs: Vec, + } + + impl TestStateProvider { + async fn insert_block(&self, number: u64, hash: B256) { + let mut inner = self.inner.write().await; + inner.head = inner.head.max(number); + inner.blocks.insert( + number, + RpcBlock { hash, number: U64::from(number), ..RpcBlock::default() }, + ); + } + + async fn insert_log( + &self, + block_number: u64, + address: Address, + topics: Vec, + ) -> RpcLog { + let mut inner = self.inner.write().await; + let block_hash = inner.blocks.get(&block_number).map_or(B256::ZERO, |block| block.hash); + let log = RpcLog { + address, + topics, + data: Bytes::new(), + block_number: U64::from(block_number), + transaction_hash: B256::ZERO, + transaction_index: U64::ZERO, + block_hash, + log_index: U64::from(inner.logs.len() as u64), + removed: false, + }; + inner.logs.push(log.clone()); + log + } + } + + #[async_trait::async_trait] + impl StateProvider for TestStateProvider { + async fn balance( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(U256::ZERO) + } + + async fn nonce( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(0) + } + + async fn code( + &self, + _address: Address, + _block: Option, + ) -> Result { + Ok(Bytes::new()) + } + + async fn storage( + 
&self, + _address: Address, + _slot: U256, + _block: Option, + ) -> Result { + Ok(U256::ZERO) + } + + async fn block_by_number( + &self, + block: BlockNumberOrTag, + _full_transactions: bool, + ) -> Result, RpcError> { + let inner = self.inner.read().await; + let number = resolve_test_block_number(&block, inner.head); + Ok(inner.blocks.get(&number).cloned()) + } + + async fn block_by_hash( + &self, + hash: B256, + _full_transactions: bool, + ) -> Result, RpcError> { + let inner = self.inner.read().await; + Ok(inner.blocks.values().find(|block| block.hash == hash).cloned()) + } + + async fn transaction_by_hash( + &self, + _hash: B256, + ) -> Result, RpcError> { + Ok(None) + } + + async fn receipt_by_hash( + &self, + _hash: B256, + ) -> Result, RpcError> { + Ok(None) + } + + async fn block_number(&self) -> Result { + Ok(self.inner.read().await.head) + } + + async fn get_logs(&self, filter: RpcLogFilter) -> Result, RpcError> { + let inner = self.inner.read().await; + let from = filter + .from_block + .as_ref() + .map_or(0, |block| resolve_test_block_number(block, inner.head)); + let to = filter + .to_block + .as_ref() + .map_or(inner.head, |block| resolve_test_block_number(block, inner.head)); + let addresses = filter.address.clone().map(AddressFilter::into_vec); + + Ok(inner + .logs + .iter() + .filter(|log| { + if let Some(block_hash) = filter.block_hash + && log.block_hash != block_hash + { + return false; + } + if filter.block_hash.is_none() + && (log.block_number.to::() < from + || log.block_number.to::() > to) + { + return false; + } + if let Some(addresses) = &addresses + && !addresses.contains(&log.address) + { + return false; + } + topics_match(log, filter.topics.as_ref()) + }) + .cloned() + .collect()) + } + } + + fn resolve_test_block_number(block: &BlockNumberOrTag, head: u64) -> u64 { + match block { + BlockNumberOrTag::Number(number) => number.to::(), + BlockNumberOrTag::Tag(BlockTag::Earliest) => 0, + BlockNumberOrTag::Tag(_) | BlockNumberOrTag::Latest 
=> head, + } + } + + fn topics_match(log: &RpcLog, filters: Option<&Vec>>) -> bool { + let Some(filters) = filters else { + return true; + }; + + for (index, filter) in filters.iter().enumerate() { + let Some(filter) = filter else { + continue; + }; + let allowed = filter.clone().into_vec(); + if !log.topics.get(index).is_some_and(|topic| allowed.contains(topic)) { + return false; + } + } + true + } + #[test] fn web3_client_version() { let api = Web3ApiImpl::new(); @@ -620,6 +1923,418 @@ mod tests { assert_eq!(block_number, U64::from(42)); } + #[tokio::test] + async fn gas_price_reflects_recent_transactions() { + let provider = MockFeeStateProvider::new(vec![ + make_fee_block(0, gwei(1), 21_000, 30_000_000, vec![gwei(2)]), + make_fee_block(1, gwei(1), 21_000, 30_000_000, vec![gwei(4)]), + make_fee_block(2, gwei(1), 21_000, 30_000_000, vec![gwei(6)]), + ]); + let api = EthApiImpl::new(1, provider); + + let gas_price = EthApiServer::gas_price(&api).await.unwrap(); + let priority_fee = EthApiServer::max_priority_fee_per_gas(&api).await.unwrap(); + + assert_eq!(gas_price, gwei(4)); + assert_eq!(priority_fee, gwei(3)); + } + + #[tokio::test] + async fn gas_price_falls_back_to_base_fee_plus_min_tip_without_transactions() { + let provider = + MockFeeStateProvider::new(vec![make_fee_block(0, gwei(5), 0, 30_000_000, vec![])]); + let api = EthApiImpl::new(1, provider); + + let gas_price = EthApiServer::gas_price(&api).await.unwrap(); + let priority_fee = EthApiServer::max_priority_fee_per_gas(&api).await.unwrap(); + + assert_eq!(gas_price, gwei(6)); + assert_eq!(priority_fee, gwei(1)); + } + + #[tokio::test] + async fn fee_history_uses_indexed_base_fee_and_gas_ratio() { + let provider = MockFeeStateProvider::new(vec![make_fee_block( + 0, + gwei(7), + 15_000_000, + 30_000_000, + vec![], + )]); + let api = EthApiImpl::new(1, provider); + + let history = EthApiServer::fee_history(&api, U64::from(1), BlockNumberOrTag::Latest, None) + .await + .unwrap(); + + 
assert_eq!(history.oldest_block, U64::ZERO); + assert_eq!(history.base_fee_per_gas, vec![gwei(7), gwei(7)]); + assert_eq!(history.gas_used_ratio, vec![0.5]); + assert!(history.reward.is_none()); + } + + #[tokio::test] + async fn fee_history_rewards_reflect_actual_tips() { + let provider = MockFeeStateProvider::new(vec![make_fee_block( + 0, + gwei(1), + 42_000, + 30_000_000, + vec![gwei(3), gwei(5)], + )]); + let api = EthApiImpl::new(1, provider); + + let history = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![50.0]), + ) + .await + .unwrap(); + + let rewards = history.reward.unwrap(); + assert_eq!(rewards, vec![vec![gwei(2)]]); + } + + #[tokio::test] + async fn fee_history_rewards_are_zero_for_empty_blocks() { + let provider = + MockFeeStateProvider::new(vec![make_fee_block(0, gwei(1), 0, 30_000_000, vec![])]); + let api = EthApiImpl::new(1, provider); + + let history = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![25.0, 75.0]), + ) + .await + .unwrap(); + + let rewards = history.reward.unwrap(); + assert_eq!(rewards, vec![vec![U256::ZERO, U256::ZERO]]); + } + + #[tokio::test] + async fn fee_history_reward_uses_gas_used_not_gas_limit() { + // Two transactions: + // tx0: gas_price=3 gwei (tip=2 gwei), gas_limit=1_000_000, gas_used=50_000 + // tx1: gas_price=11 gwei (tip=10 gwei), gas_limit=21_000, gas_used=21_000 + // + // With gas_used weighting: total=71_000, 50th pct threshold=35_500. + // Sorted by tip: [(2 gwei, 50_000), (10 gwei, 21_000)]. + // cumulative after tx0 = 50_000 >= 35_500 => 50th pct = 2 gwei. + // + // With the old (buggy) gas_limit weighting: total=1_021_000. + // threshold=510_500. cumulative after tx0=1_000_000 >= 510_500 => still 2 gwei. + // Use 75th pct to differentiate: threshold_used=53_250, threshold_limit=765_750. + // With gas_used: cumulative after tx0=50_000 < 53_250, after tx1=71_000 >= 53_250 => 10 gwei. 
+ // With gas_limit: cumulative after tx0=1_000_000 >= 765_750 => 2 gwei. + let block_hash = B256::repeat_byte(0); + let tx0_hash = B256::repeat_byte(0x10); + let tx1_hash = B256::repeat_byte(0x11); + let block = RpcBlock { + hash: block_hash, + parent_hash: B256::ZERO, + sha3_uncles: B256::ZERO, + number: U64::ZERO, + state_root: B256::ZERO, + transactions_root: B256::ZERO, + receipts_root: B256::ZERO, + logs_bloom: Bytes::new(), + timestamp: U64::ZERO, + gas_limit: U64::from(30_000_000), + gas_used: U64::from(71_000), + extra_data: Bytes::new(), + mix_hash: B256::ZERO, + nonce: Default::default(), + base_fee_per_gas: Some(gwei(1)), + miner: Address::ZERO, + difficulty: U256::ZERO, + total_difficulty: U256::ZERO, + uncles: vec![], + size: U64::ZERO, + transactions: BlockTransactions::Full(vec![ + RpcTransaction { + hash: tx0_hash, + nonce: U64::ZERO, + block_hash: Some(block_hash), + block_number: Some(U64::ZERO), + transaction_index: Some(U64::ZERO), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + value: U256::ZERO, + gas: U64::from(1_000_000), + gas_price: gwei(3), + input: Bytes::new(), + tx_type: U64::ZERO, + chain_id: None, + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }, + RpcTransaction { + hash: tx1_hash, + nonce: U64::from(1), + block_hash: Some(block_hash), + block_number: Some(U64::ZERO), + transaction_index: Some(U64::from(1)), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + value: U256::ZERO, + gas: U64::from(21_000), + gas_price: gwei(11), + input: Bytes::new(), + tx_type: U64::ZERO, + chain_id: None, + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }, + ]), + withdrawals: vec![], + withdrawals_root: B256::ZERO, + }; + let receipts = vec![ + make_test_receipt(tx0_hash, block_hash, 0, 50_000), + make_test_receipt(tx1_hash, block_hash, 0, 21_000), + ]; + let provider 
= MockFeeStateProvider::new(vec![block]).with_receipts(receipts); + let api = EthApiImpl::new(1, provider); + + let history = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![75.0]), + ) + .await + .unwrap(); + + let rewards = history.reward.unwrap(); + // With gas_used weighting, 75th percentile should be 10 gwei (tx1). + // With the old gas_limit weighting, it would have been 2 gwei (tx0). + assert_eq!(rewards, vec![vec![gwei(10)]]); + } + + #[tokio::test] + async fn fee_history_rejects_out_of_range_percentiles() { + let provider = + MockFeeStateProvider::new(vec![make_fee_block(0, gwei(1), 0, 30_000_000, vec![])]); + let api = EthApiImpl::new(1, provider); + + let result = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![150.0]), + ) + .await; + + assert!(result.is_err()); + } + + #[tokio::test] + async fn fee_history_rejects_non_monotonic_percentiles() { + let provider = + MockFeeStateProvider::new(vec![make_fee_block(0, gwei(1), 0, 30_000_000, vec![])]); + let api = EthApiImpl::new(1, provider); + + let result = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![75.0, 25.0]), + ) + .await; + + assert!(result.is_err()); + } + + #[tokio::test] + async fn fee_history_accepts_valid_monotonic_percentiles() { + let provider = + MockFeeStateProvider::new(vec![make_fee_block(0, gwei(1), 0, 30_000_000, vec![])]); + let api = EthApiImpl::new(1, provider); + + let result = EthApiServer::fee_history( + &api, + U64::from(1), + BlockNumberOrTag::Latest, + Some(vec![0.0, 25.0, 50.0, 75.0, 100.0]), + ) + .await; + + assert!(result.is_ok()); + } + + #[test] + fn effective_priority_fee_eip1559_uses_min_of_tip_and_headroom() { + // EIP-1559 tx: max_fee=10 gwei, max_priority_fee=3 gwei, base_fee=2 gwei. 
+ // headroom = max_fee - base_fee = 8 gwei + // effective tip = min(3, 8) = 3 gwei + let tx = RpcTransaction { + hash: B256::ZERO, + nonce: U64::ZERO, + block_hash: None, + block_number: None, + transaction_index: None, + from: Address::ZERO, + to: None, + value: U256::ZERO, + gas: U64::from(21_000), + gas_price: gwei(10), + input: Bytes::new(), + tx_type: U64::from(2), + chain_id: Some(U64::from(1)), + max_fee_per_gas: Some(gwei(10)), + max_priority_fee_per_gas: Some(gwei(3)), + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }; + + assert_eq!(effective_priority_fee(&tx, gwei(2)), gwei(3)); + } + + #[test] + fn effective_priority_fee_eip1559_caps_at_headroom() { + // EIP-1559 tx: max_fee=5 gwei, max_priority_fee=4 gwei, base_fee=3 gwei. + // headroom = 5 - 3 = 2 gwei + // effective tip = min(4, 2) = 2 gwei + let tx = RpcTransaction { + hash: B256::ZERO, + nonce: U64::ZERO, + block_hash: None, + block_number: None, + transaction_index: None, + from: Address::ZERO, + to: None, + value: U256::ZERO, + gas: U64::from(21_000), + gas_price: gwei(5), + input: Bytes::new(), + tx_type: U64::from(2), + chain_id: Some(U64::from(1)), + max_fee_per_gas: Some(gwei(5)), + max_priority_fee_per_gas: Some(gwei(4)), + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }; + + assert_eq!(effective_priority_fee(&tx, gwei(3)), gwei(2)); + } + + #[test] + fn effective_priority_fee_indexed_eip1559_without_fields_returns_zero() { + // Indexed EIP-1559 tx where max_fee_per_gas/max_priority_fee_per_gas + // are not populated (None), but gas_price holds max_fee_per_gas. + // The fallback should return zero rather than inflating the tip. 
+ let tx = RpcTransaction { + hash: B256::ZERO, + nonce: U64::ZERO, + block_hash: None, + block_number: None, + transaction_index: None, + from: Address::ZERO, + to: None, + value: U256::ZERO, + gas: U64::from(21_000), + gas_price: gwei(20), + input: Bytes::new(), + tx_type: U64::from(2), + chain_id: Some(U64::from(1)), + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }; + + // Without the fix this would return gwei(19), inflating estimates. + assert_eq!(effective_priority_fee(&tx, gwei(1)), U256::ZERO); + } + + #[tokio::test] + async fn gas_price_eip1559_uses_effective_price_not_max_fee() { + // base_fee = 2 gwei. A type-2 tx with max_fee=20 gwei, tip=3 gwei. + // Effective price = base_fee + min(tip, max_fee - base_fee) = 2+3 = 5 gwei. + // Without the fix, the oracle would sample gas_price = max_fee = 20 gwei. + let provider = MockFeeStateProvider::new(vec![make_eip1559_fee_block( + 0, + gwei(2), + 21_000, + 30_000_000, + vec![Eip1559TxParams { max_fee: gwei(20), max_priority_fee: gwei(3) }], + )]); + let api = EthApiImpl::new(1, provider); + + let gas_price = EthApiServer::gas_price(&api).await.unwrap(); + // Should be 5 gwei (base + tip), not 20 gwei (max_fee). + assert_eq!(gas_price, gwei(5)); + } + + #[tokio::test] + async fn gas_price_never_exceeds_max_price() { + // Set up a scenario where min_gas_price (base_fee + priority_fee) + // would exceed max_price. Ensure the oracle respects the cap. 
+ let config = GasOracleConfig { + blocks: 1, + percentile: 60, + min_price: U256::from(GWEI), + max_price: gwei(10), + min_priority_fee: U256::from(GWEI), + }; + // base_fee = 8 gwei, tx gas_price = 12 gwei + // Without fix: min_gas_price = base_fee + priority_fee could exceed max_price + let provider = MockFeeStateProvider::new(vec![make_fee_block( + 0, + gwei(8), + 21_000, + 30_000_000, + vec![gwei(12)], + )]); + let api = EthApiImpl::new(1, provider).with_gas_oracle_config(config); + + let gas_price = EthApiServer::gas_price(&api).await.unwrap(); + assert!(gas_price <= gwei(10), "gas_price {gas_price} should not exceed max_price 10 gwei"); + } + + #[tokio::test] + async fn gas_price_allows_exceeding_max_when_base_fee_above_cap() { + // When the base fee alone is above max_price, the oracle must still + // return a usable price rather than clamping to max_price. + let config = GasOracleConfig { + blocks: 1, + percentile: 60, + min_price: U256::from(GWEI), + max_price: gwei(5), + min_priority_fee: U256::from(GWEI), + }; + // base_fee = 10 gwei (above max_price of 5 gwei) + let provider = MockFeeStateProvider::new(vec![make_fee_block( + 0, + gwei(10), + 21_000, + 30_000_000, + vec![gwei(12)], + )]); + let api = EthApiImpl::new(1, provider).with_gas_oracle_config(config); + + let gas_price = EthApiServer::gas_price(&api).await.unwrap(); + // Must be at least base_fee + min_priority_fee + assert!( + gas_price >= gwei(11), + "gas_price {gas_price} should be at least base_fee + min_priority_fee when base_fee exceeds cap" + ); + } + #[test] fn web3_sha3() { let api = Web3ApiImpl::new(); @@ -645,6 +2360,44 @@ mod tests { assert_eq!(result.unwrap(), alloy_primitives::keccak256(&tx_data)); } + #[tokio::test] + async fn eth_send_raw_transaction_broadcasts_after_acceptance() { + let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); + let (pending_tx, mut pending_rx) = pending_tx_channel(); + let (mempool_tx, mut mempool_rx) = 
mempool_event_channel(); + let api = EthApiImpl::with_tx_submit(1, NoopStateProvider, callback) + .with_pending_tx_broadcast(pending_tx) + .with_mempool_broadcast(mempool_tx); + let tx_data = signed_test_tx(1, 3); + let hash = EthApiServer::send_raw_transaction(&api, tx_data).await.unwrap(); + + let PendingTxEvent::Added(info) = pending_rx.try_recv().unwrap(); + assert_eq!(info.hash, hash); + assert_eq!(info.full_tx.as_ref().map(|tx| tx.hash), Some(hash)); + + assert!(matches!( + mempool_rx.try_recv().unwrap(), + MempoolEvent::TxAdded { hash: event_hash, nonce: 3, .. } if event_hash == hash + )); + } + + #[tokio::test] + async fn invalid_raw_transaction_does_not_broadcast() { + let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); + let (pending_tx, mut pending_rx) = pending_tx_channel(); + let (mempool_tx, mut mempool_rx) = mempool_event_channel(); + let api = EthApiImpl::with_tx_submit(1, NoopStateProvider, callback) + .with_pending_tx_broadcast(pending_tx) + .with_mempool_broadcast(mempool_tx); + + let result = + EthApiServer::send_raw_transaction(&api, Bytes::from_static(b"not a tx")).await; + + assert!(result.is_err()); + assert!(pending_rx.try_recv().is_err()); + assert!(mempool_rx.try_recv().is_err()); + } + #[tokio::test] async fn eth_get_transaction_by_hash_returns_pending_submission() { let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); @@ -717,4 +2470,310 @@ mod tests { "callback receives the caller's tx bytes verbatim — no re-encoding, no truncation" ); } + + #[tokio::test] + async fn eth_block_filter_lifecycle() { + let provider = TestStateProvider::default(); + provider.insert_block(1, B256::repeat_byte(1)).await; + let api = EthApiImpl::new(1, provider.clone()); + + let filter_id = EthApiServer::new_block_filter(&api).await.unwrap(); + provider.insert_block(2, B256::repeat_byte(2)).await; + provider.insert_block(3, B256::repeat_byte(3)).await; + + let changes = 
EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Hashes(hashes) = changes else { + panic!("block filter should return hashes"); + }; + assert_eq!(hashes, vec![B256::repeat_byte(2), B256::repeat_byte(3)]); + + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Hashes(hashes) = changes else { + panic!("block filter should return hashes"); + }; + assert!(hashes.is_empty()); + + assert!(EthApiServer::uninstall_filter(&api, filter_id).await.unwrap()); + assert!(!EthApiServer::uninstall_filter(&api, filter_id).await.unwrap()); + let err = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap_err(); + assert_eq!(err.code(), crate::error_codes::SERVER_ERROR); + } + + #[tokio::test] + async fn eth_log_filter_lifecycle() { + let provider = TestStateProvider::default(); + let target = Address::repeat_byte(0x11); + let other = Address::repeat_byte(0x22); + let topic = B256::repeat_byte(0xaa); + + provider.insert_block(1, B256::repeat_byte(1)).await; + provider.insert_log(1, target, vec![topic]).await; + let api = EthApiImpl::new(1, provider.clone()); + let filter_id = EthApiServer::new_filter( + &api, + RpcLogFilter { + address: Some(AddressFilter::Single(target)), + topics: Some(vec![Some(TopicFilter::Single(topic))]), + ..RpcLogFilter::default() + }, + ) + .await + .unwrap(); + + provider.insert_block(2, B256::repeat_byte(2)).await; + provider.insert_log(2, target, vec![topic]).await; + provider.insert_log(2, other, vec![topic]).await; + + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Logs(logs) = changes else { + panic!("log filter should return logs"); + }; + assert_eq!(logs.len(), 1); + assert_eq!(logs[0].address, target); + assert_eq!(logs[0].block_number, U64::from(2)); + + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Logs(logs) = changes else { + panic!("log filter 
should return logs"); + }; + assert!(logs.is_empty()); + + let all_logs = EthApiServer::get_filter_logs(&api, filter_id).await.unwrap(); + assert_eq!(all_logs.len(), 2); + } + + #[tokio::test] + async fn eth_pending_transaction_filter_lifecycle() { + let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); + let api = EthApiImpl::with_tx_submit(1, NoopStateProvider, callback); + + let existing = + EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 0)).await.unwrap(); + let filter_id = EthApiServer::new_pending_transaction_filter(&api).await.unwrap(); + let new = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 1)).await.unwrap(); + + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Hashes(hashes) = changes else { + panic!("pending transaction filter should return hashes"); + }; + assert_eq!(hashes, vec![new]); + assert!(!hashes.contains(&existing)); + + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Hashes(hashes) = changes else { + panic!("pending transaction filter should return hashes"); + }; + assert!(hashes.is_empty()); + } + + #[tokio::test] + async fn eth_log_filter_block_hash_returns_once() { + let provider = TestStateProvider::default(); + let target = Address::repeat_byte(0x11); + let topic = B256::repeat_byte(0xaa); + let block_hash = B256::repeat_byte(1); + + provider.insert_block(1, block_hash).await; + provider.insert_log(1, target, vec![topic]).await; + + let api = EthApiImpl::new(1, provider.clone()); + let filter_id = EthApiServer::new_filter( + &api, + RpcLogFilter { block_hash: Some(block_hash), ..RpcLogFilter::default() }, + ) + .await + .unwrap(); + + // First poll returns the matching logs. 
+ let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Logs(logs) = changes else { + panic!("log filter should return logs"); + }; + assert_eq!(logs.len(), 1); + assert_eq!(logs[0].address, target); + + // Advance the chain and confirm subsequent polls return empty. + provider.insert_block(2, B256::repeat_byte(2)).await; + let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + assert_eq!(changes, FilterChanges::Logs(Vec::new())); + } + + #[tokio::test] + async fn eth_get_filter_logs_rejects_non_log_filter() { + let provider = TestStateProvider::default(); + provider.insert_block(1, B256::repeat_byte(1)).await; + let api = EthApiImpl::new(1, provider); + + let filter_id = EthApiServer::new_block_filter(&api).await.unwrap(); + let err = EthApiServer::get_filter_logs(&api, filter_id).await.unwrap_err(); + assert_eq!(err.code(), crate::error_codes::SERVER_ERROR); + } + + #[tokio::test] + async fn eth_get_filter_changes_invalid_id() { + let api = EthApiImpl::new(1, NoopStateProvider); + + // Non-existent filter id. + let err = EthApiServer::get_filter_changes(&api, U256::from(999)).await.unwrap_err(); + assert_eq!(err.code(), crate::error_codes::SERVER_ERROR); + + // Overflowing filter id (> u64::MAX). 
+ let overflow = U256::from(u64::MAX).wrapping_add(U256::from(1)); + let err = EthApiServer::get_filter_changes(&api, overflow).await.unwrap_err(); + assert_eq!(err.code(), crate::error_codes::SERVER_ERROR); + } + + #[test] + fn filter_id_to_u64_edge_cases() { + assert_eq!(filter_id_to_u64(U256::ZERO), Some(0)); + assert_eq!(filter_id_to_u64(U256::from(1)), Some(1)); + assert_eq!(filter_id_to_u64(U256::from(u64::MAX)), Some(u64::MAX)); + assert_eq!(filter_id_to_u64(U256::from(u64::MAX).wrapping_add(U256::from(1))), None); + assert_eq!(filter_id_to_u64(U256::MAX), None); + } + + // --- Unit tests for helper functions --- + + #[test] + fn calculate_next_base_fee_at_target() { + // Gas used == target (half of limit): base fee unchanged. + let base_fee = gwei(10); + assert_eq!(calculate_next_base_fee(base_fee, 15_000_000, 30_000_000), base_fee); + } + + #[test] + fn calculate_next_base_fee_above_target() { + let next = calculate_next_base_fee(gwei(10), 20_000_000, 30_000_000); + assert!(next > gwei(10), "base fee should increase when gas exceeds target"); + } + + #[test] + fn calculate_next_base_fee_below_target() { + let next = calculate_next_base_fee(gwei(10), 5_000_000, 30_000_000); + assert!(next < gwei(10), "base fee should decrease when gas is below target"); + } + + #[test] + fn calculate_next_base_fee_zero_gas_limit() { + assert_eq!(calculate_next_base_fee(gwei(10), 0, 0), gwei(10)); + } + + #[test] + fn percentile_value_at_extremes() { + let mut values = vec![gwei(1), gwei(5), gwei(10)]; + assert_eq!(percentile_value(&mut values, 0), Some(gwei(1))); + assert_eq!(percentile_value(&mut values, 100), Some(gwei(10))); + } + + #[test] + fn percentile_value_empty_returns_none() { + let mut values: Vec = vec![]; + assert_eq!(percentile_value(&mut values, 50), None); + } + + #[test] + fn resolve_fee_history_newest_earliest_tag() { + let result = resolve_fee_history_newest(BlockNumberOrTag::Tag(BlockTag::Earliest), 1000); + assert_eq!(result, 0); + } + + #[tokio::test] 
+ async fn fee_history_multi_block_returns_correct_structure() { + let provider = MockFeeStateProvider::new(vec![ + make_fee_block(0, gwei(1), 10_000_000, 30_000_000, vec![gwei(2)]), + make_fee_block(1, gwei(2), 20_000_000, 30_000_000, vec![gwei(4)]), + make_fee_block(2, gwei(3), 25_000_000, 30_000_000, vec![gwei(6)]), + ]); + let api = EthApiImpl::new(1, provider); + + let history = EthApiServer::fee_history(&api, U64::from(3), BlockNumberOrTag::Latest, None) + .await + .unwrap(); + + assert_eq!(history.oldest_block, U64::ZERO); + // 3 blocks + 1 predicted next base fee = 4 entries. + assert_eq!(history.base_fee_per_gas.len(), 4); + assert_eq!(history.gas_used_ratio.len(), 3); + assert!(history.reward.is_none()); + } + + #[test] + fn effective_gas_price_for_sampling_legacy_tx() { + let tx = RpcTransaction { + hash: B256::ZERO, + nonce: U64::ZERO, + block_hash: None, + block_number: None, + transaction_index: None, + from: Address::ZERO, + to: None, + value: U256::ZERO, + gas: U64::from(21_000), + gas_price: gwei(15), + input: Bytes::new(), + tx_type: U64::ZERO, + chain_id: None, + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: U256::ZERO, + r: U256::ZERO, + s: U256::ZERO, + }; + + assert_eq!(effective_gas_price_for_sampling(&tx, gwei(1)), gwei(15)); + } + + #[test] + fn block_gas_used_ratio_edge_cases() { + assert_eq!(block_gas_used_ratio(100, 0), 0.0); + assert_eq!(block_gas_used_ratio(30_000_000, 30_000_000), 1.0); + } + + #[tokio::test] + async fn pending_tx_cache_evicts_oldest_when_over_limit() { + let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); + let api = + EthApiImpl::with_tx_submit(1, NoopStateProvider, callback).with_max_pending_txs(3); + + // Submit 4 transactions with a cap of 3. 
+ let h0 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 0)).await.unwrap(); + let _h1 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 1)).await.unwrap(); + let _h2 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 2)).await.unwrap(); + let h3 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 3)).await.unwrap(); + + // The oldest transaction (h0) should have been evicted. + let txs = api.pending_txs.read().await; + assert_eq!(txs.len(), 3, "map must be bounded to the cap"); + assert!(!txs.contains_key(&h0), "oldest tx should be evicted"); + assert!(txs.contains_key(&h3), "newest tx should still be present"); + drop(txs); + + let order = api.pending_tx_order.read().await; + assert_eq!(order.len(), 3, "order deque must be bounded to the cap"); + } + + #[tokio::test] + async fn pending_tx_filter_works_after_eviction() { + let callback: TxSubmitCallback = Arc::new(move |_| Box::pin(async { Ok(()) })); + let api = + EthApiImpl::with_tx_submit(1, NoopStateProvider, callback).with_max_pending_txs(3); + + // Submit 3 transactions, then create a filter. + let _h0 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 0)).await.unwrap(); + let _h1 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 1)).await.unwrap(); + let _h2 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 2)).await.unwrap(); + let filter_id = EthApiServer::new_pending_transaction_filter(&api).await.unwrap(); + + // Submit 2 more which trigger eviction of h0 and h1. + let h3 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 3)).await.unwrap(); + let h4 = EthApiServer::send_raw_transaction(&api, signed_test_tx(1, 4)).await.unwrap(); + + // Filter changes should report the newly added hashes. 
+ let changes = EthApiServer::get_filter_changes(&api, filter_id).await.unwrap(); + let FilterChanges::Hashes(hashes) = changes else { + panic!("pending transaction filter should return hashes"); + }; + assert!(hashes.contains(&h3), "new tx after filter creation should appear"); + assert!(hashes.contains(&h4), "new tx after filter creation should appear"); + } } diff --git a/crates/node/rpc/src/filters.rs b/crates/node/rpc/src/filters.rs new file mode 100644 index 0000000..a3fe07e --- /dev/null +++ b/crates/node/rpc/src/filters.rs @@ -0,0 +1,232 @@ +//! In-memory Ethereum filter state. + +use std::{ + collections::{HashMap, HashSet}, + sync::{ + Arc, + atomic::{AtomicU64, Ordering}, + }, + time::{Duration, Instant}, +}; + +use alloy_primitives::B256; +use parking_lot::RwLock; +use serde::Serialize; +use tokio::sync::{Mutex, MutexGuard}; + +use crate::types::{RpcLog, RpcLogFilter}; + +/// Default lifetime for inactive HTTP filters. +pub(crate) const DEFAULT_FILTER_TTL: Duration = Duration::from_secs(5 * 60); + +/// Default maximum number of active HTTP filters. +pub(crate) const DEFAULT_MAX_FILTERS: usize = 1024; + +/// Unique server-local filter identifier. +pub(crate) type FilterId = u64; + +/// Response payload for `eth_getFilterChanges`. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[serde(untagged)] +pub enum FilterChanges { + /// Log filter changes. + Logs(Vec), + /// Block or pending transaction hash changes. + Hashes(Vec), +} + +/// Server-side Ethereum filter cursor. +#[derive(Debug)] +pub(crate) enum Filter { + /// Log filter cursor. + Log { + /// Log matching criteria supplied at filter creation. + criteria: RpcLogFilter, + /// Last block included by `eth_getFilterChanges`. + /// `None` means no blocks have been polled yet (first poll starts + /// from the filter's `from_block`). + last_poll_block: Option, + }, + /// Block filter cursor. + Block { + /// Last block included by `eth_getFilterChanges`. 
+ last_poll_block: u64, + }, + /// Pending transaction filter cursor. + PendingTransaction { + /// Pending transaction hashes already reported to this filter. + known_hashes: HashSet, + /// Snapshot index into the shared insertion-order vec at the time + /// of last poll (or filter creation). New hashes are those at + /// indices >= this value that are not in `known_hashes`. + last_seen_index: usize, + }, +} + +/// A single filter entry plus its TTL bookkeeping. +#[derive(Debug)] +pub(crate) struct FilterEntry { + filter: Mutex, + last_poll_time: RwLock, +} + +impl FilterEntry { + fn new(filter: Filter) -> Self { + Self { filter: Mutex::new(filter), last_poll_time: RwLock::new(Instant::now()) } + } + + #[cfg(test)] + fn new_at(filter: Filter, last_poll_time: Instant) -> Self { + Self { filter: Mutex::new(filter), last_poll_time: RwLock::new(last_poll_time) } + } + + pub(crate) async fn lock(&self) -> MutexGuard<'_, Filter> { + self.filter.lock().await + } + + pub(crate) fn touch(&self) { + *self.last_poll_time.write() = Instant::now(); + } + + fn last_poll_time(&self) -> Instant { + *self.last_poll_time.read() + } +} + +/// Bounded in-memory store for active Ethereum HTTP filters. +#[derive(Debug)] +pub(crate) struct FilterStore { + filters: RwLock>>, + next_id: AtomicU64, + max_filters: usize, + ttl: Duration, +} + +impl Default for FilterStore { + fn default() -> Self { + Self::new(DEFAULT_MAX_FILTERS, DEFAULT_FILTER_TTL) + } +} + +impl FilterStore { + /// Create a store with a maximum entry count and inactive-entry TTL. + pub(crate) fn new(max_filters: usize, ttl: Duration) -> Self { + assert!(max_filters > 0, "filter store must allow at least one filter"); + Self { filters: RwLock::new(HashMap::new()), next_id: AtomicU64::new(1), max_filters, ttl } + } + + /// Insert a filter and return its id. 
+ pub(crate) fn create(&self, filter: Filter) -> FilterId { + self.cleanup_expired(); + + let mut id = self.next_filter_id(); + let mut filters = self.filters.write(); + while filters.contains_key(&id) { + id = self.next_filter_id(); + } + if filters.len() >= self.max_filters { + Self::evict_oldest(&mut filters); + } + filters.insert(id, Arc::new(FilterEntry::new(filter))); + id + } + + /// Return a filter entry if it exists and has not expired. + pub(crate) fn get(&self, id: FilterId) -> Option> { + self.cleanup_expired(); + self.filters.read().get(&id).cloned() + } + + /// Remove a filter by id. + pub(crate) fn remove(&self, id: FilterId) -> bool { + self.filters.write().remove(&id).is_some() + } + + /// Remove filters that have not been polled within the TTL. + pub(crate) fn cleanup_expired(&self) -> usize { + let now = Instant::now(); + let mut filters = self.filters.write(); + let before = filters.len(); + filters.retain(|_, entry| now.duration_since(entry.last_poll_time()) < self.ttl); + before - filters.len() + } + + fn next_filter_id(&self) -> FilterId { + loop { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + if id != 0 { + return id; + } + } + } + + fn evict_oldest(filters: &mut HashMap>) { + if let Some(id) = + filters.iter().min_by_key(|(_, entry)| entry.last_poll_time()).map(|(id, _)| *id) + { + filters.remove(&id); + } + } + + #[cfg(test)] + fn create_at(&self, filter: Filter, last_poll_time: Instant) -> FilterId { + let id = self.next_filter_id(); + self.filters.write().insert(id, Arc::new(FilterEntry::new_at(filter, last_poll_time))); + id + } + + #[cfg(test)] + fn len(&self) -> usize { + self.filters.read().len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn block_filter(last_poll_block: u64) -> Filter { + Filter::Block { last_poll_block } + } + + #[test] + fn filter_store_create_and_get() { + let store = FilterStore::new(16, Duration::from_secs(300)); + let id = store.create(block_filter(10)); + + 
assert!(store.get(id).is_some()); + assert!(store.get(id + 999).is_none()); + } + + #[test] + fn filter_store_remove() { + let store = FilterStore::new(16, Duration::from_secs(300)); + let id = store.create(block_filter(0)); + + assert!(store.remove(id)); + assert!(!store.remove(id)); + assert!(store.get(id).is_none()); + } + + #[test] + fn filter_store_cleanup_expired() { + let store = FilterStore::new(16, Duration::from_millis(50)); + let expired = store.create_at(block_filter(0), Instant::now() - Duration::from_millis(100)); + let fresh = store.create_at(block_filter(0), Instant::now()); + + assert_eq!(store.cleanup_expired(), 1); + assert!(store.get(expired).is_none()); + assert!(store.get(fresh).is_some()); + } + + #[test] + fn filter_store_evicts_oldest_when_bounded() { + let store = FilterStore::new(1, Duration::from_secs(300)); + let first = store.create(block_filter(0)); + let second = store.create(block_filter(1)); + + assert!(store.get(first).is_none()); + assert!(store.get(second).is_some()); + assert_eq!(store.len(), 1); + } +} diff --git a/crates/node/rpc/src/indexed_provider.rs b/crates/node/rpc/src/indexed_provider.rs index 398ce61..8e1ac0a 100644 --- a/crates/node/rpc/src/indexed_provider.rs +++ b/crates/node/rpc/src/indexed_provider.rs @@ -16,11 +16,19 @@ use crate::{ error::RpcError, state_provider::StateProvider, types::{ - BlockNumberOrTag, BlockTag, BlockTransactions, CallRequest, RpcBlock, RpcLog, RpcLogFilter, - RpcTransaction, RpcTransactionReceipt, + BlockNumberOrTag, BlockTag, BlockTransactions, CallRequest, EMPTY_UNCLE_HASH, + EMPTY_WITHDRAWALS_ROOT, RpcBlock, RpcLog, RpcLogFilter, RpcTransaction, + RpcTransactionReceipt, }, }; +/// Maximum block range allowed for a single `eth_getLogs` query. +/// +/// Ranges exceeding this limit are rejected with an invalid-params error to +/// prevent unbounded iteration from monopolising the RPC thread. The value is +/// aligned with Infura's 10 000-block cap. 
+const MAX_LOG_BLOCK_RANGE: u64 = 10_000; + /// State provider that combines indexed block data with live state queries. /// /// Uses [`BlockIndex`] for block, transaction, and receipt lookups, delegates @@ -32,20 +40,26 @@ pub struct IndexedStateProvider { index: Arc, state: S, executor: Arc, + fee_recipient: Address, } impl IndexedStateProvider { /// Creates a new indexed state provider with an explicit executor. #[must_use] - pub const fn new(index: Arc, state: S, executor: Arc) -> Self { - Self { index, state, executor } + pub const fn new( + index: Arc, + state: S, + executor: Arc, + fee_recipient: Address, + ) -> Self { + Self { index, state, executor, fee_recipient } } /// Creates a new indexed state provider with a default executor for the /// given chain id. #[must_use] pub fn with_chain_id(index: Arc, state: S, chain_id: u64) -> Self { - Self::new(index, state, Arc::new(RevmExecutor::new(chain_id))) + Self::new(index, state, Arc::new(RevmExecutor::new(chain_id)), Address::ZERO) } } @@ -55,6 +69,7 @@ impl Clone for IndexedStateProvider { index: Arc::clone(&self.index), state: self.state.clone(), executor: Arc::clone(&self.executor), + fee_recipient: self.fee_recipient, } } } @@ -64,24 +79,35 @@ impl StateProvider for IndexedStateProvi async fn balance( &self, address: Address, - _block: Option, + block: Option, ) -> Result { - self.state.balance(&address).await.map_err(state_error_to_rpc) + self.reject_historical_block(&block)?; + match self.state.balance(&address).await { + Ok(balance) => Ok(balance), + Err(StateDbError::AccountNotFound(_)) => Ok(U256::ZERO), + Err(e) => Err(state_error_to_rpc(e)), + } } async fn nonce( &self, address: Address, - _block: Option, + block: Option, ) -> Result { - self.state.nonce(&address).await.map_err(state_error_to_rpc) + self.reject_historical_block(&block)?; + match self.state.nonce(&address).await { + Ok(nonce) => Ok(nonce), + Err(StateDbError::AccountNotFound(_)) => Ok(0), + Err(e) => Err(state_error_to_rpc(e)), + } 
} async fn code( &self, address: Address, - _block: Option, + block: Option, ) -> Result { + self.reject_historical_block(&block)?; // EIP-1474: `eth_getCode` MUST return `0x` for unknown accounts and // for EOAs without code, NOT an error. Many tools branch on // `getCode === '0x'` to decide "is this a contract?". @@ -104,9 +130,14 @@ impl StateProvider for IndexedStateProvi &self, address: Address, slot: U256, - _block: Option, + block: Option, ) -> Result { - self.state.storage(&address, &slot).await.map_err(state_error_to_rpc) + self.reject_historical_block(&block)?; + match self.state.storage(&address, &slot).await { + Ok(value) => Ok(value), + Err(StateDbError::AccountNotFound(_)) => Ok(U256::ZERO), + Err(e) => Err(state_error_to_rpc(e)), + } } async fn block_by_number( @@ -147,6 +178,7 @@ impl StateProvider for IndexedStateProvi request: CallRequest, block: Option, ) -> Result { + self.reject_historical_block(&block)?; let block_ctx = self.block_context_for(block)?; let params = call_request_to_params(request); self.executor.simulate_call(&self.state, params, &block_ctx).map_err(execution_error_to_rpc) @@ -157,24 +189,54 @@ impl StateProvider for IndexedStateProvi request: CallRequest, block: Option, ) -> Result { + self.reject_historical_block(&block)?; let block_ctx = self.block_context_for(block)?; let params = call_request_to_params(request); self.executor.estimate_gas(&self.state, params, &block_ctx).map_err(execution_error_to_rpc) } async fn get_logs(&self, filter: RpcLogFilter) -> Result, RpcError> { - let from_block = - filter.from_block.as_ref().map(|b| self.resolve_block_number(b)).transpose()?; - let to_block = - filter.to_block.as_ref().map(|b| self.resolve_block_number(b)).transpose()?; + // EIP-234: blockHash is mutually exclusive with fromBlock/toBlock. 
+ if filter.block_hash.is_some() && (filter.from_block.is_some() || filter.to_block.is_some()) + { + return Err(RpcError::InvalidParams( + "blockHash is mutually exclusive with fromBlock/toBlock".into(), + )); + } let mut log_filter = LogFilter::new(); - if let Some(from) = from_block { - log_filter = log_filter.from_block(from); - } - if let Some(to) = to_block { - log_filter = log_filter.to_block(to); + + if let Some(block_hash) = &filter.block_hash { + // Single-block query by hash per EIP-234. + let block = self + .index + .get_block_by_hash(block_hash) + .ok_or_else(|| RpcError::InvalidParams("block not found".into()))?; + log_filter = log_filter.from_block(block.number).to_block(block.number); + } else { + let head = self.index.head_block_number(); + let from_block = + filter.from_block.as_ref().map(|b| self.resolve_block_number(b)).transpose()?; + let to_block = + filter.to_block.as_ref().map(|b| self.resolve_block_number(b)).transpose()?; + + let from = from_block.unwrap_or(0); + let to = to_block.unwrap_or(head).min(head); + + if from > to { + return Err(RpcError::InvalidParams( + "fromBlock must not be greater than toBlock".into(), + )); + } + if to.saturating_sub(from) > MAX_LOG_BLOCK_RANGE { + return Err(RpcError::InvalidParams(format!( + "block range exceeds maximum of {MAX_LOG_BLOCK_RANGE}" + ))); + } + + log_filter = log_filter.from_block(from).to_block(to); } + if let Some(addr_filter) = filter.address { log_filter = log_filter.address(addr_filter.into_vec()); } @@ -186,18 +248,18 @@ impl StateProvider for IndexedStateProvi } } - let indexed_logs = self.index.get_logs(&log_filter); - let block_number = self.index.head_block_number(); - let logs = indexed_logs + let logs = self + .index + .get_logs(&log_filter) .into_iter() .map(|log| RpcLog { address: log.address, topics: log.topics, data: log.data, - block_number: U64::from(block_number), - transaction_hash: B256::ZERO, - transaction_index: U64::ZERO, - block_hash: B256::ZERO, + block_number: 
U64::from(log.block_number), + transaction_hash: log.transaction_hash, + transaction_index: U64::from(log.transaction_index), + block_hash: log.block_hash, log_index: U64::from(log.log_index), removed: false, }) @@ -207,6 +269,43 @@ impl StateProvider for IndexedStateProvi } impl IndexedStateProvider { + /// Reject requests for historical or future state that we cannot serve. + /// + /// Kora uses QMDB which only maintains the latest state. We accept + /// `None`, `latest`, `pending`, `safe`, `finalized`, and the current + /// head block number; everything else returns an explicit error instead + /// of silently returning the latest state. + /// + /// In Simplex BFT all committed blocks are immediately finalized, so + /// `safe` and `finalized` are semantically equivalent to `latest`. + fn reject_historical_block(&self, block: &Option) -> Result<(), RpcError> { + match block { + None + | Some(BlockNumberOrTag::Latest) + | Some(BlockNumberOrTag::Tag( + BlockTag::Latest | BlockTag::Pending | BlockTag::Safe | BlockTag::Finalized, + )) => Ok(()), + Some(BlockNumberOrTag::Number(n)) => { + let head = self.index.head_block_number(); + let requested = n.to::(); + if requested == head { + Ok(()) + } else if requested > head { + Err(RpcError::InvalidBlockNumber(format!( + "block not yet available (requested {requested}, head {head})", + ))) + } else { + Err(RpcError::Unsupported(format!( + "historical state not available (block {requested})", + ))) + } + } + Some(BlockNumberOrTag::Tag(tag)) => { + Err(RpcError::Unsupported(format!("historical state not available (tag {tag:?})",))) + } + } + } + fn indexed_block_to_rpc(&self, block: IndexedBlock, full_transactions: bool) -> RpcBlock { let transactions = if full_transactions { let txs = self @@ -223,24 +322,27 @@ impl IndexedStateProvider { RpcBlock { hash: block.hash, parent_hash: block.parent_hash, + sha3_uncles: EMPTY_UNCLE_HASH, number: U64::from(block.number), state_root: block.state_root, - transactions_root: 
B256::ZERO, - receipts_root: B256::ZERO, - logs_bloom: Bytes::new(), + transactions_root: block.transactions_root, + receipts_root: block.receipts_root, + logs_bloom: Bytes::copy_from_slice(block.logs_bloom.as_slice()), timestamp: U64::from(block.timestamp), gas_limit: U64::from(block.gas_limit), gas_used: U64::from(block.gas_used), extra_data: Bytes::new(), - mix_hash: B256::ZERO, + mix_hash: block.mix_hash, nonce: Default::default(), base_fee_per_gas: block.base_fee_per_gas.map(U256::from), - miner: Address::ZERO, + miner: self.fee_recipient, difficulty: U256::ZERO, total_difficulty: U256::ZERO, uncles: vec![], - size: U64::ZERO, + size: U64::from(block.size), transactions, + withdrawals: vec![], + withdrawals_root: EMPTY_WITHDRAWALS_ROOT, } } @@ -269,6 +371,7 @@ impl IndexedStateProvider { Some(b) => self.resolve_block_number(&b)?, None => self.index.head_block_number(), }; + let recent_hashes = self.index.recent_block_hashes(block_num); if let Some(indexed) = self.index.get_block_by_number(block_num) { let header = Header { number: indexed.number, @@ -277,7 +380,8 @@ impl IndexedStateProvider { base_fee_per_gas: indexed.base_fee_per_gas, ..Header::default() }; - Ok(BlockContext::new(header, indexed.parent_hash, B256::ZERO)) + Ok(BlockContext::new(header, indexed.parent_hash, B256::ZERO) + .with_recent_block_hashes(recent_hashes)) } else { let header = Header { number: 0, @@ -310,12 +414,15 @@ fn call_request_to_params(req: CallRequest) -> CallParams { fn execution_error_to_rpc(err: kora_executor::ExecutionError) -> RpcError { use kora_executor::ExecutionError as E; match err { - E::Revert(data) => RpcError::ExecutionFailed(format!("execution reverted: {data}")), + E::Revert(data) => RpcError::ExecutionReverted(Some(data)), E::TxExecution(msg) | E::InvalidTx(msg) | E::TxDecode(msg) | E::BlockValidation(msg) => { RpcError::ExecutionFailed(msg) } E::State(s) => state_error_to_rpc(s), E::CodeNotFound(h) => RpcError::StateError(format!("code not found: {h}")), + 
E::StateCommit => { + RpcError::Internal("QMDB commit failed during block execution".to_string()) + } } } @@ -342,17 +449,18 @@ fn indexed_tx_to_rpc(tx: IndexedTransaction) -> RpcTransaction { gas: U64::from(tx.gas_limit), gas_price: U256::from(tx.gas_price), input: tx.input, - tx_type: U64::ZERO, - chain_id: None, - max_fee_per_gas: None, - max_priority_fee_per_gas: None, - v: U64::ZERO, - r: U256::ZERO, - s: U256::ZERO, + tx_type: U64::from(tx.tx_type), + chain_id: tx.chain_id.map(U64::from), + max_fee_per_gas: tx.max_fee_per_gas.map(U256::from), + max_priority_fee_per_gas: tx.max_priority_fee_per_gas.map(U256::from), + v: U256::from(tx.v), + r: tx.r, + s: tx.s, } } fn indexed_receipt_to_rpc(receipt: IndexedReceipt) -> RpcTransactionReceipt { + let logs_bloom = Bytes::copy_from_slice(receipt.logs_bloom.as_slice()); let logs = receipt .logs .into_iter() @@ -360,10 +468,10 @@ fn indexed_receipt_to_rpc(receipt: IndexedReceipt) -> RpcTransactionReceipt { address: log.address, topics: log.topics, data: log.data, - block_number: U64::from(receipt.block_number), - transaction_hash: receipt.transaction_hash, - transaction_index: U64::from(receipt.transaction_index), - block_hash: receipt.block_hash, + block_number: U64::from(log.block_number), + transaction_hash: log.transaction_hash, + transaction_index: U64::from(log.transaction_index), + block_hash: log.block_hash, log_index: U64::from(log.log_index), removed: false, }) @@ -380,15 +488,16 @@ fn indexed_receipt_to_rpc(receipt: IndexedReceipt) -> RpcTransactionReceipt { gas_used: U64::from(receipt.gas_used), contract_address: receipt.contract_address, logs, - logs_bloom: Bytes::new(), - tx_type: U64::ZERO, + logs_bloom, + tx_type: U64::from(receipt.tx_type), status: if receipt.status { U64::from(1) } else { U64::ZERO }, - effective_gas_price: U256::ZERO, + effective_gas_price: U256::from(receipt.effective_gas_price), } } #[cfg(test)] mod tests { + use alloy_primitives::Bloom; use kora_indexer::IndexedLog; use super::*; 
@@ -449,10 +558,15 @@ mod tests { number, parent_hash: B256::ZERO, state_root: B256::ZERO, + transactions_root: B256::ZERO, + receipts_root: B256::ZERO, timestamp: 1000 + number, gas_limit: 30_000_000, gas_used: 21_000, base_fee_per_gas: Some(1_000_000_000), + mix_hash: B256::ZERO, + logs_bloom: Bloom::ZERO, + size: 508, transaction_hashes: vec![], } } @@ -468,6 +582,13 @@ mod tests { value: U256::ZERO, gas_limit: 21_000, gas_price: 1_000_000_000, + tx_type: 0, + chain_id: Some(1337), + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: 27, + r: U256::from(1), + s: U256::from(2), input: Bytes::new(), nonce: 0, } @@ -489,11 +610,99 @@ mod tests { topics: vec![], data: Bytes::new(), log_index: 0, + block_number, + block_hash, + transaction_hash: tx_hash, + transaction_index: 0, }], + logs_bloom: Bloom::ZERO, + tx_type: 0, + effective_gas_price: 1_000_000_000, status: true, } } + #[test] + fn indexed_tx_preserves_eip1559_fields() { + let block_hash = B256::repeat_byte(1); + let tx_hash = B256::repeat_byte(2); + let tx = IndexedTransaction { + hash: tx_hash, + block_hash, + block_number: 7, + index: 3, + from: Address::repeat_byte(0xaa), + to: Some(Address::repeat_byte(0xbb)), + value: U256::from(10), + gas_limit: 50_000, + gas_price: 20_000_000_000, + tx_type: 2, + chain_id: Some(1337), + max_fee_per_gas: Some(20_000_000_000), + max_priority_fee_per_gas: Some(1_500_000_000), + v: 1, + r: U256::from(123), + s: U256::from(456), + input: Bytes::from_static(&[0xde, 0xad]), + nonce: 9, + }; + + let rpc_tx = indexed_tx_to_rpc(tx); + + assert_eq!(rpc_tx.hash, tx_hash); + assert_eq!(rpc_tx.block_hash, Some(block_hash)); + assert_eq!(rpc_tx.transaction_index, Some(U64::from(3))); + assert_eq!(rpc_tx.tx_type, U64::from(2)); + assert_eq!(rpc_tx.chain_id, Some(U64::from(1337))); + assert_eq!(rpc_tx.max_fee_per_gas, Some(U256::from(20_000_000_000u64))); + assert_eq!(rpc_tx.max_priority_fee_per_gas, Some(U256::from(1_500_000_000u64))); + assert_eq!(rpc_tx.v, 
U256::from(1)); + assert_eq!(rpc_tx.r, U256::from(123)); + assert_eq!(rpc_tx.s, U256::from(456)); + } + + #[test] + fn indexed_receipt_preserves_fee_type_bloom_and_log_metadata() { + let block_hash = B256::repeat_byte(1); + let tx_hash = B256::repeat_byte(2); + let receipt = IndexedReceipt { + transaction_hash: tx_hash, + block_hash, + block_number: 5, + transaction_index: 1, + from: Address::repeat_byte(0xaa), + to: Some(Address::repeat_byte(0xbb)), + cumulative_gas_used: 50_000, + gas_used: 29_000, + contract_address: None, + logs: vec![IndexedLog { + address: Address::repeat_byte(0xcc), + topics: vec![B256::repeat_byte(0xdd)], + data: Bytes::from_static(&[0x01, 0x02]), + log_index: 4, + block_number: 5, + block_hash, + transaction_hash: tx_hash, + transaction_index: 1, + }], + logs_bloom: Bloom::repeat_byte(0xab), + tx_type: 2, + effective_gas_price: 12_000_000_000, + status: true, + }; + + let rpc_receipt = indexed_receipt_to_rpc(receipt); + + assert_eq!(rpc_receipt.tx_type, U64::from(2)); + assert_eq!(rpc_receipt.effective_gas_price, U256::from(12_000_000_000u64)); + assert_eq!(rpc_receipt.logs_bloom.len(), 256); + assert_eq!(rpc_receipt.logs_bloom[0], 0xab); + assert_eq!(rpc_receipt.logs[0].block_number, U64::from(5)); + assert_eq!(rpc_receipt.logs[0].block_hash, block_hash); + assert_eq!(rpc_receipt.logs[0].transaction_hash, tx_hash); + assert_eq!(rpc_receipt.logs[0].transaction_index, U64::from(1)); + } + #[tokio::test] async fn test_balance() { let index = Arc::new(BlockIndex::new()); @@ -512,6 +721,34 @@ mod tests { assert_eq!(nonce, 42); } + #[tokio::test] + async fn test_missing_account_balance_returns_zero() { + let index = Arc::new(BlockIndex::new()); + let provider = IndexedStateProvider::with_chain_id(index, MissingAccountState, 1337); + + let balance = provider.balance(Address::repeat_byte(0xaa), None).await.unwrap(); + assert_eq!(balance, U256::ZERO); + } + + #[tokio::test] + async fn test_missing_account_nonce_returns_zero() { + let index = 
Arc::new(BlockIndex::new()); + let provider = IndexedStateProvider::with_chain_id(index, MissingAccountState, 1337); + + let nonce = provider.nonce(Address::repeat_byte(0xaa), None).await.unwrap(); + assert_eq!(nonce, 0); + } + + #[tokio::test] + async fn test_missing_account_storage_returns_zero() { + let index = Arc::new(BlockIndex::new()); + let provider = IndexedStateProvider::with_chain_id(index, MissingAccountState, 1337); + + let value = + provider.storage(Address::repeat_byte(0xaa), U256::from(7), None).await.unwrap(); + assert_eq!(value, U256::ZERO); + } + #[tokio::test] async fn test_block_by_number() { let index = Arc::new(BlockIndex::new()); @@ -609,6 +846,63 @@ mod tests { assert_eq!(receipt.logs.len(), 1); } + #[tokio::test] + async fn get_logs_returns_indexed_block_and_transaction_metadata() { + let index = Arc::new(BlockIndex::new()); + let block_hash = B256::repeat_byte(5); + let tx_hash = B256::repeat_byte(2); + let log_address = Address::repeat_byte(0xcc); + let receipt = IndexedReceipt { + transaction_hash: tx_hash, + block_hash, + block_number: 5, + transaction_index: 2, + from: Address::repeat_byte(0xaa), + to: Some(Address::repeat_byte(0xbb)), + cumulative_gas_used: 42_000, + gas_used: 21_000, + contract_address: None, + logs: vec![IndexedLog { + address: log_address, + topics: vec![B256::repeat_byte(0xdd)], + data: Bytes::from_static(&[0x01]), + log_index: 9, + block_number: 5, + block_hash, + transaction_hash: tx_hash, + transaction_index: 2, + }], + logs_bloom: Bloom::ZERO, + tx_type: 2, + effective_gas_price: 12_000_000_000, + status: true, + }; + index.insert_block( + create_test_block(5, block_hash), + vec![create_test_tx(tx_hash, block_hash, 5)], + vec![receipt], + ); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + let logs = provider + .get_logs(RpcLogFilter { + from_block: Some(BlockNumberOrTag::Number(U64::from(5))), 
+ to_block: Some(BlockNumberOrTag::Number(U64::from(5))), + ..Default::default() + }) + .await + .unwrap(); + + assert_eq!(logs.len(), 1); + assert_eq!(logs[0].address, log_address); + assert_eq!(logs[0].block_number, U64::from(5)); + assert_eq!(logs[0].block_hash, block_hash); + assert_eq!(logs[0].transaction_hash, tx_hash); + assert_eq!(logs[0].transaction_index, U64::from(2)); + assert_eq!(logs[0].log_index, U64::from(9)); + } + #[tokio::test] async fn test_block_number() { let index = Arc::new(BlockIndex::new()); @@ -638,4 +932,264 @@ mod tests { .unwrap(); assert!(block.is_none()); } + + // --- reject_historical_block tests --- + + #[tokio::test] + async fn balance_with_none_block_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let balance = provider.balance(Address::ZERO, None).await.unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_latest_tag_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let balance = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Tag(BlockTag::Latest))) + .await + .unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_latest_default_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let balance = + provider.balance(Address::ZERO, Some(BlockNumberOrTag::Latest)).await.unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_pending_tag_succeeds() { + let index = Arc::new(BlockIndex::new()); + 
index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let balance = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Tag(BlockTag::Pending))) + .await + .unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_current_block_number_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let balance = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Number(U64::from(5)))) + .await + .unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Number(U64::from(5)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + assert!(err.to_string().contains("historical state not available")); + } + + #[tokio::test] + async fn balance_with_future_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Number(U64::from(20)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::InvalidBlockNumber(_))); + assert!(err.to_string().contains("block not yet available")); + } + + #[tokio::test] + async fn nonce_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + 
index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .nonce(Address::ZERO, Some(BlockNumberOrTag::Number(U64::from(3)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + } + + #[tokio::test] + async fn code_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .code(Address::ZERO, Some(BlockNumberOrTag::Number(U64::from(3)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + } + + #[tokio::test] + async fn storage_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .storage(Address::ZERO, U256::from(1), Some(BlockNumberOrTag::Number(U64::from(3)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + } + + #[tokio::test] + async fn call_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .call(CallRequest::default(), Some(BlockNumberOrTag::Number(U64::from(3)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + assert!(err.to_string().contains("historical state not available")); + } + + #[tokio::test] + async fn call_with_future_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, 
B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .call(CallRequest::default(), Some(BlockNumberOrTag::Number(U64::from(20)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::InvalidBlockNumber(_))); + assert!(err.to_string().contains("block not yet available")); + } + + #[tokio::test] + async fn estimate_gas_with_historical_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .estimate_gas(CallRequest::default(), Some(BlockNumberOrTag::Number(U64::from(3)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + assert!(err.to_string().contains("historical state not available")); + } + + #[tokio::test] + async fn estimate_gas_with_future_block_number_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(10, B256::repeat_byte(10)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .estimate_gas(CallRequest::default(), Some(BlockNumberOrTag::Number(U64::from(20)))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::InvalidBlockNumber(_))); + assert!(err.to_string().contains("block not yet available")); + } + + #[tokio::test] + async fn balance_with_earliest_tag_returns_error() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let err = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Tag(BlockTag::Earliest))) + .await + .unwrap_err(); + assert!(matches!(err, RpcError::Unsupported(_))); + assert!(err.to_string().contains("historical state not 
available")); + } + + #[tokio::test] + async fn balance_with_safe_tag_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + // In BFT consensus all committed blocks are immediately finalized, + // so "safe" is semantically equivalent to "latest". + let balance = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Tag(BlockTag::Safe))) + .await + .unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn balance_with_finalized_tag_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + // In BFT consensus all committed blocks are immediately finalized, + // so "finalized" is semantically equivalent to "latest". + let balance = provider + .balance(Address::ZERO, Some(BlockNumberOrTag::Tag(BlockTag::Finalized))) + .await + .unwrap(); + assert_eq!(balance, U256::from(1000)); + } + + #[tokio::test] + async fn nonce_with_none_block_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let nonce = provider.nonce(Address::ZERO, None).await.unwrap(); + assert_eq!(nonce, 42); + } + + #[tokio::test] + async fn storage_with_none_block_succeeds() { + let index = Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + let value = provider.storage(Address::ZERO, U256::from(1), None).await.unwrap(); + assert_eq!(value, U256::from(123)); + } + + #[tokio::test] + async fn code_with_none_block_succeeds() { + let index = 
Arc::new(BlockIndex::new()); + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + let provider = IndexedStateProvider::with_chain_id(index, MockState, 1337); + + // MockState returns B256::ZERO code_hash, so code() returns empty bytes + let code = provider.code(Address::ZERO, None).await.unwrap(); + assert!(code.is_empty()); + } } diff --git a/crates/node/rpc/src/lib.rs b/crates/node/rpc/src/lib.rs index 4d5a629..feab9e3 100644 --- a/crates/node/rpc/src/lib.rs +++ b/crates/node/rpc/src/lib.rs @@ -13,18 +13,30 @@ pub use error::{RpcError, codes as error_codes}; mod eth; pub use eth::{ - EthApiImpl, EthApiServer, FeeHistory, NetApiImpl, NetApiServer, TxSubmitCallback, - TxSubmitFuture, Web3ApiImpl, Web3ApiServer, + EthApiImpl, EthApiServer, FeeHistory, GasOracleConfig, NetApiImpl, NetApiServer, + TxSubmitCallback, TxSubmitFuture, Web3ApiImpl, Web3ApiServer, }; +mod filters; +pub use filters::FilterChanges; + mod kora; pub use kora::{KoraApiImpl, KoraApiServer}; +mod txpool; +pub use txpool::{TxpoolApiImpl, TxpoolApiServer, TxpoolContent, TxpoolInspect, TxpoolStatus}; + mod server; pub use server::{JsonRpcServer, RpcServer, RpcServerHandle, ServerError}; +mod subscription; +pub use subscription::{ + MEMPOOL_EVENT_CHANNEL_CAPACITY, MempoolEventSender, PENDING_TX_CHANNEL_CAPACITY, + PendingTxEvent, PendingTxEventSender, PendingTxInfo, mempool_event_channel, pending_tx_channel, +}; + mod state; -pub use state::{NodeState, NodeStatus}; +pub use state::{NodeState, NodeStatus, PartitionStatus}; mod state_provider; pub use state_provider::{NoopStateProvider, StateProvider}; diff --git a/crates/node/rpc/src/server.rs b/crates/node/rpc/src/server.rs index 6aee83f..53a2e7f 100644 --- a/crates/node/rpc/src/server.rs +++ b/crates/node/rpc/src/server.rs @@ -1,15 +1,40 @@ //! HTTP and JSON-RPC server implementation. 
-use std::{net::SocketAddr, sync::Arc, time::Duration}; +use std::{ + collections::HashMap, + future::Future, + net::SocketAddr, + pin::Pin, + sync::Arc, + time::{Duration, Instant}, +}; -use axum::{Router, extract::State, http::StatusCode, response::IntoResponse, routing::get}; -use jsonrpsee::server::{Server, ServerHandle}; +use axum::{ + Router, + extract::{Request, State}, + http::StatusCode, + middleware::{self, Next}, + response::{IntoResponse, Response}, + routing::get, +}; +use jsonrpsee::{ + core::server::MethodResponse, + server::{ + BatchRequestConfig, ConnectionId, PingConfig, Server, ServerHandle, + middleware::rpc::{RpcServiceBuilder, RpcServiceT}, + }, + types::{ErrorObjectOwned, Id, Request as RpcRequest}, +}; +use kora_txpool::TransactionPool; +use parking_lot::Mutex; +use prometheus_client::metrics::counter::Counter; use tower::limit::ConcurrencyLimitLayer; use tower_http::cors::{AllowOrigin, Any, CorsLayer}; -use tracing::{error, info}; +use tracing::{error, info, warn}; use crate::{ - config::{CorsConfig, RpcServerConfig}, + config::{CorsConfig, RateLimitConfig, RpcServerConfig}, + error::codes, eth::{ EthApiImpl, EthApiServer, NetApiImpl, NetApiServer, TxSubmitCallback, Web3ApiImpl, Web3ApiServer, @@ -17,6 +42,8 @@ use crate::{ kora::{KoraApiImpl, KoraApiServer}, state::NodeState, state_provider::{NoopStateProvider, StateProvider}, + subscription::{MempoolEventSender, PendingTxEventSender, subscription_module}, + txpool::{TxpoolApiImpl, TxpoolApiServer}, }; /// Error type for RPC server operations. @@ -68,6 +95,281 @@ fn build_cors_layer(config: &CorsConfig) -> CorsLayer { layer.max_age(Duration::from_secs(config.max_age)) } +/// Global (server-wide) rate limiter used as a backstop to cap total +/// throughput across all connections. This is the original single-bucket +/// limiter, now renamed to clarify its role. 
+#[derive(Debug, Clone)] +struct SharedRateLimiter { + bucket: Arc>, +} + +impl SharedRateLimiter { + fn new(config: RateLimitConfig) -> Option { + if config.is_disabled() { + return None; + } + + Some(Self { bucket: Arc::new(Mutex::new(TokenBucket::new(config, Instant::now()))) }) + } + + fn try_acquire(&self) -> bool { + self.bucket.lock().try_acquire_at(Instant::now()) + } +} + +/// Per-connection rate limiter that maintains a separate [`TokenBucket`] for +/// each jsonrpsee [`ConnectionId`]. +/// +/// Ideally this would key by client IP, but jsonrpsee 0.24 only injects +/// [`ConnectionId`] (not the peer address) into request extensions. Since +/// each TCP connection gets a unique ID, this still isolates independent +/// clients. A single client opening many connections will get a separate +/// budget per connection, which is acceptable -- the global limiter caps +/// aggregate throughput. +/// +/// Stale entries are pruned lazily: every [`CLEANUP_INTERVAL`] seconds the +/// map is scanned and buckets that have been idle longer than the interval +/// are removed. +#[derive(Debug, Clone)] +struct PerConnectionRateLimiter { + inner: Arc>, + config: RateLimitConfig, +} + +/// Duration of inactivity after which a connection bucket is considered stale +/// and eligible for eviction. +const STALE_BUCKET_SECS: u64 = 300; + +/// Minimum wall-clock interval between cleanup sweeps. +const CLEANUP_INTERVAL: Duration = Duration::from_secs(60); + +#[derive(Debug)] +struct PerConnectionInner { + buckets: HashMap, + last_cleanup: Instant, +} + +impl PerConnectionRateLimiter { + fn new(config: RateLimitConfig) -> Option { + if config.is_disabled() { + return None; + } + Some(Self { + inner: Arc::new(Mutex::new(PerConnectionInner { + buckets: HashMap::new(), + last_cleanup: Instant::now(), + })), + config, + }) + } + + /// Try to acquire a token for the given connection. Creates a new bucket + /// lazily if this is the first request on `conn_id`. 
+ fn try_acquire(&self, conn_id: usize) -> bool { + let now = Instant::now(); + let mut inner = self.inner.lock(); + + // Lazy cleanup: periodically prune idle buckets to bound memory. + if now.saturating_duration_since(inner.last_cleanup) >= CLEANUP_INTERVAL { + let stale_cutoff = Duration::from_secs(STALE_BUCKET_SECS); + inner.buckets.retain(|_, bucket| { + now.saturating_duration_since(bucket.last_refill) < stale_cutoff + }); + inner.last_cleanup = now; + } + + let bucket = inner + .buckets + .entry(conn_id) + .or_insert_with(|| TokenBucket::new(self.config.clone(), now)); + bucket.try_acquire_at(now) + } +} + +#[derive(Debug)] +struct TokenBucket { + requests_per_second: f64, + burst_size: f64, + tokens: f64, + last_refill: Instant, +} + +impl TokenBucket { + const fn new(config: RateLimitConfig, now: Instant) -> Self { + let requests_per_second = config.requests_per_second as f64; + let burst_size = if config.requests_per_second == 0 { + 0.0 + } else { + // Clamp burst_size to at least 1 so that an enabled limiter can + // always admit at least one request. Without this, burst_size==0 + // would start with 0 tokens and refill() would never add more, + // permanently rejecting all requests. 
+ let bs = config.burst_size as f64; + if bs < 1.0 { 1.0 } else { bs } + }; + + Self { requests_per_second, burst_size, tokens: burst_size, last_refill: now } + } + + fn try_acquire_at(&mut self, now: Instant) -> bool { + self.refill(now); + + if self.tokens >= 1.0 { + self.tokens -= 1.0; + true + } else { + false + } + } + + fn refill(&mut self, now: Instant) { + let elapsed = now.saturating_duration_since(self.last_refill); + if elapsed.is_zero() { + return; + } + + self.last_refill = now; + if self.requests_per_second == 0.0 || self.tokens >= self.burst_size { + return; + } + + let replenished = elapsed.as_secs_f64() * self.requests_per_second; + self.tokens = (self.tokens + replenished).min(self.burst_size); + } +} + +fn global_rate_limit_allows(rate_limiter: &Option) -> bool { + rate_limiter.as_ref().is_none_or(SharedRateLimiter::try_acquire) +} + +fn rate_limited_rpc_response(id: Id<'static>) -> MethodResponse { + MethodResponse::error( + id, + ErrorObjectOwned::owned(codes::LIMIT_EXCEEDED, "rate limit exceeded", None::<()>), + ) +} + +async fn enforce_http_rate_limit( + State(rate_limiter): State>, + request: Request, + next: Next, +) -> Response { + if !global_rate_limit_allows(&rate_limiter) { + return (StatusCode::TOO_MANY_REQUESTS, "rate limit exceeded").into_response(); + } + + next.run(request).await +} + +#[derive(Debug, Clone)] +struct RateLimitedRpcService { + service: S, + /// Per-connection rate limiter (primary defense). + per_conn_limiter: Option, + /// Global rate limiter (backstop for aggregate throughput). + global_limiter: Option, + /// Optional counter incremented on every incoming RPC request. + rpc_requests_total: Option, +} + +/// Subscription method names that require WebSocket transport. +const SUBSCRIPTION_METHODS: &[&str] = + &["eth_subscribe", "eth_unsubscribe", "kora_subscribe", "kora_unsubscribe"]; + +/// Check whether `method` is a subscription method that requires WebSocket. 
+fn is_subscription_method(method: &str) -> bool { + SUBSCRIPTION_METHODS.contains(&method) +} + +/// Build a [`MethodResponse`] with error code `-32004` (method not supported) +/// when a subscription method is called over HTTP. +fn subscription_not_available_response(id: Id<'static>) -> MethodResponse { + MethodResponse::error( + id, + ErrorObjectOwned::owned( + codes::METHOD_NOT_SUPPORTED, + "Subscriptions are not available over HTTP. Use WebSocket instead.", + None::<()>, + ), + ) +} + +impl<'a, S> RpcServiceT<'a> for RateLimitedRpcService +where + S: RpcServiceT<'a> + Clone + Send + Sync + 'static, + S::Future: Send, +{ + type Future = Pin + Send + 'a>>; + + fn call(&self, request: RpcRequest<'a>) -> Self::Future { + if let Some(ref counter) = self.rpc_requests_total { + counter.inc(); + } + + // --- Per-connection rate limit (primary) --- + if let Some(ref limiter) = self.per_conn_limiter { + let conn_id = request.extensions().get::().map(|id| id.0); + + match conn_id { + Some(id) => { + if !limiter.try_acquire(id) { + return Box::pin(std::future::ready(rate_limited_rpc_response( + request.id().into_owned(), + ))); + } + } + None => { + // ConnectionId is normally always present. If missing, + // warn (on each such request) and fall through to the global limiter. + warn!( + "RPC request missing ConnectionId in extensions; falling back to global limiter" + ); + } + } + } + + // --- Global rate limit (backstop) --- + if !global_rate_limit_allows(&self.global_limiter) { + return Box::pin(std::future::ready(rate_limited_rpc_response( + request.id().into_owned(), + ))); + } + + let is_sub = is_subscription_method(request.method_name()); + let id = request.id().into_owned(); + let fut = self.service.call(request); + + Box::pin(async move { + let response = fut.await; + + // When jsonrpsee receives a subscription call over HTTP it returns + // ErrorCode::InternalError (-32603) because subscriptions require a + // persistent connection.
Replace that with -32004 and a message + // that tells the caller to use WebSocket instead. + if is_sub && response.as_error_code() == Some(codes::INTERNAL_ERROR) { + return subscription_not_available_response(id); + } + + response + }) + } +} + +fn build_http_router( + node_state: Arc, + cors_layer: CorsLayer, + max_connections: u32, + rate_limiter: Option, +) -> Router { + Router::new() + .route("/status", get(status_handler)) + .route("/health", get(health_handler)) + .layer(middleware::from_fn_with_state(rate_limiter, enforce_http_rate_limit)) + .layer(cors_layer) + .layer(ConcurrencyLimitLayer::new(max_connections as usize)) + .with_state(node_state) +} + /// RPC server for exposing node status via HTTP and Ethereum JSON-RPC. pub struct RpcServer { state: NodeState, @@ -75,10 +377,18 @@ pub struct RpcServer { jsonrpc_addr: SocketAddr, chain_id: u64, tx_submit: Option, + txpool: Option, state_provider: S, cors_config: CorsConfig, + rate_limit_config: RateLimitConfig, max_connections: u32, + max_subscriptions_per_connection: u32, + max_batch_size: u32, peer_count: u64, + pending_tx_broadcast: Option, + mempool_broadcast: Option, + /// Prometheus counter incremented on every incoming JSON-RPC request. 
+ rpc_requests_total: Option, } impl std::fmt::Debug for RpcServer { @@ -89,6 +399,13 @@ impl std::fmt::Debug for RpcServer { .field("jsonrpc_addr", &self.jsonrpc_addr) .field("chain_id", &self.chain_id) .field("tx_submit", &self.tx_submit.is_some()) + .field("txpool", &self.txpool.is_some()) + .field("pending_tx_broadcast", &self.pending_tx_broadcast.is_some()) + .field("mempool_broadcast", &self.mempool_broadcast.is_some()) + .field("rate_limit_config", &self.rate_limit_config) + .field("max_connections", &self.max_connections) + .field("max_subscriptions_per_connection", &self.max_subscriptions_per_connection) + .field("max_batch_size", &self.max_batch_size) .finish() } } @@ -109,10 +426,17 @@ impl RpcServer { jsonrpc_addr: addr, chain_id: 1, tx_submit: None, + txpool: None, state_provider: NoopStateProvider, cors_config: CorsConfig::default(), + rate_limit_config: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, + rpc_requests_total: None, } } @@ -124,10 +448,17 @@ impl RpcServer { jsonrpc_addr: addr, chain_id, tx_submit: None, + txpool: None, state_provider: NoopStateProvider, cors_config: CorsConfig::default(), + rate_limit_config: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, + rpc_requests_total: None, } } } @@ -146,10 +477,17 @@ impl RpcServer { jsonrpc_addr: addr, chain_id, tx_submit: None, + txpool: None, state_provider, cors_config: CorsConfig::default(), + rate_limit_config: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, + rpc_requests_total: None, } } @@ -160,6 +498,34 @@ impl RpcServer { self } + /// Set the transaction pool exposed by the 
`txpool_*` namespace. + #[must_use] + pub fn with_txpool(mut self, txpool: TransactionPool) -> Self { + self.txpool = Some(txpool); + self + } + + /// Set the pending transaction broadcast channel used by subscriptions. + #[must_use] + pub fn with_pending_tx_broadcast(mut self, pending_tx_broadcast: PendingTxEventSender) -> Self { + self.pending_tx_broadcast = Some(pending_tx_broadcast); + self + } + + /// Set the Kora mempool lifecycle broadcast channel used by subscriptions. + #[must_use] + pub fn with_mempool_broadcast(mut self, mempool_broadcast: MempoolEventSender) -> Self { + self.mempool_broadcast = Some(mempool_broadcast); + self + } + + /// Attach a Prometheus counter for tracking total RPC requests. + #[must_use] + pub fn with_rpc_requests_counter(mut self, counter: Counter) -> Self { + self.rpc_requests_total = Some(counter); + self + } + /// Set CORS configuration. #[must_use] pub fn with_cors(mut self, cors_config: CorsConfig) -> Self { @@ -167,6 +533,13 @@ impl RpcServer { self } + /// Set rate limiting configuration. + #[must_use] + pub const fn with_rate_limit_config(mut self, rate_limit_config: RateLimitConfig) -> Self { + self.rate_limit_config = rate_limit_config; + self + } + /// Set maximum concurrent connections. #[must_use] pub const fn with_max_connections(mut self, max_connections: u32) -> Self { @@ -174,6 +547,24 @@ impl RpcServer { self } + /// Set the maximum number of WebSocket subscriptions per connection. + #[must_use] + pub const fn with_max_subscriptions_per_connection( + mut self, + max_subscriptions_per_connection: u32, + ) -> Self { + self.max_subscriptions_per_connection = max_subscriptions_per_connection; + self + } + + /// Set the maximum number of calls in a single batch request. + /// `0` disables batch requests entirely. + #[must_use] + pub const fn with_max_batch_size(mut self, max_batch_size: u32) -> Self { + self.max_batch_size = max_batch_size; + self + } + /// Set the initially reported peer count for `net_peerCount`. 
#[must_use] pub const fn with_peer_count(mut self, peer_count: u64) -> Self { @@ -189,10 +580,17 @@ impl RpcServer { jsonrpc_addr: config.jsonrpc_addr, chain_id: config.chain_id, tx_submit: None, + txpool: None, state_provider, cors_config: config.cors, + rate_limit_config: config.rate_limit, + rpc_requests_total: None, max_connections: config.max_connections, + max_subscriptions_per_connection: config.max_subscriptions_per_connection, + max_batch_size: config.max_batch_size, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, } } @@ -206,18 +604,24 @@ impl RpcServer { let node_state_for_jsonrpc = Arc::clone(&node_state); let chain_id = self.chain_id; let tx_submit = self.tx_submit; + let txpool = self.txpool; let cors_layer = build_cors_layer(&self.cors_config); + let jsonrpc_cors_layer = build_cors_layer(&self.cors_config); + let http_rate_limiter = SharedRateLimiter::new(self.rate_limit_config.clone()); + let rpc_global_limiter = SharedRateLimiter::new(self.rate_limit_config.clone()); + let rpc_per_conn_limiter = PerConnectionRateLimiter::new(self.rate_limit_config); let max_connections = self.max_connections; + let max_subscriptions_per_connection = self.max_subscriptions_per_connection; + let max_batch_size = self.max_batch_size; let state_provider = self.state_provider; let peer_count = self.peer_count; + let pending_tx_broadcast = self.pending_tx_broadcast; + let mempool_broadcast = self.mempool_broadcast; + + let rpc_requests_total = self.rpc_requests_total; let http_handle = tokio::spawn(async move { - let app = Router::new() - .route("/status", get(status_handler)) - .route("/health", get(health_handler)) - .layer(cors_layer) - .layer(ConcurrencyLimitLayer::new(max_connections as usize)) - .with_state(node_state); + let app = build_http_router(node_state, cors_layer, max_connections, http_rate_limiter); info!(addr = %http_addr, "Starting HTTP server"); @@ -235,8 +639,21 @@ impl RpcServer { }); let jsonrpc_handle = tokio::spawn(async 
move { + let rpc_middleware = + RpcServiceBuilder::new().layer_fn(move |service| RateLimitedRpcService { + service, + per_conn_limiter: rpc_per_conn_limiter.clone(), + global_limiter: rpc_global_limiter.clone(), + rpc_requests_total: rpc_requests_total.clone(), + }); + let server = match Server::builder() .max_connections(max_connections) + .max_subscriptions_per_connection(max_subscriptions_per_connection) + .set_http_middleware(tower_04::ServiceBuilder::new().layer(jsonrpc_cors_layer)) + .enable_ws_ping(PingConfig::new()) + .set_batch_request_config(BatchRequestConfig::Limit(max_batch_size)) + .set_rpc_middleware(rpc_middleware) .build(jsonrpc_addr) .await { @@ -247,14 +664,35 @@ impl RpcServer { } }; - let eth_api = tx_submit.map_or_else( + let eth_node_state = (*node_state_for_jsonrpc).clone(); + let mut eth_api = tx_submit.map_or_else( || EthApiImpl::new(chain_id, state_provider.clone()), |submit| EthApiImpl::with_tx_submit(chain_id, state_provider.clone(), submit), ); + eth_api = eth_api.with_node_state(eth_node_state); + if let Some(sender) = pending_tx_broadcast.clone() { + eth_api = eth_api.with_pending_tx_broadcast(sender); + } + if let Some(sender) = mempool_broadcast.clone() { + eth_api = eth_api.with_mempool_broadcast(sender); + } + if let Some(ref pool) = txpool { + eth_api = eth_api.with_txpool(pool.clone()); + } let net_api = NetApiImpl::new(chain_id); net_api.set_peer_count(peer_count); let web3_api = Web3ApiImpl::new(); let kora_api = KoraApiImpl::new(node_state_for_jsonrpc); + let subscription_api = match subscription_module( + pending_tx_broadcast.clone(), + mempool_broadcast.clone(), + ) { + Ok(api) => api, + Err(e) => { + error!(error = %e, "Failed to build subscription API"); + return None; + } + }; let mut module = jsonrpsee::RpcModule::new(()); if let Err(e) = module.merge(eth_api.into_rpc()) { @@ -273,6 +711,16 @@ impl RpcServer { error!(error = %e, "Failed to merge kora API"); return None; } + if let Some(txpool) = txpool + && let Err(e) 
= module.merge(TxpoolApiImpl::new(txpool).into_rpc()) + { + error!(error = %e, "Failed to merge txpool API"); + return None; + } + if let Err(e) = module.merge(subscription_api) { + error!(error = %e, "Failed to merge subscription API"); + return None; + } info!(addr = %jsonrpc_addr, "Starting JSON-RPC server"); @@ -324,9 +772,18 @@ pub struct JsonRpcServer { addr: SocketAddr, chain_id: u64, tx_submit: Option, + txpool: Option, state_provider: S, + cors_config: CorsConfig, + rate_limit_config: RateLimitConfig, max_connections: u32, + max_subscriptions_per_connection: u32, + max_batch_size: u32, peer_count: u64, + pending_tx_broadcast: Option, + mempool_broadcast: Option, + /// Prometheus counter incremented on every incoming JSON-RPC request. + rpc_requests_total: Option, } impl std::fmt::Debug for JsonRpcServer { @@ -335,6 +792,13 @@ impl std::fmt::Debug for JsonRpcServer { .field("addr", &self.addr) .field("chain_id", &self.chain_id) .field("tx_submit", &self.tx_submit.is_some()) + .field("txpool", &self.txpool.is_some()) + .field("pending_tx_broadcast", &self.pending_tx_broadcast.is_some()) + .field("mempool_broadcast", &self.mempool_broadcast.is_some()) + .field("rate_limit_config", &self.rate_limit_config) + .field("max_connections", &self.max_connections) + .field("max_subscriptions_per_connection", &self.max_subscriptions_per_connection) + .field("max_batch_size", &self.max_batch_size) .finish() } } @@ -346,9 +810,17 @@ impl JsonRpcServer { addr, chain_id, tx_submit: None, + txpool: None, state_provider: NoopStateProvider, + cors_config: CorsConfig::default(), + rate_limit_config: RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, + rpc_requests_total: None, } } } @@ -360,9 +832,17 @@ impl JsonRpcServer { addr, chain_id, tx_submit: None, + txpool: None, state_provider, + cors_config: CorsConfig::default(), + rate_limit_config: 
RateLimitConfig::default(), max_connections: 100, + max_subscriptions_per_connection: 32, + max_batch_size: 100, peer_count: 0, + pending_tx_broadcast: None, + mempool_broadcast: None, + rpc_requests_total: None, } } @@ -373,6 +853,48 @@ impl JsonRpcServer { self } + /// Set the transaction pool exposed by the `txpool_*` namespace. + #[must_use] + pub fn with_txpool(mut self, txpool: TransactionPool) -> Self { + self.txpool = Some(txpool); + self + } + + /// Set the pending transaction broadcast channel used by subscriptions. + #[must_use] + pub fn with_pending_tx_broadcast(mut self, pending_tx_broadcast: PendingTxEventSender) -> Self { + self.pending_tx_broadcast = Some(pending_tx_broadcast); + self + } + + /// Set the Kora mempool lifecycle broadcast channel used by subscriptions. + #[must_use] + pub fn with_mempool_broadcast(mut self, mempool_broadcast: MempoolEventSender) -> Self { + self.mempool_broadcast = Some(mempool_broadcast); + self + } + + /// Set CORS configuration. + #[must_use] + pub fn with_cors(mut self, cors_config: CorsConfig) -> Self { + self.cors_config = cors_config; + self + } + + /// Attach a Prometheus counter for tracking total RPC requests. + #[must_use] + pub fn with_rpc_requests_counter(mut self, counter: Counter) -> Self { + self.rpc_requests_total = Some(counter); + self + } + + /// Set rate limiting configuration. + #[must_use] + pub const fn with_rate_limit_config(mut self, rate_limit_config: RateLimitConfig) -> Self { + self.rate_limit_config = rate_limit_config; + self + } + /// Set maximum concurrent connections. #[must_use] pub const fn with_max_connections(mut self, max_connections: u32) -> Self { @@ -380,6 +902,24 @@ impl JsonRpcServer { self } + /// Set the maximum number of WebSocket subscriptions per connection. 
+ #[must_use] + pub const fn with_max_subscriptions_per_connection( + mut self, + max_subscriptions_per_connection: u32, + ) -> Self { + self.max_subscriptions_per_connection = max_subscriptions_per_connection; + self + } + + /// Set the maximum number of calls in a single batch request. + /// `0` disables batch requests entirely. + #[must_use] + pub const fn with_max_batch_size(mut self, max_batch_size: u32) -> Self { + self.max_batch_size = max_batch_size; + self + } + /// Set the initially reported peer count for `net_peerCount`. #[must_use] pub const fn with_peer_count(mut self, peer_count: u64) -> Self { @@ -389,24 +929,56 @@ impl JsonRpcServer { /// Start the JSON-RPC server. pub async fn start(self) -> Result { + let cors_layer = build_cors_layer(&self.cors_config); + let rpc_global_limiter = SharedRateLimiter::new(self.rate_limit_config.clone()); + let rpc_per_conn_limiter = PerConnectionRateLimiter::new(self.rate_limit_config); + let rpc_requests_total = self.rpc_requests_total; + let rpc_middleware = + RpcServiceBuilder::new().layer_fn(move |service| RateLimitedRpcService { + service, + per_conn_limiter: rpc_per_conn_limiter.clone(), + global_limiter: rpc_global_limiter.clone(), + rpc_requests_total: rpc_requests_total.clone(), + }); + let server = Server::builder() .max_connections(self.max_connections) + .max_subscriptions_per_connection(self.max_subscriptions_per_connection) + .set_http_middleware(tower_04::ServiceBuilder::new().layer(cors_layer)) + .enable_ws_ping(PingConfig::new()) + .set_batch_request_config(BatchRequestConfig::Limit(self.max_batch_size)) + .set_rpc_middleware(rpc_middleware) .build(self.addr) .await .map_err(|e| ServerError::Build(e.to_string()))?; - let eth_api = self.tx_submit.map_or_else( + let mut eth_api = self.tx_submit.map_or_else( || EthApiImpl::new(self.chain_id, self.state_provider.clone()), |submit| EthApiImpl::with_tx_submit(self.chain_id, self.state_provider.clone(), submit), ); + if let Some(sender) = 
self.pending_tx_broadcast.clone() { + eth_api = eth_api.with_pending_tx_broadcast(sender); + } + if let Some(sender) = self.mempool_broadcast.clone() { + eth_api = eth_api.with_mempool_broadcast(sender); + } + if let Some(ref pool) = self.txpool { + eth_api = eth_api.with_txpool(pool.clone()); + } let net_api = NetApiImpl::new(self.chain_id); net_api.set_peer_count(self.peer_count); let web3_api = Web3ApiImpl::new(); + let subscription_api = + subscription_module(self.pending_tx_broadcast, self.mempool_broadcast)?; let mut module = jsonrpsee::RpcModule::new(()); module.merge(eth_api.into_rpc())?; module.merge(net_api.into_rpc())?; module.merge(web3_api.into_rpc())?; + if let Some(txpool) = self.txpool { + module.merge(TxpoolApiImpl::new(txpool).into_rpc())?; + } + module.merge(subscription_api)?; info!(addr = %self.addr, "Starting JSON-RPC server"); @@ -416,8 +988,40 @@ impl JsonRpcServer { #[cfg(test)] mod tests { + use std::borrow::Cow; + + use axum::{body::Body, http::Request as HttpRequest}; + use jsonrpsee::core::server::ResponsePayload; + use tower::ServiceExt; + use super::*; + #[derive(Debug, Clone)] + struct AlwaysOkRpcService; + + impl<'a> RpcServiceT<'a> for AlwaysOkRpcService { + type Future = std::future::Ready; + + fn call(&self, request: RpcRequest<'a>) -> Self::Future { + std::future::ready(MethodResponse::response( + request.id().into_owned(), + ResponsePayload::success("ok"), + usize::MAX, + )) + } + } + + fn rpc_request(id: u64) -> RpcRequest<'static> { + RpcRequest::new(Cow::Borrowed("web3_clientVersion"), None, Id::Number(id)) + } + + /// Build an [`RpcRequest`] with a [`ConnectionId`] injected in extensions. 
+ fn rpc_request_with_conn(id: u64, conn_id: usize) -> RpcRequest<'static> { + let mut req = rpc_request(id); + req.extensions_mut().insert(ConnectionId(conn_id)); + req + } + #[test] fn cors_layer_empty_origins() { let config = CorsConfig::none(); @@ -440,4 +1044,272 @@ mod tests { let config = CorsConfig::permissive(); let _layer = build_cors_layer(&config); } + + #[test] + fn token_bucket_honors_burst_and_refill() { + let start = Instant::now(); + let mut bucket = + TokenBucket::new(RateLimitConfig { requests_per_second: 2, burst_size: 2 }, start); + + assert!(bucket.try_acquire_at(start)); + assert!(bucket.try_acquire_at(start)); + assert!(!bucket.try_acquire_at(start)); + + let half_second_later = start + Duration::from_millis(500); + assert!(bucket.try_acquire_at(half_second_later)); + assert!(!bucket.try_acquire_at(half_second_later)); + } + + #[test] + fn token_bucket_clamps_zero_burst_to_one() { + let start = Instant::now(); + let mut bucket = + TokenBucket::new(RateLimitConfig { requests_per_second: 10, burst_size: 0 }, start); + + // burst_size=0 is clamped to 1, so the first request succeeds. + assert!(bucket.try_acquire_at(start)); + // Second request at the same instant is rejected (burst of 1). + assert!(!bucket.try_acquire_at(start)); + + // After enough time, a new token is replenished. + let later = start + Duration::from_millis(200); + assert!(bucket.try_acquire_at(later)); + } + + #[test] + fn token_bucket_zero_rps_rejects_all() { + let start = Instant::now(); + let mut bucket = + TokenBucket::new(RateLimitConfig { requests_per_second: 0, burst_size: 100 }, start); + + // With requests_per_second=0, burst_size is forced to 0 and no tokens are ever added. 
+ assert!(!bucket.try_acquire_at(start)); + assert!(!bucket.try_acquire_at(start + Duration::from_secs(10))); + } + + #[test] + fn token_bucket_does_not_exceed_burst() { + let start = Instant::now(); + let mut bucket = + TokenBucket::new(RateLimitConfig { requests_per_second: 100, burst_size: 3 }, start); + + // Drain all tokens. + assert!(bucket.try_acquire_at(start)); + assert!(bucket.try_acquire_at(start)); + assert!(bucket.try_acquire_at(start)); + assert!(!bucket.try_acquire_at(start)); + + // Wait long enough for many tokens to accumulate, but cap at burst_size. + let much_later = start + Duration::from_secs(60); + assert!(bucket.try_acquire_at(much_later)); + assert!(bucket.try_acquire_at(much_later)); + assert!(bucket.try_acquire_at(much_later)); + assert!(!bucket.try_acquire_at(much_later)); + } + + #[test] + fn disabled_rate_limit_does_not_build_limiter() { + assert!(SharedRateLimiter::new(RateLimitConfig::disabled()).is_none()); + assert!(PerConnectionRateLimiter::new(RateLimitConfig::disabled()).is_none()); + } + + #[test] + fn rate_limit_allows_with_no_limiter() { + assert!(global_rate_limit_allows(&None)); + } + + #[test] + fn rpc_server_from_config_threads_limits() { + let config = RpcServerConfig::default() + .with_rate_limit_burst(7, 11) + .with_max_connections(13) + .with_max_subscriptions_per_connection(17) + .with_max_batch_size(50); + + let server = RpcServer::from_config(NodeState::new(1, 0), config, NoopStateProvider); + + assert_eq!(server.rate_limit_config.requests_per_second, 7); + assert_eq!(server.rate_limit_config.burst_size, 11); + assert_eq!(server.max_connections, 13); + assert_eq!(server.max_subscriptions_per_connection, 17); + assert_eq!(server.max_batch_size, 50); + } + + #[test] + fn json_rpc_server_builders_thread_limits() { + let server = JsonRpcServer::new("127.0.0.1:0".parse().unwrap(), 1) + .with_rate_limit_config(RateLimitConfig { requests_per_second: 3, burst_size: 5 }) + .with_max_connections(7) + 
.with_max_subscriptions_per_connection(9); + + assert_eq!(server.rate_limit_config.requests_per_second, 3); + assert_eq!(server.rate_limit_config.burst_size, 5); + assert_eq!(server.max_connections, 7); + assert_eq!(server.max_subscriptions_per_connection, 9); + } + + #[tokio::test] + async fn rpc_rate_limiter_rejects_after_burst() { + let per_conn = PerConnectionRateLimiter::new(RateLimitConfig { + requests_per_second: 1, + burst_size: 1, + }); + let service = RateLimitedRpcService { + service: AlwaysOkRpcService, + per_conn_limiter: per_conn, + global_limiter: None, + rpc_requests_total: None, + }; + + let first = service.call(rpc_request_with_conn(1, 42)).await; + assert!(first.is_success()); + + let second = service.call(rpc_request_with_conn(2, 42)).await; + assert_eq!(second.as_error_code(), Some(crate::error::codes::LIMIT_EXCEEDED)); + assert!(second.as_result().contains("rate limit exceeded")); + } + + #[tokio::test] + async fn per_connection_limiter_isolates_connections() { + // Two connections each get their own bucket. + let per_conn = PerConnectionRateLimiter::new(RateLimitConfig { + requests_per_second: 1, + burst_size: 1, + }); + let service = RateLimitedRpcService { + service: AlwaysOkRpcService, + per_conn_limiter: per_conn, + global_limiter: None, + rpc_requests_total: None, + }; + + // Connection 1: exhaust its bucket. + let resp = service.call(rpc_request_with_conn(1, 1)).await; + assert!(resp.is_success()); + let resp = service.call(rpc_request_with_conn(2, 1)).await; + assert_eq!(resp.as_error_code(), Some(crate::error::codes::LIMIT_EXCEEDED)); + + // Connection 2: should still be allowed (separate bucket). + let resp = service.call(rpc_request_with_conn(3, 2)).await; + assert!(resp.is_success()); + } + + #[tokio::test] + async fn global_limiter_caps_aggregate_throughput() { + // Even though per-connection allows the request, the global limiter + // can reject it. 
+ let global = + SharedRateLimiter::new(RateLimitConfig { requests_per_second: 1, burst_size: 1 }); + let service = RateLimitedRpcService { + service: AlwaysOkRpcService, + per_conn_limiter: None, + global_limiter: global, + rpc_requests_total: None, + }; + + let first = service.call(rpc_request_with_conn(1, 1)).await; + assert!(first.is_success()); + + // Second request from a different connection is still blocked by global. + let second = service.call(rpc_request_with_conn(2, 2)).await; + assert_eq!(second.as_error_code(), Some(crate::error::codes::LIMIT_EXCEEDED)); + } + + /// A mock service that returns InternalError (-32603) for subscription + /// methods, mimicking jsonrpsee's behaviour when subscriptions are called + /// over HTTP. + #[derive(Debug, Clone)] + struct InternalErrorOnSubscriptionService; + + impl<'a> RpcServiceT<'a> for InternalErrorOnSubscriptionService { + type Future = std::future::Ready; + + fn call(&self, request: RpcRequest<'a>) -> Self::Future { + let id = request.id().into_owned(); + if is_subscription_method(request.method_name()) { + std::future::ready(MethodResponse::error( + id, + ErrorObjectOwned::owned(codes::INTERNAL_ERROR, "Internal error", None::<()>), + )) + } else { + std::future::ready(MethodResponse::response( + id, + ResponsePayload::success("ok"), + usize::MAX, + )) + } + } + } + + #[tokio::test] + async fn subscription_over_http_returns_method_not_supported() { + let service = RateLimitedRpcService { + service: InternalErrorOnSubscriptionService, + per_conn_limiter: None, + global_limiter: None, + rpc_requests_total: None, + }; + + // eth_subscribe should be rewritten from -32603 to -32004. 
+ let sub_req = RpcRequest::new(Cow::Borrowed("eth_subscribe"), None, Id::Number(1)); + let response = service.call(sub_req).await; + assert_eq!(response.as_error_code(), Some(codes::METHOD_NOT_SUPPORTED)); + assert!(response.as_result().contains("Subscriptions are not available over HTTP")); + } + + #[tokio::test] + async fn subscription_over_ws_passes_through() { + // When the inner service returns success (WebSocket case), the + // middleware must not interfere. + let service = RateLimitedRpcService { + service: AlwaysOkRpcService, + per_conn_limiter: None, + global_limiter: None, + rpc_requests_total: None, + }; + + let sub_req = RpcRequest::new(Cow::Borrowed("eth_subscribe"), None, Id::Number(1)); + let response = service.call(sub_req).await; + assert!(response.is_success()); + } + + #[tokio::test] + async fn non_subscription_internal_error_not_rewritten() { + // An InternalError on a regular method must NOT be rewritten. + let service = RateLimitedRpcService { + service: InternalErrorOnSubscriptionService, + per_conn_limiter: None, + global_limiter: None, + rpc_requests_total: None, + }; + + let req = rpc_request(1); + let response = service.call(req).await; + assert!(response.is_success()); + } + + #[tokio::test] + async fn http_status_rate_limiter_returns_too_many_requests() { + let rate_limiter = + SharedRateLimiter::new(RateLimitConfig { requests_per_second: 1, burst_size: 1 }); + let app = build_http_router( + Arc::new(NodeState::new(1, 0)), + build_cors_layer(&CorsConfig::none()), + 10, + rate_limiter, + ); + + let first = app + .clone() + .oneshot(HttpRequest::builder().uri("/health").body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(first.status(), StatusCode::OK); + + let second = app + .oneshot(HttpRequest::builder().uri("/health").body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(second.status(), StatusCode::TOO_MANY_REQUESTS); + } } diff --git a/crates/node/rpc/src/state.rs b/crates/node/rpc/src/state.rs index 
ff15ccc..fef3a1f 100644 --- a/crates/node/rpc/src/state.rs +++ b/crates/node/rpc/src/state.rs @@ -1,6 +1,7 @@ //! Node state management for RPC endpoints. use std::{ + num::NonZeroU32, sync::{ Arc, atomic::{AtomicU64, Ordering}, @@ -11,6 +12,48 @@ use std::{ use parking_lot::RwLock; use serde::{Deserialize, Serialize}; +/// Default validator count used by tests and legacy callers. +pub(crate) const DEFAULT_VALIDATOR_COUNT: u32 = 4; + +/// Number of blocks past the recovery point that must be fully verified +/// before the node exits catch-up mode. Mirrors the constant in +/// `crates/node/runner/src/app.rs`. +const CATCH_UP_THRESHOLD: u64 = 64; + +/// Network partition status derived from peer connectivity. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum PartitionStatus { + /// All expected peers are connected. + Healthy, + /// Some peers are missing but quorum is still possible. + Degraded, + /// Too few peers for BFT quorum (fewer than n-f-1 connected peers). + Partitioned, +} + +impl PartitionStatus { + /// Derive partition status from the number of connected peers and total + /// expected peers (i.e. `validator_count - 1`). + /// + /// Commonware simplex uses an N3f1 quorum model: with `n` validators and + /// `f = (n-1)/3` maximum Byzantine faults, quorum requires `n - f` + /// participants. A node needs at least `n - f - 1` *other* peers to form + /// quorum (since it counts itself as one of the `n - f` participants).
+ const fn from_peer_counts(connected_peers: u64, total_expected_peers: u64) -> Self { + if connected_peers >= total_expected_peers { + Self::Healthy + } else { + // total_validators = total_expected_peers + 1 (include self) + let total_validators = total_expected_peers + 1; + // f = (n-1) / 3, quorum = n - f, peers needed = quorum - 1 (self) + let f = (total_validators.saturating_sub(1)) / 3; + let quorum_peers_needed = total_validators - f - 1; // (n - f) - 1 for self + if connected_peers >= quorum_peers_needed { Self::Degraded } else { Self::Partitioned } + } + } +} + /// Shared node state that can be updated by the consensus engine. #[derive(Debug, Clone)] pub struct NodeState { @@ -21,30 +64,64 @@ pub struct NodeState { struct NodeStateInner { chain_id: u64, validator_index: u32, + validator_count: NonZeroU32, started_at: Instant, current_view: AtomicU64, finalized_count: AtomicU64, + finalized_height: AtomicU64, proposed_count: AtomicU64, nullified_count: AtomicU64, + equivocation_count: AtomicU64, peer_count: AtomicU64, is_leader: RwLock<bool>, + /// Height of the HEAD block recovered from an archive at startup. + /// Zero means a fresh node (never recovered). + recovered_height: AtomicU64, + /// Highest block height that has been fully verified via execution. + last_verified_height: AtomicU64, } impl NodeState { /// Create a new node state. + /// + /// Uses the historical four-validator leader schedule. Validator mode should prefer + /// [`Self::with_validator_count`] so leadership follows the configured validator set. #[must_use] pub fn new(chain_id: u64, validator_index: u32) -> Self { + Self::with_validator_count(chain_id, validator_index, DEFAULT_VALIDATOR_COUNT) + } + + /// Create a new node state with an explicit validator count. + /// + /// # Panics + /// + /// Panics if `validator_count` is zero or if `validator_index >= validator_count`.
+ #[must_use] + pub fn with_validator_count(chain_id: u64, validator_index: u32, validator_count: u32) -> Self { + let validator_count = + NonZeroU32::new(validator_count).expect("validator count must be non-zero"); + + assert!( + validator_index < validator_count.get(), + "validator_index ({validator_index}) must be less than validator_count ({validator_count})", + ); + Self { inner: Arc::new(NodeStateInner { chain_id, validator_index, + validator_count, started_at: Instant::now(), current_view: AtomicU64::new(0), finalized_count: AtomicU64::new(0), + finalized_height: AtomicU64::new(0), proposed_count: AtomicU64::new(0), nullified_count: AtomicU64::new(0), + equivocation_count: AtomicU64::new(0), peer_count: AtomicU64::new(0), is_leader: RwLock::new(false), + recovered_height: AtomicU64::new(0), + last_verified_height: AtomicU64::new(0), }), } } @@ -52,8 +129,8 @@ impl NodeState { /// Update the current view. pub fn set_view(&self, view: u64) { self.inner.current_view.store(view, Ordering::Relaxed); - // Compute leader: view mod 4 (for 4 validators) - let is_leader = (view % 4) as u32 == self.inner.validator_index; + let leader_index = (view % u64::from(self.inner.validator_count.get())) as u32; + let is_leader = leader_index == self.inner.validator_index; *self.inner.is_leader.write() = is_leader; } @@ -62,6 +139,18 @@ impl NodeState { self.inner.finalized_count.fetch_add(1, Ordering::Relaxed); } + /// Update the latest finalized block height. + /// + /// Uses `fetch_max` so that out-of-order updates never regress the value. + pub fn set_finalized_height(&self, height: u64) { + self.inner.finalized_height.fetch_max(height, Ordering::Relaxed); + } + + /// Return the latest finalized block height. + pub fn finalized_height(&self) -> u64 { + self.inner.finalized_height.load(Ordering::Relaxed) + } + /// Increment proposed block count. 
pub fn inc_proposed(&self) { self.inner.proposed_count.fetch_add(1, Ordering::Relaxed); @@ -72,13 +161,60 @@ impl NodeState { self.inner.nullified_count.fetch_add(1, Ordering::Relaxed); } + /// Increment equivocation event count. + pub fn inc_equivocations(&self) { + self.inner.equivocation_count.fetch_add(1, Ordering::Relaxed); + } + /// Update peer count. pub fn set_peer_count(&self, count: u64) { self.inner.peer_count.store(count, Ordering::Relaxed); } + /// Set the height of the HEAD block recovered from an archive at startup. + /// + /// This also initialises `last_verified_height` to the same value, + /// matching the semantics in `RevmApplication::with_recovered_height`. + pub fn set_recovered_height(&self, height: u64) { + self.inner.recovered_height.store(height, Ordering::Relaxed); + self.inner.last_verified_height.store(height, Ordering::Relaxed); + } + + /// Return the recovered height (zero for fresh nodes). + pub fn recovered_height(&self) -> u64 { + self.inner.recovered_height.load(Ordering::Relaxed) + } + + /// Advance the last verified height (monotonically increasing). + pub fn set_last_verified_height(&self, height: u64) { + self.inner.last_verified_height.fetch_max(height, Ordering::Relaxed); + } + + /// Return the last verified height. + pub fn last_verified_height(&self) -> u64 { + self.inner.last_verified_height.load(Ordering::Relaxed) + } + + /// Returns `true` when the node is catching up after recovery. + /// + /// A node is catching up when it was recovered from an archive + /// (`recovered_height > 0`) and full-execution verification has not + /// yet reached `recovered_height + CATCH_UP_THRESHOLD` (64). + pub fn is_catching_up(&self) -> bool { + let recovered = self.inner.recovered_height.load(Ordering::Relaxed); + if recovered == 0 { + return false; + } + let verified = self.inner.last_verified_height.load(Ordering::Relaxed); + verified < recovered.saturating_add(CATCH_UP_THRESHOLD) + } + /// Get current node status.
pub fn status(&self) -> NodeStatus { + let peer_count = self.inner.peer_count.load(Ordering::Relaxed); + let total_expected_peers = u64::from(self.inner.validator_count.get()).saturating_sub(1); + let partition_status = PartitionStatus::from_peer_counts(peer_count, total_expected_peers); + NodeStatus { chain_id: self.inner.chain_id, validator_index: self.inner.validator_index, @@ -87,7 +223,10 @@ impl NodeState { finalized_count: self.inner.finalized_count.load(Ordering::Relaxed), proposed_count: self.inner.proposed_count.load(Ordering::Relaxed), nullified_count: self.inner.nullified_count.load(Ordering::Relaxed), - peer_count: self.inner.peer_count.load(Ordering::Relaxed), + equivocation_count: self.inner.equivocation_count.load(Ordering::Relaxed), + peer_count, + total_expected_peers, + partition_status, is_leader: *self.inner.is_leader.read(), } } @@ -99,7 +238,7 @@ impl NodeState { pub struct NodeStatus { /// Chain ID. pub chain_id: u64, - /// This validator's index (0-3). + /// This validator's index. pub validator_index: u32, /// Seconds since node started. pub uptime_secs: u64, @@ -111,8 +250,14 @@ pub struct NodeStatus { pub proposed_count: u64, /// Number of nullified rounds. pub nullified_count: u64, + /// Number of equivocation events detected (Byzantine behavior). + pub equivocation_count: u64, /// Number of connected peers. pub peer_count: u64, + /// Total number of expected peers (validator_count - 1). + pub total_expected_peers: u64, + /// Network partition status derived from peer connectivity. + pub partition_status: PartitionStatus, /// Whether this node is the current leader. 
pub is_leader: bool, } @@ -131,7 +276,10 @@ mod tests { finalized_count: 50, proposed_count: 10, nullified_count: 5, + equivocation_count: 2, peer_count: 3, + total_expected_peers: 3, + partition_status: PartitionStatus::Healthy, is_leader: true, }; @@ -145,7 +293,10 @@ mod tests { assert_eq!(status.finalized_count, parsed.finalized_count); assert_eq!(status.proposed_count, parsed.proposed_count); assert_eq!(status.nullified_count, parsed.nullified_count); + assert_eq!(status.equivocation_count, parsed.equivocation_count); assert_eq!(status.peer_count, parsed.peer_count); + assert_eq!(status.total_expected_peers, parsed.total_expected_peers); + assert_eq!(status.partition_status, parsed.partition_status); assert_eq!(status.is_leader, parsed.is_leader); } @@ -159,7 +310,10 @@ mod tests { finalized_count: 0, proposed_count: 0, nullified_count: 0, + equivocation_count: 0, peer_count: 0, + total_expected_peers: 3, + partition_status: PartitionStatus::Partitioned, is_leader: false, }; @@ -171,7 +325,10 @@ mod tests { assert!(json.contains("finalizedCount")); assert!(json.contains("proposedCount")); assert!(json.contains("nullifiedCount")); + assert!(json.contains("equivocationCount")); assert!(json.contains("peerCount")); + assert!(json.contains("totalExpectedPeers")); + assert!(json.contains("partitionStatus")); assert!(json.contains("isLeader")); } @@ -193,6 +350,46 @@ mod tests { assert!(status.is_leader); } + #[test] + fn node_state_leadership_uses_validator_count() { + let state = NodeState::with_validator_count(1, 4, 5); + + state.set_view(4); + assert!(state.status().is_leader); + + state.set_view(5); + assert!(!state.status().is_leader); + + state.set_view(9); + assert!(state.status().is_leader); + } + + #[test] + fn node_state_leadership_supports_non_four_validator_sets() { + let state = NodeState::with_validator_count(1, 2, 3); + + state.set_view(2); + assert!(state.status().is_leader); + + state.set_view(3); + assert!(!state.status().is_leader); + + 
state.set_view(5); + assert!(state.status().is_leader); + } + + #[test] + #[should_panic(expected = "validator count must be non-zero")] + fn node_state_validator_count_must_be_nonzero() { + let _ = NodeState::with_validator_count(1, 0, 0); + } + + #[test] + #[should_panic(expected = "validator_index (5) must be less than validator_count (4)")] + fn node_state_validator_index_must_be_in_range() { + let _ = NodeState::with_validator_count(1, 5, 4); + } + #[test] fn node_state_inc_counters() { let state = NodeState::new(1, 0); @@ -200,11 +397,15 @@ mod tests { state.inc_finalized(); state.inc_proposed(); state.inc_nullified(); + state.inc_equivocations(); + state.inc_equivocations(); + state.inc_equivocations(); let status = state.status(); assert_eq!(status.finalized_count, 2); assert_eq!(status.proposed_count, 1); assert_eq!(status.nullified_count, 1); + assert_eq!(status.equivocation_count, 3); } #[test] @@ -213,4 +414,142 @@ mod tests { state.set_peer_count(5); assert_eq!(state.status().peer_count, 5); } + + #[test] + fn node_state_finalized_height() { + let state = NodeState::new(1, 0); + assert_eq!(state.finalized_height(), 0); + + state.set_finalized_height(42); + assert_eq!(state.finalized_height(), 42); + + // fetch_max ensures height never regresses + state.set_finalized_height(10); + assert_eq!(state.finalized_height(), 42); + + state.set_finalized_height(100); + assert_eq!(state.finalized_height(), 100); + } + + // -- PartitionStatus tests -- + + #[test] + fn partition_status_healthy_when_all_peers_connected() { + // 4 validators: 3 expected peers, 3 connected + assert_eq!(PartitionStatus::from_peer_counts(3, 3), PartitionStatus::Healthy); + } + + #[test] + fn partition_status_degraded_when_one_peer_missing() { + // 4 validators (f=1): quorum = n-f = 3, need 2 peers + self + assert_eq!(PartitionStatus::from_peer_counts(2, 3), PartitionStatus::Degraded); + } + + #[test] + fn partition_status_partitioned_when_below_quorum() { + // 4 validators (f=1): quorum 
= n-f = 3, need 2 peers + self, have 1 + assert_eq!(PartitionStatus::from_peer_counts(1, 3), PartitionStatus::Partitioned); + } + + #[test] + fn partition_status_partitioned_when_no_peers() { + assert_eq!(PartitionStatus::from_peer_counts(0, 3), PartitionStatus::Partitioned); + } + + #[test] + fn partition_status_seven_validators() { + // 7 validators (f=2): quorum = n-f = 5, need 4 peers + self + assert_eq!(PartitionStatus::from_peer_counts(6, 6), PartitionStatus::Healthy); + assert_eq!(PartitionStatus::from_peer_counts(5, 6), PartitionStatus::Degraded); + assert_eq!(PartitionStatus::from_peer_counts(4, 6), PartitionStatus::Degraded); + assert_eq!(PartitionStatus::from_peer_counts(3, 6), PartitionStatus::Partitioned); + } + + #[test] + fn partition_status_fifteen_validators() { + // 15 validators (f=4): quorum = n-f = 11, need 10 peers + self + // This is the case where the old 2f formula diverged from n-f. + assert_eq!(PartitionStatus::from_peer_counts(14, 14), PartitionStatus::Healthy); + assert_eq!(PartitionStatus::from_peer_counts(10, 14), PartitionStatus::Degraded); + assert_eq!(PartitionStatus::from_peer_counts(9, 14), PartitionStatus::Partitioned); + assert_eq!(PartitionStatus::from_peer_counts(8, 14), PartitionStatus::Partitioned); + } + + #[test] + fn partition_status_serializes_lowercase() { + let healthy = serde_json::to_string(&PartitionStatus::Healthy).unwrap(); + assert_eq!(healthy, "\"healthy\""); + let degraded = serde_json::to_string(&PartitionStatus::Degraded).unwrap(); + assert_eq!(degraded, "\"degraded\""); + let partitioned = serde_json::to_string(&PartitionStatus::Partitioned).unwrap(); + assert_eq!(partitioned, "\"partitioned\""); + } + + #[test] + fn partition_status_included_in_node_status() { + // With 4 validators (default), peer_count=0 should be partitioned + let state = NodeState::new(1, 0); + let status = state.status(); + assert_eq!(status.total_expected_peers, 3); + assert_eq!(status.partition_status, PartitionStatus::Partitioned); 
+ + // Set all peers connected + state.set_peer_count(3); + let status = state.status(); + assert_eq!(status.partition_status, PartitionStatus::Healthy); + + // One peer missing + state.set_peer_count(2); + let status = state.status(); + assert_eq!(status.partition_status, PartitionStatus::Degraded); + } + + // -- Sync status tests -- + + #[test] + fn fresh_node_not_catching_up() { + let state = NodeState::new(1, 0); + assert!(!state.is_catching_up()); + assert_eq!(state.recovered_height(), 0); + assert_eq!(state.last_verified_height(), 0); + } + + #[test] + fn recovered_node_is_catching_up() { + let state = NodeState::new(1, 0); + state.set_recovered_height(1000); + assert!(state.is_catching_up()); + assert_eq!(state.recovered_height(), 1000); + assert_eq!(state.last_verified_height(), 1000); + } + + #[test] + fn catching_up_ends_after_threshold() { + let state = NodeState::new(1, 0); + state.set_recovered_height(1000); + assert!(state.is_catching_up()); + + // Advance verified height to just below threshold + state.set_last_verified_height(1000 + CATCH_UP_THRESHOLD - 1); + assert!(state.is_catching_up()); + + // Advance verified height to exactly the threshold + state.set_last_verified_height(1000 + CATCH_UP_THRESHOLD); + assert!(!state.is_catching_up()); + } + + #[test] + fn last_verified_height_is_monotonic() { + let state = NodeState::new(1, 0); + state.set_last_verified_height(100); + assert_eq!(state.last_verified_height(), 100); + + // Cannot regress + state.set_last_verified_height(50); + assert_eq!(state.last_verified_height(), 100); + + // Can advance + state.set_last_verified_height(200); + assert_eq!(state.last_verified_height(), 200); + } } diff --git a/crates/node/rpc/src/subscription.rs b/crates/node/rpc/src/subscription.rs new file mode 100644 index 0000000..7a2ac63 --- /dev/null +++ b/crates/node/rpc/src/subscription.rs @@ -0,0 +1,321 @@ +//! JSON-RPC subscription support. 
+ +use alloy_primitives::B256; +use jsonrpsee::{ + RpcModule, + server::SubscriptionMessage, + types::{ErrorObjectOwned, Params}, +}; +use kora_domain::MempoolEvent; +use serde_json::Value; +use tokio::sync::broadcast::{self, error::RecvError}; +use tracing::warn; + +use crate::{error::codes, types::RpcTransaction}; + +/// Default buffer size for pending transaction notifications. +pub const PENDING_TX_CHANNEL_CAPACITY: usize = 2048; + +/// Default buffer size for Kora mempool lifecycle notifications. +pub const MEMPOOL_EVENT_CHANNEL_CAPACITY: usize = 4096; + +/// Broadcast sender for pending transaction events. +pub type PendingTxEventSender = broadcast::Sender<PendingTxEvent>; + +/// Broadcast sender for Kora mempool lifecycle events. +pub type MempoolEventSender = broadcast::Sender<MempoolEvent>; + +/// Events broadcast when transactions enter the mempool. +#[derive(Clone, Debug)] +pub enum PendingTxEvent { + /// A new transaction was accepted into the pool. + Added(PendingTxInfo), +} + +/// Pending transaction data sent to Ethereum subscription clients. +#[derive(Clone, Debug)] +pub struct PendingTxInfo { + /// Transaction hash. + pub hash: B256, + /// Full RPC transaction object when available. + pub full_tx: Option<RpcTransaction>, +} + +/// Create a pending transaction broadcast channel with the default capacity. +pub fn pending_tx_channel() -> (PendingTxEventSender, broadcast::Receiver<PendingTxEvent>) { + broadcast::channel(PENDING_TX_CHANNEL_CAPACITY) +} + +/// Create a mempool lifecycle broadcast channel with the default capacity. +pub fn mempool_event_channel() -> (MempoolEventSender, broadcast::Receiver<MempoolEvent>) { + broadcast::channel(MEMPOOL_EVENT_CHANNEL_CAPACITY) +} + +/// Build the RPC subscription methods.
+pub(crate) fn subscription_module( + pending_tx_broadcast: Option<PendingTxEventSender>, + mempool_broadcast: Option<MempoolEventSender>, +) -> Result<RpcModule<()>, jsonrpsee::core::RegisterMethodError> { + let mut module = RpcModule::new(()); + + let eth_pending = pending_tx_broadcast; + module.register_subscription( + "eth_subscribe", + "eth_subscription", + "eth_unsubscribe", + move |params, pending, _, _| { + let eth_pending = eth_pending.clone(); + async move { + let (kind, options) = match parse_subscription_params(&params) { + Ok(parsed) => parsed, + Err(err) => { + pending.reject(err).await; + return; + } + }; + + if kind != "newPendingTransactions" { + pending.reject(unsupported_subscription("eth", &kind)).await; + return; + } + + let Some(sender) = eth_pending else { + pending + .reject(ErrorObjectOwned::owned( + codes::METHOD_NOT_SUPPORTED, + "newPendingTransactions subscriptions are not available", + None::<()>, + )) + .await; + return; + }; + + let full_tx = wants_full_tx(options.as_ref()); + let mut receiver = sender.subscribe(); + let sink = match pending.accept().await { + Ok(sink) => sink, + Err(err) => { + warn!(error = ?err, "failed to accept pending transaction subscription"); + return; + } + }; + + while let Some(event) = + recv_broadcast(&mut receiver, "eth_newPendingTransactions").await + { + let PendingTxEvent::Added(info) = event; + let message = if full_tx { + match &info.full_tx { + Some(tx) => SubscriptionMessage::from_json(tx), + None => SubscriptionMessage::from_json(&info.hash), + } + } else { + SubscriptionMessage::from_json(&info.hash) + } + .map_err(|err| { + warn!(error = %err, "failed to serialize pending transaction notification"); + }); + + let Ok(message) = message else { + break; + }; + + if sink.send(message).await.is_err() { + break; + } + } + } + }, + )?; + + let kora_mempool = mempool_broadcast; + module.register_subscription( + "kora_subscribe", + "kora_subscription", + "kora_unsubscribe", + move |params, pending, _, _| { + let kora_mempool = kora_mempool.clone(); + async move
{ + let (kind, _) = match parse_subscription_params(&params) { + Ok(parsed) => parsed, + Err(err) => { + pending.reject(err).await; + return; + } + }; + + if kind != "mempool" { + pending.reject(unsupported_subscription("kora", &kind)).await; + return; + } + + let Some(sender) = kora_mempool else { + pending + .reject(ErrorObjectOwned::owned( + codes::METHOD_NOT_SUPPORTED, + "mempool subscriptions are not available", + None::<()>, + )) + .await; + return; + }; + + let mut receiver = sender.subscribe(); + let sink = match pending.accept().await { + Ok(sink) => sink, + Err(err) => { + warn!(error = ?err, "failed to accept mempool subscription"); + return; + } + }; + + while let Some(event) = recv_broadcast(&mut receiver, "kora_mempool").await { + let message = SubscriptionMessage::from_json(&event).map_err(|err| { + warn!(error = %err, "failed to serialize mempool notification"); + }); + + let Ok(message) = message else { + break; + }; + + if sink.send(message).await.is_err() { + break; + } + } + } + }, + )?; + + Ok(module) +} + +fn parse_subscription_params( + params: &Params<'_>, +) -> Result<(String, Option<Value>), ErrorObjectOwned> { + let mut params = params.sequence(); + let kind = params.next()?; + let options = params.optional_next()?; + Ok((kind, options)) +} + +fn wants_full_tx(options: Option<&Value>) -> bool { + match options { + Some(Value::Bool(full_tx)) => *full_tx, + Some(Value::Object(map)) => map.get("fullTx").and_then(Value::as_bool).unwrap_or_default(), + _ => false, + } +} + +async fn recv_broadcast<T>(receiver: &mut broadcast::Receiver<T>, subscription: &str) -> Option<T> +where + T: Clone, +{ + loop { + match receiver.recv().await { + Ok(event) => return Some(event), + Err(RecvError::Lagged(skipped)) => { + warn!(subscription, skipped, "subscription receiver lagged; skipping events"); + } + Err(RecvError::Closed) => return None, + } + } +} + +fn unsupported_subscription(namespace: &str, kind: &str) -> ErrorObjectOwned { + ErrorObjectOwned::owned( +
codes::METHOD_NOT_SUPPORTED, + format!("{namespace}_subscribe does not support {kind:?}"), + None::<()>, + ) +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use alloy_primitives::{Address, B256, U64, U256}; + use serde_json::json; + + use super::*; + + async fn next_value<T: serde::de::DeserializeOwned>( + sub: &mut jsonrpsee::server::Subscription, + ) -> T { + let next = tokio::time::timeout(Duration::from_secs(1), sub.next::<T>()) + .await + .expect("subscription response timed out") + .expect("subscription closed") + .expect("subscription response should decode"); + next.0 + } + + #[tokio::test] + async fn eth_pending_subscription_receives_hash() { + let (pending_tx, _) = broadcast::channel(16); + let module = subscription_module(Some(pending_tx.clone()), None).unwrap(); + let mut sub = + module.subscribe_unbounded("eth_subscribe", ("newPendingTransactions",)).await.unwrap(); + let hash = B256::repeat_byte(0xaa); + + pending_tx.send(PendingTxEvent::Added(PendingTxInfo { hash, full_tx: None })).unwrap(); + + let value: Value = next_value(&mut sub).await; + assert_eq!(value, json!(hash)); + } + + #[tokio::test] + async fn eth_pending_subscription_receives_full_tx() { + let (pending_tx, _) = broadcast::channel(16); + let module = subscription_module(Some(pending_tx.clone()), None).unwrap(); + let mut sub = module + .subscribe_unbounded( + "eth_subscribe", + ("newPendingTransactions", json!({ "fullTx": true })), + ) + .await + .unwrap(); + let tx = RpcTransaction { + hash: B256::repeat_byte(0xbb), + nonce: U64::from(7), + from: Address::repeat_byte(0x11), + to: Some(Address::repeat_byte(0x22)), + value: U256::from(123), + gas_price: U256::from(1_000_000_000u64), + ..Default::default() + }; + + pending_tx + .send(PendingTxEvent::Added(PendingTxInfo { hash: tx.hash, full_tx: Some(tx.clone()) })) + .unwrap(); + + let value: Value = next_value(&mut sub).await; + assert_eq!(value, serde_json::to_value(tx).unwrap()); + } + + #[tokio::test] + async fn
kora_mempool_subscription_receives_event() { + let (mempool, _) = broadcast::channel(16); + let module = subscription_module(None, Some(mempool.clone())).unwrap(); + let mut sub = module.subscribe_unbounded("kora_subscribe", ("mempool",)).await.unwrap(); + let event = MempoolEvent::TxIncluded { + hash: B256::repeat_byte(0xcc), + block_number: 9, + block_hash: B256::repeat_byte(0xdd), + }; + + mempool.send(event.clone()).unwrap(); + + let received: MempoolEvent = next_value(&mut sub).await; + assert_eq!(received, event); + } + + #[tokio::test] + async fn broadcast_receiver_skips_lagged_events() { + let (sender, mut receiver) = broadcast::channel(1); + sender.send(1_u64).unwrap(); + sender.send(2_u64).unwrap(); + + let received = recv_broadcast(&mut receiver, "test").await; + assert_eq!(received, Some(2)); + } +} diff --git a/crates/node/rpc/src/txpool.rs b/crates/node/rpc/src/txpool.rs new file mode 100644 index 0000000..4c1a651 --- /dev/null +++ b/crates/node/rpc/src/txpool.rs @@ -0,0 +1,283 @@ +//! Transaction pool JSON-RPC namespace. + +use std::collections::HashMap; + +use alloy_consensus::{Transaction as _, TxEnvelope}; +use alloy_primitives::{Address, U64, U256}; +use jsonrpsee::{core::RpcResult, proc_macros::rpc}; +use kora_txpool::{OrderedTransaction, TransactionPool}; +use serde::{Deserialize, Serialize}; + +use crate::types::RpcTransaction; + +/// Transaction pool JSON-RPC API. +#[rpc(server, namespace = "txpool")] +pub trait TxpoolApi { + /// Returns all pending and queued transactions grouped by sender and nonce. + #[method(name = "content")] + async fn content(&self) -> RpcResult<TxpoolContent>; + + /// Returns the count of pending and queued transactions. + #[method(name = "status")] + async fn status(&self) -> RpcResult<TxpoolStatus>; + + /// Returns a compact text summary of pending and queued transactions. + #[method(name = "inspect")] + async fn inspect(&self) -> RpcResult<TxpoolInspect>; +} + +/// Full transaction pool contents grouped by sender and nonce.
+#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct TxpoolContent { + /// Pending executable transactions. + pub pending: HashMap<Address, HashMap<String, RpcTransaction>>, + /// Queued future-nonce transactions. + pub queued: HashMap<Address, HashMap<String, RpcTransaction>>, +} + +/// Transaction pool counts. +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)] +pub struct TxpoolStatus { + /// Pending executable transaction count. + pub pending: U64, + /// Queued future-nonce transaction count. + pub queued: U64, +} + +/// Compact transaction pool inspection grouped by sender and nonce. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct TxpoolInspect { + /// Pending executable transaction summaries. + pub pending: HashMap<Address, HashMap<String, String>>, + /// Queued future-nonce transaction summaries. + pub queued: HashMap<Address, HashMap<String, String>>, +} + +/// Transaction pool API implementation. +#[derive(Clone, Debug)] +pub struct TxpoolApiImpl { + pool: TransactionPool, +} + +impl TxpoolApiImpl { + /// Creates a new txpool API implementation. + pub const fn new(pool: TransactionPool) -> Self { + Self { pool } + } +} + +#[jsonrpsee::core::async_trait] +impl TxpoolApiServer for TxpoolApiImpl { + async fn content(&self) -> RpcResult<TxpoolContent> { + let snapshot = self.pool.snapshot(); + let mut pending = HashMap::new(); + let mut queued = HashMap::new(); + + for (sender, (sender_pending, sender_queued)) in snapshot { + if !sender_pending.is_empty() { + pending.insert( + sender, + sender_pending + .iter() + .map(|tx| (nonce_key(tx.nonce), ordered_tx_to_rpc(tx))) + .collect(), + ); + } + if !sender_queued.is_empty() { + queued.insert( + sender, + sender_queued + .iter() + .map(|tx| (nonce_key(tx.nonce), ordered_tx_to_rpc(tx))) + .collect(), + ); + } + } + + Ok(TxpoolContent { pending, queued }) + } + + async fn status(&self) -> RpcResult<TxpoolStatus> { + Ok(TxpoolStatus { + pending: U64::from(self.pool.pending_count() as u64), + queued: U64::from(self.pool.queued_count() as u64), + }) + } + + async fn inspect(&self) -> RpcResult<TxpoolInspect> { + let snapshot = self.pool.snapshot(); + let
mut pending = HashMap::new(); + let mut queued = HashMap::new(); + + for (sender, (sender_pending, sender_queued)) in snapshot { + if !sender_pending.is_empty() { + pending.insert( + sender, + sender_pending.iter().map(|tx| (nonce_key(tx.nonce), inspect_tx(tx))).collect(), + ); + } + if !sender_queued.is_empty() { + queued.insert( + sender, + sender_queued.iter().map(|tx| (nonce_key(tx.nonce), inspect_tx(tx))).collect(), + ); + } + } + + Ok(TxpoolInspect { pending, queued }) + } +} + +fn nonce_key(nonce: u64) -> String { + format!("{nonce:#x}") +} + +fn ordered_tx_to_rpc(tx: &OrderedTransaction) -> RpcTransaction { + let envelope = &tx.envelope; + let signature = envelope.signature(); + + RpcTransaction { + hash: tx.hash, + nonce: U64::from(tx.nonce), + block_hash: None, + block_number: None, + transaction_index: None, + from: tx.sender, + to: envelope.to(), + value: envelope.value(), + gas: U64::from(envelope.gas_limit()), + gas_price: U256::from(tx.effective_gas_price), + input: envelope.input().clone(), + tx_type: U64::from(transaction_type(envelope)), + chain_id: envelope.chain_id().map(U64::from), + max_fee_per_gas: max_fee_per_gas(envelope).map(U256::from), + max_priority_fee_per_gas: max_priority_fee_per_gas(envelope).map(U256::from), + v: U256::from(u64::from(signature.v())), + r: signature.r(), + s: signature.s(), + } +} + +fn inspect_tx(tx: &OrderedTransaction) -> String { + let to = tx + .envelope + .to() + .map_or_else(|| "contract creation".to_string(), |address| address.to_string()); + format!( + "{}: {} wei + {} gas x {} wei", + to, + tx.envelope.value(), + tx.envelope.gas_limit(), + tx.effective_gas_price + ) +} + +const fn transaction_type(envelope: &TxEnvelope) -> u64 { + match envelope { + TxEnvelope::Legacy(_) => 0, + TxEnvelope::Eip2930(_) => 1, + TxEnvelope::Eip1559(_) => 2, + TxEnvelope::Eip4844(_) => 3, + TxEnvelope::Eip7702(_) => 4, + } +} + +const fn max_fee_per_gas(envelope: &TxEnvelope) -> Option<u128> { + match envelope { +
TxEnvelope::Legacy(_) | TxEnvelope::Eip2930(_) => None, + TxEnvelope::Eip1559(tx) => Some(tx.tx().max_fee_per_gas), + TxEnvelope::Eip4844(tx) => Some(tx.tx().tx().max_fee_per_gas), + TxEnvelope::Eip7702(tx) => Some(tx.tx().max_fee_per_gas), + } +} + +const fn max_priority_fee_per_gas(envelope: &TxEnvelope) -> Option<u128> { + match envelope { + TxEnvelope::Legacy(_) | TxEnvelope::Eip2930(_) => None, + TxEnvelope::Eip1559(tx) => Some(tx.tx().max_priority_fee_per_gas), + TxEnvelope::Eip4844(tx) => Some(tx.tx().tx().max_priority_fee_per_gas), + TxEnvelope::Eip7702(tx) => Some(tx.tx().max_priority_fee_per_gas), + } +} + +#[cfg(test)] +mod tests { + use alloy_consensus::{SignableTransaction as _, TxEip1559}; + use alloy_primitives::{B256, Bytes, Signature, TxKind}; + use kora_txpool::PoolConfig; + + use super::*; + + fn make_ordered_tx(sender: Address, nonce: u64, gas_price: u128) -> OrderedTransaction { + let inner = TxEip1559 { + chain_id: 1, + nonce, + gas_limit: 21_000, + max_fee_per_gas: gas_price, + max_priority_fee_per_gas: gas_price, + to: TxKind::Call(Address::repeat_byte(0xbb)), + value: U256::from(1), + access_list: Default::default(), + input: Bytes::new(), + }; + let sig = Signature::from_scalars_and_parity(B256::ZERO, B256::ZERO, false); + let signed = inner.into_signed(sig); + let envelope = TxEnvelope::from(signed); + let mut hash = [0u8; 32]; + hash[..20].copy_from_slice(sender.as_slice()); + hash[20..28].copy_from_slice(&nonce.to_be_bytes()); + hash[28..].copy_from_slice(&(gas_price as u32).to_be_bytes()); + let hash = B256::from(hash); + OrderedTransaction::new(hash, sender, nonce, gas_price, 0, envelope) + } + + #[tokio::test] + async fn txpool_status_returns_counts() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = Address::repeat_byte(0x11); + + pool.add(make_ordered_tx(sender, 0, 100)).unwrap(); + pool.add(make_ordered_tx(sender, 2, 100)).unwrap(); + + let api = TxpoolApiImpl::new(pool); + let status =
TxpoolApiServer::status(&api).await.unwrap(); + + assert_eq!(status.pending, U64::from(1)); + assert_eq!(status.queued, U64::from(1)); + } + + #[tokio::test] + async fn txpool_content_groups_by_sender_and_nonce() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender_a = Address::repeat_byte(0x11); + let sender_b = Address::repeat_byte(0x22); + + pool.add(make_ordered_tx(sender_a, 0, 100)).unwrap(); + pool.add(make_ordered_tx(sender_a, 1, 100)).unwrap(); + pool.add(make_ordered_tx(sender_b, 0, 200)).unwrap(); + + let api = TxpoolApiImpl::new(pool); + let content = TxpoolApiServer::content(&api).await.unwrap(); + + assert!(content.pending.contains_key(&sender_a)); + assert!(content.pending.contains_key(&sender_b)); + assert_eq!(content.pending[&sender_a].len(), 2); + assert!(content.pending[&sender_a].contains_key("0x0")); + assert!(content.pending[&sender_a].contains_key("0x1")); + assert_eq!(content.pending[&sender_b].len(), 1); + } + + #[tokio::test] + async fn txpool_inspect_summarizes_transactions() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = Address::repeat_byte(0x11); + + pool.add(make_ordered_tx(sender, 0, 100)).unwrap(); + + let api = TxpoolApiImpl::new(pool); + let inspect = TxpoolApiServer::inspect(&api).await.unwrap(); + let summary = &inspect.pending[&sender]["0x0"]; + + assert!(summary.contains("21000 gas x 100 wei")); + } +} diff --git a/crates/node/rpc/src/types.rs b/crates/node/rpc/src/types.rs index e22e3fe..529df64 100644 --- a/crates/node/rpc/src/types.rs +++ b/crates/node/rpc/src/types.rs @@ -46,6 +46,16 @@ impl BlockNumberOrTag { } } +/// Keccak-256 hash of an empty RLP list, used as the canonical +/// `sha3Uncles` value for post-merge blocks. 
+pub(crate) const EMPTY_UNCLE_HASH: B256 = + alloy_primitives::b256!("1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347"); + +/// Keccak-256 hash of the RLP encoding of an empty trie (`keccak256(0x80)`), +/// used as the `withdrawalsRoot` for blocks with no beacon-chain withdrawals. +pub(crate) const EMPTY_WITHDRAWALS_ROOT: B256 = + alloy_primitives::b256!("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421"); + /// Rich block representation for JSON-RPC responses. #[derive(Clone, Debug, Default, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -54,6 +64,9 @@ pub struct RpcBlock { pub hash: B256, /// Parent block hash. pub parent_hash: B256, + /// Hash of the uncle list (always empty-list hash post-merge). + #[serde(rename = "sha3Uncles")] + pub sha3_uncles: B256, /// Block number. pub number: U64, /// State root. @@ -62,7 +75,7 @@ pub struct RpcBlock { pub transactions_root: B256, /// Receipts root. pub receipts_root: B256, - /// Logs bloom filter. + /// Logs bloom filter (256 bytes). pub logs_bloom: Bytes, /// Block timestamp. pub timestamp: U64, @@ -91,6 +104,10 @@ pub struct RpcBlock { pub size: U64, /// Transactions (hashes or full objects). pub transactions: BlockTransactions, + /// Withdrawals list (always empty -- Kora has no beacon chain). + pub withdrawals: Vec<()>, + /// Withdrawals trie root (empty trie root when no withdrawals). + pub withdrawals_root: B256, } /// Transactions in a block response. @@ -152,7 +169,7 @@ pub struct RpcTransaction { #[serde(skip_serializing_if = "Option::is_none")] pub max_priority_fee_per_gas: Option, /// V component of signature. - pub v: U64, + pub v: U256, /// R component of signature. pub r: U256, /// S component of signature. @@ -197,7 +214,7 @@ pub struct RpcTransactionReceipt { } /// Log entry for JSON-RPC responses. 
-#[derive(Clone, Debug, Default, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct RpcLog { /// Contract address. @@ -266,28 +283,6 @@ impl CallRequest { } } -/// Sync status for eth_syncing. -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(untagged)] -pub enum SyncStatus { - /// Not syncing. - NotSyncing(bool), - /// Syncing status. - Syncing(SyncInfo), -} - -/// Syncing information. -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SyncInfo { - /// Starting block. - pub starting_block: U64, - /// Current block. - pub current_block: U64, - /// Highest block. - pub highest_block: U64, -} - /// Log filter for `eth_getLogs` queries. #[derive(Clone, Debug, Default, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] @@ -349,6 +344,28 @@ impl TopicFilter { } } +/// Ethereum sync status returned by `eth_syncing`. +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(untagged)] +pub enum SyncStatus { + /// Node is currently syncing. + Syncing(SyncInfo), + /// Node is not syncing (returns `false`). + NotSyncing(bool), +} + +/// Sync progress information. +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct SyncInfo { + /// Block number at which syncing started. + pub starting_block: U64, + /// Current block number being processed. + pub current_block: U64, + /// Highest known block number. 
+ pub highest_block: U64, +} + #[cfg(test)] mod tests { use super::*; @@ -479,27 +496,6 @@ mod tests { assert!(!log.removed); } - #[test] - fn sync_status_not_syncing() { - let status = SyncStatus::NotSyncing(false); - let json = serde_json::to_string(&status).unwrap(); - assert_eq!(json, "false"); - } - - #[test] - fn sync_status_syncing() { - let info = SyncInfo { - starting_block: U64::from(0), - current_block: U64::from(100), - highest_block: U64::from(200), - }; - let status = SyncStatus::Syncing(info); - let json = serde_json::to_string(&status).unwrap(); - assert!(json.contains("startingBlock")); - assert!(json.contains("currentBlock")); - assert!(json.contains("highestBlock")); - } - #[test] fn rpc_log_filter_default() { let filter = RpcLogFilter::default(); diff --git a/crates/node/runner/Cargo.toml b/crates/node/runner/Cargo.toml index 85eba5d..de960ec 100644 --- a/crates/node/runner/Cargo.toml +++ b/crates/node/runner/Cargo.toml @@ -11,6 +11,7 @@ description = "Production node runner for Kora validators" kora-config.workspace = true kora-consensus.workspace = true kora-domain.workspace = true +kora-metrics.workspace = true kora-dkg.workspace = true kora-executor.workspace = true kora-indexer.workspace = true @@ -28,18 +29,29 @@ kora-txpool.workspace = true commonware-codec.workspace = true commonware-consensus.workspace = true commonware-cryptography.workspace = true +commonware-actor.workspace = true commonware-p2p.workspace = true -commonware-parallel.workspace = true commonware-runtime.workspace = true +commonware-storage.workspace = true commonware-utils.workspace = true alloy-consensus = { workspace = true } alloy-primitives.workspace = true +axum.workspace = true +bytes.workspace = true futures.workspace = true +governor.workspace = true +prometheus-client.workspace = true +parking_lot.workspace = true +hex.workspace = true +tokio.workspace = true tracing.workspace = true anyhow.workspace = true rand.workspace = true +[dev-dependencies] 
+tempfile.workspace = true + [lints] workspace = true diff --git a/crates/node/runner/README.md b/crates/node/runner/README.md index d35bdc4..db1006f 100644 --- a/crates/node/runner/README.md +++ b/crates/node/runner/README.md @@ -28,17 +28,19 @@ let scheme = load_threshold_scheme("/path/to/dkg/output")?; // Create bootstrap configuration with genesis allocations let bootstrap = BootstrapConfig::default(); -// Create the runner +// Load node configuration +let config = NodeConfig::load("/path/to/config.toml")?; +let rpc_addr = config.rpc.http_addr.parse()?; + +// Create the runner (gas limit comes from config.execution.gas_limit at runtime) let runner = ProductionRunner::new( scheme, - 1337, // chain ID - 30_000_000, // gas limit + config.chain_id, bootstrap, ) -.with_rpc(node_state, "0.0.0.0:8545".parse().unwrap()); +.with_rpc(node_state, rpc_addr); // Run as standalone process (blocks until shutdown) -let config = NodeConfig::load("/path/to/config.toml")?; runner.run_standalone(config)?; ``` @@ -48,7 +50,7 @@ runner.run_standalone(config)?; use kora_runner::ProductionRunner; use kora_service::{NodeRunContext, NodeRunner}; -let runner = ProductionRunner::new(scheme, chain_id, gas_limit, bootstrap); +let runner = ProductionRunner::new(scheme, chain_id, bootstrap); // Build transport and context manually let ctx = NodeRunContext::new(runtime_context, config, transport); @@ -105,7 +107,7 @@ The `ProductionRunner` implements the `NodeRunner` trait and executes the follow ## Key Types - `ProductionRunner` - Main production validator runner -- `RevmApplication` - REVM-based consensus application implementing `Application` and `VerifyingApplication` +- `RevmApplication` - REVM-based consensus application implementing `Application` - `ThresholdScheme` - BLS12-381 threshold signing configuration - `RunnerError` - Error types for runner operations @@ -117,7 +119,6 @@ The runner is configured through: |-----------|-------------| | `scheme` | BLS12-381 threshold signing 
scheme from DKG | | `chain_id` | EVM chain identifier | -| `gas_limit` | Maximum gas per block | | `bootstrap` | Genesis allocations and bootstrap transactions | | `rpc_config` | Optional RPC server configuration | diff --git a/crates/node/runner/src/app.rs b/crates/node/runner/src/app.rs index 15c9ebb..587e2d2 100644 --- a/crates/node/runner/src/app.rs +++ b/crates/node/runner/src/app.rs @@ -1,26 +1,89 @@ //! REVM-based consensus application implementation. -use std::{collections::BTreeSet, time::Instant}; +use std::{ + collections::{BTreeSet, HashMap}, + sync::{ + Arc, + atomic::{AtomicU64, Ordering}, + }, + time::{Duration, Instant, UNIX_EPOCH}, +}; use alloy_consensus::Header; use alloy_primitives::{Address, B256, Bytes}; use commonware_consensus::{ - Application, Block as _, VerifyingApplication, - marshal::ancestry::{AncestorStream, BlockProvider}, - simplex::types::Context, + Application, Block as _, marshal::ancestry::Ancestry, simplex::types::Context, }; use commonware_cryptography::{Committable as _, certificate::Scheme as CertScheme}; use commonware_runtime::{Clock, Metrics, Spawner}; use futures::StreamExt; use kora_consensus::{BlockExecution, SnapshotStore, components::InMemorySnapshotStore}; use kora_domain::{Block, ConsensusDigest}; -use kora_executor::{BlockContext, BlockExecutor}; +use kora_executor::{BaseFeeParams, BlockContext, BlockExecutor, calculate_base_fee}; use kora_ledger::LedgerService; +use kora_metrics::AppMetrics; use kora_overlay::OverlayState; use kora_qmdb_ledger::QmdbState; use kora_rpc::NodeState; +use parking_lot::RwLock; use rand::Rng; -use tracing::{info, trace, warn}; +use tracing::{debug, error, info, trace, warn}; + +/// Maximum time to wait for a parent snapshot to become available before +/// giving up and nullifying the view. Uses event-driven notification +/// (via [`LedgerService::wait_for_snapshot`]) so the wake-up is immediate +/// once the snapshot is inserted, with this timeout as the upper bound. 
+/// +/// Under CPU contention (e.g. 23 threads on 0.75 cores), the finalization +/// reporter may need more time to produce the parent snapshot. 100 ms +/// provides ample budget; in the common case the Notify fires within the +/// first few milliseconds. +const SNAPSHOT_WAIT_TIMEOUT: Duration = Duration::from_millis(100); + +/// Maximum number of seconds a block timestamp may be ahead of the +/// validator's wall-clock time. Blocks with timestamps further in the +/// future are rejected during verification. 15 seconds is generous enough +/// to tolerate clock skew between validators while preventing malicious +/// leaders from pushing timestamps arbitrarily far forward. +const MAX_FUTURE_TIMESTAMP_DRIFT: u64 = 15; + +/// Maximum number of unfinalized blocks a leader may be ahead of the last +/// finalized height before it voluntarily skips its proposal turn. This +/// prevents a single fast leader from racing too far ahead of finalization, +/// which can cascade into snapshot-miss failures for other validators. +/// +/// The previous value of 8 was too tight under CPU contention and after node +/// restarts: transient finalization stalls (or the finalization pipeline +/// lagging during re-sync) would trip the guard and force every leader to +/// skip, producing a cascade of nullifications that could stall the entire +/// network. A value of 64 gives finalization plenty of room to drain +/// without stalling proposals on healthy nodes. At the current throughput +/// ceiling of ~30 blocks/s, a gap of 64 represents roughly 2 seconds of +/// blocks. 
+const MAX_PROPOSAL_LAG: u64 = 64; + +fn unix_timestamp_secs(env: &Env) -> u64 { + env.current().duration_since(UNIX_EPOCH).map(|duration| duration.as_secs()).unwrap_or(0) +} + +/// Number of blocks the network must advance PAST the recovered height +/// (as tracked by `last_verified_height`, described below) +/// before the node exits catch-up mode and starts requiring full +/// re-execution for verification. +/// +/// During catch-up, blocks whose parent snapshot is missing are trusted +/// based on their finality certificate (the resolver already verified the +/// certificate before delivering the block to the application layer). +/// +/// Previously this was set to 2, which meant the node exited catch-up mode +/// almost immediately -- each trusted block advanced `recovered_height`, +/// so the *next* block was only 1 ahead, below the threshold of 2. The +/// catch-up window collapsed after a single block. +/// +/// Now the catch-up window is anchored to the *original* `recovered_height` +/// and only closes when `last_verified_height` (advanced by full execution or +/// by re-encountering an already-stored block) reaches `recovered_height + 64`. +const CATCH_UP_THRESHOLD: u64 = 64; /// REVM-based consensus application. #[derive(Clone)] @@ -29,7 +92,31 @@ pub struct RevmApplication { executor: E, max_txs: usize, gas_limit: u64, + fee_recipient: Address, node_state: Option, + metrics: Option, + /// Height of the HEAD block that was restored from the archive during + /// startup recovery. This value is set once at startup and never + /// changes; it anchors the catch-up window. + /// + /// Catch-up mode is active as long as `recovered_height > 0` and the + /// node has not yet verified enough blocks past the recovery point. + /// Blocks whose parent snapshot is missing are trusted based on their + /// finality certificate (which the resolver already verified).
Once + /// the node successfully verifies a block via full execution at height + /// >= `recovered_height + CATCH_UP_THRESHOLD`, catch-up mode ends. + recovered_height: Arc, + /// The highest block height that has been processed by `verify_block`. + /// Advanced by full-execution verification and by re-encountering + /// previously processed blocks (including certificate-trusted ones). + /// Used to determine when the catch-up window should close. + last_verified_height: Arc, + /// Per-block `(gas_used, base_fee_per_gas)` cache, keyed by consensus + /// digest. Populated when a block is built or verified so that the + /// *next* block can compute its EIP-1559 base fee from the parent's + /// gas usage. Entries are small (32 + 16 bytes) and the map is bounded + /// by the number of unfinalized blocks. + block_fees: Arc>>, _scheme: std::marker::PhantomData, } @@ -38,6 +125,11 @@ impl std::fmt::Debug for RevmApplication { f.debug_struct("RevmApplication") .field("max_txs", &self.max_txs) .field("gas_limit", &self.gas_limit) + .field("fee_recipient", &self.fee_recipient) + .field("metrics", &self.metrics.is_some()) + .field("recovered_height", &self.recovered_height.load(Ordering::Relaxed)) + .field("last_verified_height", &self.last_verified_height.load(Ordering::Relaxed)) + .field("block_fees_cached", &self.block_fees.read().len()) .finish_non_exhaustive() } } @@ -47,13 +139,27 @@ where E: BlockExecutor, Tx = Bytes> + Clone, { /// Create a new REVM application. 
- pub const fn new(ledger: LedgerService, executor: E, max_txs: usize, gas_limit: u64) -> Self { + pub fn new( + ledger: LedgerService, + executor: E, + max_txs: usize, + gas_limit: u64, + fee_recipient: Address, + ) -> Self { + let mut block_fees = HashMap::new(); + block_fees.insert(ledger.genesis_block().commitment(), (0, kora_config::INITIAL_BASE_FEE)); + Self { ledger, executor, max_txs, gas_limit, + fee_recipient, node_state: None, + metrics: None, + recovered_height: Arc::new(AtomicU64::new(0)), + last_verified_height: Arc::new(AtomicU64::new(0)), + block_fees: Arc::new(RwLock::new(block_fees)), _scheme: std::marker::PhantomData, } } @@ -65,13 +171,80 @@ where self } - fn block_context(&self, height: u64, prevrandao: B256) -> BlockContext { + /// Attach application-level metrics. + #[must_use] + pub fn with_metrics(mut self, metrics: AppMetrics) -> Self { + self.metrics = Some(metrics); + self + } + + /// Set the height of the HEAD block that was recovered from the archive. + /// + /// This activates catch-up mode: when parent snapshots are unavailable, + /// blocks are trusted based on their finality certificate. Catch-up + /// mode remains active until the node has verified blocks far enough + /// past the recovered height (controlled by [`CATCH_UP_THRESHOLD`]). + #[must_use] + pub fn with_recovered_height(self, height: u64) -> Self { + self.recovered_height.store(height, Ordering::Relaxed); + // The recovered height is also the highest successfully verified + // height at startup -- prepopulated snapshots cover everything up + // to this point. + self.last_verified_height.store(height, Ordering::Relaxed); + self + } + + /// Seed the block-fee cache with entries from the block index so that + /// the first blocks after a restart can derive a correct EIP-1559 base + /// fee. Without this, `compute_base_fee` would fall back to + /// `INITIAL_BASE_FEE` for any parent whose fee data was not in the + /// in-memory cache. 
+ /// + /// `entries` should contain `(digest, gas_used, base_fee_per_gas)` for + /// recent blocks (at minimum the HEAD block). + pub fn seed_block_fees(&self, entries: &[(ConsensusDigest, u64, u64)]) { + let mut fees = self.block_fees.write(); + for &(digest, gas_used, base_fee) in entries { + fees.insert(digest, (gas_used, base_fee)); + } + } + + /// Compute the base fee for a new block from the parent's gas usage + /// (EIP-1559). Falls back to [`kora_config::INITIAL_BASE_FEE`] when the + /// parent's fee data is not cached (e.g. an unseeded restart or catch-up). + fn compute_base_fee(&self, parent_digest: ConsensusDigest) -> u64 { + let fees = self.block_fees.read(); + match fees.get(&parent_digest) { + Some(&(parent_gas_used, parent_base_fee)) => calculate_base_fee( + parent_base_fee, + parent_gas_used, + self.gas_limit, + &BaseFeeParams::DEFAULT, + ), + None => kora_config::INITIAL_BASE_FEE, + } + } + + /// Record a block's gas usage and base fee so that the next block can + /// derive its own base fee via [`Self::compute_base_fee`].
+ fn record_block_fees(&self, digest: ConsensusDigest, gas_used: u64, base_fee: u64) { + self.block_fees.write().insert(digest, (gas_used, base_fee)); + } + + fn block_context( + &self, + height: u64, + timestamp: u64, + prevrandao: B256, + parent_digest: ConsensusDigest, + ) -> BlockContext { + let base_fee = self.compute_base_fee(parent_digest); let header = Header { number: height, - timestamp: height, + timestamp, gas_limit: self.gas_limit, - beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + beneficiary: self.fee_recipient, + base_fee_per_gas: Some(base_fee), ..Default::default() }; BlockContext::new(header, B256::ZERO, prevrandao) @@ -81,16 +254,66 @@ where self.ledger.seed_for_parent(parent_digest).await.unwrap_or(B256::ZERO) } - async fn build_block(&self, parent: &Block) -> Option { + async fn build_block(&self, parent: &Block, timestamp: u64) -> Option { use kora_consensus::Mempool as _; let start = Instant::now(); let parent_digest = parent.commitment(); - let parent_snapshot = self.ledger.parent_snapshot(parent_digest).await?; + + // Wait briefly for the parent snapshot to become available. + // + // Consensus can advance views faster than the execution layer + // produces snapshots. Rather than polling with sleep(), we use + // an event-driven wait: `wait_for_snapshot` blocks on a Notify + // that fires whenever any snapshot is inserted, so we wake up + // immediately when the snapshot arrives instead of sleeping + // through a fixed interval. 
+ let parent_snapshot = { + let wait_start = Instant::now(); + match self.ledger.wait_for_snapshot(parent_digest, SNAPSHOT_WAIT_TIMEOUT).await { + Some(s) => { + let wait_elapsed = wait_start.elapsed(); + if wait_elapsed.as_millis() > 1 { + if let Some(ref m) = self.metrics { + m.snapshot_poll_wait.observe(wait_elapsed.as_secs_f64()); + } + debug!( + parent_height = parent.height, + ?parent_digest, + wait_ms = wait_elapsed.as_millis(), + "build_block: parent snapshot arrived after waiting" + ); + } + s + } + None => { + if let Some(ref m) = self.metrics { + m.proposal_snapshot_misses.inc(); + } + warn!( + parent_height = parent.height, + ?parent_digest, + wait_ms = wait_start.elapsed().as_millis(), + "build_block: parent snapshot not found after waiting \ + -- node has not yet processed this parent block" + ); + return None; + } + } + }; let snapshot_elapsed = start.elapsed(); let (_, mempool, snapshots) = self.ledger.proposal_components().await; - let excluded = self.collect_pending_tx_ids(&snapshots, parent_digest); + let excluded = match self.collect_pending_tx_ids(&snapshots, parent_digest) { + Some(ids) => ids, + None => { + // The snapshot chain has a gap -- we cannot determine which + // transactions were already included in recent blocks. + // Building with an incomplete excluded set risks duplicate + // transactions, so we nullify this round instead. 
+ return None; + } + }; let mempool_len = mempool.len(); let excluded_len = excluded.len(); let txs = mempool.build(self.max_txs, &excluded); @@ -118,42 +341,86 @@ where let prevrandao = self.get_prevrandao(parent_digest).await; let height = parent.height + 1; - let context = self.block_context(height, prevrandao); + let context = self.block_context(height, timestamp, prevrandao, parent_digest); + let base_fee = context.header.base_fee_per_gas.unwrap_or(kora_config::INITIAL_BASE_FEE); let txs_bytes: Vec = txs.iter().map(|tx| tx.bytes.clone()).collect(); let exec_start = Instant::now(); - let outcome = self.executor.execute(&parent_snapshot.state, &context, &txs_bytes).ok()?; + // Run EVM execution on a dedicated blocking thread so that the + // synchronous REVM loop does not occupy an async worker thread. + // All clones are cheap (Arc bumps or small Copy types). + let outcome = { + let executor = self.executor.clone(); + let state = parent_snapshot.state.clone(); + match tokio::task::spawn_blocking(move || { + executor.execute(&state, &context, &txs_bytes) + }) + .await + { + Ok(Ok(outcome)) => outcome, + Ok(Err(err)) => { + error!( + parent = ?parent_digest, + height, + txs = txs.len(), + gas_limit = self.gas_limit, + error = %err, + error_debug = ?err, + "build_block: block execution failed -- \ + this may indicate a bad transaction, OOM, or state corruption" + ); + return None; + } + Err(join_err) => { + error!( + parent = ?parent_digest, + height, + error = %join_err, + "build_block: spawn_blocking join error" + ); + return None; + } + } + }; let exec_elapsed = exec_start.elapsed(); let root_start = Instant::now(); - let state_root = self - .ledger - .compute_root_from_store(parent_digest, outcome.changes.clone()) - .await - .ok()?; + let state_root = + match self.ledger.compute_root_from_store(parent_digest, &outcome.changes).await { + Ok(root) => root, + Err(err) => { + error!( + parent = ?parent_digest, + height, + error = %err, + error_debug = ?err, + 
"build_block: QMDB state root computation failed -- \ + this may indicate a storage I/O error or inconsistent state" + ); + return None; + } + }; let root_elapsed = root_start.elapsed(); - let block = Block { parent: parent.id(), height, prevrandao, state_root, txs }; + let block = Block::new(parent.id(), height, timestamp, prevrandao, state_root, txs); - let merged_changes = parent_snapshot.state.merge_changes(outcome.changes.clone()); - let next_state = OverlayState::new(parent_snapshot.state.base(), merged_changes); let block_digest = block.commitment(); - self.ledger - .insert_snapshot( - block_digest, - parent_digest, - next_state, - state_root, - outcome.changes, - &block.txs, - ) - .await; + // Cache gas usage so that the next block can derive its base fee. + self.record_block_fees(block_digest, outcome.gas_used, base_fee); let total_elapsed = start.elapsed(); - info!( + + if let Some(ref m) = self.metrics { + m.block_build_time.observe(total_elapsed.as_secs_f64()); + m.evm_execution_seconds.observe(exec_elapsed.as_secs_f64()); + m.block_txs_included.set(block.txs.len() as i64); + } + + debug!( ?block_digest, height, + timestamp, txs = block.txs.len(), snapshot_ms = snapshot_elapsed.as_millis(), exec_ms = exec_elapsed.as_millis(), @@ -164,23 +431,164 @@ where Some(block) } - async fn verify_block(&self, block: &Block) -> bool { + /// Check whether the node is in catch-up mode. + /// + /// Returns `true` when: + /// 1. The node recovered from an archive at startup (`recovered_height > 0`), AND + /// 2. The highest block verified via full execution has not yet reached + /// far enough past the recovery point. + /// + /// The `block_height` parameter is the height of the block being verified. + /// It must be greater than the recovered height (otherwise it is a block + /// we already have and does not need catch-up trust). 
+ /// + /// Unlike the previous implementation, the catch-up window is anchored to + /// the *original* `recovered_height` and only closes when + /// `last_verified_height` advances past + /// `recovered_height + CATCH_UP_THRESHOLD`. `last_verified_height` is + /// advanced both by full-execution verification and by re-encountering + /// previously processed blocks (including certificate-trusted ones) in + /// the "already verified" early-return path of `verify_block`. + fn is_catching_up(&self, block_height: u64) -> bool { + let recovered = self.recovered_height.load(Ordering::Relaxed); + // Fresh node: never recovered, not catching up. + if recovered == 0 { + return false; + } + // Block is at or below the recovered height -- we already have + // state for it (prepopulated cache covers it), no catch-up needed. + if block_height <= recovered { + return false; + } + // Check whether full-execution verification has advanced far enough + // past the recovery point. If it has, catch-up is over. + let verified = self.last_verified_height.load(Ordering::Relaxed); + verified < recovered.saturating_add(CATCH_UP_THRESHOLD) + } + + async fn verify_block( + &self, + block: &Block, + parent_timestamp: Option, + now_secs: u64, + ) -> bool { let start = Instant::now(); let digest = block.commitment(); let parent_digest = block.parent(); if self.ledger.query_state_root(digest).await.is_some() { - trace!(?digest, "block already verified"); + // Block is already in the snapshot store. This can happen either + // because it was fully verified earlier, or because it was + // certificate-trusted during catch-up. In both cases, advance + // `last_verified_height` so the catch-up window eventually closes. 
+ // + // Without this, certificate-trusted blocks create "holes" in the + // verified chain: subsequent `verify` calls stop the ancestry walk + // at the certificate-trusted block (its state_root is in the + // store), so the full-execution path is never reached for that + // height, and `last_verified_height` never advances past it. + self.last_verified_height.fetch_max(block.height, Ordering::Relaxed); + if let Some(ref state) = self.node_state { + state.set_last_verified_height(block.height); + } + trace!(?digest, height = block.height, "block already verified"); return true; } - let Some(parent_snapshot) = self.ledger.parent_snapshot(parent_digest).await else { - warn!(?digest, ?parent_digest, height = block.height, "missing parent snapshot"); - return false; + // ── Timestamp validation ────────────────────────────────────── + // These checks are cheap (no I/O) and catch obviously invalid + // blocks early, before we spend time fetching snapshots and + // executing transactions. During catch-up the blocks are already + // backed by a finality certificate so we skip the checks. + if !self.is_catching_up(block.height) { + // Monotonicity: block timestamp must not move backwards. + // `block.timestamp` is second-granularity wall-clock time, so + // fast blocks can legitimately share the same timestamp. + if let Some(parent_ts) = parent_timestamp + && block.timestamp < parent_ts + { + warn!( + ?digest, + height = block.height, + block_timestamp = block.timestamp, + parent_timestamp = parent_ts, + "verify_block: timestamp moved backwards" + ); + return false; + } + + // Future-drift: reject blocks whose timestamp is too far + // ahead of the validator's wall-clock. 
+ let max_allowed = now_secs.saturating_add(MAX_FUTURE_TIMESTAMP_DRIFT); + if block.timestamp > max_allowed { + warn!( + ?digest, + height = block.height, + block_timestamp = block.timestamp, + now_secs, + max_allowed, + "verify_block: timestamp too far in the future" + ); + return false; + } + } + + let parent_snapshot = match self.ledger.parent_snapshot(parent_digest).await { + Some(snap) => snap, + None => { + // Parent snapshot is missing. During normal operation this + // means we received a genuinely invalid or out-of-order + // block. But after a restart the snapshot cache only + // contains the HEAD (plus prepopulated recent blocks), so + // blocks whose parent we haven't processed yet will fail + // here. + // + // If we are still catching up, trust the finality certificate + // and restore the block as a persisted snapshot so that + // subsequent blocks can find their parent. This is safe + // because the resolver already verified the finality + // certificate (2/3+ threshold signature) before delivering + // the block to the application layer. + if self.is_catching_up(block.height) { + debug!( + ?digest, + ?parent_digest, + height = block.height, + recovered_height = self.recovered_height.load(Ordering::Relaxed), + last_verified = self.last_verified_height.load(Ordering::Relaxed), + "verify_block: parent snapshot missing during catch-up; \ + trusting finality certificate" + ); + // Create a persisted snapshot for this block using the + // current QMDB state. The FinalizedReporter will + // re-execute and properly persist the block when it + // arrives through the finalization pipeline. + self.ledger.restore_persisted_snapshot(block).await; + // We do NOT update last_verified_height here because + // certificate-trust is not full verification. 
However, + // the "already verified" early-return path at the top of + // verify_block WILL advance last_verified_height when + // this block is encountered again in a future ancestry + // walk, ensuring the catch-up window eventually closes. + return true; + } + + warn!( + ?digest, + ?parent_digest, + height = block.height, + recovered_height = self.recovered_height.load(Ordering::Relaxed), + last_verified = self.last_verified_height.load(Ordering::Relaxed), + "verify_block: missing parent snapshot (not in catch-up mode)" + ); + return false; + } }; let snapshot_elapsed = start.elapsed(); - let context = self.block_context(block.height, block.prevrandao); + let context = + self.block_context(block.height, block.timestamp, block.prevrandao, parent_digest); + let base_fee = context.header.base_fee_per_gas.unwrap_or(kora_config::INITIAL_BASE_FEE); let exec_start = Instant::now(); let execution = match BlockExecution::execute(&parent_snapshot, &self.executor, &context, &block.txs) @@ -188,6 +596,21 @@ where { Ok(result) => result, Err(err) => { + // During catch-up, the parent snapshot may have been + // restored with empty changes (certificate-trusted), so + // execution against it can legitimately fail. Fall back + // to certificate-trust rather than rejecting the block. 
+ if self.is_catching_up(block.height) { + warn!( + ?digest, + height = block.height, + error = ?err, + "verify_block: execution failed during catch-up; \ + falling back to certificate trust" + ); + self.ledger.restore_persisted_snapshot(block).await; + return true; + } warn!(?digest, error = ?err, "execution failed"); return false; } @@ -197,11 +620,22 @@ where let root_start = Instant::now(); let state_root = match self .ledger - .compute_root_from_store(parent_digest, execution.outcome.changes.clone()) + .compute_root_from_store(parent_digest, &execution.outcome.changes) .await { Ok(root) => root, Err(err) => { + if self.is_catching_up(block.height) { + warn!( + ?digest, + height = block.height, + error = ?err, + "verify_block: compute root failed during catch-up; \ + falling back to certificate trust" + ); + self.ledger.restore_persisted_snapshot(block).await; + return true; + } warn!(?digest, error = ?err, "compute root failed"); return false; } @@ -209,6 +643,26 @@ where let root_elapsed = root_start.elapsed(); if state_root != block.state_root { + // During catch-up, the parent snapshot may have been restored + // with an empty changeset via `restore_persisted_snapshot` + // (certificate-trusted). The empty changeset means the parent + // state does not include intermediate block changes, causing the + // computed root to diverge from the expected root. Rather than + // rejecting the block (which would permanently stall catch-up), + // fall back to certificate-trust. 
+ if self.is_catching_up(block.height) { + warn!( + ?digest, + height = block.height, + expected = ?block.state_root, + computed = ?state_root, + "verify_block: state root mismatch during catch-up; \ + falling back to certificate trust \ + (parent snapshot likely has empty changeset from prior trust)" + ); + self.ledger.restore_persisted_snapshot(block).await; + return true; + } warn!( ?digest, expected = ?block.state_root, @@ -218,6 +672,9 @@ where return false; } + // Cache gas usage so the next block can derive its base fee. + self.record_block_fees(digest, execution.outcome.gas_used, base_fee); + let merged_changes = parent_snapshot.state.merge_changes(execution.outcome.changes.clone()); let next_state = OverlayState::new(parent_snapshot.state.base(), merged_changes); @@ -232,8 +689,29 @@ where ) .await; + // Full execution verification succeeded. Advance the verified + // height so that the catch-up window eventually closes once we + // have verified blocks past the recovery point. + let prev_verified = self.last_verified_height.fetch_max(block.height, Ordering::Relaxed); + if let Some(ref state) = self.node_state { + state.set_last_verified_height(block.height); + } + if prev_verified < self.recovered_height.load(Ordering::Relaxed) + && block.height >= self.recovered_height.load(Ordering::Relaxed) + { + info!( + height = block.height, + recovered_height = self.recovered_height.load(Ordering::Relaxed), + "catch-up: first full-execution verification past recovery point" + ); + } + + if let Some(ref m) = self.metrics { + m.evm_execution_seconds.observe(exec_elapsed.as_secs_f64()); + } + let total_elapsed = start.elapsed(); - info!( + debug!( ?digest, height = block.height, txs = block.txs.len(), @@ -246,11 +724,17 @@ where true } + /// Collect transaction IDs from unpersisted ancestor snapshots. + /// + /// Returns `None` if the snapshot chain has a gap (a snapshot was evicted + /// before we could read it). 
In that case the caller **must not** build a + /// block, because we cannot guarantee the excluded set is complete and + /// would risk including duplicate transactions. fn collect_pending_tx_ids( &self, snapshots: &InMemorySnapshotStore>, from: ConsensusDigest, - ) -> BTreeSet { + ) -> Option> { let mut excluded = BTreeSet::new(); let mut current = Some(from); @@ -259,13 +743,19 @@ where break; } let Some(snapshot) = snapshots.get(&digest) else { - break; + warn!( + ?digest, + collected_so_far = excluded.len(), + "snapshot chain gap during tx exclusion collection -- \ + refusing to build block to prevent duplicate transactions" + ); + return None; }; excluded.extend(snapshot.tx_ids.iter().copied()); current = snapshot.parent; } - excluded + Some(excluded) } } @@ -279,71 +769,108 @@ where type Context = Context; type Block = Block; - fn genesis(&mut self) -> impl std::future::Future + Send { - async move { self.ledger.genesis_block() } - } - - fn propose( + fn propose( &mut self, - _context: (Env, Self::Context), - mut ancestry: AncestorStream, - ) -> impl std::future::Future> + Send - where - A: BlockProvider, - { + context: (Env, Self::Context), + mut ancestry: impl Ancestry, + ) -> impl std::future::Future> + Send { let node_state = self.node_state.clone(); + let metrics = self.metrics.clone(); + let env = context.0; async move { let start = Instant::now(); let parent = ancestry.next().await?; let ancestry_elapsed = start.elapsed(); + // Proposal lag guard: if the tip is too far ahead of the last + // finalized height, skip this proposal to let finalization catch + // up. This prevents a fast leader from building an unbounded + // chain of unfinalized snapshots that other validators cannot + // verify in time. 
+ if let Some(ref state) = node_state { + let finalized = state.finalized_height(); + if parent.height > finalized + MAX_PROPOSAL_LAG { + if let Some(ref m) = metrics { + m.proposal_lag_skips.inc(); + } + warn!( + parent_height = parent.height, + finalized_height = finalized, + max_lag = MAX_PROPOSAL_LAG, + "skipping proposal: parent too far ahead of finalized height" + ); + return None; + } + } + + let now_secs = unix_timestamp_secs(&env); + let timestamp = match Block::next_timestamp(now_secs, parent.timestamp) { + Some(ts) => ts, + None => { + tracing::error!( + parent_timestamp = parent.timestamp, + "timestamp overflow: cannot produce a timestamp after parent" + ); + return None; + } + }; + let build_start = Instant::now(); - let block = self.build_block(&parent).await; + let block = self.build_block(&parent, timestamp).await; let build_elapsed = build_start.elapsed(); - if let Some(ref b) = block { - if let Some(ref state) = node_state { - state.inc_proposed(); + match block { + Some(ref b) => { + if let Some(ref state) = node_state { + state.inc_proposed(); + } + debug!( + height = b.height, + timestamp = b.timestamp, + ancestry_ms = ancestry_elapsed.as_millis(), + build_ms = build_elapsed.as_millis(), + total_ms = start.elapsed().as_millis(), + "propose complete" + ); + } + None => { + warn!( + parent_height = parent.height, + parent_digest = ?parent.commitment(), + build_ms = build_elapsed.as_millis(), + "propose failed: build_block returned None \ + (likely missing parent snapshot -- node may still be catching up)" + ); } - info!( - height = b.height, - ancestry_ms = ancestry_elapsed.as_millis(), - build_ms = build_elapsed.as_millis(), - total_ms = start.elapsed().as_millis(), - "propose complete" - ); } block } } -} -impl VerifyingApplication for RevmApplication -where - Env: Rng + Spawner + Metrics + Clock, - S: CertScheme + Send + Sync + 'static, - E: BlockExecutor, Tx = Bytes> + Clone + Send + Sync + 'static, -{ - fn verify( + fn verify( &mut self, - 
_context: (Env, Self::Context), - mut ancestry: AncestorStream, - ) -> impl std::future::Future + Send - where - A: BlockProvider, - { + context: (Env, Self::Context), + mut ancestry: impl Ancestry, + ) -> impl std::future::Future + Send { + let env = context.0; async move { let start = Instant::now(); + let now_secs = unix_timestamp_secs(&env); - // The ancestry stream yields tip-first (newest → oldest). + // The ancestry stream yields tip-first (newest -> oldest). // We only need to verify blocks that we haven't seen yet. // Collect blocks until we hit one we've already verified. + // When we find the already-verified parent, capture its + // timestamp so we can validate timestamp monotonicity for + // the oldest unverified block. let mut blocks_to_verify = Vec::new(); + let mut verified_parent_timestamp: Option = None; while let Some(block) = ancestry.next().await { let digest = block.commitment(); // Stop if we've already verified this block if self.ledger.query_state_root(digest).await.is_some() { + verified_parent_timestamp = Some(block.timestamp); break; } blocks_to_verify.push(block); @@ -359,17 +886,21 @@ where let block_count = blocks_to_verify.len(); let tip_height = blocks_to_verify.first().map(|b| b.height).unwrap_or(0); - // Verify from oldest (parent) to newest (tip) + // Verify from oldest (parent) to newest (tip). + // Track the parent timestamp across the chain so each block's + // timestamp monotonicity can be validated. 
let verify_start = Instant::now(); + let mut parent_ts = verified_parent_timestamp; for block in blocks_to_verify.into_iter().rev() { - if !self.verify_block(&block).await { + if !self.verify_block(&block, parent_ts, now_secs).await { return false; } + parent_ts = Some(block.timestamp); } let verify_elapsed = verify_start.elapsed(); let total_elapsed = start.elapsed(); - info!( + debug!( tip_height, block_count, ancestry_ms = ancestry_elapsed.as_millis(), diff --git a/crates/node/runner/src/commit_marker.rs b/crates/node/runner/src/commit_marker.rs new file mode 100644 index 0000000..f392b1c --- /dev/null +++ b/crates/node/runner/src/commit_marker.rs @@ -0,0 +1,148 @@ +//! Commit digest marker file for crash-recovery validation. +//! +//! After each successful QMDB persist, the digest of the committed block is +//! written to a small marker file (`last_committed_digest`). On startup the +//! recovery procedure reads this marker and compares it against the archive +//! head to detect whether QMDB may be behind or inconsistent. +//! +//! The write uses an atomic rename pattern (write to a temporary file, then +//! rename) so a crash mid-write never produces a corrupt marker. + +use std::{ + io::Write as _, + path::{Path, PathBuf}, +}; + +use commonware_cryptography::sha256; +use kora_domain::ConsensusDigest; +use tracing::{debug, warn}; + +/// Name of the marker file within the data directory. +const MARKER_FILENAME: &str = "last_committed_digest"; + +/// Name of the temporary file used during atomic writes. +const MARKER_TMP_FILENAME: &str = "last_committed_digest.tmp"; + +/// Resolve the marker file path for a given data directory. +pub fn marker_path(data_dir: &Path) -> PathBuf { + data_dir.join(MARKER_FILENAME) +} + +/// Write the committed block's digest to the marker file atomically. +/// +/// The digest is written as 64 lowercase hex characters followed by a newline. 
+/// The write goes to a temporary file first, which is then renamed into place
+/// so that a crash mid-write never leaves a corrupt marker.
+///
+/// After the rename, the directory holding the marker is fsynced on Unix
+/// (best effort) so that the new directory entry itself reaches stable
+/// storage. If writing or renaming fails, the temporary file is removed
+/// before the error is returned.
+///
+/// # Errors
+///
+/// Returns the underlying I/O error if the directory cannot be created, the
+/// temporary file cannot be written or synced, or the rename fails.
+pub fn write_commit_marker(data_dir: &Path, digest: &ConsensusDigest) -> std::io::Result<()> {
+    let tmp_path = data_dir.join(MARKER_TMP_FILENAME);
+    let final_path = marker_path(data_dir);
+
+    // Ensure the directory exists.
+    if let Some(parent) = final_path.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
+
+    // Write to the temp file, then atomically rename it over the old marker.
+    let hex = hex::encode(digest.as_ref());
+    let published = write_marker_tmp(&tmp_path, &hex)
+        .and_then(|()| std::fs::rename(&tmp_path, &final_path));
+    if let Err(err) = published {
+        // Best effort: do not leave a stale temporary file behind.
+        let _ = std::fs::remove_file(&tmp_path);
+        return Err(err);
+    }
+
+    // The rename only updates the directory entry; without syncing the
+    // directory a power loss can still roll the marker back. This is best
+    // effort because some filesystems reject fsync on directories, and the
+    // marker is already in place at this point.
+    #[cfg(unix)]
+    {
+        let dir = final_path
+            .parent()
+            .filter(|p| !p.as_os_str().is_empty())
+            .unwrap_or_else(|| Path::new("."));
+        if let Err(err) = std::fs::File::open(dir).and_then(|d| d.sync_all()) {
+            warn!(
+                error = %err,
+                path = %dir.display(),
+                "failed to fsync directory after writing commit marker"
+            );
+        }
+    }
+
+    debug!(digest = %hex, path = %final_path.display(), "wrote commit marker");
+    Ok(())
+}
+
+/// Write `hex` plus a trailing newline to `tmp_path` and flush it to disk.
+fn write_marker_tmp(tmp_path: &Path, hex: &str) -> std::io::Result<()> {
+    let mut f = std::fs::File::create(tmp_path)?;
+    f.write_all(hex.as_bytes())?;
+    f.write_all(b"\n")?;
+    f.sync_all()
+}
+
+/// Read the last committed digest from the marker file.
+///
+/// Returns `None` if the marker file does not exist (fresh node or pre-fix
+/// node). Returns `Some(digest)` if the file exists and contains a valid
+/// 64-character hex string. Logs a warning and returns `None` if the file
+/// exists but is malformed.
+pub fn read_commit_marker(data_dir: &Path) -> Option<ConsensusDigest> {
+    // A digest is 32 raw bytes, stored on disk as twice as many hex characters.
+    const DIGEST_BYTES: usize = 32;
+
+    let path = marker_path(data_dir);
+    let content = match std::fs::read_to_string(&path) {
+        Ok(c) => c,
+        // A missing marker is an expected state, so it is not logged.
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return None,
+        Err(e) => {
+            warn!(
+                error = %e,
+                path = %path.display(),
+                "failed to read commit marker file"
+            );
+            return None;
+        }
+    };
+
+    // Drop the trailing newline (and any stray whitespace) before validating.
+    let hex_str = content.trim();
+    if hex_str.len() != DIGEST_BYTES * 2 {
+        warn!(
+            len = hex_str.len(),
+            path = %path.display(),
+            "commit marker file has unexpected length (expected 64 hex chars)"
+        );
+        return None;
+    }
+
+    let mut bytes = [0u8; DIGEST_BYTES];
+    match hex::decode_to_slice(hex_str, &mut bytes) {
+        Ok(()) => Some(sha256::Digest(bytes)),
+        Err(e) => {
+            warn!(
+                error = %e,
+                path = %path.display(),
+                "commit marker file contains invalid hex"
+            );
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn round_trip_write_read() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let digest = sha256::Digest([0xab; 32]);
+
+        write_commit_marker(dir.path(), &digest).expect("write");
+        let read_back = read_commit_marker(dir.path());
+
+        assert_eq!(read_back, Some(digest));
+    }
+
+    #[test]
+    fn missing_marker_returns_none() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        assert_eq!(read_commit_marker(dir.path()), None);
+    }
+
+    #[test]
+    fn corrupt_marker_returns_none() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = marker_path(dir.path());
+        std::fs::write(&path, "not-valid-hex\n").expect("write corrupt");
+
+        assert_eq!(read_commit_marker(dir.path()), None);
+    }
+
+    #[test]
+    fn invalid_hex_marker_returns_none() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = marker_path(dir.path());
+        // Correct length but not hex, so the decode branch (not the length
+        // check) is the one that rejects the file.
+        std::fs::write(&path, format!("{}\n", "z".repeat(64))).expect("write invalid hex");
+
+        assert_eq!(read_commit_marker(dir.path()), None);
+    }
+
+    #[test]
+    fn overwrite_marker() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let digest_a = sha256::Digest([0x11; 32]);
+        let digest_b = sha256::Digest([0x22; 32]);
+
+        write_commit_marker(dir.path(), &digest_a).expect("write a");
+        assert_eq!(read_commit_marker(dir.path()), Some(digest_a));
+
+        write_commit_marker(dir.path(), &digest_b).expect("write b");
+
assert_eq!(read_commit_marker(dir.path()), Some(digest_b)); + } +} diff --git a/crates/node/runner/src/lib.rs b/crates/node/runner/src/lib.rs index 1d3d64a..b980863 100644 --- a/crates/node/runner/src/lib.rs +++ b/crates/node/runner/src/lib.rs @@ -10,11 +10,15 @@ mod app; pub use app::RevmApplication; +pub mod commit_marker; + mod error; pub use error::RunnerError; +mod no_sync_storage; + mod runner; -pub use runner::ProductionRunner; +pub use runner::{ProductionRunner, runtime_storage_directory}; mod scheme; pub use scheme::{ThresholdScheme, load_threshold_scheme}; diff --git a/crates/node/runner/src/no_sync_storage.rs b/crates/node/runner/src/no_sync_storage.rs new file mode 100644 index 0000000..444f82c --- /dev/null +++ b/crates/node/runner/src/no_sync_storage.rs @@ -0,0 +1,433 @@ +//! Runtime wrapper for non-durable consensus scratch storage. + +use std::{ + collections::BTreeMap, + future::Future, + ops::RangeInclusive, + sync::{Arc, Mutex, RwLock}, + time::{Duration, SystemTime}, +}; + +use commonware_runtime::{ + Blob, BufferPool, BufferPooler, Clock, Error, Handle, IoBufs, IoBufsMut, Metrics, Spawner, + Storage, Supervisor, Tracing, iobuf, signal, + telemetry::metrics::{Metric, Registered}, +}; +use rand::{CryptoRng, RngCore}; + +type PartitionMap = BTreeMap, Arc>>>>; + +/// Wraps a runtime context with in-memory storage for consensus scratch data. +/// +/// Finalized archives and QMDB still use the normal runtime context. This +/// wrapper is only used for state that can be reconstructed from finalized +/// blocks, so it avoids Docker-volume write latency without putting durable +/// state on tmpfs. +pub(crate) struct NoSyncStorage { + inner: C, + partitions: Arc>, + checkpoint_interval: u64, +} + +impl NoSyncStorage { + /// Create a wrapper around an existing context. 
+ pub(crate) fn new(inner: C, checkpoint_interval: u64) -> Self { + Self { + inner, + partitions: Arc::new(Mutex::new(BTreeMap::new())), + checkpoint_interval: checkpoint_interval.max(1), + } + } +} + +impl Clone for NoSyncStorage +where + C: Supervisor, +{ + fn clone(&self) -> Self { + Self { + inner: self.inner.child("nosync_storage"), + partitions: self.partitions.clone(), + checkpoint_interval: self.checkpoint_interval, + } + } +} + +impl std::fmt::Debug for NoSyncStorage +where + C: std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NoSyncStorage") + .field("inner", &self.inner) + .field("checkpoint_interval", &self.checkpoint_interval) + .finish_non_exhaustive() + } +} + +/// Blob backed either by scratch memory or by the underlying persistent runtime. +#[derive(Clone, Debug)] +pub(crate) enum NoSyncBlob { + Memory { + content: Arc>>, + pool: BufferPool, + }, + /// Direct passthrough to underlying blob — no shadow, no interception. + Passthrough(B), +} + +/// Returns `true` if this partition MUST be written to disk. +/// +/// The marshal's application-metadata partition is the only one that needs +/// durability through NoSyncStorage — it tracks the last acknowledged height +/// so the marshal knows which blocks to redeliver on restart. Everything +/// else (consensus caches, marshal freezer data, journals) can live in memory +/// because it is either reconstructed from the finalized block archive on +/// startup or is transient consensus state. +/// +/// The finalized block archives and QMDB bypass NoSyncStorage entirely (they +/// use the raw runtime context), so durability of actual block data and state +/// is not affected by this function. 
+fn is_durable_partition(partition: &str) -> bool { + partition.ends_with("-application-metadata") +} + +impl Supervisor for NoSyncStorage +where + C: Supervisor, +{ + fn name(&self) -> commonware_runtime::Name { + self.inner.name() + } + + fn child(&self, label: &'static str) -> Self { + Self { + inner: self.inner.child(label), + partitions: self.partitions.clone(), + checkpoint_interval: self.checkpoint_interval, + } + } + + fn with_attribute(self, key: &'static str, value: impl std::fmt::Display) -> Self { + Self { + inner: self.inner.with_attribute(key, value), + partitions: self.partitions, + checkpoint_interval: self.checkpoint_interval, + } + } +} + +impl Spawner for NoSyncStorage +where + C: Spawner, +{ + fn shared(self, blocking: bool) -> Self { + Self { + inner: self.inner.shared(blocking), + partitions: self.partitions, + checkpoint_interval: self.checkpoint_interval, + } + } + + fn dedicated(self) -> Self { + Self { + inner: self.inner.dedicated(), + partitions: self.partitions, + checkpoint_interval: self.checkpoint_interval, + } + } + + fn spawn(self, f: F) -> Handle + where + F: FnOnce(Self) -> Fut + Send + 'static, + Fut: Future + Send + 'static, + T: Send + 'static, + { + let partitions = self.partitions; + let checkpoint_interval = self.checkpoint_interval; + self.inner.spawn(move |context| f(Self { inner: context, partitions, checkpoint_interval })) + } + + async fn stop(self, value: i32, timeout: Option) -> Result<(), Error> { + self.inner.stop(value, timeout).await + } + + fn stopped(&self) -> signal::Signal { + self.inner.stopped() + } +} + +impl Metrics for NoSyncStorage +where + C: Metrics, +{ + fn register(&self, name: N, help: H, metric: M) -> Registered + where + N: Into, + H: Into, + M: Metric, + { + self.inner.register(name, help, metric) + } + + fn encode(&self) -> String { + self.inner.encode() + } +} + +impl Tracing for NoSyncStorage +where + C: Tracing, +{ + fn with_span(self) -> Self { + Self { + inner: self.inner.with_span(), + 
partitions: self.partitions, + checkpoint_interval: self.checkpoint_interval, + } + } +} + +impl governor::clock::Clock for NoSyncStorage +where + C: governor::clock::Clock, +{ + type Instant = SystemTime; + + fn now(&self) -> Self::Instant { + self.inner.now() + } +} + +impl governor::clock::ReasonablyRealtime for NoSyncStorage where + C: governor::clock::ReasonablyRealtime + governor::clock::Clock +{ +} + +impl Clock for NoSyncStorage +where + C: Clock, +{ + fn current(&self) -> SystemTime { + self.inner.current() + } + + fn sleep(&self, duration: Duration) -> impl Future + Send + 'static { + self.inner.sleep(duration) + } + + fn sleep_until(&self, deadline: SystemTime) -> impl Future + Send + 'static { + self.inner.sleep_until(deadline) + } +} + +impl BufferPooler for NoSyncStorage +where + C: BufferPooler, +{ + fn network_buffer_pool(&self) -> &BufferPool { + self.inner.network_buffer_pool() + } + + fn storage_buffer_pool(&self) -> &BufferPool { + self.inner.storage_buffer_pool() + } +} + +impl RngCore for NoSyncStorage +where + C: RngCore, +{ + fn next_u32(&mut self) -> u32 { + self.inner.next_u32() + } + + fn next_u64(&mut self) -> u64 { + self.inner.next_u64() + } + + fn fill_bytes(&mut self, dest: &mut [u8]) { + self.inner.fill_bytes(dest); + } + + fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand::Error> { + self.inner.try_fill_bytes(dest) + } +} + +impl CryptoRng for NoSyncStorage where C: CryptoRng + RngCore {} + +impl Storage for NoSyncStorage +where + C: BufferPooler + Storage, +{ + type Blob = NoSyncBlob; + + async fn open_versioned( + &self, + partition: &str, + name: &[u8], + versions: RangeInclusive, + ) -> Result<(Self::Blob, u64, u16), Error> { + if is_durable_partition(partition) { + let (blob, size, version) = + self.inner.open_versioned(partition, name, versions).await?; + return Ok((NoSyncBlob::Passthrough(blob), size, version)); + } + + let mut partitions = self.partitions.lock().expect("scratch storage mutex poisoned"); + let 
content = partitions + .entry(partition.to_string()) + .or_default() + .entry(name.to_vec()) + .or_default() + .clone(); + let size = content.read().expect("scratch blob lock poisoned").len() as u64; + let version = *versions.end(); + Ok(( + NoSyncBlob::Memory { content, pool: self.storage_buffer_pool().clone() }, + size, + version, + )) + } + + async fn remove(&self, partition: &str, name: Option<&[u8]>) -> Result<(), Error> { + if is_durable_partition(partition) { + return self.inner.remove(partition, name).await; + } + + let mut partitions = self.partitions.lock().expect("scratch storage mutex poisoned"); + match name { + Some(name) => { + if let Some(partition) = partitions.get_mut(partition) { + partition.remove(name); + } + } + None => { + partitions.remove(partition); + } + } + Ok(()) + } + + async fn scan(&self, partition: &str) -> Result>, Error> { + if is_durable_partition(partition) { + return self.inner.scan(partition).await; + } + + let partitions = self.partitions.lock().expect("scratch storage mutex poisoned"); + let mut names = partitions + .get(partition) + .map(|partition| partition.keys().cloned().collect::>()) + .unwrap_or_default(); + names.sort(); + Ok(names) + } +} + +impl Blob for NoSyncBlob +where + B: Blob, +{ + fn read_at_buf( + &self, + offset: u64, + len: usize, + bufs: impl Into + Send, + ) -> impl Future> + Send { + async move { + match self { + Self::Memory { content, .. 
} => { + let offset: usize = offset.try_into().map_err(|_| Error::OffsetOverflow)?; + let content = content.read().expect("scratch blob lock poisoned"); + let end = offset.checked_add(len).ok_or(Error::OffsetOverflow)?; + if end > content.len() { + return Err(Error::BlobInsufficientLength); + } + let _: iobuf::IoBufsMut = bufs.into(); + Ok(content[offset..end].to_vec().into()) + } + Self::Passthrough(blob) => blob.read_at_buf(offset, len, bufs).await, + } + } + } + + fn read_at( + &self, + offset: u64, + len: usize, + ) -> impl Future> + Send { + async move { + match self { + Self::Memory { pool, .. } => self.read_at_buf(offset, len, pool.alloc(len)).await, + Self::Passthrough(blob) => blob.read_at(offset, len).await, + } + } + } + + fn write_at( + &self, + offset: u64, + bufs: impl Into + Send, + ) -> impl Future> + Send { + async move { + match self { + Self::Memory { content, .. } => { + let buf = bufs.into().coalesce(); + let offset: usize = offset.try_into().map_err(|_| Error::OffsetOverflow)?; + let end = offset.checked_add(buf.len()).ok_or(Error::OffsetOverflow)?; + let mut content = content.write().expect("scratch blob lock poisoned"); + if end > content.len() { + content.resize(end, 0); + } + content[offset..end].copy_from_slice(buf.as_ref()); + Ok(()) + } + Self::Passthrough(blob) => blob.write_at(offset, bufs).await, + } + } + } + + fn write_at_sync( + &self, + offset: u64, + bufs: impl Into + Send, + ) -> impl Future> + Send { + async move { + match self { + Self::Memory { content, .. 
} => { + let buf = bufs.into().coalesce(); + let offset: usize = offset.try_into().map_err(|_| Error::OffsetOverflow)?; + let end = offset.checked_add(buf.len()).ok_or(Error::OffsetOverflow)?; + let mut content = content.write().expect("scratch blob lock poisoned"); + if end > content.len() { + content.resize(end, 0); + } + content[offset..end].copy_from_slice(buf.as_ref()); + Ok(()) + } + Self::Passthrough(blob) => blob.write_at_sync(offset, bufs).await, + } + } + } + + fn resize(&self, len: u64) -> impl Future> + Send { + async move { + match self { + Self::Memory { content, .. } => { + let len: usize = len.try_into().map_err(|_| Error::OffsetOverflow)?; + content.write().expect("scratch blob lock poisoned").resize(len, 0); + Ok(()) + } + Self::Passthrough(blob) => blob.resize(len).await, + } + } + } + + async fn sync(&self) -> Result<(), Error> { + match self { + Self::Memory { .. } => Ok(()), + Self::Passthrough(blob) => blob.sync().await, + } + } +} diff --git a/crates/node/runner/src/runner.rs b/crates/node/runner/src/runner.rs index f6cc161..6f76a8f 100644 --- a/crates/node/runner/src/runner.rs +++ b/crates/node/runner/src/runner.rs @@ -1,59 +1,595 @@ -use std::{sync::Arc, time::Duration}; +use std::{ + collections::{BTreeMap, HashSet}, + ffi::OsString, + path::{Path, PathBuf}, + sync::{ + Arc, + atomic::{AtomicBool, Ordering}, + }, + time::Duration, +}; use alloy_consensus::Header; -use alloy_primitives::{Address, B256}; +use alloy_primitives::{Address, B256, keccak256}; use anyhow::Context as _; +use commonware_actor::Feedback; use commonware_consensus::{ - Reporters, + Block as _, Reporters, marshal::{ core::Mailbox, standard::{Inline, Standard}, }, - simplex::{self, elector::Random, types::Finalization}, + simplex::{ + self, elector::Random, scheme::bls12381_threshold::vrf::Seedable as _, types::Finalization, + }, types::{Epoch, FixedEpocher, ViewDelta}, }; -use commonware_cryptography::{bls12381::primitives::variant::MinSig, ed25519}; -use 
commonware_p2p::{Manager, TrackedPeers}; -use commonware_parallel::Sequential; -use commonware_runtime::{Metrics as _, Spawner, buffer::paged::CacheRef, tokio}; +use commonware_cryptography::{ + Committable as _, Hasher as _, Sha256, bls12381::primitives::variant::MinSig, ed25519, +}; +use commonware_p2p::{Blocker, Manager, Receiver as _, Recipients, Sender as _, TrackedPeers}; +use commonware_runtime::{ + Clock as _, Handle as RuntimeHandle, Metrics as _, Spawner, Supervisor as _, ThreadPooler as _, + buffer::paged::CacheRef, tokio as cw_tokio, +}; +use commonware_storage::archive::{Archive, Identifier as ArchiveId}; use commonware_utils::{NZU64, NZUsize, acknowledgement::Exact, ordered::Set}; use futures::StreamExt; +use kora_consensus::BlockExecution; use kora_domain::{Block, BlockCfg, BootstrapConfig, ConsensusDigest, LedgerEvent, Tx, TxCfg}; -use kora_executor::{BlockContext, RevmExecutor}; -use kora_ledger::{LedgerService, LedgerView}; +use kora_executor::{BaseFeeParams, BlockContext, RevmExecutor, calculate_base_fee}; +use kora_indexer::{BlockIndex, EMPTY_ROOT_HASH, IndexedBlock}; +use kora_ledger::{LedgerService, LedgerView, LiveState}; use kora_marshal::{ArchiveInitializer, BroadcastInitializer, PeerInitializer}; +use kora_metrics::AppMetrics; use kora_reporters::{BlockContextProvider, FinalizedReporter, NodeStateReporter, SeedReporter}; use kora_service::{NodeRunContext, NodeRunner}; use kora_simplex::{DEFAULT_MAILBOX_SIZE as MAILBOX_SIZE, DefaultPool}; use kora_transport::NetworkTransport; -use kora_txpool::{PoolConfig, TransactionValidator}; -use tracing::{debug, info, trace, warn}; - -use crate::{RevmApplication, RunnerError, scheme::ThresholdScheme}; - -const BLOCK_CODEC_MAX_TXS: usize = 64; -// Match `PoolConfig::default().max_tx_size` (= 128 KiB) and the domain-level -// `BlockCfg::default().tx.max_tx_bytes` (also 128 KiB). 
The previous 1024-byte -// cap rejected every real contract deploy: the validator admitted contracts -// up to 128 KiB into the mempool, but the block codec then refused to encode -// anything > 1 KiB, so the producer silently skipped them. Trivial value -// transfers and ~22-byte init contracts mined; any actual Solidity contract -// (1+ KiB of bytecode) was dropped. See PR fixing this for the full diagnostic. -const BLOCK_CODEC_MAX_TX_BYTES: usize = 128 * 1024; +use kora_txpool::{PoolConfig, TransactionPool, TransactionValidator}; +use tracing::{debug, error, info, trace, warn}; + +use crate::{ + RevmApplication, RunnerError, no_sync_storage::NoSyncStorage, scheme::ThresholdScheme, +}; + +/// Adapter that bridges `kora_metrics::MetricsRegister` to the commonware +/// runtime's `Metrics` trait. +struct RuntimeMetrics<'a>(&'a cw_tokio::Context); + +impl kora_metrics::MetricsRegister for RuntimeMetrics<'_> { + fn register, H: Into>( + &self, + name: N, + help: H, + metric: impl prometheus_client::registry::Metric, + ) { + // AppMetrics lives for the process lifetime; keep commonware's + // registration handles alive for the same duration. + std::mem::forget(commonware_runtime::Metrics::register(self.0, name, help, metric)); + } +} + const EPOCH_LENGTH: u64 = u64::MAX; const PARTITION_PREFIX: &str = "kora"; +const TXPOOL_CLEANUP_INTERVAL: Duration = Duration::from_secs(60); +const PARTITION_CHECK_INTERVAL: Duration = Duration::from_secs(30); +const RUNTIME_DIR_ENV: &str = "KORA_RUNTIME_DIR"; +const CHECKPOINT_INTERVAL_ENV: &str = "KORA_CHECKPOINT_INTERVAL"; +const DEFAULT_CHECKPOINT_INTERVAL: u64 = 256; + +/// Maximum number of transaction hashes retained in the gossip seen-set. +/// When the set exceeds this size it is cleared to avoid unbounded memory +/// growth. Under normal load the TTL-based cleanup keeps the set far smaller. 
+const TX_GOSSIP_SEEN_SET_CAPACITY: usize = 65_536; + +/// Buffer size for the internal channel that forwards locally accepted +/// transactions to the P2P gossip broadcast task. +const TX_GOSSIP_OUTBOUND_BUFFER: usize = 4096; type Peer = ed25519::PublicKey; type CertArchive = Finalization; type MarshalMailbox = Mailbox>; type NodeStateRptr = NodeStateReporter; -fn default_page_cache(context: &tokio::Context) -> CacheRef { +/// A [`Blocker`] that suppresses peer bans during catch-up but delegates to +/// the real oracle blocker during normal operation. +/// +/// When a restarted node catches up, the resolver's `verify_block()` may return +/// `false` because parent state snapshots are missing (not because the peer sent +/// invalid data). The default blocker (`transport.oracle`) permanently blocks +/// that peer, and in a 4-validator cluster all 3 peers get blocked within +/// milliseconds, making catch-up impossible. +/// +/// `GraduatedBlocker` solves this by checking a shared `catching_up` flag: +/// - **During catch-up** (`catching_up = true`): block requests are logged at +/// `warn` level but suppressed, allowing the resolver to retry with other +/// peers. +/// - **During normal operation** (`catching_up = false`): block requests are +/// forwarded to the underlying oracle, which disconnects the peer and +/// prevents future connections. +/// +/// The `catching_up` flag is set to `true` when the node is recovering from a +/// restart (i.e., `recovered_head_height` is `Some`) and cleared to `false` +/// for fresh genesis starts. A future improvement should wire a "backfill +/// complete" signal from the resolver to clear this flag once historical block +/// sync finishes. +#[derive(Clone, Debug)] +struct GraduatedBlocker { + oracle: commonware_p2p::authenticated::discovery::Oracle

, + catching_up: Arc, +} + +impl GraduatedBlocker

{ + const fn new( + oracle: commonware_p2p::authenticated::discovery::Oracle

, + catching_up: Arc, + ) -> Self { + Self { oracle, catching_up } + } +} + +impl Blocker for GraduatedBlocker

{ + type PublicKey = P; + + fn block(&mut self, peer: Self::PublicKey) -> Feedback { + let catching_up = self.catching_up.load(Ordering::Relaxed); + if catching_up { + warn!(?peer, "GraduatedBlocker: suppressing block request during catch-up"); + Feedback::Ok + } else { + warn!(?peer, "GraduatedBlocker: blocking Byzantine peer via oracle"); + self.oracle.block(peer) + } + } +} + +fn default_page_cache(context: &cw_tokio::Context) -> CacheRef { DefaultPool::init(context) } -const fn block_codec_cfg() -> BlockCfg { - BlockCfg { max_txs: BLOCK_CODEC_MAX_TXS, tx: TxCfg { max_tx_bytes: BLOCK_CODEC_MAX_TX_BYTES } } +/// Resolve the storage directory used by the Commonware runtime. +/// +/// By default this lives under `data_dir/runtime` so validator state survives +/// restarts. Local devnets can set `KORA_RUNTIME_DIR` to put consensus journals +/// on tmpfs and avoid Docker-volume fsync latency. +#[must_use] +pub fn runtime_storage_directory(data_dir: &Path) -> PathBuf { + runtime_storage_directory_from(data_dir, std::env::var_os(RUNTIME_DIR_ENV)) +} + +fn runtime_storage_directory_from(data_dir: &Path, override_dir: Option) -> PathBuf { + match override_dir { + Some(path) if !path.is_empty() => PathBuf::from(path), + _ => data_dir.join("runtime"), + } +} + +fn checkpoint_interval() -> u64 { + std::env::var(CHECKPOINT_INTERVAL_ENV) + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(DEFAULT_CHECKPOINT_INTERVAL) +} + +const fn block_codec_cfg(config: &kora_config::ConsensusBlockCodecConfig) -> BlockCfg { + BlockCfg { + max_txs: config.max_txs.get(), + tx: TxCfg { max_tx_bytes: config.max_tx_bytes.get() }, + } +} + +fn seed_genesis_block_index(index: &BlockIndex, genesis: &Block, gas_limit: u64) { + index.insert_block( + IndexedBlock { + hash: genesis.id().0, + number: 0, + parent_hash: genesis.parent.0, + state_root: genesis.state_root.0, + transactions_root: EMPTY_ROOT_HASH, + receipts_root: EMPTY_ROOT_HASH, + timestamp: 
genesis.timestamp, + gas_limit, + gas_used: 0, + base_fee_per_gas: Some(kora_config::INITIAL_BASE_FEE), + mix_hash: genesis.prevrandao, + logs_bloom: alloy_primitives::Bloom::ZERO, + size: 508, + transaction_hashes: Vec::new(), + }, + Vec::new(), + Vec::new(), + ); +} + +/// Compute the consensus digest for a block hash (BlockId). +/// +/// Mirrors `digest_for_block_id` in `kora_domain::block` which is private. +fn consensus_digest_for_hash(block_hash: B256) -> ConsensusDigest { + let mut hasher = Sha256::default(); + hasher.update(block_hash.as_slice()); + hasher.finalize() +} + +/// Seed the [`RevmApplication`] block-fee cache with entries from the +/// [`BlockIndex`] so that the first blocks after restart derive a correct +/// EIP-1559 base fee. +/// +/// Seeds the last few blocks ending at `head_height`. +fn seed_block_fee_cache( + app: &RevmApplication, + block_index: &BlockIndex, + head_height: u64, +) { + // Seed the last few blocks so that both the HEAD and its recent + // ancestors are available for base-fee derivation. 
+ let start = head_height.saturating_sub(4); + let mut entries = Vec::new(); + for h in start..=head_height { + if let Some(indexed) = block_index.get_block_by_number(h) { + let digest = consensus_digest_for_hash(indexed.hash); + let base_fee = indexed.base_fee_per_gas.unwrap_or(kora_config::INITIAL_BASE_FEE); + entries.push((digest, indexed.gas_used, base_fee)); + } + } + if !entries.is_empty() { + app.seed_block_fees(&entries); + debug!( + head_height, + seeded = entries.len(), + "seeded block-fee cache from block index for EIP-1559 base fee recovery" + ); + } +} + +fn seed_hash(seed: impl commonware_codec::Encode) -> B256 { + keccak256(seed.encode()) +} + +fn index_recovered_block( + index: &kora_indexer::BlockIndex, + block: &Block, + provider: &RevmContextProvider, +) { + let block_context = provider.context(block); + let transaction_hashes = block.txs.iter().map(|tx| keccak256(&tx.bytes)).collect(); + let tx_bytes_total: u64 = block.txs.iter().map(|tx| tx.bytes.len() as u64).sum(); + let indexed_block = kora_indexer::IndexedBlock { + hash: block.id().0, + number: block.height, + parent_hash: block.parent.0, + state_root: block.state_root.0, + transactions_root: EMPTY_ROOT_HASH, + receipts_root: EMPTY_ROOT_HASH, + timestamp: block_context.header.timestamp, + gas_limit: block_context.header.gas_limit, + gas_used: 0, + base_fee_per_gas: block_context.header.base_fee_per_gas, + mix_hash: block.prevrandao, + logs_bloom: alloy_primitives::Bloom::ZERO, + size: 508 + tx_bytes_total, + transaction_hashes, + }; + index.insert_block(indexed_block, Vec::new(), Vec::new()); +} + +/// Number of recent blocks to restore during startup to pre-populate the +/// snapshot cache. This ensures that blocks arriving shortly after restart +/// can find their parent snapshot without entering catch-up mode. +/// +/// A larger window (64 blocks) means the node can survive outages where +/// the network advances up to 64 blocks before the node restarts. 
Blocks +/// within this window are resolved from the local archive without needing +/// catch-up trust. Beyond this window, the catch-up mechanism in +/// `RevmApplication::verify_block` handles the gap. +const SNAPSHOT_PREPOPULATE_COUNT: u64 = 64; + +async fn recover_finalized_state( + ledger: &LedgerService, + block_index: &Arc, + finalized_blocks: &FB, + finalizations_by_height: &FC, + provider: &RevmContextProvider, + data_dir: &Path, + chain_id: u64, +) -> anyhow::Result> +where + FB: Archive, + FC: Archive, +{ + let block_ranges: Vec<_> = finalized_blocks.ranges().collect(); + let finalization_ranges: Vec<_> = finalizations_by_height.ranges().collect(); + + for (start, end) in finalization_ranges { + for height in start..=end { + if let Some(finalization) = finalizations_by_height + .get(ArchiveId::Index(height)) + .await + .with_context(|| format!("load finalization at height {height}"))? + { + ledger + .set_seed(finalization.proposal.payload, seed_hash(finalization.seed())) + .await; + } + } + } + + let mut recovered = 0u64; + let mut recovered_blocks = BTreeMap::new(); + for (start, end) in block_ranges { + for height in start..=end { + let Some(block) = finalized_blocks + .get(ArchiveId::Index(height)) + .await + .with_context(|| format!("load finalized block at height {height}"))? 
+ else { + continue; + }; + + index_recovered_block(block_index, &block, provider); + recovered_blocks.insert(height, block); + recovered += 1; + } + } + + let head_height = if let Some((_, archive_head)) = recovered_blocks.last_key_value() { + let (restored_height, replayed_tail) = restore_checkpoint_and_replay_tail( + ledger, + &recovered_blocks, + provider, + data_dir, + chain_id, + block_index, + ) + .await?; + info!( + archive_head_height = archive_head.height, + restored_height, + blocks = recovered, + "recovered finalized ledger head from archive" + ); + Some((restored_height, replayed_tail)) + } else { + None + }; + + Ok(head_height) +} + +async fn restore_checkpoint_and_replay_tail( + ledger: &LedgerService, + recovered_blocks: &BTreeMap, + provider: &RevmContextProvider, + data_dir: &Path, + chain_id: u64, + block_index: &BlockIndex, +) -> anyhow::Result<(u64, bool)> { + let Some((_, head)) = recovered_blocks.last_key_value() else { + return Ok((0, false)); + }; + let marker_digest = crate::commit_marker::read_commit_marker(data_dir); + let checkpoint_height = marker_digest.and_then(|marker| { + recovered_blocks + .iter() + .find_map(|(height, block)| (block.commitment() == marker).then_some(*height)) + }); + + match checkpoint_height { + Some(height) => { + let checkpoint = &recovered_blocks[&height]; + ledger.restore_persisted_snapshot(checkpoint).await; + info!( + checkpoint_height = checkpoint.height, + archive_head_height = head.height, + replay_blocks = recovered_blocks.len().saturating_sub( + recovered_blocks + .keys() + .position(|candidate| *candidate == height) + .map_or(0, |index| index + 1) + ), + "restored QMDB checkpoint and replaying archive tail" + ); + + let executor = RevmExecutor::new(chain_id); + let mut restored_height = checkpoint.height; + let mut restored_digest = checkpoint.commitment(); + let mut replayed_tail = false; + for expected_height in checkpoint.height.saturating_add(1)..=head.height { + let Some(block) = 
recovered_blocks.get(&expected_height) else { + warn!( + expected_height, + archive_head_height = head.height, + restored_height, + "stopping finalized archive replay at durable gap" + ); + break; + }; + if block.parent() != restored_digest { + warn!( + expected_height, + restored_height, + expected_parent = ?restored_digest, + actual_parent = ?block.parent(), + "stopping finalized archive replay at non-contiguous parent" + ); + break; + } + replay_finalized_block(ledger, provider, &executor, block, block_index).await?; + restored_height = block.height; + restored_digest = block.commitment(); + replayed_tail = true; + } + Ok((restored_height, replayed_tail)) + } + None => { + if let Some(marker) = marker_digest { + // A commit marker exists on disk but does not match any + // block in the archive. QMDB was last committed at a + // height we cannot identify, so creating a snapshot from + // the archive head would produce inconsistent state. + let head_digest = head.commitment(); + error!( + marker_digest = %hex::encode(marker.as_ref()), + head_digest = %hex::encode(head_digest.as_ref()), + archive_head_height = head.height, + "commit marker does not match any archived block; \ + QMDB state is at an unknown height. Refusing to \ + start with potentially inconsistent state. \ + Re-sync from a trusted snapshot or wipe state." + ); + anyhow::bail!( + "commit marker {} does not match any archived block; \ + cannot safely determine QMDB state height \ + (archive head is at height {})", + hex::encode(marker.as_ref()), + head.height, + ); + } + // No commit marker at all -- fresh node or upgrade from a + // pre-marker build. Safe to trust the archive head. 
+ info!( + archive_head_height = head.height, + "no commit marker found; restoring archive head as initial \ + QMDB state (expected for fresh nodes or first startup \ + after upgrade)" + ); + ledger.restore_persisted_snapshot(head).await; + Ok((head.height, false)) + } + } +} + +async fn replay_finalized_block( + ledger: &LedgerService, + provider: &RevmContextProvider, + executor: &RevmExecutor, + block: &Block, + block_index: &BlockIndex, +) -> anyhow::Result<()> { + let digest = block.commitment(); + if ledger.query_state_root(digest).await.is_some() { + return Ok(()); + } + + let parent_digest = block.parent(); + let parent_snapshot = ledger.parent_snapshot(parent_digest).await.with_context(|| { + format!("missing parent snapshot while replaying height {}", block.height) + })?; + let block_context = provider.context(block); + let execution = BlockExecution::execute(&parent_snapshot, executor, &block_context, &block.txs) + .await + .with_context(|| format!("failed to replay finalized block at height {}", block.height))?; + let state_root = ledger + .compute_root_from_store(parent_digest, &execution.outcome.changes) + .await + .with_context(|| format!("failed to compute replay root at height {}", block.height))?; + anyhow::ensure!( + state_root == block.state_root, + "replayed root mismatch at height {}: expected {:?}, computed {:?}", + block.height, + block.state_root, + state_root + ); + + // Re-index the block with the real gas_used from execution so that + // subsequent blocks can derive their EIP-1559 base fee correctly. + // The initial `index_recovered_block` call stored gas_used=0 because + // the archive does not include execution results. 
+ let tx_bytes_total: u64 = block.txs.iter().map(|tx| tx.bytes.len() as u64).sum(); + let indexed_block = IndexedBlock { + hash: block.id().0, + number: block.height, + parent_hash: block.parent.0, + state_root: block.state_root.0, + transactions_root: EMPTY_ROOT_HASH, + receipts_root: EMPTY_ROOT_HASH, + timestamp: block_context.header.timestamp, + gas_limit: block_context.header.gas_limit, + gas_used: execution.outcome.gas_used, + base_fee_per_gas: block_context.header.base_fee_per_gas, + mix_hash: block.prevrandao, + logs_bloom: alloy_primitives::Bloom::ZERO, + size: 508 + tx_bytes_total, + transaction_hashes: block.txs.iter().map(|tx| keccak256(&tx.bytes)).collect(), + }; + block_index.insert_block(indexed_block, Vec::new(), Vec::new()); + + let merged_changes = parent_snapshot.state.merge_changes(execution.outcome.changes.clone()); + let next_state = kora_overlay::OverlayState::new(parent_snapshot.state.base(), merged_changes); + ledger + .insert_snapshot( + digest, + parent_digest, + next_state, + state_root, + execution.outcome.changes, + &block.txs, + ) + .await; + Ok(()) +} + +/// Pre-populate the in-memory snapshot cache by restoring recent finalized +/// blocks from the archive. +/// +/// After a restart, only the HEAD snapshot is in the cache. The consensus +/// engine's ancestry walk (`verify`) stops when it hits a block whose +/// `state_root` is already known. By restoring snapshots for the last N +/// blocks, the ancestry walk terminates earlier and fewer blocks need to be +/// re-verified. Any blocks whose parent snapshot is genuinely missing (due +/// to gaps larger than the prepopulation window) are handled by the +/// catch-up trust mechanism in `verify_block`. +async fn prepopulate_snapshot_cache( + ledger: &LedgerService, + finalized_blocks: &FB, + head_height: u64, + count: u64, +) where + FB: Archive, +{ + if head_height == 0 || count == 0 { + return; + } + + // Restore blocks from (head_height - count) to (head_height - 1). 
+ // HEAD itself is already restored by `recover_finalized_state`. + let start_height = head_height.saturating_sub(count); + if start_height == head_height { + return; + } + + let mut populated = 0u64; + for height in start_height..head_height { + match finalized_blocks.get(ArchiveId::Index(height)).await { + Ok(Some(block)) => { + let digest = block.commitment(); + // Skip if already in the cache. + if ledger.query_state_root(digest).await.is_some() { + continue; + } + ledger.restore_persisted_snapshot(&block).await; + populated += 1; + } + Ok(None) => { + debug!(height, "prepopulate: no block at height, stopping"); + break; + } + Err(err) => { + warn!(height, error = ?err, "prepopulate: failed to load block"); + break; + } + } + } + + if populated > 0 { + info!( + populated, + range_start = start_height, + head_height, + "pre-populated snapshot cache with recent finalized blocks" + ); + } } #[derive(Clone)] @@ -81,23 +617,54 @@ impl From for ConstantSchemeProvider { #[derive(Clone, Debug)] struct RevmContextProvider { gas_limit: u64, + fee_recipient: Address, + block_index: Arc, +} + +impl RevmContextProvider { + /// Collect recent block hashes from the block index for the BLOCKHASH opcode. + fn recent_block_hashes(&self, current_height: u64) -> std::collections::HashMap { + self.block_index.recent_block_hashes(current_height) + } } impl BlockContextProvider for RevmContextProvider { fn context(&self, block: &Block) -> BlockContext { + // Compute EIP-1559 base fee from the parent block's gas usage. + // The parent should already be indexed when finalizing in order. + // Fall back to INITIAL_BASE_FEE for genesis (height 0) or if the + // parent is not yet indexed (e.g. during catch-up). 
+ let base_fee = if block.height == 0 { + kora_config::INITIAL_BASE_FEE + } else { + self.block_index + .get_block_by_number(block.height - 1) + .map(|parent| { + calculate_base_fee( + parent.base_fee_per_gas.unwrap_or(kora_config::INITIAL_BASE_FEE), + parent.gas_used, + parent.gas_limit, + &BaseFeeParams::DEFAULT, + ) + }) + .unwrap_or(kora_config::INITIAL_BASE_FEE) + }; + let header = Header { number: block.height, - timestamp: block.height, + timestamp: block.timestamp, gas_limit: self.gas_limit, - beneficiary: Address::ZERO, - base_fee_per_gas: Some(0), + beneficiary: self.fee_recipient, + base_fee_per_gas: Some(base_fee), ..Default::default() }; + let recent_hashes = self.recent_block_hashes(block.height); BlockContext::new(header, B256::ZERO, block.prevrandao) + .with_recent_block_hashes(recent_hashes) } } -fn spawn_ledger_observers(service: LedgerService, spawner: S) { +fn spawn_ledger_observers(service: LedgerService, spawner: S, data_dir: PathBuf) { let mut receiver = service.subscribe(); spawner.shared(true).spawn(move |_| async move { while let Some(event) = receiver.next().await { @@ -110,12 +677,155 @@ fn spawn_ledger_observers(service: LedgerService, spawner: S) { } LedgerEvent::SnapshotPersisted(digest) => { trace!(?digest, "snapshot persisted"); + if let Err(e) = crate::commit_marker::write_commit_marker(&data_dir, &digest) { + warn!( + error = %e, + ?digest, + "failed to write commit marker after persist" + ); + } + } + } + } + }); +} + +fn spawn_txpool_cleanup(pool: TransactionPool, context: cw_tokio::Context) { + context.child("txpool_cleanup").shared(false).spawn(move |ctx| async move { + loop { + ctx.sleep(TXPOOL_CLEANUP_INTERVAL).await; + let removed = pool.cleanup(); + if removed > 0 { + debug!(removed, "expired transactions cleaned from txpool"); + } + } + }); +} + +/// Bounded seen-set for transaction gossip de-duplication. 
+/// +/// Tracks the hashes of recently seen transactions so we neither re-broadcast +/// locally originated transactions that come back from peers nor re-insert +/// gossipped transactions we already have. When the set exceeds +/// [`TX_GOSSIP_SEEN_SET_CAPACITY`] it is cleared wholesale -- this is cheaper +/// than an LRU and perfectly safe because the txpool itself provides the +/// ultimate dedup (via `AlreadyExists` / `NonceAlreadyInPool`). +type SeenSet = Arc>>; + +fn new_seen_set() -> SeenSet { + Arc::new(parking_lot::Mutex::new(HashSet::with_capacity(1024))) +} + +/// Returns `true` if the hash was **not** previously present (i.e. it is new). +fn mark_seen(seen: &SeenSet, hash: B256) -> bool { + let mut set = seen.lock(); + if set.len() >= TX_GOSSIP_SEEN_SET_CAPACITY { + debug!(capacity = TX_GOSSIP_SEEN_SET_CAPACITY, "tx gossip seen-set full, clearing"); + set.clear(); + } + set.insert(hash) +} + +/// Periodically check peer connectivity and log warnings when the network +/// appears degraded or partitioned. +/// +/// This task reads the peer count from `NodeState` every +/// [`PARTITION_CHECK_INTERVAL`] and compares it against the expected peer +/// count to determine partition status. Warnings and errors are emitted so +/// operators (and log-based alerting) can detect connectivity issues even +/// without Prometheus. 
+fn spawn_partition_monitor(node_state: kora_rpc::NodeState, context: cw_tokio::Context) { + context.child("partition_monitor").shared(false).spawn(move |ctx| async move { + loop { + ctx.sleep(PARTITION_CHECK_INTERVAL).await; + let status = node_state.status(); + match status.partition_status { + kora_rpc::PartitionStatus::Healthy => { + trace!( + peer_count = status.peer_count, + expected = status.total_expected_peers, + "partition check: healthy" + ); + } + kora_rpc::PartitionStatus::Degraded => { + warn!( + peer_count = status.peer_count, + expected = status.total_expected_peers, + "partition check: DEGRADED — some peers missing but quorum still possible" + ); + } + kora_rpc::PartitionStatus::Partitioned => { + error!( + peer_count = status.peer_count, + expected = status.total_expected_peers, + "partition check: PARTITIONED — below quorum threshold, consensus cannot progress" + ); } } } }); } +/// Monitor critical consensus infrastructure tasks for unexpected termination. +/// +/// Each of the three handles (`engine`, `marshal`, `broadcast`) wraps a +/// long-lived actor that must never exit while the node is running. If any of +/// them resolves it means the actor either panicked (the commonware runtime +/// catches panics and returns [`commonware_runtime::Error::Exited`]) or the +/// runtime context was shut down. In either case the node can no longer make +/// progress on consensus, so we log an error and abort the process. +fn spawn_consensus_monitor( + context: cw_tokio::Context, + engine_handle: RuntimeHandle<()>, + marshal_handle: RuntimeHandle<()>, + broadcast_handle: RuntimeHandle<()>, +) { + spawn_task_watchdog(&context, "consensus_engine", engine_handle); + spawn_task_watchdog(&context, "marshal_actor", marshal_handle); + spawn_task_watchdog(&context, "broadcast_engine", broadcast_handle); +} + +/// Spawn a watchdog that awaits a critical task handle and aborts the process +/// if the task ever terminates. 
Under normal operation the handle never +/// resolves; if it does, consensus is irrecoverably broken. +/// +/// Before aborting, the watchdog sleeps briefly to allow the tracing subscriber +/// to flush buffered log output. This makes post-mortem diagnosis possible +/// even when the process is restarted by a supervisor immediately. +fn spawn_task_watchdog(context: &cw_tokio::Context, name: &'static str, handle: RuntimeHandle<()>) { + context.child(name).shared(true).spawn(move |ctx| async move { + let reason = match handle.await { + Ok(()) => { + error!(task = name, "critical task exited cleanly — this should never happen for a long-lived consensus actor"); + "exited cleanly (unexpected)" + } + Err(commonware_runtime::Error::Exited) => { + error!(task = name, "critical task panicked (runtime caught panic and returned Error::Exited)"); + "panicked (Error::Exited)" + } + Err(commonware_runtime::Error::Closed) => { + // Runtime context was shut down (e.g. SIGTERM). This is normal + // shutdown -- do NOT abort, just let the process exit cleanly so + // any in-progress cleanup (QMDB flush, log drain) can complete. + info!(task = name, "task stopped (runtime context closed during shutdown)"); + return; + } + Err(ref e) => { + error!(task = name, error = %e, error_debug = ?e, "critical task failed with unexpected error"); + "unexpected error" + } + }; + info!( + task = name, + reason, + "consensus infrastructure is dead — aborting process for supervisor restart" + ); + // Brief delay so the tracing subscriber can flush the log messages above. + ctx.sleep(Duration::from_millis(100)).await; + std::process::abort(); + }); +} + /// Production validator node runner. #[derive(Clone, Debug)] pub struct ProductionRunner { @@ -123,33 +833,31 @@ pub struct ProductionRunner { pub scheme: ThresholdScheme, /// Chain ID. pub chain_id: u64, - /// Gas limit per block. - pub gas_limit: u64, /// Bootstrap configuration. pub bootstrap: BootstrapConfig, /// Storage partition prefix. 
pub partition_prefix: String, /// Optional RPC configuration (state, bind address). pub rpc_config: Option<(kora_rpc::NodeState, std::net::SocketAddr)>, + /// Optional Prometheus metrics server address. + pub metrics_addr: Option, /// Secondary peers authorized to follow validator traffic without participating in consensus. pub secondary_peers: Vec, } impl ProductionRunner { /// Create a new production runner. - pub fn new( - scheme: ThresholdScheme, - chain_id: u64, - gas_limit: u64, - bootstrap: BootstrapConfig, - ) -> Self { + /// + /// The gas limit is sourced exclusively from `config.execution.gas_limit` + /// at runtime, so it is not accepted here. + pub fn new(scheme: ThresholdScheme, chain_id: u64, bootstrap: BootstrapConfig) -> Self { Self { scheme, chain_id, - gas_limit, bootstrap, partition_prefix: PARTITION_PREFIX.to_string(), rpc_config: None, + metrics_addr: None, secondary_peers: Vec::new(), } } @@ -161,6 +869,13 @@ impl ProductionRunner { self } + /// Configure Prometheus metrics server address. + #[must_use] + pub const fn with_metrics_addr(mut self, addr: std::net::SocketAddr) -> Self { + self.metrics_addr = Some(addr); + self + } + /// Configure secondary peers that should be tracked by the P2P oracle. 
#[must_use] pub fn with_secondary_peers(mut self, peers: Vec) -> Self { @@ -175,8 +890,16 @@ impl ProductionRunner { use commonware_runtime::Runner; use kora_transport::NetworkConfigExt; - let executor = tokio::Runner::new( - tokio::Config::default().with_storage_directory(config.data_dir.join("runtime")), + let runtime_dir = runtime_storage_directory(&config.data_dir); + info!( + runtime_dir = %runtime_dir.display(), + worker_threads = config.worker_threads, + "Starting Commonware runtime" + ); + let executor = cw_tokio::Runner::new( + cw_tokio::Config::default() + .with_storage_directory(runtime_dir) + .with_worker_threads(config.worker_threads), ); executor.start(|context| async move { let validator_key = config @@ -185,7 +908,7 @@ impl ProductionRunner { let transport = config .network - .build_local_transport(validator_key, context.clone()) + .build_local_transport(validator_key, context.child("transport")) .map_err(|e| anyhow::anyhow!("failed to build transport: {}", e))?; let ctx = @@ -193,26 +916,43 @@ impl ProductionRunner { let _ledger = self.run(ctx).await?; - futures::future::pending::<()>().await; + let mut sigterm = + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to register SIGTERM handler"); + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + _ = sigterm.recv() => {}, + } + info!("Received shutdown signal, initiating graceful shutdown..."); + + // Allow a brief window for in-flight QMDB commits and log drains + // to complete before the runtime drops all task contexts. The + // watchdog no longer calls abort() on `Error::Closed`, so these + // tasks will terminate cleanly when their contexts are dropped. 
+ tokio::time::sleep(Duration::from_millis(200)).await; + + info!("Graceful shutdown complete"); Ok::<(), RunnerError>(()) }) } } impl NodeRunner for ProductionRunner { - type Transport = NetworkTransport; + type Transport = NetworkTransport; type Handle = LedgerService; type Error = RunnerError; async fn run(&self, ctx: NodeRunContext) -> Result { let (context, config, mut transport) = ctx.into_parts(); + let gas_limit = config.execution.gas_limit; + let simplex_config = config.consensus.simplex; info!(chain_id = self.chain_id, "Starting production validator"); let validators = self.scheme.participants().clone(); let secondary = Set::from_iter_dedup(self.secondary_peers.iter().cloned()); let secondary_count = secondary.len(); - transport.oracle.track(0, TrackedPeers::new(validators, secondary)).await; + transport.oracle.track(0, TrackedPeers::new(validators, secondary)); info!( validators = self.scheme.participants().len(), secondary_peers = secondary_count, @@ -220,46 +960,272 @@ impl NodeRunner for ProductionRunner { ); let page_cache = default_page_cache(&context); - let block_cfg = block_codec_cfg(); + let block_cfg = block_codec_cfg(&config.consensus.block_codec); + let partition_prefix = &self.partition_prefix; + // Use a single Rayon worker thread for BLS signature verification. + // Rayon's work-stealing scheduler busy-waits (sched_yield) when idle, + // and BLS batches are small enough (~6-10 msgs at 30 blocks/s) that + // parallelism across 2 threads provides negligible speedup. With + // Docker CPU limits (0.75-1.2 cores), the second idle thread wastes + // ~0.21 cores of CPU in spin loops and inflates involuntary context + // switches by 100K+/5min. 
+ let strategy = context + .create_strategy(NZUsize!(1)) + .map_err(|e| anyhow::anyhow!("failed to create signature strategy: {e}"))?; + let checkpoint_interval = checkpoint_interval(); + info!(checkpoint_interval, "configured finalized archive and QMDB checkpoint interval"); + + // Migrate any legacy immutable archive partitions left over from + // before the switch to prunable archives. The old backend used + // different partition names, so its data is silently orphaned on + // upgrade. This detects, warns, and removes the stale partitions. + let finalizations_prefix = format!("{partition_prefix}-finalizations-by-height"); + let blocks_prefix = format!("{partition_prefix}-finalized-blocks"); + ArchiveInitializer::migrate_from_immutable(&context, &finalizations_prefix).await; + ArchiveInitializer::migrate_from_immutable(&context, &blocks_prefix).await; + + ::certificate_codec_config_unbounded(); + let finalizations_by_height = + ArchiveInitializer::init_prunable_checkpointed::<_, ConsensusDigest, CertArchive>( + context.child("finalizations_by_height"), + finalizations_prefix, + (), + checkpoint_interval, + ) + .await + .context("init finalizations archive")?; - let state = LedgerView::init( - context.with_label("state"), + let finalized_blocks = + ArchiveInitializer::init_prunable_checkpointed::<_, ConsensusDigest, Block>( + context.child("finalized_blocks"), + blocks_prefix, + block_cfg, + checkpoint_interval, + ) + .await + .context("init blocks archive")?; + + let has_finalized_history = finalized_blocks.last_index().is_some(); + let state = LedgerView::init_with_genesis_options( + context.child("state"), format!("{}-qmdb", self.partition_prefix), self.bootstrap.genesis_alloc.clone(), + !has_finalized_history, + self.bootstrap.genesis_timestamp, ) .await .context("init qmdb")?; - let block_index = - self.rpc_config.as_ref().map(|_| Arc::new(kora_indexer::BlockIndex::new())); + let pending_tx_broadcast = + self.rpc_config.as_ref().map(|_| 
kora_rpc::pending_tx_channel().0); + let mempool_broadcast = + self.rpc_config.as_ref().map(|_| kora_rpc::mempool_event_channel().0); let ledger = LedgerService::new(state.clone()); - spawn_ledger_observers(ledger.clone(), context.clone()); + let block_index = Arc::new(BlockIndex::new()); + seed_genesis_block_index(&block_index, &ledger.genesis_block(), gas_limit); + spawn_ledger_observers( + ledger.clone(), + context.child("ledger_observers"), + config.data_dir.clone(), + ); + let txpool = ledger.txpool().await; + spawn_txpool_cleanup(txpool.clone(), context.child("txpool")); + + // Initialize application-level Prometheus metrics and register them + // with the commonware runtime so they appear on the /metrics endpoint. + let app_metrics = AppMetrics::new(); + app_metrics.register(&RuntimeMetrics(&context)); + txpool.set_metrics(app_metrics.clone()); + // -- Transaction gossip infrastructure -- + let (gossip_outbound_tx, gossip_seen): ( + Option>, + Option, + ) = if config.network.tx_gossip { + let (tx_gossip_sender, tx_gossip_receiver) = transport.tx_gossip.channel; + let seen = new_seen_set(); + let (outbound_tx, gossip_outbound_rx) = + tokio::sync::mpsc::channel::(TX_GOSSIP_OUTBOUND_BUFFER); + + // Outbound: read from internal channel, broadcast via P2P. 
+ { + let seen = seen.clone(); + let mut sender = tx_gossip_sender; + let out_metrics = app_metrics.clone(); + context.child("tx_gossip_out").shared(true).spawn(move |_| async move { + let mut rx = gossip_outbound_rx; + while let Some(raw) = rx.recv().await { + let hash = keccak256(&raw); + if !mark_seen(&seen, hash) { + continue; + } + let msg = bytes::Bytes::copy_from_slice(&raw); + let recipients = sender.send(Recipients::All, msg, false); + if recipients.is_empty() { + warn!("tx gossip: failed to broadcast transaction"); + out_metrics.gossip_tx_broadcast_failed.inc(); + } else { + trace!( + ?hash, + recipients = recipients.len(), + "tx gossip: broadcast transaction to peers" + ); + out_metrics.gossip_tx_broadcast.inc(); + } + } + debug!("tx gossip outbound channel closed"); + }); + } + + // Inbound: read from P2P, validate, insert into local pool. + { + let seen = seen.clone(); + let gossip_ledger = ledger.clone(); + let gossip_chain_id = self.chain_id; + let gossip_pool = txpool.clone(); + let mut receiver = tx_gossip_receiver; + let in_metrics = app_metrics.clone(); + context.child("tx_gossip_in").shared(true).spawn(move |_| async move { + loop { + let (peer, raw) = match receiver.recv().await { + Ok(msg) => msg, + Err(e) => { + warn!(error = %e, "tx gossip: receive error, stopping inbound handler"); + break; + } + }; + + in_metrics.gossip_tx_received.inc(); + let hash = keccak256(&raw); + if !mark_seen(&seen, hash) { + trace!(?hash, ?peer, "tx gossip: skipping already-seen transaction"); + continue; + } + + let data = alloy_primitives::Bytes::copy_from_slice(raw.as_ref()); + let tx = Tx::new(data); + let tx_id = tx.id(); + + // Fetch the latest state on each validation so nonce + // and balance checks reflect finalized blocks. The + // previous code captured state once at startup, making + // gossip validation increasingly stale. 
+ let current_state = gossip_ledger.latest_state().await; + let validator = TransactionValidator::new( + gossip_chain_id, + current_state, + PoolConfig::default(), + ) + .with_pool(gossip_pool.clone()); + if let Err(e) = validator.validate(tx.clone()).await { + trace!(?tx_id, ?peer, error = %e, "tx gossip: peer tx failed validation"); + in_metrics.gossip_tx_invalid.inc(); + continue; + } + + if gossip_ledger.submit_tx(tx).await { + debug!(?tx_id, ?peer, "tx gossip: accepted transaction from peer"); + } else { + trace!(?tx_id, ?peer, "tx gossip: ledger rejected transaction (duplicate)"); + } + } + }); + } + + info!("Transaction gossip enabled"); + (Some(outbound_tx), Some(seen)) + } else { + // Drop the gossip channel - we won't use it + drop(transport.tx_gossip); + info!( + "Transaction gossip disabled (enable with network.tx_gossip = true or --tx-gossip)" + ); + (None, None) + }; + + let fee_recipient = config.execution.fee_recipient.unwrap_or(Address::ZERO); + let context_provider = + RevmContextProvider { gas_limit, fee_recipient, block_index: block_index.clone() }; + let recovered_head_height = recover_finalized_state( + &ledger, + &block_index, + &finalized_blocks, + &finalizations_by_height, + &context_provider, + &config.data_dir, + self.chain_id, + ) + .await + .context("recover finalized state")?; + + // Pre-populate the snapshot cache with the last N blocks so that + // blocks arriving shortly after restart can find their parent + // snapshot. Without this, only the HEAD snapshot exists after + // recovery, and verify_block would fail for any block whose parent + // is not HEAD. 
+ if let Some((head_height, replayed_tail)) = recovered_head_height + && !replayed_tail + { + prepopulate_snapshot_cache( + &ledger, + &finalized_blocks, + head_height, + SNAPSHOT_PREPOPULATE_COUNT, + ) + .await; + } if let Some((node_state, addr)) = &self.rpc_config { - let qmdb_state = state.qmdb_state().await; + let peer_count = self.scheme.participants().len().saturating_sub(1) as u64; + node_state.set_peer_count(peer_count); + + // Restore finalized height from archive so the proposal lag guard + // in RevmApplication does not reject proposals after a restart. + if let Some(last) = finalized_blocks.last_index() { + node_state.set_finalized_height(last); + } + + // Use LiveState so RPC queries read from the latest in-memory + // overlay rather than the persisted QMDB checkpoint (which can lag + // up to 256 blocks behind head). + let live_state = LiveState::new(ledger.clone()); let rpc_executor = Arc::new(RevmExecutor::new(self.chain_id)); let indexed_provider = kora_rpc::IndexedStateProvider::new( - block_index.clone().expect("block index is initialized with RPC"), - qmdb_state, + block_index.clone(), + live_state, rpc_executor, + fee_recipient, ); let tx_ledger = ledger.clone(); - let tx_state = state.qmdb_state().await; let chain_id = self.chain_id; + let tx_pool = txpool.clone(); + let gossip_tx = gossip_outbound_tx.clone(); + let gossip_seen_rpc = gossip_seen.clone(); let tx_submit: kora_rpc::TxSubmitCallback = Arc::new(move |data| { let ledger = tx_ledger.clone(); - let state = tx_state.clone(); + let pool = tx_pool.clone(); + let gossip = gossip_tx.clone(); + let seen = gossip_seen_rpc.clone(); Box::pin(async move { - let tx = Tx::new(data); + let tx = Tx::new(data.clone()); let tx_id = tx.id(); + let state = ledger.latest_state().await; let validator = - TransactionValidator::new(chain_id, state, PoolConfig::default()); + TransactionValidator::new(chain_id, state, PoolConfig::default()) + .with_pool(pool); 
validator.validate(tx.clone()).await.map_err(|err| { warn!(?tx_id, error = %err, "rpc submit: validator rejected tx"); kora_rpc::RpcError::InvalidTransaction(err.to_string()) })?; if ledger.submit_tx(tx).await { debug!(?tx_id, "rpc submit: tx inserted into mempool"); + // Forward to gossip if enabled. + if let (Some(gossip), Some(seen)) = (&gossip, &seen) { + let hash = keccak256(&data); + mark_seen(seen, hash); + if let Err(e) = gossip.try_send(data) { + warn!(error = %e, "tx gossip: outbound channel full, skipping broadcast"); + } + } Ok(()) } else { warn!( @@ -272,16 +1238,66 @@ impl NodeRunner for ProductionRunner { } }) }); - let rpc = kora_rpc::RpcServer::with_state_provider( + let mut rpc = kora_rpc::RpcServer::with_state_provider( node_state.clone(), *addr, self.chain_id, indexed_provider, ) .with_tx_submit(tx_submit) - .with_peer_count(self.scheme.participants().len().saturating_sub(1) as u64); - drop(rpc.start()); + .with_txpool(txpool.clone()) + .with_peer_count(peer_count) + .with_rpc_requests_counter(app_metrics.rpc_requests_total.clone()); + if let Some(sender) = pending_tx_broadcast.clone() { + rpc = rpc.with_pending_tx_broadcast(sender); + } + if let Some(sender) = mempool_broadcast.clone() { + rpc = rpc.with_mempool_broadcast(sender); + } + // Keep the RPC handle alive so the HTTP and JSON-RPC tasks are not + // cancelled immediately. The handle is dropped when `run()` returns + // (i.e. after the signal handler completes), which cleanly stops the + // RPC servers during shutdown. 
+ let _rpc_handle = rpc.start(); info!(addr = %addr, "RPC server started with live state provider"); + + spawn_partition_monitor(node_state.clone(), context.child("partition")); + } + + if let Some(metrics_addr) = self.metrics_addr { + let metrics_context = Arc::new(context.child("metrics_endpoint")); + context.child("metrics").shared(true).spawn(move |_| async move { + let app = axum::Router::new().route( + "/metrics", + axum::routing::get(move || { + let metrics_context = metrics_context.clone(); + async move { + let body = metrics_context.encode(); + ( + axum::http::StatusCode::OK, + [( + axum::http::header::CONTENT_TYPE, + "application/openmetrics-text; version=1.0.0; charset=utf-8", + )], + body, + ) + } + }), + ); + + let listener = match tokio::net::TcpListener::bind(metrics_addr).await { + Ok(l) => l, + Err(e) => { + error!(addr = %metrics_addr, error = %e, "Failed to bind metrics server"); + return; + } + }; + + info!(addr = %metrics_addr, "Starting metrics server"); + if let Err(e) = axum::serve(listener, app).await { + error!(error = %e, "Metrics server error"); + } + }); } let validator_key = config @@ -289,61 +1305,80 @@ impl NodeRunner for ProductionRunner { .map_err(|e| anyhow::anyhow!("failed to load validator key: {}", e))?; let my_pk = commonware_cryptography::Signer::public_key(&validator_key); - let executor = RevmExecutor::new(self.chain_id); - let context_provider = RevmContextProvider { gas_limit: self.gas_limit }; - let mut finalized_reporter = - FinalizedReporter::new(ledger.clone(), context.clone(), executor, context_provider); - if let Some(block_index) = block_index { - finalized_reporter = finalized_reporter.with_block_index(block_index); + let finalized_executor = RevmExecutor::new(self.chain_id); + let mut finalized_reporter = FinalizedReporter::new( + ledger.clone(), + context.child("finalized_reporter"), + finalized_executor, + context_provider, + ) + .with_block_index(block_index.clone()) + .with_metrics(app_metrics.clone()) + 
.with_checkpoint_interval(checkpoint_interval); + if let Some((state, _)) = &self.rpc_config { + finalized_reporter = finalized_reporter.with_node_state(state.clone()); + } + if let Some(sender) = mempool_broadcast { + finalized_reporter = finalized_reporter.with_mempool_broadcast(sender); + } + + // Initialize the selfdestruct GC log for tracking orphaned storage. + match kora_reporters::SelfdestructGcLog::open(&config.data_dir) { + Ok(gc_log) => { + info!( + path = %config.data_dir.display(), + "Opened selfdestruct GC log for tracking orphaned storage" + ); + finalized_reporter = finalized_reporter.with_gc_log(Arc::new(gc_log)); + } + Err(e) => { + warn!( + error = %e, + "Failed to open selfdestruct GC log; selfdestructed addresses will not be tracked" + ); + } } let scheme_provider = ConstantSchemeProvider::from(self.scheme.clone()); + // Suppress resolver peer-bans during catch-up to avoid blocking peers + // that serve historical data which fails local verification due to + // missing parent snapshots. The simplex engine uses the real oracle + // blocker unconditionally since it only bans for genuine equivocation. 
+ let resolver_catching_up = Arc::new(AtomicBool::new(recovered_head_height.is_some())); + let resolver_blocker = + GraduatedBlocker::new(transport.oracle.clone(), resolver_catching_up); + let resolver = PeerInitializer::init::<_, _, _, Block, _, _, _>( - &context.with_label("resolver"), + context.child("resolver"), my_pk.clone(), transport.oracle.clone(), - transport.oracle.clone(), + resolver_blocker, transport.marshal.backfill, ); let (broadcast_engine, buffer) = BroadcastInitializer::init::<_, Peer, Block, _>( - context.with_label("broadcast"), + context.child("broadcast"), my_pk.clone(), transport.oracle.clone(), block_cfg, ); - broadcast_engine.start(transport.marshal.blocks); - - let partition_prefix = &self.partition_prefix; - ::certificate_codec_config_unbounded(); - let finalizations_by_height = ArchiveInitializer::init::<_, ConsensusDigest, CertArchive>( - context.with_label("finalizations_by_height"), - format!("{partition_prefix}-finalizations-by-height"), - (), - ) - .await - .context("init finalizations archive")?; - - let finalized_blocks = ArchiveInitializer::init::<_, ConsensusDigest, Block>( - context.with_label("finalized_blocks"), - format!("{partition_prefix}-finalized-blocks"), - block_cfg, - ) - .await - .context("init blocks archive")?; + let broadcast_handle = broadcast_engine.start(transport.marshal.blocks); + let scratch_context = NoSyncStorage::new(context.child("scratch"), checkpoint_interval); let (actor, marshal_mailbox, _last_processed_height) = - kora_marshal::ActorInitializer::init::<_, Block, _, _, _, Exact>( - context.clone(), + kora_marshal::ActorInitializer::init_with_strategy::<_, Block, _, _, _, Exact, _>( + scratch_context.clone(), finalizations_by_height, finalized_blocks, scheme_provider, + commonware_consensus::marshal::Start::Genesis(ledger.genesis_block()), page_cache.clone(), block_cfg, + strategy.clone(), ) .await; - actor.start(finalized_reporter, buffer, resolver); + let marshal_handle = 
actor.start(finalized_reporter, buffer, resolver); let epocher = FixedEpocher::new(NZU64!(EPOCH_LENGTH)); let executor = RevmExecutor::new(self.chain_id); @@ -351,29 +1386,43 @@ impl NodeRunner for ProductionRunner { ledger.clone(), executor, block_cfg.max_txs, - self.gas_limit, + gas_limit, + fee_recipient, ); + app = app.with_metrics(app_metrics.clone()); + if let Some((height, _)) = recovered_head_height { + app = app.with_recovered_height(height); + // Seed the block-fee cache from the block index so that the + // first blocks after restart can compute a correct EIP-1559 + // base fee. We seed the last few blocks to cover the parent + // of the next proposed/verified block. + seed_block_fee_cache(&app, &block_index, height); + if let Some((state, _)) = &self.rpc_config { + state.set_recovered_height(height); + } + } if let Some((state, _)) = &self.rpc_config { app = app.with_node_state(state.clone()); } let marshaled = - Inline::new(context.with_label("marshaled"), app, marshal_mailbox.clone(), epocher); + Inline::new(scratch_context.child("marshaled"), app, marshal_mailbox.clone(), epocher); let seed_reporter = SeedReporter::::new(ledger.clone()); - let node_state_reporter = self - .rpc_config - .as_ref() - .map(|(state, _)| NodeStateReporter::::new(state.clone())); + let node_state_reporter = self.rpc_config.as_ref().map(|(state, _)| { + NodeStateReporter::::new(state.clone()).with_metrics(app_metrics) + }); let inner_reporters: Reporters<_, MarshalMailbox, Option> = Reporters::from((marshal_mailbox.clone(), node_state_reporter)); let reporter = Reporters::from((seed_reporter, inner_reporters)); for tx in &self.bootstrap.bootstrap_txs { - let _ = ledger.submit_tx(tx.clone()).await; + if !ledger.submit_tx(tx.clone()).await { + warn!("failed to submit bootstrap transaction to mempool"); + } } let engine = simplex::Engine::new( - context.with_label("engine"), + scratch_context.child("engine"), simplex::Config { scheme: self.scheme.clone(), elector: Random, @@ 
-381,26 +1430,134 @@ impl NodeRunner for ProductionRunner { automaton: marshaled.clone(), relay: marshaled, reporter, - strategy: Sequential, + strategy, partition: self.partition_prefix.clone(), - mailbox_size: MAILBOX_SIZE, + mailbox_size: NZUsize!(MAILBOX_SIZE), epoch: Epoch::zero(), - replay_buffer: NZUsize!(1024 * 1024), - write_buffer: NZUsize!(1024 * 1024), - leader_timeout: Duration::from_millis(500), - certification_timeout: Duration::from_secs(1), - timeout_retry: Duration::from_secs(2), - fetch_timeout: Duration::from_millis(500), - activity_timeout: ViewDelta::new(20), - skip_timeout: ViewDelta::new(10), - fetch_concurrent: 8, + floor: simplex::Floor::Genesis(ledger.genesis_block().commitment()), + replay_buffer: simplex_config.replay_buffer_bytes, + write_buffer: simplex_config.write_buffer_bytes, + leader_timeout: Duration::from_secs(simplex_config.leader_timeout_secs.get()), + certification_timeout: Duration::from_secs( + simplex_config.certification_timeout_secs.get(), + ), + timeout_retry: Duration::from_secs(simplex_config.timeout_retry_secs.get()), + fetch_timeout: Duration::from_secs(simplex_config.fetch_timeout_secs.get()), + activity_timeout: ViewDelta::new(simplex_config.activity_timeout_views.get()), + skip_timeout: ViewDelta::new(simplex_config.skip_timeout_views.get()), + fetch_concurrent: simplex_config.fetch_concurrent, page_cache, - forwarding: simplex::ForwardingPolicy::Disabled, + forwarding: simplex::ForwardingPolicy::SilentLeader, }, ); - engine.start(transport.simplex.votes, transport.simplex.certs, transport.simplex.resolver); + let engine_handle = engine.start( + transport.simplex.votes, + transport.simplex.certs, + transport.simplex.resolver, + ); + + spawn_consensus_monitor(context, engine_handle, marshal_handle, broadcast_handle); info!("Validator started successfully"); Ok(ledger) } } + +#[cfg(test)] +mod tests { + use std::num::NonZeroUsize; + + use kora_config::ConsensusBlockCodecConfig; + use kora_domain::{BlockId, 
StateRoot}; + + use super::*; + + #[test] + fn seed_genesis_block_index_indexes_real_genesis_metadata() { + let index = BlockIndex::new(); + let genesis = Block::new( + BlockId(B256::repeat_byte(0x11)), + 0, + 0, + B256::repeat_byte(0x22), + StateRoot(B256::repeat_byte(0x33)), + Vec::new(), + ); + let gas_limit = 45_000_000; + + seed_genesis_block_index(&index, &genesis, gas_limit); + + let indexed = index.get_block_by_number(0).expect("genesis indexed"); + assert_eq!(indexed.hash, genesis.id().0); + assert_eq!(indexed.number, 0); + assert_eq!(indexed.parent_hash, genesis.parent.0); + assert_eq!(indexed.state_root, genesis.state_root.0); + assert_eq!(indexed.timestamp, 0); + assert_eq!(indexed.gas_limit, gas_limit); + assert_eq!(indexed.gas_used, 0); + assert_eq!(indexed.base_fee_per_gas, Some(kora_config::INITIAL_BASE_FEE)); + assert_eq!(indexed.transaction_hashes, Vec::::new()); + assert_eq!(index.get_block_by_hash(&genesis.id().0).expect("genesis by hash").number, 0); + } + + #[test] + fn seed_genesis_block_index_uses_genesis_timestamp() { + let index = BlockIndex::new(); + let genesis = Block::new( + BlockId(B256::ZERO), + 0, + 1_700_000_000, + B256::ZERO, + StateRoot(B256::ZERO), + Vec::new(), + ); + + seed_genesis_block_index(&index, &genesis, 30_000_000); + + let indexed = index.get_block_by_number(0).expect("genesis indexed"); + assert_eq!(indexed.timestamp, 1_700_000_000); + } + + #[test] + fn block_codec_cfg_uses_consensus_config() { + let config = ConsensusBlockCodecConfig { + max_txs: NonZeroUsize::new(512).unwrap(), + max_tx_bytes: NonZeroUsize::new(4096).unwrap(), + }; + + let block_cfg = block_codec_cfg(&config); + + assert_eq!(block_cfg.max_txs, 512); + assert_eq!(block_cfg.tx.max_tx_bytes, 4096); + } + + #[test] + fn runtime_storage_directory_defaults_under_data_dir() { + let data_dir = PathBuf::from("/var/lib/kora"); + + assert_eq!( + runtime_storage_directory_from(&data_dir, None), + PathBuf::from("/var/lib/kora/runtime") + ); + } + + #[test] + 
fn runtime_storage_directory_ignores_empty_override() { + let data_dir = PathBuf::from("/var/lib/kora"); + + assert_eq!( + runtime_storage_directory_from(&data_dir, Some(OsString::new())), + PathBuf::from("/var/lib/kora/runtime") + ); + } + + #[test] + fn runtime_storage_directory_uses_override() { + let data_dir = PathBuf::from("/var/lib/kora"); + + assert_eq!( + runtime_storage_directory_from(&data_dir, Some(OsString::from("/runtime"))), + PathBuf::from("/runtime") + ); + } +} diff --git a/crates/node/service/Cargo.toml b/crates/node/service/Cargo.toml index a19e10c..f295864 100644 --- a/crates/node/service/Cargo.toml +++ b/crates/node/service/Cargo.toml @@ -16,6 +16,7 @@ kora-config = { path = "../config" } kora-transport = { path = "../../network/transport" } # Commonware +commonware-actor.workspace = true commonware-consensus.workspace = true commonware-cryptography.workspace = true commonware-p2p.workspace = true diff --git a/crates/node/service/src/runner.rs b/crates/node/service/src/runner.rs index ce1579c..f9ba2f9 100644 --- a/crates/node/service/src/runner.rs +++ b/crates/node/service/src/runner.rs @@ -7,7 +7,7 @@ use std::sync::Arc; -use commonware_runtime::tokio; +use commonware_runtime::{Supervisor as _, tokio}; use kora_config::NodeConfig; /// Context provided to a node runner. @@ -40,7 +40,7 @@ impl NodeRunContext { /// Get a clone of the runtime context. pub fn context_owned(&self) -> tokio::Context { - self.context.clone() + self.context.child("owned") } /// Get the node configuration. 
diff --git a/crates/node/service/src/service.rs b/crates/node/service/src/service.rs index 64b3aa4..85f712f 100644 --- a/crates/node/service/src/service.rs +++ b/crates/node/service/src/service.rs @@ -111,7 +111,7 @@ impl LegacyNodeService { let mut transport = self .config .network - .build_local_transport(validator_key, context.clone()) + .build_local_transport(validator_key, context) .map_err(|e| eyre::eyre!("failed to build transport: {}", e))?; tracing::info!("network transport started"); @@ -120,7 +120,7 @@ impl LegacyNodeService { let validator_set: commonware_utils::ordered::Set<_> = validators .try_into() .map_err(|_| eyre::eyre!("failed to convert validator set"))?; - transport.oracle.track(0, validator_set).await; + transport.oracle.track(0, validator_set); tracing::info!("registered validators with oracle"); } diff --git a/crates/node/service/src/stubs.rs b/crates/node/service/src/stubs.rs index 7379d23..58736a8 100644 --- a/crates/node/service/src/stubs.rs +++ b/crates/node/service/src/stubs.rs @@ -6,7 +6,8 @@ use std::future::Future; -use commonware_consensus::{CertifiableAutomaton, Relay, Reporter, types::Epoch}; +use commonware_actor::Feedback; +use commonware_consensus::{CertifiableAutomaton, Relay, Reporter}; use commonware_cryptography::sha256; use commonware_utils::channel::{fallible::OneshotExt as _, oneshot}; @@ -32,10 +33,6 @@ impl commonware_consensus::Automaton for StubAutomaton { type Context = commonware_consensus::simplex::types::Context; type Digest = StubDigest; - fn genesis(&mut self, _epoch: Epoch) -> impl Future + Send { - async { zero_digest() } - } - #[allow(clippy::async_yields_async)] fn propose( &mut self, @@ -74,12 +71,8 @@ impl Relay for StubRelay { type PublicKey = StubPublicKey; type Plan = (); - fn broadcast( - &mut self, - _payload: Self::Digest, - _plan: Self::Plan, - ) -> impl Future + Send { - async {} + fn broadcast(&mut self, _payload: Self::Digest, _plan: Self::Plan) -> Feedback { + Feedback::Ok } } @@ -101,42 
+94,41 @@ where { type Activity = commonware_consensus::simplex::types::Activity; - fn report(&mut self, activity: Self::Activity) -> impl Future + Send { + fn report(&mut self, activity: Self::Activity) -> Feedback { use commonware_consensus::simplex::types::Activity; - async move { - match activity { - Activity::Notarize(n) => { - tracing::trace!(view = ?n.proposal.round.view(), "notarize vote"); - } - Activity::Notarization(n) => { - tracing::debug!(view = ?n.proposal.round.view(), "notarization"); - } - Activity::Certification(c) => { - tracing::debug!(view = ?c.proposal.round.view(), "certification"); - } - Activity::Nullify(_) => { - tracing::trace!("nullify vote"); - } - Activity::Nullification(n) => { - tracing::debug!(round = ?n.round, "nullification"); - } - Activity::Finalize(f) => { - tracing::trace!(view = ?f.proposal.round.view(), "finalize vote"); - } - Activity::Finalization(f) => { - tracing::info!(view = ?f.proposal.round.view(), "finalization"); - } - Activity::ConflictingNotarize(_) => { - tracing::warn!("conflicting notarize detected"); - } - Activity::ConflictingFinalize(_) => { - tracing::warn!("conflicting finalize detected"); - } - Activity::NullifyFinalize(_) => { - tracing::warn!("nullify-finalize conflict detected"); - } + match activity { + Activity::Notarize(n) => { + tracing::trace!(view = ?n.proposal.round.view(), "notarize vote"); + } + Activity::Notarization(n) => { + tracing::debug!(view = ?n.proposal.round.view(), "notarization"); + } + Activity::Certification(c) => { + tracing::debug!(view = ?c.proposal.round.view(), "certification"); + } + Activity::Nullify(_) => { + tracing::trace!("nullify vote"); + } + Activity::Nullification(n) => { + tracing::debug!(round = ?n.round, "nullification"); + } + Activity::Finalize(f) => { + tracing::trace!(view = ?f.proposal.round.view(), "finalize vote"); + } + Activity::Finalization(f) => { + tracing::info!(view = ?f.proposal.round.view(), "finalization"); + } + 
Activity::ConflictingNotarize(_) => { + tracing::warn!("conflicting notarize detected"); + } + Activity::ConflictingFinalize(_) => { + tracing::warn!("conflicting finalize detected"); + } + Activity::NullifyFinalize(_) => { + tracing::warn!("nullify-finalize conflict detected"); } } + Feedback::Ok } } @@ -148,7 +140,7 @@ pub struct StubBlocker; impl commonware_p2p::Blocker for StubBlocker { type PublicKey = StubPublicKey; - fn block(&mut self, _peer: Self::PublicKey) -> impl Future + Send { - async {} + fn block(&mut self, _peer: Self::PublicKey) -> Feedback { + Feedback::Ok } } diff --git a/crates/node/simplex/src/config.rs b/crates/node/simplex/src/config.rs index 92bf4a1..cf619bb 100644 --- a/crates/node/simplex/src/config.rs +++ b/crates/node/simplex/src/config.rs @@ -92,8 +92,9 @@ impl DefaultConfig { reporter, strategy: Sequential, partition: partition.into(), - mailbox_size: DEFAULT_MAILBOX_SIZE, + mailbox_size: NZUsize!(DEFAULT_MAILBOX_SIZE), epoch: Epoch::zero(), + floor: simplex::Floor::Genesis(D::EMPTY), replay_buffer: NZUsize!(DEFAULT_REPLAY_BUFFER), write_buffer: NZUsize!(DEFAULT_WRITE_BUFFER), leader_timeout: DEFAULT_LEADER_TIMEOUT, @@ -102,7 +103,7 @@ impl DefaultConfig { fetch_timeout: DEFAULT_FETCH_TIMEOUT, activity_timeout: DEFAULT_ACTIVITY_TIMEOUT, skip_timeout: DEFAULT_SKIP_TIMEOUT, - fetch_concurrent: DEFAULT_FETCH_CONCURRENT, + fetch_concurrent: NZUsize!(DEFAULT_FETCH_CONCURRENT), page_cache, forwarding: ForwardingPolicy::Disabled, } diff --git a/crates/node/txpool/Cargo.toml b/crates/node/txpool/Cargo.toml index 459f527..521c7ec 100644 --- a/crates/node/txpool/Cargo.toml +++ b/crates/node/txpool/Cargo.toml @@ -13,6 +13,7 @@ workspace = true [dependencies] # Local crates kora-domain = { path = "../domain", features = ["evm"] } +kora-metrics = { path = "../metrics" } kora-traits = { path = "../../storage/traits" } # Alloy @@ -27,6 +28,7 @@ sha3.workspace = true # Concurrency parking_lot.workspace = true +tokio = { workspace = true, features = 
["sync"] } # Error handling thiserror.workspace = true @@ -37,4 +39,4 @@ tracing.workspace = true [dev-dependencies] rstest.workspace = true rand.workspace = true -tokio = { workspace = true, features = ["rt", "macros"] } +tokio = { workspace = true, features = ["rt", "macros", "sync"] } diff --git a/crates/node/txpool/src/config.rs b/crates/node/txpool/src/config.rs index 8f28fde..bbcce58 100644 --- a/crates/node/txpool/src/config.rs +++ b/crates/node/txpool/src/config.rs @@ -15,6 +15,10 @@ pub struct PoolConfig { pub min_gas_price: u128, /// Percentage bump required for replacement transactions. pub replacement_bump_percent: u8, + /// Time-to-live for pending transactions, in seconds. + pub pending_ttl_secs: u64, + /// Time-to-live for queued transactions, in seconds. + pub queued_ttl_secs: u64, } impl Default for PoolConfig { @@ -23,9 +27,11 @@ impl Default for PoolConfig { max_pending_txs: 4096, max_queued_txs: 1024, max_txs_per_sender: 256, - max_tx_size: 128 * 1024, // 128 KB - min_gas_price: 0, + max_tx_size: 128 * 1024, // 128 KB + min_gas_price: 1_000_000_000, // 1 gwei, matches INITIAL_BASE_FEE replacement_bump_percent: 10, + pending_ttl_secs: 30 * 60, + queued_ttl_secs: 60 * 60, } } } @@ -38,8 +44,10 @@ impl PoolConfig { max_queued_txs: 1024, max_txs_per_sender: 256, max_tx_size: 128 * 1024, - min_gas_price: 0, + min_gas_price: 1_000_000_000, // 1 gwei, matches INITIAL_BASE_FEE replacement_bump_percent: 10, + pending_ttl_secs: 30 * 60, + queued_ttl_secs: 60 * 60, } } @@ -84,6 +92,20 @@ impl PoolConfig { self.replacement_bump_percent = percent; self } + + /// Sets the time-to-live for pending transactions, in seconds. + #[must_use] + pub const fn with_pending_ttl_secs(mut self, ttl: u64) -> Self { + self.pending_ttl_secs = ttl; + self + } + + /// Sets the time-to-live for queued transactions, in seconds. 
+ #[must_use] + pub const fn with_queued_ttl_secs(mut self, ttl: u64) -> Self { + self.queued_ttl_secs = ttl; + self + } } #[cfg(test)] @@ -97,8 +119,10 @@ mod tests { assert_eq!(config.max_queued_txs, 1024); assert_eq!(config.max_txs_per_sender, 256); assert_eq!(config.max_tx_size, 128 * 1024); - assert_eq!(config.min_gas_price, 0); + assert_eq!(config.min_gas_price, 1_000_000_000); assert_eq!(config.replacement_bump_percent, 10); + assert_eq!(config.pending_ttl_secs, 30 * 60); + assert_eq!(config.queued_ttl_secs, 60 * 60); } #[test] @@ -111,6 +135,8 @@ mod tests { assert_eq!(new.max_tx_size, default.max_tx_size); assert_eq!(new.min_gas_price, default.min_gas_price); assert_eq!(new.replacement_bump_percent, default.replacement_bump_percent); + assert_eq!(new.pending_ttl_secs, default.pending_ttl_secs); + assert_eq!(new.queued_ttl_secs, default.queued_ttl_secs); } #[test] @@ -151,6 +177,18 @@ mod tests { assert_eq!(config.replacement_bump_percent, 25); } + #[test] + fn builder_with_pending_ttl_secs() { + let config = PoolConfig::new().with_pending_ttl_secs(60); + assert_eq!(config.pending_ttl_secs, 60); + } + + #[test] + fn builder_with_queued_ttl_secs() { + let config = PoolConfig::new().with_queued_ttl_secs(120); + assert_eq!(config.queued_ttl_secs, 120); + } + #[test] fn builder_chaining() { let config = PoolConfig::new() @@ -159,7 +197,9 @@ mod tests { .with_max_txs_per_sender(50) .with_max_tx_size(64 * 1024) .with_min_gas_price(500) - .with_replacement_bump_percent(15); + .with_replacement_bump_percent(15) + .with_pending_ttl_secs(45) + .with_queued_ttl_secs(90); assert_eq!(config.max_pending_txs, 10000); assert_eq!(config.max_queued_txs, 5000); @@ -167,6 +207,8 @@ mod tests { assert_eq!(config.max_tx_size, 64 * 1024); assert_eq!(config.min_gas_price, 500); assert_eq!(config.replacement_bump_percent, 15); + assert_eq!(config.pending_ttl_secs, 45); + assert_eq!(config.queued_ttl_secs, 90); } #[test] @@ -174,6 +216,8 @@ mod tests { let config = 
PoolConfig::new().with_max_pending_txs(100).with_min_gas_price(999); let cloned = config.clone(); + assert_eq!(config.max_pending_txs, 100); + assert_eq!(config.min_gas_price, 999); assert_eq!(cloned.max_pending_txs, 100); assert_eq!(cloned.min_gas_price, 999); } diff --git a/crates/node/txpool/src/error.rs b/crates/node/txpool/src/error.rs index cd3af78..5b3cc8b 100644 --- a/crates/node/txpool/src/error.rs +++ b/crates/node/txpool/src/error.rs @@ -89,6 +89,15 @@ pub enum TxPoolError { #[error("transaction already exists")] AlreadyExists, + /// A transaction with the same sender and nonce already exists in the pool. + #[error("nonce {nonce} already in pool for sender {sender}")] + NonceAlreadyInPool { + /// Sender address. + sender: Address, + /// Conflicting nonce. + nonce: u64, + }, + /// An error occurred while accessing state. #[error("state error: {0}")] StateError(String), @@ -199,6 +208,15 @@ mod tests { assert_eq!(err.to_string(), "replacement transaction underpriced"); } + #[test] + fn test_nonce_already_in_pool_display() { + let addr = Address::repeat_byte(0xab); + let err = TxPoolError::NonceAlreadyInPool { sender: addr, nonce: 7 }; + let display = err.to_string(); + assert!(display.contains("nonce 7")); + assert!(display.contains("already in pool")); + } + #[test] fn test_txpool_error_is_send_sync() { fn assert_send_sync() {} diff --git a/crates/node/txpool/src/ordering.rs b/crates/node/txpool/src/ordering.rs index 5c35b62..a1254ce 100644 --- a/crates/node/txpool/src/ordering.rs +++ b/crates/node/txpool/src/ordering.rs @@ -93,15 +93,26 @@ impl SenderQueue { return Some(tx); } - if tx.nonce == self.next_nonce + self.pending.len() as u64 { + if tx.nonce == self.next_pending_nonce() { self.pending.push(tx); self.promote_queued(); None - } else if tx.nonce > self.next_nonce + self.pending.len() as u64 { - let pos = - self.queued.binary_search_by(|q| q.nonce.cmp(&tx.nonce)).unwrap_or_else(|p| p); - self.queued.insert(pos, tx); - None + } else if tx.nonce > 
self.next_pending_nonce() { + match self.queued.binary_search_by(|q| q.nonce.cmp(&tx.nonce)) { + Ok(pos) => { + let existing = &self.queued[pos]; + if tx.effective_gas_price > existing.effective_gas_price { + let old = std::mem::replace(&mut self.queued[pos], tx); + Some(old) + } else { + Some(tx) + } + } + Err(pos) => { + self.queued.insert(pos, tx); + None + } + } } else { let idx = (tx.nonce - self.next_nonce) as usize; if idx < self.pending.len() { @@ -115,9 +126,23 @@ impl SenderQueue { } } + /// Removes a transaction by hash while preserving nonce executability. + pub fn remove_by_hash(&mut self, hash: &B256) -> Option { + if let Some(idx) = self.pending.iter().position(|tx| tx.hash == *hash) { + let removed = self.pending.remove(idx); + let mut moved = self.pending.split_off(idx); + self.queued.append(&mut moved); + self.queued.sort_by_key(|tx| tx.nonce); + return Some(removed); + } + + let idx = self.queued.iter().position(|tx| tx.hash == *hash)?; + Some(self.queued.remove(idx)) + } + fn promote_queued(&mut self) { while let Some(first) = self.queued.first() { - if first.nonce == self.next_nonce + self.pending.len() as u64 { + if first.nonce == self.next_pending_nonce() { let tx = self.queued.remove(0); self.pending.push(tx); } else { @@ -131,8 +156,17 @@ impl SenderQueue { self.pending.retain(|tx| tx.nonce > confirmed_nonce); self.queued.retain(|tx| tx.nonce > confirmed_nonce); if confirmed_nonce >= self.next_nonce { - self.next_nonce = confirmed_nonce + 1; + self.next_nonce = confirmed_nonce.saturating_add(1); } + self.promote_queued(); + } + + /// Returns the next expected nonce after all pending (executable) transactions. + /// + /// This is `next_nonce + len(pending)` -- i.e. the nonce a new transaction + /// must use to be appended directly to the pending queue. + pub const fn next_pending_nonce(&self) -> u64 { + self.next_nonce.saturating_add(self.pending.len() as u64) } /// Returns the count of pending transactions. 
@@ -236,6 +270,45 @@ mod tests { assert_eq!(queue.queued_count(), 0); } + #[test] + fn sender_queue_replaces_queued_transaction() { + let sender = random_address(); + let mut queue = SenderQueue::new(sender, 0); + + let tx0 = make_tx(0, 100); + let tx2_low = make_tx(2, 100); + let tx2_high = make_tx(2, 200); + + assert!(queue.insert(tx0).is_none()); + assert!(queue.insert(tx2_low.clone()).is_none()); + + let replaced = queue.insert(tx2_high.clone()).expect("queued tx should be replaced"); + assert_eq!(replaced.hash, tx2_low.hash); + assert_eq!(queue.queued_count(), 1); + assert_eq!(queue.queued[0].hash, tx2_high.hash); + } + + #[test] + fn sender_queue_remove_pending_moves_tail_to_queued() { + let sender = random_address(); + let mut queue = SenderQueue::new(sender, 0); + + let tx0 = make_tx(0, 100); + let tx1 = make_tx(1, 100); + let tx2 = make_tx(2, 100); + + assert!(queue.insert(tx0.clone()).is_none()); + assert!(queue.insert(tx1.clone()).is_none()); + assert!(queue.insert(tx2.clone()).is_none()); + + let removed = queue.remove_by_hash(&tx1.hash).expect("tx should be removed"); + assert_eq!(removed.hash, tx1.hash); + assert_eq!(queue.pending.len(), 1); + assert_eq!(queue.pending[0].hash, tx0.hash); + assert_eq!(queue.queued.len(), 1); + assert_eq!(queue.queued[0].hash, tx2.hash); + } + #[test] fn ordered_transaction_ordering() { let tx1 = make_tx(0, 100); diff --git a/crates/node/txpool/src/pool.rs b/crates/node/txpool/src/pool.rs index 7ddf98a..fe41a97 100644 --- a/crates/node/txpool/src/pool.rs +++ b/crates/node/txpool/src/pool.rs @@ -2,14 +2,17 @@ use std::{ collections::{BTreeSet, HashMap}, + sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; use alloy_consensus::{Transaction, TxEnvelope}; use alloy_eips::eip2718::{Decodable2718, Encodable2718}; -use alloy_primitives::{Address, B256, Bytes}; -use kora_domain::{Tx, TxId}; +use alloy_primitives::{Address, B256, Bytes, U256}; +use kora_domain::{MempoolEvent, Tx, TxId}; +use kora_metrics::{AppMetrics, ReasonLabel}; 
use parking_lot::RwLock; +use tokio::sync::broadcast; use tracing::{debug, trace, warn}; use crate::{ @@ -20,9 +23,47 @@ use crate::{ validator::recover_sender_from_envelope, }; +#[derive(Debug)] +struct BuildSenderState { + txs: Vec, + index: usize, + expected_nonce: u64, +} + +impl BuildSenderState { + fn next_candidate(&mut self, excluded: &BTreeSet) -> Option { + while let Some(tx) = self.txs.get(self.index) { + if tx.nonce < self.expected_nonce { + self.index += 1; + continue; + } + + if tx.nonce > self.expected_nonce { + return None; + } + + if excluded.contains(&ordered_tx_id(tx)) { + self.expected_nonce = tx.nonce.saturating_add(1); + self.index += 1; + continue; + } + + return Some(tx.clone()); + } + + None + } + + const fn consume(&mut self) { + self.expected_nonce = self.expected_nonce.saturating_add(1); + self.index += 1; + } +} + #[derive(Debug)] struct PoolInner { by_hash: HashMap, + by_id: HashMap, by_sender: HashMap, pending_count: usize, queued_count: usize, @@ -32,6 +73,7 @@ impl PoolInner { fn new() -> Self { Self { by_hash: HashMap::new(), + by_id: HashMap::new(), by_sender: HashMap::new(), pending_count: 0, queued_count: 0, @@ -42,54 +84,194 @@ impl PoolInner { self.pending_count = self.by_sender.values().map(|q| q.pending_count()).sum(); self.queued_count = self.by_sender.values().map(|q| q.queued_count()).sum(); } + + fn remove_by_hash(&mut self, hash: &B256) -> Option { + let tx = self.by_hash.remove(hash)?; + self.by_id.remove(&ordered_tx_id(&tx)); + + if let Some(queue) = self.by_sender.get_mut(&tx.sender) { + queue.remove_by_hash(hash); + if queue.is_empty() { + self.by_sender.remove(&tx.sender); + } + } + + Some(tx) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum InsertionTarget { + Pending, + Queued, + Replacement, +} + +fn existing_nonce_tx(queue: &SenderQueue, nonce: u64) -> Option<&OrderedTransaction> { + queue.pending.iter().chain(queue.queued.iter()).find(|tx| tx.nonce == nonce) +} + +fn insertion_target(queue: 
Option<&SenderQueue>, tx: &OrderedTransaction) -> InsertionTarget { + let Some(queue) = queue else { + return InsertionTarget::Pending; + }; + + if existing_nonce_tx(queue, tx.nonce).is_some() { + return InsertionTarget::Replacement; + } + + if tx.nonce == queue.next_nonce + queue.pending.len() as u64 { + InsertionTarget::Pending + } else { + InsertionTarget::Queued + } } /// A thread-safe transaction pool with nonce ordering and fee prioritization. #[derive(Debug)] pub struct TransactionPool { - inner: RwLock, + inner: Arc>, config: PoolConfig, + events: Option>, + metrics: Arc>>, } impl TransactionPool { /// Creates a new transaction pool with the given configuration. #[must_use] pub fn new(config: PoolConfig) -> Self { - Self { inner: RwLock::new(PoolInner::new()), config } + Self { + inner: Arc::new(RwLock::new(PoolInner::new())), + config, + events: None, + metrics: Arc::new(RwLock::new(None)), + } + } + + /// Creates a new transaction pool that broadcasts mempool lifecycle events. + #[must_use] + pub fn new_with_events(config: PoolConfig, events: broadcast::Sender) -> Self { + Self { + inner: Arc::new(RwLock::new(PoolInner::new())), + config, + events: Some(events), + metrics: Arc::new(RwLock::new(None)), + } + } + + /// Attach application-level metrics to this pool. + /// + /// Because the metrics handle is shared across all clones of this pool, + /// this method affects every clone that shares the same backing store. + pub fn set_metrics(&self, metrics: AppMetrics) { + *self.metrics.write() = Some(metrics); + } + + /// Update gauge metrics to reflect current pool state. + /// + /// Must be called while the caller does NOT hold the inner lock (it takes + /// a read lock internally). 
+ fn sync_metrics(&self) { + let metrics_guard = self.metrics.read(); + if let Some(ref m) = *metrics_guard { + let inner = self.inner.read(); + m.txpool_size.set(inner.by_hash.len() as i64); + m.txpool_pending.set(inner.pending_count as i64); + m.txpool_queued.set(inner.queued_count as i64); + } + } + + /// Record a rejected transaction metric. + fn record_rejection(&self, reason: &str) { + let metrics_guard = self.metrics.read(); + if let Some(ref m) = *metrics_guard { + m.txpool_rejected.get_or_create(&ReasonLabel { reason: reason.to_string() }).inc(); + } } /// Adds a validated transaction to the pool. pub fn add(&self, tx: OrderedTransaction) -> Result<(), TxPoolError> { + let added_event = tx_added_event(&tx); + let mut replaced_hash = None; + let mut evicted_hashes = Vec::new(); + let mut inner = self.inner.write(); + let tx_id = ordered_tx_id(&tx); - if inner.by_hash.contains_key(&tx.hash) { + if inner.by_hash.contains_key(&tx.hash) || inner.by_id.contains_key(&tx_id) { return Err(TxPoolError::AlreadyExists); } let sender = tx.sender; - let queue = - inner.by_sender.entry(sender).or_insert_with(|| SenderQueue::new(sender, tx.nonce)); + let target = insertion_target(inner.by_sender.get(&sender), &tx); + + if let Some(queue) = inner.by_sender.get(&sender) { + if tx.nonce < queue.next_nonce { + return Err(TxPoolError::NonceTooLow { got: tx.nonce, expected: queue.next_nonce }); + } - if queue.total_count() >= self.config.max_txs_per_sender { - return Err(TxPoolError::SenderFull(sender)); + if target != InsertionTarget::Replacement + && queue.total_count() >= self.config.max_txs_per_sender + { + return Err(TxPoolError::SenderFull(sender)); + } } + self.reject_underpriced_when_full(&inner, &tx, target)?; + + let queue = + inner.by_sender.entry(sender).or_insert_with(|| SenderQueue::new(sender, tx.nonce)); + if let Some(replaced) = queue.insert(tx.clone()) { if replaced.hash == tx.hash { - return Err(TxPoolError::AlreadyExists); + return 
Err(TxPoolError::ReplacementUnderpriced); } - inner.by_hash.remove(&replaced.hash); + replaced_hash = Some(replaced.hash); + inner.remove_by_hash(&replaced.hash); debug!(hash = ?replaced.hash, "replaced transaction"); } + let inserted_hash = tx.hash; inner.by_hash.insert(tx.hash, tx); + inner.by_id.insert(tx_id, inserted_hash); inner.update_counts(); + let mut inserted_evicted = false; + while inner.pending_count > self.config.max_pending_txs { + let Some(evicted) = Self::evict_lowest_pending(&mut inner) else { + break; + }; + inserted_evicted |= evicted.hash == inserted_hash; + evicted_hashes.push(evicted.hash); + debug!( + hash = ?evicted.hash, + sender = ?evicted.sender, + nonce = evicted.nonce, + gas_price = evicted.effective_gas_price, + "evicted lowest-fee pending transaction" + ); + } + + while inner.queued_count > self.config.max_queued_txs { + let Some(evicted) = Self::evict_lowest_queued(&mut inner) else { + break; + }; + inserted_evicted |= evicted.hash == inserted_hash; + evicted_hashes.push(evicted.hash); + debug!( + hash = ?evicted.hash, + sender = ?evicted.sender, + nonce = evicted.nonce, + gas_price = evicted.effective_gas_price, + "evicted lowest-fee queued transaction" + ); + } + if inner.pending_count > self.config.max_pending_txs { warn!( count = inner.pending_count, max = self.config.max_pending_txs, - "pool exceeds pending limit" + "pool still exceeds pending limit after eviction" ); } @@ -97,13 +279,111 @@ impl TransactionPool { warn!( count = inner.queued_count, max = self.config.max_queued_txs, - "pool exceeds queued limit" + "pool still exceeds queued limit after eviction" ); } + // Drop the write lock before sending events + drop(inner); + + if let Some(events) = &self.events { + if let Some(hash) = replaced_hash { + let _ = + events.send(MempoolEvent::TxEvicted { hash, reason: "replaced".to_string() }); + } + if !inserted_evicted { + let _ = events.send(added_event); + } + for hash in &evicted_hashes { + let _ = events + 
.send(MempoolEvent::TxEvicted { hash: *hash, reason: "evicted".to_string() }); + } + } + + self.sync_metrics(); + + if inserted_evicted { + return Err(TxPoolError::PoolFull); + } + + Ok(()) + } + + fn reject_underpriced_when_full( + &self, + inner: &PoolInner, + tx: &OrderedTransaction, + target: InsertionTarget, + ) -> Result<(), TxPoolError> { + match target { + InsertionTarget::Pending => { + if self.config.max_pending_txs == 0 { + return Err(TxPoolError::PoolFull); + } + if inner.pending_count >= self.config.max_pending_txs + && let Some(min_price) = Self::min_pending_price(inner) + && tx.effective_gas_price <= min_price + { + return Err(TxPoolError::PoolFull); + } + } + InsertionTarget::Queued => { + if self.config.max_queued_txs == 0 { + return Err(TxPoolError::PoolFull); + } + if inner.queued_count >= self.config.max_queued_txs + && let Some(min_price) = Self::min_queued_price(inner) + && tx.effective_gas_price <= min_price + { + return Err(TxPoolError::PoolFull); + } + } + InsertionTarget::Replacement => {} + } + Ok(()) } + fn min_pending_price(inner: &PoolInner) -> Option { + inner + .by_sender + .values() + .flat_map(|queue| queue.pending.iter().map(|tx| tx.effective_gas_price)) + .min() + } + + fn min_queued_price(inner: &PoolInner) -> Option { + inner + .by_sender + .values() + .flat_map(|queue| queue.queued.iter().map(|tx| tx.effective_gas_price)) + .min() + } + + fn evict_lowest_pending(inner: &mut PoolInner) -> Option { + let hash = inner + .by_sender + .values() + .flat_map(|queue| queue.pending.iter()) + .min_by_key(|tx| (tx.effective_gas_price, std::cmp::Reverse(tx.timestamp), tx.hash)) + .map(|tx| tx.hash)?; + let removed = inner.remove_by_hash(&hash); + inner.update_counts(); + removed + } + + fn evict_lowest_queued(inner: &mut PoolInner) -> Option { + let hash = inner + .by_sender + .values() + .flat_map(|queue| queue.queued.iter()) + .min_by_key(|tx| (tx.effective_gas_price, std::cmp::Reverse(tx.timestamp), tx.hash)) + .map(|tx| tx.hash)?; + 
let removed = inner.remove_by_hash(&hash); + inner.update_counts(); + removed + } + /// Returns pending transactions sorted by effective gas price. pub fn pending(&self, max_txs: usize) -> Vec { let inner = self.inner.read(); @@ -122,31 +402,40 @@ impl TransactionPool { inner.by_sender.get(sender).map(|q| q.pending.clone()).unwrap_or_default() } + /// Returns the next expected nonce for `sender` after all pending + /// (executable) transactions, or `None` if the sender has no queue. + pub fn next_nonce(&self, sender: &Address) -> Option { + let inner = self.inner.read(); + inner.by_sender.get(sender).map(SenderQueue::next_pending_nonce) + } + /// Gets a transaction by its hash. pub fn get(&self, hash: &B256) -> Option { self.inner.read().by_hash.get(hash).cloned() } - /// Removes a transaction by its hash. - pub fn remove(&self, hash: &B256) -> Option { + /// Removes a transaction by its hash, emitting a `TxEvicted` event with the + /// provided `reason`. + pub fn remove_with_reason(&self, hash: &B256, reason: &str) -> Option { let mut inner = self.inner.write(); + let tx = inner.remove_by_hash(hash)?; + inner.update_counts(); + drop(inner); - let tx = inner.by_hash.remove(hash)?; - let sender = tx.sender; - - if let Some(queue) = inner.by_sender.get_mut(&sender) { - queue.pending.retain(|t| t.hash != *hash); - queue.queued.retain(|t| t.hash != *hash); - - if queue.is_empty() { - inner.by_sender.remove(&sender); - } + if let Some(events) = &self.events { + let _ = + events.send(MempoolEvent::TxEvicted { hash: *hash, reason: reason.to_string() }); } - inner.update_counts(); + self.sync_metrics(); Some(tx) } + /// Removes a transaction by its hash. + pub fn remove(&self, hash: &B256) -> Option { + self.remove_with_reason(hash, "removed") + } + /// Removes confirmed transactions for a sender up to the given nonce. 
pub fn remove_confirmed(&self, sender: &Address, confirmed_nonce: u64) { let mut inner = self.inner.write(); @@ -158,6 +447,7 @@ impl TransactionPool { queue .pending .iter() + .chain(queue.queued.iter()) .filter(|tx| tx.nonce <= confirmed_nonce) .map(|tx| tx.hash) .collect() @@ -165,7 +455,7 @@ impl TransactionPool { .unwrap_or_default(); for hash in hashes_to_remove { - inner.by_hash.remove(&hash); + inner.remove_by_hash(&hash); } if let Some(queue) = inner.by_sender.get_mut(sender) { @@ -176,6 +466,8 @@ impl TransactionPool { } inner.update_counts(); + drop(inner); + self.sync_metrics(); } /// Returns the count of pending (executable) transactions. @@ -208,35 +500,139 @@ impl TransactionPool { self.inner.read().by_hash.contains_key(hash) } + /// Returns `true` if the pool already contains a transaction from `sender` + /// with the given `nonce`. + /// + /// This is a cheap, synchronous check (read-lock only) intended for use by + /// the transaction validator to reject same-nonce duplicates at ingress. + pub fn has_nonce(&self, sender: &Address, nonce: u64) -> bool { + let inner = self.inner.read(); + let Some(queue) = inner.by_sender.get(sender) else { + return false; + }; + queue.pending.iter().chain(queue.queued.iter()).any(|tx| tx.nonce == nonce) + } + + /// Returns all sender queues for pool introspection. + pub fn snapshot(&self) -> HashMap, Vec)> { + self.inner + .read() + .by_sender + .iter() + .map(|(sender, queue)| (*sender, (queue.pending.clone(), queue.queued.clone()))) + .collect() + } + + /// Removes expired transactions and returns the number removed. 
+ pub fn cleanup(&self) -> usize { + let now = current_timestamp(); + let mut inner = self.inner.write(); + let expired: Vec = inner + .by_sender + .values() + .flat_map(|queue| { + let pending = queue.pending.iter().filter_map(|tx| { + (now.saturating_sub(tx.timestamp) > self.config.pending_ttl_secs) + .then_some(tx.hash) + }); + let queued = queue.queued.iter().filter_map(|tx| { + (now.saturating_sub(tx.timestamp) > self.config.queued_ttl_secs) + .then_some(tx.hash) + }); + pending.chain(queued) + }) + .collect(); + + let mut removed = 0; + for hash in expired { + if inner.remove_by_hash(&hash).is_some() { + removed += 1; + } + } + inner.update_counts(); + drop(inner); + if removed > 0 { + self.sync_metrics(); + } + removed + } + + /// Returns the pool configuration. + pub const fn config(&self) -> &PoolConfig { + &self.config + } + /// Removes all transactions from the pool. pub fn clear(&self) { let mut inner = self.inner.write(); inner.by_hash.clear(); + inner.by_id.clear(); inner.by_sender.clear(); inner.pending_count = 0; inner.queued_count = 0; + drop(inner); + self.sync_metrics(); } } impl Clone for TransactionPool { fn clone(&self) -> Self { - let inner = self.inner.read(); Self { - inner: RwLock::new(PoolInner { - by_hash: inner.by_hash.clone(), - by_sender: inner.by_sender.clone(), - pending_count: inner.pending_count, - queued_count: inner.queued_count, - }), + inner: self.inner.clone(), config: self.config.clone(), + events: self.events.clone(), + metrics: self.metrics.clone(), // Arc clone: all clones share the same metrics handle } } } +fn tx_added_event(tx: &OrderedTransaction) -> MempoolEvent { + MempoolEvent::TxAdded { + hash: tx.hash, + from: tx.sender, + to: tx.envelope.to(), + value: tx.envelope.value(), + gas_price: U256::from(tx.effective_gas_price), + nonce: tx.nonce, + } +} + fn current_timestamp() -> u64 { SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0) } +fn ordered_to_tx(tx: &OrderedTransaction) -> Tx { + 
let mut raw = Vec::new(); + tx.envelope.encode_2718(&mut raw); + Tx::new(Bytes::from(raw)) +} + +fn ordered_tx_id(tx: &OrderedTransaction) -> TxId { + ordered_to_tx(tx).id() +} + +/// Map a [`TxPoolError`] to a short label suitable for the `reason` +/// dimension of the `kora_txpool_rejected_total` metric. +fn rejection_reason(err: &TxPoolError) -> String { + match err { + TxPoolError::PoolFull => "pool_full".to_string(), + TxPoolError::SenderFull(_) => "sender_full".to_string(), + TxPoolError::TxTooLarge { .. } => "tx_too_large".to_string(), + TxPoolError::GasPriceTooLow { .. } => "gas_price_too_low".to_string(), + TxPoolError::NonceTooLow { .. } => "nonce_too_low".to_string(), + TxPoolError::NonceGap { .. } => "nonce_gap".to_string(), + TxPoolError::InsufficientBalance { .. } => "insufficient_balance".to_string(), + TxPoolError::InvalidChainId { .. } => "invalid_chain_id".to_string(), + TxPoolError::InvalidSignature => "invalid_signature".to_string(), + TxPoolError::DecodeError(_) => "decode_error".to_string(), + TxPoolError::IntrinsicGasTooLow { .. } => "intrinsic_gas_too_low".to_string(), + TxPoolError::AlreadyExists => "already_exists".to_string(), + TxPoolError::NonceAlreadyInPool { .. 
} => "nonce_already_in_pool".to_string(), + TxPoolError::StateError(_) => "state_error".to_string(), + TxPoolError::ReplacementUnderpriced => "replacement_underpriced".to_string(), + } +} + fn tx_to_ordered(tx: &Tx) -> Option { let envelope = TxEnvelope::decode_2718(&mut tx.bytes.as_ref()).ok()?; let sender = recover_sender_from_envelope(&envelope).ok()?; @@ -264,6 +660,7 @@ impl Mempool for TransactionPool { fn insert(&self, tx: Tx) -> bool { let Some(ordered) = tx_to_ordered(&tx) else { trace!("failed to decode transaction for mempool insert"); + self.record_rejection("decode_error"); return false; }; @@ -271,6 +668,7 @@ impl Mempool for TransactionPool { Ok(()) => true, Err(e) => { trace!(?e, "failed to insert transaction"); + self.record_rejection(&rejection_reason(&e)); false } } @@ -278,36 +676,38 @@ impl Mempool for TransactionPool { fn build(&self, max_txs: usize, excluded: &BTreeSet) -> Vec { let inner = self.inner.read(); - - let mut candidates: Vec<_> = inner + let mut senders: HashMap = inner .by_sender - .values() - .flat_map(|q| q.pending.iter()) - .filter(|tx| !excluded.contains(&TxId(tx.hash))) - .cloned() + .iter() + .filter(|(_, queue)| !queue.pending.is_empty()) + .map(|(sender, queue)| { + ( + *sender, + BuildSenderState { + txs: queue.pending.clone(), + index: 0, + expected_nonce: queue.next_nonce, + }, + ) + }) .collect(); - - candidates.sort(); - - let mut result = Vec::with_capacity(max_txs.min(candidates.len())); - let mut included_senders: HashMap = HashMap::new(); - - for tx in candidates { - if result.len() >= max_txs { + let pending_count = senders.values().map(|state| state.txs.len()).sum(); + let mut result = Vec::with_capacity(max_txs.min(pending_count)); + + while result.len() < max_txs { + let Some((sender, tx)) = senders + .iter_mut() + .filter_map(|(sender, state)| { + state.next_candidate(excluded).map(|tx| (*sender, tx)) + }) + .min_by(|(_, left), (_, right)| left.cmp(right)) + else { break; - } + }; - let expected_nonce = 
included_senders - .get(&tx.sender) - .copied() - .or_else(|| inner.by_sender.get(&tx.sender).map(|q| q.next_nonce)) - .unwrap_or(0); - - if tx.nonce == expected_nonce { - included_senders.insert(tx.sender, tx.nonce + 1); - let mut raw = Vec::new(); - tx.envelope.encode_2718(&mut raw); - result.push(Tx::new(Bytes::from(raw))); + if let Some(state) = senders.get_mut(&sender) { + state.consume(); + result.push(ordered_to_tx(&tx)); } } @@ -317,18 +717,40 @@ impl Mempool for TransactionPool { fn prune(&self, tx_ids: &[TxId]) { let mut inner = self.inner.write(); - let mut senders_to_check: Vec

= Vec::new(); - + let mut confirmed_by_sender: HashMap = HashMap::new(); for id in tx_ids { - if let Some(tx) = inner.by_hash.remove(&id.0) { - senders_to_check.push(tx.sender); - if let Some(queue) = inner.by_sender.get_mut(&tx.sender) { - queue.pending.retain(|t| t.hash != id.0); - queue.queued.retain(|t| t.hash != id.0); - } + let Some(hash) = inner.by_id.get(id) else { + continue; + }; + if let Some(tx) = inner.by_hash.get(hash) { + confirmed_by_sender + .entry(tx.sender) + .and_modify(|nonce| *nonce = (*nonce).max(tx.nonce)) + .or_insert(tx.nonce); + } + } + + let mut senders_to_check: Vec
= Vec::with_capacity(confirmed_by_sender.len()); + let mut hashes_to_remove = Vec::new(); + for (sender, confirmed_nonce) in confirmed_by_sender { + if let Some(queue) = inner.by_sender.get_mut(&sender) { + hashes_to_remove.extend( + queue + .pending + .iter() + .chain(queue.queued.iter()) + .filter(|tx| tx.nonce <= confirmed_nonce) + .map(|tx| tx.hash), + ); + queue.remove_confirmed(confirmed_nonce); + senders_to_check.push(sender); } } + for hash in hashes_to_remove { + inner.remove_by_hash(&hash); + } + for sender in senders_to_check { if let Some(queue) = inner.by_sender.get(&sender) && queue.is_empty() @@ -338,6 +760,8 @@ impl Mempool for TransactionPool { } inner.update_counts(); + drop(inner); + self.sync_metrics(); } fn len(&self) -> usize { @@ -383,6 +807,24 @@ mod tests { OrderedTransaction::new(random_b256(), sender, nonce, gas_price, 0, envelope) } + fn tx_nonce(tx: &Tx) -> u64 { + let mut data = tx.bytes.as_ref(); + TxEnvelope::decode_2718(&mut data).unwrap().nonce() + } + + fn tx_nonce_and_gas_price(tx: &Tx) -> (u64, u128) { + let mut data = tx.bytes.as_ref(); + let envelope = TxEnvelope::decode_2718(&mut data).unwrap(); + let gas_price = match &envelope { + TxEnvelope::Legacy(tx) => tx.tx().gas_price, + TxEnvelope::Eip2930(tx) => tx.tx().gas_price, + TxEnvelope::Eip1559(tx) => tx.tx().max_fee_per_gas, + TxEnvelope::Eip4844(tx) => tx.tx().tx().max_fee_per_gas, + TxEnvelope::Eip7702(tx) => tx.tx().max_fee_per_gas, + }; + (envelope.nonce(), gas_price) + } + #[test] fn pool_add_and_pending() { let config = PoolConfig::default(); @@ -392,8 +834,8 @@ mod tests { let tx0 = make_ordered_tx(sender, 0, 100); let tx1 = make_ordered_tx(sender, 1, 100); - pool.add(tx0.clone()).unwrap(); - pool.add(tx1.clone()).unwrap(); + pool.add(tx0).unwrap(); + pool.add(tx1).unwrap(); assert_eq!(pool.pending_count(), 2); assert_eq!(pool.len(), 2); @@ -402,6 +844,51 @@ mod tests { assert_eq!(pending.len(), 2); } + #[test] + fn pool_broadcasts_tx_added_on_insert() { + let 
(events, mut receiver) = broadcast::channel(16); + let pool = TransactionPool::new_with_events(PoolConfig::default(), events); + let sender = random_address(); + let tx = make_ordered_tx(sender, 0, 100); + + pool.add(tx.clone()).unwrap(); + + let event = receiver.try_recv().unwrap(); + assert_eq!( + event, + MempoolEvent::TxAdded { + hash: tx.hash, + from: tx.sender, + to: tx.envelope.to(), + value: tx.envelope.value(), + gas_price: U256::from(tx.effective_gas_price), + nonce: tx.nonce, + } + ); + } + + #[test] + fn pool_broadcasts_replaced_transaction_as_evicted() { + let (events, mut receiver) = broadcast::channel(16); + let pool = TransactionPool::new_with_events(PoolConfig::default(), events); + let sender = random_address(); + let low_fee = make_ordered_tx(sender, 0, 100); + let high_fee = make_ordered_tx(sender, 0, 200); + + pool.add(low_fee.clone()).unwrap(); + pool.add(high_fee.clone()).unwrap(); + + let _ = receiver.try_recv().unwrap(); + assert_eq!( + receiver.try_recv().unwrap(), + MempoolEvent::TxEvicted { hash: low_fee.hash, reason: "replaced".to_string() } + ); + assert!(matches!( + receiver.try_recv().unwrap(), + MempoolEvent::TxAdded { hash, .. 
} if hash == high_fee.hash + )); + } + #[test] fn pool_duplicate_rejected() { let config = PoolConfig::default(); @@ -430,6 +917,125 @@ mod tests { )); } + #[test] + fn pool_evicts_lowest_fee_pending_on_overflow() { + let config = PoolConfig::default().with_max_pending_txs(3); + let pool = TransactionPool::new(config); + + let tx_low = make_ordered_tx(random_address(), 0, 10); + let tx_med = make_ordered_tx(random_address(), 0, 20); + let tx_high = make_ordered_tx(random_address(), 0, 30); + let tx_new = make_ordered_tx(random_address(), 0, 15); + + pool.add(tx_low.clone()).unwrap(); + pool.add(tx_med.clone()).unwrap(); + pool.add(tx_high.clone()).unwrap(); + pool.add(tx_new.clone()).unwrap(); + + assert_eq!(pool.pending_count(), 3); + assert!(!pool.contains(&tx_low.hash)); + assert!(pool.contains(&tx_new.hash)); + assert!(pool.contains(&tx_med.hash)); + assert!(pool.contains(&tx_high.hash)); + } + + #[test] + fn pool_rejects_low_fee_pending_when_full() { + let config = PoolConfig::default().with_max_pending_txs(2); + let pool = TransactionPool::new(config); + + pool.add(make_ordered_tx(random_address(), 0, 100)).unwrap(); + pool.add(make_ordered_tx(random_address(), 0, 200)).unwrap(); + + let low_fee = make_ordered_tx(random_address(), 0, 50); + assert!(matches!(pool.add(low_fee), Err(TxPoolError::PoolFull))); + assert_eq!(pool.pending_count(), 2); + } + + #[test] + fn pool_evicts_lowest_fee_queued_on_overflow() { + let config = PoolConfig::default().with_max_queued_txs(2); + let pool = TransactionPool::new(config); + let sender = random_address(); + + let tx0 = make_ordered_tx(sender, 0, 100); + let tx2_low = make_ordered_tx(sender, 2, 10); + let tx3_high = make_ordered_tx(sender, 3, 30); + let tx4_mid = make_ordered_tx(sender, 4, 20); + + pool.add(tx0).unwrap(); + pool.add(tx2_low.clone()).unwrap(); + pool.add(tx3_high.clone()).unwrap(); + pool.add(tx4_mid.clone()).unwrap(); + + assert_eq!(pool.queued_count(), 2); + assert!(!pool.contains(&tx2_low.hash)); + 
assert!(pool.contains(&tx3_high.hash)); + assert!(pool.contains(&tx4_mid.hash)); + } + + #[test] + fn pool_rejects_low_fee_queued_when_full() { + let config = PoolConfig::default().with_max_queued_txs(1); + let pool = TransactionPool::new(config); + let sender = random_address(); + + pool.add(make_ordered_tx(sender, 0, 100)).unwrap(); + pool.add(make_ordered_tx(sender, 2, 100)).unwrap(); + + let low_fee = make_ordered_tx(sender, 3, 50); + assert!(matches!(pool.add(low_fee), Err(TxPoolError::PoolFull))); + assert_eq!(pool.queued_count(), 1); + } + + #[test] + fn pool_eviction_preserves_sender_nonce_gap() { + let config = PoolConfig::default().with_max_pending_txs(2); + let pool = TransactionPool::new(config); + let sender = random_address(); + + let tx0_low = make_ordered_tx(sender, 0, 10); + let tx1_high = make_ordered_tx(sender, 1, 100); + let other = make_ordered_tx(random_address(), 0, 50); + + pool.add(tx0_low.clone()).unwrap(); + pool.add(tx1_high.clone()).unwrap(); + pool.add(other.clone()).unwrap(); + + assert!(!pool.contains(&tx0_low.hash)); + assert!(pool.contains(&tx1_high.hash)); + assert_eq!(pool.pending_count(), 1); + assert_eq!(pool.queued_count(), 1); + + let built = pool.build(10, &BTreeSet::new()); + assert_eq!(built.len(), 1); + assert_eq!(tx_nonce(&built[0]), other.nonce); + + let tx0_replacement = make_ordered_tx(sender, 0, 200); + pool.add(tx0_replacement.clone()).unwrap(); + + let built = pool.build(10, &BTreeSet::new()); + assert_eq!(built.len(), 2); + assert_eq!(tx_nonce(&built[0]), tx0_replacement.nonce); + assert_eq!(tx_nonce(&built[1]), tx1_high.nonce); + } + + #[test] + fn pool_cleanup_removes_expired_transactions() { + let config = PoolConfig::default().with_pending_ttl_secs(60).with_queued_ttl_secs(60 * 60); + let pool = TransactionPool::new(config); + + let sender = random_address(); + let mut expired = make_ordered_tx(sender, 0, 100); + expired.timestamp = current_timestamp().saturating_sub(120); + pool.add(expired.clone()).unwrap(); 
+ + let removed = pool.cleanup(); + assert_eq!(removed, 1); + assert!(!pool.contains(&expired.hash)); + assert!(pool.is_empty()); + } + #[test] fn pool_remove() { let config = PoolConfig::default(); @@ -446,6 +1052,54 @@ mod tests { assert_eq!(pool.len(), 0); } + #[test] + fn pool_remove_confirmed_removes_queued_hashes() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = make_ordered_tx(sender, 0, 100); + let tx2 = make_ordered_tx(sender, 2, 100); + + pool.add(tx0.clone()).unwrap(); + pool.add(tx2.clone()).unwrap(); + + assert_eq!(pool.len(), 2); + assert_eq!(pool.pending_count(), 1); + assert_eq!(pool.queued_count(), 1); + assert!(pool.contains(&tx2.hash)); + + pool.remove_confirmed(&sender, 2); + + assert_eq!(pool.len(), 0); + assert_eq!(pool.pending_count(), 0); + assert_eq!(pool.queued_count(), 0); + assert!(!pool.contains(&tx0.hash)); + assert!(!pool.contains(&tx2.hash)); + } + + #[test] + fn pool_remove_confirmed_preserves_queued_progress_after_gap() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = make_ordered_tx(sender, 0, 100); + let tx2 = make_ordered_tx(sender, 2, 100); + + pool.add(tx0).unwrap(); + pool.add(tx2.clone()).unwrap(); + pool.remove_confirmed(&sender, 0); + + assert_eq!(pool.len(), 1); + assert!(pool.contains(&tx2.hash)); + assert!(pool.build(10, &BTreeSet::new()).is_empty()); + + let tx1 = make_ordered_tx(sender, 1, 100); + pool.add(tx1.clone()).unwrap(); + + let txs = pool.build(10, &BTreeSet::new()); + assert_eq!(txs.len(), 2); + assert_eq!(tx_nonce(&txs[0]), tx1.nonce); + assert_eq!(tx_nonce(&txs[1]), tx2.nonce); + } + #[test] fn pool_clear() { let config = PoolConfig::default(); @@ -458,4 +1112,217 @@ mod tests { pool.clear(); assert!(pool.is_empty()); } + + #[test] + fn pool_prune_advances_sender_nonce() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = 
make_ordered_tx(sender, 0, 100); + let tx1 = make_ordered_tx(sender, 1, 100); + let tx2 = make_ordered_tx(sender, 2, 100); + let tx3 = make_ordered_tx(sender, 3, 100); + + pool.add(tx0.clone()).unwrap(); + pool.add(tx1.clone()).unwrap(); + pool.add(tx2.clone()).unwrap(); + pool.add(tx3.clone()).unwrap(); + + pool.prune(&[ordered_tx_id(&tx0), ordered_tx_id(&tx1)]); + + let txs = pool.build(10, &BTreeSet::new()); + assert_eq!(txs.len(), 2); + assert_eq!(tx_nonce(&txs[0]), tx2.nonce); + assert_eq!(tx_nonce(&txs[1]), tx3.nonce); + } + + #[test] + fn pool_prune_uses_domain_tx_ids() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = make_ordered_tx(sender, 0, 100); + let tx1 = make_ordered_tx(sender, 1, 100); + + pool.add(tx0.clone()).unwrap(); + pool.add(tx1.clone()).unwrap(); + + let built = pool.build(10, &BTreeSet::new()); + assert_eq!(built.len(), 2); + + let ids: Vec = built.iter().map(Tx::id).collect(); + pool.prune(&ids[..1]); + + assert!(!pool.contains(&tx0.hash)); + assert!(pool.contains(&tx1.hash)); + let rebuilt = pool.build(10, &BTreeSet::new()); + assert_eq!(rebuilt.len(), 1); + assert_eq!(tx_nonce(&rebuilt[0]), tx1.nonce); + } + + #[test] + fn pool_build_treats_excluded_ancestors_as_nonce_progress() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = make_ordered_tx(sender, 0, 100); + let tx1 = make_ordered_tx(sender, 1, 100); + let tx2 = make_ordered_tx(sender, 2, 100); + + pool.add(tx0.clone()).unwrap(); + pool.add(tx1.clone()).unwrap(); + pool.add(tx2.clone()).unwrap(); + + let excluded = BTreeSet::from([ordered_tx_id(&tx0)]); + let txs = pool.build(10, &excluded); + + assert_eq!(txs.len(), 2); + assert_eq!(tx_nonce(&txs[0]), tx1.nonce); + assert_eq!(tx_nonce(&txs[1]), tx2.nonce); + } + + #[test] + fn pool_remove_broadcasts_tx_evicted() { + let (events, mut receiver) = broadcast::channel(16); + let pool = 
TransactionPool::new_with_events(PoolConfig::default(), events); + let sender = random_address(); + let tx = make_ordered_tx(sender, 0, 100); + let hash = tx.hash; + + pool.add(tx).unwrap(); + // drain the TxAdded event + let _ = receiver.try_recv().unwrap(); + + pool.remove(&hash); + + assert_eq!( + receiver.try_recv().unwrap(), + MempoolEvent::TxEvicted { hash, reason: "removed".to_string() } + ); + } + + #[test] + fn pool_remove_with_reason_broadcasts_custom_reason() { + let (events, mut receiver) = broadcast::channel(16); + let pool = TransactionPool::new_with_events(PoolConfig::default(), events); + let sender = random_address(); + let tx = make_ordered_tx(sender, 0, 100); + let hash = tx.hash; + + pool.add(tx).unwrap(); + // drain the TxAdded event + let _ = receiver.try_recv().unwrap(); + + pool.remove_with_reason(&hash, "expired"); + + assert_eq!( + receiver.try_recv().unwrap(), + MempoolEvent::TxEvicted { hash, reason: "expired".to_string() } + ); + } + + #[test] + fn pool_prune_batches_highest_confirmed_nonce_per_sender() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender_a = random_address(); + let sender_b = random_address(); + let a0 = make_ordered_tx(sender_a, 0, 100); + let a1 = make_ordered_tx(sender_a, 1, 100); + let a2 = make_ordered_tx(sender_a, 2, 100); + let a3 = make_ordered_tx(sender_a, 3, 100); + let b0 = make_ordered_tx(sender_b, 0, 101); + let b1 = make_ordered_tx(sender_b, 1, 101); + + for tx in [&a0, &a1, &a2, &a3, &b0, &b1] { + pool.add(tx.clone()).unwrap(); + } + + pool.prune(&[ordered_tx_id(&a1), ordered_tx_id(&b0)]); + + assert_eq!(pool.len(), 3); + assert!(!pool.contains(&a0.hash)); + assert!(!pool.contains(&a1.hash)); + assert!(!pool.contains(&b0.hash)); + assert!(pool.contains(&a2.hash)); + assert!(pool.contains(&a3.hash)); + assert!(pool.contains(&b1.hash)); + + let sender_a_nonces: Vec<_> = + pool.pending_for_sender(&sender_a).into_iter().map(|tx| tx.nonce).collect(); + let sender_b_nonces: Vec<_> = + 
pool.pending_for_sender(&sender_b).into_iter().map(|tx| tx.nonce).collect(); + assert_eq!(sender_a_nonces, vec![2, 3]); + assert_eq!(sender_b_nonces, vec![1]); + } + + #[test] + fn pool_prune_promotes_queued_transactions_after_gap_fills() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + let tx0 = make_ordered_tx(sender, 0, 100); + let tx2 = make_ordered_tx(sender, 2, 100); + + pool.add(tx0.clone()).unwrap(); + pool.add(tx2.clone()).unwrap(); + pool.prune(&[ordered_tx_id(&tx0)]); + + assert!(pool.build(10, &BTreeSet::new()).is_empty()); + + let tx1 = make_ordered_tx(sender, 1, 100); + pool.add(tx1.clone()).unwrap(); + + let txs = pool.build(10, &BTreeSet::new()); + assert_eq!(txs.len(), 2); + assert_eq!(tx_nonce(&txs[0]), tx1.nonce); + assert_eq!(tx_nonce(&txs[1]), tx2.nonce); + } + + #[test] + fn pool_build_preserves_sender_nonce_order_under_fee_pressure() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender_a = random_address(); + let sender_b = random_address(); + let a0 = make_ordered_tx(sender_a, 0, 10); + let a1 = make_ordered_tx(sender_a, 1, 1_000); + let b0 = make_ordered_tx(sender_b, 0, 500); + + pool.add(a0).unwrap(); + pool.add(a1).unwrap(); + pool.add(b0).unwrap(); + + let txs = pool.build(10, &BTreeSet::new()); + let order: Vec<_> = txs.iter().map(tx_nonce_and_gas_price).collect(); + + assert_eq!(order, vec![(0, 500), (0, 10), (1, 1_000)]); + } + + #[test] + fn pool_has_nonce_returns_true_for_pending_tx() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + pool.add(make_ordered_tx(sender, 0, 100)).unwrap(); + pool.add(make_ordered_tx(sender, 1, 100)).unwrap(); + + assert!(pool.has_nonce(&sender, 0)); + assert!(pool.has_nonce(&sender, 1)); + assert!(!pool.has_nonce(&sender, 2)); + } + + #[test] + fn pool_has_nonce_returns_true_for_queued_tx() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + 
pool.add(make_ordered_tx(sender, 0, 100)).unwrap(); + // nonce 2 is queued (gap at nonce 1) + pool.add(make_ordered_tx(sender, 2, 100)).unwrap(); + + assert!(pool.has_nonce(&sender, 0)); + assert!(!pool.has_nonce(&sender, 1)); + assert!(pool.has_nonce(&sender, 2)); + } + + #[test] + fn pool_has_nonce_returns_false_for_unknown_sender() { + let pool = TransactionPool::new(PoolConfig::default()); + let sender = random_address(); + + assert!(!pool.has_nonce(&sender, 0)); + } } diff --git a/crates/node/txpool/src/validator.rs b/crates/node/txpool/src/validator.rs index 82046df..10a3381 100644 --- a/crates/node/txpool/src/validator.rs +++ b/crates/node/txpool/src/validator.rs @@ -8,7 +8,9 @@ use kora_domain::Tx; use kora_traits::StateDbRead; use sha3::{Digest, Keccak256}; -use crate::{config::PoolConfig, error::TxPoolError, ordering::OrderedTransaction}; +use crate::{ + config::PoolConfig, error::TxPoolError, ordering::OrderedTransaction, pool::TransactionPool, +}; const TX_BASE_GAS: u64 = 21000; const TX_DATA_ZERO_GAS: u64 = 4; @@ -44,12 +46,21 @@ pub struct TransactionValidator { chain_id: u64, state: S, config: PoolConfig, + pool: Option, } impl TransactionValidator { /// Creates a new transaction validator. pub const fn new(chain_id: u64, state: S, config: PoolConfig) -> Self { - Self { chain_id, state, config } + Self { chain_id, state, config, pool: None } + } + + /// Attach a transaction pool so the validator can reject same-nonce + /// duplicates at ingress time. + #[must_use] + pub fn with_pool(mut self, pool: TransactionPool) -> Self { + self.pool = Some(pool); + self } /// Validates a raw transaction. @@ -99,6 +110,15 @@ impl TransactionValidator { return Err(TxPoolError::NonceGap { got: nonce, expected: state_nonce }); } + // Reject if the pool already contains a transaction from this sender + // with the same nonce. This prevents same-nonce conflicts from + // passing validation when only finalized state is checked. 
+ if let Some(pool) = &self.pool + && pool.has_nonce(&sender, nonce) + { + return Err(TxPoolError::NonceAlreadyInPool { sender, nonce }); + } + let max_cost = max_tx_cost(&envelope); let balance = self .state @@ -466,7 +486,7 @@ mod tests { let state = MockState::new().with_account(sender, 0, U256::from(1_000_000_000_000_000_000u64)); - let config = PoolConfig::default().with_min_gas_price(1_000_000_000); + let config = PoolConfig::default(); // min_gas_price defaults to 1 gwei let validator = TransactionValidator::new(chain_id, state, config); let result = validator.validate(raw_tx).await; @@ -854,4 +874,166 @@ mod tests { let result = validator.validate(invalid_tx).await; assert!(matches!(result, Err(TxPoolError::DecodeError(_)))); } + + /// Sign a transaction with a given key and return (sender, signed_envelope, raw_bytes). + fn sign_eip1559_tx_with_key( + signing_key: &SigningKey, + chain_id: u64, + nonce: u64, + gas_limit: u64, + max_fee_per_gas: u128, + value: U256, + to: Option
, + ) -> (Address, TxEnvelope, Tx) { + let verifying_key = signing_key.verifying_key(); + let pubkey = verifying_key.to_encoded_point(false); + let pubkey_bytes = pubkey.as_bytes(); + let pubkey_hash = sha3::Keccak256::digest(&pubkey_bytes[1..]); + let sender = Address::from_slice(&pubkey_hash[12..]); + + let tx = TxEip1559 { + chain_id, + nonce, + gas_limit, + max_fee_per_gas, + max_priority_fee_per_gas: max_fee_per_gas, + to: to.map(TxKind::Call).unwrap_or(TxKind::Create), + value, + access_list: Default::default(), + input: Bytes::new(), + }; + + let sig_hash = tx.signature_hash(); + let (sig, recovery_id) = signing_key.sign_prehash_recoverable(sig_hash.as_slice()).unwrap(); + let r = U256::from_be_slice(&sig.r().to_bytes()); + let s = U256::from_be_slice(&sig.s().to_bytes()); + let v = recovery_id.is_y_odd(); + let signature = Signature::new(r, s, v); + + let signed = tx.into_signed(signature); + let envelope = TxEnvelope::from(signed); + let mut raw_bytes = Vec::new(); + envelope.encode_2718(&mut raw_bytes); + + (sender, envelope, Tx::new(raw_bytes.into())) + } + + #[tokio::test] + async fn reject_nonce_already_in_pool() { + let chain_id = 1u64; + let key = SigningKey::random(&mut OsRng); + let (sender, _, raw_tx1) = sign_eip1559_tx_with_key( + &key, + chain_id, + 0, + 21000, + 1_000_000_000, + U256::from(1000), + Some(Address::ZERO), + ); + // Create a second tx with the same sender+nonce but different value. + let (_, _, raw_tx2) = sign_eip1559_tx_with_key( + &key, + chain_id, + 0, + 21000, + 1_000_000_000, + U256::from(2000), + Some(Address::ZERO), + ); + + let state = + MockState::new().with_account(sender, 0, U256::from(1_000_000_000_000_000_000u64)); + let pool = TransactionPool::new(PoolConfig::default()); + + // Validate and insert the first transaction into the pool. 
+ let config = PoolConfig::default(); + let validator = TransactionValidator::new(chain_id, state.clone(), config.clone()) + .with_pool(pool.clone()); + let validated = validator.validate(raw_tx1).await.unwrap(); + pool.add(validated.into_ordered(0)).unwrap(); + + // The second tx with the same sender+nonce should be rejected. + let validator2 = TransactionValidator::new(chain_id, state, config).with_pool(pool); + let result = validator2.validate(raw_tx2).await; + assert!( + matches!(result, Err(TxPoolError::NonceAlreadyInPool { nonce: 0, .. })), + "expected NonceAlreadyInPool, got: {:?}", + result, + ); + } + + #[tokio::test] + async fn allow_different_nonce_with_pool() { + // A transaction with a different nonce should still pass when + // the pool has a tx from the same sender at a lower nonce. + let chain_id = 1u64; + let key = SigningKey::random(&mut OsRng); + let (sender, _, raw_tx0) = sign_eip1559_tx_with_key( + &key, + chain_id, + 0, + 21000, + 1_000_000_000, + U256::from(1000), + Some(Address::ZERO), + ); + let (_, _, raw_tx1) = sign_eip1559_tx_with_key( + &key, + chain_id, + 1, + 21000, + 1_000_000_000, + U256::from(1000), + Some(Address::ZERO), + ); + + let state = + MockState::new().with_account(sender, 0, U256::from(1_000_000_000_000_000_000u64)); + let pool = TransactionPool::new(PoolConfig::default()); + + let config = PoolConfig::default(); + let validator = TransactionValidator::new(chain_id, state.clone(), config.clone()) + .with_pool(pool.clone()); + let validated = validator.validate(raw_tx0).await.unwrap(); + pool.add(validated.into_ordered(0)).unwrap(); + + // nonce 1 should pass + let validator2 = TransactionValidator::new(chain_id, state, config).with_pool(pool); + assert!(validator2.validate(raw_tx1).await.is_ok()); + } + + #[tokio::test] + async fn allow_same_nonce_without_pool() { + // Without a pool attached, the validator cannot detect same-nonce + // conflicts. Both transactions should pass validation independently. 
+ let chain_id = 1u64; + let key = SigningKey::random(&mut OsRng); + let (sender, _, raw_tx1) = sign_eip1559_tx_with_key( + &key, + chain_id, + 0, + 21000, + 1_000_000_000, + U256::from(1000), + Some(Address::ZERO), + ); + let (_, _, raw_tx2) = sign_eip1559_tx_with_key( + &key, + chain_id, + 0, + 21000, + 1_000_000_000, + U256::from(2000), + Some(Address::ZERO), + ); + + let state = + MockState::new().with_account(sender, 0, U256::from(1_000_000_000_000_000_000u64)); + let config = PoolConfig::default(); + let validator = TransactionValidator::new(chain_id, state, config); + + assert!(validator.validate(raw_tx1).await.is_ok()); + assert!(validator.validate(raw_tx2).await.is_ok()); + } } diff --git a/crates/storage/backend/Cargo.toml b/crates/storage/backend/Cargo.toml index d2c2403..c429929 100644 --- a/crates/storage/backend/Cargo.toml +++ b/crates/storage/backend/Cargo.toml @@ -19,10 +19,12 @@ async-trait = "0.1" bytes.workspace = true commonware-codec.workspace = true commonware-cryptography.workspace = true +commonware-parallel.workspace = true commonware-runtime.workspace = true commonware-storage.workspace = true commonware-utils.workspace = true thiserror.workspace = true +tracing.workspace = true [dev-dependencies] tempfile = "3" diff --git a/crates/storage/backend/src/accounts.rs b/crates/storage/backend/src/accounts.rs index 8d64261..7384777 100644 --- a/crates/storage/backend/src/accounts.rs +++ b/crates/storage/backend/src/accounts.rs @@ -2,6 +2,7 @@ use alloy_primitives::Address; use commonware_cryptography::sha256::Digest as QmdbDigest; +use commonware_parallel::Sequential; use commonware_storage::{qmdb::any::VariableConfig, translator::EightCap}; use kora_qmdb::{AccountEncoding, QmdbBatchable, QmdbGettable}; @@ -30,7 +31,7 @@ impl AccountStore { /// Initialize the account store. 
pub async fn init( context: Context, - config: VariableConfig, + config: VariableConfig, ) -> Result { let inner = AccountDb::init(context, config) .await diff --git a/crates/storage/backend/src/backend.rs b/crates/storage/backend/src/backend.rs index 2031eaf..1a33cd3 100644 --- a/crates/storage/backend/src/backend.rs +++ b/crates/storage/backend/src/backend.rs @@ -4,14 +4,16 @@ use alloy_primitives::B256; use async_trait::async_trait; use commonware_codec::RangeCfg; use commonware_cryptography::sha256::Digest as QmdbDigest; -use commonware_runtime::{Metrics as _, buffer::paged::CacheRef}; +use commonware_parallel::Sequential; +use commonware_runtime::{Supervisor as _, buffer::paged::CacheRef}; use commonware_storage::{ - journal::contiguous::variable::Config as JournalConfig, - merkle::journaled::Config as MerkleConfig, qmdb::any::VariableConfig, translator::EightCap, + journal::contiguous::variable::Config as JournalConfig, merkle::full::Config as MerkleConfig, + qmdb::any::VariableConfig, translator::EightCap, }; use commonware_utils::{NZU64, NZUsize}; use kora_handlers::{HandleError, RootProvider}; -use kora_qmdb::{ChangeSet, QmdbStore, StateRoot}; +use kora_qmdb::{ChangeSet, PartitionCommitSeqs, QmdbStore, StateRoot}; +use tracing::{error, info}; use crate::{ AccountStore, BackendError, CodeStore, QmdbBackendConfig, StorageStore, @@ -33,7 +35,6 @@ pub struct CommonwareBackend { } /// Root provider that computes state roots from commonware-storage partitions. -#[derive(Clone)] pub struct CommonwareRootProvider { context: Context, config: QmdbBackendConfig, @@ -62,7 +63,7 @@ impl CommonwareRootProvider { impl CommonwareBackend { /// Open a backend with the given configuration. 
pub async fn open(context: Context, config: QmdbBackendConfig) -> Result { - let stores = open_stores(context.clone(), &config).await?; + let stores = open_stores(&context, &config).await?; Ok(Self { accounts: stores.accounts, storage: stores.storage, @@ -115,19 +116,54 @@ impl CommonwareBackend { /// Build a root provider for this backend configuration. pub fn root_provider(&self) -> CommonwareRootProvider { - CommonwareRootProvider::new(self.context.clone(), self.config.clone()) + CommonwareRootProvider::new(self.context.child("root_provider"), self.config.clone()) } /// Get the current state root. pub fn state_root(&self) -> Result { state_root_from_stores(&self.accounts, &self.storage, &self.code) } + + /// Check cross-partition commit sequence consistency. + /// + /// Reads the commit sequence marker from each QMDB partition and verifies + /// they all agree. If no markers exist (backward-compatible with pre-fix + /// databases), the check passes. If markers are present but differ, a + /// partial commit occurred during a previous crash and the node must not + /// start. + /// + /// Returns the [`PartitionCommitSeqs`] on success so the caller can + /// initialize the `QmdbStore` with the correct starting sequence. + /// + /// # Errors + /// + /// Returns [`BackendError::InconsistentPartitions`] if the sequences differ, + /// or a storage error if reading the markers fails. 
+ pub async fn verify_partition_consistency(&self) -> Result { + let seqs = read_partition_commit_seqs(&self.accounts, &self.storage, &self.code).await?; + + if let Some(msg) = seqs.inconsistency_message() { + error!( + accounts_seq = ?seqs.accounts, + storage_seq = ?seqs.storage, + code_seq = ?seqs.code, + "QMDB partition consistency check FAILED" + ); + return Err(BackendError::InconsistentPartitions(msg)); + } + + info!( + commit_seq = ?seqs.accounts.unwrap_or(0), + "QMDB partition consistency check passed" + ); + Ok(seqs) + } } #[async_trait] impl RootProvider for CommonwareRootProvider { async fn state_root(&self) -> Result { - let stores = open_stores(self.context.clone(), &self.config) + let stores = open_stores(&self.context, &self.config) .await .map_err(|e| HandleError::RootComputation(e.to_string()))?; state_root_from_stores(&stores.accounts, &stores.storage, &stores.code) @@ -139,7 +175,7 @@ impl RootProvider for CommonwareRootProvider { return self.state_root().await; } - let stores = open_dirty_stores(self.context.clone(), &self.config) + let stores = open_dirty_stores(&self.context, &self.config) .await .map_err(|e| HandleError::RootComputation(e.to_string()))?; let mut qmdb = QmdbStore::new(stores.accounts, stores.storage, stores.code); @@ -175,14 +211,14 @@ fn store_config( name: &str, page_cache: CacheRef, log_codec_config: C, -) -> VariableConfig { +) -> VariableConfig { VariableConfig { merkle_config: MerkleConfig { journal_partition: format!("{prefix}-{name}-mmr"), metadata_partition: format!("{prefix}-{name}-mmr-meta"), items_per_blob: NZU64!(128), write_buffer: NZUsize!(1024 * 1024), - thread_pool: None, + strategy: Sequential, page_cache: page_cache.clone(), }, journal_config: JournalConfig { @@ -197,25 +233,28 @@ fn store_config( } } -async fn open_stores(context: Context, config: &QmdbBackendConfig) -> Result { - let page_cache = CacheRef::from_pooler(&context, config.page_size, config.page_cache_size); +async fn open_stores( + context: 
&Context, + config: &QmdbBackendConfig, +) -> Result { + let page_cache = CacheRef::from_pooler(context, config.page_size, config.page_cache_size); let accounts = AccountStore::init( - context.with_label("accounts"), + context.child("accounts"), store_config(&config.partition_prefix, "accounts", page_cache.clone(), ()), ) .await .map_err(|e| BackendError::Storage(e.to_string()))?; let storage = StorageStore::init( - context.with_label("storage"), + context.child("storage"), store_config(&config.partition_prefix, "storage", page_cache.clone(), ()), ) .await .map_err(|e| BackendError::Storage(e.to_string()))?; let code = CodeStore::init( - context.with_label("code"), + context.child("code"), store_config( &config.partition_prefix, "code", @@ -230,7 +269,7 @@ async fn open_stores(context: Context, config: &QmdbBackendConfig) -> Result Result { let stores = open_stores(context, config).await?; @@ -241,6 +280,51 @@ async fn open_dirty_stores( }) } +/// Read commit sequence markers from all three partitions. +/// +/// This is a standalone helper so it can operate on borrowed stores without +/// taking ownership. The function uses the well-known sentinel keys defined +/// in [`kora_qmdb`] to retrieve the sequence numbers. 
+async fn read_partition_commit_seqs( + accounts: &AccountStore, + storage: &StorageStore, + code: &CodeStore, +) -> Result { + use kora_qmdb::{ + AccountEncoding, COMMIT_SEQ_ACCOUNT_KEY, COMMIT_SEQ_CODE_KEY, COMMIT_SEQ_STORAGE_KEY, + QmdbGettable, + }; + + let accounts_seq = match accounts.get(&COMMIT_SEQ_ACCOUNT_KEY).await { + Ok(Some(bytes)) => AccountEncoding::decode(&bytes).map(|(nonce, _, _, _)| nonce), + Ok(None) => None, + Err(e) => return Err(BackendError::Storage(e.to_string())), + }; + + let storage_seq = match storage.get(&COMMIT_SEQ_STORAGE_KEY).await { + Ok(Some(value)) => { + let limbs: [u64; 4] = value.into_limbs(); + if limbs[1] == 0 && limbs[2] == 0 && limbs[3] == 0 { Some(limbs[0]) } else { None } + } + Ok(None) => None, + Err(e) => return Err(BackendError::Storage(e.to_string())), + }; + + let code_seq = match code.get(&COMMIT_SEQ_CODE_KEY).await { + Ok(Some(bytes)) => { + if bytes.len() >= 8 { + bytes[..8].try_into().ok().map(u64::from_be_bytes) + } else { + None + } + } + Ok(None) => None, + Err(e) => return Err(BackendError::Storage(e.to_string())), + }; + + Ok(PartitionCommitSeqs { accounts: accounts_seq, storage: storage_seq, code: code_seq }) +} + fn state_root_from_stores( accounts: &AccountStore, storage: &StorageStore, diff --git a/crates/storage/backend/src/code.rs b/crates/storage/backend/src/code.rs index d88c340..a62853e 100644 --- a/crates/storage/backend/src/code.rs +++ b/crates/storage/backend/src/code.rs @@ -2,12 +2,11 @@ use alloy_primitives::B256; use commonware_cryptography::sha256::Digest as QmdbDigest; -use commonware_storage::{qmdb::any::VariableConfig, translator::EightCap}; use kora_qmdb::{QmdbBatchable, QmdbGettable}; use crate::{ BackendError, - types::{CodeDb, CodeKey, Context, StoreSlot}, + types::{CodeConfig, CodeDb, CodeKey, Context, StoreSlot}, }; /// Code partition backed by commonware-storage. @@ -27,10 +26,7 @@ pub(crate) struct CodeStoreDirty { impl CodeStore { /// Initialize the code store. 
- pub async fn init( - context: Context, - config: VariableConfig, ()))>, - ) -> Result { + pub async fn init(context: Context, config: CodeConfig) -> Result { let inner = CodeDb::init(context, config) .await .map_err(|e| BackendError::Storage(e.to_string()))?; diff --git a/crates/storage/backend/src/config.rs b/crates/storage/backend/src/config.rs index b1beedd..d1b2f89 100644 --- a/crates/storage/backend/src/config.rs +++ b/crates/storage/backend/src/config.rs @@ -5,7 +5,7 @@ use std::num::{NonZeroU16, NonZeroUsize}; use commonware_utils::{NZU16, NZUsize}; const DEFAULT_PAGE_SIZE: NonZeroU16 = NZU16!(16 * 1024); -const DEFAULT_PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(1_024); +const DEFAULT_PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(4_096); /// Configuration for the full QMDB backend. #[derive(Clone)] diff --git a/crates/storage/backend/src/error.rs b/crates/storage/backend/src/error.rs index b451fa3..bd468df 100644 --- a/crates/storage/backend/src/error.rs +++ b/crates/storage/backend/src/error.rs @@ -24,6 +24,14 @@ pub enum BackendError { /// State root computation failed. #[error("root computation failed: {0}")] RootComputation(String), + + /// Cross-partition commit sequences are inconsistent. + /// + /// Indicates a partial commit occurred due to a crash between sequential + /// partition writes. The node cannot safely start; see issue #88 for + /// block replay recovery. 
+ #[error("inconsistent partitions: {0}")] + InconsistentPartitions(String), } #[cfg(test)] diff --git a/crates/storage/backend/src/lib.rs b/crates/storage/backend/src/lib.rs index 5e2e182..aa348d1 100644 --- a/crates/storage/backend/src/lib.rs +++ b/crates/storage/backend/src/lib.rs @@ -12,6 +12,7 @@ mod types; mod backend; pub use backend::{CommonwareBackend, CommonwareRootProvider}; +pub use kora_qmdb::PartitionCommitSeqs; mod code; pub use code::{CodeStore, CodeStoreError}; diff --git a/crates/storage/backend/src/storage.rs b/crates/storage/backend/src/storage.rs index 703ba2d..e556e34 100644 --- a/crates/storage/backend/src/storage.rs +++ b/crates/storage/backend/src/storage.rs @@ -2,6 +2,7 @@ use alloy_primitives::U256; use commonware_cryptography::sha256::Digest as QmdbDigest; +use commonware_parallel::Sequential; use commonware_storage::{qmdb::any::VariableConfig, translator::EightCap}; use kora_qmdb::{QmdbBatchable, QmdbGettable, StorageKey}; @@ -30,7 +31,7 @@ impl StorageStore { /// Initialize the storage store. 
pub async fn init( context: Context, - config: VariableConfig, + config: VariableConfig, ) -> Result { let inner = StorageDb::init(context, config) .await diff --git a/crates/storage/backend/src/types.rs b/crates/storage/backend/src/types.rs index c370a16..5a1028c 100644 --- a/crates/storage/backend/src/types.rs +++ b/crates/storage/backend/src/types.rs @@ -4,6 +4,7 @@ use alloy_primitives::U256; use bytes::{Buf, BufMut}; use commonware_codec::{EncodeSize, Error as CodecError, Read, Write}; use commonware_cryptography::sha256::Sha256 as QmdbHasher; +use commonware_parallel::Sequential; use commonware_runtime::tokio; use commonware_storage::{merkle::mmr, qmdb::any, translator::EightCap}; use commonware_utils::sequence::FixedBytes; @@ -79,6 +80,7 @@ pub(crate) type AccountDb = any::unordered::variable::Db< AccountValue, QmdbHasher, EightCap, + Sequential, >; pub(crate) type StorageDb = any::unordered::variable::Db< mmr::Family, @@ -87,9 +89,19 @@ pub(crate) type StorageDb = any::unordered::variable::Db< StorageValue, QmdbHasher, EightCap, + Sequential, >; -pub(crate) type CodeDb = - any::unordered::variable::Db, QmdbHasher, EightCap>; +pub(crate) type CodeDb = any::unordered::variable::Db< + mmr::Family, + Context, + CodeKey, + Vec, + QmdbHasher, + EightCap, + Sequential, +>; +pub(crate) type CodeConfig = + any::VariableConfig, ())), Sequential>; pub(crate) struct StoreSlot(Option); diff --git a/crates/storage/handlers/Cargo.toml b/crates/storage/handlers/Cargo.toml index 606243b..c03cb46 100644 --- a/crates/storage/handlers/Cargo.toml +++ b/crates/storage/handlers/Cargo.toml @@ -18,6 +18,7 @@ alloy-primitives.workspace = true async-trait = "0.1" futures.workspace = true revm = { workspace = true, features = ["std", "asyncdb"] } +tracing.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["sync"] } diff --git a/crates/storage/handlers/src/adapter.rs b/crates/storage/handlers/src/adapter.rs index 7efb17b..b01769c 100644 --- 
a/crates/storage/handlers/src/adapter.rs +++ b/crates/storage/handlers/src/adapter.rs @@ -17,6 +17,7 @@ use revm::{ primitives::AddressMap, state::Account, }; +use tracing::error; use crate::{error::HandleError, qmdb::QmdbHandle}; @@ -220,8 +221,12 @@ where continue; } - let storage: BTreeMap = - account.storage.iter().map(|(k, v)| (*k, v.present_value())).collect(); + let storage: BTreeMap = account + .storage + .iter() + .filter(|(_, v)| v.is_changed()) + .map(|(k, v)| (*k, v.present_value())) + .collect(); let code = account.info.code.as_ref().map(|c| c.bytes().to_vec()); @@ -239,8 +244,17 @@ where ); } - // Ignore errors in DatabaseCommit (matches REVM's signature) - let _ = block_on(Self::commit(self, changeset)); + // REVM's `DatabaseCommit::commit` returns `()`, so we cannot propagate + // errors through the return type. Instead we log at error level and + // set an atomic flag that callers can check after execution. + if let Err(err) = block_on(Self::commit(self, changeset)) { + error!( + %err, + "CRITICAL: DatabaseCommit failed — QMDB write error swallowed by infallible \ + REVM trait. Subsequent transactions in this block may execute against stale state." + ); + self.mark_commit_failed(); + } } } diff --git a/crates/storage/handlers/src/qmdb.rs b/crates/storage/handlers/src/qmdb.rs index 63ee069..ce122c9 100644 --- a/crates/storage/handlers/src/qmdb.rs +++ b/crates/storage/handlers/src/qmdb.rs @@ -1,6 +1,9 @@ //! Thread-safe QMDB handle. -use std::sync::Arc; +use std::sync::{ + Arc, + atomic::{AtomicBool, Ordering}, +}; use alloy_primitives::{Address, B256, U256}; use async_trait::async_trait; @@ -35,6 +38,13 @@ pub struct QmdbHandle { inner: Arc>>, root_provider: Option>>, storage_access: Arc>, + /// Flag set when a [`revm::database_interface::DatabaseCommit::commit`] call fails. + /// + /// Because the REVM `DatabaseCommit` trait returns `()`, errors from the + /// underlying QMDB write cannot be propagated through the return type. 
+ /// Instead, this flag is set so callers can check after execution and + /// surface the failure. + commit_failed: Arc, } impl Clone for QmdbHandle { @@ -43,6 +53,7 @@ impl Clone for QmdbHandle { inner: Arc::clone(&self.inner), root_provider: self.root_provider.clone(), storage_access: Arc::clone(&self.storage_access), + commit_failed: Arc::clone(&self.commit_failed), } } } @@ -55,6 +66,7 @@ impl QmdbHandle { inner: Arc::new(RwLock::new(QmdbStore::new(accounts, storage, code))), root_provider: None, storage_access: Arc::new(Mutex::new(())), + commit_failed: Arc::new(AtomicBool::new(false)), } } @@ -65,6 +77,7 @@ impl QmdbHandle { inner: Arc::new(RwLock::new(store)), root_provider: None, storage_access: Arc::new(Mutex::new(())), + commit_failed: Arc::new(AtomicBool::new(false)), } } @@ -85,6 +98,25 @@ impl QmdbHandle { self.storage_access.lock().await } + /// Returns `true` if a [`revm::database_interface::DatabaseCommit::commit`] + /// call has failed since the last call to this method, and clears the flag. + /// + /// This is the side-channel mechanism for propagating errors from the + /// infallible REVM `DatabaseCommit` trait. Callers should check this after + /// block execution and treat a `true` return as a fatal execution error. + pub fn take_commit_failure(&self) -> bool { + self.commit_failed.swap(false, Ordering::SeqCst) + } + + /// Record that a [`revm::database_interface::DatabaseCommit::commit`] call + /// has failed. + /// + /// This is called from the `DatabaseCommit` implementation in `adapter.rs` + /// when a QMDB write error occurs during the infallible REVM commit. + pub(crate) fn mark_commit_failed(&self) { + self.commit_failed.store(true, Ordering::SeqCst); + } + /// Acquire read lock on the underlying store. 
pub async fn read(&self) -> RwLockReadGuard<'_, QmdbStore> { self.inner.read().await diff --git a/crates/storage/handlers/src/state.rs b/crates/storage/handlers/src/state.rs index 24333a5..f48e27b 100644 --- a/crates/storage/handlers/src/state.rs +++ b/crates/storage/handlers/src/state.rs @@ -51,6 +51,10 @@ where ) } + fn take_commit_failure(&self) -> bool { + self.take_commit_failure() + } + async fn storage(&self, address: &Address, slot: &U256) -> Result { let store = self.read().await; diff --git a/crates/storage/indexer/src/lib.rs b/crates/storage/indexer/src/lib.rs index dacb560..7e7f342 100644 --- a/crates/storage/indexer/src/lib.rs +++ b/crates/storage/indexer/src/lib.rs @@ -15,4 +15,6 @@ mod store; pub use store::BlockIndex; mod types; -pub use types::{IndexStats, IndexedBlock, IndexedLog, IndexedReceipt, IndexedTransaction}; +pub use types::{ + EMPTY_ROOT_HASH, IndexStats, IndexedBlock, IndexedLog, IndexedReceipt, IndexedTransaction, +}; diff --git a/crates/storage/indexer/src/store.rs b/crates/storage/indexer/src/store.rs index 4f51d08..835a8a1 100644 --- a/crates/storage/indexer/src/store.rs +++ b/crates/storage/indexer/src/store.rs @@ -32,6 +32,13 @@ impl Default for BlockIndex { } impl BlockIndex { + /// Maximum number of blocks to retain in the index. + /// + /// 10,000 blocks at 33 blocks/s is roughly 5 minutes of history. + /// This must exceed 256 so the EVM `BLOCKHASH` opcode (served by + /// [`Self::recent_block_hashes`]) always has a full window available. + pub const MAX_RETAINED_BLOCKS: u64 = 10_000; + /// Creates a new empty block index. #[must_use] pub fn new() -> Self { @@ -105,6 +112,67 @@ impl BlockIndex { } } + /// Removes all index entries for blocks with `number < min_block_number`. + /// + /// This bounds memory by evicting blocks, transactions, receipts, and logs + /// that are older than the retention window. Lock ordering matches + /// [`Self::insert_block`] (block-level maps first, then tx-level maps) to + /// avoid deadlocks. 
+ pub fn prune_before(&self, min_block_number: u64) { + // Phase 1: collect block numbers, hashes, and tx hashes to prune + // under short-lived read locks. + let hashes_to_remove: Vec<(u64, B256)> = { + let by_number = self.blocks_by_number.read(); + by_number + .iter() + .filter(|(num, _)| **num < min_block_number) + .map(|(num, hash)| (*num, *hash)) + .collect() + }; + + if hashes_to_remove.is_empty() { + return; + } + + let tx_hashes: Vec = { + let by_hash = self.blocks_by_hash.read(); + hashes_to_remove + .iter() + .filter_map(|(_, h)| by_hash.get(h)) + .flat_map(|b| b.transaction_hashes.iter().copied()) + .collect() + }; + + // Phase 2: remove block-level entries under write locks. + { + let mut by_number = self.blocks_by_number.write(); + let mut by_hash = self.blocks_by_hash.write(); + let mut logs = self.logs_by_block.write(); + for &(num, hash) in &hashes_to_remove { + by_number.remove(&num); + by_hash.remove(&hash); + logs.remove(&hash); + } + } + + // Phase 3: remove transaction-level entries under write locks. + { + let mut txs = self.transactions.write(); + let mut rcpts = self.receipts.write(); + for h in &tx_hashes { + txs.remove(h); + rcpts.remove(h); + } + } + + debug!( + min_block_number, + pruned_blocks = hashes_to_remove.len(), + pruned_txs = tx_hashes.len(), + "pruned old index entries", + ); + } + /// Gets a block by its hash. pub fn get_block_by_hash(&self, hash: &B256) -> Option { self.blocks_by_hash.read().get(hash).cloned() @@ -148,8 +216,9 @@ impl BlockIndex { /// Gets logs matching the given filter. 
pub fn get_logs(&self, filter: &LogFilter) -> Vec { + let head = self.head_block_number(); let from_block = filter.from_block.unwrap_or(0); - let to_block = filter.to_block.unwrap_or_else(|| self.head_block_number()); + let to_block = filter.to_block.unwrap_or(head).min(head); let mut result = Vec::new(); @@ -211,6 +280,22 @@ impl BlockIndex { } } + /// Returns up to 256 recent block hashes keyed by block number, looking + /// backwards from `head` (exclusive). Used to populate the BLOCKHASH opcode + /// context. + #[must_use] + pub fn recent_block_hashes(&self, head: u64) -> HashMap { + let blocks_by_number = self.blocks_by_number.read(); + let depth = head.min(256); + let mut hashes = HashMap::with_capacity(depth as usize); + for num in head.saturating_sub(depth)..head { + if let Some(hash) = blocks_by_number.get(&num) { + hashes.insert(num, *hash); + } + } + hashes + } + fn matches_filter(log: &IndexedLog, filter: &LogFilter) -> bool { if let Some(addresses) = &filter.address && !addresses.contains(&log.address) @@ -233,7 +318,7 @@ impl BlockIndex { #[cfg(test)] mod tests { - use alloy_primitives::{Address, B256, Bytes, U256}; + use alloy_primitives::{Address, B256, Bloom, Bytes, U256}; use super::*; @@ -243,10 +328,15 @@ mod tests { number, parent_hash: B256::ZERO, state_root: B256::ZERO, + transactions_root: B256::ZERO, + receipts_root: B256::ZERO, timestamp: 1000 + number, gas_limit: 30_000_000, gas_used: 21_000, base_fee_per_gas: Some(1_000_000_000), + mix_hash: B256::ZERO, + logs_bloom: Bloom::ZERO, + size: 508, transaction_hashes: vec![], } } @@ -262,6 +352,13 @@ mod tests { value: U256::ZERO, gas_limit: 21_000, gas_price: 1_000_000_000, + tx_type: 0, + chain_id: Some(1337), + max_fee_per_gas: None, + max_priority_fee_per_gas: None, + v: 27, + r: U256::from(1), + s: U256::from(2), input: Bytes::new(), nonce: 0, } @@ -279,6 +376,9 @@ mod tests { gas_used: 21_000, contract_address: None, logs: vec![], + logs_bloom: Bloom::ZERO, + tx_type: 0, + 
effective_gas_price: 1_000_000_000, status: true, } } @@ -344,6 +444,10 @@ mod tests { topics: vec![topic], data: Bytes::new(), log_index: 0, + block_number: 1, + block_hash, + transaction_hash: B256::repeat_byte(2), + transaction_index: 0, }; let receipt = IndexedReceipt { @@ -357,6 +461,9 @@ mod tests { gas_used: 21_000, contract_address: None, logs: vec![log], + logs_bloom: Bloom::ZERO, + tx_type: 0, + effective_gas_price: 1_000_000_000, status: true, }; @@ -446,4 +553,116 @@ mod tests { assert_eq!(stats.receipt_count, 1); assert_eq!(stats.head_block_number, 5); } + + #[test] + fn test_recent_block_hashes() { + let index = BlockIndex::new(); + + // Insert blocks 0..5 + for i in 0..5 { + index.insert_block(create_test_block(i, B256::repeat_byte(i as u8)), vec![], vec![]); + } + + // Head=5 should return hashes for blocks 0..5 + let hashes = index.recent_block_hashes(5); + assert_eq!(hashes.len(), 5); + for i in 0..5 { + assert_eq!(hashes[&i], B256::repeat_byte(i as u8)); + } + + // Head=0 should return empty + let hashes = index.recent_block_hashes(0); + assert!(hashes.is_empty()); + + // Head=3 should return blocks 0..3 + let hashes = index.recent_block_hashes(3); + assert_eq!(hashes.len(), 3); + assert!(hashes.contains_key(&0)); + assert!(hashes.contains_key(&1)); + assert!(hashes.contains_key(&2)); + assert!(!hashes.contains_key(&3)); + } + + #[test] + fn test_prune_before_removes_old_blocks() { + let index = BlockIndex::new(); + + // Insert blocks 1..=5, each with one tx and one receipt. 
+ for i in 1..=5u64 { + let block_hash = B256::repeat_byte(i as u8); + let tx_hash = B256::repeat_byte((100 + i) as u8); + let mut block = create_test_block(i, block_hash); + block.transaction_hashes = vec![tx_hash]; + let tx = create_test_tx(tx_hash, block_hash, i); + let receipt = create_test_receipt(tx_hash, block_hash, i); + index.insert_block(block, vec![tx], vec![receipt]); + } + + assert_eq!(index.block_count(), 5); + assert_eq!(index.transaction_count(), 5); + assert_eq!(index.receipt_count(), 5); + + // Prune everything below block 3 (removes blocks 1, 2). + index.prune_before(3); + + assert_eq!(index.block_count(), 3); + assert_eq!(index.transaction_count(), 3); + assert_eq!(index.receipt_count(), 3); + + // Blocks 1 and 2 are gone. + assert!(index.get_block_by_number(1).is_none()); + assert!(index.get_block_by_number(2).is_none()); + + // Block 3, 4, 5 remain. + assert!(index.get_block_by_number(3).is_some()); + assert!(index.get_block_by_number(4).is_some()); + assert!(index.get_block_by_number(5).is_some()); + + // Head block unchanged. + assert_eq!(index.head_block_number(), 5); + + // Pruned tx hashes are gone. + assert!(index.get_transaction(&B256::repeat_byte(101)).is_none()); + assert!(index.get_transaction(&B256::repeat_byte(102)).is_none()); + + // Retained tx hashes still present. + assert!(index.get_transaction(&B256::repeat_byte(103)).is_some()); + } + + #[test] + fn test_prune_before_noop_when_nothing_to_prune() { + let index = BlockIndex::new(); + + index.insert_block(create_test_block(5, B256::repeat_byte(5)), vec![], vec![]); + + // min_block_number <= all stored blocks: should be a no-op. + index.prune_before(1); + assert_eq!(index.block_count(), 1); + + // min_block_number = 0: also a no-op. + index.prune_before(0); + assert_eq!(index.block_count(), 1); + } + + #[test] + fn test_prune_preserves_recent_block_hashes_window() { + let index = BlockIndex::new(); + + // Insert 300 blocks (more than the 256 BLOCKHASH window). 
+ for i in 0..300u64 { + index.insert_block( + create_test_block(i, B256::repeat_byte((i % 256) as u8)), + vec![], + vec![], + ); + } + + // Prune old blocks, keeping only 270+ (simulates a retention window). + index.prune_before(270); + + // recent_block_hashes(300) looks back 256 blocks (44..300). + // Only blocks 270..300 remain, so we should get exactly 30 entries. + let hashes = index.recent_block_hashes(300); + assert_eq!(hashes.len(), 30); + } } diff --git a/crates/storage/indexer/src/types.rs b/crates/storage/indexer/src/types.rs index b5514f3..38bbc41 100644 --- a/crates/storage/indexer/src/types.rs +++ b/crates/storage/indexer/src/types.rs @@ -1,6 +1,17 @@ //! Indexed types for blocks, transactions, receipts, and logs. -use alloy_primitives::{Address, B256, Bytes, U256}; +use alloy_primitives::{Address, B256, Bloom, Bytes, U256, b256}; + +/// The root hash of an empty Merkle Patricia Trie. +/// +/// This is the keccak256 hash of the RLP encoding of an empty string, which is +/// the expected value for `transactionsRoot` and `receiptsRoot` in blocks that +/// contain no transactions. +/// +/// Equal to `keccak256(rlp(""))` = +/// `0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421`. +pub const EMPTY_ROOT_HASH: B256 = + b256!("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421"); /// An indexed block containing header information and transaction hashes. #[derive(Debug, Clone)] @@ -13,6 +24,10 @@ pub struct IndexedBlock { pub parent_hash: B256, /// State root after executing this block. pub state_root: B256, + /// Transactions trie root (MPT root of RLP-encoded transactions). + pub transactions_root: B256, + /// Receipts trie root (MPT root of RLP-encoded receipts). + pub receipts_root: B256, /// Block timestamp. pub timestamp: u64, /// Gas limit for this block. @@ -21,6 +36,12 @@ pub struct IndexedBlock { pub gas_used: u64, /// Base fee per gas (EIP-1559). 
pub base_fee_per_gas: Option, + /// Mix hash / prevrandao value for this block. + pub mix_hash: B256, + /// Block-level Bloom filter (bitwise OR of all receipt Bloom filters). + pub logs_bloom: Bloom, + /// Approximate block size in bytes (header overhead + sum of raw tx sizes). + pub size: u64, /// Hashes of transactions included in this block. pub transaction_hashes: Vec, } @@ -46,6 +67,20 @@ pub struct IndexedTransaction { pub gas_limit: u64, /// Gas price. pub gas_price: u128, + /// EIP-2718 transaction type. + pub tx_type: u8, + /// Chain ID. + pub chain_id: Option, + /// Max fee per gas (EIP-1559 and later typed transactions). + pub max_fee_per_gas: Option, + /// Max priority fee per gas (EIP-1559 and later typed transactions). + pub max_priority_fee_per_gas: Option, + /// V component of the transaction signature (u128 to represent the full EIP-155 range). + pub v: u128, + /// R component of the transaction signature. + pub r: U256, + /// S component of the transaction signature. + pub s: U256, /// Input data. pub input: Bytes, /// Sender nonce. @@ -75,6 +110,12 @@ pub struct IndexedReceipt { pub contract_address: Option
, /// Logs emitted by this transaction. pub logs: Vec, + /// Logs bloom filter for this receipt. + pub logs_bloom: Bloom, + /// EIP-2718 transaction type. + pub tx_type: u8, + /// Effective gas price paid by this transaction. + pub effective_gas_price: u128, /// Transaction status (true = success, false = revert). pub status: bool, } @@ -90,6 +131,14 @@ pub struct IndexedLog { pub data: Bytes, /// Log index within the block. pub log_index: u64, + /// Number of the block containing this log. + pub block_number: u64, + /// Hash of the block containing this log. + pub block_hash: B256, + /// Hash of the transaction that emitted this log. + pub transaction_hash: B256, + /// Index of the transaction that emitted this log. + pub transaction_index: u64, } /// Statistics about the block index. diff --git a/crates/storage/overlay/src/overlay.rs b/crates/storage/overlay/src/overlay.rs index 54cc008..a6f03d1 100644 --- a/crates/storage/overlay/src/overlay.rs +++ b/crates/storage/overlay/src/overlay.rs @@ -18,6 +18,18 @@ impl OverlayState { Self { base, changes: Arc::new(changes) } } + /// Return the number of accounts in the overlay change set. + #[must_use] + pub fn change_len(&self) -> usize { + self.changes.len() + } + + /// Return whether the overlay change set is empty. + #[must_use] + pub fn changes_is_empty(&self) -> bool { + self.changes.is_empty() + } + /// Merge the current overlay changes with a newer change set. 
pub fn merge_changes(&self, newer: ChangeSet) -> ChangeSet { let mut merged = (*self.changes).clone(); @@ -405,6 +417,22 @@ mod tests { let _ = overlay.base(); } + #[test] + fn test_changes_is_empty_and_change_len() { + let addr = Address::repeat_byte(0x0A); + let base = MockStateDb::new(); + + let empty_overlay = OverlayState::new(base.clone(), ChangeSet::new()); + assert!(empty_overlay.changes_is_empty()); + assert_eq!(empty_overlay.change_len(), 0); + + let mut changes = ChangeSet::new(); + changes.accounts.insert(addr, test_account(1, 100)); + let non_empty_overlay = OverlayState::new(base, changes); + assert!(!non_empty_overlay.changes_is_empty()); + assert_eq!(non_empty_overlay.change_len(), 1); + } + #[tokio::test] async fn test_overlay_code_hash_from_changes() { let addr = Address::repeat_byte(0x06); diff --git a/crates/storage/qmdb-ledger/Cargo.toml b/crates/storage/qmdb-ledger/Cargo.toml index 08d008d..ecedcc3 100644 --- a/crates/storage/qmdb-ledger/Cargo.toml +++ b/crates/storage/qmdb-ledger/Cargo.toml @@ -27,5 +27,8 @@ alloy-primitives.workspace = true # Error handling thiserror.workspace = true +# Logging +tracing.workspace = true + # Async tokio.workspace = true diff --git a/crates/storage/qmdb-ledger/src/ledger.rs b/crates/storage/qmdb-ledger/src/ledger.rs index 8be0000..dfb83a7 100644 --- a/crates/storage/qmdb-ledger/src/ledger.rs +++ b/crates/storage/qmdb-ledger/src/ledger.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use alloy_primitives::{Address, B256, U256}; -use commonware_runtime::tokio::Context; +use commonware_runtime::{Supervisor as _, tokio::Context}; use kora_backend::{ AccountStore, CodeStore, CommonwareBackend, CommonwareRootProvider, QmdbBackendConfig, StorageStore, @@ -12,6 +12,7 @@ use kora_qmdb::StateRoot as QmdbStateRoot; use kora_traits::{StateDb, StateDbWrite}; use thiserror::Error; use tokio::sync::RwLock; +use tracing::info; /// QMDB configuration for the backend. 
pub type QmdbConfig = QmdbBackendConfig; @@ -54,12 +55,40 @@ impl QmdbLedger { config: QmdbConfig, genesis_alloc: Vec<(Address, U256)>, ) -> Result { - let backend = CommonwareBackend::open(context.clone(), config.clone()).await?; - let root_provider = CommonwareRootProvider::new(context, config); + Self::init_with_genesis(context, config, genesis_alloc, true).await + } + + /// Initializes the QMDB partitions, optionally applying the genesis allocation. + /// + /// Runs a cross-partition consistency check before proceeding. If the + /// partitions have mismatched commit sequences (indicating a partial commit + /// from a previous crash), initialization will fail with an error. + pub async fn init_with_genesis( + context: Context, + config: QmdbConfig, + genesis_alloc: Vec<(Address, U256)>, + apply_genesis: bool, + ) -> Result { + let backend = CommonwareBackend::open(context.child("backend"), config.clone()).await?; + + // Verify cross-partition consistency before consuming the backend. + let seqs = backend.verify_partition_consistency().await?; + let starting_seq = seqs.accounts.unwrap_or(0); + info!(commit_seq = starting_seq, "QMDB partition consistency verified"); + + let root_provider = CommonwareRootProvider::new(context.child("root_provider"), config); let (accounts, storage, code) = backend.into_stores(); - let handle = Handle::new(accounts, storage, code) - .with_root_provider(Arc::new(RwLock::new(root_provider))); - handle.init_genesis(genesis_alloc).await?; + + // Create a QmdbStore with the persisted commit sequence so that + // subsequent commits continue the monotonic sequence. 
+ let mut store = kora_qmdb::QmdbStore::new(accounts, storage, code); + store.set_commit_seq(starting_seq); + let handle = + Handle::from_store(store).with_root_provider(Arc::new(RwLock::new(root_provider))); + + if apply_genesis { + handle.init_genesis(genesis_alloc).await?; + } Ok(Self { handle }) } diff --git a/crates/storage/qmdb/src/error.rs b/crates/storage/qmdb/src/error.rs index 77d0a62..47863b6 100644 --- a/crates/storage/qmdb/src/error.rs +++ b/crates/storage/qmdb/src/error.rs @@ -21,6 +21,10 @@ pub enum QmdbError { /// Code not found for hash. #[error("code not found: {0}")] CodeNotFound(B256), + + /// Cross-partition commit sequences are inconsistent after a crash. + #[error("inconsistent partitions: {0}")] + InconsistentPartitions(String), } #[cfg(test)] diff --git a/crates/storage/qmdb/src/lib.rs b/crates/storage/qmdb/src/lib.rs index b9e51de..1a93e14 100644 --- a/crates/storage/qmdb/src/lib.rs +++ b/crates/storage/qmdb/src/lib.rs @@ -21,7 +21,10 @@ mod root; pub use root::StateRoot; mod store; -pub use store::{QmdbStore, Stores}; +pub use store::{ + COMMIT_SEQ_ACCOUNT_KEY, COMMIT_SEQ_CODE_KEY, COMMIT_SEQ_STORAGE_KEY, PartitionCommitSeqs, + QmdbStore, Stores, +}; mod traits; pub use traits::{QmdbBatchable, QmdbGettable}; diff --git a/crates/storage/qmdb/src/root.rs b/crates/storage/qmdb/src/root.rs index b6008c5..4ff522b 100644 --- a/crates/storage/qmdb/src/root.rs +++ b/crates/storage/qmdb/src/root.rs @@ -28,7 +28,11 @@ impl StateRoot { return parent_root; } - let mut buf = Vec::new(); + // Pre-allocate: namespace(27) + parent(32) + count(8) + ~128 bytes per account + // (address + flags + nonce + balance + code_hash + code_flag + storage_count + slots). 
+ let estimated = + KORA_TRANSITION_ROOT_NAMESPACE.len() + 32 + 8 + changes.accounts.len() * 128; + let mut buf = Vec::with_capacity(estimated); buf.extend_from_slice(KORA_TRANSITION_ROOT_NAMESPACE); buf.extend_from_slice(parent_root.as_slice()); buf.extend_from_slice(&(changes.accounts.len() as u64).to_be_bytes()); diff --git a/crates/storage/qmdb/src/store.rs b/crates/storage/qmdb/src/store.rs index 603d6ff..5b492c7 100644 --- a/crates/storage/qmdb/src/store.rs +++ b/crates/storage/qmdb/src/store.rs @@ -10,6 +10,106 @@ use crate::{ traits::{QmdbBatchable, QmdbGettable}, }; +/// Sentinel address used to store the commit sequence number in the accounts partition. +/// +/// This is the hand-picked constant `0xFFFF...FFFE` (19 `0xFF` bytes followed by `0xFE`), not a hash-derived value. +/// A real Ethereum account address colliding with it is practically impossible. +pub const COMMIT_SEQ_ACCOUNT_KEY: Address = Address::new([ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFE, +]); + +/// Sentinel storage key used to store the commit sequence number in the storage partition. +/// +/// Uses the sentinel address with generation `u64::MAX` and slot `U256::MAX` to avoid +/// collision with any real contract storage slot. +pub const COMMIT_SEQ_STORAGE_KEY: StorageKey = + StorageKey::new(COMMIT_SEQ_ACCOUNT_KEY, u64::MAX, U256::MAX); + +/// Sentinel code hash used to store the commit sequence number in the code partition. +/// +/// Uses `0xFFFF...FFFE`, a value with no known keccak256 preimage, so it is not expected to collide with a real code hash. +pub const COMMIT_SEQ_CODE_KEY: B256 = B256::new([ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, +]); + +/// Encode a commit sequence number into an 80-byte account value. +/// +/// The sequence is stored in the first 8 bytes (nonce field) with the rest zeroed. 
+fn encode_commit_seq_account(seq: u64) -> [u8; AccountEncoding::SIZE] { + AccountEncoding::encode(seq, U256::ZERO, B256::ZERO, 0) +} + +/// Decode a commit sequence number from an 80-byte account value. +fn decode_commit_seq_account(bytes: &[u8; AccountEncoding::SIZE]) -> Option { + AccountEncoding::decode(bytes).map(|(nonce, _, _, _)| nonce) +} + +/// Encode a commit sequence number into a code partition value. +fn encode_commit_seq_code(seq: u64) -> Vec { + seq.to_be_bytes().to_vec() +} + +/// Decode a commit sequence number from a code partition value. +fn decode_commit_seq_code(bytes: &[u8]) -> Option { + if bytes.len() < 8 { + return None; + } + Some(u64::from_be_bytes(bytes[..8].try_into().ok()?)) +} + +/// Per-partition commit sequence numbers. +/// +/// Used to detect cross-partition inconsistency after a crash. If all three +/// values match, the partitions are consistent. If they differ, a partial +/// commit occurred and the node should not start. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PartitionCommitSeqs { + /// Commit sequence from the accounts partition. + pub accounts: Option, + /// Commit sequence from the storage partition. + pub storage: Option, + /// Commit sequence from the code partition. + pub code: Option, +} + +impl PartitionCommitSeqs { + /// Check whether all partitions are consistent. + /// + /// Returns `true` if all three sequences are present and equal, or if none + /// is present (backward-compatible: pre-fix node that has never written a + /// sequence marker). Any mixed presence is reported as inconsistent. + #[must_use] + pub const fn is_consistent(&self) -> bool { + match (self.accounts, self.storage, self.code) { + // No markers at all -- pre-fix node, skip check. + (None, None, None) => true, + // All present and matching. + (Some(a), Some(s), Some(c)) => a == s && s == c, + // Mixed presence means inconsistency (or very first commit was partial). + _ => false, + } + } + + /// Return an error message describing the inconsistency, or `None` if consistent. 
+ #[must_use] + pub fn inconsistency_message(&self) -> Option { + if self.is_consistent() { + return None; + } + Some(format!( + "QMDB partition commit sequences are inconsistent: \ + accounts={}, storage={}, code={}. \ + A partial cross-partition commit was detected. \ + The node cannot safely start without state recovery (see issue #88).", + self.accounts.map_or("none".to_string(), |s| s.to_string()), + self.storage.map_or("none".to_string(), |s| s.to_string()), + self.code.map_or("none".to_string(), |s| s.to_string()), + )) + } +} + /// The three QMDB stores. #[derive(Debug)] pub struct Stores { @@ -32,15 +132,43 @@ impl Stores { /// /// NO synchronization - that's the caller's responsibility. /// Use `kora-handlers::QmdbHandle` for thread-safe access. +/// +/// Tracks a `commit_seq` counter that is written as a sentinel key in each +/// partition during [`apply_batches()`](Self::apply_batches). On startup the +/// sequences can be read back via [`read_partition_commit_seqs()`](Self::read_partition_commit_seqs) +/// to detect partial cross-partition commits caused by crashes. #[derive(Debug)] pub struct QmdbStore { stores: Option>, + /// Monotonically increasing commit sequence number. + /// + /// Incremented after all three partition writes succeed in `apply_batches()`. + /// Written as a sentinel key in each partition to enable cross-partition + /// consistency detection on startup. + commit_seq: u64, } impl QmdbStore { /// Create a new store from the three partitions. + /// + /// The commit sequence starts at 0. Call [`set_commit_seq()`](Self::set_commit_seq) + /// after reading persisted sequences to resume from the correct value. pub const fn new(accounts: A, storage: S, code: C) -> Self { - Self { stores: Some(Stores::new(accounts, storage, code)) } + Self { stores: Some(Stores::new(accounts, storage, code)), commit_seq: 0 } + } + + /// Return the current commit sequence number. 
+ pub const fn commit_seq(&self) -> u64 { + self.commit_seq + } + + /// Set the commit sequence number. + /// + /// Intended to be called after startup once the persisted sequence has been + /// read from the partitions, so that subsequent commits continue the + /// monotonic sequence. + pub const fn set_commit_seq(&mut self, seq: u64) { + self.commit_seq = seq; } /// Borrow stores for reading. @@ -189,29 +317,50 @@ where /// Apply batches to stores. /// + /// Each partition batch is augmented with a commit sequence marker before + /// writing. The marker uses well-known sentinel keys + /// ([`COMMIT_SEQ_ACCOUNT_KEY`], [`COMMIT_SEQ_STORAGE_KEY`], + /// [`COMMIT_SEQ_CODE_KEY`]) that are outside the normal key space. + /// + /// The next sequence number (`commit_seq + 1`) is written to each partition. + /// After all three writes succeed, the in-memory `commit_seq` is advanced. + /// If a crash occurs between partition writes, the sentinel values will + /// differ across partitions, which is detectable on startup via + /// [`read_partition_commit_seqs()`](Self::read_partition_commit_seqs). + /// /// # Errors /// /// Returns an error if stores are unavailable or any batch write operation fails. pub async fn apply_batches(&mut self, batches: StoreBatches) -> Result<(), QmdbError> { + let next_seq = self.commit_seq.saturating_add(1); let stores = self.stores_mut()?; + // Inject commit sequence markers into each partition batch. 
+ let mut account_ops = batches.accounts; + account_ops.push((COMMIT_SEQ_ACCOUNT_KEY, Some(encode_commit_seq_account(next_seq)))); + + let mut storage_ops = batches.storage; + storage_ops.push((COMMIT_SEQ_STORAGE_KEY, Some(U256::from(next_seq)))); + + let mut code_ops = batches.code; + code_ops.push((COMMIT_SEQ_CODE_KEY, Some(encode_commit_seq_code(next_seq)))); + stores .accounts - .write_batch(batches.accounts) + .write_batch(account_ops) .await .map_err(|e| QmdbError::Storage(e.to_string()))?; stores .storage - .write_batch(batches.storage) + .write_batch(storage_ops) .await .map_err(|e| QmdbError::Storage(e.to_string()))?; - stores - .code - .write_batch(batches.code) - .await - .map_err(|e| QmdbError::Storage(e.to_string()))?; + stores.code.write_batch(code_ops).await.map_err(|e| QmdbError::Storage(e.to_string()))?; + + // All three partitions committed successfully; advance the sequence. + self.commit_seq = next_seq; Ok(()) } @@ -228,6 +377,43 @@ where let batches = self.build_batches(&changes).await?; self.apply_batches(batches).await } + + /// Read the commit sequence marker from each partition. + /// + /// Returns [`PartitionCommitSeqs`] containing the sequence number found in + /// each partition, or `None` if no marker exists (backward-compatible with + /// databases created before this feature was added). + /// + /// # Errors + /// + /// Returns an error if stores are unavailable or an underlying read fails. + pub async fn read_partition_commit_seqs(&self) -> Result { + let stores = self.stores()?; + + let accounts_seq = match stores.accounts.get(&COMMIT_SEQ_ACCOUNT_KEY).await { + Ok(Some(bytes)) => decode_commit_seq_account(&bytes), + Ok(None) => None, + Err(e) => return Err(QmdbError::Storage(e.to_string())), + }; + + let storage_seq = match stores.storage.get(&COMMIT_SEQ_STORAGE_KEY).await { + Ok(Some(value)) => { + // U256 -> u64: the sequence number fits in a u64. 
+ let limbs: [u64; 4] = value.into_limbs(); + if limbs[1] == 0 && limbs[2] == 0 && limbs[3] == 0 { Some(limbs[0]) } else { None } + } + Ok(None) => None, + Err(e) => return Err(QmdbError::Storage(e.to_string())), + }; + + let code_seq = match stores.code.get(&COMMIT_SEQ_CODE_KEY).await { + Ok(Some(bytes)) => decode_commit_seq_code(&bytes), + Ok(None) => None, + Err(e) => return Err(QmdbError::Storage(e.to_string())), + }; + + Ok(PartitionCommitSeqs { accounts: accounts_seq, storage: storage_seq, code: code_seq }) + } } #[cfg(test)] @@ -317,4 +503,95 @@ mod tests { let mut store = create_test_store(); store.commit_changes(ChangeSet::new()).await.unwrap(); } + + #[test] + fn new_store_has_zero_commit_seq() { + let store = create_test_store(); + assert_eq!(store.commit_seq(), 0); + } + + #[test] + fn set_commit_seq_updates_value() { + let mut store = create_test_store(); + store.set_commit_seq(42); + assert_eq!(store.commit_seq(), 42); + } + + #[tokio::test] + async fn apply_batches_increments_commit_seq() { + let mut store = create_test_store(); + assert_eq!(store.commit_seq(), 0); + + let batches = StoreBatches::new(); + store.apply_batches(batches).await.unwrap(); + assert_eq!(store.commit_seq(), 1); + + let batches = StoreBatches::new(); + store.apply_batches(batches).await.unwrap(); + assert_eq!(store.commit_seq(), 2); + } + + #[tokio::test] + async fn apply_batches_writes_commit_seq_markers() { + let mut store = create_test_store(); + let batches = StoreBatches::new(); + store.apply_batches(batches).await.unwrap(); + + // Read back the sentinel keys. 
+ let seqs = store.read_partition_commit_seqs().await.unwrap(); + assert_eq!(seqs.accounts, Some(1)); + assert_eq!(seqs.storage, Some(1)); + assert_eq!(seqs.code, Some(1)); + assert!(seqs.is_consistent()); + } + + #[tokio::test] + async fn read_partition_commit_seqs_returns_none_for_empty_store() { + let store = create_test_store(); + let seqs = store.read_partition_commit_seqs().await.unwrap(); + assert_eq!(seqs.accounts, None); + assert_eq!(seqs.storage, None); + assert_eq!(seqs.code, None); + assert!(seqs.is_consistent()); + } + + #[test] + fn partition_commit_seqs_consistent_when_all_match() { + let seqs = PartitionCommitSeqs { accounts: Some(5), storage: Some(5), code: Some(5) }; + assert!(seqs.is_consistent()); + assert!(seqs.inconsistency_message().is_none()); + } + + #[test] + fn partition_commit_seqs_inconsistent_when_different() { + let seqs = PartitionCommitSeqs { accounts: Some(5), storage: Some(4), code: Some(5) }; + assert!(!seqs.is_consistent()); + let msg = seqs.inconsistency_message().unwrap(); + assert!(msg.contains("accounts=5")); + assert!(msg.contains("storage=4")); + assert!(msg.contains("code=5")); + } + + #[test] + fn partition_commit_seqs_inconsistent_when_partially_present() { + let seqs = PartitionCommitSeqs { accounts: Some(1), storage: None, code: None }; + assert!(!seqs.is_consistent()); + } + + #[tokio::test] + async fn multiple_commits_track_sequence_correctly() { + let mut store = create_test_store(); + + for i in 1..=5 { + let batches = StoreBatches::new(); + store.apply_batches(batches).await.unwrap(); + assert_eq!(store.commit_seq(), i); + + let seqs = store.read_partition_commit_seqs().await.unwrap(); + assert_eq!(seqs.accounts, Some(i)); + assert_eq!(seqs.storage, Some(i)); + assert_eq!(seqs.code, Some(i)); + assert!(seqs.is_consistent()); + } + } } diff --git a/crates/storage/traits/src/state.rs b/crates/storage/traits/src/state.rs index 484cc15..936ba7f 100644 --- a/crates/storage/traits/src/state.rs +++ 
b/crates/storage/traits/src/state.rs @@ -34,6 +34,18 @@ pub trait StateDbRead: Clone + Send + Sync + 'static { slot: &U256, ) -> impl Future> + Send; + /// Returns `true` if the REVM `DatabaseCommit` side-channel recorded a + /// commit failure since the last call, and clears the flag. + /// + /// Backends whose `DatabaseCommit::commit()` can fail (e.g. QMDB) set an + /// internal flag because the REVM trait is infallible. The executor calls + /// this after the transaction loop to detect silent failures. + /// + /// The default implementation returns `false` (no failure). + fn take_commit_failure(&self) -> bool { + false + } + /// Check if an account exists. fn exists(&self, address: &Address) -> impl Future> + Send { let address = *address; diff --git a/crates/utilities/crypto/src/test_utils.rs b/crates/utilities/crypto/src/test_utils.rs index 72a860a..da49809 100644 --- a/crates/utilities/crypto/src/test_utils.rs +++ b/crates/utilities/crypto/src/test_utils.rs @@ -3,7 +3,7 @@ use commonware_consensus::simplex::scheme::bls12381_threshold::vrf as bls12381_t use commonware_cryptography::{ Signer as _, bls12381::{ - dkg, + dkg::feldman_desmedt as dkg, primitives::{sharing::Mode, variant::MinSig}, }, ed25519, diff --git a/deny.toml b/deny.toml index 7f149a6..c6bda26 100644 --- a/deny.toml +++ b/deny.toml @@ -2,6 +2,10 @@ ignore = [ # paste is a transitive dep from alloy-primitives "RUSTSEC-2024-0436", + # ark-relations 0.5.1 enables tracing-subscriber 0.2 through its std + # feature, and commonware-cryptography 2026.5.0 currently depends on that + # arkworks line. Remove this once commonware can move to ark-relations >=0.6. 
+ "RUSTSEC-2025-0055", ] [licenses] @@ -17,7 +21,6 @@ allow = [ "Zlib", "CC0-1.0", "BSL-1.0", - "OpenSSL", ] confidence-threshold = 0.8 @@ -25,10 +28,7 @@ confidence-threshold = 0.8 multiple-versions = "allow" wildcards = "allow" skip = [ - "block-buffer", - "digest", "getrandom", - "windows-sys", "windows-link", ] diff --git a/docker/Dockerfile b/docker/Dockerfile index d76424e..4d922dc 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -38,7 +38,7 @@ RUN cargo chef cook --release --recipe-path recipe.json COPY . . # Build all binaries -RUN cargo build --release -p kora -p keygen +RUN cargo build --release -p kora -p keygen -p loadgen # ─────────────────────────────────────────────────────────────────────────── # Stage 4: Runtime - Minimal production image @@ -56,12 +56,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Create non-root user for security RUN useradd -m -u 1000 -s /bin/bash kora && \ - mkdir -p /var/lib/kora /etc/kora /data /shared && \ - chown -R kora:kora /var/lib/kora /etc/kora /data /shared + mkdir -p /var/lib/kora /etc/kora /data /shared /runtime && \ + chown -R kora:kora /var/lib/kora /etc/kora /data /shared /runtime # Copy binaries from builder COPY --from=builder /app/target/release/kora /usr/local/bin/ COPY --from=builder /app/target/release/keygen /usr/local/bin/ +COPY --from=builder /app/target/release/loadgen /usr/local/bin/ # Copy entrypoint scripts COPY docker/scripts/ /scripts/ @@ -81,6 +82,13 @@ EXPOSE 30303 8545 8546 9002 # Default volumes VOLUME ["/data", "/shared"] +# Runtime health check shared with Compose. Override HEALTHCHECK_MODE to +# switch the check: dkg (share.key + output.json exist), p2p (port 30303), +# ready (eth_blockNumber + stall detection). Default mode is p2p. +# The compose file overrides these timings; these are conservative defaults. 
+HEALTHCHECK --interval=30s --timeout=10s --retries=6 --start-period=120s \ + CMD /scripts/healthcheck.sh + # Default entrypoint - can be overridden for different modes ENTRYPOINT ["/scripts/entrypoint.sh"] CMD ["validator"] diff --git a/docker/Justfile b/docker/Justfile index dadda88..0080cd2 100644 --- a/docker/Justfile +++ b/docker/Justfile @@ -18,16 +18,19 @@ trusted-devnet: ./scripts/devnet-run.sh devnet-minimal: - @COMPOSE_PROFILES="" ./scripts/devnet-run.sh + @COMPOSE_PROFILES="none" ./scripts/devnet-run.sh stats: ./scripts/devnet-stats.sh +health: + ./scripts/devnet-health.sh + down: - docker compose -f compose/devnet.yaml down + docker compose -f compose/devnet.yaml --profile observability --profile interactive-dkg down reset: - docker compose -f compose/devnet.yaml down -v + docker compose -f compose/devnet.yaml --profile observability --profile interactive-dkg down -v @echo "Devnet reset. Next 'just devnet' runs fresh DKG." restart: down devnet diff --git a/docker/README.md b/docker/README.md index bd008f1..9ab5391 100644 --- a/docker/README.md +++ b/docker/README.md @@ -45,7 +45,7 @@ Run from repository root (`just `) or from `docker/` directory (`just |---------|-------------| | `just devnet` | Start devnet with interactive DKG (production-like) | | `just trusted-devnet` | Start devnet with trusted dealer DKG (fast, insecure) | -| `just devnet-down` | Stop all containers (preserves state) | +| `just devnet-down` | Stop all containers (preserves keys/config volumes; runtime state is ephemeral) | | `just devnet-reset` | Stop and delete all state (fresh DKG on next start) | | `just devnet-logs` | Stream validator logs | | `just devnet-status` | Show container status and endpoints | @@ -61,7 +61,7 @@ Run from repository root (`just `) or from `docker/` directory (`just | `just devnet` | Start devnet with interactive DKG (production-like) | | `just trusted-devnet` | Start devnet with trusted dealer DKG (fast, insecure) | | `just devnet-minimal` | Start 
devnet without observability stack | -| `just down` | Stop all containers (preserves state) | +| `just down` | Stop all containers (preserves keys/config volumes; runtime state is ephemeral) | | `just reset` | Stop and delete all state (fresh DKG on next start) | | `just restart` | Stop and restart the devnet | | `just restart-validators` | Restart only validator nodes | @@ -164,10 +164,14 @@ Environment variables (set in `.env` or export): |----------|---------|-------------| | `CHAIN_ID` | 1337 | Chain identifier | | `RUST_LOG` | info | Log level (trace, debug, info, warn, error) | +| `KORA_RUNTIME_DIR` | /runtime | Commonware runtime storage directory. The Docker devnet mounts per-node named volumes here so consensus state survives container restarts. | +| `KORA_CHECKPOINT_INTERVAL` | 256 | Number of finalized blocks between durable QMDB state checkpoints. Finalized block/certificate archives remain on disk; on restart, nodes replay any archive tail after the last checkpoint. | | `COMPOSE_PROFILES` | observability | Comma-separated profiles (observability, distributed-dkg) | | `VALIDATOR_INDEX` | - | Node index (0-3), set per container | +| `VALIDATOR_COUNT` | 0 | Total number of validators. When > 0, entrypoint waits for all validators via a shared barrier volume before starting consensus | | `IS_BOOTSTRAP` | - | Whether node is bootstrap node | | `BOOTSTRAP_PEERS` | - | Bootstrap peer addresses | +| `PEER_NODES` | - | Comma-separated list of all validator hostnames (e.g. 
node0,node1,node2,node3) | | `HEALTHCHECK_MODE` | - | Health check mode (dkg, ready) | ## Secondary Peers diff --git a/docker/compose/devnet.yaml b/docker/compose/devnet.yaml index b18a326..b5c4175 100644 --- a/docker/compose/devnet.yaml +++ b/docker/compose/devnet.yaml @@ -10,14 +10,26 @@ volumes: data_node2: data_node3: data_secondary0: + runtime_node0: + runtime_node1: + runtime_node2: + runtime_node3: + runtime_secondary0: shared_config: + startup_barrier: prometheus_data: grafana_data: + loki_data: x-node-common: &node-common image: kora:local networks: - kora-net + logging: + driver: json-file + options: + max-size: "50m" + max-file: "5" environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} @@ -25,15 +37,39 @@ x-node-common: &node-common x-validator-common: &validator-common <<: *node-common restart: unless-stopped + init: true + stop_grace_period: 5s + stop_signal: SIGTERM + read_only: true + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + ulimits: + nofile: + soft: 65536 + hard: 65536 + core: 0 + deploy: + resources: + limits: + memory: 4G + cpus: "2" + pids: 4096 + tmpfs: + - /tmp:size=64m,mode=1777 healthcheck: test: ["CMD", "/scripts/healthcheck.sh"] - interval: 10s - timeout: 5s - retries: 3 - start_period: 30s + interval: 30s + timeout: 10s + retries: 6 + start_period: 120s environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} + - TX_GOSSIP=${TX_GOSSIP:-true} - HEALTHCHECK_MODE=ready services: @@ -48,11 +84,10 @@ services: /usr/local/bin/keygen setup \ --validators=4 \ --secondary-peers=1 \ - --threshold=3 \ --chain-id=${CHAIN_ID:-1337} \ --output-dir=/shared && \ echo "[init] Setting permissions..." 
&& \ - chown -R 1000:1000 /shared/node0 /shared/node1 /shared/node2 /shared/node3 /shared/secondary0 && \ + chown -R 1000:1000 /shared/node0 /shared/node1 /shared/node2 /shared/node3 /shared/secondary0 /barrier && \ echo "[init] Setup complete (run DKG ceremony next)" volumes: - shared_config:/shared @@ -61,6 +96,7 @@ services: - data_node2:/shared/node2 - data_node3:/shared/node3 - data_secondary0:/shared/secondary0 + - startup_barrier:/barrier # Setup + trusted dealer DKG (for fast local dev) init-config: @@ -69,20 +105,24 @@ services: entrypoint: ["/bin/bash", "-c"] command: - | + if [ -f /shared/node0/share.key ] && [ -f /shared/node0/output.json ]; then + echo "[init] DKG already completed, skipping" + exit 0 + fi + echo "[init] Clearing startup barrier from previous runs..." && \ + rm -f /barrier/*.ready && \ echo "[init] Running keygen setup..." && \ /usr/local/bin/keygen setup \ --validators=4 \ --secondary-peers=1 \ - --threshold=3 \ --chain-id=${CHAIN_ID:-1337} \ --output-dir=/shared && \ echo "[init] Running trusted dealer DKG..." && \ /usr/local/bin/keygen dkg-deal \ --validators=4 \ - --threshold=3 \ --output-dir=/shared && \ echo "[init] Setting permissions..." 
&& \ - chown -R 1000:1000 /shared/node0 /shared/node1 /shared/node2 /shared/node3 /shared/secondary0 && \ + chown -R 1000:1000 /shared/node0 /shared/node1 /shared/node2 /shared/node3 /shared/secondary0 /barrier && \ echo "[init] Init complete" volumes: - shared_config:/shared @@ -91,6 +131,7 @@ services: - data_node2:/shared/node2 - data_node3:/shared/node3 - data_secondary0:/shared/secondary0 + - startup_barrier:/barrier # Interactive DKG nodes - run ceremony then exit dkg-node0: @@ -182,86 +223,116 @@ services: validator-node0: <<: *validator-common hostname: node0 + depends_on: + init-config: + condition: service_completed_successfully entrypoint: ["/scripts/entrypoint.sh", "validator"] volumes: - shared_config:/shared:ro - data_node0:/data + - runtime_node0:/runtime + - startup_barrier:/barrier environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} + - TX_GOSSIP=${TX_GOSSIP:-true} - VALIDATOR_INDEX=0 + - VALIDATOR_COUNT=4 - IS_BOOTSTRAP=true + - PEER_NODES=node0,node1,node2,node3 - HEALTHCHECK_MODE=ready ports: - "30400:30303" - - "8545:8545" - - "9000:9002" + - "127.0.0.1:8545:8545" + - "127.0.0.1:9000:9002" validator-node1: <<: *validator-common hostname: node1 depends_on: - validator-node0: - condition: service_healthy + init-config: + condition: service_completed_successfully entrypoint: ["/scripts/entrypoint.sh", "validator"] volumes: - shared_config:/shared:ro - data_node1:/data + - runtime_node1:/runtime + - startup_barrier:/barrier environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} + - TX_GOSSIP=${TX_GOSSIP:-true} - VALIDATOR_INDEX=1 - - IS_BOOTSTRAP=false - - BOOTSTRAP_PEERS=node0:30303 + - VALIDATOR_COUNT=4 + - IS_BOOTSTRAP=true + - PEER_NODES=node0,node1,node2,node3 - HEALTHCHECK_MODE=ready 
ports: - "30401:30303" - - "8546:8545" - - "9001:9002" + - "127.0.0.1:8546:8545" + - "127.0.0.1:9001:9002" validator-node2: <<: *validator-common hostname: node2 depends_on: - validator-node0: - condition: service_healthy + init-config: + condition: service_completed_successfully entrypoint: ["/scripts/entrypoint.sh", "validator"] volumes: - shared_config:/shared:ro - data_node2:/data + - runtime_node2:/runtime + - startup_barrier:/barrier environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} + - TX_GOSSIP=${TX_GOSSIP:-true} - VALIDATOR_INDEX=2 + - VALIDATOR_COUNT=4 - IS_BOOTSTRAP=false - - BOOTSTRAP_PEERS=node0:30303 + - BOOTSTRAP_PEERS=node0:30303,node1:30303 + - PEER_NODES=node0,node1,node2,node3 - HEALTHCHECK_MODE=ready ports: - "30402:30303" - - "8547:8545" - - "9002:9002" + - "127.0.0.1:8547:8545" + - "127.0.0.1:9002:9002" validator-node3: <<: *validator-common hostname: node3 depends_on: - validator-node0: - condition: service_healthy + init-config: + condition: service_completed_successfully entrypoint: ["/scripts/entrypoint.sh", "validator"] volumes: - shared_config:/shared:ro - data_node3:/data + - runtime_node3:/runtime + - startup_barrier:/barrier environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} + - TX_GOSSIP=${TX_GOSSIP:-true} - VALIDATOR_INDEX=3 + - VALIDATOR_COUNT=4 - IS_BOOTSTRAP=false - - BOOTSTRAP_PEERS=node0:30303 + - BOOTSTRAP_PEERS=node0:30303,node1:30303 + - PEER_NODES=node0,node1,node2,node3 - HEALTHCHECK_MODE=ready ports: - "30403:30303" - - "8548:8545" - - "9003:9002" + - "127.0.0.1:8548:8545" + - "127.0.0.1:9003:9002" secondary-node0: <<: *validator-common @@ -273,45 +344,115 @@ services: volumes: - shared_config:/shared:ro - data_secondary0:/data + - runtime_secondary0:/runtime 
environment: - RUST_LOG=${RUST_LOG:-info} - CHAIN_ID=${CHAIN_ID:-1337} + - KORA_RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + - KORA_CHECKPOINT_INTERVAL=${KORA_CHECKPOINT_INTERVAL:-256} - IS_BOOTSTRAP=false - - BOOTSTRAP_PEERS=node0:30303 - - HEALTHCHECK_MODE=ready + - BOOTSTRAP_PEERS=node0:30303,node1:30303 + - HEALTHCHECK_MODE=p2p ports: - "30500:30303" + - "127.0.0.1:8549:8545" + - "127.0.0.1:9004:9002" prometheus: image: prom/prometheus:latest profiles: ["observability"] + restart: unless-stopped + read_only: true + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + tmpfs: + - /tmp:size=64m,mode=1777 volumes: - prometheus_data:/prometheus - ../config/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ../config/alerts.yml:/etc/prometheus/alerts.yml:ro + - ../config/recording-rules.yml:/etc/prometheus/recording-rules.yml:ro command: - '--config.file=/etc/prometheus/prometheus.yml' - '--storage.tsdb.path=/prometheus' - '--web.enable-lifecycle' ports: - - "9090:9090" + - "127.0.0.1:9090:9090" + networks: + - kora-net + + loki: + image: grafana/loki:3.4.2 + profiles: ["observability"] + restart: unless-stopped + read_only: true + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + tmpfs: + - /tmp:size=64m,mode=1777 + volumes: + - loki_data:/loki + - ../config/loki.yml:/etc/loki/local-config.yaml:ro + command: -config.file=/etc/loki/local-config.yaml + ports: + - "127.0.0.1:3100:3100" + networks: + - kora-net + + promtail: + image: grafana/promtail:3.4.2 + profiles: ["observability"] + restart: unless-stopped + read_only: true + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + tmpfs: + - /tmp:size=64m,mode=1777 + depends_on: + - loki + volumes: + - ../config/promtail.yml:/etc/promtail/config.yml:ro + # SECURITY: Docker socket is mounted read-only so Promtail can discover + # container labels for log collection. This grants the container visibility + # into all Docker API metadata. 
In production, consider using a socket + # proxy (e.g. tecnativa/docker-socket-proxy) to restrict API access. + - /var/run/docker.sock:/var/run/docker.sock:ro + command: -config.file=/etc/promtail/config.yml networks: - kora-net grafana: image: grafana/grafana:latest profiles: ["observability"] + restart: unless-stopped + read_only: true + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + tmpfs: + - /tmp:size=64m,mode=1777 depends_on: - prometheus + - loki volumes: - grafana_data:/var/lib/grafana - ../grafana/provisioning:/etc/grafana/provisioning:ro - ../grafana/dashboards:/var/lib/grafana/dashboards:ro environment: - GF_SECURITY_ADMIN_USER=admin - - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin} - GF_AUTH_ANONYMOUS_ENABLED=true - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer + # read_only rootfs prevents writing to /var/log/grafana; use console only + - GF_LOG_MODE=console ports: - - "3000:3000" + - "127.0.0.1:3000:3000" networks: - kora-net diff --git a/docker/config/alerts.yml b/docker/config/alerts.yml new file mode 100644 index 0000000..981df32 --- /dev/null +++ b/docker/config/alerts.yml @@ -0,0 +1,317 @@ +groups: + - name: consensus_critical + rules: + # Node completely down + - alert: ValidatorDown + expr: up{job="kora-validators"} == 0 + for: 30s + labels: + severity: critical + annotations: + summary: "Validator {{ $labels.instance }} is down" + description: "Validator has been unreachable for 30 seconds. Check container health." + + # Consensus has stalled — no blocks finalized + - alert: ConsensusStall + expr: rate(finalized_height{job="kora-validators"}[5m]) < 0.001 and up{job="kora-validators"} == 1 + for: 2m + labels: + severity: critical + annotations: + summary: "Consensus stall on {{ $labels.instance }}" + description: "No blocks finalized in 5 minutes while node is up. Likely mempool poisoning or quorum loss." 
+ + # Voter panic detection — zero finalization rate with node up + - alert: VoterCrash + expr: | + (rate(finalized_height{job="kora-validators"}[1m]) < 0.001) + and (up{job="kora-validators"} == 1) + and (rate(engine_voter_state_current_view{job="kora-validators"}[1m]) < 0.001) + for: 1m + labels: + severity: critical + annotations: + summary: "Possible voter crash on {{ $labels.instance }}" + description: "Node is up but view is not advancing. Voter actor may have panicked." + + - name: consensus_warnings + rules: + # Nodes diverging in height + - alert: HeightDrift + expr: max(finalized_height{job="kora-validators"}) - min(finalized_height{job="kora-validators"}) > 10 + for: 1m + labels: + severity: warning + annotations: + summary: "Validator height drift exceeds 10 blocks" + description: "Height drift (max - min) is {{ $value }} blocks. A node may be struggling to keep up or is stuck in catch-up." + + # High nullification rate — wasted consensus rounds. + # Healthy baseline is ~27% nullification rate (~44 nullifications/s at + # full throughput). Threshold set above steady-state to avoid false alarms. + - alert: HighNullificationRate + expr: sum(rate(engine_voter_state_nullifications_total{job="kora-validators"}[5m])) > 60 + for: 2m + labels: + severity: warning + annotations: + summary: "Nullification rate is {{ $value }}/s" + description: "High nullification rate indicates block building failures. Check executor errors and mempool state." + + # Skip rate elevated — approaching stall territory. + # Healthy 4-validator network has ~27-33% skip rate (round-robin + # leadership means some views naturally nullify). Threshold raised + # to 45% to avoid alerting on steady-state behavior.
+ - alert: HighSkipRate + expr: | + (1 - (avg(rate(finalized_height{job="kora-validators"}[5m])) / avg(rate(engine_voter_state_current_view{job="kora-validators"}[5m])))) > 0.45 + for: 3m + labels: + severity: warning + annotations: + summary: "Skip rate is {{ $value | humanizePercentage }}" + description: "Over 45% of consensus views are wasted. Healthy baseline is ~33%; investigate if sustained." + + # High timeout rate — correlated with nullifications in steady state. + # Raised threshold above healthy baseline to reduce false positives. + - alert: HighTimeoutRate + expr: sum(rate(engine_voter_state_timeouts_total{job="kora-validators"}[5m])) > 60 + for: 2m + labels: + severity: warning + annotations: + summary: "Timeout rate is {{ $value }}/s" + description: "High timeout rate. Leaders may be failing to propose blocks in time." + + # Node catching up poorly — finalization rate much lower than peers + - alert: NodeLagging + expr: | + rate(finalized_height[5m]) < 0.1 * avg(rate(finalized_height[5m])) + and rate(finalized_height[5m]) > 0 + for: 3m + labels: + severity: warning + annotations: + summary: "Node {{ $labels.instance }} is lagging behind peers" + description: "Finalization rate is much lower than cluster average. Node may have resolver catch-up issues." + + - name: resource_alerts + rules: + # Memory growing rapidly + - alert: HighMemoryUsage + expr: runtime_process_rss > 2e9 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory on {{ $labels.instance }}: {{ $value | humanize1024 }}B" + description: "RSS memory exceeds 2GB. Possible memory leak or unbounded mempool growth." + + # Broadcast failures spiking + - alert: BroadcastFailures + expr: rate(broadcast_get_total{status="Failure"}[5m]) > 1 + for: 2m + labels: + severity: warning + annotations: + summary: "Broadcast failures on {{ $labels.instance }}" + description: "Block broadcast is failing. P2P connectivity may be degraded." 
+ + # View advancing but no finalization — quorum issue + - alert: ViewWithoutFinalization + expr: | + rate(engine_voter_state_current_view{job="kora-validators"}[5m]) > 0 + and rate(finalized_height{job="kora-validators"}[5m]) < 0.001 + for: 3m + labels: + severity: warning + annotations: + summary: "Views advancing without finalization on {{ $labels.instance }}" + description: "Consensus rounds are progressing but no blocks are being finalized. Possible quorum loss or executor failures." + + - name: performance_alerts + rules: + # Block build time approaching leader timeout + - alert: SlowBlockBuild + expr: kora:build_duration:p95 > 0.5 + for: 2m + labels: + severity: warning + annotations: + summary: "Block build p95 is {{ $value | humanizeDuration }}" + description: "Block build time p95 exceeding 500ms (leader timeout is 1s). ECDSA recovery or mempool size may be the cause." + + # Block build time critical — will cause nullifications + - alert: CriticalBlockBuild + expr: kora:build_duration:p99 > 0.8 + for: 1m + labels: + severity: critical + annotations: + summary: "Block build p99 at {{ $value | humanizeDuration }} — imminent nullifications" + description: "Block build is approaching 1s leader timeout. Proposals will fail. Reduce BLOCK_CODEC_MAX_TXS or fix mempool." + + # Finalization latency degrading + - alert: HighFinalizationLatency + expr: kora:finalization_latency:p95 > 2 + for: 3m + labels: + severity: warning + annotations: + summary: "Finalization p95 is {{ $value | humanizeDuration }}" + description: "Taking over 2s to collect 2/3+ votes. Check network connectivity and signature verification." 
+ + # Throughput dropped significantly from recent baseline + - alert: ThroughputDrop + expr: | + kora:blocks_per_sec < 0.3 * avg_over_time(kora:blocks_per_sec[1h]) + and kora:blocks_per_sec > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Throughput dropped to {{ $value }} blocks/sec (70%+ drop from 1h average)" + description: "Block production rate has dropped significantly. Check build time, nullification rate, and network." + + # Consensus efficiency dropping — early warning before stall + - alert: LowConsensusEfficiency + expr: kora:consensus_efficiency < 0.5 + for: 5m + labels: + severity: warning + annotations: + summary: "Consensus efficiency at {{ $value | humanizePercentage }}" + description: "Less than 50% of views produce blocks. This preceded the production stall (which was at 67%)." + + # Resolver peers blocked — catch-up impaired + - alert: ResolverPeersBlocked + expr: engine_resolver_resolver_peers_blocked > 0 + for: 1m + labels: + severity: warning + annotations: + summary: "Node {{ $labels.instance }} has {{ $value }} blocked resolver peers" + description: "Blocked peers cannot provide blocks for catch-up. This caused permanent stall after node restarts." + + # Memory growth rate (leak detection). + # Previous threshold of 10MB/s (10e6) was too sensitive — normal state + # accumulation and mempool churn can easily produce transient spikes. + # Raised to 50MB/s to catch genuine leaks without false positives. + - alert: MemoryLeakSuspected + expr: deriv(runtime_process_rss[15m]) > 50e6 + for: 10m + labels: + severity: warning + annotations: + summary: "Memory growing at {{ $value | humanize }}B/s on {{ $labels.instance }}" + description: "Sustained memory growth >50MB/s for 10min. Possible unbounded mempool or state accumulation." 
+ + # Storage write stall (persistence blocked) + - alert: StorageWriteStall + expr: | + rate(finalized_height[5m]) > 0 + and rate(runtime_storage_writes_total[5m]) < 0.001 + for: 2m + labels: + severity: critical + annotations: + summary: "No storage writes on {{ $labels.instance }} despite finalization" + description: "Blocks are finalizing but nothing is being persisted. State will be lost on restart." + + - name: transaction_alerts + rules: + # Chain stall: views advancing but no blocks finalized + - alert: MempoolPoisoning + expr: | + sum(rate(engine_voter_state_current_view[2m])) > 0 + and sum(rate(finalized_height[2m])) < 0.001 + and sum(rate(engine_voter_state_nullifications_total[2m])) > 5 + for: 1m + labels: + severity: critical + annotations: + summary: "Chain stalled with active nullifications — likely mempool poisoning" + description: "Views are advancing but no blocks finalize. High nullification rate suggests every proposal fails (executor abort on bad tx). Requires chain reset." + + # Every leader failing to propose + - alert: AllLeadersFailing + expr: | + sum(rate(engine_voter_state_nullifications_total[2m])) > 20 + and sum(rate(finalized_height[2m])) < 0.001 + for: 30s + labels: + severity: critical + annotations: + summary: "All leaders failing to propose — {{ $value }} nullifications/s" + description: "No blocks finalized and nullification rate is very high. Check for invalid transactions in mempool." + + # Sudden efficiency drop (pre-stall warning) + - alert: EfficiencyCliff + expr: | + kora:consensus_efficiency < 0.1 + and kora:consensus_efficiency offset 5m > 0.5 + for: 1m + labels: + severity: critical + annotations: + summary: "Consensus efficiency crashed from >50% to {{ $value | humanizePercentage }}" + description: "Efficiency dropped off a cliff. This pattern precedes permanent stalls caused by mempool poisoning." 
+ + - name: network_partition + rules: + # Individual peer disconnected — no messages received from a tracked peer + - alert: PeerDisconnected + expr: > + network_tracker_directory_tracked > 0 + and sum by (instance) (rate(network_spawner_messages_received_total[2m])) == 0 + for: 30s + labels: + severity: warning + annotations: + summary: "No P2P messages received on {{ $labels.instance }}" + description: "Node has tracked peers but received zero messages in 2 minutes. Likely disconnected from the network." + + # Potential network partition — fewer than 3 tracked peers (quorum requires 3/4) + - alert: NetworkPartition + expr: network_tracker_directory_tracked < 3 + for: 1m + labels: + severity: critical + annotations: + summary: "Network partition detected on {{ $labels.instance }}: only {{ $value }} tracked peers" + description: "Fewer than 3 peers tracked. BFT consensus requires 2f+1 (3 of 4) validators. This node cannot participate in quorum." + + # High message drop rate — early warning for degraded connectivity + - alert: HighMessageDropRate + expr: > + sum(rate(network_router_messages_dropped_total[5m])) + / clamp_min(sum(rate(network_spawner_messages_sent_total[5m])), 1) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "P2P message drop rate is {{ $value | humanizePercentage }}" + description: "Over 10% of sent messages are being dropped. Network connectivity may be degraded. Observed 18.5% drop rate before production stall." + + # Asymmetric connectivity — node can send but not receive (or vice versa) + - alert: AsymmetricConnectivity + expr: > + sum(rate(network_spawner_messages_sent_total[5m])) > 1 + and sum(rate(network_spawner_messages_received_total[5m])) < 0.01 + for: 2m + labels: + severity: critical + annotations: + summary: "Asymmetric connectivity on {{ $labels.instance }}: sending but not receiving" + description: "Node is sending P2P messages but receiving none. Likely a one-way network partition." 
+ + # All peers rate-limited — possible flooding or misconfiguration + - alert: HighRateLimitedMessages + expr: > + sum(rate(network_spawner_messages_rate_limited_total[5m])) > 10 + for: 2m + labels: + severity: warning + annotations: + summary: "{{ $value }} messages/s rate-limited on {{ $labels.instance }}" + description: "High rate of P2P messages being rate-limited. May indicate flooding, replay storms, or overly aggressive rate limits." diff --git a/docker/config/loki.yml b/docker/config/loki.yml new file mode 100644 index 0000000..c799b22 --- /dev/null +++ b/docker/config/loki.yml @@ -0,0 +1,35 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: "2024-01-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 72h + max_query_length: 720h + max_query_series: 100000 + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem diff --git a/docker/config/prometheus.yml b/docker/config/prometheus.yml index 4641ae9..13365f6 100644 --- a/docker/config/prometheus.yml +++ b/docker/config/prometheus.yml @@ -1,6 +1,10 @@ global: - scrape_interval: 15s - evaluation_interval: 15s + scrape_interval: 10s + evaluation_interval: 10s + +rule_files: + - /etc/prometheus/alerts.yml + - /etc/prometheus/recording-rules.yml scrape_configs: - job_name: 'prometheus' @@ -19,3 +23,13 @@ scrape_configs: regex: 'validator-node(\d+):.*' target_label: validator_index replacement: '$1' + + - job_name: 'kora-secondary' + static_configs: + - targets: + - 'secondary-node0:9002' + relabel_configs: + - source_labels: [__address__] + regex: 'secondary-node(\d+):.*' + target_label: secondary_index + replacement: '$1' diff --git 
a/docker/config/promtail.yml b/docker/config/promtail.yml new file mode 100644 index 0000000..23535d7 --- /dev/null +++ b/docker/config/promtail.yml @@ -0,0 +1,69 @@ +server: + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + # WARNING: Mounting the Docker socket grants root-equivalent access to the host. + # This configuration is intended for LOCAL DEVELOPMENT / DEVNET use only. + # Do NOT use this in production — use a log driver or sidecar pattern instead. + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["com.docker.compose.project=kora-devnet"] + relabel_configs: + # Extract container name + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: container + # Extract compose service name + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + target_label: service + # Extract compose project + - source_labels: ['__meta_docker_container_label_com_docker_compose_project'] + target_label: project + # Add node_type label (validator, secondary, init, dkg) + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + regex: 'validator-node(\d+)' + target_label: node_type + replacement: validator + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + regex: 'secondary-node(\d+)' + target_label: node_type + replacement: secondary + # Extract validator index + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + regex: '(?:validator|secondary)-node(\d+)' + target_label: node_index + pipeline_stages: + # Parse tracing-subscriber output: "2026-05-21T12:00:00.000Z WARN module::path: message key=value" + - regex: + expression: '^(?P<timestamp>\S+)\s+(?P<level>\w+)\s+(?P<module>[^:]+):\s+(?P<message>.*)$' + - labels: + level: + module: + - timestamp: + source: timestamp + format: RFC3339Nano + #
Extract specific warning/error patterns for fast filtering + - match: + selector: '{level="WARN"}' + stages: + - regex: + expression: '(?P<warn_type>invalid data received|ledger\.submit_tx returned false|validator rejected tx|state root mismatch|missing parent snapshot|execution failed)' + - labels: + warn_type: + - match: + selector: '{level="ERROR"}' + stages: + - regex: + expression: '(?P<error_type>task panicked|failed to persist|failed to execute|failed to compute)' + - labels: + error_type: diff --git a/docker/config/recording-rules.yml b/docker/config/recording-rules.yml new file mode 100644 index 0000000..cef7ced --- /dev/null +++ b/docker/config/recording-rules.yml @@ -0,0 +1,214 @@ +groups: + # Pre-compute expensive histogram quantiles so dashboards load fast + - name: performance_recording + interval: 10s + rules: + # Block build percentiles + - record: kora:build_duration:p50 + expr: histogram_quantile(0.50, sum(rate(marshaled_build_duration_bucket[5m])) by (le)) + - record: kora:build_duration:p95 + expr: histogram_quantile(0.95, sum(rate(marshaled_build_duration_bucket[5m])) by (le)) + - record: kora:build_duration:p99 + expr: histogram_quantile(0.99, sum(rate(marshaled_build_duration_bucket[5m])) by (le)) + + # Finalization latency percentiles + - record: kora:finalization_latency:p50 + expr: histogram_quantile(0.50, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le)) + - record: kora:finalization_latency:p95 + expr: histogram_quantile(0.95, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le)) + - record: kora:finalization_latency:p99 + expr: histogram_quantile(0.99, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le)) + + # Notarization latency percentiles + - record: kora:notarization_latency:p50 + expr: histogram_quantile(0.50, sum(rate(engine_voter_notarization_latency_bucket[5m])) by (le)) + - record: kora:notarization_latency:p95 + expr: histogram_quantile(0.95, sum(rate(engine_voter_notarization_latency_bucket[5m])) by (le)) + -
record: kora:notarization_latency:p99 + expr: histogram_quantile(0.99, sum(rate(engine_voter_notarization_latency_bucket[5m])) by (le)) + + # Sig verify percentiles + - record: kora:verify_latency:p50 + expr: histogram_quantile(0.50, sum(rate(engine_batcher_verify_latency_bucket[5m])) by (le)) + - record: kora:verify_latency:p95 + expr: histogram_quantile(0.95, sum(rate(engine_batcher_verify_latency_bucket[5m])) by (le)) + + # Resolver fetch percentiles + - record: kora:resolver_fetch:p50 + expr: histogram_quantile(0.50, sum(rate(engine_resolver_resolver_fetch_duration_bucket[5m])) by (le)) + - record: kora:resolver_fetch:p95 + expr: histogram_quantile(0.95, sum(rate(engine_resolver_resolver_fetch_duration_bucket[5m])) by (le)) + - record: kora:resolver_fetch:p99 + expr: histogram_quantile(0.99, sum(rate(engine_resolver_resolver_fetch_duration_bucket[5m])) by (le)) + + - name: throughput_recording + interval: 10s + rules: + # Core throughput — use sum of per-instance rates to preserve visibility + # when individual nodes drop out, instead of avg() which masks failures + - record: kora:blocks_per_sec + expr: sum(rate(finalized_height{job="kora-validators"}[1m])) / clamp_min(count(up{job="kora-validators"} == 1), 1) + - record: kora:views_per_sec + expr: avg(rate(engine_voter_state_current_view{job="kora-validators"}[1m])) + + # Effective block time + - record: kora:block_time + expr: 1 / clamp_min(avg(rate(finalized_height[1m])), 0.001) + + # Consensus efficiency (finalized / total views) + - record: kora:consensus_efficiency + expr: avg(rate(finalized_height[5m])) / clamp_min(avg(rate(engine_voter_state_current_view[5m])), 0.001) + + # Skip rate (wasted views) + - record: kora:skip_rate + expr: 1 - (avg(rate(finalized_height[5m])) / clamp_min(avg(rate(engine_voter_state_current_view[5m])), 0.001)) + + # Height drift + - record: kora:height_drift + expr: max(finalized_height) - min(finalized_height) + + # Nullification rate + - record: kora:nullification_rate + 
expr: sum(rate(engine_voter_state_nullifications_total[5m])) + + # Network cost per block (bytes) + - record: kora:network_bytes_per_block + expr: avg(rate(runtime_outbound_bandwidth_total[5m]) + rate(runtime_inbound_bandwidth_total[5m])) / clamp_min(avg(rate(finalized_height[5m])), 0.001) + + # Storage write rate + - record: kora:storage_write_rate + expr: sum(rate(runtime_storage_write_bytes_total[1m])) + - record: kora:storage_iops + expr: sum(rate(runtime_storage_writes_total[1m])) + + # Map Commonware's generic data_N channel labels to human-readable names. + # Channel assignments: + # data_0 = simplex votes + # data_1 = simplex certs + # data_2 = simplex resolver + # data_3 = broadcast blocks + # data_4 = marshal backfill + # + # Each metric uses a single rule with `or` to combine all channels, + # producing one time series per channel label value. This avoids the + # duplicate-record-name bug where Prometheus only evaluates the last + # rule when multiple rules share the same record name. 
+ - name: p2p_channel_recording + interval: 10s + rules: + # ---------- Messages sent per channel (aggregated across peers) ---------- + - record: kora:p2p:channel_sent:rate1m + expr: >- + label_replace( + sum by (message) (rate(network_spawner_messages_sent_total{message="data_0"}[1m])), + "channel", "simplex_votes", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_sent_total{message="data_1"}[1m])), + "channel", "simplex_certs", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_sent_total{message="data_2"}[1m])), + "channel", "simplex_resolver", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_sent_total{message="data_3"}[1m])), + "channel", "broadcast_blocks", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_sent_total{message="data_4"}[1m])), + "channel", "marshal_backfill", "message", ".*" + ) + + # ---------- Messages received per channel ---------- + - record: kora:p2p:channel_recv:rate1m + expr: >- + label_replace( + sum by (message) (rate(network_spawner_messages_received_total{message="data_0"}[1m])), + "channel", "simplex_votes", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_received_total{message="data_1"}[1m])), + "channel", "simplex_certs", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_received_total{message="data_2"}[1m])), + "channel", "simplex_resolver", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_received_total{message="data_3"}[1m])), + "channel", "broadcast_blocks", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_spawner_messages_received_total{message="data_4"}[1m])), + "channel", "marshal_backfill", "message", ".*" + ) + + # ---------- Messages dropped per channel ---------- + - record: 
kora:p2p:channel_dropped:rate1m + expr: >- + label_replace( + sum by (message) (rate(network_router_messages_dropped_total{message="data_0"}[1m])), + "channel", "simplex_votes", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_router_messages_dropped_total{message="data_1"}[1m])), + "channel", "simplex_certs", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_router_messages_dropped_total{message="data_2"}[1m])), + "channel", "simplex_resolver", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_router_messages_dropped_total{message="data_3"}[1m])), + "channel", "broadcast_blocks", "message", ".*" + ) + or + label_replace( + sum by (message) (rate(network_router_messages_dropped_total{message="data_4"}[1m])), + "channel", "marshal_backfill", "message", ".*" + ) + + # ---------- Aggregate P2P health ---------- + # Total messages dropped/s across all channels + - record: kora:p2p:total_dropped:rate1m + expr: sum(rate(network_router_messages_dropped_total[1m])) + + # Total messages rate-limited/s + - record: kora:p2p:total_rate_limited:rate1m + expr: sum(rate(network_spawner_messages_rate_limited_total[1m])) + + # Drop ratio: fraction of received messages that were dropped. + # `or vector(0)` yields 0 only when the input series are absent; a + # zero receive rate still evaluates to 0/0 = NaN. The earlier + # clamp_min(…, 1) denominator understated the ratio below 1 msg/s.
+ - record: kora:p2p:drop_ratio + expr: >- + sum(rate(network_router_messages_dropped_total[5m])) + / + sum(rate(network_spawner_messages_received_total[5m])) + or vector(0) + + # Peer count (tracked peers in the directory) + - record: kora:p2p:tracked_peers + expr: avg(network_tracker_directory_tracked) + + # Message delivery ratio (fraction of sent messages that are not dropped) + - record: kora:p2p:delivery_ratio + expr: >- + 1 - ( + sum(rate(network_router_messages_dropped_total[5m])) + / + clamp_min(sum(rate(network_spawner_messages_sent_total[5m])), 1) + ) diff --git a/docker/grafana/dashboards/kora-logs.json b/docker/grafana/dashboards/kora-logs.json new file mode 100644 index 0000000..838847d --- /dev/null +++ b/docker/grafana/dashboards/kora-logs.json @@ -0,0 +1,277 @@ +{ + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": null, + "links": [ + {"title": "Overview Dashboard", "url": "/d/kora-overview", "type": "link"}, + {"title": "Performance & Block Time", "url": "/d/kora-performance", "type": "link"}, + {"title": "Stall Diagnostics", "url": "/d/kora-stall-diagnostics", "type": "link"} + ], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 300, + "title": "Log Volume & Errors", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Log lines per second by level across all nodes. 
Spikes in WARN/ERROR precede stalls.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + }, + "overrides": [ + {"matcher": {"id": "byName", "options": "ERROR"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "WARN"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "INFO"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "DEBUG"}, "properties": [{"id": "color", "value": {"fixedColor": "blue", "mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 5, "w": 12, "x": 0, "y": 1}, + "id": 301, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "sum by (level) (rate({node_type=~\"validator|secondary\"}[1m]))", "legendFormat": "{{level}}", "refId": "A"} + ], + "title": "Log Volume by Level", + "type": "timeseries" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Error + warning rate per node. 
Asymmetric rates indicate per-node issues.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 5, "w": 12, "x": 12, "y": 1}, + "id": 302, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "sum by (service) (rate({node_type=~\"validator|secondary\"} |~ \"(?i)(ERROR|WARN)\" [1m]))", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "Error+Warn Rate per Node", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 6}, + "id": 310, + "title": "Critical Error Patterns (Stall Indicators)", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Voter panics — fatal consensus crashes. Any hit means a node's consensus died.", + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 7}, + "id": 311, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": true, + "prettifyLogMessage": false, + "enableLogDetails": true, + "sortOrder": "Descending", + "dedupStrategy": "none" + }, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "{node_type=~\"validator|secondary\"} |~ \"task panicked|voter should not finish|PANIC\"", "refId": "A"} + ], + "title": "Voter Panics / Task Crashes", + "type": "logs" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Resolver 'invalid data received' — indicates catch-up failure after restart.", + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 13}, + "id": 312, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": true, + "prettifyLogMessage": false, + "enableLogDetails": true, + "sortOrder": "Descending", + "dedupStrategy": "none" 
+ }, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "{node_type=~\"validator|secondary\"} |~ \"invalid data received\"", "refId": "A"} + ], + "title": "Resolver Invalid Data (Catch-up Failures)", + "type": "logs" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 19}, + "id": 320, + "title": "Transaction & Mempool Errors", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Duplicate transaction rejections. Bursts indicate tx storm or rebroadcast.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 5, "w": 8, "x": 0, "y": 20}, + "id": 321, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "sum by (service) (rate({node_type=\"validator\"} |~ \"ledger.submit_tx returned false\" [1m]))", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "Duplicate TX Rejections/s", + "type": "timeseries" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Transaction validation failures (nonce too low, etc).", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 5, "w": 8, "x": 8, "y": 20}, + "id": 322, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "sum by (service) (rate({node_type=\"validator\"} |~ \"validator rejected tx\" [1m]))", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "TX Validation Failures/s", + "type": "timeseries" + }, + { + 
"datasource": {"type": "loki", "uid": "loki"}, + "description": "Block execution failures — the executor aborting blocks due to bad transactions.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 5, "w": 8, "x": 16, "y": 20}, + "id": 323, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "sum by (service) (rate({node_type=\"validator\"} |~ \"execution failed|state root mismatch\" [1m]))", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "Block Execution Failures/s", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 25}, + "id": 330, + "title": "Finalization & Persistence Errors", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Persistence failures, state root mismatches, and finalization errors. 
These indicate the FinalizedReporter early-return bug path.", + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 26}, + "id": 331, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": true, + "prettifyLogMessage": false, + "enableLogDetails": true, + "sortOrder": "Descending", + "dedupStrategy": "none" + }, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "{node_type=~\"validator|secondary\"} |~ \"failed to persist|failed to execute finalized|failed to compute qmdb|state root mismatch|missing parent snapshot\"", "refId": "A"} + ], + "title": "Finalization & Persistence Errors", + "type": "logs" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 340, + "title": "Consensus Activity Logs", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Consensus initialization, restarts, and state changes. Key for tracking node restart behavior.", + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 33}, + "id": 341, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": true, + "prettifyLogMessage": false, + "enableLogDetails": true, + "sortOrder": "Descending", + "dedupStrategy": "none" + }, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "{node_type=~\"validator|secondary\"} |~ \"consensus initialized|Validator started|recovered finalized|Starting production|nullification floor\"", "refId": "A"} + ], + "title": "Consensus Lifecycle Events", + "type": "logs" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 39}, + "id": 350, + "title": "Full Log Stream", + "type": "row" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "description": "Full log stream filtered to WARN and ERROR. 
Use this for ad-hoc investigation.", + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 40}, + "id": 351, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": true, + "prettifyLogMessage": false, + "enableLogDetails": true, + "sortOrder": "Descending", + "dedupStrategy": "none" + }, + "targets": [ + {"datasource": {"type": "loki", "uid": "loki"}, "expr": "{node_type=~\"validator|secondary\"} |~ \"(?i)(ERROR|WARN)\"", "refId": "A"} + ], + "title": "All Warnings & Errors", + "type": "logs" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "tags": ["kora", "logs"], + "templating": {"list": []}, + "time": {"from": "now-30m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Kora Logs Explorer", + "uid": "kora-logs", + "version": 1 +} diff --git a/docker/grafana/dashboards/kora-overview.json b/docker/grafana/dashboards/kora-overview.json index 8c2cbe7..659f614 100644 --- a/docker/grafana/dashboards/kora-overview.json +++ b/docker/grafana/dashboards/kora-overview.json @@ -1,97 +1,537 @@ { - "annotations": { - "list": [] - }, + "annotations": {"list": []}, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 2, "id": null, - "links": [], + "links": [ + {"title": "Performance & Block Time", "url": "/d/kora-performance", "type": "link"}, + {"title": "P2P & Network", "url": "/d/kora-p2p", "type": "link"}, + {"title": "Stall Diagnostics", "url": "/d/kora-stall-diagnostics", "type": "link"}, + {"title": "Logs Explorer", "url": "/d/kora-logs", "type": "link"} + ], "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "title": "Overview", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 3}, 
{"color": "green", "value": 4} + ]} + } }, + "gridPos": {"h": 3, "w": 3, "x": 0, "y": 1}, + "id": 1, + "options": {"colorMode": "background", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "count(up{job=\"kora-validators\"} == 1)", "refId": "A"}], + "title": "Validators Up", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, "fieldConfig": { "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "green", "value": null} - ] - }, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "short" - }, - "overrides": [] + } }, - "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0}, - "id": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": ["lastNotNull"], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.0.0", - "targets": [ - { - "expr": "count(up{job=\"kora-validators\"})", - "refId": "A" - } - ], - "title": "Active Validators", + "gridPos": {"h": 3, "w": 3, "x": 3, "y": 1}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "max(finalized_height)", "refId": "A"}], + "title": "Finalized Height", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 3, "x": 6, "y": 1}, + "id": 3, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(finalized_height[1m]))", "refId": "A"}], + "title": "Blocks/sec", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + 
"thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.05}, {"color": "red", "value": 0.2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 3, "w": 3, "x": 9, "y": 1}, + "id": 4, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(engine_voter_finalization_latency_sum[1m]) / rate(engine_voter_finalization_latency_count[1m]))", "refId": "A"}], + "title": "Avg Finalization Latency", "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 10} + ]}, + "unit": "short" + } }, + "gridPos": {"h": 3, "w": 3, "x": 12, "y": 1}, + "id": 5, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "sum(rate(engine_voter_state_nullifications_total[5m]))", "refId": "A"}], + "title": "Nullifications/s", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, "fieldConfig": { "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "green", "value": null} - ] - }, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 10} + ]}, "unit": "short" - }, - "overrides": [] + } }, - "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0}, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": ["lastNotNull"], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.0.0", - "targets": [ - { - "expr": "up{job=\"kora-validators\"}", - "legendFormat": 
"Node {{validator_index}}", - "refId": "A" - } - ], - "title": "Node Health", + "gridPos": {"h": 3, "w": 3, "x": 15, "y": 1}, + "id": 6, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "sum(rate(engine_voter_state_timeouts_total[5m]))", "refId": "A"}], + "title": "Timeouts/s", "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + } + }, + "gridPos": {"h": 3, "w": 3, "x": 18, "y": 1}, + "id": 7, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(runtime_process_rss)", "refId": "A"}], + "title": "Avg Memory", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 20} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 3, "x": 21, "y": 1}, + "id": 8, + "options": {"colorMode": "value", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "max(finalized_height) - min(finalized_height)", "refId": "A"}], + "title": "Height Drift", + "type": "stat" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 4}, + "id": 101, + "title": "Consensus Health", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "axisBorderShow": true}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 5}, + "id": 10, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, 
"tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"expr": "finalized_height", "legendFormat": "Node {{validator_index}}", "refId": "A"}], + "title": "Finalized Height", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 5}, + "id": 11, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"expr": "rate(finalized_height{job=\"kora-validators\"}[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"}], + "title": "Finalization Rate (blocks/s)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Difference between max and min finalized height across validators. 
Sustained drift > 0 indicates a lagging node.", + "fieldConfig": { + "defaults": { + "color": {"fixedColor": "orange", "mode": "fixed"}, + "custom": {"lineWidth": 2, "fillOpacity": 20, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "area"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 20} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 5}, + "id": 12, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}}, + "targets": [ + {"expr": "max(finalized_height) - min(finalized_height)", "legendFormat": "Height Drift", "refId": "A"}, + {"expr": "max(engine_voter_state_current_view) - min(engine_voter_state_current_view)", "legendFormat": "View Drift", "refId": "B"} + ], + "title": "Node Divergence (lower is better)", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 12}, + "id": 102, + "title": "Latency", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 13}, + "id": 20, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_voter_notarization_latency_sum[1m]) / rate(engine_voter_notarization_latency_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Notarization Latency", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, 
+ "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 13}, + "id": 21, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_voter_finalization_latency_sum[1m]) / rate(engine_voter_finalization_latency_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Finalization Latency", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 13}, + "id": 22, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(marshaled_build_duration_sum[1m]) / rate(marshaled_build_duration_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Block Build Duration", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 20}, + "id": 103, + "title": "Faults & Anomalies", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Timeouts by reason. MissingProposal = leader didn't propose. LeaderNullify = leader nullified. 
LeaderTimeout = leader was too slow.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 15, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 21}, + "id": 30, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (reason) (rate(engine_voter_state_timeouts_total[5m]))", "legendFormat": "{{reason}}", "refId": "A"} + ], + "title": "Timeouts by Reason", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Nullification rate grouped by the node that reported it (instance label), not by the leader of the nullified view. One reporter running well above its peers suggests that node is struggling.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 15, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 21}, + "id": 31, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (instance) (rate(engine_voter_state_nullifications_total[5m]))", "legendFormat": "Reporter: {{instance}}", "refId": "A"} + ], + "title": "Nullification Rate by Reporter", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Ratio of nullified views to total views.
Higher = more wasted consensus rounds.", + "fieldConfig": { + "defaults": { + "color": {"fixedColor": "red", "mode": "fixed"}, + "custom": {"lineWidth": 2, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "area"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 0.3} + ]}, + "unit": "percentunit", + "min": 0, + "max": 1 + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 21}, + "id": 32, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}}, + "targets": [ + {"expr": "1 - (rate(finalized_height[5m]) / rate(engine_voter_state_current_view[5m]))", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Skip Rate (wasted views)", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 28}, + "id": 104, + "title": "Network", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 29}, + "id": 40, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_outbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} out", "refId": "A"}, + {"expr": "rate(runtime_inbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} in", "refId": "B"} + ], + "title": "Network Bandwidth", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": 
"short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 29}, + "id": 41, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (instance) (rate(network_spawner_messages_sent_total[1m]))", "legendFormat": "Node {{validator_index}} sent", "refId": "A"}, + {"expr": "sum by (instance) (rate(network_spawner_messages_received_total[1m]))", "legendFormat": "Node {{validator_index}} recv", "refId": "B"} + ], + "title": "Message Rate", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 29}, + "id": 42, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (message) (rate(engine_voter_outbound_messages_total[1m]))", "legendFormat": "{{message}}", "refId": "A"} + ], + "title": "Consensus Message Types", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 36}, + "id": 105, + "title": "Resources", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 37}, + "id": 50, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_process_rss", "legendFormat": "Node {{validator_index}} RSS", "refId": "A"} + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + 
"datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 37}, + "id": 51, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_storage_write_bytes_total[1m])", "legendFormat": "Node {{validator_index}} write", "refId": "A"}, + {"expr": "rate(runtime_storage_read_bytes_total[1m])", "legendFormat": "Node {{validator_index}} read", "refId": "B"} + ], + "title": "Disk I/O", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 37}, + "id": 52, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_tasks_running", "legendFormat": "Node {{validator_index}} running", "refId": "A"}, + {"expr": "rate(runtime_tasks_spawned_total[1m])", "legendFormat": "Node {{validator_index}} spawned/s", "refId": "B"} + ], + "title": "Tasks", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 44}, + "id": 106, + "title": "Broadcast & Resolver", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 45}, + "id": 60, + "options": {"legend": {"displayMode": "list", "placement": 
"bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(broadcast_get_total{status=\"Success\"}[1m])", "legendFormat": "Node {{validator_index}} success", "refId": "A"}, + {"expr": "rate(broadcast_get_total{status=\"Failure\"}[1m])", "legendFormat": "Node {{validator_index}} failure", "refId": "B"} + ], + "title": "Broadcast Gets", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 45}, + "id": 61, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(broadcast_receive_total[1m])", "legendFormat": "{{status}}", "refId": "A"} + ], + "title": "Broadcast Receives", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 45}, + "id": 62, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_batcher_verify_latency_sum[1m]) / rate(engine_batcher_verify_latency_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Signature Verify Latency", + "type": "timeseries" } ], "refresh": "5s", @@ -103,5 +543,5 @@ "timezone": "browser", "title": "Kora Devnet Overview", "uid": "kora-overview", - "version": 1 + "version": 3 } diff --git a/docker/grafana/dashboards/kora-p2p.json b/docker/grafana/dashboards/kora-p2p.json new file mode 100644 index 0000000..39b807c --- /dev/null +++ 
b/docker/grafana/dashboards/kora-p2p.json @@ -0,0 +1,417 @@ +{ + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": null, + "links": [ + {"title": "Overview", "url": "/d/kora-overview", "type": "link"}, + {"title": "Performance & Block Time", "url": "/d/kora-performance", "type": "link"}, + {"title": "Stall Diagnostics", "url": "/d/kora-stall-diagnostics", "type": "link"} + ], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "title": "P2P Health Overview", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 0, "y": 1}, + "id": 1, + "options": {"colorMode": "value", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "network_tracker_directory_tracked", "legendFormat": "", "refId": "A"}], + "title": "Tracked Peers", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 4, "y": 1}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "count(network_tracker_directory_connected)", "legendFormat": "", "refId": "A"}], + "title": "Connected Peers", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 100}, {"color": "red", "value": 1000} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 8, "y": 1}, + "id": 3, + 
"options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "sum(rate(network_router_messages_dropped_total[1m]))", "legendFormat": "", "refId": "A"}], + "title": "Msgs Dropped/s", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 10} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 12, "y": 1}, + "id": 4, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "sum(rate(network_spawner_messages_rate_limited_total[1m]))", "legendFormat": "", "refId": "A"}], + "title": "Msgs Rate Limited/s", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.01}, {"color": "red", "value": 0.1} + ]}, + "unit": "percentunit" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 16, "y": 1}, + "id": 5, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "kora:p2p:drop_ratio or vector(0)", "legendFormat": "", "refId": "A"}], + "title": "Drop Ratio", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null} + ]}, + "unit": "Bps" + } + }, + "gridPos": {"h": 3, "w": 4, "x": 20, "y": 1}, + "id": 6, + "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "rate(runtime_outbound_bandwidth_total[1m]) + rate(runtime_inbound_bandwidth_total[1m])", "legendFormat": "", 
"refId": "A"}], + "title": "Total Bandwidth", + "type": "stat" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 4}, + "id": 101, + "title": "Channel Message Rates (human-readable names via recording rules)", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages sent per second by channel. Uses recording rules to map data_0..data_4 to human-readable names:\n- simplex_votes (data_0)\n- simplex_certs (data_1)\n- simplex_resolver (data_2)\n- broadcast_blocks (data_3)\n- marshal_backfill (data_4)", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "none"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 5}, + "id": 10, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "kora:p2p:channel_sent:rate1m", "legendFormat": "{{channel}}", "refId": "A"} + ], + "title": "Messages Sent/s by Channel", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages received per second by channel. 
Uses recording rules to map data_0..data_4 to human-readable names.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "none"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 5}, + "id": 11, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "kora:p2p:channel_recv:rate1m", "legendFormat": "{{channel}}", "refId": "A"} + ], + "title": "Messages Received/s by Channel", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages dropped per second by channel. High drop rates indicate backpressure or a peer falling behind.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 15, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 13}, + "id": 12, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "kora:p2p:channel_dropped:rate1m", "legendFormat": "{{channel}}", "refId": "A"} + ], + "title": "Messages Dropped/s by Channel", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Raw data_N channel view. 
Shows all message types including protocol-level (greeting, bit_vec, peers).", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "none"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 13}, + "id": 13, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (message) (rate(network_spawner_messages_sent_total[1m]))", "legendFormat": "sent: {{message}}", "refId": "A"}, + {"expr": "sum by (message) (rate(network_spawner_messages_received_total[1m]))", "legendFormat": "recv: {{message}}", "refId": "B"} + ], + "title": "Raw Message Types (sent + received)", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 21}, + "id": 102, + "title": "Per-Peer Metrics", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages sent/s broken down by peer. 
Peer keys are hex-encoded public keys.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 22}, + "id": 20, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (peer) (rate(network_spawner_messages_sent_total[1m]))", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Messages Sent/s by Peer", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages received/s broken down by peer.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 22}, + "id": 21, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (peer) (rate(network_spawner_messages_received_total[1m]))", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Messages Received/s by Peer", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Messages dropped/s broken down by peer. 
High drops for a single peer indicate that peer is flooding or the local node cannot keep up with its messages.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 15, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 30}, + "id": 22, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (peer) (rate(network_router_messages_dropped_total[1m]))", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Messages Dropped/s by Peer", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Resolver peer performance: exponential moving average of response time in ms. Lower is better. 4999ms indicates the peer has not responded yet.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "ms" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 30}, + "id": 23, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "lastNotNull"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "engine_resolver_resolver_fetcher_peer_performance", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Resolver Peer Performance (response EMA)", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 38}, + "id": 103, + "title": "Connections & Bandwidth", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Total inbound and outbound bandwidth in bytes/sec.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, 
"fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 39}, + "id": 30, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_outbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} outbound", "refId": "A"}, + {"expr": "rate(runtime_inbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} inbound", "refId": "B"} + ], + "title": "Bandwidth (bytes/s)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Dial attempts per second for each peer (1m rate). A sustained high rate indicates connectivity problems to that peer.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 39}, + "id": 31, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(network_dialer_attempts_total[1m])", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Dial Attempts/s by Peer", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Per-second rate of inbound/outbound TCP connections and of handshakes that were blocked or IP rate limited.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 39}, + "id": 32, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": 
"rate(runtime_inbound_connections_total[1m])", "legendFormat": "Node {{validator_index}} 
inbound conn/s", "refId": "A"}, + {"expr": "rate(runtime_outbound_connections_total[1m])", "legendFormat": "Node {{validator_index}} outbound conn/s", "refId": "B"}, + {"expr": "rate(network_listener_handshakes_blocked_total[1m])", "legendFormat": "Node {{validator_index}} handshakes blocked/s", "refId": "C"}, + {"expr": "rate(network_listener_handshake_ip_rate_limited_total[1m])", "legendFormat": "Node {{validator_index}} IP rate limited/s", "refId": "D"} + ], + "title": "Connection Rate & Handshake Failures", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 47}, + "id": 104, + "title": "Consensus Inbound Messages (per-peer)", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Consensus messages received from each peer (Notarize, Nullify, Nullification, Finalization). Useful for identifying peers that have stopped voting.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "none"}}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 48}, + "id": 40, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (peer, message) (rate(engine_batcher_inbound_messages_total[1m]))", "legendFormat": "{{peer}}: {{message}}", "refId": "A"} + ], + "title": "Inbound Consensus Messages by Peer", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "View of latest vote received from each peer. 
Peers stuck at a low view number may be stalled or partitioned.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 48}, + "id": 41, + "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "engine_batcher_latest_vote", "legendFormat": "{{peer}}", "refId": "A"} + ], + "title": "Latest Vote View per Peer", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["kora", "p2p", "network"], + "templating": {"list": []}, + "time": {"from": "now-15m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Kora P2P & Network", + "uid": "kora-p2p", + "version": 1 +} diff --git a/docker/grafana/dashboards/kora-performance.json b/docker/grafana/dashboards/kora-performance.json new file mode 100644 index 0000000..3042dee --- /dev/null +++ b/docker/grafana/dashboards/kora-performance.json @@ -0,0 +1,619 @@ +{ + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": null, + "links": [ + {"title": "Overview Dashboard", "url": "/d/kora-overview", "type": "link"}, + {"title": "Stall Diagnostics", "url": "/d/kora-stall-diagnostics", "type": "link"}, + {"title": "Logs Explorer", "url": "/d/kora-logs", "type": "link"} + ], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 400, + "title": "Block Time & Throughput", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Effective block time (inverse of finalization rate). 
Target: <1s for fast consensus.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.5}, {"color": "red", "value": 2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "id": 401, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "1 / clamp_min(avg(rate(finalized_height[1m])), 0.001)", "refId": "A"}], + "title": "Avg Block Time", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Highest finalization rate among all nodes, measured over a 1-minute window.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 1}, {"color": "green", "value": 3} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "id": 402, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "max(rate(finalized_height[1m]))", "refId": "A"}], + "title": "Peak Blocks/sec", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Consensus efficiency: what fraction of views produce finalized blocks. 
100% = no wasted rounds.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "green", "value": 0.9} + ]}, + "unit": "percentunit" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "id": 403, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(finalized_height[5m])) / avg(rate(engine_voter_state_current_view[5m]))", "refId": "A"}], + "title": "Consensus Efficiency", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Average block build duration. Must stay well under LEADER_TIMEOUT (1s default).", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.2}, {"color": "red", "value": 1} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "id": 404, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(marshaled_build_duration_sum[1m]) / rate(marshaled_build_duration_count[1m]))", "refId": "A"}], + "title": "Avg Build Time", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Average time from proposal to finalization (notarize + finalize votes collected).", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 0.5} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "id": 405, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(engine_voter_finalization_latency_sum[1m]) / 
rate(engine_voter_finalization_latency_count[1m]))", "refId": "A"}], + "title": "Avg Finalization Latency", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Average BLS signature verification time. Bottleneck if > 50ms.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.02}, {"color": "red", "value": 0.05} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "id": 406, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(engine_batcher_verify_latency_sum[1m]) / rate(engine_batcher_verify_latency_count[1m]))", "refId": "A"}], + "title": "Avg Sig Verify", + "type": "stat" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 410, + "title": "Block Time Breakdown (Where Time Goes)", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Block time breakdown: build + notarization + finalization. 
Shows where optimization effort should focus.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 30, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "s" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "id": 411, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "avg(rate(marshaled_build_duration_sum[1m]) / rate(marshaled_build_duration_count[1m]))", "legendFormat": "Block Build", "refId": "A"}, + {"expr": "avg(rate(engine_voter_notarization_latency_sum[1m]) / rate(engine_voter_notarization_latency_count[1m]))", "legendFormat": "Notarization", "refId": "B"}, + {"expr": "avg(rate(engine_voter_finalization_latency_sum[1m]) / rate(engine_voter_finalization_latency_count[1m])) - avg(rate(engine_voter_notarization_latency_sum[1m]) / rate(engine_voter_notarization_latency_count[1m]))", "legendFormat": "Finalization (after notar.)", "refId": "C"} + ], + "title": "Block Time Composition (stacked)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Effective block time (1/blocks_per_sec) including wasted views. 
The gap between theoretical min and actual shows optimization potential.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line+area"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "transparent", "value": null}, {"color": "rgba(255,0,0,0.1)", "value": 2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "id": 412, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "1 / clamp_min(avg(rate(finalized_height[1m])), 0.001)", "legendFormat": "Actual Block Time", "refId": "A"}, + {"expr": "avg(rate(marshaled_build_duration_sum[1m]) / rate(marshaled_build_duration_count[1m])) + avg(rate(engine_voter_finalization_latency_sum[1m]) / rate(engine_voter_finalization_latency_count[1m]))", "legendFormat": "Theoretical Min (build+finalize)", "refId": "B"}, + {"expr": "avg(rate(engine_batcher_verify_latency_sum[1m]) / rate(engine_batcher_verify_latency_count[1m]))", "legendFormat": "Sig Verify Overhead", "refId": "C"} + ], + "title": "Actual vs Theoretical Block Time", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 420, + "title": "Latency Percentiles (p50 / p95 / p99)", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Block build duration percentiles. 
p99 approaching 2s leader timeout means proposals will start failing.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "transparent", "value": null}, {"color": "red", "value": 2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 15}, + "id": 421, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "histogram_quantile(0.50, sum(rate(marshaled_build_duration_bucket[5m])) by (le))", "legendFormat": "p50", "refId": "A"}, + {"expr": "histogram_quantile(0.95, sum(rate(marshaled_build_duration_bucket[5m])) by (le))", "legendFormat": "p95", "refId": "B"}, + {"expr": "histogram_quantile(0.99, sum(rate(marshaled_build_duration_bucket[5m])) by (le))", "legendFormat": "p99", "refId": "C"} + ], + "title": "Block Build Duration Percentiles", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Finalization latency percentiles. 
Shows consistency of vote collection time.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "transparent", "value": null}, {"color": "red", "value": 4} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 15}, + "id": 422, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "histogram_quantile(0.50, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le))", "legendFormat": "p50", "refId": "A"}, + {"expr": "histogram_quantile(0.95, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le))", "legendFormat": "p95", "refId": "B"}, + {"expr": "histogram_quantile(0.99, sum(rate(engine_voter_finalization_latency_bucket[5m])) by (le))", "legendFormat": "p99", "refId": "C"} + ], + "title": "Finalization Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "BLS signature verification percentiles. 
If p99 > 50ms, increase SIGNATURE_THREADS from 2.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "transparent", "value": null}, {"color": "red", "value": 0.05} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 15}, + "id": 423, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "histogram_quantile(0.50, sum(rate(engine_batcher_verify_latency_bucket[5m])) by (le))", "legendFormat": "p50", "refId": "A"}, + {"expr": "histogram_quantile(0.95, sum(rate(engine_batcher_verify_latency_bucket[5m])) by (le))", "legendFormat": "p95", "refId": "B"}, + {"expr": "histogram_quantile(0.99, sum(rate(engine_batcher_verify_latency_bucket[5m])) by (le))", "legendFormat": "p99", "refId": "C"} + ], + "title": "Sig Verify Latency Percentiles", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 22}, + "id": 430, + "title": "Consensus Pipeline Efficiency", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Rate of views advancing vs blocks finalized. 
Gap = wasted consensus rounds eating into throughput.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 23}, + "id": 431, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "avg(rate(engine_voter_state_current_view[1m]))", "legendFormat": "Views/sec (capacity)", "refId": "A"}, + {"expr": "avg(rate(finalized_height[1m]))", "legendFormat": "Blocks/sec (actual)", "refId": "B"}, + {"expr": "avg(rate(engine_voter_state_current_view[1m])) - avg(rate(finalized_height[1m]))", "legendFormat": "Wasted views/sec", "refId": "C"} + ], + "title": "Capacity vs Actual Throughput", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Consensus messages sent per second by type. Shows the communication overhead at current block rate.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 23}, + "id": 432, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (message) (rate(engine_voter_outbound_messages_total[1m]))", "legendFormat": "{{message}}", "refId": "A"} + ], + "title": "Consensus Messages/sec by Type", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Batch verification size. 
Larger batches = more efficient but higher latency per batch.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 23}, + "id": 433, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "histogram_quantile(0.50, sum(rate(engine_batcher_batch_size_bucket[5m])) by (le))", "legendFormat": "p50 batch size", "refId": "A"}, + {"expr": "histogram_quantile(0.95, sum(rate(engine_batcher_batch_size_bucket[5m])) by (le))", "legendFormat": "p95 batch size", "refId": "B"}, + {"expr": "sum(rate(engine_batcher_added[1m]))", "legendFormat": "Messages/sec to batcher", "refId": "C"} + ], + "title": "Signature Batch Size & Throughput", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 30}, + "id": 440, + "title": "Storage & I/O Performance", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Write throughput vs finalization rate. 
If writes plateau while finalization slows, disk I/O is the bottleneck.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 31}, + "id": 441, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum(rate(runtime_storage_write_bytes_total[1m]))", "legendFormat": "Total Write B/s", "refId": "A"}, + {"expr": "sum(rate(runtime_storage_read_bytes_total[1m]))", "legendFormat": "Total Read B/s", "refId": "B"} + ], + "title": "Aggregate Storage I/O", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Read and write IOPS per node. High IOPS with low bandwidth indicates many small writes (journaling).", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "iops" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 31}, + "id": 442, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (instance) (rate(runtime_storage_writes_total[1m]))", "legendFormat": "Node {{validator_index}} writes/s", "refId": "A"}, + {"expr": "sum by (instance) (rate(runtime_storage_reads_total[1m]))", "legendFormat": "Node {{validator_index}} reads/s", "refId": "B"} + ], + "title": "Storage IOPS per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Average write size (bytes per write op). 
Small writes indicate journal syncs; large writes indicate state commits.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 31}, + "id": 443, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_storage_write_bytes_total[1m]) / rate(runtime_storage_writes_total[1m])", "legendFormat": "Node {{validator_index}} avg write size", "refId": "A"} + ], + "title": "Average Write Size", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 38}, + "id": 450, + "title": "Network Performance", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Total network bandwidth per node. Shows communication overhead at current throughput.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 39}, + "id": 451, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_outbound_bandwidth_total[1m]) + rate(runtime_inbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} total", "refId": "A"} + ], + "title": "Network Bandwidth per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Bytes per finalized block (network cost per block). 
Lower = more efficient protocol.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 39}, + "id": 452, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "(rate(runtime_outbound_bandwidth_total[5m]) + rate(runtime_inbound_bandwidth_total[5m])) / clamp_min(rate(finalized_height[5m]), 0.001)", "legendFormat": "Node {{validator_index}} bytes/block", "refId": "A"} + ], + "title": "Network Cost per Block", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Block fetch/resolve duration percentiles. High values indicate slow block propagation between nodes.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 39}, + "id": 453, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "histogram_quantile(0.50, sum(rate(engine_resolver_resolver_fetch_duration_bucket[5m])) by (le))", "legendFormat": "Fetch p50", "refId": "A"}, + {"expr": "histogram_quantile(0.95, sum(rate(engine_resolver_resolver_fetch_duration_bucket[5m])) by (le))", "legendFormat": "Fetch p95", "refId": "B"}, + {"expr": "histogram_quantile(0.50, sum(rate(engine_resolver_resolver_serve_duration_bucket[5m])) by (le))", "legendFormat": "Serve p50", "refId": "C"}, + {"expr": "histogram_quantile(0.95, sum(rate(engine_resolver_resolver_serve_duration_bucket[5m])) by (le))", "legendFormat": "Serve p95", "refId": "D"} + ], + "title": "Resolver Fetch/Serve Latency", + "type": "timeseries" + }, + + { + "collapsed": 
false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 46}, + "id": 460, + "title": "Resource Utilization vs Throughput", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Memory growth rate. Steady growth under load = mempool leak or state accumulation.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 47}, + "id": 461, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_process_rss", "legendFormat": "Node {{validator_index}} RSS", "refId": "A"} + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Active async tasks. Correlate with block rate — task count should scale linearly with throughput.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 47}, + "id": 462, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_tasks_running", "legendFormat": "Node {{validator_index}} running", "refId": "A"}, + {"expr": "rate(runtime_tasks_spawned_total[1m])", "legendFormat": "Node {{validator_index}} spawned/s", "refId": "B"} + ], + "title": "Task Concurrency", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Memory consumed per finalized block. 
Rising trend indicates state or mempool inefficiency.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 47}, + "id": 463, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "deriv(runtime_process_rss[5m]) / clamp_min(rate(finalized_height[5m]), 0.001)", "legendFormat": "Node {{validator_index}} bytes/block", "refId": "A"} + ], + "title": "Memory Growth per Block", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 54}, + "id": 470, + "title": "Optimization Targets", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Time budget breakdown: what fraction of block time is spent in each phase. Largest slice = optimization target.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 0, "fillOpacity": 80} + }, + "overrides": [ + {"matcher": {"id": "byName", "options": "Build"}, "properties": [{"id": "color", "value": {"fixedColor": "blue", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Notarization"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Finalization"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Wasted (nullified)"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 55}, + "id": 471, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "pieType": "donut", "legend": {"displayMode": "list", "placement": "right"}}, + "targets": [ + {"expr": 
"avg(rate(marshaled_build_duration_sum[5m]) / rate(marshaled_build_duration_count[5m]))", "legendFormat": "Build", "refId": "A"}, + {"expr": "avg(rate(engine_voter_notarization_latency_sum[5m]) / rate(engine_voter_notarization_latency_count[5m]))", "legendFormat": "Notarization", "refId": "B"}, + {"expr": "clamp_min(avg(rate(engine_voter_finalization_latency_sum[5m]) / rate(engine_voter_finalization_latency_count[5m])) - avg(rate(engine_voter_notarization_latency_sum[5m]) / rate(engine_voter_notarization_latency_count[5m])), 0)", "legendFormat": "Finalization", "refId": "C"}, + {"expr": "clamp_min((1 / clamp_min(avg(rate(finalized_height[5m])), 0.001)) - avg(rate(engine_voter_finalization_latency_sum[5m]) / rate(engine_voter_finalization_latency_count[5m])) - avg(rate(marshaled_build_duration_sum[5m]) / rate(marshaled_build_duration_count[5m])), 0)", "legendFormat": "Wasted (nullified)", "refId": "D"} + ], + "title": "Time Budget Breakdown", + "type": "piechart" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Blocked resolver peers — nodes that can't fetch blocks from peers. 
Non-zero means catch-up is impaired.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 55}, + "id": 472, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "engine_resolver_resolver_peers_blocked", "legendFormat": "Node {{validator_index}} blocked peers", "refId": "A"}, + {"expr": "engine_resolver_resolver_fetch_active", "legendFormat": "Node {{validator_index}} active fetches", "refId": "B"}, + {"expr": "engine_resolver_resolver_fetch_pending", "legendFormat": "Node {{validator_index}} pending fetches", "refId": "C"} + ], + "title": "Resolver Health (Blocked Peers & Fetch Queue)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Journal tracked items and sync rate. 
Growing tracked items without syncs = journal backpressure.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 55}, + "id": 473, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "engine_voter_journal_tracked", "legendFormat": "Node {{validator_index}} tracked", "refId": "A"}, + {"expr": "rate(engine_voter_journal_synced[1m])", "legendFormat": "Node {{validator_index}} synced/s", "refId": "B"}, + {"expr": "rate(engine_voter_journal_pruned[1m])", "legendFormat": "Node {{validator_index}} pruned/s", "refId": "C"} + ], + "title": "Voter Journal Activity", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["kora", "performance"], + "templating": {"list": []}, + "time": {"from": "now-30m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Kora Performance & Block Time", + "uid": "kora-performance", + "version": 1 +} diff --git a/docker/grafana/dashboards/kora-stall-diagnostics.json b/docker/grafana/dashboards/kora-stall-diagnostics.json new file mode 100644 index 0000000..ef868e0 --- /dev/null +++ b/docker/grafana/dashboards/kora-stall-diagnostics.json @@ -0,0 +1,506 @@ +{ + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "id": null, + "links": [ + {"title": "Overview Dashboard", "url": "/d/kora-overview", "type": "link"}, + {"title": "Performance & Block Time", "url": "/d/kora-performance", "type": "link"}, + {"title": "Logs Explorer", "url": "/d/kora-logs", "type": "link"} + ], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 200, + "title": "Stall Detection", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": 
"prometheus"}, + "description": "Zero means consensus is stalled. This is the single most important metric.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 0.01}, {"color": "green", "value": 0.5} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "id": 201, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "avg(rate(finalized_height[1m]))", "refId": "A"}], + "title": "Blocks/sec", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Percentage of consensus rounds that produced no block. >30% preceded the production stall.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 0.3} + ]}, + "unit": "percentunit" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "id": 202, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "1 - (avg(rate(finalized_height[5m])) / avg(rate(engine_voter_state_current_view[5m])))", "refId": "A"}], + "title": "Skip Rate", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Nodes where view is advancing (>0 means consensus is trying but failing to finalize).", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 3}, {"color": "green", "value": 4} + ]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "id": 203, + "options": {"colorMode": "background", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "count(rate(engine_voter_state_current_view{job=\"kora-validators\"}[1m]) > 
0)", "refId": "A"}], + "title": "Nodes w/ Active Views", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Nodes with finalization rate > 0. If active views > finalizing nodes, blocks are being proposed but not finalized.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "red", "value": null}, {"color": "yellow", "value": 3}, {"color": "green", "value": 4} + ]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "id": 204, + "options": {"colorMode": "background", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "count(rate(finalized_height{job=\"kora-validators\"}[1m]) > 0)", "refId": "A"}], + "title": "Nodes Finalizing", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Max height minus min height across validators. >10 means a node is falling behind.", + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 50} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "id": 205, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + "targets": [{"expr": "max(finalized_height{job=\"kora-validators\"}) - min(finalized_height{job=\"kora-validators\"})", "refId": "A"}], + "title": "Height Drift", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 10} + ]}, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "id": 206, + "options": {"colorMode": "background", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}}, + 
"targets": [{"expr": "sum(rate(engine_voter_state_nullifications_total[5m]))", "refId": "A"}], + "title": "Nullifications/s", + "type": "stat" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 210, + "title": "Per-Node Consensus State", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Current consensus view per node. Nodes stuck at a fixed view have crashed or stalled.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 6}, + "id": 211, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "engine_voter_state_current_view", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Current View per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Finalized height per node. A node stuck at a low height while others advance indicates catch-up failure.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 6}, + "id": 212, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "finalized_height", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Finalized Height per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "View advancement rate. 
A node with view rate > 0 but finalization rate = 0 is stuck in nullification loop.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 6}, + "id": 213, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_voter_state_current_view{job=\"kora-validators\"}[1m])", "legendFormat": "Node {{validator_index}} view/s", "refId": "A"}, + {"expr": "rate(finalized_height{job=\"kora-validators\"}[1m])", "legendFormat": "Node {{validator_index}} finalized/s", "refId": "B"} + ], + "title": "View Rate vs Finalization Rate", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 13}, + "id": 220, + "title": "Nullification & Timeout Analysis", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Nullification rate per node. If all nodes nullify at similar rates, the problem is systemic (mempool/executor). If one node is much higher, that node is struggling.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 14}, + "id": 221, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_voter_state_nullifications_total[5m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Nullifications/s per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Timeout reasons stacked. 
MissingProposal = leader didn't propose (executor/mempool issue). LeaderTimeout = proposal too slow.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 20, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 14}, + "id": 222, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (reason) (rate(engine_voter_state_timeouts_total[5m]))", "legendFormat": "{{reason}}", "refId": "A"} + ], + "title": "Timeouts by Reason", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Per-node skip rate over a 5m window: 1 - (finalization rate / view rate), i.e. the fraction of views that did not finalize. The production failure showed 33%.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 2, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "area"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 0.3} + ]}, + "unit": "percentunit", "min": 0, "max": 1 + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 14}, + "id": 223, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}}, + "targets": [ + {"expr": "1 - (rate(finalized_height[5m]) / rate(engine_voter_state_current_view[5m]))", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Skip Rate per Node", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 21}, + "id": 230, + "title": "Block Building & Execution", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Time to build blocks. 
If approaching the 2s leader timeout, proposals will fail.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 22}, + "id": 231, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(marshaled_build_duration_sum[1m]) / rate(marshaled_build_duration_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Block Build Duration (2s timeout line)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Finalization latency per node. Spikes indicate consensus struggling to gather 2/3+ votes.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "yellow", "value": 0.5}, {"color": "red", "value": 2} + ]}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 22}, + "id": 232, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_voter_finalization_latency_sum[1m]) / rate(engine_voter_finalization_latency_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Finalization Latency", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "BLS signature verification latency. 
Spikes here indicate crypto bottleneck.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "s" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 22}, + "id": 233, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(engine_batcher_verify_latency_sum[1m]) / rate(engine_batcher_verify_latency_count[1m])", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Signature Verify Latency", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 29}, + "id": 240, + "title": "Network & P2P Health", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Broadcast get success vs failure. High failure rate = P2P connectivity issues, possible resolver blocking.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 30}, + "id": 241, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(broadcast_get_total{status=\"Success\"}[1m])", "legendFormat": "Node {{validator_index}} success", "refId": "A"}, + {"expr": "rate(broadcast_get_total{status=\"Failure\"}[1m])", "legendFormat": "Node {{validator_index}} failure", "refId": "B"} + ], + "title": "Broadcast Success vs Failure", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Consensus message types over time. 
Drop to zero means consensus stopped communicating.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 10, "spanNulls": false, "showPoints": "never", "stacking": {"mode": "normal"}}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 30}, + "id": 242, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "sum by (message) (rate(engine_voter_outbound_messages_total[1m]))", "legendFormat": "{{message}}", "refId": "A"} + ], + "title": "Consensus Message Types", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Network bandwidth per node. Sudden drop to zero means network isolation.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 30}, + "id": 243, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_outbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} out", "refId": "A"}, + {"expr": "rate(runtime_inbound_bandwidth_total[1m])", "legendFormat": "Node {{validator_index}} in", "refId": "B"} + ], + "title": "Network Bandwidth", + "type": "timeseries" + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 37}, + "id": 250, + "title": "Prometheus Alerts", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Active Prometheus alert rules. 
Only alerts in the firing state are listed; pending and inactive rules are not shown.", + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 38}, + "id": 251, + "options": {}, + "targets": [ + {"expr": "ALERTS{alertstate=\"firing\"}", "legendFormat": "{{ alertname }} ({{ instance }})", "refId": "A"} + ], + "title": "Firing Alerts", + "type": "table", + "fieldConfig": { + "defaults": { + "custom": {"filterable": true} + } + }, + "transformations": [ + {"id": "labelsToFields", "options": {}}, + {"id": "organize", "options": {"excludeByName": {"Time": false, "__name__": true, "Value": true}}} + ] + }, + + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 46}, + "id": 260, + "title": "Resource Correlation", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Memory per node. Sustained growth without plateau suggests unbounded mempool or state leak.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never", "thresholdsStyle": {"mode": "line"}}, + "thresholds": {"mode": "absolute", "steps": [ + {"color": "green", "value": null}, {"color": "red", "value": 2000000000} + ]}, + "unit": "bytes" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 0, "y": 47}, + "id": 261, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_process_rss", "legendFormat": "Node {{validator_index}}", "refId": "A"} + ], + "title": "Memory (RSS) per Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Disk I/O per node. Spikes correlate with finalization. 
Drop to zero means no blocks persisted.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "Bps" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 8, "y": 47}, + "id": 262, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "rate(runtime_storage_write_bytes_total[1m])", "legendFormat": "Node {{validator_index}} write", "refId": "A"}, + {"expr": "rate(runtime_storage_read_bytes_total[1m])", "legendFormat": "Node {{validator_index}} read", "refId": "B"} + ], + "title": "Disk I/O", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "description": "Active async tasks. Sudden changes indicate task crashes or spawning storms.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"lineWidth": 1, "fillOpacity": 5, "spanNulls": false, "showPoints": "never"}, + "unit": "short" + } + }, + "gridPos": {"h": 7, "w": 8, "x": 16, "y": 47}, + "id": 263, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [ + {"expr": "runtime_tasks_running", "legendFormat": "Node {{validator_index}} running", "refId": "A"} + ], + "title": "Active Tasks", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["kora", "diagnostics"], + "templating": {"list": []}, + "time": {"from": "now-30m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Kora Stall Diagnostics", + "uid": "kora-stall-diagnostics", + "version": 1 +} diff --git a/docker/grafana/dashboards/kora-transaction-flow.json b/docker/grafana/dashboards/kora-transaction-flow.json new file mode 100644 index 0000000..a8e75a8 --- /dev/null +++ b/docker/grafana/dashboards/kora-transaction-flow.json @@ -0,0 +1,306 @@ +{ + 
"annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "id": null, + "links": [ + {"title": "Overview", "url": "/d/kora-overview", "type": "link", "icon": "dashboard"}, + {"title": "Performance", "url": "/d/kora-performance", "type": "link", "icon": "dashboard"}, + {"title": "Stall Diagnostics", "url": "/d/kora-stall-diagnostics", "type": "link", "icon": "dashboard"}, + {"title": "Logs Explorer", "url": "/d/kora-logs", "type": "link", "icon": "dashboard"} + ], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "title": "Transaction Throughput", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 1}, {"color": "green", "value": 50}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "id": 1, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Blocks/sec", + "type": "stat", + "targets": [{"expr": "kora:blocks_per_sec", "legendFormat": "blocks/sec"}] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "unit": "s", + "thresholds": {"steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.05}, {"color": "red", "value": 0.1}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "id": 2, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Block Time", + "type": "stat", + "targets": [{"expr": "kora:block_time", "legendFormat": "block time"}] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "unit": "percentunit", + "thresholds": {"steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 0.5}, {"color": "green", "value": 0.7}]} + } + }, + "gridPos": 
{"h": 4, "w": 4, "x": 8, "y": 1}, + "id": 3, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Consensus Efficiency", + "type": "stat", + "targets": [{"expr": "kora:consensus_efficiency", "legendFormat": "efficiency"}] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "unit": "percentunit", + "thresholds": {"steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.2}, {"color": "red", "value": 0.4}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "id": 4, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Skip Rate", + "type": "stat", + "targets": [{"expr": "kora:skip_rate", "legendFormat": "skip rate"}] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 2}, {"color": "red", "value": 5}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "id": 5, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Nullifications/s", + "type": "stat", + "targets": [{"expr": "kora:nullification_rate", "legendFormat": "nullifications/s"}] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 10}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "id": 6, + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}}, + "title": "Height Drift", + "type": "stat", + "targets": [{"expr": "kora:height_drift", "legendFormat": "drift"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 200, + "title": "Finalized Height & Consensus Progress", + "type": "row" + }, + { + 
"datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "id": 7, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Finalized Height (all validators)", + "type": "timeseries", + "targets": [ + {"expr": "finalized_height", "legendFormat": "{{instance}}"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "id": 8, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Consensus View (all validators)", + "type": "timeseries", + "targets": [ + {"expr": "engine_voter_state_current_view", "legendFormat": "view {{instance}}"} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 300, + "title": "Block Building & Execution", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "s", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 15}, + "id": 9, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}}, + "title": "Block Build Duration (p50/p95/p99)", + "type": "timeseries", + "targets": [ + {"expr": "kora:build_duration:p50", "legendFormat": "p50"}, + {"expr": "kora:build_duration:p95", "legendFormat": "p95"}, + {"expr": "kora:build_duration:p99", "legendFormat": "p99"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "s", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 15}, + "id": 10, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}}, + "title": "Finalization Latency (p50/p95/p99)", + "type": 
"timeseries", + "targets": [ + {"expr": "kora:finalization_latency:p50", "legendFormat": "p50"}, + {"expr": "kora:finalization_latency:p95", "legendFormat": "p95"}, + {"expr": "kora:finalization_latency:p99", "legendFormat": "p99"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "ops", "custom": {"lineWidth": 2, "fillOpacity": 20, "stacking": {"mode": "normal"}}}}, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 15}, + "id": 11, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}}, + "title": "Nullifications vs Timeouts (rate)", + "type": "timeseries", + "targets": [ + {"expr": "sum(rate(engine_voter_state_nullifications_total[1m]))", "legendFormat": "nullifications/s"}, + {"expr": "sum(rate(engine_voter_state_timeouts_total[1m]))", "legendFormat": "timeouts/s"} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 400, + "title": "Per-Node Skip Rate & Health", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1, "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "id": 12, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Skip Rate per Node", + "type": "timeseries", + "targets": [ + {"expr": "1 - (rate(finalized_height[5m]) / rate(engine_voter_state_current_view[5m]))", "legendFormat": "{{instance}}"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "id": 13, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Resolver Blocked Peers", + "type": "timeseries", + "targets": [ + {"expr": "engine_resolver_resolver_peers_blocked", "legendFormat": "blocked 
{{instance}}"} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 500, + "title": "Resource Usage", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "bytes", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 33}, + "id": 14, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Memory (RSS) per Node", + "type": "timeseries", + "targets": [ + {"expr": "runtime_process_rss", "legendFormat": "{{instance}}"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "Bps", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 33}, + "id": 15, + "options": {"legend": {"displayMode": "table", "placement": "bottom"}}, + "title": "Storage Write Rate", + "type": "timeseries", + "targets": [ + {"expr": "rate(runtime_storage_write_bytes_total[1m])", "legendFormat": "{{instance}}"} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 41}, + "id": 600, + "title": "Stall Indicators", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 30, "thresholdsStyle": {"mode": "area"}}, "thresholds": {"steps": [{"color": "transparent", "value": null}, {"color": "red", "value": 1}]}}}, + "gridPos": {"h": 6, "w": 8, "x": 0, "y": 42}, + "id": 16, + "title": "Views Without Finalization (STALL INDICATOR)", + "type": "timeseries", + "targets": [ + {"expr": "rate(engine_voter_state_current_view[1m]) > 0 and rate(finalized_height[1m]) < 0.001", "legendFormat": "STALLED {{instance}}"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 30, "thresholdsStyle": 
{"mode": "area"}}, "thresholds": {"steps": [{"color": "transparent", "value": null}, {"color": "orange", "value": 5}]}}}, + "gridPos": {"h": 6, "w": 8, "x": 8, "y": 42}, + "id": 17, + "title": "Timeout Rate by Reason", + "type": "timeseries", + "targets": [ + {"expr": "sum by (reason) (rate(engine_voter_state_timeouts_total[1m]))", "legendFormat": "{{reason}}"} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "custom": {"lineWidth": 2, "fillOpacity": 10}}}, + "gridPos": {"h": 6, "w": 8, "x": 16, "y": 42}, + "id": 18, + "title": "Broadcast Failures", + "type": "timeseries", + "targets": [ + {"expr": "sum(rate(broadcast_get_total{status=\"Failure\"}[1m]))", "legendFormat": "failures/s"}, + {"expr": "sum(rate(broadcast_get_total{status=\"Success\"}[1m]))", "legendFormat": "success/s"} + ] + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["kora", "transactions", "loadtest"], + "templating": {"list": []}, + "time": {"from": "now-15m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Kora Transaction Flow & Load Test", + "uid": "kora-txflow", + "version": 1 +} diff --git a/docker/grafana/provisioning/datasources/prometheus.yaml b/docker/grafana/provisioning/datasources/prometheus.yaml index bb009bb..f5632dc 100644 --- a/docker/grafana/provisioning/datasources/prometheus.yaml +++ b/docker/grafana/provisioning/datasources/prometheus.yaml @@ -3,7 +3,16 @@ apiVersion: 1 datasources: - name: Prometheus type: prometheus + uid: prometheus access: proxy url: http://prometheus:9090 isDefault: true editable: false + + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + isDefault: false + editable: false diff --git a/docker/scripts/devnet-health.sh b/docker/scripts/devnet-health.sh new file mode 100755 index 0000000..6932271 --- /dev/null +++ b/docker/scripts/devnet-health.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# Devnet health diagnostic tool — 
queryable by humans and Claude. +# Queries Prometheus and prints a structured health report. +set -euo pipefail + +PROM="${PROM_URL:-http://localhost:9090}" + +query() { curl -sG --data-urlencode "query=$1" "${PROM}/api/v1/query" 2>/dev/null; } +val() { echo "$1" | python3 -c "import json,sys; r=json.load(sys.stdin)['data']['result']; print(r[0]['value'][1] if r else 'N/A')" 2>/dev/null || echo "N/A"; } +vals() { echo "$1" | python3 -c " +import json,sys +r=json.load(sys.stdin)['data']['result'] +for m in r: + lbl = m['metric'].get('validator_index', m['metric'].get('instance','?')) + print(f' node{lbl}: {m[\"value\"][1]}') +" 2>/dev/null || echo " (no data)"; } + +echo "============================================" +echo " KORA DEVNET HEALTH REPORT" +echo " $(date -u '+%Y-%m-%d %H:%M:%S UTC')" +echo "============================================" +echo "" + +# --- Cluster Status --- +echo "## Cluster Status" +up=$(query 'count(up{job="kora-validators"}==1)') +echo " Validators up: $(val "$up") / 4" + +height=$(query 'max(finalized_height)') +echo " Finalized height: $(val "$height")" + +view=$(query 'max(engine_voter_state_current_view)') +echo " Current view: $(val "$view")" + +drift=$(query 'max(finalized_height)-min(finalized_height)') +drift_val=$(val "$drift") +echo " Height drift: ${drift_val}" +if [[ "$drift_val" != "N/A" ]] && python3 -c "exit(0 if float('${drift_val}') > 5 else 1)" 2>/dev/null; then + echo " ⚠ WARNING: nodes are diverging!" 
+fi +echo "" + +# --- Per-node heights --- +echo "## Per-Node Finalized Height" +vals "$(query 'finalized_height')" +echo "" + +# --- Throughput --- +echo "## Throughput" +bps=$(query 'avg(rate(finalized_height[1m]))') +echo " Blocks/sec (1m avg): $(val "$bps")" +echo "" + +# --- Latency --- +echo "## Latency (1m avg)" +nota=$(query 'avg(rate(engine_voter_notarization_latency_sum[1m])/rate(engine_voter_notarization_latency_count[1m]))') +echo " Notarization: $(val "$nota")s" + +fin=$(query 'avg(rate(engine_voter_finalization_latency_sum[1m])/rate(engine_voter_finalization_latency_count[1m]))') +echo " Finalization: $(val "$fin")s" + +build=$(query 'avg(rate(marshaled_build_duration_sum[1m])/rate(marshaled_build_duration_count[1m]))') +echo " Block build: $(val "$build")s" + +sig=$(query 'avg(rate(engine_batcher_verify_latency_sum[1m])/rate(engine_batcher_verify_latency_count[1m]))') +echo " Sig verify: $(val "$sig")s" +echo "" + +# --- Faults --- +echo "## Faults" +nulls=$(query 'sum(engine_voter_state_nullifications_total)') +echo " Total nullifications: $(val "$nulls")" + +timeouts=$(query 'sum(engine_voter_state_timeouts_total)') +echo " Total timeouts: $(val "$timeouts")" + +null_rate=$(query 'sum(rate(engine_voter_state_nullifications_total[5m]))') +echo " Nullification rate (5m): $(val "$null_rate")/s" + +skip=$(query 'avg(1-(rate(finalized_height[5m])/rate(engine_voter_state_current_view[5m])))') +echo " Avg skip rate (wasted views): $(val "$skip")" + +echo "" +echo " Timeouts by reason:" +curl -sg "${PROM}/api/v1/query?query=sum%20by%20(reason)(engine_voter_state_timeouts_total)" 2>/dev/null | python3 -c " +import json,sys +r=json.load(sys.stdin)['data']['result'] +for m in r: + print(f\" {m['metric']['reason']}: {m['value'][1]}\") +" 2>/dev/null || echo " (no data)" +echo "" + +# --- Resources --- +echo "## Resources" +echo " Memory (RSS) per node:" +vals "$(query 'runtime_process_rss')" +echo "" + +disk_w=$(query 'sum(runtime_storage_write_bytes_total)') 
+echo " Total disk written: $(val "$disk_w") bytes" + +disk_r=$(query 'sum(runtime_storage_read_bytes_total)') +echo " Total disk read: $(val "$disk_r") bytes" +echo "" + +# --- Network --- +echo "## Network" +in_bw=$(query 'sum(rate(runtime_inbound_bandwidth_total[1m]))') +echo " Inbound bandwidth: $(val "$in_bw") B/s" + +out_bw=$(query 'sum(rate(runtime_outbound_bandwidth_total[1m]))') +echo " Outbound bandwidth: $(val "$out_bw") B/s" + +in_conn=$(query 'sum(runtime_inbound_connections_total)') +echo " Inbound connections: $(val "$in_conn")" + +out_conn=$(query 'sum(runtime_outbound_connections_total)') +echo " Outbound connections: $(val "$out_conn")" +echo "" + +echo "============================================" +echo " Dashboard: http://localhost:3000/d/kora-overview" +echo " Prometheus: http://localhost:9090" +echo "============================================" diff --git a/docker/scripts/devnet-run.sh b/docker/scripts/devnet-run.sh index de95495..d346962 100755 --- a/docker/scripts/devnet-run.sh +++ b/docker/scripts/devnet-run.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -set -e +set -eo pipefail # Parse arguments INTERACTIVE_DKG=false @@ -161,17 +161,24 @@ clear_dkg_outputs() { clear_runtime_state() { for volume in \ - kora-devnet_data_node0 \ - kora-devnet_data_node1 \ - kora-devnet_data_node2 \ - kora-devnet_data_node3 \ - kora-devnet_data_secondary0; do + kora-devnet_runtime_node0 \ + kora-devnet_runtime_node1 \ + kora-devnet_runtime_node2 \ + kora-devnet_runtime_node3 \ + kora-devnet_runtime_secondary0; do docker volume inspect "$volume" >/dev/null 2>&1 || continue - docker run --rm -v "${volume}:/data" alpine \ - rm -rf /data/runtime >/dev/null 2>&1 || true + docker run --rm -v "${volume}:/runtime" alpine \ + sh -c 'rm -rf /runtime/* /runtime/.[!.]* /runtime/..?*' >/dev/null 2>&1 || true done } +clear_startup_barrier() { + local volume="kora-devnet_startup_barrier" + docker volume inspect "$volume" >/dev/null 2>&1 || return 0 + docker run --rm -v 
"${volume}:/barrier" alpine \ + sh -c 'rm -f /barrier/*.ready' >/dev/null 2>&1 || true +} + cd "$(dirname "$0")/.." print_header @@ -309,10 +316,16 @@ print_phase "2/3" "Starting validators and secondary peers" docker compose -f compose/devnet.yaml stop \ validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 >/dev/null 2>&1 || true clear_runtime_state +clear_startup_barrier -run_with_spinner "Launching validator and secondary containers..." docker compose -f compose/devnet.yaml ${COMPOSE_PROFILES:+--profile observability} up -d \ - validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 \ - ${COMPOSE_PROFILES:+prometheus grafana} +if [[ "${COMPOSE_PROFILES:-}" == *observability* ]]; then + run_with_spinner "Launching validator, secondary, and observability containers..." docker compose -f compose/devnet.yaml --profile observability up -d \ + validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 \ + prometheus grafana loki promtail +else + run_with_spinner "Launching validator and secondary containers..." 
docker compose -f compose/devnet.yaml up -d \ + validator-node0 validator-node1 validator-node2 validator-node3 secondary-node0 +fi # Wait for validators with spinner start_time=$(date +%s) diff --git a/docker/scripts/devnet-stats.sh b/docker/scripts/devnet-stats.sh index f08b4a5..ecd86da 100755 --- a/docker/scripts/devnet-stats.sh +++ b/docker/scripts/devnet-stats.sh @@ -1,5 +1,5 @@ -#!/bin/bash -set -e +#!/usr/bin/env bash +set -eo pipefail # Colors RED='\033[0;31m' @@ -17,6 +17,20 @@ CHAIN_ID="${CHAIN_ID:-1337}" RPC_PORTS=(8545 8546 8547 8548) FOLLOWER_SERVICE="secondary-node0" FOLLOWER_P2P_PORT=30500 +declare -a PREV_FINALIZED=() +declare -a PREV_SAMPLE_MS=() + +# Portable millisecond timestamp (macOS date lacks %N) +millis() { + if perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null; then + return + elif python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null; then + return + else + # Fallback: second-precision (loses sub-second accuracy for blocks/s) + echo "$(date +%s)000" + fi +} cleanup() { tput cnorm @@ -37,12 +51,16 @@ format_uptime() { fetch_all_statuses() { local tmpdir=$(mktemp -d) - # Launch parallel fetches using JSON-RPC POST to get block number (indicates node is alive) + # Launch parallel fetches using JSON-RPC POST to get node status. 
for i in 0 1 2 3; do - (curl -s --max-time 0.2 -X POST -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","method":"kora_nodeStatus","params":[],"id":1}' \ - "http://localhost:${RPC_PORTS[$i]}" 2>/dev/null | \ - jq -c '.result // {}' > "$tmpdir/$i" 2>/dev/null || echo "{}" > "$tmpdir/$i") & + ( + status=$(curl -s --max-time 0.2 -X POST -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"kora_nodeStatus","params":[],"id":1}' \ + "http://localhost:${RPC_PORTS[$i]}" 2>/dev/null | \ + jq -c '.result // {}' 2>/dev/null || true) + [[ -n "$status" ]] || status="{}" + printf "%s\n" "$status" > "$tmpdir/$i" + ) & done wait @@ -78,7 +96,7 @@ render() { echo -e "${BOLD}${CYAN}Node Status${NC}" echo -e "┌───────┬──────────┬────────────┬──────────┬────────────┬────────────┬────────────┬────────────┬────────┐" - echo -e "│ ${BOLD}Node${NC} │ ${BOLD}Status${NC} │ ${BOLD}Uptime${NC} │ ${BOLD}View${NC} │ ${BOLD}Finalized${NC} │ ${BOLD}Nullified${NC} │ ${BOLD}Proposed${NC} │ ${BOLD}Throughput${NC} │ ${BOLD}Leader${NC} │" + echo -e "│ ${BOLD}Node${NC} │ ${BOLD}Status${NC} │ ${BOLD}Uptime${NC} │ ${BOLD}View${NC} │ ${BOLD}Finalized${NC} │ ${BOLD}Nullified${NC} │ ${BOLD}Proposed${NC} │ ${BOLD}Blocks/s${NC} │ ${BOLD}Leader${NC} │" echo -e "├───────┼──────────┼────────────┼──────────┼────────────┼────────────┼────────────┼────────────┼────────┤" local rpc_count=0 @@ -87,7 +105,7 @@ render() { local max_uptime=0 local total_finalized=0 local max_view=0 - local max_throughput=0 + local max_blocks_per_sec=0 local follower_status="offline" local follower_color=$RED local follower_state="-" @@ -98,6 +116,8 @@ render() { # Fetch all statuses in parallel local all_status all_status=$(fetch_all_statuses) + local sample_ms + sample_ms=$(millis) local i=0 while IFS= read -r status; do @@ -135,23 +155,31 @@ render() { ((++healthy_count)) fi - # Calculate throughput (blocks/sec) - local throughput_str="-" - if [[ $uptime -gt 0 && $finalized -gt 0 ]]; then - # Use awk for 
floating point division - local tps=$(awk "BEGIN {printf \"%.2f\", $finalized / $uptime}") - throughput_str="${tps} b/s" - # Track max for summary - local tps_int=$(awk "BEGIN {printf \"%d\", $finalized * 100 / $uptime}") - [[ $tps_int -gt $max_throughput ]] && max_throughput=$tps_int + # Calculate live finalized blocks per second since the previous refresh. + local blocks_per_sec_str="-" + if [[ -n "${PREV_FINALIZED[$i]:-}" && -n "${PREV_SAMPLE_MS[$i]:-}" ]]; then + local delta_blocks=$((finalized - PREV_FINALIZED[$i])) + local delta_ms=$((sample_ms - PREV_SAMPLE_MS[$i])) + if [[ $delta_blocks -ge 0 && $delta_ms -gt 0 ]]; then + local blocks_per_sec + blocks_per_sec=$(awk -v blocks="$delta_blocks" -v ms="$delta_ms" 'BEGIN {printf "%.2f", blocks * 1000 / ms}') + blocks_per_sec_str="${blocks_per_sec} b/s" + local blocks_per_sec_int + blocks_per_sec_int=$(awk -v blocks="$delta_blocks" -v ms="$delta_ms" 'BEGIN {printf "%d", blocks * 100000 / ms}') + [[ $blocks_per_sec_int -gt $max_blocks_per_sec ]] && max_blocks_per_sec=$blocks_per_sec_int + fi fi + PREV_FINALIZED[$i]=$finalized + PREV_SAMPLE_MS[$i]=$sample_ms printf "│ ${CYAN}%-5s${NC} │ %b │ %-10s │ %-8s │ %-10s │ %-10s │ %-10s │ %-10s │ %b │\n" \ - "$validator_index" "$rpc_status" "$uptime_str" "$view" "$finalized" "$nullified" "$proposed" "$throughput_str" "$leader_str" + "$validator_index" "$rpc_status" "$uptime_str" "$view" "$finalized" "$nullified" "$proposed" "$blocks_per_sec_str" "$leader_str" else + unset "PREV_FINALIZED[$i]" "PREV_SAMPLE_MS[$i]" printf "│ ${CYAN}%-5s${NC} │ ${RED}offline${NC} │ - │ - │ - │ - │ - │ - │ - │\n" "$i" fi else + unset "PREV_FINALIZED[$i]" "PREV_SAMPLE_MS[$i]" printf "│ ${CYAN}%-5s${NC} │ ${RED}offline${NC} │ - │ - │ - │ - │ - │ - │ - │\n" "$i" fi ((++i)) @@ -206,13 +234,13 @@ render() { local uptime_str="0s" [[ $max_uptime -gt 0 ]] && uptime_str=$(format_uptime "$max_uptime") - # Format throughput from stored integer (x100) - local throughput_str="0.00 b/s" - if [[ 
$max_throughput -gt 0 ]]; then - throughput_str=$(awk "BEGIN {printf \"%.2f b/s\", $max_throughput / 100}") + # Format live blocks/sec from stored integer (x100) + local blocks_per_sec_str="0.00 b/s" + if [[ $max_blocks_per_sec -gt 0 ]]; then + blocks_per_sec_str=$(awk -v bps="$max_blocks_per_sec" 'BEGIN {printf "%.2f b/s", bps / 100}') fi - echo -e " ${DIM}Consensus:${NC} ${health_color}${healthy_count}/4${NC} │ ${DIM}RPC:${NC} ${GREEN}${rpc_count}/4${NC} │ ${DIM}Follower:${NC} ${follower_color}${follower_status}${NC} │ ${DIM}Stalled:${NC} ${YELLOW}${stalled_count}${NC} │ ${DIM}Threshold:${NC} $threshold │ ${DIM}View:${NC} ${CYAN}$max_view${NC} │ ${DIM}Finalized:${NC} ${GREEN}$total_finalized${NC} │ ${DIM}Throughput:${NC} ${CYAN}$throughput_str${NC} │ ${DIM}Uptime:${NC} $uptime_str" + echo -e " ${DIM}Consensus:${NC} ${health_color}${healthy_count}/4${NC} │ ${DIM}RPC:${NC} ${GREEN}${rpc_count}/4${NC} │ ${DIM}Follower:${NC} ${follower_color}${follower_status}${NC} │ ${DIM}Stalled:${NC} ${YELLOW}${stalled_count}${NC} │ ${DIM}Threshold:${NC} $threshold │ ${DIM}View:${NC} ${CYAN}$max_view${NC} │ ${DIM}Finalized:${NC} ${GREEN}$total_finalized${NC} │ ${DIM}Blocks/s:${NC} ${CYAN}$blocks_per_sec_str${NC} │ ${DIM}Uptime:${NC} $uptime_str" echo "" echo -e "${BOLD}${CYAN}Follower Node${NC}" diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh index fc86d45..39ae5d2 100644 --- a/docker/scripts/entrypoint.sh +++ b/docker/scripts/entrypoint.sh @@ -2,11 +2,25 @@ set -euo pipefail VALIDATOR_INDEX=${VALIDATOR_INDEX:-0} +VALIDATOR_COUNT=${VALIDATOR_COUNT:-0} IS_BOOTSTRAP=${IS_BOOTSTRAP:-false} BOOTSTRAP_PEERS=${BOOTSTRAP_PEERS:-""} CHAIN_ID=${CHAIN_ID:-1337} DATA_DIR=${DATA_DIR:-/data} SHARED_DIR=${SHARED_DIR:-/shared} +BARRIER_DIR=${BARRIER_DIR:-/barrier} + +RUNTIME_DIR=${KORA_RUNTIME_DIR:-/runtime} + +# Cap Tokio and Rayon thread counts to avoid oversubscription. +# Inside Docker, Tokio/Rayon read the HOST CPU count (e.g. 
12) rather than +# the cgroup limit (e.g. 2 CPUs), creating massive context switching overhead. +# The default of 8 Tokio workers provides enough async concurrency for +# consensus pipelining, networking, and I/O without extreme oversubscription. +# Rayon is used only for BLS batch verification; 2 threads match the strategy +# parameter (NZUsize!(2)) in runner.rs. +export TOKIO_WORKER_THREADS="${TOKIO_WORKER_THREADS:-8}" +export RAYON_NUM_THREADS="${RAYON_NUM_THREADS:-2}" MODE="${1:-validator}" shift || true @@ -14,72 +28,180 @@ shift || true log() { echo "[entrypoint] $*"; } error() { echo "[entrypoint] ERROR: $*" >&2; exit 1; } +# Wait for at least one bootstrap peer from a comma-separated list to become +# reachable. With multi-bootstrap support a node can join the network through +# any available bootstrapper, removing the single-bootstrap-node SPOF. +# +# Usage: wait_for_any_bootstrap "$BOOTSTRAP_PEERS" +# BOOTSTRAP_PEERS is a comma-separated list of host:port pairs, e.g. +# "node0:30303,node1:30303" +wait_for_any_bootstrap() { + local peers_csv="$1" + [[ -z "$peers_csv" ]] && return 0 + + # Parse into arrays + local hosts=() + local ports=() + IFS=',' read -ra PEER_LIST <<< "$peers_csv" + for peer in "${PEER_LIST[@]}"; do + peer=$(echo "$peer" | tr -d ' ') + [[ -z "$peer" ]] && continue + local host port + host=$(echo "$peer" | rev | cut -d: -f2- | rev) + port=$(echo "$peer" | rev | cut -d: -f1 | rev) + hosts+=("$host") + ports+=("$port") + done + + if [[ ${#hosts[@]} -eq 0 ]]; then + return 0 + fi + + log "Waiting for any bootstrap peer to become reachable: ${peers_csv}" + + local timeout=120 + while true; do + for i in "${!hosts[@]}"; do + if nc -z "${hosts[$i]}" "${ports[$i]}" 2>/dev/null; then + log "Bootstrap peer ${hosts[$i]}:${ports[$i]} reachable" + return 0 + fi + done + timeout=$((timeout - 1)) + if [[ $timeout -le 0 ]]; then + error "Timeout waiting for bootstrap peers (tried: ${peers_csv})" + fi + sleep 1 + done +} + +# Ensure runtime directory 
exists and is writable by the kora user. +# Docker named volumes inherit ownership from the image on first mount, +# but we verify here in case an external volume with different ownership +# is attached. +if [[ -d "$RUNTIME_DIR" ]]; then + if [[ ! -w "$RUNTIME_DIR" ]]; then + log "WARNING: runtime dir ${RUNTIME_DIR} is not writable, attempting chown..." + chown -R "$(id -u):$(id -g)" "$RUNTIME_DIR" 2>/dev/null || \ + error "Cannot write to runtime dir ${RUNTIME_DIR}. Fix volume permissions." + fi +else + mkdir -p "$RUNTIME_DIR" 2>/dev/null || error "Cannot create runtime dir ${RUNTIME_DIR}" +fi +log "Runtime dir: ${RUNTIME_DIR} (writable)" + +# Startup barrier: ensures all validators reach this point before any starts +# consensus. Each validator writes a marker file to a shared volume, then waits +# until the expected number of markers are present. +wait_for_barrier() { + local count="$1" + if [[ "$count" -le 0 || ! -d "$BARRIER_DIR" ]]; then + return 0 + fi + + # Write our own marker + touch "${BARRIER_DIR}/node${VALIDATOR_INDEX}.ready" + log "Barrier: marked node${VALIDATOR_INDEX} ready (waiting for ${count} validators)" + + # Wait for all markers + local timeout=120 + while true; do + local ready + ready=$(find "$BARRIER_DIR" -maxdepth 1 -name '*.ready' 2>/dev/null | wc -l | tr -d ' ') + if [[ "$ready" -ge "$count" ]]; then + log "Barrier: all ${count} validators ready, proceeding" + return 0 + fi + timeout=$((timeout - 1)) + if [[ $timeout -le 0 ]]; then + log "Barrier: WARNING timeout after 120s (${ready}/${count} ready), proceeding anyway" + return 0 + fi + sleep 1 + done +} + case "$MODE" in setup) log "Running setup mode..." exec /usr/local/bin/keygen setup "$@" ;; - + dkg) log "Running DKG ceremony mode..." 
- + [[ -f "${SHARED_DIR}/peers.json" ]] || error "peers.json not found" [[ -f "${DATA_DIR}/validator.key" ]] || error "validator.key not found" - + if [[ -f "${DATA_DIR}/share.key" && -f "${DATA_DIR}/output.json" ]]; then log "DKG already completed (share.key exists)" exit 0 fi - + if [[ "$IS_BOOTSTRAP" != "true" && -n "$BOOTSTRAP_PEERS" ]]; then - BOOTSTRAP_HOST=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f1) - BOOTSTRAP_PORT=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f2) - - log "Waiting for bootstrap peer ${BOOTSTRAP_HOST}:${BOOTSTRAP_PORT}..." - timeout=120 - while ! nc -z "$BOOTSTRAP_HOST" "$BOOTSTRAP_PORT" 2>/dev/null; do - timeout=$((timeout - 1)) - [[ $timeout -le 0 ]] && error "Timeout waiting for bootstrap peer" - sleep 1 - done - log "Bootstrap peer reachable" + wait_for_any_bootstrap "$BOOTSTRAP_PEERS" fi - + exec /usr/local/bin/kora dkg \ --data-dir "$DATA_DIR" \ --peers "${SHARED_DIR}/peers.json" \ --chain-id "$CHAIN_ID" \ "$@" ;; - + validator) log "Running validator mode..." - + [[ -f "${SHARED_DIR}/genesis.json" ]] || error "genesis.json not found" [[ -f "${DATA_DIR}/validator.key" ]] || error "validator.key not found" [[ -f "${DATA_DIR}/share.key" ]] || error "share.key not found (run DKG first)" [[ -f "${DATA_DIR}/output.json" ]] || error "output.json not found (run DKG first)" - + + # Log key fingerprints so DKG key mismatches are immediately obvious + SHARE_KEY_HASH=$(sha256sum "${DATA_DIR}/share.key" 2>/dev/null | cut -c1-16) + OUTPUT_HASH=$(sha256sum "${DATA_DIR}/output.json" 2>/dev/null | cut -c1-16) + log "DKG key fingerprints: share.key=${SHARE_KEY_HASH} output.json=${OUTPUT_HASH}" + cp "${SHARED_DIR}/genesis.json" "${DATA_DIR}/" 2>/dev/null || true + + # Detect whether this is a first startup or a restart by checking + # for the commit marker on the persistent /data volume. If it exists, + # the node has finalized at least one block previously and does not + # need the bootstrap peer or the startup barrier to proceed. 
+ # DO NOT use archive or QMDB paths -- those live on tmpfs (/runtime) + # and are wiped on every container restart. + if [[ -f "${DATA_DIR}/last_committed_digest" ]]; then + log "Restart detected (last_committed_digest exists), skipping barrier and bootstrap wait" + else + # First startup -- wait for all validators to be ready before + # starting consensus. This prevents height drift caused by + # staggered startup: if the bootstrap node enters consensus + # minutes before the others, it advances heights alone and + # later leaders return None from propose() because they lack + # the parent snapshot. + wait_for_barrier "$VALIDATOR_COUNT" + + if [[ "$IS_BOOTSTRAP" != "true" && -n "$BOOTSTRAP_PEERS" ]]; then + wait_for_any_bootstrap "$BOOTSTRAP_PEERS" + fi + fi + touch "${DATA_DIR}/.ready" - - if [[ "$IS_BOOTSTRAP" != "true" && -n "$BOOTSTRAP_PEERS" ]]; then - BOOTSTRAP_HOST=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f1) - BOOTSTRAP_PORT=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f2) - - log "Waiting for bootstrap peer ${BOOTSTRAP_HOST}:${BOOTSTRAP_PORT}..." - timeout=120 - while ! 
nc -z "$BOOTSTRAP_HOST" "$BOOTSTRAP_PORT" 2>/dev/null; do - timeout=$((timeout - 1)) - [[ $timeout -le 0 ]] && error "Timeout waiting for bootstrap peer" - sleep 1 - done + + TX_GOSSIP=${TX_GOSSIP:-false} + GOSSIP_FLAG="" + if [[ "$TX_GOSSIP" == "true" ]]; then + GOSSIP_FLAG="--tx-gossip" + log "Transaction gossip ENABLED" + else + log "Transaction gossip DISABLED (set TX_GOSSIP=true to enable)" fi - + exec /usr/local/bin/kora validator \ --data-dir "$DATA_DIR" \ --peers "${SHARED_DIR}/peers.json" \ --chain-id "$CHAIN_ID" \ + $GOSSIP_FLAG \ "$@" ;; @@ -89,28 +211,26 @@ case "$MODE" in [[ -f "${SHARED_DIR}/peers.json" ]] || error "peers.json not found" [[ -f "${DATA_DIR}/validator.key" ]] || error "validator.key not found" - touch "${DATA_DIR}/.ready" - if [[ "$IS_BOOTSTRAP" != "true" && -n "$BOOTSTRAP_PEERS" ]]; then - BOOTSTRAP_HOST=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f1) - BOOTSTRAP_PORT=$(echo "$BOOTSTRAP_PEERS" | cut -d: -f2) - - log "Waiting for bootstrap peer ${BOOTSTRAP_HOST}:${BOOTSTRAP_PORT}..." - timeout=120 - while ! nc -z "$BOOTSTRAP_HOST" "$BOOTSTRAP_PORT" 2>/dev/null; do - timeout=$((timeout - 1)) - [[ $timeout -le 0 ]] && error "Timeout waiting for bootstrap peer" - sleep 1 - done + # Only wait for bootstrap on first startup. On restarts, the + # P2P layer handles reconnection internally. + if [[ ! -f "${DATA_DIR}/.bootstrap_done" ]]; then + wait_for_any_bootstrap "$BOOTSTRAP_PEERS" + touch "${DATA_DIR}/.bootstrap_done" + else + log "Restart detected (.bootstrap_done exists), skipping bootstrap peer wait" + fi fi + touch "${DATA_DIR}/.ready" + exec /usr/local/bin/kora secondary \ --data-dir "$DATA_DIR" \ --peers "${SHARED_DIR}/peers.json" \ --chain-id "$CHAIN_ID" \ "$@" ;; - + *) exec "$MODE" "$@" ;; diff --git a/docker/scripts/healthcheck.sh b/docker/scripts/healthcheck.sh index 859d378..a61f0ac 100644 --- a/docker/scripts/healthcheck.sh +++ b/docker/scripts/healthcheck.sh @@ -1,7 +1,47 @@ #!/bin/bash +# Health check script for Kora nodes. 
+# +# Modes (set via HEALTHCHECK_MODE env var): +# dkg - DKG ceremony completed (share.key + output.json exist) +# p2p - P2P port is listening +# ready - RPC responsive AND chain is making progress AND consensus +# participation is verified via kora_nodeStatus +# +# Stall detection (ready mode): +# On each invocation, the script fetches eth_blockNumber and compares it +# against the value from the previous check (cached in /tmp/healthcheck_*). +# If the block number has not advanced for HEALTHCHECK_STALL_THRESHOLD +# consecutive checks, the health check fails. This catches nodes whose +# RPC is up but consensus has stalled. +# +# The stall counter resets whenever the block number advances. +# There is no grace period: every check in which the block number has not +# advanced increments the counter. Default threshold is 6 consecutive stalls +# (at 30s interval = 3 minutes of no progress before unhealthy). +# +# Consensus participation check (ready mode): +# After the block-number stall check, queries kora_nodeStatus to verify: +# 1. The node has sufficient peers for BFT quorum (partitionStatus != "partitioned") +# 2. The node's finalized block count is advancing (not just serving stale RPC data) +# These checks detect nodes that appear alive via RPC but are disconnected +# from consensus — a blind spot in the original eth_blockNumber-only check. +# +# The finalized-count stall check uses the same threshold as the block-number +# check so that both signals trigger unhealthy at the same pace. set -e MODE="${HEALTHCHECK_MODE:-p2p}" +STALL_THRESHOLD="${HEALTHCHECK_STALL_THRESHOLD:-6}" +RPC_TIMEOUT="${HEALTHCHECK_RPC_TIMEOUT:-8}" +# Minimum peers required for health. Default 0 disables the absolute floor; +# quorum is still enforced via partitionStatus from kora_nodeStatus.
+MIN_PEERS="${HEALTHCHECK_MIN_PEERS:-0}" + +# Persistent state files (on tmpfs, survives across checks but not restarts) +BLOCK_FILE="/tmp/healthcheck_block" +STALL_FILE="/tmp/healthcheck_stall_count" +FINALIZED_FILE="/tmp/healthcheck_finalized" +FINALIZED_STALL_FILE="/tmp/healthcheck_finalized_stall" case "$MODE" in dkg) @@ -11,7 +51,110 @@ case "$MODE" in nc -z localhost 30303 ;; ready) - [[ -f "/data/.ready" ]] && nc -z localhost 30303 + # Step 1: Verify the RPC server responds to eth_blockNumber. + # Use --max-time to enforce our own timeout rather than relying on + # curl's default (which interacts poorly with Docker's health check + # timeout under CPU contention). + RESULT=$(curl -sf --max-time "$RPC_TIMEOUT" -X POST http://localhost:8545 \ + -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' 2>/dev/null) || exit 1 + + # Extract the hex block number and convert to decimal + BLOCK_HEX=$(echo "$RESULT" | jq -r '.result // empty' 2>/dev/null) || exit 1 + [[ -z "$BLOCK_HEX" ]] && exit 1 + + # Strip 0x prefix and convert hex to decimal. + # Use shell arithmetic to avoid dependency on bc. + BLOCK_DEC=$((16#${BLOCK_HEX#0x})) + + # Step 2: Stall detection — compare against previous block number. 
+ PREV_BLOCK=0 + STALL_COUNT=0 + [[ -f "$BLOCK_FILE" ]] && PREV_BLOCK=$(cat "$BLOCK_FILE" 2>/dev/null) || true + [[ -f "$STALL_FILE" ]] && STALL_COUNT=$(cat "$STALL_FILE" 2>/dev/null) || true + + # Ensure numeric values + PREV_BLOCK=${PREV_BLOCK:-0} + STALL_COUNT=${STALL_COUNT:-0} + + if [[ "$BLOCK_DEC" -gt "$PREV_BLOCK" ]]; then + # Chain is progressing — reset stall counter + STALL_COUNT=0 + else + # Block number has not advanced since last check + STALL_COUNT=$((STALL_COUNT + 1)) + fi + + # Persist state for next invocation + echo "$BLOCK_DEC" > "$BLOCK_FILE" + echo "$STALL_COUNT" > "$STALL_FILE" + + # Step 3: Fail if stalled for too long + if [[ "$STALL_COUNT" -ge "$STALL_THRESHOLD" ]]; then + echo "UNHEALTHY: chain stalled at block $BLOCK_DEC for $STALL_COUNT consecutive checks" >&2 + exit 1 + fi + + # Step 4: Consensus participation — query kora_nodeStatus. + # This is a soft check: if the RPC method is unavailable (e.g. older + # binary, secondary node), we skip gracefully and rely on the + # eth_blockNumber stall check above. + STATUS=$(curl -sf --max-time "$RPC_TIMEOUT" -X POST http://localhost:8545 \ + -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"kora_nodeStatus","params":[],"id":2}' 2>/dev/null) || true + + if [[ -n "$STATUS" ]]; then + # Parse fields from the kora_nodeStatus response. + # jq exits 0 even on null, so we check for empty strings. + PARTITION=$(echo "$STATUS" | jq -r '.result.partitionStatus // empty' 2>/dev/null) || true + PEER_COUNT=$(echo "$STATUS" | jq -r '.result.peerCount // empty' 2>/dev/null) || true + FINALIZED_COUNT=$(echo "$STATUS" | jq -r '.result.finalizedCount // empty' 2>/dev/null) || true + + # 4a: Reject if the node is network-partitioned (below BFT quorum). + # A partitioned node cannot participate in consensus and will + # inevitably stall, but the block-number check takes 3 minutes + # to detect this. The partition check catches it immediately. 
+ if [[ "$PARTITION" == "partitioned" ]]; then + echo "UNHEALTHY: node is network-partitioned (insufficient peers for BFT quorum)" >&2 + exit 1 + fi + + # 4b: Optional absolute peer floor (disabled by default). + if [[ -n "$PEER_COUNT" && "$MIN_PEERS" -gt 0 ]]; then + if [[ "$PEER_COUNT" -lt "$MIN_PEERS" ]]; then + echo "UNHEALTHY: only $PEER_COUNT peers connected (minimum: $MIN_PEERS)" >&2 + exit 1 + fi + fi + + # 4c: Finalized-count stall detection. + # Similar to the block-number stall check, but tracks the node's + # own finalized_count from the consensus engine. A node that is + # RPC-responsive but not finalizing blocks (e.g. disconnected from + # consensus, serving stale data) will fail this check. + if [[ -n "$FINALIZED_COUNT" ]]; then + PREV_FINALIZED=0 + FIN_STALL=0 + [[ -f "$FINALIZED_FILE" ]] && PREV_FINALIZED=$(cat "$FINALIZED_FILE" 2>/dev/null) || true + [[ -f "$FINALIZED_STALL_FILE" ]] && FIN_STALL=$(cat "$FINALIZED_STALL_FILE" 2>/dev/null) || true + PREV_FINALIZED=${PREV_FINALIZED:-0} + FIN_STALL=${FIN_STALL:-0} + + if [[ "$FINALIZED_COUNT" -gt "$PREV_FINALIZED" ]]; then + FIN_STALL=0 + else + FIN_STALL=$((FIN_STALL + 1)) + fi + + echo "$FINALIZED_COUNT" > "$FINALIZED_FILE" + echo "$FIN_STALL" > "$FINALIZED_STALL_FILE" + + if [[ "$FIN_STALL" -ge "$STALL_THRESHOLD" ]]; then + echo "UNHEALTHY: consensus stalled — finalized count stuck at $FINALIZED_COUNT for $FIN_STALL consecutive checks" >&2 + exit 1 + fi + fi + fi ;; *) exit 1 diff --git a/docs/public-testnet.md b/docs/public-testnet.md new file mode 100644 index 0000000..34f9a9a --- /dev/null +++ b/docs/public-testnet.md @@ -0,0 +1,346 @@ +# Public Testnet Standup + +This runbook describes how to stand up a public Kora testnet from the same +building blocks used by the Docker devnet. It assumes the first public testnet +uses validators with stable public IP addresses or public DNS names. + +Kora is pre-alpha software. Treat this as a testnet procedure, not a production +mainnet security guide. 
+ +## Overview + +The local devnet starts four validators in one Docker Compose network. That +works because the generated peer file can use Docker hostnames such as +`node0:30303`. A public testnet needs the same artifacts, but the peer file must +use addresses that every validator can dial from the public internet. + +For the first public testnet: + +- Every validator operator provides a stable P2P endpoint, preferably DNS such + as `validator-0.testnet.kora.network:30303`, or a static public IP such as + `203.0.113.10:30303`. +- Every validator opens the P2P port on that endpoint. +- All validators use the same finalized `peers.json`. +- Validators run the interactive DKG ceremony before starting consensus. + +Future iterations can document a private validator mesh using ZeroTier or a +similar VPN. A later P2P design could also explore iroh. Those are follow-up +designs and should not block the public-IP standup path described here. + +## Current Devnet Primitives + +The public testnet flow reuses the existing commands and files: + +- `keygen setup` generates `genesis.json`, `peers.json`, per-validator + `validator.key` files, and optional secondary identities. +- `kora dkg --peers <peers.json>` runs the interactive DKG ceremony and writes + `share.key` and `output.json` into each validator data directory. +- `kora validator --peers <peers.json>` starts a validator after DKG has + completed. +- `genesis.json` contains chain state only. It does not contain P2P endpoints. +- `peers.json` contains `participants`, `secondary_participants`, `threshold`, + and `bootstrappers`. + +The important difference from the Docker devnet is the `bootstrappers` section +in `peers.json`. `keygen setup` currently writes Docker-local addresses like +`node0:30303`; for a public testnet, replace them with public DNS names or +public IP addresses before running DKG. + +## Prerequisites + +Choose and record these values before generating artifacts: + +- Validator count, for example `4`.
+- Threshold, for example `3` for a 4-validator testnet. +- Chain ID, for example a testnet-specific value distinct from local devnets. +- Public P2P endpoint for each validator. +- Optional secondary peer count. +- A shared release artifact or Docker image version that all validators will + run. + +Each validator host should have: + +- A static public IP address or stable DNS record. +- Inbound TCP open for the Kora P2P port, default `30303`. +- Outbound TCP allowed to every other validator P2P endpoint. +- NTP or another reliable clock sync service. +- Persistent disk for the Kora data directory. +- Log collection and a restart supervisor such as systemd, Docker restart + policy, or equivalent. + +RPC is currently started by the validator command on `0.0.0.0:8545`. Do not +leave RPC open to the internet unless that is an intentional testnet policy. +Prefer firewalling RPC to operator IPs, a bastion, or a public load balancer +that you explicitly control. + +## Address Handling + +Use public DNS names when possible: + +```text +validator-0.testnet.kora.network:30303 +validator-1.testnet.kora.network:30303 +validator-2.testnet.kora.network:30303 +validator-3.testnet.kora.network:30303 +``` + +Static public IPs are also valid: + +```text +203.0.113.10:30303 +203.0.113.11:30303 +203.0.113.12:30303 +203.0.113.13:30303 +``` + +The address in `peers.json` must be the address other validators can dial. Do +not use `0.0.0.0`, `127.0.0.1`, Docker service names, or private cloud +addresses unless every validator is intentionally on the same private network. + +The node listen address can remain the default `0.0.0.0:30303`, which means +"listen on all local interfaces." The public endpoint belongs in `peers.json`. +If a host is behind NAT, the public `host:port` must forward to the local Kora +P2P listener. 
+ +## Artifact Layout + +Use one coordinator directory while preparing the network: + +```text +testnet-artifacts/ + genesis.json + peers.json + node0/ + validator.key + setup.json + node1/ + validator.key + setup.json + node2/ + validator.key + setup.json + node3/ + validator.key + setup.json + secondary0/ + validator.key + setup.json +``` + +After DKG, each validator directory also contains: + +```text +share.key +output.json +``` + +Artifact ownership: + +- Share `genesis.json` with every validator and secondary operator. +- Share the finalized `peers.json` with every validator and secondary operator. +- Give each validator operator only its own `nodeN/validator.key`. +- After DKG, keep each `nodeN/share.key` private to that validator. +- `output.json` is required for the validator to start and should be kept with + that validator's data directory. +- Do not publish `validator.key` or `share.key`. + +The current `keygen setup` command generates validator identity keys centrally. +This is a workflow issue that needs to be fixed: each operator should generate +its own `validator.key` locally and provide only the public key to the +coordinator. Until that tooling exists, the coordinator must distribute each +private key securely and should not retain copies. + +## Generate Initial Artifacts + +From a trusted coordinator machine: + +```sh +cargo run --release -p keygen -- setup \ + --validators 4 \ + --secondary-peers 1 \ + --threshold 3 \ + --chain-id 424242 \ + --output-dir ./testnet-artifacts +``` + +Then edit `testnet-artifacts/peers.json` and replace the generated +Docker-local bootstrapper addresses with public endpoints. 
+ +Example shape: + +```json +{ + "validators": 4, + "threshold": 3, + "participants": [ + "<validator-0-public-key>", + "<validator-1-public-key>", + "<validator-2-public-key>", + "<validator-3-public-key>" + ], + "secondary_participants": [ + "<secondary-0-public-key>" + ], + "bootstrappers": { + "<validator-0-public-key>": "validator-0.testnet.kora.network:30303", + "<validator-1-public-key>": "validator-1.testnet.kora.network:30303", + "<validator-2-public-key>": "validator-2.testnet.kora.network:30303", + "<validator-3-public-key>": "validator-3.testnet.kora.network:30303" + } +} +``` + + +## Run Interactive DKG + +Interactive DKG is the preferred testnet ceremony because no single party +generates all BLS threshold shares. The trusted dealer command is only for local +development and should not be used for public testnet keys. + +Before the ceremony, each validator host should have: + +```text +/var/lib/kora/ + validator.key + genesis.json + peers.json +``` + +Start DKG on every validator using the same chain ID and finalized peer file: + +```sh +kora \ + --data-dir /var/lib/kora \ + --chain-id 424242 \ + dkg \ + --peers /var/lib/kora/peers.json +``` + +All validators need to be online and reachable for the ceremony. A successful +ceremony writes: + +```text +/var/lib/kora/share.key +/var/lib/kora/output.json +``` + +If the ceremony fails, inspect validator logs, confirm every public endpoint is +reachable, confirm every operator has the same `peers.json`, and rerun DKG only +after deciding whether to preserve or clear partial DKG state. Use +`--force-restart` only when every operator agrees to restart the ceremony. + +## Start Validators + +After DKG, every validator data directory should contain: + +```text +/var/lib/kora/ + genesis.json + peers.json + validator.key + share.key + output.json +``` + +Start each validator: + +```sh +kora \ + --data-dir /var/lib/kora \ + --chain-id 424242 \ + validator \ + --peers /var/lib/kora/peers.json +``` + +For a systemd deployment, use the same command in a unit file and set a restart +policy appropriate for a testnet. Keep the data directory on persistent storage.
+ +The existing single-host Docker Compose file is not a public testnet deployment +template. It is useful as a reference for local devnet behavior, but public +testnet operators should use a per-host service definition or a future per-host +compose template. + +## Start Secondary Peers + +Secondary peers are authenticated P2P participants that follow validator traffic +without participating in consensus. Their public keys must already be listed in +`secondary_participants`. + +Prepare the secondary data directory with its own `validator.key` plus the +shared `peers.json`: + +```text +/var/lib/kora-secondary/ + validator.key + peers.json +``` + +Start the secondary: + +```sh +kora \ + --data-dir /var/lib/kora-secondary \ + --chain-id 424242 \ + secondary \ + --peers /var/lib/kora-secondary/peers.json +``` + +## Validation Checklist + +Before announcing the testnet: + +- Every validator can resolve every validator DNS name, if DNS is used. +- Every validator can open TCP connections to every other validator P2P + endpoint. +- Every validator has the same `genesis.json` and finalized `peers.json`. +- Every validator has its own `validator.key`, `share.key`, and `output.json`. +- Validators start without DKG-output errors. +- Logs show peer connections and consensus progress. +- At least one controlled RPC endpoint responds on the expected chain ID. +- Metrics and logs are visible to the testnet operators. +- RPC and metrics exposure match the intended firewall policy. + +## Operations + +Recommended minimum operating practices: + +- Keep `validator.key` and `share.key` backed up securely. +- Keep `genesis.json` and the finalized `peers.json` in versioned release + artifacts so operators can verify they are running the intended network. +- Use DNS records with low enough TTLs to recover from host replacement. +- Monitor process restarts, disk usage, peer connectivity, block production, + RPC health, and host clock drift. +- Restrict SSH, RPC, metrics, and dashboards. 
Only the P2P port needs to be + broadly reachable by other validators. + +## Reset Or Re-DKG + +Changing validator identities, validator count, threshold, or DKG output creates +a new network ceremony. Coordinate resets explicitly: + +1. Stop validators. +2. Decide whether the existing chain data is being discarded. +3. Generate or agree on the new `peers.json`. +4. Clear old `share.key`, `output.json`, and partial DKG state from each + validator data directory if the ceremony is being restarted. +5. Run interactive DKG again. +6. Restart validators with the new artifacts. + +Do not mix old and new `peers.json`, `share.key`, or `output.json` files across +validators. + +## Future Improvements + +The current flow can stand up a public-IP testnet, but the rough edges are worth +tracking: + +- Add a `keygen setup` option that accepts an endpoint manifest and writes public + bootstrappers directly, avoiding manual `peers.json` edits. +- Add a flow for operator-generated validator identity public keys so the + coordinator does not create or distribute `validator.key` files. +- Add a per-host systemd or Docker Compose template for validators and + secondaries. +- Document a ZeroTier-based private validator mesh for closed rehearsals. +- Evaluate whether iroh can simplify future P2P connectivity and NAT traversal. +- Make RPC bind address configurable for the validator command, or document the + exact firewall/reverse-proxy pattern used by the public testnet. 
diff --git a/keygen.bash b/keygen.bash new file mode 100644 index 0000000..0b7cb6d --- /dev/null +++ b/keygen.bash @@ -0,0 +1,6 @@ +cargo run --release -p keygen -- setup \ + --validators 3 \ + --secondary-peers 0 \ + --threshold 2 \ + --chain-id 424242 \ + --output-dir ./testnet-artifacts diff --git a/repro-logs/chaos_monitor.py b/repro-logs/chaos_monitor.py new file mode 100644 index 0000000..109b8ae --- /dev/null +++ b/repro-logs/chaos_monitor.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""Monitor Kora devnet block production during chaos tests.""" + +from __future__ import annotations + +import json +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass +from typing import Any + +RPC_PORTS = [8545, 8546, 8547, 8548] +NODE_NAMES = ["node0", "node1", "node2", "node3"] + + +@dataclass +class Sample: + ts: float + heights: list[int | None] + views: list[int | None] + nullified: list[int | None] + + +def rpc(port: int, method: str, params: list[Any] | None = None) -> dict[str, Any]: + body = json.dumps({"jsonrpc": "2.0", "method": method, "params": params or [], "id": 1}).encode() + req = urllib.request.Request( + f"http://127.0.0.1:{port}", + data=body, + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=3) as resp: + return json.loads(resp.read()) + except (urllib.error.URLError, TimeoutError, ConnectionResetError, json.JSONDecodeError) as err: + raise RuntimeError(str(err)) from err + + +def sample() -> Sample: + heights: list[int | None] = [] + views: list[int | None] = [] + nullified: list[int | None] = [] + for port in RPC_PORTS: + try: + height = int(rpc(port, "eth_blockNumber")["result"], 16) + status = rpc(port, "kora_nodeStatus")["result"] + heights.append(height) + views.append(int(status.get("currentView", 0))) + nullified.append(int(status.get("nullifiedCount", 0))) + except (RuntimeError, KeyError, ValueError): + heights.append(None) + 
views.append(None) + nullified.append(None) + return Sample(time.time(), heights, views, nullified) + + +def fmt_sample(label: str, s: Sample, prev: Sample | None) -> str: + parts = [f"[{label}] t={s.ts:.0f}"] + for i, name in enumerate(NODE_NAMES): + h = s.heights[i] + v = s.views[i] + n = s.nullified[i] + delta = "" + if prev and h is not None and prev.heights[i] is not None: + dh = h - prev.heights[i] + if dh: + delta = f" (+{dh})" + parts.append(f"{name}: h={h}{delta} view={v} null={n}") + if prev and prev.heights[0] is not None and s.heights[0] is not None: + dt = s.ts - prev.ts + dh = s.heights[0] - prev.heights[0] + if dt > 0 and dh >= 0: + parts.append(f"net_rate={dh/dt:.3f} blk/s (~{dt/max(dh,1):.3f}s/blk)") + return " | ".join(parts) + + +def monitor(duration_secs: int, interval: float, label_prefix: str) -> list[str]: + lines: list[str] = [] + end = time.time() + duration_secs + prev: Sample | None = None + while time.time() < end: + s = sample() + line = fmt_sample(label_prefix, s, prev) + print(line, flush=True) + lines.append(line) + prev = s + time.sleep(interval) + return lines + + +def main() -> int: + if len(sys.argv) != 4: + print(f"usage: {sys.argv[0]}