From 4961ce008897182aaac6da758de6ae7de0aaa23c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 17:02:39 -0400 Subject: [PATCH 01/40] initial impl with incoming/outgoing message impl --- Cargo.lock | 1200 +++++++++++++++++++++++++++++++++-- Cargo.toml | 3 +- crates/p2p/Cargo.toml | 15 + crates/p2p/src/behaviour.rs | 165 +++++ crates/p2p/src/lib.rs | 299 +++++++++ crates/p2p/src/message.rs | 122 ++++ crates/p2p/src/protocol.rs | 73 +++ 7 files changed, 1821 insertions(+), 56 deletions(-) create mode 100644 crates/p2p/Cargo.toml create mode 100644 crates/p2p/src/behaviour.rs create mode 100644 crates/p2p/src/lib.rs create mode 100644 crates/p2p/src/message.rs create mode 100644 crates/p2p/src/protocol.rs diff --git a/Cargo.lock b/Cargo.lock index 6e8a731e..93983a60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -1235,6 +1260,12 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -1293,6 +1324,36 @@ dependencies = [ "serde_json", ] +[[package]] +name = "async-io" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3" +dependencies = [ + "async-lock", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.0", + "parking", + "polling", + "rustix 1.0.7", + "slab", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -1337,6 +1398,19 @@ dependencies = [ "rustc_version 0.4.1", ] +[[package]] +name = "asynchronous-codec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a860072022177f903e59730004fb5dc13db9275b79bb2aef7ba8ce831956c233" +dependencies = [ + "bytes", + "futures-sink", + "futures-util", + "memchr", + "pin-project-lite", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -1420,6 +1494,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base-x" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" + [[package]] name = "base16ct" version = "0.2.0" @@ -1528,6 +1608,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest 0.10.7", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -1563,7 +1652,7 @@ dependencies = [ "hex", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-named-pipe", "hyper-util", "hyperlocal", @@ -1620,6 +1709,15 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bs58" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -1678,6 +1776,15 @@ dependencies = [ "serde", ] +[[package]] +name = "cbor4ii" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "472931dd4dfcc785075b09be910147f9c6258883fc4591d0dac6116392b2daa6" +dependencies = [ + "serde", +] + [[package]] name = "cc" version = "1.2.20" @@ -1721,6 +1828,19 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.41" @@ -1941,6 +2061,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -2075,6 +2204,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "ctrlc" version = "3.4.6" @@ -2169,6 +2307,26 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "data-encoding-macro" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" +dependencies = [ + "data-encoding", + "syn 2.0.101", +] + [[package]] name = "der" version = "0.7.10" @@ -2634,6 +2792,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "event-listener" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "eyre" version = "0.6.12" @@ -2735,7 +2914,7 @@ dependencies = [ "futures-core", "futures-sink", "nanorand", - "spin", + "spin 0.9.8", ] [[package]] @@ -2801,6 +2980,16 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-bounded" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91f328e7fb845fc832912fb6a34f40cf6d1888c92f974d1893a54e97b5ff542e" +dependencies = [ + "futures-timer", + "futures-util", +] + [[package]] name = "futures-buffered" version = "0.2.11" @@ -2811,7 +3000,7 @@ dependencies = [ "diatomic-waker", "futures-core", "pin-project-lite", - "spin", + "spin 0.9.8", ] [[package]] @@ -2839,6 +3028,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -2886,6 +3076,17 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "futures-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" +dependencies = [ + "futures-io", + "rustls", + "rustls-pki-types", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -2898,6 +3099,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2973,6 +3180,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gimli" version = "0.31.1" @@ -3072,7 +3289,7 @@ dependencies = [ "regex", "reqwest", "reqwest-middleware", - "ring", + "ring 0.17.14", "serde", "serde_json", "sha2", @@ -3177,6 +3394,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -3195,6 +3418,31 @@ dependencies = [ "arrayvec", ] +[[package]] +name = "hickory-proto" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92652067c9ce6f66ce53cc38d1169daa36e6e7eb7dd3b63b5103bd9d97117248" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna 1.0.3", + "ipnet", + "once_cell", + "rand 0.8.5", + "socket2", + "thiserror 1.0.69", + "tinyvec", + "tokio", + "tracing", + "url", +] + [[package]] name = "hickory-proto" version = "0.25.2" @@ -3212,7 +3460,7 @@ dependencies = [ "ipnet", "once_cell", "rand 0.9.1", - "ring", + "ring 0.17.14", "thiserror 2.0.12", "tinyvec", "tokio", @@ -3220,6 +3468,27 @@ dependencies = [ "url", ] +[[package]] +name = "hickory-resolver" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb117a1ca520e111743ab2f6688eddee69db4e0ea242545a604dce8a66fd22e" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto 0.24.4", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.3", + "rand 0.8.5", + "resolv-conf", + "smallvec", + "thiserror 1.0.69", + "tokio", + "tracing", +] + [[package]] name = "hickory-resolver" version = "0.25.2" @@ -3228,7 +3497,7 @@ checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" dependencies = [ "cfg-if", "futures-util", - "hickory-proto", + "hickory-proto 0.25.2", "ipconfig", "moka", "once_cell", @@ -3241,6 +3510,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3315,6 +3593,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -3334,7 +3623,7 @@ dependencies = [ "bytes", "futures-core", "http 1.3.1", - "http-body", + "http-body 1.0.1", "pin-project-lite", ] @@ -3350,6 +3639,30 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -3361,7 +3674,7 @@ dependencies = [ "futures-util", "h2 0.4.9", "http 1.3.1", - "http-body", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -3378,7 +3691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" dependencies = [ "hex", - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -3394,7 +3707,7 @@ checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", "http 1.3.1", - "hyper", + "hyper 1.6.0", "hyper-util", "rustls", "rustls-pki-types", @@ -3412,7 +3725,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "native-tls", "tokio", @@ -3430,8 +3743,8 @@ dependencies = [ "futures-channel", "futures-util", "http 1.3.1", - "http-body", - "hyper", + "http-body 1.0.1", + "hyper 1.6.0", "libc", "pin-project-lite", "socket2", @@ -3448,7 +3761,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ "hex", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -3634,12 +3947,64 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "if-addrs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cabb0019d51a643781ff15c9c8a3e5dedc365c47211270f4e8f82812fedd8f0a" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "if-watch" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf9d64cfcf380606e64f9a0bcf493616b65331199f984151a6fa11a7b3cde38" +dependencies = [ + "async-io", + "core-foundation", + "fnv", + "futures", + "if-addrs", + "ipnet", + "log", + "netlink-packet-core", + "netlink-packet-route 0.17.1", + "netlink-proto", + "netlink-sys", + "rtnetlink 0.13.1", + "system-configuration", + "tokio", + "windows 0.52.0", +] + [[package]] name = "if_chain" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" +[[package]] +name = "igd-next" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "064d90fec10d541084e7b39ead8875a5a80d9114a2b18791565253bae25f49e4" +dependencies = [ + "async-trait", + "attohttpc", + "bytes", + "futures", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rand 0.8.5", + "tokio", + "url", + "xmltree", +] + [[package]] name = "igd-next" version = "0.15.1" @@ -3652,7 +4017,7 @@ dependencies = [ "futures", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "log", "rand 0.8.5", @@ -3801,9 +4166,9 @@ dependencies = [ "derive_more 1.0.0", "ed25519-dalek", "futures-util", - "hickory-resolver", + "hickory-resolver 0.25.2", "http 1.3.1", - "igd-next", + "igd-next 0.15.1", "instant", "iroh-base", "iroh-metrics", @@ -3818,9 +4183,9 @@ dependencies = [ "pkarr", "portmapper", "rand 0.8.5", - "rcgen", + "rcgen 0.13.2", "reqwest", - "ring", + "ring 0.17.14", "rustls", "rustls-webpki 0.102.8", "serde", @@ -3900,7 +4265,7 @@ dependencies = [ "bytes", "getrandom 0.2.16", "rand 0.8.5", - "ring", + "ring 0.17.14", "rustc-hash 2.1.1", "rustls", "rustls-pki-types", @@ -3936,10 +4301,10 @@ dependencies = [ "cfg_aliases", "data-encoding", "derive_more 1.0.0", - "hickory-resolver", + "hickory-resolver 0.25.2", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "iroh-base", "iroh-metrics", @@ -4061,7 +4426,7 @@ dependencies = [ "base64 0.22.1", "js-sys", "pem", - "ring", + "ring 0.17.14", "serde", "serde_json", "simple_asn1", @@ -4113,32 +4478,464 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "lazycell" -version = "1.3.0" +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libloading" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + +[[package]] +name = "libm" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" + +[[package]] +name = "libp2p" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbe80f9c7e00526cd6b838075b9c171919404a4732cb2fa8ece0a093223bfc4" +dependencies = [ + "bytes", + "either", + "futures", + "futures-timer", + "getrandom 0.2.16", + "libp2p-allow-block-list", + "libp2p-autonat", + "libp2p-connection-limits", + "libp2p-core", + "libp2p-dns", + "libp2p-identify", + "libp2p-identity", + "libp2p-kad", + "libp2p-mdns", + "libp2p-metrics", + "libp2p-noise", + "libp2p-ping", + "libp2p-quic", + "libp2p-request-response", + "libp2p-swarm", + "libp2p-tcp", + "libp2p-upnp", + "libp2p-yamux", + "multiaddr", + "pin-project", + "rw-stream-sink", + "thiserror 1.0.69", +] + +[[package]] +name = "libp2p-allow-block-list" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1027ccf8d70320ed77e984f273bc8ce952f623762cb9bf2d126df73caef8041" +dependencies = [ + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "void", +] + +[[package]] +name = "libp2p-autonat" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a083675f189803d0682a2726131628e808144911dad076858bfbe30b13065499" +dependencies = [ + "async-trait", + "asynchronous-codec", + "bytes", + "either", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-request-response", + "libp2p-swarm", + "quick-protobuf", + "quick-protobuf-codec", + "rand 0.8.5", + "rand_core 0.6.4", + "thiserror 1.0.69", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-connection-limits" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d003540ee8baef0d254f7b6bfd79bac3ddf774662ca0abf69186d517ef82ad8" +dependencies = [ + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "void", +] + +[[package]] +name = "libp2p-core" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a61f26c83ed111104cd820fe9bc3aaabbac5f1652a1d213ed6e900b7918a1298" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "libp2p-identity", + "multiaddr", + "multihash", + "multistream-select", + "once_cell", + "parking_lot 0.12.3", + "pin-project", + "quick-protobuf", + "rand 0.8.5", + "rw-stream-sink", + "smallvec", + "thiserror 1.0.69", + "tracing", + "unsigned-varint 0.8.0", + "void", + "web-time", +] + +[[package]] +name = "libp2p-dns" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97f37f30d5c7275db282ecd86e54f29dd2176bd3ac656f06abf43bedb21eb8bd" +dependencies = [ + "async-trait", + "futures", + "hickory-resolver 0.24.4", + "libp2p-core", + "libp2p-identity", + "parking_lot 0.12.3", + "smallvec", + "tracing", +] + +[[package]] +name = "libp2p-identify" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1711b004a273be4f30202778856368683bd9a83c4c7dcc8f848847606831a4e3" +dependencies = [ + "asynchronous-codec", + "either", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "lru 0.12.5", + "quick-protobuf", + "quick-protobuf-codec", + "smallvec", + "thiserror 1.0.69", + "tracing", + "void", +] + +[[package]] +name = "libp2p-identity" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3104e13b51e4711ff5738caa1fb54467c8604c2e94d607e27745bcf709068774" +dependencies = [ + "bs58", + "ed25519-dalek", + "hkdf", + "multihash", + "quick-protobuf", + "rand 0.8.5", + "sha2", + "thiserror 2.0.12", + "tracing", + "zeroize", +] + +[[package]] +name = "libp2p-kad" +version = "0.46.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced237d0bd84bbebb7c2cad4c073160dacb4fe40534963c32ed6d4c6bb7702a3" +dependencies = [ + "arrayvec", + "asynchronous-codec", + "bytes", + "either", + "fnv", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "quick-protobuf", + "quick-protobuf-codec", + "rand 0.8.5", + "sha2", + "smallvec", + "thiserror 1.0.69", + "tracing", + "uint", + "void", + "web-time", +] + +[[package]] +name = "libp2p-mdns" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8546b6644032565eb29046b42744aee1e9f261ed99671b2c93fb140dba417" +dependencies = [ + "data-encoding", + "futures", + "hickory-proto 0.24.4", + "if-watch", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "smallvec", + "socket2", + "tokio", + "tracing", + "void", +] + +[[package]] +name = "libp2p-metrics" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ebafa94a717c8442d8db8d3ae5d1c6a15e30f2d347e0cd31d057ca72e42566" +dependencies = [ + "futures", + "libp2p-core", + "libp2p-identify", + "libp2p-identity", + "libp2p-kad", + "libp2p-ping", + "libp2p-swarm", + "pin-project", + "prometheus-client", + "web-time", +] + +[[package]] +name = "libp2p-noise" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36b137cb1ae86ee39f8e5d6245a296518912014eaa87427d24e6ff58cfc1b28c" +dependencies = [ + "asynchronous-codec", + "bytes", + "curve25519-dalek", + "futures", + "libp2p-core", + "libp2p-identity", + "multiaddr", + "multihash", + "once_cell", + "quick-protobuf", + "rand 0.8.5", + "sha2", + "snow", + "static_assertions", + "thiserror 1.0.69", + "tracing", + "x25519-dalek", + "zeroize", +] + +[[package]] +name = "libp2p-ping" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "005a34420359223b974ee344457095f027e51346e992d1e0dcd35173f4cdd422" +dependencies = [ + "either", + "futures", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-quic" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46352ac5cd040c70e88e7ff8257a2ae2f891a4076abad2c439584a31c15fd24e" +dependencies = [ + "bytes", + "futures", + "futures-timer", + "if-watch", + "libp2p-core", + "libp2p-identity", + "libp2p-tls", + "parking_lot 0.12.3", + "quinn", + "rand 0.8.5", + "ring 0.17.14", + "rustls", + "socket2", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "libp2p-request-response" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1356c9e376a94a75ae830c42cdaea3d4fe1290ba409a22c809033d1b7dcab0a6" +dependencies = [ + "async-trait", + "cbor4ii", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "serde", + "smallvec", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-swarm" +version = "0.45.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7dd6741793d2c1fb2088f67f82cf07261f25272ebe3c0b0c311e0c6b50e851a" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm-derive", + "lru 0.12.5", + "multistream-select", + "once_cell", + "rand 0.8.5", + "smallvec", + "tokio", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-swarm-derive" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206e0aa0ebe004d778d79fb0966aa0de996c19894e2c0605ba2f8524dd4443d8" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "libp2p-tcp" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +checksum = "ad964f312c59dcfcac840acd8c555de8403e295d39edf96f5240048b5fcaa314" +dependencies = [ + "futures", + "futures-timer", + "if-watch", + "libc", + "libp2p-core", + "libp2p-identity", + "socket2", + "tokio", + "tracing", +] [[package]] -name = "libc" -version = "0.2.172" +name = "libp2p-tls" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "47b23dddc2b9c355f73c1e36eb0c3ae86f7dc964a3715f0731cfad352db4d847" +dependencies = [ + "futures", + "futures-rustls", + "libp2p-core", + "libp2p-identity", + "rcgen 0.11.3", + "ring 0.17.14", + "rustls", + "rustls-webpki 0.101.7", + "thiserror 1.0.69", + "x509-parser", + "yasna", +] [[package]] -name = "libloading" -version = "0.8.6" +name = "libp2p-upnp" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +checksum = "01bf2d1b772bd3abca049214a3304615e6a36fa6ffc742bdd1ba774486200b8f" dependencies = [ - "cfg-if", - "windows-targets 0.52.6", + "futures", + "futures-timer", + "igd-next 0.14.3", + "libp2p-core", + "libp2p-swarm", + "tokio", + "tracing", + "void", ] [[package]] -name = "libm" -version = "0.2.13" +name = "libp2p-yamux" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" +checksum = "788b61c80789dba9760d8c669a5bedb642c8267555c803fabd8396e4ca5c5882" +dependencies = [ + "either", + "futures", + "libp2p-core", + "thiserror 1.0.69", + "tracing", + "yamux 0.12.1", + "yamux 0.13.5", +] [[package]] name = "libredox" @@ -4159,12 +4956,24 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.7.5" @@ -4251,6 +5060,15 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "mach" version = "0.3.2" @@ -4375,9 +5193,9 @@ dependencies = [ "colored", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "log", "rand 0.9.1", @@ -4407,6 +5225,60 @@ dependencies = [ "uuid", ] +[[package]] +name = "multiaddr" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6351f60b488e04c1d21bc69e56b89cb3f5e8f5d22557d6e8031bdfd79b6961" +dependencies = [ + "arrayref", + "byteorder", + "data-encoding", + "libp2p-identity", + "multibase", + "multihash", + "percent-encoding", + "serde", + "static_assertions", + "unsigned-varint 0.8.0", + "url", +] + +[[package]] +name = "multibase" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404" +dependencies = [ + "base-x", + "data-encoding", + "data-encoding-macro", +] + +[[package]] +name = "multihash" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" +dependencies = [ + "core2", + "unsigned-varint 0.8.0", +] + +[[package]] +name = "multistream-select" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea0df8e5eec2298a62b326ee4f0d7fe1a6b90a09dfcf9df37b38f947a8c42f19" +dependencies = [ + "bytes", + "futures", + "log", + "pin-project", + "smallvec", + "unsigned-varint 0.7.2", +] + [[package]] name = "n0-future" version = "0.1.3" @@ -4651,6 +5523,12 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -4741,7 +5619,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -4934,6 +5812,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "p2p" +version = "0.3.10" +dependencies = [ + "anyhow", + "libp2p", + "serde", + "tokio", + "void", +] + [[package]] name = "parity-scale-codec" version = "3.7.4" @@ -5216,6 +6105,21 @@ dependencies = [ "pnet_macros_support", ] +[[package]] +name = "polling" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi 0.5.2", + "pin-project-lite", + "rustix 1.0.7", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "poly1305" version = "0.8.0" @@ -5227,6 +6131,18 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.11.0" @@ -5253,7 +6169,7 @@ dependencies = [ "derive_more 1.0.0", "futures-lite 2.6.0", "futures-util", - "igd-next", + "igd-next 0.15.1", "iroh-metrics", "libc", "netwatch", @@ -5574,6 +6490,28 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-protobuf" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6da84cc204722a989e01ba2f6e1e276e190f22263d0cb6ce8526fcdb0d2e1f" +dependencies = [ + "byteorder", +] + +[[package]] +name = "quick-protobuf-codec" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15a0580ab32b169745d7a39db2ba969226ca16738931be152a3209b409de2474" +dependencies = [ + "asynchronous-codec", + "bytes", + "quick-protobuf", + "thiserror 1.0.69", + "unsigned-varint 0.8.0", +] + [[package]] name = "quinn" version = "0.11.7" @@ -5582,6 +6520,7 @@ checksum = "c3bd15a6f2967aef83887dcb9fec0014580467e33720d073560cf015a5683012" dependencies = [ "bytes", "cfg_aliases", + "futures-io", "pin-project-lite", "quinn-proto", "quinn-udp", @@ -5603,7 +6542,7 @@ dependencies = [ "bytes", "getrandom 0.3.2", "rand 0.9.1", - "ring", + "ring 0.17.14", "rustc-hash 2.1.1", "rustls", "rustls-pki-types", @@ -5765,6 +6704,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rcgen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c4f3084aa3bc7dfbba4eff4fab2a54db4324965d8872ab933565e6fbd83bc6" +dependencies = [ + "pem", + "ring 0.16.20", + "time", + "yasna", +] + [[package]] name = "rcgen" version = "0.13.2" @@ -5772,7 +6723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75e669e5202259b5314d1ea5397316ad400819437857b90861765f24c4cf80a2" dependencies = [ "pem", - "ring", + "ring 0.17.14", "rustls-pki-types", "time", "yasna", @@ -5911,9 +6862,9 @@ dependencies = [ "futures-util", "h2 0.4.9", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -5981,6 +6932,21 @@ dependencies = [ "subtle", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.14" @@ -5991,7 +6957,7 @@ dependencies = [ "cfg-if", "getrandom 0.2.16", "libc", - "untrusted", + "untrusted 0.9.0", "windows-sys 0.52.0", ] @@ -6168,7 +7134,20 @@ dependencies = [ "bitflags 2.9.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags 2.9.0", + "errno", + "libc", + "linux-raw-sys 0.9.4", "windows-sys 0.59.0", ] @@ -6181,7 +7160,7 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", - "ring", + "ring 0.17.14", "rustls-pki-types", "rustls-webpki 0.103.1", "subtle", @@ -6206,15 +7185,25 @@ dependencies = [ "web-time", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "rustls-webpki" version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "ring", + "ring 0.17.14", "rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -6224,9 +7213,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "aws-lc-rs", - "ring", + "ring 0.17.14", "rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -6247,6 +7236,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rw-stream-sink" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8c9026ff5d2f23da5e45bbc283f156383001bfb09c4e44256d02c1a685fe9a1" +dependencies = [ + "futures", + "pin-project", + "static_assertions", +] + [[package]] name = "ryu" version = "1.0.20" @@ -6705,6 +7705,23 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "snow" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "850948bee068e713b8ab860fe1adc4d109676ab4c3b621fd8147f06b261f2f85" +dependencies = [ + "aes-gcm", + "blake2", + "chacha20poly1305", + "curve25519-dalek", + "rand_core 0.6.4", + "ring 0.17.14", + "rustc_version 0.4.1", + "sha2", + "subtle", +] + [[package]] name = "socket2" version = "0.5.9" @@ -6715,6 +7732,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" @@ -6845,7 +7868,7 @@ dependencies = [ "lazy_static", "md-5", "rand 0.8.5", - "ring", + "ring 0.17.14", "subtle", "thiserror 1.0.69", "tokio", @@ -7010,7 +8033,7 @@ dependencies = [ "cfg-if", "fastrand 2.3.0", "once_cell", - "rustix", + "rustix 0.38.44", "windows-sys 0.59.0", ] @@ -7565,6 +8588,24 @@ dependencies = [ "subtle", ] +[[package]] +name = "unsigned-varint" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" + +[[package]] +name = "unsigned-varint" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -7784,6 +8825,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + [[package]] name = "vte" version = "0.14.1" @@ -8005,7 +9052,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix", + "rustix 0.38.44", ] [[package]] @@ -8670,6 +9717,18 @@ dependencies = [ "tap", ] +[[package]] +name = "x25519-dalek" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7e468321c81fb07fa7f4c636c3972b9100f0346e5b6a9f2bd0603a52f7ed277" +dependencies = [ + "curve25519-dalek", + "rand_core 0.6.4", + "serde", + "zeroize", +] + [[package]] name = "x509-parser" version = "0.16.0" @@ -8702,6 +9761,37 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "yamux" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed0164ae619f2dc144909a9f082187ebb5893693d8c0196e8085283ccd4b776" +dependencies = [ + "futures", + "log", + "nohash-hasher", + "parking_lot 0.12.3", + "pin-project", + "rand 0.8.5", + "static_assertions", +] + +[[package]] +name = "yamux" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3da1acad1c2dc53f0dde419115a38bd8221d8c3e47ae9aeceaf453266d29307e" +dependencies = [ + "futures", + "log", + "nohash-hasher", + "parking_lot 0.12.3", + "pin-project", + "rand 0.9.1", + "static_assertions", + "web-time", +] + [[package]] name = "yasna" version = "0.5.2" diff --git a/Cargo.toml b/Cargo.toml index 878eec1c..7bc5fc2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/validator", "crates/shared", "crates/orchestrator", + "crates/p2p", "crates/dev-utils", ] resolver = "2" @@ -48,7 +49,7 @@ edition = "2021" match_same_arms = "warn" unused_async = "warn" uninlined_format_args = "warn" +manual_let_else = "warn" [workspace.lints.rust] unreachable_pub = "warn" -manual_let_else = "warn" \ No newline at end of file diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml new file mode 100644 index 00000000..2d5d94ff --- /dev/null +++ b/crates/p2p/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "p2p" +version.workspace = true +edition.workspace = true + +[dependencies] +libp2p = { version = "0.54", features = ["request-response", "identify", "ping", "mdns", "noise", "tcp", "autonat", "kad", "tokio", "cbor", "macros", "yamux"] } +void = "1.0" + +anyhow = {workspace = true} +serde = {workspace = true} +tokio = {workspace = true, features = ["sync"]} + +[lints] +workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs new file mode 100644 index 00000000..6b66394e --- /dev/null +++ b/crates/p2p/src/behaviour.rs @@ -0,0 +1,165 @@ +use anyhow::Context as _; +use anyhow::Result; +use libp2p::autonat; +use libp2p::connection_limits; +use libp2p::connection_limits::ConnectionLimits; +use libp2p::identify; +use libp2p::identity; +use libp2p::kad; +use libp2p::kad::store::MemoryStore; +use libp2p::mdns; +use libp2p::ping; +use libp2p::request_response; +use libp2p::swarm::NetworkBehaviour; +use std::time::Duration; + +use crate::message::IncomingMessage; +use crate::message::{Request, Response}; +use crate::Protocols; +use crate::PRIME_STREAM_PROTOCOL; + +#[derive(NetworkBehaviour)] +#[behaviour(to_swarm = "BehaviourEvent")] +pub(crate) struct Behaviour { + // connection gating + connection_limits: connection_limits::Behaviour, + + // discovery + mdns: mdns::tokio::Behaviour, + kademlia: kad::Behaviour, + + // protocols + identify: identify::Behaviour, + ping: ping::Behaviour, + request_response: request_response::cbor::Behaviour, + + // nat traversal + autonat: autonat::Behaviour, +} + +#[derive(Debug)] +pub(crate) enum BehaviourEvent { + Autonat(autonat::Event), + Identify(identify::Event), + Kademlia(kad::Event), + Mdns(mdns::Event), + Ping(ping::Event), + RequestResponse(request_response::Event), +} + +impl From for BehaviourEvent { + fn from(_: void::Void) -> Self { + unreachable!("void::Void cannot be converted to BehaviourEvent") + } +} + +impl From for BehaviourEvent { + fn from(event: autonat::Event) -> Self { + BehaviourEvent::Autonat(event) + } +} + +impl From for BehaviourEvent { + fn from(event: kad::Event) -> Self { + BehaviourEvent::Kademlia(event) + } +} + +impl From for BehaviourEvent { + fn from(event: libp2p::mdns::Event) -> Self { + BehaviourEvent::Mdns(event) + } +} + +impl From for BehaviourEvent { + fn from(event: ping::Event) -> Self { + BehaviourEvent::Ping(event) + } +} + +impl From for BehaviourEvent { + fn from(event: identify::Event) -> Self { + BehaviourEvent::Identify(event) + } +} + +impl From> for BehaviourEvent { + fn from(event: request_response::Event) -> Self { + BehaviourEvent::RequestResponse(event) + } +} + +impl Behaviour { + pub(crate) fn new( + keypair: &identity::Keypair, + protocols: Protocols, + agent_version: String, + ) -> Result { + let peer_id = keypair.public().to_peer_id(); + + let protocols = protocols.into_iter().map(|protocol| { + ( + protocol.as_stream_protocol(), + request_response::ProtocolSupport::Full, // TODO: configure inbound/outbound based on node role and protocol + ) + }); + + let autonat = autonat::Behaviour::new(peer_id, autonat::Config::default()); + let connection_limits = connection_limits::Behaviour::new( + ConnectionLimits::default().with_max_established(Some(100)), + ); + + let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) + .context("failed to create mDNS behaviour")?; + let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + + let identify = identify::Behaviour::new( + identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) + .with_agent_version(agent_version), + ); + let ping = ping::Behaviour::new(ping::Config::new().with_interval(Duration::from_secs(10))); + + Ok(Self { + autonat, + connection_limits, + kademlia, + mdns, + identify, + ping, + request_response: request_response::cbor::Behaviour::new( + protocols, + request_response::Config::default(), + ), + }) + } + + pub(crate) fn request_response( + &mut self, + ) -> &mut request_response::cbor::Behaviour { + &mut self.request_response + } +} + +impl BehaviourEvent { + pub(crate) async fn handle(self, message_tx: tokio::sync::mpsc::Sender) { + match self { + BehaviourEvent::Autonat(_event) => {} + BehaviourEvent::Identify(_event) => {} + BehaviourEvent::Kademlia(_event) => { // TODO: potentially on outbound queries + } + BehaviourEvent::Mdns(_event) => {} + BehaviourEvent::Ping(_event) => {} + BehaviourEvent::RequestResponse(event) => match event { + request_response::Event::Message { peer, message } => { + let _ = message_tx + .send(IncomingMessage { + peer: peer.clone(), + message, + }) + .await; + } + _ => {} + }, + } + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs new file mode 100644 index 00000000..adde16af --- /dev/null +++ b/crates/p2p/src/lib.rs @@ -0,0 +1,299 @@ +use anyhow::Context; +use anyhow::Result; +use libp2p::futures::stream::FuturesUnordered; +use libp2p::multiaddr::Protocol; +use libp2p::noise; +use libp2p::swarm::SwarmEvent; +use libp2p::tcp; +use libp2p::yamux; +use libp2p::Multiaddr; +use libp2p::Swarm; +use libp2p::SwarmBuilder; +use libp2p::{identity, PeerId, Transport}; +use std::time::Duration; + +mod behaviour; +mod message; +mod protocol; + +use behaviour::Behaviour; +use message::{IncomingMessage, OutgoingMessage, OutgoingMessageInner}; +use protocol::Protocols; + +pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = + libp2p::StreamProtocol::new("/prime/1.0.0"); +// TODO: force this to be passed by the user +pub const DEFAULT_AGENT_VERSION: &str = "prime-node/0.1.0"; + +pub struct Node { + peer_id: PeerId, + listen_addrs: Vec, + swarm: Swarm, + bootnodes: Vec, + + // channel for sending incoming messages to the consumer of this library + incoming_message_tx: tokio::sync::mpsc::Sender, + + // channel for receiving outgoing messages from the consumer of this library + outgoing_message_rx: tokio::sync::mpsc::Receiver, +} + +impl Node { + pub fn peer_id(&self) -> PeerId { + self.peer_id + } + + pub fn listen_addrs(&self) -> &[libp2p::Multiaddr] { + &self.listen_addrs + } + + /// Returns the multiaddresses that this node is listening on, with the peer ID included. + pub fn multiaddrs(&self) -> Vec { + self.listen_addrs + .iter() + .map(|addr| addr.clone().with(Protocol::P2p(self.peer_id))) + .collect() + } + + pub async fn run(self) -> Result<()> { + use libp2p::futures::StreamExt as _; + + let Node { + peer_id: _, + listen_addrs, + mut swarm, + bootnodes, + incoming_message_tx, + mut outgoing_message_rx, + } = self; + + for addr in listen_addrs { + swarm + .listen_on(addr) + .context("swarm failed to listen on multiaddr")?; + } + + let futures = FuturesUnordered::new(); + for bootnode in bootnodes { + futures.push(swarm.dial(bootnode)) + } + let results: Vec<_> = futures.into_iter().collect(); + for result in results { + match result { + Ok(_) => {} + Err(_e) => { + // TODO: log this error + } + } + } + + loop { + tokio::select! { + Some(message) = outgoing_message_rx.recv() => { + match message.message { + OutgoingMessageInner::Request(request) => { + swarm.behaviour_mut().request_response().send_request(&message.peer, request); + } + OutgoingMessageInner::Response((channel, response)) => { + if let Err(_e) = swarm.behaviour_mut().request_response().send_response(channel, response) { + // log error + } + } + } + } + event = swarm.select_next_some() => { + match event { + SwarmEvent::NewListenAddr { + listener_id: _, + address: _, + } => {} + SwarmEvent::ExternalAddrConfirmed { address: _ } => {} + SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, + _ => continue, + } + }, + } + } + } +} + +pub struct NodeBuilder { + port: Option, + listen_addrs: Vec, + keypair: Option, + agent_version: Option, + protocols: Protocols, + bootnodes: Vec, +} + +impl NodeBuilder { + pub fn new() -> Self { + Self { + port: None, + listen_addrs: Vec::new(), + keypair: None, + agent_version: None, + protocols: Protocols::new(), + bootnodes: Vec::new(), + } + } + + pub fn with_port(mut self, port: u16) -> Self { + self.port = Some(port); + self + } + + pub fn with_listen_addr(mut self, addr: libp2p::Multiaddr) -> Self { + self.listen_addrs.push(addr); + self + } + + pub fn with_keypair(mut self, keypair: identity::Keypair) -> Self { + self.keypair = Some(keypair); + self + } + + pub fn with_agent_version(mut self, agent_version: String) -> Self { + self.agent_version = Some(agent_version); + self + } + + pub fn with_validator_authentication(mut self) -> Self { + self.protocols = self.protocols.with_validator_authentication(); + self + } + + pub fn with_hardware_challenge(mut self) -> Self { + self.protocols = self.protocols.with_hardware_challenge(); + self + } + + pub fn with_invite(mut self) -> Self { + self.protocols = self.protocols.with_invite(); + self + } + + pub fn with_get_task_logs(mut self) -> Self { + self.protocols = self.protocols.with_get_task_logs(); + self + } + + pub fn with_restart(mut self) -> Self { + self.protocols = self.protocols.with_restart(); + self + } + + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { + self.bootnodes.push(bootnode); + self + } + + pub fn with_bootnodes(mut self, bootnodes: I) -> Self + where + I: IntoIterator, + T: Into, + { + for bootnode in bootnodes { + self.bootnodes.push(bootnode.into()); + } + self + } + + pub fn try_build( + self, + ) -> Result<( + Node, + tokio::sync::mpsc::Receiver, + tokio::sync::mpsc::Sender, + )> { + let Self { + port, + mut listen_addrs, + keypair, + agent_version, + protocols, + bootnodes, + } = self; + + let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); + let peer_id = keypair.public().to_peer_id(); + + let transport = create_transport(&keypair)?; + let behaviour = Behaviour::new( + &keypair, + protocols, + agent_version.unwrap_or(DEFAULT_AGENT_VERSION.to_string()), + ) + .context("failed to create behaviour")?; + + let swarm = SwarmBuilder::with_existing_identity(keypair) + .with_tokio() + .with_other_transport(|_| transport)? + .with_behaviour(|_| behaviour)? + .build(); + + if listen_addrs.is_empty() { + let port = port.unwrap_or(0); + let listen_addr = format!("/ip4/0.0.0.0/tcp/{port}") + .parse() + .expect("can parse valid multiaddr"); + listen_addrs.push(listen_addr); + } + + let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); + let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Node { + peer_id, + swarm, + listen_addrs, + bootnodes, + incoming_message_tx, + outgoing_message_rx, + }, + incoming_message_rx, + outgoing_message_tx, + )) + } +} + +fn create_transport( + keypair: &identity::Keypair, +) -> Result> { + let transport = tcp::tokio::Transport::new(tcp::Config::default()) + .upgrade(libp2p::core::upgrade::Version::V1) + .authenticate(noise::Config::new(keypair)?) + .multiplex(yamux::Config::default()) + .timeout(Duration::from_secs(20)) + .boxed(); + + Ok(transport) +} + +#[cfg(test)] +mod test { + use super::NodeBuilder; + use crate::message; + + #[tokio::test] + async fn two_nodes_can_connect() -> anyhow::Result<()> { + let node1 = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = node1; + + let node2 = NodeBuilder::new() + .with_get_task_logs() + .with_bootnodes(node1.multiaddrs()) + .try_build() + .unwrap(); + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = node2; + + // Start both nodes in separate tasks + tokio::spawn(async move { node1.run().await }); + tokio::spawn(async move { node2.run().await }); + + let request = message::Request::GetTaskLogs; + + Ok(()) + } +} diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs new file mode 100644 index 00000000..99b740db --- /dev/null +++ b/crates/p2p/src/message.rs @@ -0,0 +1,122 @@ +use libp2p::PeerId; +use serde::{Deserialize, Serialize}; +use std::time::SystemTime; + +#[derive(Debug)] +pub struct IncomingMessage { + pub peer: PeerId, + pub message: libp2p::request_response::Message, +} + +#[derive(Debug)] +pub struct OutgoingMessage { + pub peer: PeerId, + pub message: OutgoingMessageInner, +} + +#[derive(Debug)] +pub enum OutgoingMessageInner { + Request(Request), + Response( + ( + libp2p::request_response::ResponseChannel, + Response, + ), + ), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Request { + ValidatorAuthentication(ValidatorAuthenticationRequest), + HardwareChallenge(HardwareChallengeRequest), + Invite(InviteRequest), + GetTaskLogs, + Restart, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Response { + ValidatorAuthentication(ValidatorAuthenticationResponse), + HardwareChallenge(HardwareChallengeResponse), + Invite(InviteResponse), + GetTaskLogs(GetTaskLogsResponse), + Restart(RestartResponse), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ValidatorAuthenticationRequest { + Initiation(ValidationAuthenticationInitiationRequest), + Solution(ValidationAuthenticationSolutionRequest), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ValidatorAuthenticationResponse { + Initiation(ValidationAuthenticationInitiationResponse), + Solution(ValidationAuthenticationSolutionResponse), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationInitiationRequest { + pub message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationInitiationResponse { + pub signed_message: String, + pub message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationSolutionRequest { + pub signed_message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ValidationAuthenticationSolutionResponse { + Granted, + Rejected, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HardwareChallengeRequest { + pub challenge: String, // TODO + pub timestamp: SystemTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HardwareChallengeResponse { + pub response: String, // TODO + pub timestamp: SystemTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum InviteRequestUrl { + MasterUrl(String), + MasterIpPort(String, u16), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InviteRequest { + pub invite: String, + pub pool_id: u32, + pub url: InviteRequestUrl, + pub timestamp: u64, + pub expiration: [u8; 32], + pub nonce: [u8; 32], +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum InviteResponse { + Ok, + Error(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetTaskLogsResponse { + pub logs: Result, String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RestartResponse { + pub result: Result<(), String>, +} diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs new file mode 100644 index 00000000..5186ac44 --- /dev/null +++ b/crates/p2p/src/protocol.rs @@ -0,0 +1,73 @@ +use libp2p::StreamProtocol; +use std::{collections::HashSet, hash::Hash}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum Protocol { + // validator -> worker + ValidatorAuthentication, + // validator -> worker + HardwareChallenge, + // orchestrator -> worker + Invite, + // any -> worker + GetTaskLogs, + // any -> worker + Restart, +} + +impl Protocol { + pub(crate) fn as_stream_protocol(&self) -> StreamProtocol { + match self { + Protocol::ValidatorAuthentication => { + StreamProtocol::new("/prime/validator_authentication/1.0.0") + } + Protocol::HardwareChallenge => StreamProtocol::new("/prime/hardware_challenge/1.0.0"), + Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), + Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), + Protocol::Restart => StreamProtocol::new("/prime/restart/1.0.0"), + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Protocols(HashSet); + +impl Protocols { + pub(crate) fn new() -> Self { + Self(HashSet::new()) + } + + pub(crate) fn with_validator_authentication(mut self) -> Self { + self.0.insert(Protocol::ValidatorAuthentication); + self + } + + pub(crate) fn with_hardware_challenge(mut self) -> Self { + self.0.insert(Protocol::HardwareChallenge); + self + } + + pub(crate) fn with_invite(mut self) -> Self { + self.0.insert(Protocol::Invite); + self + } + + pub(crate) fn with_get_task_logs(mut self) -> Self { + self.0.insert(Protocol::GetTaskLogs); + self + } + + pub(crate) fn with_restart(mut self) -> Self { + self.0.insert(Protocol::Restart); + self + } +} + +impl IntoIterator for Protocols { + type Item = Protocol; + type IntoIter = std::collections::hash_set::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} From f5365d9762b69e103e55f60b7f304ef52418c1d3 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 17:49:08 -0400 Subject: [PATCH 02/40] request-response protocol working --- crates/p2p/src/behaviour.rs | 23 +++++++- crates/p2p/src/lib.rs | 102 +++++++++++++++++++++++++++++------- crates/p2p/src/message.rs | 25 ++++++--- 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 6b66394e..cd6606bb 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -151,6 +151,7 @@ impl BehaviourEvent { BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { + println!("received message from peer {peer:?}: {message:?}"); let _ = message_tx .send(IncomingMessage { peer: peer.clone(), @@ -158,7 +159,27 @@ impl BehaviourEvent { }) .await; } - _ => {} + request_response::Event::ResponseSent { peer, request_id } => { + println!("response sent to peer {peer:?} for request ID {request_id:?}"); + } + request_response::Event::InboundFailure { + peer, + request_id, + error, + } => { + println!( + "inbound failure from peer {peer:?} for request ID {request_id:?}: {error}" + ); + } + request_response::Event::OutboundFailure { + peer, + request_id, + error, + } => { + println!( + "outbound failure to peer {peer:?} for request ID {request_id:?}: {error}" + ); + } }, } } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index adde16af..777e8689 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,7 +1,6 @@ use anyhow::Context; use anyhow::Result; use libp2p::futures::stream::FuturesUnordered; -use libp2p::multiaddr::Protocol; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -17,7 +16,7 @@ mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage, OutgoingMessageInner}; +use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = @@ -51,7 +50,11 @@ impl Node { pub fn multiaddrs(&self) -> Vec { self.listen_addrs .iter() - .map(|addr| addr.clone().with(Protocol::P2p(self.peer_id))) + .map(|addr| { + addr.clone() + .with_p2p(self.peer_id) + .expect("can add peer ID to multiaddr") + }) .collect() } @@ -81,8 +84,9 @@ impl Node { for result in results { match result { Ok(_) => {} - Err(_e) => { + Err(e) => { // TODO: log this error + println!("failed to dial bootnode: {e:?}"); } } } @@ -90,13 +94,15 @@ impl Node { loop { tokio::select! { Some(message) = outgoing_message_rx.recv() => { - match message.message { - OutgoingMessageInner::Request(request) => { - swarm.behaviour_mut().request_response().send_request(&message.peer, request); + match message { + OutgoingMessage::Request((peer, request)) => { + swarm.behaviour_mut().request_response().send_request(&peer, request); } - OutgoingMessageInner::Response((channel, response)) => { - if let Err(_e) = swarm.behaviour_mut().request_response().send_response(channel, response) { + OutgoingMessage::Response((channel, response)) => { + println!("sending response on channel"); + if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { // log error + println!("failed to send response: {e:?}"); } } } @@ -105,9 +111,22 @@ impl Node { match event { SwarmEvent::NewListenAddr { listener_id: _, - address: _, - } => {} - SwarmEvent::ExternalAddrConfirmed { address: _ } => {} + address, + } => { + println!("new listen address: {address}"); + } + SwarmEvent::ExternalAddrConfirmed { address } => { + println!("external address confirmed: {address}"); + } + SwarmEvent::ConnectionClosed { + peer_id, + cause, + endpoint: _, + connection_id: _, + num_established: _, + } => { + println!("connection closed with peer {peer_id}: {cause:?}"); + } SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, _ => continue, } @@ -230,6 +249,9 @@ impl NodeBuilder { .with_tokio() .with_other_transport(|_| transport)? .with_behaviour(|_| behaviour)? + .with_swarm_config(|cfg| { + cfg.with_idle_connection_timeout(Duration::from_secs(u64::MAX)) // don't disconnect from idle peers + }) .build(); if listen_addrs.is_empty() { @@ -277,23 +299,63 @@ mod test { use crate::message; #[tokio::test] - async fn two_nodes_can_connect() -> anyhow::Result<()> { - let node1 = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = node1; + async fn two_nodes_can_connect_and_do_request_response() { + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + NodeBuilder::new().with_get_task_logs().try_build().unwrap(); + let node1_peer_id = node1.peer_id(); - let node2 = NodeBuilder::new() + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() .unwrap(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = node2; + let node2_peer_id = node2.peer_id(); - // Start both nodes in separate tasks tokio::spawn(async move { node1.run().await }); tokio::spawn(async move { node2.run().await }); - let request = message::Request::GetTaskLogs; + // TODO: implement a way to get peer count + tokio::time::sleep(std::time::Duration::from_secs(2)).await; - Ok(()) + // send request from node1->node2 + let request = message::Request::GetTaskLogs; + outgoing_message_tx1 + .send(request.into_outgoing_message(node2_peer_id)) + .await + .unwrap(); + let message = incoming_message_rx2.recv().await.unwrap(); + assert_eq!(message.peer, node1_peer_id); + let libp2p::request_response::Message::Request { + request_id: _, + request: message::Request::GetTaskLogs, + channel, + } = message.message + else { + panic!("expected a GetTaskLogs request message"); + }; + + println!("received request from node1"); + + // send response from node2->node1 + let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse { + logs: Ok(vec!["log1".to_string(), "log2".to_string()]), + }); + outgoing_message_tx2 + .send(response.into_outgoing_message(channel)) + .await + .unwrap(); + let message = incoming_message_rx1.recv().await.unwrap(); + assert_eq!(message.peer, node2_peer_id); + let libp2p::request_response::Message::Response { + request_id: _, + response: message::Response::GetTaskLogs(response), + } = message.message + else { + panic!("expected a GetTaskLogs response message"); + }; + assert_eq!( + response.logs, + Ok(vec!["log1".to_string(), "log2".to_string()]) + ); } } diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 99b740db..97c07dff 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -9,14 +9,8 @@ pub struct IncomingMessage { } #[derive(Debug)] -pub struct OutgoingMessage { - pub peer: PeerId, - pub message: OutgoingMessageInner, -} - -#[derive(Debug)] -pub enum OutgoingMessageInner { - Request(Request), +pub enum OutgoingMessage { + Request((PeerId, Request)), Response( ( libp2p::request_response::ResponseChannel, @@ -34,6 +28,12 @@ pub enum Request { Restart, } +impl Request { + pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { + OutgoingMessage::Request((peer, Request::from(self))) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Response { ValidatorAuthentication(ValidatorAuthenticationResponse), @@ -43,6 +43,15 @@ pub enum Response { Restart(RestartResponse), } +impl Response { + pub fn into_outgoing_message( + self, + channel: libp2p::request_response::ResponseChannel, + ) -> OutgoingMessage { + OutgoingMessage::Response((channel, Response::from(self))) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationRequest { Initiation(ValidationAuthenticationInitiationRequest), From 565ed95e32bd9e66208823ae629497be13063bb8 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 18:02:46 -0400 Subject: [PATCH 03/40] clippy --- crates/p2p/src/behaviour.rs | 8 ++------ crates/p2p/src/lib.rs | 6 ++++++ crates/p2p/src/message.rs | 20 ++++++++++---------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index cd6606bb..54f264dd 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -152,12 +152,8 @@ impl BehaviourEvent { BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { println!("received message from peer {peer:?}: {message:?}"); - let _ = message_tx - .send(IncomingMessage { - peer: peer.clone(), - message, - }) - .await; + // if this errors, user dropped their incoming message channel + let _ = message_tx.send(IncomingMessage { peer, message }).await; } request_response::Event::ResponseSent { peer, request_id } => { println!("response sent to peer {peer:?} for request ID {request_id:?}"); diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 777e8689..9f07d8d0 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -145,6 +145,12 @@ pub struct NodeBuilder { bootnodes: Vec, } +impl Default for NodeBuilder { + fn default() -> Self { + Self::new() + } +} + impl NodeBuilder { pub fn new() -> Self { Self { diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 97c07dff..54d757c1 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -30,7 +30,7 @@ pub enum Request { impl Request { pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { - OutgoingMessage::Request((peer, Request::from(self))) + OutgoingMessage::Request((peer, self)) } } @@ -48,40 +48,40 @@ impl Response { self, channel: libp2p::request_response::ResponseChannel, ) -> OutgoingMessage { - OutgoingMessage::Response((channel, Response::from(self))) + OutgoingMessage::Response((channel, self)) } } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationRequest { - Initiation(ValidationAuthenticationInitiationRequest), - Solution(ValidationAuthenticationSolutionRequest), + Initiation(ValidatorAuthenticationInitiationRequest), + Solution(ValidatorAuthenticationSolutionRequest), } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationResponse { - Initiation(ValidationAuthenticationInitiationResponse), - Solution(ValidationAuthenticationSolutionResponse), + Initiation(ValidatorAuthenticationInitiationResponse), + Solution(ValidatorAuthenticationSolutionResponse), } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationInitiationRequest { +pub struct ValidatorAuthenticationInitiationRequest { pub message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationInitiationResponse { +pub struct ValidatorAuthenticationInitiationResponse { pub signed_message: String, pub message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationSolutionRequest { +pub struct ValidatorAuthenticationSolutionRequest { pub signed_message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidationAuthenticationSolutionResponse { +pub enum ValidatorAuthenticationSolutionResponse { Granted, Rejected, } From a5321211a3919879e4467f07a306a4ad661060c9 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 18:05:19 -0400 Subject: [PATCH 04/40] clippy --- crates/p2p/src/behaviour.rs | 1 + crates/p2p/src/message.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 54f264dd..e2737d57 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -37,6 +37,7 @@ pub(crate) struct Behaviour { autonat: autonat::Behaviour, } +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub(crate) enum BehaviourEvent { Autonat(autonat::Event), diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 54d757c1..9013a8ca 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -8,6 +8,7 @@ pub struct IncomingMessage { pub message: libp2p::request_response::Message, } +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum OutgoingMessage { Request((PeerId, Request)), From a548ce43802f19e12652158b40bee90abea135aa Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 16:28:19 -0400 Subject: [PATCH 05/40] begin implementation of libp2p node in worker; working on msg handling --- Cargo.lock | 2 + Cargo.toml | 2 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/lib.rs | 26 +++- crates/p2p/src/message.rs | 16 +- crates/worker/Cargo.toml | 2 + crates/worker/src/p2p/mod.rs | 274 +++++++++++++++++++++++++++++++++++ 7 files changed, 319 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a64a46e4..89b858dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6265,6 +6265,7 @@ dependencies = [ "libp2p", "serde", "tokio", + "tokio-util", "void", ] @@ -10344,6 +10345,7 @@ dependencies = [ "log", "nalgebra", "nvml-wrapper", + "p2p", "rand 0.8.5", "rand 0.9.1", "rand_core 0.6.4", diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 2d5d94ff..46f9833a 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -10,6 +10,7 @@ void = "1.0" anyhow = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} +tokio-util = { workspace = true, features = ["rt"] } [lints] workspace = true diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 9f07d8d0..c02f5f09 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -8,7 +8,7 @@ use libp2p::yamux; use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; -use libp2p::{identity, PeerId, Transport}; +use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; @@ -16,9 +16,13 @@ mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; +pub use message::*; +pub type Libp2pIncomingMessage = libp2p::request_response::Message; +pub type ResponseChannel = libp2p::request_response::ResponseChannel; +pub type PeerId = libp2p::PeerId; + pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); // TODO: force this to be passed by the user @@ -29,6 +33,7 @@ pub struct Node { listen_addrs: Vec, swarm: Swarm, bootnodes: Vec, + cancellation_token: tokio_util::sync::CancellationToken, // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -66,6 +71,7 @@ impl Node { listen_addrs, mut swarm, bootnodes, + cancellation_token, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -93,6 +99,10 @@ impl Node { loop { tokio::select! { + _ = cancellation_token.cancelled() => { + println!("cancellation token triggered, shutting down node"); + break Ok(()); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -143,6 +153,7 @@ pub struct NodeBuilder { agent_version: Option, protocols: Protocols, bootnodes: Vec, + cancellation_token: Option, } impl Default for NodeBuilder { @@ -160,6 +171,7 @@ impl NodeBuilder { agent_version: None, protocols: Protocols::new(), bootnodes: Vec::new(), + cancellation_token: None, } } @@ -224,6 +236,14 @@ impl NodeBuilder { self } + pub fn with_cancellation_token( + mut self, + cancellation_token: tokio_util::sync::CancellationToken, + ) -> Self { + self.cancellation_token = Some(cancellation_token); + self + } + pub fn try_build( self, ) -> Result<( @@ -238,6 +258,7 @@ impl NodeBuilder { agent_version, protocols, bootnodes, + cancellation_token, } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); @@ -279,6 +300,7 @@ impl NodeBuilder { bootnodes, incoming_message_tx, outgoing_message_rx, + cancellation_token: cancellation_token.unwrap_or_default(), }, incoming_message_rx, outgoing_message_tx, diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 9013a8ca..62b01501 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -72,13 +72,19 @@ pub struct ValidatorAuthenticationInitiationRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { - pub signed_message: String, + pub signature: String, pub message: String, } +impl From for Response { + fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationSolutionRequest { - pub signed_message: String, + pub signature: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -87,6 +93,12 @@ pub enum ValidatorAuthenticationSolutionResponse { Rejected, } +impl From for Response { + fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { pub challenge: String, // TODO diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 18596ba5..3ee03e12 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -8,6 +8,8 @@ workspace = true [dependencies] shared = { workspace = true } +p2p = { workspace = true } + actix-web = { workspace = true } bollard = "0.18.1" clap = { workspace = true } diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 9393f985..3cc35009 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -2,3 +2,277 @@ pub(crate) mod service; pub(crate) use service::P2PContext; pub(crate) use service::P2PService; + +use anyhow::Context as _; +use anyhow::Result; +use p2p::Node; +use p2p::NodeBuilder; +use p2p::PeerId; +use p2p::Response; +use p2p::{IncomingMessage, Libp2pIncomingMessage, OutgoingMessage}; +use shared::web3::wallet::Wallet; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +fn build_p2p_node( + port: u16, + cancellation_token: CancellationToken, +) -> Result<(Node, Receiver, Sender)> { + NodeBuilder::new() + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_cancellation_token(cancellation_token) + .try_build() +} + +pub(crate) struct Service { + node: Node, + incoming_messages: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + +impl Service { + pub(crate) fn new( + port: u16, + wallet: Wallet, + validator_addresses: HashSet, + cancellation_token: CancellationToken, + ) -> Result { + let (node, incoming_messages, outgoing_messages) = + build_p2p_node(port, cancellation_token.clone()).context("failed to build p2p node")?; + Ok(Self { + node, + incoming_messages, + cancellation_token, + context: Context::new(wallet, outgoing_messages, validator_addresses), + }) + } + + pub(crate) async fn run(self) { + let Self { + node, + mut incoming_messages, + cancellation_token, + context, + } = self; + + loop { + tokio::select! { + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = (&mut incoming_messages).recv() => { + // TODO: spawn and store handles + if let Err(e) = handle_incoming_message(message, context.clone()) + .await { + tracing::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +#[derive(Clone)] +struct Context { + authorized_peers: Arc>>, + ongoing_auth_challenges: Arc>>, // use request_id? + nonce_cache: Arc>>, + wallet: Wallet, + outgoing_messages: Sender, + validator_addresses: Arc>>, +} + +impl Context { + fn new( + wallet: Wallet, + outgoing_messages: Sender, + validator_addresses: HashSet, + ) -> Self { + Self { + authorized_peers: Arc::new(RwLock::new(HashSet::new())), + ongoing_auth_challenges: Arc::new(RwLock::new(HashMap::new())), + nonce_cache: Arc::new(RwLock::new(HashMap::new())), + wallet, + outgoing_messages, + validator_addresses: Arc::new(RwLock::new(validator_addresses)), + } + } +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel, + } => { + tracing::debug!("received incoming request {request:?}"); + handle_incoming_request(message.peer, request, channel, context).await?; + } + Libp2pIncomingMessage::Response { + request_id: _, + response, + } => { + tracing::debug!("received incoming response {response:?}"); + handle_incoming_response(response).await?; + } + } + Ok(()) +} + +async fn handle_incoming_request( + from: PeerId, + request: p2p::Request, + channel: p2p::ResponseChannel, + context: Context, +) -> Result<()> { + match request { + p2p::Request::ValidatorAuthentication(req) => { + tracing::debug!("handling ValidatorAuthentication request"); + match req { + p2p::ValidatorAuthenticationRequest::Initiation(req) => { + let resp = + handle_validator_authentication_initiation_request(from, req, &context) + .await + .context("failed to handle ValidatorAuthenticationInitiationRequest")?; + let outgoing_message = resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthentication response")?; + } + p2p::ValidatorAuthenticationRequest::Solution(req) => { + let resp = match handle_validator_authentication_initiation_solution( + from, req, &context, + ) + .await + { + Ok(resp) => resp, + Err(e) => { + tracing::error!( + "failed to handle ValidatorAuthenticationSolutionRequest: {e}" + ); + p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() + } + }; + let outgoing_message = resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthenticationSolution response")?; + } + } + } + p2p::Request::HardwareChallenge(req) => { + tracing::debug!("handling HardwareChallenge request"); + } + p2p::Request::Invite(_) => { + tracing::debug!("handling Invite request"); + } + p2p::Request::GetTaskLogs => { + tracing::debug!("handling GetTaskLogs request"); + } + p2p::Request::Restart => { + tracing::debug!("handling Restart request"); + } + } + Ok(()) +} + +async fn handle_validator_authentication_initiation_request( + from: PeerId, + req: p2p::ValidatorAuthenticationInitiationRequest, + context: &Context, +) -> Result { + use rand_v8::Rng as _; + use shared::security::request_signer::sign_message; + + // generate a fresh cryptographically secure challenge message for this auth attempt + let challenge_bytes: [u8; 32] = rand_v8::rngs::OsRng.gen(); + let challenge_message = hex::encode(challenge_bytes); + let signature = sign_message(&req.message, &context.wallet) + .await + .map_err(|e| anyhow::anyhow!("failed to sign message: {e:?}"))?; + + // store the challenge message in nonce cache to prevent replay + let mut nonce_cache = context.nonce_cache.write().await; + nonce_cache.insert(challenge_message.clone(), SystemTime::now()); + + // store the current challenge for this peer + let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; + ongoing_auth_challenges.insert(from, challenge_message.clone()); + + Ok(p2p::ValidatorAuthenticationInitiationResponse { + message: challenge_message, + signature, + } + .into()) +} + +async fn handle_validator_authentication_initiation_solution( + from: PeerId, + req: p2p::ValidatorAuthenticationSolutionRequest, + context: &Context, +) -> Result { + use std::str::FromStr as _; + + let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; + let challenge_message = ongoing_auth_challenges + .remove(&from) + .ok_or_else(|| anyhow::anyhow!("no ongoing authentication challenge for peer {from}"))?; + + let mut nonce_cache = context.nonce_cache.write().await; + if nonce_cache.remove(&challenge_message).is_none() { + anyhow::bail!("challenge message {challenge_message} not found in nonce cache"); + } + + let Ok(signature) = alloy::primitives::Signature::from_str(&req.signature) else { + anyhow::bail!("failed to parse signature from message"); + }; + + let Ok(recovered_address) = signature.recover_address_from_msg(challenge_message) else { + anyhow::bail!("failed to recover address from signature and message"); + }; + + let validator_addresses = context.validator_addresses.read().await; + if !validator_addresses.contains(&recovered_address) { + anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); + } + + Ok(p2p::ValidatorAuthenticationSolutionResponse::Granted.into()) +} + +async fn handle_incoming_response(response: p2p::Response) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(_) => { + // critical developer error, could panic here + tracing::error!("worker should never receive ValidatorAuthentication responses"); + } + p2p::Response::HardwareChallenge(_) => { + tracing::debug!("handling HardwareChallenge response"); + } + p2p::Response::Invite(_) => { + tracing::debug!("handling Invite response"); + } + p2p::Response::GetTaskLogs(_) => { + tracing::debug!("handling GetTaskLogs response"); + } + p2p::Response::Restart(_) => { + tracing::debug!("handling Restart response"); + } + } + Ok(()) +} From d780aae61896777f21cc94cd6c4e1cfde5f040e1 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:10:43 -0400 Subject: [PATCH 06/40] implement more request handlers --- crates/p2p/src/message.rs | 34 +++++++- crates/worker/src/p2p/mod.rs | 149 +++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 55 deletions(-) diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 62b01501..5c9b1e70 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -111,6 +111,12 @@ pub struct HardwareChallengeResponse { pub timestamp: SystemTime, } +impl From for Response { + fn from(response: HardwareChallengeResponse) -> Self { + Response::HardwareChallenge(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteRequestUrl { MasterUrl(String), @@ -133,12 +139,32 @@ pub enum InviteResponse { Error(String), } +impl From for Response { + fn from(response: InviteResponse) -> Self { + Response::Invite(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GetTaskLogsResponse { - pub logs: Result, String>, +pub enum GetTaskLogsResponse { + Ok(String), + Error(String), +} + +impl From for Response { + fn from(response: GetTaskLogsResponse) -> Self { + Response::GetTaskLogs(response) + } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RestartResponse { - pub result: Result<(), String>, +pub enum RestartResponse { + Ok, + Error(String), +} + +impl From for Response { + fn from(response: RestartResponse) -> Self { + Response::Restart(response) + } } diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 3cc35009..ef9978b3 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -19,20 +19,7 @@ use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; -fn build_p2p_node( - port: u16, - cancellation_token: CancellationToken, -) -> Result<(Node, Receiver, Sender)> { - NodeBuilder::new() - .with_port(port) - .with_validator_authentication() - .with_hardware_challenge() - .with_invite() - .with_get_task_logs() - .with_restart() - .with_cancellation_token(cancellation_token) - .try_build() -} +use crate::docker::DockerService; pub(crate) struct Service { node: Node, @@ -46,6 +33,7 @@ impl Service { port: u16, wallet: Wallet, validator_addresses: HashSet, + docker_service: Arc, cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = @@ -54,13 +42,18 @@ impl Service { node, incoming_messages, cancellation_token, - context: Context::new(wallet, outgoing_messages, validator_addresses), + context: Context::new( + wallet, + outgoing_messages, + validator_addresses, + docker_service, + ), }) } pub(crate) async fn run(self) { let Self { - node, + node: _, mut incoming_messages, cancellation_token, context, @@ -83,6 +76,21 @@ impl Service { } } +fn build_p2p_node( + port: u16, + cancellation_token: CancellationToken, +) -> Result<(Node, Receiver, Sender)> { + NodeBuilder::new() + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_cancellation_token(cancellation_token) + .try_build() +} + #[derive(Clone)] struct Context { authorized_peers: Arc>>, @@ -90,7 +98,8 @@ struct Context { nonce_cache: Arc>>, wallet: Wallet, outgoing_messages: Sender, - validator_addresses: Arc>>, + validator_addresses: Arc>, + docker_service: Arc, } impl Context { @@ -98,6 +107,7 @@ impl Context { wallet: Wallet, outgoing_messages: Sender, validator_addresses: HashSet, + docker_service: Arc, ) -> Self { Self { authorized_peers: Arc::new(RwLock::new(HashSet::new())), @@ -105,7 +115,8 @@ impl Context { nonce_cache: Arc::new(RwLock::new(HashMap::new())), wallet, outgoing_messages, - validator_addresses: Arc::new(RwLock::new(validator_addresses)), + validator_addresses: Arc::new(validator_addresses), + docker_service, } } } @@ -137,58 +148,55 @@ async fn handle_incoming_request( channel: p2p::ResponseChannel, context: Context, ) -> Result<()> { - match request { + let resp = match request { p2p::Request::ValidatorAuthentication(req) => { tracing::debug!("handling ValidatorAuthentication request"); match req { p2p::ValidatorAuthenticationRequest::Initiation(req) => { - let resp = - handle_validator_authentication_initiation_request(from, req, &context) - .await - .context("failed to handle ValidatorAuthenticationInitiationRequest")?; - let outgoing_message = resp.into_outgoing_message(channel); - context - .outgoing_messages - .send(outgoing_message) + handle_validator_authentication_initiation_request(from, req, &context) .await - .context("failed to send ValidatorAuthentication response")?; + .context("failed to handle ValidatorAuthenticationInitiationRequest")? } p2p::ValidatorAuthenticationRequest::Solution(req) => { - let resp = match handle_validator_authentication_initiation_solution( - from, req, &context, - ) - .await + match handle_validator_authentication_initiation_solution(from, req, &context) + .await { - Ok(resp) => resp, + Ok(resp) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( "failed to handle ValidatorAuthenticationSolutionRequest: {e}" ); p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() } - }; - let outgoing_message = resp.into_outgoing_message(channel); - context - .outgoing_messages - .send(outgoing_message) - .await - .context("failed to send ValidatorAuthenticationSolution response")?; + } } } } - p2p::Request::HardwareChallenge(req) => { + p2p::Request::HardwareChallenge(_) => { tracing::debug!("handling HardwareChallenge request"); + todo!() } p2p::Request::Invite(_) => { tracing::debug!("handling Invite request"); + handle_invite_request(from, request, &context).await } p2p::Request::GetTaskLogs => { tracing::debug!("handling GetTaskLogs request"); + handle_get_task_logs_request(from, &context).await } p2p::Request::Restart => { tracing::debug!("handling Restart request"); + handle_restart_request(from, &context).await } - } + }; + + let outgoing_message = resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthentication response")?; + Ok(()) } @@ -226,7 +234,7 @@ async fn handle_validator_authentication_initiation_solution( from: PeerId, req: p2p::ValidatorAuthenticationSolutionRequest, context: &Context, -) -> Result { +) -> Result<()> { use std::str::FromStr as _; let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; @@ -247,31 +255,70 @@ async fn handle_validator_authentication_initiation_solution( anyhow::bail!("failed to recover address from signature and message"); }; - let validator_addresses = context.validator_addresses.read().await; - if !validator_addresses.contains(&recovered_address) { + if !context.validator_addresses.contains(&recovered_address) { anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); } - Ok(p2p::ValidatorAuthenticationSolutionResponse::Granted.into()) + Ok(()) +} + +async fn handle_invite_request( + from: PeerId, + _request: p2p::Request, + context: &Context, +) -> Response { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::InviteResponse::Error("unauthorized".to_string()).into(); + } + + p2p::InviteResponse::Ok.into() +} + +async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Response { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::GetTaskLogsResponse::Error("unauthorized".to_string()).into(); + } + + match context.docker_service.get_logs().await { + Ok(logs) => p2p::GetTaskLogsResponse::Ok(logs).into(), + Err(e) => { + return p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")) + .into(); + } + } +} + +async fn handle_restart_request(from: PeerId, context: &Context) -> Response { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::RestartResponse::Error("unauthorized".to_string()).into(); + } + + match context.docker_service.restart_task().await { + Ok(()) => p2p::RestartResponse::Ok.into(), + Err(e) => p2p::RestartResponse::Error(format!("failed to restart task: {e:?}")).into(), + } } async fn handle_incoming_response(response: p2p::Response) -> Result<()> { + // critical developer error if any of these happen, could panic here match response { p2p::Response::ValidatorAuthentication(_) => { - // critical developer error, could panic here tracing::error!("worker should never receive ValidatorAuthentication responses"); } p2p::Response::HardwareChallenge(_) => { - tracing::debug!("handling HardwareChallenge response"); + tracing::error!("worker should never receive HardwareChallenge responses"); } p2p::Response::Invite(_) => { - tracing::debug!("handling Invite response"); + tracing::error!("worker should never receive Invite responses"); } p2p::Response::GetTaskLogs(_) => { - tracing::debug!("handling GetTaskLogs response"); + tracing::error!("worker should never receive GetTaskLogs responses"); } p2p::Response::Restart(_) => { - tracing::debug!("handling Restart response"); + tracing::error!("worker should never receive Restart responses"); } } Ok(()) From bcaa44402fa86210b135fb2dee171facd9126dc7 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:29:47 -0400 Subject: [PATCH 07/40] impl hardware challenge, add new p2p to worker cli --- Cargo.lock | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/challenge_message.rs | 89 +++++++++++++++++++++++++++++ crates/p2p/src/lib.rs | 19 +++--- crates/p2p/src/message.rs | 7 ++- crates/worker/src/cli/command.rs | 67 ++++++++++++---------- crates/worker/src/p2p/mod.rs | 54 +++++++++++------ 7 files changed, 182 insertions(+), 56 deletions(-) create mode 100644 crates/p2p/src/challenge_message.rs diff --git a/Cargo.lock b/Cargo.lock index 89b858dd..200a21e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6263,6 +6263,7 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "nalgebra", "serde", "tokio", "tokio-util", diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 46f9833a..ba52d570 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -8,6 +8,7 @@ libp2p = { version = "0.54", features = ["request-response", "identify", "ping", void = "1.0" anyhow = {workspace = true} +nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } diff --git a/crates/p2p/src/challenge_message.rs b/crates/p2p/src/challenge_message.rs new file mode 100644 index 00000000..639cc602 --- /dev/null +++ b/crates/p2p/src/challenge_message.rs @@ -0,0 +1,89 @@ +use nalgebra::DMatrix; +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; +use std::fmt; + +#[derive(Debug, Clone)] +pub struct FixedF64(pub f64); + +impl Serialize for FixedF64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // adjust precision as needed + serializer.serialize_str(&format!("{:.12}", self.0)) + } +} + +impl<'de> Deserialize<'de> for FixedF64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FixedF64Visitor; + + impl Visitor<'_> for FixedF64Visitor { + type Value = FixedF64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string representing a fixed precision float") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + value + .parse::() + .map(FixedF64) + .map_err(|_| E::custom(format!("invalid f64: {value}"))) + } + } + + deserializer.deserialize_str(FixedF64Visitor) + } +} + +impl PartialEq for FixedF64 { + fn eq(&self, other: &Self) -> bool { + format!("{:.10}", self.0) == format!("{:.10}", other.0) + } +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeRequest { + pub rows_a: usize, + pub cols_a: usize, + pub data_a: Vec, + pub rows_b: usize, + pub cols_b: usize, + pub data_b: Vec, + pub timestamp: Option, +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeResponse { + pub result: Vec, + pub rows: usize, + pub cols: usize, +} + +pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { + // convert FixedF64 to f64 + let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); + let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); + let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); + let b = DMatrix::from_vec(req.rows_b, req.cols_b, data_b); + let c = a * b; + + let data_c: Vec = c.iter().map(|x| FixedF64(*x)).collect(); + + ChallengeResponse { + rows: c.nrows(), + cols: c.ncols(), + result: data_c, + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index c02f5f09..46105a36 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -5,23 +5,27 @@ use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; use libp2p::yamux; -use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; +mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; use protocol::Protocols; +// TODO: put these in a mod +pub use challenge_message::*; pub use message::*; + pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type ResponseChannel = libp2p::request_response::ResponseChannel; pub type PeerId = libp2p::PeerId; +pub type Multiaddr = libp2p::Multiaddr; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -365,9 +369,8 @@ mod test { println!("received request from node1"); // send response from node2->node1 - let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse { - logs: Ok(vec!["log1".to_string(), "log2".to_string()]), - }); + let response = + message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); outgoing_message_tx2 .send(response.into_outgoing_message(channel)) .await @@ -381,9 +384,9 @@ mod test { else { panic!("expected a GetTaskLogs response message"); }; - assert_eq!( - response.logs, - Ok(vec!["log1".to_string(), "log2".to_string()]) - ); + let message::GetTaskLogsResponse::Ok(logs) = response else { + panic!("expected a successful GetTaskLogs response"); + }; + assert_eq!(logs, "logs"); } } diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 5c9b1e70..c0fd2d66 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -2,6 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; +use crate::ChallengeRequest; +use crate::ChallengeResponse; + #[derive(Debug)] pub struct IncomingMessage { pub peer: PeerId, @@ -101,13 +104,13 @@ impl From for Response { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { - pub challenge: String, // TODO + pub challenge: ChallengeRequest, pub timestamp: SystemTime, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { - pub response: String, // TODO + pub response: ChallengeResponse, pub timestamp: SystemTime, } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 92de379e..5698568e 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -9,8 +9,6 @@ use crate::metrics::store::MetricsStore; use crate::operations::compute_node::ComputeNodeOperations; use crate::operations::heartbeat::service::HeartbeatService; use crate::operations::provider::ProviderOperations; -use crate::p2p::P2PContext; -use crate::p2p::P2PService; use crate::services::discovery::DiscoveryService; use crate::services::discovery_updater::DiscoveryUpdater; use crate::state::system_state::SystemState; @@ -701,14 +699,14 @@ pub async fn execute_command( } }; - let p2p_context = P2PContext { - docker_service: docker_service.clone(), - heartbeat_service: heartbeat.clone(), - system_state: state.clone(), - contracts: contracts.clone(), - node_wallet: node_wallet_instance.clone(), - provider_wallet: provider_wallet_instance.clone(), - }; + // let p2p_context = P2PContext { + // docker_service: docker_service.clone(), + // heartbeat_service: heartbeat.clone(), + // system_state: state.clone(), + // contracts: contracts.clone(), + // node_wallet: node_wallet_instance.clone(), + // provider_wallet: provider_wallet_instance.clone(), + // }; let validators = match contracts.prime_network.get_validator_role().await { Ok(validators) => validators, @@ -728,15 +726,31 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - let p2p_service = match P2PService::new( - state.worker_p2p_seed, - cancellation_token.clone(), - Some(p2p_context), + // let p2p_service = match P2PService::new( + // state.worker_p2p_seed, + // cancellation_token.clone(), + // Some(p2p_context), + // node_wallet_instance.clone(), + // allowed_addresses, + // ) + // .await + // { + // Ok(service) => service, + // Err(e) => { + // error!("❌ Failed to start P2P service: {e}"); + // std::process::exit(1); + // } + // }; + + let port = 0; // TODO: cli option + let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); + let p2p_service = match crate::p2p::Service::new( + port, node_wallet_instance.clone(), - allowed_addresses, - ) - .await - { + validator_addresses, + docker_service.clone(), + cancellation_token.clone(), + ) { Ok(service) => service, Err(e) => { error!("❌ Failed to start P2P service: {e}"); @@ -744,23 +758,18 @@ pub async fn execute_command( } }; - if let Err(e) = p2p_service.start() { - error!("❌ Failed to start P2P listener: {e}"); - std::process::exit(1); - } - - node_config.worker_p2p_id = Some(p2p_service.node_id().to_string()); + let peer_id = p2p_service.peer_id(); + node_config.worker_p2p_id = Some(peer_id.to_string()); node_config.worker_p2p_addresses = Some( p2p_service - .listening_addresses() + .listen_addrs() .iter() .map(|addr| addr.to_string()) .collect(), ); - Console::success(&format!( - "P2P service started with ID: {}", - p2p_service.node_id() - )); + tokio::task::spawn(p2p_service.run()); + + Console::success(&format!("P2P service started with ID: {peer_id}",)); let mut attempts = 0; let max_attempts = 100; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index ef9978b3..78b8927c 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,8 +1,3 @@ -pub(crate) mod service; - -pub(crate) use service::P2PContext; -pub(crate) use service::P2PService; - use anyhow::Context as _; use anyhow::Result; use p2p::Node; @@ -51,6 +46,14 @@ impl Service { }) } + pub(crate) fn peer_id(&self) -> PeerId { + self.node.peer_id() + } + + pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { + self.node.listen_addrs() + } + pub(crate) async fn run(self) { let Self { node: _, @@ -64,7 +67,7 @@ impl Service { _ = cancellation_token.cancelled() => { break; } - Some(message) = (&mut incoming_messages).recv() => { + Some(message) = incoming_messages.recv() => { // TODO: spawn and store handles if let Err(e) = handle_incoming_message(message, context.clone()) .await { @@ -136,7 +139,7 @@ async fn handle_incoming_message(message: IncomingMessage, context: Context) -> response, } => { tracing::debug!("received incoming response {response:?}"); - handle_incoming_response(response).await?; + handle_incoming_response(response); } } Ok(()) @@ -161,10 +164,10 @@ async fn handle_incoming_request( match handle_validator_authentication_initiation_solution(from, req, &context) .await { - Ok(resp) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), + Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( - "failed to handle ValidatorAuthenticationSolutionRequest: {e}" + "failed to handle ValidatorAuthenticationSolutionRequest: {e:?}" ); p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() } @@ -172,9 +175,11 @@ async fn handle_incoming_request( } } } - p2p::Request::HardwareChallenge(_) => { + p2p::Request::HardwareChallenge(req) => { tracing::debug!("handling HardwareChallenge request"); - todo!() + handle_hardware_challenge_request(from, req, &context) + .await + .context("failed to handle HardwareChallenge request")? } p2p::Request::Invite(_) => { tracing::debug!("handling Invite request"); @@ -262,6 +267,25 @@ async fn handle_validator_authentication_initiation_solution( Ok(()) } +async fn handle_hardware_challenge_request( + from: PeerId, + request: p2p::HardwareChallengeRequest, + context: &Context, +) -> Result { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + // TODO: error response variant? + anyhow::bail!("unauthorized peer {from} attempted to access HardwareChallenge request"); + } + + let challenge_response = p2p::calc_matrix(&request.challenge); + let response = p2p::HardwareChallengeResponse { + response: challenge_response, + timestamp: SystemTime::now(), + }; + Ok(response.into()) +} + async fn handle_invite_request( from: PeerId, _request: p2p::Request, @@ -283,10 +307,7 @@ async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Respon match context.docker_service.get_logs().await { Ok(logs) => p2p::GetTaskLogsResponse::Ok(logs).into(), - Err(e) => { - return p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")) - .into(); - } + Err(e) => p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")).into(), } } @@ -302,7 +323,7 @@ async fn handle_restart_request(from: PeerId, context: &Context) -> Response { } } -async fn handle_incoming_response(response: p2p::Response) -> Result<()> { +fn handle_incoming_response(response: p2p::Response) { // critical developer error if any of these happen, could panic here match response { p2p::Response::ValidatorAuthentication(_) => { @@ -321,5 +342,4 @@ async fn handle_incoming_response(response: p2p::Response) -> Result<()> { tracing::error!("worker should never receive Restart responses"); } } - Ok(()) } From 0f386af0694279df6f9cd36920518aab6c6e40f1 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:51:51 -0400 Subject: [PATCH 08/40] implement invite request handling, finish cli changes --- crates/worker/src/cli/command.rs | 43 ++--- crates/worker/src/operations/compute_node.rs | 9 +- crates/worker/src/p2p/mod.rs | 162 +++++++++++++++++-- crates/worker/src/state/system_state.rs | 4 +- 4 files changed, 162 insertions(+), 56 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 5698568e..db28deb2 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -60,7 +60,7 @@ pub enum Commands { /// Compute pool ID #[arg(long)] - compute_pool_id: u64, + compute_pool_id: u32, /// Dry run the command without starting the worker #[arg(long, default_value = "false")] @@ -174,7 +174,7 @@ pub enum Commands { /// Compute pool ID #[arg(long)] - compute_pool_id: u64, + compute_pool_id: u32, }, } @@ -215,7 +215,7 @@ pub async fn execute_command( let state = Arc::new(SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, - Some(compute_pool_id.to_string()), + Some(*compute_pool_id), )); let private_key_provider = if let Some(key) = private_key_provider { @@ -294,7 +294,7 @@ pub async fn execute_command( let discovery_state = state.clone(); let discovery_updater = DiscoveryUpdater::new(discovery_service.clone(), discovery_state.clone()); - let pool_id = U256::from(*compute_pool_id as u32); + let pool_id = U256::from(*compute_pool_id); let pool_info = loop { match contracts.compute_pool.get_pool_info(pool_id).await { @@ -336,7 +336,7 @@ pub async fn execute_command( .address() .to_string(), compute_specs: None, - compute_pool_id: *compute_pool_id as u32, + compute_pool_id: *compute_pool_id, worker_p2p_id: None, worker_p2p_addresses: None, }; @@ -699,15 +699,6 @@ pub async fn execute_command( } }; - // let p2p_context = P2PContext { - // docker_service: docker_service.clone(), - // heartbeat_service: heartbeat.clone(), - // system_state: state.clone(), - // contracts: contracts.clone(), - // node_wallet: node_wallet_instance.clone(), - // provider_wallet: provider_wallet_instance.clone(), - // }; - let validators = match contracts.prime_network.get_validator_role().await { Ok(validators) => validators, Err(e) => { @@ -726,22 +717,6 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - // let p2p_service = match P2PService::new( - // state.worker_p2p_seed, - // cancellation_token.clone(), - // Some(p2p_context), - // node_wallet_instance.clone(), - // allowed_addresses, - // ) - // .await - // { - // Ok(service) => service, - // Err(e) => { - // error!("❌ Failed to start P2P service: {e}"); - // std::process::exit(1); - // } - // }; - let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( @@ -749,6 +724,10 @@ pub async fn execute_command( node_wallet_instance.clone(), validator_addresses, docker_service.clone(), + heartbeat.clone(), + state.clone(), + contracts.clone(), + provider_wallet_instance.clone(), cancellation_token.clone(), ) { Ok(service) => service, @@ -823,7 +802,7 @@ pub async fn execute_command( // Start monitoring compute node status on chain provider_ops.start_monitoring(provider_ops_cancellation); - let pool_id = state.compute_pool_id.clone().unwrap_or("0".to_string()); + let pool_id = state.compute_pool_id.unwrap_or(0); if let Err(err) = compute_node_ops.start_monitoring(cancellation_token.clone(), pool_id) { error!("❌ Failed to start node monitoring: {err}"); @@ -1062,7 +1041,7 @@ pub async fn execute_command( } }; - let pool_id = U256::from(*compute_pool_id as u32); + let pool_id = U256::from(*compute_pool_id); if compute_node_exists { match contracts diff --git a/crates/worker/src/operations/compute_node.rs b/crates/worker/src/operations/compute_node.rs index 39b18c29..7cbdbda2 100644 --- a/crates/worker/src/operations/compute_node.rs +++ b/crates/worker/src/operations/compute_node.rs @@ -32,7 +32,7 @@ impl<'c> ComputeNodeOperations<'c> { pub(crate) fn start_monitoring( &self, cancellation_token: CancellationToken, - pool_id: String, + pool_id: u32, ) -> Result<()> { let provider_address = self.provider_wallet.wallet.default_signer().address(); let node_address = self.node_wallet.wallet.default_signer().address(); @@ -81,9 +81,8 @@ impl<'c> ComputeNodeOperations<'c> { } // Check rewards for the current compute pool - if let Ok(pool_id_u32) = pool_id.parse::() { match contracts.compute_pool.calculate_node_rewards( - U256::from(pool_id_u32), + U256::from(pool_id), node_address, ).await { Ok((claimable, locked)) => { @@ -96,9 +95,9 @@ impl<'c> ComputeNodeOperations<'c> { } } Err(e) => { - log::debug!("Failed to check rewards for pool {pool_id_u32}: {e}"); + log::debug!("Failed to check rewards for pool {pool_id}: {e}"); } - } + } first_check = false; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 78b8927c..3c79b1b6 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,10 +1,12 @@ use anyhow::Context as _; use anyhow::Result; +use p2p::InviteRequestUrl; use p2p::Node; use p2p::NodeBuilder; use p2p::PeerId; use p2p::Response; use p2p::{IncomingMessage, Libp2pIncomingMessage, OutgoingMessage}; +use shared::web3::contracts::core::builder::Contracts; use shared::web3::wallet::Wallet; use std::collections::HashMap; use std::collections::HashSet; @@ -15,6 +17,9 @@ use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; use crate::docker::DockerService; +use crate::operations::heartbeat::service::HeartbeatService; +use crate::state::system_state::SystemState; +use shared::web3::wallet::WalletProvider; pub(crate) struct Service { node: Node, @@ -24,11 +29,16 @@ pub(crate) struct Service { } impl Service { + #[allow(clippy::too_many_arguments)] pub(crate) fn new( port: u16, wallet: Wallet, validator_addresses: HashSet, docker_service: Arc, + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = @@ -42,6 +52,10 @@ impl Service { outgoing_messages, validator_addresses, docker_service, + heartbeat_service, + system_state, + contracts, + provider_wallet, ), }) } @@ -97,20 +111,35 @@ fn build_p2p_node( #[derive(Clone)] struct Context { authorized_peers: Arc>>, + wallet: Wallet, + validator_addresses: Arc>, + + // for validator authentication requests ongoing_auth_challenges: Arc>>, // use request_id? nonce_cache: Arc>>, - wallet: Wallet, outgoing_messages: Sender, - validator_addresses: Arc>, + + // for get_task_logs and restart requests docker_service: Arc, + + // for invite requests + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, } impl Context { + #[allow(clippy::too_many_arguments)] fn new( wallet: Wallet, outgoing_messages: Sender, validator_addresses: HashSet, docker_service: Arc, + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, ) -> Self { Self { authorized_peers: Arc::new(RwLock::new(HashSet::new())), @@ -120,6 +149,10 @@ impl Context { outgoing_messages, validator_addresses: Arc::new(validator_addresses), docker_service, + heartbeat_service, + system_state, + contracts, + provider_wallet, } } } @@ -181,9 +214,12 @@ async fn handle_incoming_request( .await .context("failed to handle HardwareChallenge request")? } - p2p::Request::Invite(_) => { + p2p::Request::Invite(req) => { tracing::debug!("handling Invite request"); - handle_invite_request(from, request, &context).await + match handle_invite_request(from, req, &context).await { + Ok(()) => p2p::InviteResponse::Ok.into(), + Err(e) => p2p::InviteResponse::Error(e.to_string()).into(), + } } p2p::Request::GetTaskLogs => { tracing::debug!("handling GetTaskLogs request"); @@ -286,19 +322,6 @@ async fn handle_hardware_challenge_request( Ok(response.into()) } -async fn handle_invite_request( - from: PeerId, - _request: p2p::Request, - context: &Context, -) -> Response { - let authorized_peers = context.authorized_peers.read().await; - if !authorized_peers.contains(&from) { - return p2p::InviteResponse::Error("unauthorized".to_string()).into(); - } - - p2p::InviteResponse::Ok.into() -} - async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Response { let authorized_peers = context.authorized_peers.read().await; if !authorized_peers.contains(&from) { @@ -343,3 +366,108 @@ fn handle_incoming_response(response: p2p::Response) { } } } + +async fn handle_invite_request( + from: PeerId, + req: p2p::InviteRequest, + context: &Context, +) -> Result<()> { + use crate::console::Console; + use shared::web3::contracts::helpers::utils::retry_call; + use shared::web3::contracts::structs::compute_pool::PoolStatus; + + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return Err(anyhow::anyhow!( + "unauthorized peer {from} attempted to send invite" + )); + } + + if context.system_state.is_running().await { + anyhow::bail!("heartbeat is currently running and in a compute pool"); + } + + if let Some(pool_id) = context.system_state.compute_pool_id { + if req.pool_id != pool_id { + anyhow::bail!( + "pool ID mismatch: expected {}, got {}", + pool_id, + req.pool_id + ); + } + } + + let invite_bytes = hex::decode(&req.invite).context("failed to decode invite hex")?; + + if invite_bytes.len() < 65 { + anyhow::bail!("invite data is too short, expected at least 65 bytes"); + } + + let contracts = &context.contracts; + let pool_id = alloy::primitives::U256::from(req.pool_id); + + let bytes_array: [u8; 65] = match invite_bytes[..65].try_into() { + Ok(array) => array, + Err(_) => { + anyhow::bail!("failed to convert invite bytes to 65 byte array"); + } + }; + + let provider_address = context.provider_wallet.wallet.default_signer().address(); + + let pool_info = match contracts.compute_pool.get_pool_info(pool_id).await { + Ok(info) => info, + Err(err) => { + anyhow::bail!("failed to get pool info: {err:?}"); + } + }; + + if let PoolStatus::PENDING = pool_info.status { + anyhow::bail!("invalid invite; pool is pending"); + } + + let node_address = vec![context.wallet.wallet.default_signer().address()]; + let signatures = vec![alloy::primitives::FixedBytes::from(&bytes_array)]; + let call = contracts + .compute_pool + .build_join_compute_pool_call( + pool_id, + provider_address, + node_address, + vec![req.nonce], + vec![req.expiration], + signatures, + ) + .map_err(|e| anyhow::anyhow!("failed to build join compute pool call: {e:?}"))?; + + let provider = &context.provider_wallet.provider; + match retry_call(call, 3, provider.clone(), None).await { + Ok(result) => { + Console::section("WORKER JOINED COMPUTE POOL"); + Console::success(&format!( + "Successfully registered on chain with tx: {result}" + )); + Console::info( + "Status", + "Worker is now part of the compute pool and ready to receive tasks", + ); + } + Err(err) => { + anyhow::bail!("failed to join compute pool: {err:?}"); + } + } + + let heartbeat_endpoint = match req.url { + InviteRequestUrl::MasterIpPort(ip, port) => { + format!("http://{ip}:{port}/heartbeat") + } + InviteRequestUrl::MasterUrl(url) => format!("{url}/heartbeat"), + }; + + context + .heartbeat_service + .start(heartbeat_endpoint) + .await + .context("failed to start heartbeat service")?; + Ok(()) +} diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index fd8f0a3a..e419c870 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -34,7 +34,7 @@ pub(crate) struct SystemState { endpoint: Arc>>, state_dir_overwrite: Option, disable_state_storing: bool, - pub compute_pool_id: Option, + pub compute_pool_id: Option, pub worker_p2p_seed: Option, pub p2p_id: Option, @@ -45,7 +45,7 @@ impl SystemState { pub(crate) fn new( state_dir: Option, disable_state_storing: bool, - compute_pool_id: Option, + compute_pool_id: Option, ) -> Self { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); From 7bd100916bf65ca251cea390b38e954d67966de0 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:05:29 -0400 Subject: [PATCH 09/40] add full hardware challenge message --- Cargo.lock | 2 + crates/p2p/Cargo.toml | 2 + crates/p2p/src/challenge_message.rs | 89 +++++++++++++++++++++++++++++ crates/p2p/src/lib.rs | 45 +++++++++++---- crates/p2p/src/message.rs | 57 +++++++++++++++--- 5 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 crates/p2p/src/challenge_message.rs diff --git a/Cargo.lock b/Cargo.lock index a64a46e4..ae652ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6263,8 +6263,10 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "nalgebra", "serde", "tokio", + "tokio-util", "void", ] diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 2d5d94ff..ba52d570 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -8,8 +8,10 @@ libp2p = { version = "0.54", features = ["request-response", "identify", "ping", void = "1.0" anyhow = {workspace = true} +nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} +tokio-util = { workspace = true, features = ["rt"] } [lints] workspace = true diff --git a/crates/p2p/src/challenge_message.rs b/crates/p2p/src/challenge_message.rs new file mode 100644 index 00000000..639cc602 --- /dev/null +++ b/crates/p2p/src/challenge_message.rs @@ -0,0 +1,89 @@ +use nalgebra::DMatrix; +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; +use std::fmt; + +#[derive(Debug, Clone)] +pub struct FixedF64(pub f64); + +impl Serialize for FixedF64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // adjust precision as needed + serializer.serialize_str(&format!("{:.12}", self.0)) + } +} + +impl<'de> Deserialize<'de> for FixedF64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FixedF64Visitor; + + impl Visitor<'_> for FixedF64Visitor { + type Value = FixedF64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string representing a fixed precision float") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + value + .parse::() + .map(FixedF64) + .map_err(|_| E::custom(format!("invalid f64: {value}"))) + } + } + + deserializer.deserialize_str(FixedF64Visitor) + } +} + +impl PartialEq for FixedF64 { + fn eq(&self, other: &Self) -> bool { + format!("{:.10}", self.0) == format!("{:.10}", other.0) + } +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeRequest { + pub rows_a: usize, + pub cols_a: usize, + pub data_a: Vec, + pub rows_b: usize, + pub cols_b: usize, + pub data_b: Vec, + pub timestamp: Option, +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeResponse { + pub result: Vec, + pub rows: usize, + pub cols: usize, +} + +pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { + // convert FixedF64 to f64 + let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); + let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); + let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); + let b = DMatrix::from_vec(req.rows_b, req.cols_b, data_b); + let c = a * b; + + let data_c: Vec = c.iter().map(|x| FixedF64(*x)).collect(); + + ChallengeResponse { + rows: c.nrows(), + cols: c.ncols(), + result: data_c, + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 9f07d8d0..46105a36 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -5,20 +5,28 @@ use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; use libp2p::yamux; -use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; -use libp2p::{identity, PeerId, Transport}; +use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; +mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; +// TODO: put these in a mod +pub use challenge_message::*; +pub use message::*; + +pub type Libp2pIncomingMessage = libp2p::request_response::Message; +pub type ResponseChannel = libp2p::request_response::ResponseChannel; +pub type PeerId = libp2p::PeerId; +pub type Multiaddr = libp2p::Multiaddr; + pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); // TODO: force this to be passed by the user @@ -29,6 +37,7 @@ pub struct Node { listen_addrs: Vec, swarm: Swarm, bootnodes: Vec, + cancellation_token: tokio_util::sync::CancellationToken, // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -66,6 +75,7 @@ impl Node { listen_addrs, mut swarm, bootnodes, + cancellation_token, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -93,6 +103,10 @@ impl Node { loop { tokio::select! { + _ = cancellation_token.cancelled() => { + println!("cancellation token triggered, shutting down node"); + break Ok(()); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -143,6 +157,7 @@ pub struct NodeBuilder { agent_version: Option, protocols: Protocols, bootnodes: Vec, + cancellation_token: Option, } impl Default for NodeBuilder { @@ -160,6 +175,7 @@ impl NodeBuilder { agent_version: None, protocols: Protocols::new(), bootnodes: Vec::new(), + cancellation_token: None, } } @@ -224,6 +240,14 @@ impl NodeBuilder { self } + pub fn with_cancellation_token( + mut self, + cancellation_token: tokio_util::sync::CancellationToken, + ) -> Self { + self.cancellation_token = Some(cancellation_token); + self + } + pub fn try_build( self, ) -> Result<( @@ -238,6 +262,7 @@ impl NodeBuilder { agent_version, protocols, bootnodes, + cancellation_token, } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); @@ -279,6 +304,7 @@ impl NodeBuilder { bootnodes, incoming_message_tx, outgoing_message_rx, + cancellation_token: cancellation_token.unwrap_or_default(), }, incoming_message_rx, outgoing_message_tx, @@ -343,9 +369,8 @@ mod test { println!("received request from node1"); // send response from node2->node1 - let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse { - logs: Ok(vec!["log1".to_string(), "log2".to_string()]), - }); + let response = + message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); outgoing_message_tx2 .send(response.into_outgoing_message(channel)) .await @@ -359,9 +384,9 @@ mod test { else { panic!("expected a GetTaskLogs response message"); }; - assert_eq!( - response.logs, - Ok(vec!["log1".to_string(), "log2".to_string()]) - ); + let message::GetTaskLogsResponse::Ok(logs) = response else { + panic!("expected a successful GetTaskLogs response"); + }; + assert_eq!(logs, "logs"); } } diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 9013a8ca..c0fd2d66 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -2,6 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; +use crate::ChallengeRequest; +use crate::ChallengeResponse; + #[derive(Debug)] pub struct IncomingMessage { pub peer: PeerId, @@ -72,13 +75,19 @@ pub struct ValidatorAuthenticationInitiationRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { - pub signed_message: String, + pub signature: String, pub message: String, } +impl From for Response { + fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationSolutionRequest { - pub signed_message: String, + pub signature: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -87,18 +96,30 @@ pub enum ValidatorAuthenticationSolutionResponse { Rejected, } +impl From for Response { + fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { - pub challenge: String, // TODO + pub challenge: ChallengeRequest, pub timestamp: SystemTime, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { - pub response: String, // TODO + pub response: ChallengeResponse, pub timestamp: SystemTime, } +impl From for Response { + fn from(response: HardwareChallengeResponse) -> Self { + Response::HardwareChallenge(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteRequestUrl { MasterUrl(String), @@ -121,12 +142,32 @@ pub enum InviteResponse { Error(String), } +impl From for Response { + fn from(response: InviteResponse) -> Self { + Response::Invite(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GetTaskLogsResponse { - pub logs: Result, String>, +pub enum GetTaskLogsResponse { + Ok(String), + Error(String), +} + +impl From for Response { + fn from(response: GetTaskLogsResponse) -> Self { + Response::GetTaskLogs(response) + } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RestartResponse { - pub result: Result<(), String>, +pub enum RestartResponse { + Ok, + Error(String), +} + +impl From for Response { + fn from(response: RestartResponse) -> Self { + Response::Restart(response) + } } From d6c1a4af1d0dd7a3d236ab23d032f78ec336ef09 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:07:13 -0400 Subject: [PATCH 10/40] move messages to their own dir --- crates/p2p/src/lib.rs | 3 --- .../{challenge_message.rs => message/hardware_challenge.rs} | 0 crates/p2p/src/{message.rs => message/mod.rs} | 5 +++-- 3 files changed, 3 insertions(+), 5 deletions(-) rename crates/p2p/src/{challenge_message.rs => message/hardware_challenge.rs} (100%) rename crates/p2p/src/{message.rs => message/mod.rs} (98%) diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 46105a36..208fb597 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -11,15 +11,12 @@ use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; -mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; use protocol::Protocols; -// TODO: put these in a mod -pub use challenge_message::*; pub use message::*; pub type Libp2pIncomingMessage = libp2p::request_response::Message; diff --git a/crates/p2p/src/challenge_message.rs b/crates/p2p/src/message/hardware_challenge.rs similarity index 100% rename from crates/p2p/src/challenge_message.rs rename to crates/p2p/src/message/hardware_challenge.rs diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message/mod.rs similarity index 98% rename from crates/p2p/src/message.rs rename to crates/p2p/src/message/mod.rs index c0fd2d66..64486533 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message/mod.rs @@ -2,8 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; -use crate::ChallengeRequest; -use crate::ChallengeResponse; +mod hardware_challenge; + +pub use hardware_challenge::*; #[derive(Debug)] pub struct IncomingMessage { From ea46820b8a5dae8878e32726e80a65b8fee66911 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:16:19 -0400 Subject: [PATCH 11/40] add general request-response protocol --- crates/p2p/src/lib.rs | 5 +++ crates/p2p/src/message/mod.rs | 62 ++++++++++++++++++++++++++++++++++- crates/p2p/src/protocol.rs | 8 +++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 208fb597..6e2efca3 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -221,6 +221,11 @@ impl NodeBuilder { self } + pub fn with_general(mut self) -> Self { + self.protocols = self.protocols.with_general(); + self + } + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { self.bootnodes.push(bootnode); self diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index 64486533..adff99ac 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -4,7 +4,7 @@ use std::time::SystemTime; mod hardware_challenge; -pub use hardware_challenge::*; +pub use hardware_challenge::*; #[derive(Debug)] pub struct IncomingMessage { @@ -31,6 +31,7 @@ pub enum Request { Invite(InviteRequest), GetTaskLogs, Restart, + General(GeneralRequest), } impl Request { @@ -46,6 +47,7 @@ pub enum Response { Invite(InviteResponse), GetTaskLogs(GetTaskLogsResponse), Restart(RestartResponse), + General(GeneralResponse), } impl Response { @@ -63,17 +65,35 @@ pub enum ValidatorAuthenticationRequest { Solution(ValidatorAuthenticationSolutionRequest), } +impl From for Request { + fn from(request: ValidatorAuthenticationRequest) -> Self { + Request::ValidatorAuthentication(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationResponse { Initiation(ValidatorAuthenticationInitiationResponse), Solution(ValidatorAuthenticationSolutionResponse), } +impl From for Response { + fn from(response: ValidatorAuthenticationResponse) -> Self { + Response::ValidatorAuthentication(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationRequest { pub message: String, } +impl From for Request { + fn from(request: ValidatorAuthenticationInitiationRequest) -> Self { + Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Initiation(request)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { pub signature: String, @@ -91,6 +111,12 @@ pub struct ValidatorAuthenticationSolutionRequest { pub signature: String, } +impl From for Request { + fn from(request: ValidatorAuthenticationSolutionRequest) -> Self { + Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Solution(request)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationSolutionResponse { Granted, @@ -109,6 +135,12 @@ pub struct HardwareChallengeRequest { pub timestamp: SystemTime, } +impl From for Request { + fn from(request: HardwareChallengeRequest) -> Self { + Request::HardwareChallenge(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { pub response: ChallengeResponse, @@ -137,6 +169,12 @@ pub struct InviteRequest { pub nonce: [u8; 32], } +impl From for Request { + fn from(request: InviteRequest) -> Self { + Request::Invite(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteResponse { Ok, @@ -172,3 +210,25 @@ impl From for Response { Response::Restart(response) } } + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeneralRequest { + data: Vec, +} + +impl From for Request { + fn from(request: GeneralRequest) -> Self { + Request::General(request) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeneralResponse { + data: Vec, +} + +impl From for Response { + fn from(response: GeneralResponse) -> Self { + Response::General(response) + } +} diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index 5186ac44..df423ef8 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -13,6 +13,8 @@ pub(crate) enum Protocol { GetTaskLogs, // any -> worker Restart, + // any -> any + General, } impl Protocol { @@ -25,6 +27,7 @@ impl Protocol { Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), Protocol::Restart => StreamProtocol::new("/prime/restart/1.0.0"), + Protocol::General => StreamProtocol::new("/prime/general/1.0.0"), } } } @@ -61,6 +64,11 @@ impl Protocols { self.0.insert(Protocol::Restart); self } + + pub(crate) fn with_general(mut self) -> Self { + self.0.insert(Protocol::General); + self + } } impl IntoIterator for Protocols { From 7288261a1a55f65d6463d6b5441f0e8e398106fb Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:36:33 -0400 Subject: [PATCH 12/40] update SystemState to store libp2p keypair --- Cargo.lock | 1 - crates/p2p/src/lib.rs | 1 + crates/worker/Cargo.toml | 1 - crates/worker/src/cli/command.rs | 44 +- crates/worker/src/docker/service.rs | 17 +- crates/worker/src/docker/taskbridge/bridge.rs | 10 +- crates/worker/src/operations/compute_node.rs | 19 - .../src/operations/heartbeat/service.rs | 6 +- crates/worker/src/p2p/mod.rs | 20 +- crates/worker/src/p2p/service.rs | 736 ------------------ crates/worker/src/state/system_state.rs | 137 ++-- crates/worker/src/utils/mod.rs | 1 - crates/worker/src/utils/p2p.rs | 60 -- 13 files changed, 117 insertions(+), 936 deletions(-) delete mode 100644 crates/worker/src/p2p/service.rs delete mode 100644 crates/worker/src/utils/p2p.rs diff --git a/Cargo.lock b/Cargo.lock index 200a21e8..9964cd8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10340,7 +10340,6 @@ dependencies = [ "hex", "homedir", "indicatif", - "iroh", "lazy_static", "libc", "log", diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..b9f1ac48 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -23,6 +23,7 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 3ee03e12..43fc4a53 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -56,7 +56,6 @@ tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } tracing-log = "0.2.0" time = "0.3.41" -iroh = { workspace = true } rand_v8 = { workspace = true } rand_core_v6 = { workspace = true } dashmap = "6.1.0" diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index db28deb2..b5a56bdd 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -215,7 +215,7 @@ pub async fn execute_command( let state = Arc::new(SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, - Some(*compute_pool_id), + *compute_pool_id, )); let private_key_provider = if let Some(key) = private_key_provider { @@ -513,7 +513,6 @@ pub async fn execute_command( .default_signer() .address() .to_string(), - state.get_p2p_seed(), *disable_host_network_mode, )); @@ -720,6 +719,7 @@ pub async fn execute_command( let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( + state.get_p2p_keypair().clone(), port, node_wallet_instance.clone(), validator_addresses, @@ -802,7 +802,7 @@ pub async fn execute_command( // Start monitoring compute node status on chain provider_ops.start_monitoring(provider_ops_cancellation); - let pool_id = state.compute_pool_id.unwrap_or(0); + let pool_id = state.get_compute_pool_id(); if let Err(err) = compute_node_ops.start_monitoring(cancellation_token.clone(), pool_id) { error!("❌ Failed to start node monitoring: {err}"); @@ -1009,7 +1009,7 @@ pub async fn execute_command( std::process::exit(1); } }; - let state = Arc::new(SystemState::new(None, true, None)); + /* Initialize dependencies - services, contracts, operations */ @@ -1023,18 +1023,18 @@ pub async fn execute_command( .build() .unwrap(); - let compute_node_ops = ComputeNodeOperations::new( - &provider_wallet_instance, - &node_wallet_instance, - contracts.clone(), - state.clone(), - ); + let provider_address = provider_wallet_instance.wallet.default_signer().address(); + let node_address = node_wallet_instance.wallet.default_signer().address(); let provider_ops = ProviderOperations::new(provider_wallet_instance.clone(), contracts.clone(), false); - let compute_node_exists = match compute_node_ops.check_compute_node_exists().await { - Ok(exists) => exists, + let compute_node_exists = match contracts + .compute_registry + .get_node(provider_address, node_address) + .await + { + Ok(_) => true, Err(e) => { Console::user_error(&format!("❌ Failed to check if compute node exists: {e}")); std::process::exit(1); @@ -1061,7 +1061,7 @@ pub async fn execute_command( std::process::exit(1); } } - match compute_node_ops.remove_compute_node().await { + match remove_compute_node(contracts, provider_address, node_address).await { Ok(_removed_node) => { Console::success("Compute node removed"); match provider_ops.reclaim_stake(U256::from(0)).await { @@ -1087,3 +1087,21 @@ pub async fn execute_command( } } } + +use alloy::primitives::Address; +use shared::web3::contracts::core::builder::Contracts; +use shared::web3::wallet::WalletProvider; + +async fn remove_compute_node( + contracts: Contracts, + provider_address: Address, + node_address: Address, +) -> Result> { + Console::title("🔄 Removing compute node"); + let remove_node_tx = contracts + .prime_network + .remove_compute_node(provider_address, node_address) + .await?; + Console::success(&format!("Remove node tx: {remove_node_tx:?}")); + Ok(true) +} diff --git a/crates/worker/src/docker/service.rs b/crates/worker/src/docker/service.rs index 63425e2d..da15b88e 100644 --- a/crates/worker/src/docker/service.rs +++ b/crates/worker/src/docker/service.rs @@ -24,7 +24,6 @@ pub(crate) struct DockerService { system_memory_mb: Option, task_bridge_socket_path: String, node_address: String, - p2p_seed: Option, } const TASK_PREFIX: &str = "prime-task"; @@ -39,7 +38,6 @@ impl DockerService { task_bridge_socket_path: String, storage_path: String, node_address: String, - p2p_seed: Option, disable_host_network_mode: bool, ) -> Self { let docker_manager = @@ -52,7 +50,6 @@ impl DockerService { system_memory_mb, task_bridge_socket_path, node_address, - p2p_seed, } } @@ -177,7 +174,6 @@ impl DockerService { let system_memory_mb = self.system_memory_mb; let task_bridge_socket_path = self.task_bridge_socket_path.clone(); let node_address = self.node_address.clone(); - let p2p_seed = self.p2p_seed; let handle = tokio::spawn(async move { let Some(payload) = state_clone.get_current_task().await else { return; @@ -185,11 +181,7 @@ impl DockerService { let cmd = match payload.cmd { Some(cmd_vec) => { cmd_vec.into_iter().map(|arg| { - let mut processed_arg = arg.replace("${SOCKET_PATH}", &task_bridge_socket_path); - if let Some(seed) = p2p_seed { - processed_arg = processed_arg.replace("${WORKER_P2P_SEED}", &seed.to_string()); - } - processed_arg + arg.replace("${SOCKET_PATH}", &task_bridge_socket_path) }).collect() } None => vec!["sleep".to_string(), "infinity".to_string()], @@ -199,10 +191,7 @@ impl DockerService { if let Some(env) = &payload.env_vars { // Clone env vars and replace ${SOCKET_PATH} in values for (key, value) in env.iter() { - let mut processed_value = value.replace("${SOCKET_PATH}", &task_bridge_socket_path); - if let Some(seed) = p2p_seed { - processed_value = processed_value.replace("${WORKER_P2P_SEED}", &seed.to_string()); - } + let processed_value = value.replace("${SOCKET_PATH}", &task_bridge_socket_path); env_vars.insert(key.clone(), processed_value); } } @@ -432,7 +421,6 @@ mod tests { "/tmp/com.prime.miner/metrics.sock".to_string(), "/tmp/test-storage".to_string(), Address::ZERO.to_string(), - None, false, ); let task = Task { @@ -481,7 +469,6 @@ mod tests { test_socket_path.to_string(), "/tmp/test-storage".to_string(), Address::ZERO.to_string(), - Some(12345), // p2p_seed for testing false, ); diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 65a28f76..80b8aee7 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -473,7 +473,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -506,7 +506,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -541,7 +541,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -590,7 +590,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -639,7 +639,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), diff --git a/crates/worker/src/operations/compute_node.rs b/crates/worker/src/operations/compute_node.rs index 7cbdbda2..00f147a7 100644 --- a/crates/worker/src/operations/compute_node.rs +++ b/crates/worker/src/operations/compute_node.rs @@ -164,23 +164,4 @@ impl<'c> ComputeNodeOperations<'c> { Console::success(&format!("Add node tx: {add_node_tx:?}")); Ok(true) } - - pub(crate) async fn remove_compute_node(&self) -> Result> { - Console::title("🔄 Removing compute node"); - - if !self.check_compute_node_exists().await? { - return Ok(false); - } - - Console::progress("Removing compute node"); - let provider_address = self.provider_wallet.wallet.default_signer().address(); - let node_address = self.node_wallet.wallet.default_signer().address(); - let remove_node_tx = self - .contracts - .prime_network - .remove_compute_node(provider_address, node_address) - .await?; - Console::success(&format!("Remove node tx: {remove_node_tx:?}")); - Ok(true) - } } diff --git a/crates/worker/src/operations/heartbeat/service.rs b/crates/worker/src/operations/heartbeat/service.rs index 0d77d783..1b002cae 100644 --- a/crates/worker/src/operations/heartbeat/service.rs +++ b/crates/worker/src/operations/heartbeat/service.rs @@ -143,7 +143,7 @@ async fn send_heartbeat( wallet: Wallet, docker_service: Arc, metrics_store: Arc, - p2p_id: Option, + p2p_id: p2p::PeerId, ) -> Result { if endpoint.is_none() { return Err(HeartbeatError::RequestFailed); @@ -176,7 +176,7 @@ async fn send_heartbeat( .to_string(), ), timestamp: Some(ts), - p2p_id, + p2p_id: Some(p2p_id.to_string()), // TODO: this should always be `Some` task_details, } } else { @@ -188,7 +188,7 @@ async fn send_heartbeat( .to_string(), ), timestamp: Some(ts), - p2p_id, + p2p_id: Some(p2p_id.to_string()), // TODO: this should always be `Some` ..Default::default() } }; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 32894683..6a851c01 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -31,6 +31,7 @@ pub(crate) struct Service { impl Service { #[allow(clippy::too_many_arguments)] pub(crate) fn new( + keypair: p2p::Keypair, port: u16, wallet: Wallet, validator_addresses: HashSet, @@ -42,7 +43,8 @@ impl Service { cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = - build_p2p_node(port, cancellation_token.clone()).context("failed to build p2p node")?; + build_p2p_node(keypair, port, cancellation_token.clone()) + .context("failed to build p2p node")?; Ok(Self { node, incoming_messages, @@ -94,10 +96,12 @@ impl Service { } fn build_p2p_node( + keypair: p2p::Keypair, port: u16, cancellation_token: CancellationToken, ) -> Result<(Node, Receiver, Sender)> { NodeBuilder::new() + .with_keypair(keypair) .with_port(port) .with_validator_authentication() .with_hardware_challenge() @@ -393,14 +397,12 @@ async fn handle_invite_request( anyhow::bail!("heartbeat is currently running and in a compute pool"); } - if let Some(pool_id) = context.system_state.compute_pool_id { - if req.pool_id != pool_id { - anyhow::bail!( - "pool ID mismatch: expected {}, got {}", - pool_id, - req.pool_id - ); - } + if req.pool_id != context.system_state.get_compute_pool_id() { + anyhow::bail!( + "pool ID mismatch: expected {}, got {}", + context.system_state.get_compute_pool_id(), + req.pool_id + ); } let invite_bytes = hex::decode(&req.invite).context("failed to decode invite hex")?; diff --git a/crates/worker/src/p2p/service.rs b/crates/worker/src/p2p/service.rs deleted file mode 100644 index 51a68405..00000000 --- a/crates/worker/src/p2p/service.rs +++ /dev/null @@ -1,736 +0,0 @@ -use crate::console::Console; -use crate::docker::DockerService; -use crate::operations::heartbeat::service::HeartbeatService; -use crate::state::system_state::SystemState; -use alloy::primitives::{Address, FixedBytes, U256}; -use anyhow::Result; -use dashmap::DashMap; -use iroh::endpoint::Incoming; -use iroh::{Endpoint, RelayMode, SecretKey}; -use lazy_static::lazy_static; -use log::{debug, error, info, warn}; -use rand_v8::Rng; -use shared::models::challenge::calc_matrix; -use shared::models::invite::InviteRequest; -use shared::p2p::messages::MAX_MESSAGE_SIZE; -use shared::p2p::messages::{P2PMessage, P2PRequest, P2PResponse}; -use shared::p2p::protocol::PRIME_P2P_PROTOCOL; -use shared::security::request_signer::sign_message; -use shared::web3::contracts::core::builder::Contracts; -use shared::web3::contracts::helpers::utils::retry_call; -use shared::web3::contracts::structs::compute_pool::PoolStatus; -use shared::web3::wallet::{Wallet, WalletProvider}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tokio_util::sync::CancellationToken; - -lazy_static! { - static ref NONCE_CACHE: DashMap = DashMap::new(); -} - -#[derive(Clone)] -pub(crate) struct P2PContext { - pub docker_service: Arc, - pub heartbeat_service: Arc, - pub system_state: Arc, - pub contracts: Contracts, - pub node_wallet: Wallet, - pub provider_wallet: Wallet, -} - -#[derive(Clone)] -pub(crate) struct P2PService { - endpoint: Endpoint, - secret_key: SecretKey, - node_id: String, - listening_addrs: Vec, - cancellation_token: CancellationToken, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, -} - -enum EndpointLoopResult { - Shutdown, - EndpointClosed, -} - -impl P2PService { - /// Create a new P2P service with a unique worker identity - pub(crate) async fn new( - worker_p2p_seed: Option, - cancellation_token: CancellationToken, - context: Option, - wallet: Wallet, - allowed_addresses: Vec
, - ) -> Result { - // Generate or derive the secret key for this worker - let secret_key = if let Some(seed) = worker_p2p_seed { - // Derive from seed for deterministic identity - let mut seed_bytes = [0u8; 32]; - seed_bytes[..8].copy_from_slice(&seed.to_le_bytes()); - SecretKey::from_bytes(&seed_bytes) - } else { - let mut rng = rand_v8::thread_rng(); - SecretKey::generate(&mut rng) - }; - - let node_id = secret_key.public().to_string(); - info!("Starting P2P service with node ID: {node_id}"); - - // Create the endpoint - let endpoint = Endpoint::builder() - .secret_key(secret_key.clone()) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .discovery_n0() - .relay_mode(RelayMode::Default) - .bind() - .await?; - - // Get listening addresses - let node_addr = endpoint.node_addr().await?; - let listening_addrs = node_addr - .direct_addresses - .iter() - .map(|addr| addr.to_string()) - .collect::>(); - - info!("P2P service listening on: {listening_addrs:?}"); - - Ok(Self { - endpoint, - secret_key, - node_id, - listening_addrs, - cancellation_token, - context, - allowed_addresses, - wallet, - }) - } - - /// Get the P2P node ID - pub(crate) fn node_id(&self) -> &str { - &self.node_id - } - - /// Get the listening addresses - pub(crate) fn listening_addresses(&self) -> &[String] { - &self.listening_addrs - } - - /// Recreate the endpoint with the same identity - async fn recreate_endpoint(&self) -> Result { - info!("Recreating P2P endpoint with node ID: {}", self.node_id); - - let endpoint = Endpoint::builder() - .secret_key(self.secret_key.clone()) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .discovery_n0() - .relay_mode(RelayMode::Default) - .bind() - .await?; - - let node_addr = endpoint.node_addr().await?; - let listening_addrs = node_addr - .direct_addresses - .iter() - .map(|addr| addr.to_string()) - .collect::>(); - - info!("P2P endpoint recreated, listening on: {listening_addrs:?}"); - Ok(endpoint) - } - /// Start accepting incoming connections with automatic recovery - pub(crate) fn start(&self) -> Result<()> { - let service = Arc::new(self.clone()); - let cancellation_token = self.cancellation_token.clone(); - - tokio::spawn(async move { - service.run_with_recovery(cancellation_token).await; - }); - - Ok(()) - } - - /// Run the P2P service with automatic endpoint recovery - async fn run_with_recovery(&self, cancellation_token: CancellationToken) { - let mut endpoint = self.endpoint.clone(); - let mut retry_delay = Duration::from_secs(1); - const MAX_RETRY_DELAY: Duration = Duration::from_secs(60); - - loop { - tokio::select! { - _ = cancellation_token.cancelled() => { - info!("P2P service shutting down"); - break; - } - result = self.run_endpoint_loop(&endpoint, &cancellation_token) => { - match result { - EndpointLoopResult::Shutdown => break, - EndpointLoopResult::EndpointClosed => { - warn!("P2P endpoint closed, attempting recovery in {retry_delay:?}"); - - tokio::select! { - _ = cancellation_token.cancelled() => break, - _ = tokio::time::sleep(retry_delay) => {} - } - - match self.recreate_endpoint().await { - Ok(new_endpoint) => { - info!("P2P endpoint successfully recovered"); - endpoint = new_endpoint; - retry_delay = Duration::from_secs(1); - } - Err(e) => { - error!("Failed to recreate P2P endpoint: {e}"); - retry_delay = std::cmp::min(retry_delay * 2, MAX_RETRY_DELAY); - } - } - } - } - } - } - } - } - - /// Run the main endpoint acceptance loop - async fn run_endpoint_loop( - &self, - endpoint: &Endpoint, - cancellation_token: &CancellationToken, - ) -> EndpointLoopResult { - let context = self.context.clone(); - let allowed_addresses = self.allowed_addresses.clone(); - let wallet = self.wallet.clone(); - - loop { - tokio::select! { - _ = cancellation_token.cancelled() => { - return EndpointLoopResult::Shutdown; - } - incoming = endpoint.accept() => { - if let Some(incoming) = incoming { - tokio::spawn(Self::handle_connection(incoming, context.clone(), allowed_addresses.clone(), wallet.clone())); - } else { - return EndpointLoopResult::EndpointClosed; - } - } - } - } - } - - /// Handle an incoming connection - async fn handle_connection( - incoming: Incoming, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, - ) { - match incoming.await { - Ok(connection) => { - match connection.accept_bi().await { - Ok((send, recv)) => { - if let Err(e) = - Self::handle_stream(send, recv, context, allowed_addresses, wallet) - .await - { - error!("Error handling stream: {e}"); - } - // Wait a bit before closing to ensure client has processed response - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - } - Err(e) => { - error!("Failed to accept bi-stream: {e}"); - connection.close(1u32.into(), b"stream error"); - } - } - } - Err(e) => { - // Only log as debug for protocol mismatches, which are expected - if e.to_string() - .contains("peer doesn't support any known protocol") - { - debug!("Connection attempt with unsupported protocol: {e}"); - } else { - error!("Failed to accept connection: {e}"); - } - } - } - } - - /// Read a message from the stream - async fn read_message(recv: &mut iroh::endpoint::RecvStream) -> Result { - // Read message length - let mut msg_len_bytes = [0u8; 4]; - match recv.read_exact(&mut msg_len_bytes).await { - Ok(_) => {} - Err(e) => { - debug!("Stream read ended: {e}"); - return Err(anyhow::anyhow!("Stream closed")); - } - } - let msg_len = u32::from_be_bytes(msg_len_bytes) as usize; - - // Enforce maximum message size - if msg_len > MAX_MESSAGE_SIZE { - error!("Message size {msg_len} exceeds maximum allowed size {MAX_MESSAGE_SIZE}"); - return Err(anyhow::anyhow!("Message too large")); - } - - let mut msg_bytes = vec![0u8; msg_len]; - recv.read_exact(&mut msg_bytes).await?; - - let request: P2PRequest = serde_json::from_slice(&msg_bytes) - .map_err(|e| anyhow::anyhow!("Failed to deserialize P2P request: {}", e))?; - - debug!("Received P2P request: {request:?}"); - Ok(request) - } - - async fn write_response( - send: &mut iroh::endpoint::SendStream, - response: P2PResponse, - ) -> Result<()> { - let response_bytes = serde_json::to_vec(&response)?; - - // Check response size before sending - if response_bytes.len() > MAX_MESSAGE_SIZE { - error!( - "Response size {} exceeds maximum allowed size {}", - response_bytes.len(), - MAX_MESSAGE_SIZE - ); - return Err(anyhow::anyhow!("Response too large")); - } - - send.write_all(&(response_bytes.len() as u32).to_be_bytes()) - .await?; - send.write_all(&response_bytes).await?; - Ok(()) - } - - /// Handle a bidirectional stream - async fn handle_stream( - mut send: iroh::endpoint::SendStream, - mut recv: iroh::endpoint::RecvStream, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, - ) -> Result<()> { - // Handle multiple messages in sequence - let mut is_authorized = false; - let mut current_challenge: Option = None; - - loop { - let Ok(request) = Self::read_message(&mut recv).await else { - break; - }; - - // Handle the request - let response = match request.message { - P2PMessage::Ping { nonce, .. } => { - info!("Received ping with nonce: {nonce}"); - P2PResponse::new( - request.id, - P2PMessage::Pong { - timestamp: SystemTime::now(), - nonce, - }, - ) - } - P2PMessage::RequestAuthChallenge { message } => { - // Generate a fresh cryptographically secure challenge message for this auth attempt - let challenge_bytes: [u8; 32] = rand_v8::rngs::OsRng.gen(); - let challenge_message = hex::encode(challenge_bytes); - - debug!("Received request auth challenge"); - let signature = match sign_message(&message, &wallet).await { - Ok(signature) => signature, - Err(e) => { - error!("Failed to sign message: {e}"); - return Err(anyhow::anyhow!("Failed to sign message: {}", e)); - } - }; - - // Store the challenge message in nonce cache to prevent replay - NONCE_CACHE.insert(challenge_message.clone(), SystemTime::now()); - - // Store the current challenge for this connection - current_challenge = Some(challenge_message.clone()); - - P2PResponse::new( - request.id, - P2PMessage::AuthChallenge { - message: challenge_message, - signed_message: signature, - }, - ) - } - P2PMessage::AuthSolution { signed_message } => { - // Get the challenge message for this connection - debug!("Received auth solution"); - let Some(challenge_message) = ¤t_challenge else { - warn!("No active challenge for auth solution"); - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Check if challenge message has been used before (replay attack prevention) - if !NONCE_CACHE.contains_key(challenge_message) { - warn!("Challenge message not found or expired: {challenge_message}"); - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - } - - // Clean up old nonces (older than 5 minutes) - let cutoff_time = SystemTime::now() - Duration::from_secs(300); - NONCE_CACHE.retain(|_, &mut timestamp| timestamp > cutoff_time); - - // Parse the signature - let Ok(parsed_signature) = - alloy::primitives::Signature::from_str(&signed_message) - else { - // Handle signature parsing error - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Recover address from the challenge message that the client signed - let Ok(recovered_address) = - parsed_signature.recover_address_from_msg(challenge_message) - else { - // Handle address recovery error - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Check if the recovered address is in allowed addresses - NONCE_CACHE.remove(challenge_message); - current_challenge = None; - if allowed_addresses.contains(&recovered_address) { - is_authorized = true; - P2PResponse::new(request.id, P2PMessage::AuthGranted {}) - } else { - P2PResponse::new(request.id, P2PMessage::AuthRejected {}) - } - } - P2PMessage::HardwareChallenge { challenge, .. } if is_authorized => { - info!("Received hardware challenge"); - let challenge_response = calc_matrix(&challenge); - P2PResponse::new( - request.id, - P2PMessage::HardwareChallengeResponse { - response: challenge_response, - timestamp: SystemTime::now(), - }, - ) - } - P2PMessage::Invite(invite) if is_authorized => { - if let Some(context) = &context { - let (status, error) = Self::handle_invite(invite, context).await; - P2PResponse::new(request.id, P2PMessage::InviteResponse { status, error }) - } else { - P2PResponse::new( - request.id, - P2PMessage::InviteResponse { - status: "error".to_string(), - error: Some("No context".to_string()), - }, - ) - } - } - P2PMessage::GetTaskLogs if is_authorized => { - if let Some(context) = &context { - let logs = context.docker_service.get_logs().await; - let response_logs = logs - .map(|log_string| vec![log_string]) - .map_err(|e| e.to_string()); - P2PResponse::new( - request.id, - P2PMessage::GetTaskLogsResponse { - logs: response_logs, - }, - ) - } else { - P2PResponse::new( - request.id, - P2PMessage::GetTaskLogsResponse { logs: Ok(vec![]) }, - ) - } - } - P2PMessage::RestartTask if is_authorized => { - if let Some(context) = &context { - let result = context.docker_service.restart_task().await; - let response_result = result.map_err(|e| e.to_string()); - P2PResponse::new( - request.id, - P2PMessage::RestartTaskResponse { - result: response_result, - }, - ) - } else { - P2PResponse::new( - request.id, - P2PMessage::RestartTaskResponse { result: Ok(()) }, - ) - } - } - _ => { - warn!("Unexpected message type"); - continue; - } - }; - - // Send response - Self::write_response(&mut send, response).await?; - } - - Ok(()) - } - - async fn handle_invite( - invite: InviteRequest, - context: &P2PContext, - ) -> (String, Option) { - if context.system_state.is_running().await { - return ( - "error".to_string(), - Some("Heartbeat is currently running and in a compute pool".to_string()), - ); - } - if let Some(pool_id) = context.system_state.compute_pool_id.clone() { - if invite.pool_id.to_string() != pool_id { - return ("error".to_string(), Some("Invalid pool ID".to_string())); - } - } - - let invite_bytes = match hex::decode(&invite.invite) { - Ok(bytes) => bytes, - Err(err) => { - error!("Failed to decode invite hex string: {err:?}"); - return ( - "error".to_string(), - Some("Invalid invite format".to_string()), - ); - } - }; - - if invite_bytes.len() < 65 { - return ( - "error".to_string(), - Some("Invite data is too short".to_string()), - ); - } - - let contracts = &context.contracts; - let wallet = &context.node_wallet; - let pool_id = U256::from(invite.pool_id); - - let bytes_array: [u8; 65] = match invite_bytes[..65].try_into() { - Ok(array) => array, - Err(_) => { - error!("Failed to convert invite bytes to fixed-size array"); - return ( - "error".to_string(), - Some("Invalid invite signature format".to_string()), - ); - } - }; - - let provider_address = context.provider_wallet.wallet.default_signer().address(); - - let pool_info = match contracts.compute_pool.get_pool_info(pool_id).await { - Ok(info) => info, - Err(err) => { - error!("Failed to get pool info: {err:?}"); - return ( - "error".to_string(), - Some("Failed to get pool information".to_string()), - ); - } - }; - - if let PoolStatus::PENDING = pool_info.status { - Console::user_error("Pool is pending - Invite is invalid"); - return ( - "error".to_string(), - Some("Pool is pending - Invite is invalid".to_string()), - ); - } - - let node_address = vec![wallet.wallet.default_signer().address()]; - let signatures = vec![FixedBytes::from(&bytes_array)]; - let nonces = vec![invite.nonce]; - let expirations = vec![invite.expiration]; - let call = match contracts.compute_pool.build_join_compute_pool_call( - pool_id, - provider_address, - node_address, - nonces, - expirations, - signatures, - ) { - Ok(call) => call, - Err(err) => { - error!("Failed to build join compute pool call: {err:?}"); - return ( - "error".to_string(), - Some("Failed to build join compute pool call".to_string()), - ); - } - }; - let provider = &context.provider_wallet.provider; - match retry_call(call, 3, provider.clone(), None).await { - Ok(result) => { - Console::section("WORKER JOINED COMPUTE POOL"); - Console::success(&format!( - "Successfully registered on chain with tx: {result}" - )); - Console::info( - "Status", - "Worker is now part of the compute pool and ready to receive tasks", - ); - } - Err(err) => { - error!("Failed to join compute pool: {err:?}"); - return ( - "error".to_string(), - Some(format!("Failed to join compute pool: {err}")), - ); - } - } - let endpoint = if let Some(url) = &invite.master_url { - format!("{url}/heartbeat") - } else { - match (&invite.master_ip, &invite.master_port) { - (Some(ip), Some(port)) => format!("http://{ip}:{port}/heartbeat"), - _ => { - error!("Missing master IP or port in invite request"); - return ( - "error".to_string(), - Some("Missing master IP or port".to_string()), - ); - } - } - }; - - if let Err(err) = context.heartbeat_service.start(endpoint).await { - error!("Failed to start heartbeat service: {err:?}"); - return ( - "error".to_string(), - Some("Failed to start heartbeat service".to_string()), - ); - } - - ("ok".to_string(), None) - } -} - -#[cfg(test)] -mod tests { - use rand_v8::Rng; - use serial_test::serial; - use shared::p2p::P2PClient; - use url::Url; - - use super::*; - - async fn setup_test_service( - include_addresses: bool, - ) -> (P2PService, P2PClient, Address, Address) { - let validator_wallet = shared::web3::wallet::Wallet::new( - "0000000000000000000000000000000000000000000000000000000000000001", - Url::parse("https://mainnet.infura.io/v3/9aa3d95b3bc440fa88ea12eaa4456161").unwrap(), - ) - .unwrap(); - let worker_wallet = shared::web3::wallet::Wallet::new( - "0000000000000000000000000000000000000000000000000000000000000002", - Url::parse("https://mainnet.infura.io/v3/9aa3d95b3bc440fa88ea12eaa4456161").unwrap(), - ) - .unwrap(); - let validator_wallet_address = validator_wallet.wallet.default_signer().address(); - let worker_wallet_address = worker_wallet.wallet.default_signer().address(); - let service = P2PService::new( - None, - CancellationToken::new(), - None, - worker_wallet, - if include_addresses { - vec![validator_wallet_address] - } else { - vec![] - }, - ) - .await - .unwrap(); - let client = P2PClient::new(validator_wallet.clone()).await.unwrap(); - ( - service, - client, - validator_wallet_address, - worker_wallet_address, - ) - } - - #[tokio::test] - #[serial] - async fn test_ping() { - let (service, client, _, worker_wallet_address) = setup_test_service(true).await; - let node_id = service.node_id().to_string(); - let addresses = service.listening_addresses().to_vec(); - let random_nonce = rand_v8::thread_rng().gen::(); - - tokio::spawn(async move { - service.start().unwrap(); - }); - - let ping = P2PMessage::Ping { - nonce: random_nonce, - timestamp: SystemTime::now(), - }; - - let response = client - .send_request(&node_id, &addresses, worker_wallet_address, ping, 20) - .await - .unwrap(); - - let response_nonce = match response { - P2PMessage::Pong { nonce, .. } => nonce, - _ => panic!("Expected Pong message"), - }; - assert_eq!(response_nonce, random_nonce); - } - #[tokio::test] - #[serial] - async fn test_auth_error() { - let (service, client, _, worker_wallet_address) = setup_test_service(false).await; - let node_id = service.node_id().to_string(); - let addresses = service.listening_addresses().to_vec(); - - tokio::spawn(async move { - service.start().unwrap(); - }); - - let ping = P2PMessage::Ping { - nonce: rand_v8::thread_rng().gen::(), - timestamp: SystemTime::now(), - }; - - // Since we set include_addresses to false, the client's wallet address - // is not in the allowed_addresses list, so we expect auth to be rejected - let result = client - .send_request(&node_id, &addresses, worker_wallet_address, ping, 20) - .await; - - assert!( - result.is_err(), - "Expected auth to be rejected but request succeeded" - ); - } -} diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index e419c870..e6ab26da 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -2,7 +2,6 @@ use anyhow::Result; use directories::ProjectDirs; use log::debug; use log::error; -use log::warn; use serde::{Deserialize, Serialize}; use std::fs; use std::path::Path; @@ -10,9 +9,6 @@ use std::path::PathBuf; use std::sync::Arc; use tokio::sync::RwLock; -use crate::utils::p2p::generate_iroh_node_id_from_seed; -use crate::utils::p2p::generate_random_seed; - const STATE_FILENAME: &str = "heartbeat_state.toml"; fn get_default_state_dir() -> Option { @@ -20,11 +16,31 @@ fn get_default_state_dir() -> Option { .map(|proj_dirs| proj_dirs.data_local_dir().to_string_lossy().into_owned()) } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] struct PersistedSystemState { endpoint: Option, - p2p_seed: Option, - worker_p2p_seed: Option, + p2p_keypair: p2p::Keypair, +} + +impl Serialize for PersistedSystemState { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serde_json::to_string(self) + .map_err(serde::ser::Error::custom) + .and_then(|s| serializer.serialize_str(&s)) + } +} + +impl<'de> Deserialize<'de> for PersistedSystemState { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s: String = Deserialize::deserialize(deserializer)?; + serde_json::from_str(&s).map_err(serde::de::Error::custom) + } } #[derive(Debug, Clone)] @@ -34,18 +50,15 @@ pub(crate) struct SystemState { endpoint: Arc>>, state_dir_overwrite: Option, disable_state_storing: bool, - pub compute_pool_id: Option, - - pub worker_p2p_seed: Option, - pub p2p_id: Option, - pub p2p_seed: Option, + compute_pool_id: u32, + p2p_keypair: p2p::Keypair, } impl SystemState { pub(crate) fn new( state_dir: Option, disable_state_storing: bool, - compute_pool_id: Option, + compute_pool_id: u32, ) -> Self { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); @@ -53,9 +66,10 @@ impl SystemState { .map(PathBuf::from) .or_else(|| default_state_dir.map(PathBuf::from)); debug!("State path: {state_path:?}"); + let mut endpoint = None; - let mut p2p_seed: Option = None; - let mut worker_p2p_seed: Option = None; + let mut p2p_keypair = None; + // Try to load state, log info if creating new file if !disable_state_storing { if let Some(path) = &state_path { @@ -67,31 +81,15 @@ impl SystemState { } else if let Ok(Some(loaded_state)) = SystemState::load_state(path) { debug!("Loaded previous state from {state_file:?}"); endpoint = loaded_state.endpoint; - p2p_seed = loaded_state.p2p_seed; - worker_p2p_seed = loaded_state.worker_p2p_seed; + p2p_keypair = Some(loaded_state.p2p_keypair); } else { debug!("Failed to load state from {state_file:?}"); } } } - if p2p_seed.is_none() { - let seed = generate_random_seed(); - p2p_seed = Some(seed); - } - // Generate p2p_id from seed if available - - let p2p_id: Option = - p2p_seed.and_then(|seed| match generate_iroh_node_id_from_seed(seed) { - Ok(id) => Some(id), - Err(_) => { - warn!("Failed to generate p2p_id from seed"); - None - } - }); - if worker_p2p_seed.is_none() { - let seed = generate_random_seed(); - worker_p2p_seed = Some(seed); + if p2p_keypair.is_none() { + p2p_keypair = Some(p2p::Keypair::generate_ed25519()); } Self { @@ -101,44 +99,34 @@ impl SystemState { state_dir_overwrite: state_path.clone(), disable_state_storing, compute_pool_id, - p2p_seed, - p2p_id, - worker_p2p_seed, + p2p_keypair: p2p_keypair.expect("p2p keypair must be Some at this point"), } } + fn save_state(&self, heartbeat_endpoint: Option) -> Result<()> { if !self.disable_state_storing { debug!("Saving state"); if let Some(state_dir) = &self.state_dir_overwrite { - // Get values without block_on - debug!("Saving p2p_seed: {:?}", self.p2p_seed); - - // Ensure p2p_seed is valid before creating state - if let Some(seed) = self.p2p_seed { - let state = PersistedSystemState { - endpoint: heartbeat_endpoint, - p2p_seed: Some(seed), - worker_p2p_seed: self.worker_p2p_seed, - }; - - debug!("state: {state:?}"); - - fs::create_dir_all(state_dir)?; - let state_path = state_dir.join(STATE_FILENAME); - - // Use JSON serialization instead of TOML - match serde_json::to_string_pretty(&state) { - Ok(json_string) => { - fs::write(&state_path, json_string)?; - debug!("Saved state to {state_path:?}"); - } - Err(e) => { - error!("Failed to serialize state: {e}"); - return Err(anyhow::anyhow!("Failed to serialize state: {}", e)); - } + let state = PersistedSystemState { + endpoint: heartbeat_endpoint, + p2p_keypair: self.p2p_keypair.clone(), + }; + + debug!("state: {state:?}"); + + fs::create_dir_all(state_dir)?; + let state_path = state_dir.join(STATE_FILENAME); + + // Use JSON serialization instead of TOML + match serde_json::to_string_pretty(&state) { + Ok(json_string) => { + fs::write(&state_path, json_string)?; + debug!("Saved state to {state_path:?}"); + } + Err(e) => { + error!("Failed to serialize state: {e}"); + return Err(anyhow::anyhow!("Failed to serialize state: {}", e)); } - } else { - warn!("Cannot save state: p2p_seed is None"); } } } @@ -160,12 +148,16 @@ impl SystemState { Ok(None) } - pub(crate) fn get_p2p_seed(&self) -> Option { - self.p2p_seed + pub(crate) fn get_compute_pool_id(&self) -> u32 { + self.compute_pool_id + } + + pub(crate) fn get_p2p_keypair(&self) -> &p2p::Keypair { + &self.p2p_keypair } - pub(crate) fn get_p2p_id(&self) -> Option { - self.p2p_id.clone() + pub(crate) fn get_p2p_id(&self) -> p2p::PeerId { + self.p2p_keypair.public().to_peer_id() } pub(crate) async fn update_last_heartbeat(&self) { @@ -238,9 +230,8 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); - assert!(state.p2p_id.is_some()); let _ = state .set_running(true, Some("http://localhost:8080/heartbeat".to_string())) .await; @@ -266,7 +257,7 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); assert!(!(state.is_running().await)); assert_eq!(state.get_heartbeat_endpoint().await, None); @@ -285,7 +276,7 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); assert_eq!( state.get_heartbeat_endpoint().await, diff --git a/crates/worker/src/utils/mod.rs b/crates/worker/src/utils/mod.rs index 210f1e35..6a79dd07 100644 --- a/crates/worker/src/utils/mod.rs +++ b/crates/worker/src/utils/mod.rs @@ -1,2 +1 @@ pub(crate) mod logging; -pub(crate) mod p2p; diff --git a/crates/worker/src/utils/p2p.rs b/crates/worker/src/utils/p2p.rs deleted file mode 100644 index ef07b28c..00000000 --- a/crates/worker/src/utils/p2p.rs +++ /dev/null @@ -1,60 +0,0 @@ -use iroh::SecretKey; -use rand_v8::Rng; -use rand_v8::{rngs::StdRng, SeedableRng}; -use std::error::Error; - -/// Generate a random seed -pub(crate) fn generate_random_seed() -> u64 { - rand_v8::thread_rng().gen() -} - -// Generate an Iroh node ID from a seed -pub(crate) fn generate_iroh_node_id_from_seed(seed: u64) -> Result> { - // Create a deterministic RNG from the seed - let mut rng = StdRng::seed_from_u64(seed); - - // Generate the secret key using Iroh's method - // This matches exactly how it's done in your Node implementation - let secret_key = SecretKey::generate(&mut rng); - - // Get the node ID (public key) as a string - let node_id = secret_key.public().to_string(); - - Ok(node_id) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_generate_random_seed() { - let seed1 = generate_random_seed(); - let seed2 = generate_random_seed(); - - assert_ne!(seed1, seed2); - } - - #[test] - fn test_known_generation() { - let seed: u32 = 848364385; - let result = generate_iroh_node_id_from_seed(seed as u64).unwrap(); - assert_eq!( - result, - "6ba970180efbd83909282ac741085431f54aa516e1783852978bd529a400d0e9" - ); - assert_eq!(result.len(), 64); - } - - #[test] - fn test_deterministic_generation() { - // Same seed should generate same node_id - let seed = generate_random_seed(); - println!("seed: {}", seed); - let result1 = generate_iroh_node_id_from_seed(seed).unwrap(); - let result2 = generate_iroh_node_id_from_seed(seed).unwrap(); - println!("result1: {}", result1); - - assert_eq!(result1, result2); - } -} From 304f8a849477e47f6f8be35e40529c6fdbf30080 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:45:04 -0400 Subject: [PATCH 13/40] organize and remove unused deps --- Cargo.lock | 134 +-------------------------------------- crates/worker/Cargo.toml | 52 ++++++--------- 2 files changed, 24 insertions(+), 162 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9964cd8d..807c07a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2292,16 +2292,6 @@ dependencies = [ "cipher", ] -[[package]] -name = "ctrlc" -version = "3.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c" -dependencies = [ - "nix 0.29.0", - "windows-sys 0.59.0", -] - [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -3578,7 +3568,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "idna 1.0.3", + "idna", "ipnet", "once_cell", "rand 0.8.5", @@ -3603,7 +3593,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "idna 1.0.3", + "idna", "ipnet", "once_cell", "rand 0.9.1", @@ -4077,16 +4067,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "idna" version = "1.0.3" @@ -4141,12 +4121,6 @@ dependencies = [ "windows 0.52.0", ] -[[package]] -name = "if_chain" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" - [[package]] name = "igd-next" version = "0.14.3" @@ -4241,19 +4215,6 @@ dependencies = [ "serde", ] -[[package]] -name = "indicatif" -version = "0.17.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" -dependencies = [ - "console", - "number_prefix", - "portable-atomic", - "unicode-width", - "web-time", -] - [[package]] name = "inout" version = "0.1.4" @@ -6077,12 +6038,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "nvml-wrapper" version = "0.10.0" @@ -6755,30 +6710,6 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -9239,12 +9170,6 @@ version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.18" @@ -9338,7 +9263,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 1.0.3", + "idna", "percent-encoding", "serde", ] @@ -9469,48 +9394,6 @@ dependencies = [ "url", ] -[[package]] -name = "validator" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92f40481c04ff1f4f61f304d61793c7b56ff76ac1469f1beb199b1445b253bd" -dependencies = [ - "idna 0.4.0", - "lazy_static", - "regex", - "serde", - "serde_derive", - "serde_json", - "url", - "validator_derive", -] - -[[package]] -name = "validator_derive" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc44ca3088bb3ba384d9aecf40c6a23a676ce23e09bdaca2073d99c207f864af" -dependencies = [ - "if_chain", - "lazy_static", - "proc-macro-error", - "proc-macro2", - "quote", - "regex", - "syn 1.0.109", - "validator_types", -] - -[[package]] -name = "validator_types" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "111abfe30072511849c5910134e8baf8dc05de4c0e5903d681cbd5c9c4d611e3" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - [[package]] name = "valuable" version = "0.1.1" @@ -10324,32 +10207,24 @@ dependencies = [ "alloy", "anyhow", "bollard", - "bytes", "chrono", "cid", "clap", "colored", "console", - "ctrlc", - "dashmap", "directories", "env_logger", "futures", - "futures-core", "futures-util", "hex", "homedir", - "indicatif", "lazy_static", "libc", "log", - "nalgebra", "nvml-wrapper", "p2p", "rand 0.8.5", "rand 0.9.1", - "rand_core 0.6.4", - "regex", "reqwest", "rust-ipfs", "serde", @@ -10366,15 +10241,12 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml", "tracing", - "tracing-log", "tracing-loki", "tracing-subscriber", "unicode-width", "url", "uuid", - "validator 0.16.1", ] [[package]] diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 43fc4a53..f3f01b04 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -11,42 +11,38 @@ shared = { workspace = true } p2p = { workspace = true } actix-web = { workspace = true } -bollard = "0.18.1" +alloy = { workspace = true } +anyhow = { workspace = true } +cid = { workspace = true } clap = { workspace = true } -colored = "2.0" -lazy_static = "1.4" -regex = "1.10" +chrono = { workspace = true } +directories = { workspace = true } +env_logger = { workspace = true } +futures = { workspace = true } +futures-util = { workspace = true } +hex = { workspace = true } +log = { workspace = true } +rand_v8 = { workspace = true } +reqwest = { workspace = true, features = ["blocking"] } +rust-ipfs = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } +stun = { workspace = true } tokio = { workspace = true, features = ["full", "macros"] } +tokio-util = { workspace = true, features = ["rt"] } +url = { workspace = true } uuid = { workspace = true } -validator = { version = "0.16", features = ["derive"] } + +bollard = "0.18.1" +colored = "2.0" +lazy_static = "1.4" sysinfo = "0.30" libc = "0.2" nvml-wrapper = "0.10.0" -log = { workspace = true } -env_logger = { workspace = true } -futures-core = "0.3" -futures-util = { workspace = true } -alloy = { workspace = true } -url = { workspace = true } -serde_json = { workspace = true } -reqwest = { workspace = true, features = ["blocking"] } -hex = { workspace = true } console = "0.15.10" -indicatif = "0.17.9" -bytes = "1.9.0" -anyhow = { workspace = true } thiserror = "2.0.11" -toml = { workspace = true } -ctrlc = "3.4.5" -tokio-util = { workspace = true, features = ["rt"] } -futures = { workspace = true } -chrono = { workspace = true } serial_test = "0.5.1" -directories = { workspace = true } strip-ansi-escapes = "0.2.1" -nalgebra = { workspace = true } -stun = { workspace = true } sha2 = "0.10.8" unicode-width = "0.2.0" rand = "0.9.0" @@ -54,12 +50,6 @@ tempfile = "3.14.0" tracing-loki = "0.2.6" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -tracing-log = "0.2.0" time = "0.3.41" -rand_v8 = { workspace = true } -rand_core_v6 = { workspace = true } -dashmap = "6.1.0" tokio-stream = { version = "0.1.17", features = ["net"] } -rust-ipfs = { workspace = true } -cid = { workspace = true } homedir = "0.3" From 46ecca716e97374f12abbda15e9fbf77fc49ae5a Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:48:42 -0400 Subject: [PATCH 14/40] add libp2p_port to cli --- crates/worker/src/cli/command.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index b5a56bdd..8f358252 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -14,6 +14,7 @@ use crate::services::discovery_updater::DiscoveryUpdater; use crate::state::system_state::SystemState; use crate::TaskHandles; use alloy::primitives::utils::format_ether; +use alloy::primitives::Address; use alloy::primitives::U256; use alloy::signers::local::PrivateKeySigner; use alloy::signers::Signer; @@ -22,8 +23,10 @@ use log::{error, info}; use shared::models::node::ComputeRequirements; use shared::models::node::Node; use shared::web3::contracts::core::builder::ContractBuilder; +use shared::web3::contracts::core::builder::Contracts; use shared::web3::contracts::structs::compute_pool::PoolStatus; use shared::web3::wallet::Wallet; +use shared::web3::wallet::WalletProvider; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -54,6 +57,10 @@ pub enum Commands { #[arg(long, default_value = "8080")] port: u16, + /// Port for libp2p service + #[arg(long, default_value = "4002")] + libp2p_port: u16, + /// External IP address for the worker to advertise #[arg(long)] external_ip: Option, @@ -186,6 +193,7 @@ pub async fn execute_command( match command { Commands::Run { port: _, + libp2p_port, external_ip, compute_pool_id, dry_run: _, @@ -716,11 +724,10 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( state.get_p2p_keypair().clone(), - port, + *libp2p_port, node_wallet_instance.clone(), validator_addresses, docker_service.clone(), @@ -1088,10 +1095,6 @@ pub async fn execute_command( } } -use alloy::primitives::Address; -use shared::web3::contracts::core::builder::Contracts; -use shared::web3::wallet::WalletProvider; - async fn remove_compute_node( contracts: Contracts, provider_address: Address, From 4358e3201b83a77a1f78e3eb60d516a60f7d678d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 15:19:49 -0400 Subject: [PATCH 15/40] serde for PersistedSystemState --- crates/worker/src/state/system_state.rs | 37 +++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index e6ab26da..bed32693 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -16,31 +16,32 @@ fn get_default_state_dir() -> Option { .map(|proj_dirs| proj_dirs.data_local_dir().to_string_lossy().into_owned()) } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] struct PersistedSystemState { endpoint: Option, + #[serde( + serialize_with = "serialize_keypair", + deserialize_with = "deserialize_keypair" + )] p2p_keypair: p2p::Keypair, } -impl Serialize for PersistedSystemState { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serde_json::to_string(self) - .map_err(serde::ser::Error::custom) - .and_then(|s| serializer.serialize_str(&s)) - } +fn serialize_keypair(keypair: &p2p::Keypair, serializer: S) -> Result +where + S: serde::Serializer, +{ + let serialized = keypair + .to_protobuf_encoding() + .map_err(serde::ser::Error::custom)?; + serializer.serialize_bytes(&serialized) } -impl<'de> Deserialize<'de> for PersistedSystemState { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s: String = Deserialize::deserialize(deserializer)?; - serde_json::from_str(&s).map_err(serde::de::Error::custom) - } +fn deserialize_keypair<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let serialized: Vec = Deserialize::deserialize(deserializer)?; + p2p::Keypair::from_protobuf_encoding(&serialized).map_err(serde::de::Error::custom) } #[derive(Debug, Clone)] From 577d843e117b88bc157e6304e7ce14c1ea0cd1c6 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 15:22:35 -0400 Subject: [PATCH 16/40] spawn message handler --- crates/worker/src/p2p/mod.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 6a851c01..c2af2bbd 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,5 +1,6 @@ use anyhow::Context as _; use anyhow::Result; +use futures::stream::FuturesUnordered; use p2p::InviteRequestUrl; use p2p::Node; use p2p::NodeBuilder; @@ -71,6 +72,8 @@ impl Service { } pub(crate) async fn run(self) { + use futures::StreamExt as _; + let Self { node: _, mut incoming_messages, @@ -78,17 +81,24 @@ impl Service { context, } = self; + let mut message_handlers = FuturesUnordered::new(); + loop { tokio::select! { _ = cancellation_token.cancelled() => { break; } Some(message) = incoming_messages.recv() => { - // TODO: spawn and store handles - if let Err(e) = handle_incoming_message(message, context.clone()) - .await { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { tracing::error!("failed to handle incoming message: {e}"); - } + } } } } From 4285eaaf7d208a10321fb2a86ba1d875dbd952b5 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 16:40:55 -0400 Subject: [PATCH 17/40] add dial channel to p2p node; impl validator libp2p node --- Cargo.lock | 1 + Cargo.toml | 2 + crates/p2p/src/lib.rs | 40 +++- crates/validator/Cargo.toml | 2 + crates/validator/src/p2p/mod.rs | 380 ++++++++++++++++++++++++++++++++ 5 files changed, 414 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae652ad4..23c65154 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9452,6 +9452,7 @@ dependencies = [ "log", "mockito", "nalgebra", + "p2p", "prometheus 0.14.0", "rand 0.8.5", "rand 0.9.1", diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..4a2b176e 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,6 +1,5 @@ use anyhow::Context; use anyhow::Result; -use libp2p::futures::stream::FuturesUnordered; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -23,6 +22,9 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; +pub type DialSender = + tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -36,6 +38,9 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, + dial_rx: + tokio::sync::mpsc::Receiver<(Vec, tokio::sync::oneshot::Sender>)>, + // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -73,6 +78,7 @@ impl Node { mut swarm, bootnodes, cancellation_token, + mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -83,17 +89,12 @@ impl Node { .context("swarm failed to listen on multiaddr")?; } - let futures = FuturesUnordered::new(); for bootnode in bootnodes { - futures.push(swarm.dial(bootnode)) - } - let results: Vec<_> = futures.into_iter().collect(); - for result in results { - match result { + match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // TODO: log this error - println!("failed to dial bootnode: {e:?}"); + // log error + println!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -104,6 +105,19 @@ impl Node { println!("cancellation token triggered, shutting down node"); break Ok(()); } + Some((addrs, res_tx)) = dial_rx.recv() => { + let mut res = Ok(()); + for addr in addrs { + match swarm.dial(addr.clone()) { + Ok(_) => {} + Err(e) => { + res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); + break; + } + } + } + let _ = res_tx.send(res); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -254,6 +268,7 @@ impl NodeBuilder { self, ) -> Result<( Node, + DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -295,6 +310,7 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } + let (dial_tx, dial_rx) = tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -304,10 +320,12 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, + dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, + dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -334,11 +352,11 @@ mod test { #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index db3694ca..4143903d 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -7,6 +7,8 @@ edition.workspace = true workspace = true [dependencies] +p2p = { workspace = true} + actix-web = { workspace = true } alloy = { workspace = true } anyhow = { workspace = true } diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 33dad50c..378b1080 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,3 +1,383 @@ pub(crate) mod client; pub use client::P2PClient; + +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use p2p::{ + IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, + ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, + ValidatorAuthenticationSolutionRequest, +}; +use shared::web3::wallet::Wallet; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +pub(crate) struct Service { + node: Node, + dial_tx: p2p::DialSender, + incoming_messages: Receiver, + hardware_challenge_rx: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + +pub(crate) struct HardwareChallengeRequest { + worker_wallet_address: alloy::primitives::Address, + worker_p2p_id: String, + worker_addresses: Vec, + challenge: p2p::ChallengeRequest, +} + +impl Service { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + ) -> Result<(Self, Sender)> { + let (node, dial_tx, incoming_messages, outgoing_messages) = + build_p2p_node(keypair, port, cancellation_token.clone()) + .context("failed to build p2p node")?; + let (hardware_challenge_tx, hardware_challenge_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Self { + node, + dial_tx, + incoming_messages, + hardware_challenge_rx, + cancellation_token, + context: Context::new(outgoing_messages, wallet), + }, + hardware_challenge_tx, + )) + } + + pub(crate) fn peer_id(&self) -> PeerId { + self.node.peer_id() + } + + pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { + self.node.listen_addrs() + } + + pub(crate) async fn run(self) { + use futures::StreamExt as _; + + let Self { + node: _, + dial_tx, + mut incoming_messages, + mut hardware_challenge_rx, + cancellation_token, + context, + } = self; + + let mut message_handlers = FuturesUnordered::new(); + + loop { + tokio::select! { + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = hardware_challenge_rx.recv() => { + if let Err(e) = handle_outgoing_hardware_challenge(message, dial_tx.clone(), context.clone()) + .await { + log::error!("failed to handle outgoing hardware challenge: {e}"); + } + } + Some(message) = incoming_messages.recv() => { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +fn build_p2p_node( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, +) -> Result<( + Node, + p2p::DialSender, + Receiver, + Sender, +)> { + NodeBuilder::new() + .with_keypair(keypair) + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_cancellation_token(cancellation_token) + .try_build() +} + +#[derive(Clone)] +struct Context { + outgoing_messages: Sender, + ongoing_auth_requests: Arc>>, + wallet: Wallet, +} + +#[derive(Debug, Clone)] +struct OngoingHardwareChallenge { + worker_wallet_address: alloy::primitives::Address, + auth_challenge_request_message: String, + hardware_challenge: p2p::ChallengeRequest, +} + +impl Context { + fn new(outgoing_messages: Sender, wallet: Wallet) -> Self { + Self { + outgoing_messages, + ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + wallet, + } + } +} + +async fn handle_outgoing_hardware_challenge( + request: HardwareChallengeRequest, + dial_tx: p2p::DialSender, + context: Context, +) -> Result<()> { + use rand_v8::rngs::OsRng; + use rand_v8::Rng as _; + use std::str::FromStr as _; + + let HardwareChallengeRequest { + worker_wallet_address, + worker_p2p_id, + worker_addresses, + challenge, + } = request; + + log::debug!( + "sending hardware challenge to {} with addresses {:?}", + worker_p2p_id, + worker_addresses + ); + + // first, dial the worker + let worker_p2p_id = + PeerId::from_str(&worker_p2p_id).context("failed to parse worker p2p id")?; + + // ensure there's no ongoing challenge + // use write-lock to make this atomic until we finish sending the auth request and writing to the map + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + if ongoing_auth_requests.contains_key(&worker_p2p_id) { + bail!( + "ongoing hardware challenge for {} already exists", + worker_p2p_id + ); + } + + let multiaddrs = worker_addresses + .iter() + .filter_map(|addr| { + Some( + p2p::Multiaddr::from_str(addr) + .ok()? + .with_p2p(worker_p2p_id.clone()) + .ok()?, + ) + }) + .collect::>(); + if multiaddrs.is_empty() { + bail!("no valid multiaddrs for worker p2p id {worker_p2p_id}"); + } + + // TODO: we can improve this by checking if we're already connected to the peer before dialing + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx + .send((multiaddrs, res_tx)) + .await + .context("failed to send dial request")?; + res_rx + .await + .context("failed to receive dial response")? + .context("failed to dial worker")?; + + // create the authentication challenge request message + let challenge_bytes: [u8; 32] = OsRng.gen(); + let auth_challenge_message: String = hex::encode(challenge_bytes); + + let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + message: auth_challenge_message.clone(), + } + .into(); + let outgoing_message = req.into_outgoing_message(worker_p2p_id.clone()); + log::debug!( + "sending ValidatorAuthenticationInitiationRequest to {}", + worker_p2p_id + ); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send outgoing message")?; + + // store the ongoing hardware challenge + let ongoing_challenge = OngoingHardwareChallenge { + worker_wallet_address, + auth_challenge_request_message: auth_challenge_message.clone(), + hardware_challenge: challenge, + }; + + ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); + Ok(()) +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel: _, + } => { + log::error!( + "validator should not receive incoming requests: {request:?} from {}", + message.peer + ); + } + Libp2pIncomingMessage::Response { + request_id: _, + response, + } => { + log::debug!("received incoming response {response:?}"); + handle_incoming_response(message.peer, response, context) + .await + .context("failed to handle incoming response")?; + } + } + Ok(()) +} + +async fn handle_incoming_response( + from: PeerId, + response: p2p::Response, + context: Context, +) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(resp) => { + log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); + handle_validation_authentication_response(from, resp, context) + .await + .context("failed to handle validator authentication response")?; + } + p2p::Response::HardwareChallenge(resp) => { + log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); + // TODO + } + p2p::Response::Invite(_) => { + log::error!("validator should not receive `Invite` responses: from {from}"); + } + p2p::Response::GetTaskLogs(_) => { + log::error!("validator should not receive `GetTaskLogs` responses: from {from}"); + } + p2p::Response::Restart(_) => { + log::error!("validator should not receive `Restart` responses: from {from}"); + } + p2p::Response::General(_) => { + todo!() + } + } + + Ok(()) +} + +async fn handle_validation_authentication_response( + from: PeerId, + response: p2p::ValidatorAuthenticationResponse, + context: Context, +) -> Result<()> { + use shared::security::request_signer::sign_message; + use std::str::FromStr as _; + + match response { + ValidatorAuthenticationResponse::Initiation(req) => { + let ongoing_auth_requests = context.ongoing_auth_requests.read().await; + let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" + ); + }; + + let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) + else { + bail!("Failed to parse signature from server"); + }; + + // recover address from the challenge message that the server signed + let Ok(recovered_address) = parsed_signature + .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) + else { + bail!("Failed to recover address from server signature") + }; + + // verify the recovered address matches the expected worker wallet address + if recovered_address != ongoing_challenge.worker_wallet_address { + bail!( + "Server address verification failed: expected {}, got {recovered_address}", + ongoing_challenge.worker_wallet_address, + ) + } + + log::debug!("auth challenge initiation response received from node: {from}"); + let signature = sign_message(&req.message, &context.wallet).await.unwrap(); + + let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req = req.into_outgoing_message(from); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + ValidatorAuthenticationResponse::Solution(req) => { + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" + ); + }; + + match req { + p2p::ValidatorAuthenticationSolutionResponse::Granted => {} + p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + log::debug!("auth challenge rejected by node: {from}"); + return Ok(()); + } + } + + // auth was granted, finally send the hardware challenge + let req: p2p::Request = p2p::HardwareChallengeRequest { + challenge: ongoing_challenge.hardware_challenge, + timestamp: std::time::SystemTime::now(), + } + .into(); + let req = req.into_outgoing_message(from); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + } + Ok(()) +} From 75038851adb9bd5839ec852c395b9278ade4e94d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 16:54:12 -0400 Subject: [PATCH 18/40] fully implement hardware challenge flow --- crates/validator/src/p2p/mod.rs | 41 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 378b1080..49cb5b60 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -25,13 +25,6 @@ pub(crate) struct Service { context: Context, } -pub(crate) struct HardwareChallengeRequest { - worker_wallet_address: alloy::primitives::Address, - worker_p2p_id: String, - worker_addresses: Vec, - challenge: p2p::ChallengeRequest, -} - impl Service { #[allow(clippy::too_many_arguments)] pub(crate) fn new( @@ -127,18 +120,29 @@ fn build_p2p_node( .try_build() } +pub(crate) struct HardwareChallengeRequest { + worker_wallet_address: alloy::primitives::Address, + worker_p2p_id: String, + worker_addresses: Vec, + challenge: p2p::ChallengeRequest, + response_tx: tokio::sync::oneshot::Sender, +} + #[derive(Clone)] struct Context { outgoing_messages: Sender, - ongoing_auth_requests: Arc>>, + ongoing_auth_requests: Arc>>, + ongoing_hardware_challenges: + Arc>>>, wallet: Wallet, } -#[derive(Debug, Clone)] -struct OngoingHardwareChallenge { +#[derive(Debug)] +struct OngoingAuthChallenge { worker_wallet_address: alloy::primitives::Address, auth_challenge_request_message: String, hardware_challenge: p2p::ChallengeRequest, + hardware_challenge_response_tx: tokio::sync::oneshot::Sender, } impl Context { @@ -146,6 +150,7 @@ impl Context { Self { outgoing_messages, ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + ongoing_hardware_challenges: Arc::new(RwLock::new(HashMap::new())), wallet, } } @@ -165,6 +170,7 @@ async fn handle_outgoing_hardware_challenge( worker_p2p_id, worker_addresses, challenge, + response_tx, } = request; log::debug!( @@ -233,10 +239,11 @@ async fn handle_outgoing_hardware_challenge( .context("failed to send outgoing message")?; // store the ongoing hardware challenge - let ongoing_challenge = OngoingHardwareChallenge { + let ongoing_challenge = OngoingAuthChallenge { worker_wallet_address, auth_challenge_request_message: auth_challenge_message.clone(), hardware_challenge: challenge, + hardware_challenge_response_tx: response_tx, }; ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); @@ -282,7 +289,13 @@ async fn handle_incoming_response( } p2p::Response::HardwareChallenge(resp) => { log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); - // TODO + let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; + let Some(response_tx) = ongoing_hardware_challenges.remove(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" + ); + }; + let _ = response_tx.send(resp.response); // timestamp is silently dropped, is it actually used anywhere? } p2p::Response::Invite(_) => { log::error!("validator should not receive `Invite` responses: from {from}"); @@ -377,6 +390,10 @@ async fn handle_validation_authentication_response( .send(req) .await .context("failed to send outgoing message")?; + + let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; + ongoing_hardware_challenges + .insert(from, ongoing_challenge.hardware_challenge_response_tx); } } Ok(()) From d32f540e745810a43c38116b1d3bc19278529f56 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:15:28 -0400 Subject: [PATCH 19/40] upddate validator main to use libp2p node --- Cargo.lock | 1 - crates/validator/Cargo.toml | 1 - crates/validator/src/lib.rs | 2 +- crates/validator/src/main.rs | 31 ++++--- crates/validator/src/p2p/client.rs | 89 ------------------- crates/validator/src/p2p/mod.rs | 55 ++++-------- crates/validator/src/validators/hardware.rs | 66 +++++--------- .../src/validators/hardware_challenge.rs | 85 +++++++++--------- 8 files changed, 105 insertions(+), 225 deletions(-) delete mode 100644 crates/validator/src/p2p/client.rs diff --git a/Cargo.lock b/Cargo.lock index 23c65154..a4a2af90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9447,7 +9447,6 @@ dependencies = [ "env_logger", "futures", "hex", - "iroh", "lazy_static", "log", "mockito", diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 4143903d..043c1558 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -18,7 +18,6 @@ directories = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } -iroh = { workspace = true } rand_v8 = { workspace = true } lazy_static = "1.5.0" log = { workspace = true } diff --git a/crates/validator/src/lib.rs b/crates/validator/src/lib.rs index 760af2d1..9fac5ce8 100644 --- a/crates/validator/src/lib.rs +++ b/crates/validator/src/lib.rs @@ -5,7 +5,7 @@ mod validators; pub use metrics::export_metrics; pub use metrics::MetricsContext; -pub use p2p::P2PClient; +pub use p2p::Service as P2PService; pub use store::redis::RedisStore; pub use validators::hardware::HardwareValidator; pub use validators::synthetic_data::types::InvalidationType; diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index 55b3900d..d17f5004 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -23,7 +23,7 @@ use tokio_util::sync::CancellationToken; use url::Url; use validator::{ - export_metrics, HardwareValidator, InvalidationType, MetricsContext, P2PClient, RedisStore, + export_metrics, HardwareValidator, InvalidationType, MetricsContext, P2PService, RedisStore, SyntheticDataValidator, }; @@ -196,6 +196,10 @@ struct Args { /// Redis URL #[arg(long, default_value = "redis://localhost:6380")] redis_url: String, + + /// Libp2p port + #[arg(long, default_value = "4003")] + libp2p_port: u16, } #[tokio::main] @@ -269,19 +273,27 @@ async fn main() -> anyhow::Result<()> { MetricsContext::new(validator_wallet.address().to_string(), args.pool_id.clone()); // Initialize P2P client if enabled - let p2p_client = { - match P2PClient::new(validator_wallet.clone()).await { - Ok(client) => { - info!("P2P client initialized for testing"); - Some(client) + let keypair = p2p::Keypair::generate_ed25519(); + let (p2p_service, hardware_challenge_tx) = { + match P2PService::new( + keypair, + args.libp2p_port, + cancellation_token.clone(), + validator_wallet.clone(), + ) { + Ok(res) => { + info!("p2p service initialized successfully"); + res } Err(e) => { - error!("Failed to initialize P2P client: {e}"); - None + error!("failed to initialize p2p service: {e}"); + std::process::exit(1); } } }; + tokio::task::spawn(p2p_service.run()); + if let Some(pool_id) = args.pool_id.clone() { let pool = match contracts .compute_pool @@ -308,8 +320,7 @@ async fn main() -> anyhow::Result<()> { let contracts = contract_builder.build().unwrap(); - let hardware_validator = - HardwareValidator::new(&validator_wallet, contracts.clone(), p2p_client.as_ref()); + let hardware_validator = HardwareValidator::new(contracts.clone(), hardware_challenge_tx); let synthetic_validator = if let Some(pool_id) = args.pool_id.clone() { let penalty = U256::from(args.validator_penalty) * Unit::ETHER.wei(); diff --git a/crates/validator/src/p2p/client.rs b/crates/validator/src/p2p/client.rs deleted file mode 100644 index a0b21db1..00000000 --- a/crates/validator/src/p2p/client.rs +++ /dev/null @@ -1,89 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use log::info; -use rand_v8::Rng; -use shared::models::challenge::{ChallengeRequest, ChallengeResponse}; -use shared::p2p::{client::P2PClient as SharedP2PClient, messages::P2PMessage}; -use shared::web3::wallet::Wallet; -use std::time::SystemTime; - -pub struct P2PClient { - shared_client: SharedP2PClient, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let shared_client = SharedP2PClient::new(wallet).await?; - Ok(Self { shared_client }) - } - - pub async fn ping_worker( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result { - let nonce = rand_v8::thread_rng().gen::(); - - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::Ping { - timestamp: SystemTime::now(), - nonce, - }, - 10, - ) - .await?; - - match response { - P2PMessage::Pong { - nonce: returned_nonce, - .. - } => { - if returned_nonce == nonce { - info!("Received valid pong from worker {worker_p2p_id} with nonce: {nonce}"); - Ok(nonce) - } else { - Err(anyhow::anyhow!("Invalid nonce in pong response")) - } - } - _ => Err(anyhow::anyhow!("Unexpected response type for ping")), - } - } - - pub async fn send_hardware_challenge( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - challenge: ChallengeRequest, - ) -> Result { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::HardwareChallenge { - challenge, - timestamp: SystemTime::now(), - }, - 30, - ) - .await?; - - match response { - P2PMessage::HardwareChallengeResponse { response, .. } => { - info!("Received hardware challenge response from worker {worker_p2p_id}"); - Ok(response) - } - _ => Err(anyhow::anyhow!( - "Unexpected response type for hardware challenge" - )), - } - } -} diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 49cb5b60..e756d33f 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,7 +1,3 @@ -pub(crate) mod client; - -pub use client::P2PClient; - use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use p2p::{ @@ -16,8 +12,8 @@ use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; -pub(crate) struct Service { - node: Node, +pub struct Service { + _node: Node, dial_tx: p2p::DialSender, incoming_messages: Receiver, hardware_challenge_rx: Receiver, @@ -27,7 +23,7 @@ pub(crate) struct Service { impl Service { #[allow(clippy::too_many_arguments)] - pub(crate) fn new( + pub fn new( keypair: p2p::Keypair, port: u16, cancellation_token: CancellationToken, @@ -40,7 +36,7 @@ impl Service { Ok(( Self { - node, + _node: node, dial_tx, incoming_messages, hardware_challenge_rx, @@ -51,19 +47,11 @@ impl Service { )) } - pub(crate) fn peer_id(&self) -> PeerId { - self.node.peer_id() - } - - pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { - self.node.listen_addrs() - } - - pub(crate) async fn run(self) { + pub async fn run(self) { use futures::StreamExt as _; let Self { - node: _, + _node, dial_tx, mut incoming_messages, mut hardware_challenge_rx, @@ -120,12 +108,12 @@ fn build_p2p_node( .try_build() } -pub(crate) struct HardwareChallengeRequest { - worker_wallet_address: alloy::primitives::Address, - worker_p2p_id: String, - worker_addresses: Vec, - challenge: p2p::ChallengeRequest, - response_tx: tokio::sync::oneshot::Sender, +pub struct HardwareChallengeRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) challenge: p2p::ChallengeRequest, + pub(crate) response_tx: tokio::sync::oneshot::Sender, } #[derive(Clone)] @@ -174,9 +162,7 @@ async fn handle_outgoing_hardware_challenge( } = request; log::debug!( - "sending hardware challenge to {} with addresses {:?}", - worker_p2p_id, - worker_addresses + "sending hardware challenge to {worker_p2p_id} with addresses {worker_addresses:?}" ); // first, dial the worker @@ -196,12 +182,10 @@ async fn handle_outgoing_hardware_challenge( let multiaddrs = worker_addresses .iter() .filter_map(|addr| { - Some( - p2p::Multiaddr::from_str(addr) + p2p::Multiaddr::from_str(addr) .ok()? - .with_p2p(worker_p2p_id.clone()) - .ok()?, - ) + .with_p2p(worker_p2p_id) + .ok() }) .collect::>(); if multiaddrs.is_empty() { @@ -227,10 +211,9 @@ async fn handle_outgoing_hardware_challenge( message: auth_challenge_message.clone(), } .into(); - let outgoing_message = req.into_outgoing_message(worker_p2p_id.clone()); + let outgoing_message = req.into_outgoing_message(worker_p2p_id); log::debug!( - "sending ValidatorAuthenticationInitiationRequest to {}", - worker_p2p_id + "sending ValidatorAuthenticationInitiationRequest to {worker_p2p_id}" ); context .outgoing_messages @@ -246,7 +229,7 @@ async fn handle_outgoing_hardware_challenge( hardware_challenge_response_tx: response_tx, }; - ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); + ongoing_auth_requests.insert(worker_p2p_id, ongoing_challenge); Ok(()) } diff --git a/crates/validator/src/validators/hardware.rs b/crates/validator/src/validators/hardware.rs index 00736d34..877861da 100644 --- a/crates/validator/src/validators/hardware.rs +++ b/crates/validator/src/validators/hardware.rs @@ -1,15 +1,13 @@ use alloy::primitives::Address; +use anyhow::bail; use anyhow::Result; use log::{debug, error, info}; use shared::{ models::node::DiscoveryNode, - web3::{ - contracts::core::builder::Contracts, - wallet::{Wallet, WalletProvider}, - }, + web3::{contracts::core::builder::Contracts, wallet::WalletProvider}, }; -use crate::p2p::client::P2PClient; +use crate::p2p::HardwareChallengeRequest; use crate::validators::hardware_challenge::HardwareChallenge; /// Hardware validator implementation @@ -17,35 +15,27 @@ use crate::validators::hardware_challenge::HardwareChallenge; /// NOTE: This is a temporary implementation that will be replaced with a proper /// hardware validator in the near future. The current implementation only performs /// basic matrix multiplication challenges and does not verify actual hardware specs. -pub struct HardwareValidator<'a> { - wallet: &'a Wallet, +pub struct HardwareValidator { contracts: Contracts, - p2p_client: Option<&'a P2PClient>, + challenge_tx: tokio::sync::mpsc::Sender, } -impl<'a> HardwareValidator<'a> { +impl HardwareValidator { pub fn new( - wallet: &'a Wallet, contracts: Contracts, - p2p_client: Option<&'a P2PClient>, + challenge_tx: tokio::sync::mpsc::Sender, ) -> Self { Self { - wallet, contracts, - p2p_client, + challenge_tx, } } - async fn validate_node( - _wallet: &'a Wallet, - contracts: Contracts, - p2p_client: Option<&'a P2PClient>, - node: DiscoveryNode, - ) -> Result<()> { + async fn validate_node(&self, node: DiscoveryNode) -> Result<()> { let node_address = match node.id.trim_start_matches("0x").parse::
() { Ok(addr) => addr, Err(e) => { - return Err(anyhow::anyhow!("Failed to parse node address: {}", e)); + bail!("failed to parse node address: {e:?}"); } }; @@ -56,30 +46,22 @@ impl<'a> HardwareValidator<'a> { { Ok(addr) => addr, Err(e) => { - return Err(anyhow::anyhow!("Failed to parse provider address: {}", e)); + bail!("failed to parse provider address: {e:?}"); } }; // Perform hardware challenge - if let Some(p2p_client) = p2p_client { - let hardware_challenge = HardwareChallenge::new(p2p_client); - let challenge_result = hardware_challenge.challenge_node(&node).await; - - if let Err(e) = challenge_result { - println!("Challenge failed for node: {}, error: {}", node.id, e); - error!("Challenge failed for node: {}, error: {}", node.id, e); - return Err(anyhow::anyhow!("Failed to challenge node: {}", e)); - } - } else { - debug!( - "P2P client not available, skipping hardware challenge for node {}", - node.id - ); + let hardware_challenge = HardwareChallenge::new(self.challenge_tx.clone()); + let challenge_result = hardware_challenge.challenge_node(&node).await; + + if let Err(e) = challenge_result { + bail!("failed to challenge node: {e:?}"); } debug!("Sending validation transaction for node {}", node.id); - if let Err(e) = contracts + if let Err(e) = self + .contracts .prime_network .validate_node(provider_address, node_address) .await @@ -100,17 +82,11 @@ impl<'a> HardwareValidator<'a> { debug!("Non validated nodes: {non_validated:?}"); info!("Starting validation for {} nodes", non_validated.len()); - let contracts = self.contracts.clone(); - let wallet = self.wallet; - let p2p_client = self.p2p_client; - // Process non validated nodes sequentially as simple fix // to avoid nonce conflicts for now. Will sophisticate this in the future for node in non_validated { let node_id = node.id.clone(); - match HardwareValidator::validate_node(wallet, contracts.clone(), p2p_client, node) - .await - { + match self.validate_node(node).await { Ok(_) => (), Err(e) => { error!("Failed to validate node {node_id}: {e}"); @@ -134,7 +110,6 @@ mod tests { async fn test_challenge_node() { let coordinator_key = "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"; let rpc_url: Url = Url::parse("http://localhost:8545").unwrap(); - let coordinator_wallet = Arc::new(Wallet::new(coordinator_key, rpc_url).unwrap()); let contracts = ContractBuilder::new(coordinator_wallet.provider()) @@ -145,7 +120,8 @@ mod tests { .build() .unwrap(); - let validator = HardwareValidator::new(&coordinator_wallet, contracts, None); + let (tx, _rx) = tokio::sync::mpsc::channel(100); + let validator = HardwareValidator::new(contracts, tx); let fake_discovery_node1 = DiscoveryNode { is_validated: false, diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index c881c542..de04a96a 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -1,40 +1,38 @@ -use crate::p2p::client::P2PClient; use alloy::primitives::Address; -use anyhow::{Error, Result}; +use anyhow::{bail, Context as _, Result}; use log::{error, info}; use rand::{rng, Rng}; -use shared::models::{ - challenge::{calc_matrix, ChallengeRequest, FixedF64}, - node::DiscoveryNode, -}; +use shared::models::node::DiscoveryNode; use std::str::FromStr; -pub(crate) struct HardwareChallenge<'a> { - p2p_client: &'a P2PClient, +use crate::p2p::HardwareChallengeRequest; + +pub(crate) struct HardwareChallenge { + challenge_tx: tokio::sync::mpsc::Sender, } -impl<'a> HardwareChallenge<'a> { - pub(crate) fn new(p2p_client: &'a P2PClient) -> Self { - Self { p2p_client } +impl HardwareChallenge { + pub(crate) fn new(challenge_tx: tokio::sync::mpsc::Sender) -> Self { + Self { challenge_tx } } - pub(crate) async fn challenge_node(&self, node: &DiscoveryNode) -> Result { + pub(crate) async fn challenge_node(&self, node: &DiscoveryNode) -> Result<()> { // Check if node has P2P ID and addresses let p2p_id = node .node .worker_p2p_id - .as_ref() + .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P ID", node.id))?; let p2p_addresses = node .node .worker_p2p_addresses - .as_ref() + .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); - let challenge_expected = calc_matrix(&challenge_matrix); + let challenge_expected = p2p::calc_matrix(&challenge_matrix); // Add timestamp to the challenge let current_time = std::time::SystemTime::now() @@ -47,34 +45,35 @@ impl<'a> HardwareChallenge<'a> { let node_address = Address::from_str(&node.node.id) .map_err(|e| anyhow::anyhow!("Failed to parse node address {}: {}", node.node.id, e))?; + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let hardware_challenge = HardwareChallengeRequest { + worker_wallet_address: node_address, + worker_p2p_id: p2p_id, + worker_addresses: p2p_addresses, + challenge: challenge_with_timestamp, + response_tx, + }; // Send challenge via P2P - match self - .p2p_client - .send_hardware_challenge( - node_address, - p2p_id, - p2p_addresses, - challenge_with_timestamp, - ) + self.challenge_tx + .send(hardware_challenge) + .await + .context("failed to send hardware challenge request to p2p service")?; + + let resp = response_rx .await - { - Ok(response) => { - if challenge_expected.result == response.result { - info!("Challenge for node {} successful", node.id); - Ok(0) - } else { - error!( - "Challenge failed for node {}: expected {:?}, got {:?}", - node.id, challenge_expected.result, response.result - ); - Err(anyhow::anyhow!("Node failed challenge")) - } - } - Err(e) => { - error!("Failed to send challenge to node {}: {}", node.id, e); - Err(anyhow::anyhow!("Failed to send challenge: {}", e)) - } + .context("failed to receive response from node")?; + + if challenge_expected.result == resp.result { + info!("Challenge for node {} successful", node.id); + } else { + error!( + "Challenge failed for node {}: expected {:?}, got {:?}", + node.id, challenge_expected.result, resp.result + ); + bail!("Node failed challenge"); } + + Ok(()) } fn random_challenge( @@ -83,7 +82,9 @@ impl<'a> HardwareChallenge<'a> { cols_a: usize, rows_b: usize, cols_b: usize, - ) -> ChallengeRequest { + ) -> p2p::ChallengeRequest { + use p2p::FixedF64; + let mut rng = rng(); let data_a_vec: Vec = (0..(rows_a * cols_a)) @@ -98,7 +99,7 @@ impl<'a> HardwareChallenge<'a> { let data_a: Vec = data_a_vec.iter().map(|x| FixedF64(*x)).collect(); let data_b: Vec = data_b_vec.iter().map(|x| FixedF64(*x)).collect(); - ChallengeRequest { + p2p::ChallengeRequest { rows_a, cols_a, data_a, From 56d6b1dab255712b95256879ff389b04eeca2a84 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:17:43 -0400 Subject: [PATCH 20/40] clean up deps --- Cargo.lock | 1 - crates/validator/Cargo.toml | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4a2af90..a93d4a0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9465,7 +9465,6 @@ dependencies = [ "tempfile", "tokio", "tokio-util", - "toml", "url", ] diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 043c1558..76969bb0 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true workspace = true [dependencies] +shared = { workspace = true } p2p = { workspace = true} actix-web = { workspace = true } @@ -19,23 +20,22 @@ env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } rand_v8 = { workspace = true } -lazy_static = "1.5.0" log = { workspace = true } nalgebra = { workspace = true } -prometheus = "0.14.0" -rand = "0.9.0" redis = { workspace = true, features = ["tokio-comp"] } -redis-test = { workspace = true } -regex = "1.11.1" reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -shared = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } -toml = { workspace = true } url = { workspace = true } +lazy_static = "1.5.0" +prometheus = "0.14.0" +rand = "0.9.0" +regex = "1.11.1" + [dev-dependencies] mockito = { workspace = true } +redis-test = { workspace = true } tempfile = "=3.14.0" From c6183d66264ce05676537901d8fdaaf78b5a6323 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:23:14 -0400 Subject: [PATCH 21/40] add authorized peer to map --- crates/worker/src/p2p/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index c2af2bbd..cc905d83 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -208,7 +208,7 @@ async fn handle_incoming_request( .context("failed to handle ValidatorAuthenticationInitiationRequest")? } p2p::ValidatorAuthenticationRequest::Solution(req) => { - match handle_validator_authentication_initiation_solution(from, req, &context) + match handle_validator_authentication_solution_request(from, req, &context) .await { Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), @@ -288,7 +288,7 @@ async fn handle_validator_authentication_initiation_request( .into()) } -async fn handle_validator_authentication_initiation_solution( +async fn handle_validator_authentication_solution_request( from: PeerId, req: p2p::ValidatorAuthenticationSolutionRequest, context: &Context, @@ -317,6 +317,8 @@ async fn handle_validator_authentication_initiation_solution( anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); } + let mut authorized_peers = context.authorized_peers.write().await; + authorized_peers.insert(from); Ok(()) } From 94e9e4d462aa021f29a498b6b0c88e927857cab3 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 12:08:18 -0400 Subject: [PATCH 22/40] implement dialing peers --- Cargo.toml | 2 ++ crates/p2p/src/lib.rs | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..4a2b176e 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,6 +1,5 @@ use anyhow::Context; use anyhow::Result; -use libp2p::futures::stream::FuturesUnordered; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -23,6 +22,9 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; +pub type DialSender = + tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -36,6 +38,9 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, + dial_rx: + tokio::sync::mpsc::Receiver<(Vec, tokio::sync::oneshot::Sender>)>, + // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -73,6 +78,7 @@ impl Node { mut swarm, bootnodes, cancellation_token, + mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -83,17 +89,12 @@ impl Node { .context("swarm failed to listen on multiaddr")?; } - let futures = FuturesUnordered::new(); for bootnode in bootnodes { - futures.push(swarm.dial(bootnode)) - } - let results: Vec<_> = futures.into_iter().collect(); - for result in results { - match result { + match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // TODO: log this error - println!("failed to dial bootnode: {e:?}"); + // log error + println!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -104,6 +105,19 @@ impl Node { println!("cancellation token triggered, shutting down node"); break Ok(()); } + Some((addrs, res_tx)) = dial_rx.recv() => { + let mut res = Ok(()); + for addr in addrs { + match swarm.dial(addr.clone()) { + Ok(_) => {} + Err(e) => { + res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); + break; + } + } + } + let _ = res_tx.send(res); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -254,6 +268,7 @@ impl NodeBuilder { self, ) -> Result<( Node, + DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -295,6 +310,7 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } + let (dial_tx, dial_rx) = tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -304,10 +320,12 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, + dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, + dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -334,11 +352,11 @@ mod test { #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() From a8af70634b017787edbefc5d0577a93dcfd0d539 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 16:50:45 -0400 Subject: [PATCH 23/40] use tracing --- Cargo.lock | 1 + Cargo.toml | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/behaviour.rs | 9 +++++---- crates/p2p/src/lib.rs | 18 +++++++----------- crates/worker/Cargo.toml | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae652ad4..c16f0570 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6267,6 +6267,7 @@ dependencies = [ "serde", "tokio", "tokio-util", + "tracing", "void", ] diff --git a/Cargo.toml b/Cargo.toml index d4ca7ab8..1bc9e2ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ rand_core_v6 = { package = "rand_core", version = "0.6.4", features = ["std"] } ipld-core = "0.4" rust-ipfs = "0.14" cid = "0.11" +tracing = "0.1.41" [workspace.package] version = "0.3.11" diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index ba52d570..bb670107 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -12,6 +12,7 @@ nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } +tracing = { workspace = true } [lints] workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index e2737d57..b114b61e 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -12,6 +12,7 @@ use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; use std::time::Duration; +use tracing::debug; use crate::message::IncomingMessage; use crate::message::{Request, Response}; @@ -152,19 +153,19 @@ impl BehaviourEvent { BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { - println!("received message from peer {peer:?}: {message:?}"); + debug!("received message from peer {peer:?}: {message:?}"); // if this errors, user dropped their incoming message channel let _ = message_tx.send(IncomingMessage { peer, message }).await; } request_response::Event::ResponseSent { peer, request_id } => { - println!("response sent to peer {peer:?} for request ID {request_id:?}"); + debug!("response sent to peer {peer:?} for request ID {request_id:?}"); } request_response::Event::InboundFailure { peer, request_id, error, } => { - println!( + debug!( "inbound failure from peer {peer:?} for request ID {request_id:?}: {error}" ); } @@ -173,7 +174,7 @@ impl BehaviourEvent { request_id, error, } => { - println!( + debug!( "outbound failure to peer {peer:?} for request ID {request_id:?}: {error}" ); } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 4a2b176e..0a5637a9 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -8,6 +8,7 @@ use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; +use tracing::debug; mod behaviour; mod message; @@ -93,8 +94,7 @@ impl Node { match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // log error - println!("failed to dial bootnode {bootnode}: {e:?}"); + debug!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -102,7 +102,7 @@ impl Node { loop { tokio::select! { _ = cancellation_token.cancelled() => { - println!("cancellation token triggered, shutting down node"); + debug!("cancellation token triggered, shutting down node"); break Ok(()); } Some((addrs, res_tx)) = dial_rx.recv() => { @@ -124,10 +124,8 @@ impl Node { swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { - println!("sending response on channel"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { - // log error - println!("failed to send response: {e:?}"); + debug!("failed to send response: {e:?}"); } } } @@ -138,10 +136,10 @@ impl Node { listener_id: _, address, } => { - println!("new listen address: {address}"); + debug!("new listen address: {address}"); } SwarmEvent::ExternalAddrConfirmed { address } => { - println!("external address confirmed: {address}"); + debug!("external address confirmed: {address}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -150,7 +148,7 @@ impl Node { connection_id: _, num_established: _, } => { - println!("connection closed with peer {peer_id}: {cause:?}"); + debug!("connection closed with peer {peer_id}: {cause:?}"); } SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, _ => continue, @@ -386,8 +384,6 @@ mod test { panic!("expected a GetTaskLogs request message"); }; - println!("received request from node1"); - // send response from node2->node1 let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 18596ba5..0f08e404 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -50,7 +50,7 @@ unicode-width = "0.2.0" rand = "0.9.0" tempfile = "3.14.0" tracing-loki = "0.2.6" -tracing = "0.1.41" +tracing = { workspace = true } tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } tracing-log = "0.2.0" time = "0.3.41" From 15dc2c4a7d5ff5463257f303f1e75e6ba57b5561 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 18:33:27 -0400 Subject: [PATCH 24/40] move shared authentication service to shared --- Cargo.lock | 4 + crates/p2p/src/lib.rs | 12 +- crates/p2p/src/message/mod.rs | 22 +- crates/p2p/src/protocol.rs | 46 +- crates/shared/Cargo.toml | 4 + crates/shared/src/p2p/mod.rs | 3 + crates/shared/src/p2p/service.rs | 453 ++++++++++++++++++ crates/validator/src/p2p/mod.rs | 385 ++------------- .../src/validators/hardware_challenge.rs | 1 + 9 files changed, 582 insertions(+), 348 deletions(-) create mode 100644 crates/shared/src/p2p/service.rs diff --git a/Cargo.lock b/Cargo.lock index a93d4a0e..c54a0ad1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6223,6 +6223,7 @@ dependencies = [ "iroh", "log", "mockito", + "p2p", "prometheus 0.14.0", "rand 0.8.5", "rand 0.9.1", @@ -8232,12 +8233,14 @@ dependencies = [ "base64 0.22.1", "chrono", "dashmap", + "futures", "futures-util", "google-cloud-storage", "hex", "iroh", "log", "nalgebra", + "p2p", "rand 0.8.5", "rand 0.9.1", "redis", @@ -8246,6 +8249,7 @@ dependencies = [ "serde_json", "subtle", "tokio", + "tokio-util", "url", "utoipa", "uuid", diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 4a2b176e..4f032bbf 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -14,9 +14,9 @@ mod message; mod protocol; use behaviour::Behaviour; -use protocol::Protocols; pub use message::*; +pub use protocol::*; pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type ResponseChannel = libp2p::request_response::ResponseChannel; @@ -120,7 +120,8 @@ impl Node { } Some(message) = outgoing_message_rx.recv() => { match message { - OutgoingMessage::Request((peer, request)) => { + OutgoingMessage::Request((peer, _addrs, request)) => { + // TODO: if we're not connected to the peer, we should dial it swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { @@ -240,6 +241,11 @@ impl NodeBuilder { self } + pub fn with_protocols(mut self, protocols: Protocols) -> Self { + self.protocols.join(protocols); + self + } + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { self.bootnodes.push(bootnode); self @@ -372,7 +378,7 @@ mod test { // send request from node1->node2 let request = message::Request::GetTaskLogs; outgoing_message_tx1 - .send(request.into_outgoing_message(node2_peer_id)) + .send(request.into_outgoing_message(node2_peer_id, vec![])) .await .unwrap(); let message = incoming_message_rx2.recv().await.unwrap(); diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index adff99ac..dc2403e3 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -1,3 +1,4 @@ +use crate::Protocol; use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; @@ -15,7 +16,7 @@ pub struct IncomingMessage { #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum OutgoingMessage { - Request((PeerId, Request)), + Request((PeerId, Vec, Request)), Response( ( libp2p::request_response::ResponseChannel, @@ -35,8 +36,23 @@ pub enum Request { } impl Request { - pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { - OutgoingMessage::Request((peer, self)) + pub fn into_outgoing_message( + self, + peer: PeerId, + multiaddrs: Vec, + ) -> OutgoingMessage { + OutgoingMessage::Request((peer, multiaddrs, self)) + } + + pub fn protocol(&self) -> Protocol { + match self { + Request::ValidatorAuthentication(_) => Protocol::ValidatorAuthentication, + Request::HardwareChallenge(_) => Protocol::HardwareChallenge, + Request::Invite(_) => Protocol::Invite, + Request::GetTaskLogs => Protocol::GetTaskLogs, + Request::Restart => Protocol::Restart, + Request::General(_) => Protocol::General, + } } } diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index df423ef8..ae839cec 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -2,7 +2,7 @@ use libp2p::StreamProtocol; use std::{collections::HashSet, hash::Hash}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) enum Protocol { +pub enum Protocol { // validator -> worker ValidatorAuthentication, // validator -> worker @@ -33,42 +33,70 @@ impl Protocol { } #[derive(Debug, Clone)] -pub(crate) struct Protocols(HashSet); +pub struct Protocols(HashSet); impl Protocols { - pub(crate) fn new() -> Self { + pub fn new() -> Self { Self(HashSet::new()) } - pub(crate) fn with_validator_authentication(mut self) -> Self { + pub fn has_validator_authentication(&self) -> bool { + self.0.contains(&Protocol::ValidatorAuthentication) + } + + pub fn has_hardware_challenge(&self) -> bool { + self.0.contains(&Protocol::HardwareChallenge) + } + + pub fn has_invite(&self) -> bool { + self.0.contains(&Protocol::Invite) + } + + pub fn has_get_task_logs(&self) -> bool { + self.0.contains(&Protocol::GetTaskLogs) + } + + pub fn has_restart(&self) -> bool { + self.0.contains(&Protocol::Restart) + } + + pub fn has_general(&self) -> bool { + self.0.contains(&Protocol::General) + } + + pub fn with_validator_authentication(mut self) -> Self { self.0.insert(Protocol::ValidatorAuthentication); self } - pub(crate) fn with_hardware_challenge(mut self) -> Self { + pub fn with_hardware_challenge(mut self) -> Self { self.0.insert(Protocol::HardwareChallenge); self } - pub(crate) fn with_invite(mut self) -> Self { + pub fn with_invite(mut self) -> Self { self.0.insert(Protocol::Invite); self } - pub(crate) fn with_get_task_logs(mut self) -> Self { + pub fn with_get_task_logs(mut self) -> Self { self.0.insert(Protocol::GetTaskLogs); self } - pub(crate) fn with_restart(mut self) -> Self { + pub fn with_restart(mut self) -> Self { self.0.insert(Protocol::Restart); self } - pub(crate) fn with_general(mut self) -> Self { + pub fn with_general(mut self) -> Self { self.0.insert(Protocol::General); self } + + pub(crate) fn join(&mut self, other: Protocols) { + self.0.extend(other.0); + } } impl IntoIterator for Protocols { diff --git a/crates/shared/Cargo.toml b/crates/shared/Cargo.toml index 9afdafff..4d3a8760 100644 --- a/crates/shared/Cargo.toml +++ b/crates/shared/Cargo.toml @@ -15,6 +15,8 @@ default = [] testnet = [] [dependencies] +p2p = { workspace = true} + tokio = { workspace = true } alloy = { workspace = true } alloy-provider = { workspace = true } @@ -40,3 +42,5 @@ iroh = { workspace = true } rand_v8 = { workspace = true } subtle = "2.6.1" utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } +futures = { workspace = true } +tokio-util = { workspace = true } diff --git a/crates/shared/src/p2p/mod.rs b/crates/shared/src/p2p/mod.rs index f505f3b1..cac69a8a 100644 --- a/crates/shared/src/p2p/mod.rs +++ b/crates/shared/src/p2p/mod.rs @@ -1,6 +1,9 @@ pub mod client; pub mod messages; pub mod protocol; +mod service; pub use client::P2PClient; pub use protocol::*; + +pub use service::*; diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs new file mode 100644 index 00000000..f5a7bbe3 --- /dev/null +++ b/crates/shared/src/p2p/service.rs @@ -0,0 +1,453 @@ +use crate::web3::wallet::Wallet; +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use p2p::{ + IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, Protocol, + Protocols, Response, ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, + ValidatorAuthenticationSolutionRequest, +}; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +pub struct OutgoingRequest { + pub peer_wallet_address: alloy::primitives::Address, + pub request: p2p::Request, + pub peer_id: String, + pub multiaddrs: Vec, + pub response_tx: tokio::sync::oneshot::Sender, +} + +/// A p2p service implementation that is used by the validator and the orchestrator. +/// It handles the authentication protocol used before sending +/// requests to the worker. +pub struct Service { + _node: Node, + dial_tx: p2p::DialSender, + incoming_messages_rx: Receiver, + outgoing_messages_rx: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + +impl Service { + pub fn new( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + protocols: Protocols, + ) -> Result<(Self, Sender)> { + let (node, dial_tx, incoming_messages_rx, outgoing_messages) = + build_p2p_node(keypair, port, cancellation_token.clone(), protocols.clone()) + .context("failed to build p2p node")?; + let (outgoing_messages_tx, outgoing_messages_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Self { + _node: node, + dial_tx, + incoming_messages_rx, + outgoing_messages_rx, + cancellation_token, + context: Context::new(outgoing_messages, wallet, protocols), + }, + outgoing_messages_tx, + )) + } + + pub async fn run(self) { + use futures::StreamExt as _; + + let Self { + _node, + dial_tx, + mut incoming_messages_rx, + mut outgoing_messages_rx, + cancellation_token, + context, + } = self; + + let mut message_handlers = FuturesUnordered::new(); + + loop { + tokio::select! { + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = outgoing_messages_rx.recv() => { + if let Err(e) = handle_outgoing_message(message, dial_tx.clone(), context.clone()) + .await { + log::error!("failed to handle outgoing message: {e}"); + } + } + Some(message) = incoming_messages_rx.recv() => { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +fn build_p2p_node( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + protocols: Protocols, +) -> Result<( + Node, + p2p::DialSender, + Receiver, + Sender, +)> { + NodeBuilder::new() + .with_keypair(keypair) + .with_port(port) + .with_validator_authentication() + .with_protocols(protocols) + .with_cancellation_token(cancellation_token) + .try_build() +} + +#[derive(Clone)] +struct Context { + // outbound message channel; receiver is held by libp2p node + outgoing_messages: Sender, + + // ongoing authentication requests + ongoing_auth_requests: Arc>>, + is_authenticated_with_peer: Arc>>, + + // this assumes that there is only one outbound request per protocol per peer at a time, + // is this a correct assumption? + // response channel is for sending the response back to the caller who initiated the request + ongoing_outbound_requests: + Arc>>>, + + wallet: Wallet, + protocols: Protocols, +} + +#[derive(Debug)] +struct OngoingAuthChallenge { + peer_wallet_address: alloy::primitives::Address, + auth_challenge_request_message: String, + outgoing_message: p2p::Request, + response_tx: tokio::sync::oneshot::Sender, +} + +impl Context { + fn new( + outgoing_messages: Sender, + wallet: Wallet, + protocols: Protocols, + ) -> Self { + Self { + outgoing_messages, + ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + is_authenticated_with_peer: Arc::new(RwLock::new(HashSet::new())), + ongoing_outbound_requests: Arc::new(RwLock::new(HashMap::new())), + wallet, + protocols, + } + } +} + +async fn handle_outgoing_message( + message: OutgoingRequest, + dial_tx: p2p::DialSender, + context: Context, +) -> Result<()> { + use rand_v8::rngs::OsRng; + use rand_v8::Rng as _; + use std::str::FromStr as _; + + let OutgoingRequest { + peer_wallet_address, + request, + peer_id, + multiaddrs, + response_tx, + } = message; + + let peer_id = PeerId::from_str(&peer_id).context("failed to parse peer id")?; + + // check if we're authenticated already + let is_authenticated_with_peer = context.is_authenticated_with_peer.read().await; + if is_authenticated_with_peer.contains(&peer_id) { + log::debug!( + "already authenticated with peer {peer_id}, skipping validation authentication" + ); + // multiaddresses are already known, as we've connected to them previously + context + .outgoing_messages + .send(request.into_outgoing_message(peer_id, vec![])) + .await + .context("failed to send outgoing message")?; + return Ok(()); + } + + log::debug!("sending validation authentication request to {peer_id}"); + + // first, dial the worker + // ensure there's no ongoing challenge + // use write-lock to make this atomic until we finish sending the auth request and writing to the map + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + if ongoing_auth_requests.contains_key(&peer_id) { + bail!("ongoing auth request for {} already exists", peer_id); + } + + let multiaddrs = multiaddrs + .iter() + .filter_map(|addr| p2p::Multiaddr::from_str(addr).ok()?.with_p2p(peer_id).ok()) + .collect::>(); + if multiaddrs.is_empty() { + bail!("no valid multiaddrs for peer id {peer_id}"); + } + + // TODO: we can improve this by checking if we're already connected to the peer before dialing + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx + .send((multiaddrs.clone(), res_tx)) + .await + .context("failed to send dial request")?; + res_rx + .await + .context("failed to receive dial response")? + .context("failed to dial worker")?; + + // create the authentication challenge request message + let challenge_bytes: [u8; 32] = OsRng.gen(); + let auth_challenge_message: String = hex::encode(challenge_bytes); + + let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + message: auth_challenge_message.clone(), + } + .into(); + let outgoing_message = req.into_outgoing_message(peer_id, multiaddrs); + log::debug!("sending ValidatorAuthenticationInitiationRequest to {peer_id}"); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send outgoing message")?; + + // store the ongoing auth challenge + let ongoing_challenge = OngoingAuthChallenge { + peer_wallet_address, + auth_challenge_request_message: auth_challenge_message.clone(), + outgoing_message: request, + response_tx, + }; + + ongoing_auth_requests.insert(peer_id, ongoing_challenge); + Ok(()) +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel: _, + } => { + log::error!( + "node should not receive incoming requests: {request:?} from {}", + message.peer + ); + } + Libp2pIncomingMessage::Response { + request_id: _, + response, + } => { + log::debug!("received incoming response {response:?}"); + handle_incoming_response(message.peer, response, context) + .await + .context("failed to handle incoming response")?; + } + } + Ok(()) +} + +async fn handle_incoming_response( + from: PeerId, + response: p2p::Response, + context: Context, +) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(resp) => { + log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); + handle_validation_authentication_response(from, resp, context) + .await + .context("failed to handle validator authentication response")?; + } + p2p::Response::HardwareChallenge(ref resp) => { + if !context.protocols.has_hardware_challenge() { + bail!("received HardwareChallengeResponse from {from}, but hardware challenge protocol is not enabled"); + } + + log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = + ongoing_outbound_requests.remove(&(from, Protocol::HardwareChallenge)) + else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" + ); + }; + let _ = response_tx.send(response); + } + p2p::Response::Invite(ref resp) => { + if !context.protocols.has_invite() { + bail!("received InviteResponse from {from}, but invite protocol is not enabled"); + } + + log::debug!("received InviteResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::Invite)) + else { + bail!("no ongoing invite for peer {from}, cannot handle InviteResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::GetTaskLogs(ref resp) => { + if !context.protocols.has_get_task_logs() { + bail!("received GetTaskLogsResponse from {from}, but get task logs protocol is not enabled"); + } + + log::debug!("received GetTaskLogsResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = + ongoing_outbound_requests.remove(&(from, Protocol::GetTaskLogs)) + else { + bail!("no ongoing GetTaskLogs for peer {from}, cannot handle GetTaskLogsResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::Restart(ref resp) => { + if !context.protocols.has_restart() { + bail!("received RestartResponse from {from}, but restart protocol is not enabled"); + } + + log::debug!("received RestartResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::Restart)) + else { + bail!("no ongoing Restart for peer {from}, cannot handle RestartResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::General(ref resp) => { + if !context.protocols.has_general() { + bail!("received GeneralResponse from {from}, but general protocol is not enabled"); + } + + log::debug!("received GeneralResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::General)) + else { + bail!("no ongoing General for peer {from}, cannot handle GeneralResponse"); + }; + let _ = response_tx.send(response); + } + } + + Ok(()) +} + +async fn handle_validation_authentication_response( + from: PeerId, + response: p2p::ValidatorAuthenticationResponse, + context: Context, +) -> Result<()> { + use crate::security::request_signer::sign_message; + use std::str::FromStr as _; + + match response { + ValidatorAuthenticationResponse::Initiation(req) => { + let ongoing_auth_requests = context.ongoing_auth_requests.read().await; + let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" + ); + }; + + let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) + else { + bail!("failed to parse signature from response"); + }; + + // recover address from the challenge message that the peer signed + let Ok(recovered_address) = parsed_signature + .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) + else { + bail!("Failed to recover address from response signature") + }; + + // verify the recovered address matches the expected worker wallet address + if recovered_address != ongoing_challenge.peer_wallet_address { + bail!( + "peer address verification failed: expected {}, got {recovered_address}", + ongoing_challenge.peer_wallet_address, + ) + } + + log::debug!("auth challenge initiation response received from node: {from}"); + let signature = sign_message(&req.message, &context.wallet).await.unwrap(); + + let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req = req.into_outgoing_message(from, vec![]); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + ValidatorAuthenticationResponse::Solution(req) => { + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" + ); + }; + + match req { + p2p::ValidatorAuthenticationSolutionResponse::Granted => {} + p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + log::debug!("auth challenge rejected by node: {from}"); + return Ok(()); + } + } + + // auth was granted, finally send the hardware challenge + let mut is_authenticated_with_peer = context.is_authenticated_with_peer.write().await; + is_authenticated_with_peer.insert(from); + + let protocol = ongoing_challenge.outgoing_message.protocol(); + let req = ongoing_challenge + .outgoing_message + .into_outgoing_message(from, vec![]); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + ongoing_outbound_requests.insert((from, protocol), ongoing_challenge.response_tx); + } + } + Ok(()) +} diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index e756d33f..fcce43ec 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,87 +1,91 @@ use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; -use p2p::{ - IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, - ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, - ValidatorAuthenticationSolutionRequest, -}; +use p2p::{Keypair, Protocols}; +use shared::p2p::OutgoingRequest; +use shared::p2p::Service as P2PService; use shared::web3::wallet::Wallet; -use std::collections::HashMap; -use std::sync::Arc; use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; pub struct Service { - _node: Node, - dial_tx: p2p::DialSender, - incoming_messages: Receiver, + inner: P2PService, + + // converts incoming hardware challenges to outgoing requests + outgoing_message_tx: Sender, hardware_challenge_rx: Receiver, - cancellation_token: CancellationToken, - context: Context, } impl Service { - #[allow(clippy::too_many_arguments)] pub fn new( - keypair: p2p::Keypair, + keypair: Keypair, port: u16, cancellation_token: CancellationToken, wallet: Wallet, ) -> Result<(Self, Sender)> { - let (node, dial_tx, incoming_messages, outgoing_messages) = - build_p2p_node(keypair, port, cancellation_token.clone()) - .context("failed to build p2p node")?; let (hardware_challenge_tx, hardware_challenge_rx) = tokio::sync::mpsc::channel(100); - + let (inner, outgoing_message_tx) = P2PService::new( + keypair, + port, + cancellation_token.clone(), + wallet, + Protocols::new() + .with_hardware_challenge() + .with_validator_authentication(), + ) + .context("failed to create P2P service")?; Ok(( Self { - _node: node, - dial_tx, - incoming_messages, + inner, + outgoing_message_tx, hardware_challenge_rx, - cancellation_token, - context: Context::new(outgoing_messages, wallet), }, hardware_challenge_tx, )) } - pub async fn run(self) { + pub async fn run(self) -> Result<()> { use futures::StreamExt as _; let Self { - _node, - dial_tx, - mut incoming_messages, + inner, + outgoing_message_tx, mut hardware_challenge_rx, - cancellation_token, - context, } = self; - let mut message_handlers = FuturesUnordered::new(); + tokio::task::spawn(inner.run()); + + let mut futures = FuturesUnordered::new(); loop { tokio::select! { - _ = cancellation_token.cancelled() => { - break; - } - Some(message) = hardware_challenge_rx.recv() => { - if let Err(e) = handle_outgoing_hardware_challenge(message, dial_tx.clone(), context.clone()) - .await { - log::error!("failed to handle outgoing hardware challenge: {e}"); - } + Some(request) = hardware_challenge_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::HardwareChallenge(resp) => resp.response, + _ => bail!("unexpected response type for hardware challenge request"), + }; + let _ = request.response_tx.send(resp); + Ok(()) + }; + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::HardwareChallengeRequest { + challenge: request.challenge, + timestamp: std::time::SystemTime::now(), + }.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing hardware challenge request")?; } - Some(message) = incoming_messages.recv() => { - let context = context.clone(); - let handle = tokio::task::spawn( - handle_incoming_message(message, context) - ); - message_handlers.push(handle); - } - Some(res) = message_handlers.next() => { + Some(res) = futures.next() => { if let Err(e) = res { - log::error!("failed to handle incoming message: {e}"); + log::error!("failed to handle hardware challenge request: {e}"); } } } @@ -89,25 +93,6 @@ impl Service { } } -fn build_p2p_node( - keypair: p2p::Keypair, - port: u16, - cancellation_token: CancellationToken, -) -> Result<( - Node, - p2p::DialSender, - Receiver, - Sender, -)> { - NodeBuilder::new() - .with_keypair(keypair) - .with_port(port) - .with_validator_authentication() - .with_hardware_challenge() - .with_cancellation_token(cancellation_token) - .try_build() -} - pub struct HardwareChallengeRequest { pub(crate) worker_wallet_address: alloy::primitives::Address, pub(crate) worker_p2p_id: String, @@ -115,269 +100,3 @@ pub struct HardwareChallengeRequest { pub(crate) challenge: p2p::ChallengeRequest, pub(crate) response_tx: tokio::sync::oneshot::Sender, } - -#[derive(Clone)] -struct Context { - outgoing_messages: Sender, - ongoing_auth_requests: Arc>>, - ongoing_hardware_challenges: - Arc>>>, - wallet: Wallet, -} - -#[derive(Debug)] -struct OngoingAuthChallenge { - worker_wallet_address: alloy::primitives::Address, - auth_challenge_request_message: String, - hardware_challenge: p2p::ChallengeRequest, - hardware_challenge_response_tx: tokio::sync::oneshot::Sender, -} - -impl Context { - fn new(outgoing_messages: Sender, wallet: Wallet) -> Self { - Self { - outgoing_messages, - ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), - ongoing_hardware_challenges: Arc::new(RwLock::new(HashMap::new())), - wallet, - } - } -} - -async fn handle_outgoing_hardware_challenge( - request: HardwareChallengeRequest, - dial_tx: p2p::DialSender, - context: Context, -) -> Result<()> { - use rand_v8::rngs::OsRng; - use rand_v8::Rng as _; - use std::str::FromStr as _; - - let HardwareChallengeRequest { - worker_wallet_address, - worker_p2p_id, - worker_addresses, - challenge, - response_tx, - } = request; - - log::debug!( - "sending hardware challenge to {worker_p2p_id} with addresses {worker_addresses:?}" - ); - - // first, dial the worker - let worker_p2p_id = - PeerId::from_str(&worker_p2p_id).context("failed to parse worker p2p id")?; - - // ensure there's no ongoing challenge - // use write-lock to make this atomic until we finish sending the auth request and writing to the map - let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; - if ongoing_auth_requests.contains_key(&worker_p2p_id) { - bail!( - "ongoing hardware challenge for {} already exists", - worker_p2p_id - ); - } - - let multiaddrs = worker_addresses - .iter() - .filter_map(|addr| { - p2p::Multiaddr::from_str(addr) - .ok()? - .with_p2p(worker_p2p_id) - .ok() - }) - .collect::>(); - if multiaddrs.is_empty() { - bail!("no valid multiaddrs for worker p2p id {worker_p2p_id}"); - } - - // TODO: we can improve this by checking if we're already connected to the peer before dialing - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx - .send((multiaddrs, res_tx)) - .await - .context("failed to send dial request")?; - res_rx - .await - .context("failed to receive dial response")? - .context("failed to dial worker")?; - - // create the authentication challenge request message - let challenge_bytes: [u8; 32] = OsRng.gen(); - let auth_challenge_message: String = hex::encode(challenge_bytes); - - let req: p2p::Request = ValidatorAuthenticationInitiationRequest { - message: auth_challenge_message.clone(), - } - .into(); - let outgoing_message = req.into_outgoing_message(worker_p2p_id); - log::debug!( - "sending ValidatorAuthenticationInitiationRequest to {worker_p2p_id}" - ); - context - .outgoing_messages - .send(outgoing_message) - .await - .context("failed to send outgoing message")?; - - // store the ongoing hardware challenge - let ongoing_challenge = OngoingAuthChallenge { - worker_wallet_address, - auth_challenge_request_message: auth_challenge_message.clone(), - hardware_challenge: challenge, - hardware_challenge_response_tx: response_tx, - }; - - ongoing_auth_requests.insert(worker_p2p_id, ongoing_challenge); - Ok(()) -} - -async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { - match message.message { - Libp2pIncomingMessage::Request { - request_id: _, - request, - channel: _, - } => { - log::error!( - "validator should not receive incoming requests: {request:?} from {}", - message.peer - ); - } - Libp2pIncomingMessage::Response { - request_id: _, - response, - } => { - log::debug!("received incoming response {response:?}"); - handle_incoming_response(message.peer, response, context) - .await - .context("failed to handle incoming response")?; - } - } - Ok(()) -} - -async fn handle_incoming_response( - from: PeerId, - response: p2p::Response, - context: Context, -) -> Result<()> { - match response { - p2p::Response::ValidatorAuthentication(resp) => { - log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); - handle_validation_authentication_response(from, resp, context) - .await - .context("failed to handle validator authentication response")?; - } - p2p::Response::HardwareChallenge(resp) => { - log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); - let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; - let Some(response_tx) = ongoing_hardware_challenges.remove(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" - ); - }; - let _ = response_tx.send(resp.response); // timestamp is silently dropped, is it actually used anywhere? - } - p2p::Response::Invite(_) => { - log::error!("validator should not receive `Invite` responses: from {from}"); - } - p2p::Response::GetTaskLogs(_) => { - log::error!("validator should not receive `GetTaskLogs` responses: from {from}"); - } - p2p::Response::Restart(_) => { - log::error!("validator should not receive `Restart` responses: from {from}"); - } - p2p::Response::General(_) => { - todo!() - } - } - - Ok(()) -} - -async fn handle_validation_authentication_response( - from: PeerId, - response: p2p::ValidatorAuthenticationResponse, - context: Context, -) -> Result<()> { - use shared::security::request_signer::sign_message; - use std::str::FromStr as _; - - match response { - ValidatorAuthenticationResponse::Initiation(req) => { - let ongoing_auth_requests = context.ongoing_auth_requests.read().await; - let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" - ); - }; - - let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) - else { - bail!("Failed to parse signature from server"); - }; - - // recover address from the challenge message that the server signed - let Ok(recovered_address) = parsed_signature - .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) - else { - bail!("Failed to recover address from server signature") - }; - - // verify the recovered address matches the expected worker wallet address - if recovered_address != ongoing_challenge.worker_wallet_address { - bail!( - "Server address verification failed: expected {}, got {recovered_address}", - ongoing_challenge.worker_wallet_address, - ) - } - - log::debug!("auth challenge initiation response received from node: {from}"); - let signature = sign_message(&req.message, &context.wallet).await.unwrap(); - - let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); - let req = req.into_outgoing_message(from); - context - .outgoing_messages - .send(req) - .await - .context("failed to send outgoing message")?; - } - ValidatorAuthenticationResponse::Solution(req) => { - let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; - let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" - ); - }; - - match req { - p2p::ValidatorAuthenticationSolutionResponse::Granted => {} - p2p::ValidatorAuthenticationSolutionResponse::Rejected => { - log::debug!("auth challenge rejected by node: {from}"); - return Ok(()); - } - } - - // auth was granted, finally send the hardware challenge - let req: p2p::Request = p2p::HardwareChallengeRequest { - challenge: ongoing_challenge.hardware_challenge, - timestamp: std::time::SystemTime::now(), - } - .into(); - let req = req.into_outgoing_message(from); - context - .outgoing_messages - .send(req) - .await - .context("failed to send outgoing message")?; - - let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; - ongoing_hardware_challenges - .insert(from, ongoing_challenge.hardware_challenge_response_tx); - } - } - Ok(()) -} diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index de04a96a..f84e1dea 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -53,6 +53,7 @@ impl HardwareChallenge { challenge: challenge_with_timestamp, response_tx, }; + // Send challenge via P2P self.challenge_tx .send(hardware_challenge) From 0046fac172db48c3cbb843ade33385b70775579d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 18:45:26 -0400 Subject: [PATCH 25/40] implement orchestrator p2p service --- Cargo.lock | 1 + crates/orchestrator/Cargo.toml | 5 +- crates/orchestrator/src/api/tests/helper.rs | 8 +- crates/orchestrator/src/lib.rs | 1 + crates/orchestrator/src/p2p/mod.rs | 174 ++++++++++++++++++ crates/p2p/Cargo.toml | 2 +- crates/validator/src/p2p/mod.rs | 6 +- .../src/validators/hardware_challenge.rs | 2 +- 8 files changed, 189 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c54a0ad1..1bcee1d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6234,6 +6234,7 @@ dependencies = [ "serde_json", "shared", "tokio", + "tokio-util", "url", "utoipa", "utoipa-swagger-ui", diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index 6ac53140..cf31fdf5 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -7,6 +7,9 @@ edition.workspace = true workspace = true [dependencies] +p2p = { workspace = true} +shared = { workspace = true } + actix-web = { workspace = true } actix-web-prometheus = "0.1.2" alloy = { workspace = true } @@ -28,8 +31,8 @@ redis-test = { workspace = true } reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -shared = { workspace = true } tokio = { workspace = true } +tokio-util = { workspace = true } url = { workspace = true } utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web", "debug-embed", "reqwest", "vendored"] } diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index ca2e65c1..a5282b3a 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use url::Url; #[cfg(test)] -pub async fn create_test_app_state() -> Data { +pub(crate) async fn create_test_app_state() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -69,7 +69,7 @@ pub async fn create_test_app_state() -> Data { } #[cfg(test)] -pub async fn create_test_app_state_with_nodegroups() -> Data { +pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -139,7 +139,7 @@ pub async fn create_test_app_state_with_nodegroups() -> Data { } #[cfg(test)] -pub fn setup_contract() -> Contracts { +pub(crate) fn setup_contract() -> Contracts { let coordinator_key = "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"; let rpc_url: Url = Url::parse("http://localhost:8545").unwrap(); let wallet = Wallet::new(coordinator_key, rpc_url).unwrap(); @@ -154,7 +154,7 @@ pub fn setup_contract() -> Contracts { } #[cfg(test)] -pub async fn create_test_app_state_with_metrics() -> Data { +pub(crate) async fn create_test_app_state_with_metrics() -> Data { use shared::utils::MockStorageProvider; use crate::{ diff --git a/crates/orchestrator/src/lib.rs b/crates/orchestrator/src/lib.rs index 5f82d58d..a1d41f39 100644 --- a/crates/orchestrator/src/lib.rs +++ b/crates/orchestrator/src/lib.rs @@ -17,6 +17,7 @@ pub use metrics::webhook_sender::MetricsWebhookSender; pub use metrics::MetricsContext; pub use node::invite::NodeInviter; pub use p2p::client::P2PClient; +pub use p2p::Service as P2PService; pub use plugins::node_groups::NodeGroupConfiguration; pub use plugins::node_groups::NodeGroupsPlugin; pub use plugins::webhook::WebhookConfig; diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index 1d331315..63aa2192 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -1 +1,175 @@ pub(crate) mod client; + +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use futures::FutureExt; +use p2p::{Keypair, Protocols}; +use shared::p2p::OutgoingRequest; +use shared::p2p::Service as P2PService; +use shared::web3::wallet::Wallet; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio_util::sync::CancellationToken; + +pub struct Service { + inner: P2PService, + outgoing_message_tx: Sender, + invite_rx: Receiver, + get_task_logs_rx: Receiver, + restart_task_rx: Receiver, +} + +impl Service { + pub fn new( + keypair: Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + ) -> Result<( + Self, + Sender, + Sender, + Sender, + )> { + let (invite_tx, invite_rx) = tokio::sync::mpsc::channel(100); + let (get_task_logs_tx, get_task_logs_rx) = tokio::sync::mpsc::channel(100); + let (restart_task_tx, restart_task_rx) = tokio::sync::mpsc::channel(100); + let (inner, outgoing_message_tx) = P2PService::new( + keypair, + port, + cancellation_token.clone(), + wallet, + Protocols::new() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_validator_authentication(), + ) + .context("failed to create p2p service")?; + Ok(( + Self { + inner, + outgoing_message_tx, + invite_rx, + get_task_logs_rx, + restart_task_rx, + }, + invite_tx, + get_task_logs_tx, + restart_task_tx, + )) + } + + pub async fn run(self) -> Result<()> { + use futures::StreamExt as _; + + let Self { + inner, + outgoing_message_tx, + mut invite_rx, + mut get_task_logs_rx, + mut restart_task_rx, + } = self; + + tokio::task::spawn(inner.run()); + + let mut futures = FuturesUnordered::new(); + + loop { + tokio::select! { + Some(request) = invite_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::Invite(resp) => resp, + _ => bail!("unexpected response type for invite request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: request.invite.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing invite request")?; + } + Some(request) = get_task_logs_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::GetTaskLogs(resp) => resp, + _ => bail!("unexpected response type for get task logs request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::Request::GetTaskLogs.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing get task logs request")?; + } + Some(request) = restart_task_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::Restart(resp) => resp, + _ => bail!("unexpected response type for restart task request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::Request::Restart.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing restart task request")?; + } + Some(res) = futures.next() => { + if let Err(e) = res { + log::error!("failed to handle response conversion: {e}"); + } + } + } + } + } +} + +pub struct InviteRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) invite: p2p::InviteRequest, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} + +pub struct GetTaskLogsRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} + +pub struct RestartTaskRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index ba52d570..dc5efc40 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -11,7 +11,7 @@ anyhow = {workspace = true} nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} -tokio-util = { workspace = true, features = ["rt"] } +tokio-util = { workspace = true } [lints] workspace = true diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index fcce43ec..dc6b23e6 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -32,7 +32,7 @@ impl Service { .with_hardware_challenge() .with_validator_authentication(), ) - .context("failed to create P2P service")?; + .context("failed to create p2p service")?; Ok(( Self { inner, @@ -65,7 +65,7 @@ impl Service { p2p::Response::HardwareChallenge(resp) => resp.response, _ => bail!("unexpected response type for hardware challenge request"), }; - let _ = request.response_tx.send(resp); + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) }; futures.push(fut); @@ -85,7 +85,7 @@ impl Service { } Some(res) = futures.next() => { if let Err(e) = res { - log::error!("failed to handle hardware challenge request: {e}"); + log::error!("failed to handle response conversion: {e}"); } } } diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index f84e1dea..6970355d 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -53,7 +53,7 @@ impl HardwareChallenge { challenge: challenge_with_timestamp, response_tx, }; - + // Send challenge via P2P self.challenge_tx .send(hardware_challenge) From 08a10ec0377c15204b69dac94ea09ff5baed3bfb Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:16:18 -0400 Subject: [PATCH 26/40] update orchestrator to use libp2p node --- crates/orchestrator/src/api/routes/groups.rs | 24 +++-- crates/orchestrator/src/api/routes/nodes.rs | 42 ++++++-- crates/orchestrator/src/api/server.rs | 32 +++--- crates/orchestrator/src/api/tests/helper.rs | 42 +++----- crates/orchestrator/src/discovery/monitor.rs | 7 +- crates/orchestrator/src/lib.rs | 1 - crates/orchestrator/src/main.rs | 73 +++++++++---- crates/orchestrator/src/node/invite.rs | 82 ++++++++------- crates/orchestrator/src/p2p/client.rs | 102 ------------------- crates/orchestrator/src/p2p/mod.rs | 22 ++-- 10 files changed, 192 insertions(+), 235 deletions(-) delete mode 100644 crates/orchestrator/src/p2p/client.rs diff --git a/crates/orchestrator/src/api/routes/groups.rs b/crates/orchestrator/src/api/routes/groups.rs index 44b22cd9..414f524a 100644 --- a/crates/orchestrator/src/api/routes/groups.rs +++ b/crates/orchestrator/src/api/routes/groups.rs @@ -236,9 +236,6 @@ async fn fetch_node_logs_p2p( match node { Some(node) => { - // Check if P2P client is available - let p2p_client = app_state.p2p_client.clone(); - // Check if node has P2P information let (worker_p2p_id, worker_p2p_addresses) = match (&node.worker_p2p_id, &node.worker_p2p_addresses) { @@ -254,11 +251,22 @@ async fn fetch_node_logs_p2p( }; // Send P2P request for task logs - match tokio::time::timeout( - Duration::from_secs(NODE_REQUEST_TIMEOUT), - p2p_client.get_task_logs(node_address, worker_p2p_id, worker_p2p_addresses), - ) - .await + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let get_task_logs_request = crate::p2p::GetTaskLogsRequest { + worker_wallet_address: node_address, + worker_p2p_id: worker_p2p_id.clone(), + worker_addresses: worker_p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { + error!("Failed to send GetTaskLogsRequest for node {node_address}: {e}"); + return json!({ + "success": false, + "error": format!("Failed to send request: {}", e), + "status": node.status.to_string() + }); + }; + match tokio::time::timeout(Duration::from_secs(NODE_REQUEST_TIMEOUT), response_rx).await { Ok(Ok(log_lines)) => { json!({ diff --git a/crates/orchestrator/src/api/routes/nodes.rs b/crates/orchestrator/src/api/routes/nodes.rs index a260706a..9debddde 100644 --- a/crates/orchestrator/src/api/routes/nodes.rs +++ b/crates/orchestrator/src/api/routes/nodes.rs @@ -164,11 +164,22 @@ async fn restart_node_task(node_id: web::Path, app_state: Data .as_ref() .expect("worker_p2p_addresses should be present"); - match app_state - .p2p_client - .restart_task(node_address, p2p_id, p2p_addresses) - .await - { + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let restart_task_request = crate::p2p::RestartTaskRequest { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.restart_task_tx.send(restart_task_request).await { + error!("Failed to send restart task request: {e}"); + return HttpResponse::InternalServerError().json(json!({ + "success": false, + "error": "Failed to send restart task request" + })); + } + + match response_rx.await { Ok(_) => HttpResponse::Ok().json(json!({ "success": true, "message": "Task restarted successfully" @@ -240,11 +251,22 @@ async fn get_node_logs(node_id: web::Path, app_state: Data) -> })); }; - match app_state - .p2p_client - .get_task_logs(node_address, p2p_id, p2p_addresses) - .await - { + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let get_task_logs_request = crate::p2p::GetTaskLogsRequest { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { + error!("Failed to send get task logs request: {e}"); + return HttpResponse::InternalServerError().json(json!({ + "success": false, + "error": "Failed to send get task logs request" + })); + } + + match response_rx.await { Ok(logs) => HttpResponse::Ok().json(json!({ "success": true, "logs": logs diff --git a/crates/orchestrator/src/api/server.rs b/crates/orchestrator/src/api/server.rs index 095bcb6c..fc5943c9 100644 --- a/crates/orchestrator/src/api/server.rs +++ b/crates/orchestrator/src/api/server.rs @@ -5,7 +5,7 @@ use crate::api::routes::task::tasks_routes; use crate::api::routes::{heartbeat::heartbeat_routes, metrics::metrics_routes}; use crate::metrics::MetricsContext; use crate::models::node::NodeStatus; -use crate::p2p::client::P2PClient; +use crate::p2p::{GetTaskLogsRequest, RestartTaskRequest}; use crate::plugins::node_groups::NodeGroupsPlugin; use crate::scheduler::Scheduler; use crate::store::core::{RedisStore, StoreContext}; @@ -23,6 +23,7 @@ use shared::utils::StorageProvider; use shared::web3::contracts::core::builder::Contracts; use shared::web3::wallet::WalletProvider; use std::sync::Arc; +use tokio::sync::mpsc::Sender; use utoipa::{ openapi::security::{ApiKey, ApiKeyValue, SecurityScheme}, Modify, OpenApi, @@ -116,17 +117,18 @@ async fn health_check(data: web::Data) -> HttpResponse { } pub(crate) struct AppState { - pub store_context: Arc, - pub storage_provider: Option>, - pub heartbeats: Arc, - pub redis_store: Arc, - pub hourly_upload_limit: i64, - pub contracts: Option>, - pub pool_id: u32, - pub scheduler: Scheduler, - pub node_groups_plugin: Option>, - pub metrics: Arc, - pub p2p_client: Arc, + pub(crate) store_context: Arc, + pub(crate) storage_provider: Option>, + pub(crate) heartbeats: Arc, + pub(crate) redis_store: Arc, + pub(crate) hourly_upload_limit: i64, + pub(crate) contracts: Option>, + pub(crate) pool_id: u32, + pub(crate) scheduler: Scheduler, + pub(crate) node_groups_plugin: Option>, + pub(crate) metrics: Arc, + pub(crate) get_task_logs_tx: Sender, + pub(crate) restart_task_tx: Sender, } #[allow(clippy::too_many_arguments)] @@ -145,7 +147,8 @@ pub async fn start_server( scheduler: Scheduler, node_groups_plugin: Option>, metrics: Arc, - p2p_client: Arc, + get_task_logs_tx: Sender, + restart_task_tx: Sender, ) -> Result<(), Error> { info!("Starting server at http://{host}:{port}"); let app_state = Data::new(AppState { @@ -159,7 +162,8 @@ pub async fn start_server( scheduler, node_groups_plugin, metrics, - p2p_client, + get_task_logs_tx, + restart_task_tx, }); let node_store = app_state.store_context.node_store.clone(); let node_store_clone = node_store.clone(); diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index a5282b3a..92b26cce 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -22,8 +22,8 @@ pub(crate) async fn create_test_app_state() -> Data { use shared::utils::MockStorageProvider; use crate::{ - metrics::MetricsContext, p2p::client::P2PClient, scheduler::Scheduler, - utils::loop_heartbeats::LoopHeartbeats, ServerMode, + metrics::MetricsContext, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, + ServerMode, }; let store = Arc::new(RedisStore::new_test()); @@ -46,12 +46,8 @@ pub(crate) async fn create_test_app_state() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -64,7 +60,8 @@ pub(crate) async fn create_test_app_state() -> Data { scheduler, node_groups_plugin: None, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } @@ -74,7 +71,6 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { use crate::{ metrics::MetricsContext, - p2p::client::P2PClient, plugins::node_groups::{NodeGroupConfiguration, NodeGroupsPlugin}, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, @@ -116,12 +112,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -134,7 +126,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { scheduler, node_groups_plugin, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } @@ -158,8 +151,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { use shared::utils::MockStorageProvider; use crate::{ - metrics::MetricsContext, p2p::client::P2PClient, scheduler::Scheduler, - utils::loop_heartbeats::LoopHeartbeats, ServerMode, + metrics::MetricsContext, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, + ServerMode, }; let store = Arc::new(RedisStore::new_test()); @@ -182,12 +175,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new("0".to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -200,6 +189,7 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { scheduler, node_groups_plugin: None, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 56fed833..d1ea3133 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -384,15 +384,12 @@ impl DiscoveryMonitor { if let Some(balance) = discovery_node.latest_balance { if balance == U256::ZERO { - info!( - "Node {} has zero balance, marking as low balance", - node_address - ); + info!("Node {node_address} has zero balance, marking as low balance"); if let Err(e) = self .update_node_status(&node_address, NodeStatus::LowBalance) .await { - error!("Error updating node status: {}", e); + error!("Error updating node status: {e}"); } } } diff --git a/crates/orchestrator/src/lib.rs b/crates/orchestrator/src/lib.rs index a1d41f39..19d13eba 100644 --- a/crates/orchestrator/src/lib.rs +++ b/crates/orchestrator/src/lib.rs @@ -16,7 +16,6 @@ pub use metrics::sync_service::MetricsSyncService; pub use metrics::webhook_sender::MetricsWebhookSender; pub use metrics::MetricsContext; pub use node::invite::NodeInviter; -pub use p2p::client::P2PClient; pub use p2p::Service as P2PService; pub use plugins::node_groups::NodeGroupConfiguration; pub use plugins::node_groups::NodeGroupsPlugin; diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index f9beaccb..5f8e2af2 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -9,12 +9,13 @@ use shared::web3::contracts::core::builder::ContractBuilder; use shared::web3::wallet::Wallet; use std::sync::Arc; use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; use url::Url; use orchestrator::{ start_server, DiscoveryMonitor, LoopHeartbeats, MetricsContext, MetricsSyncService, MetricsWebhookSender, NodeGroupConfiguration, NodeGroupsPlugin, NodeInviter, NodeStatusUpdater, - P2PClient, RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, + P2PService, RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, StoreContext, WebhookConfig, WebhookPlugin, }; @@ -91,6 +92,10 @@ struct Args { /// Max healthy nodes with same endpoint #[arg(long, default_value = "1")] max_healthy_nodes_with_same_endpoint: u32, + + /// Libp2p port + #[arg(long, default_value = "4004")] + libp2p_port: u16, } #[tokio::main] @@ -143,7 +148,27 @@ async fn main() -> Result<()> { let store = Arc::new(RedisStore::new(&args.redis_store_url)); let store_context = Arc::new(StoreContext::new(store.clone())); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let keypair = p2p::Keypair::generate_ed25519(); + let cancellation_token = CancellationToken::new(); + let (p2p_service, invite_tx, get_task_logs_tx, restart_task_tx) = { + match P2PService::new( + keypair, + args.libp2p_port, + cancellation_token.clone(), + wallet.clone(), + ) { + Ok(res) => { + info!("p2p service initialized successfully"); + res + } + Err(e) => { + error!("failed to initialize p2p service: {e}"); + std::process::exit(1); + } + } + }; + + tokio::task::spawn(p2p_service.run()); let contracts = ContractBuilder::new(wallet.provider()) .with_compute_registry() @@ -297,24 +322,29 @@ async fn main() -> Result<()> { let inviter_store_context = store_context.clone(); let inviter_heartbeats = heartbeats.clone(); - tasks.spawn({ - let wallet = wallet.clone(); - let p2p_client = p2p_client.clone(); - async move { - let inviter = NodeInviter::new( - wallet, - compute_pool_id, - domain_id, - args.host.as_deref(), - Some(&args.port), - args.url.as_deref(), - inviter_store_context.clone(), - inviter_heartbeats.clone(), - p2p_client, - ); - inviter.run().await + let wallet = wallet.clone(); + let inviter = match NodeInviter::new( + wallet, + compute_pool_id, + domain_id, + args.host.as_deref(), + Some(&args.port), + args.url.as_deref(), + inviter_store_context.clone(), + inviter_heartbeats.clone(), + invite_tx, + ) { + Ok(inviter) => { + info!("Node inviter initialized successfully"); + inviter } - }); + Err(e) => { + error!("Failed to initialize node inviter: {e}"); + std::process::exit(1); + } + }; + + tasks.spawn(async move { inviter.run().await }); // Create status_update_plugins for status updater let mut status_updater_plugins: Vec = vec![]; @@ -387,7 +417,8 @@ async fn main() -> Result<()> { scheduler, node_groups_plugin, metrics_context, - p2p_client, + get_task_logs_tx, + restart_task_tx, ) => { if let Err(e) = res { error!("Server error: {e}"); @@ -403,6 +434,8 @@ async fn main() -> Result<()> { } } + // TODO: use cancellation token to gracefully shutdown tasks + cancellation_token.cancel(); tasks.shutdown().await; Ok(()) } diff --git a/crates/orchestrator/src/node/invite.rs b/crates/orchestrator/src/node/invite.rs index 17ae4207..8391d047 100644 --- a/crates/orchestrator/src/node/invite.rs +++ b/crates/orchestrator/src/node/invite.rs @@ -1,40 +1,40 @@ use crate::models::node::NodeStatus; use crate::models::node::OrchestratorNode; -use crate::p2p::client::P2PClient; +use crate::p2p::InviteRequest as InviteRequestWithMetadata; use crate::store::core::StoreContext; use crate::utils::loop_heartbeats::LoopHeartbeats; use alloy::primitives::utils::keccak256 as keccak; use alloy::primitives::U256; use alloy::signers::Signer; -use anyhow::Result; +use anyhow::{bail, Result}; use futures::stream; use futures::StreamExt; use log::{debug, error, info, warn}; -use shared::models::invite::InviteRequest; +use p2p::InviteRequest; +use p2p::InviteRequestUrl; use shared::web3::wallet::Wallet; use std::sync::Arc; use std::time::SystemTime; use std::time::UNIX_EPOCH; +use tokio::sync::mpsc::Sender; use tokio::time::{interval, Duration}; // Timeout constants const DEFAULT_INVITE_CONCURRENT_COUNT: usize = 32; // Max concurrent count of nodes being invited -pub struct NodeInviter<'a> { +pub struct NodeInviter { wallet: Wallet, pool_id: u32, domain_id: u32, - host: Option<&'a str>, - port: Option<&'a u16>, - url: Option<&'a str>, + url: InviteRequestUrl, store_context: Arc, heartbeats: Arc, - p2p_client: Arc, + invite_tx: Sender, } -impl<'a> NodeInviter<'a> { +impl NodeInviter { #[allow(clippy::too_many_arguments)] - pub fn new( + pub fn new<'a>( wallet: Wallet, pool_id: u32, domain_id: u32, @@ -43,19 +43,31 @@ impl<'a> NodeInviter<'a> { url: Option<&'a str>, store_context: Arc, heartbeats: Arc, - p2p_client: Arc, - ) -> Self { - Self { + invite_tx: Sender, + ) -> Result { + let url = if let Some(url) = url { + InviteRequestUrl::MasterUrl(url.to_string()) + } else { + let Some(host) = host else { + bail!("either host or url must be provided"); + }; + + let Some(port) = port else { + bail!("either port or url must be provided"); + }; + + InviteRequestUrl::MasterIpPort(host.to_string(), *port) + }; + + Ok(Self { wallet, pool_id, domain_id, - host, - port, url, store_context, heartbeats, - p2p_client, - } + invite_tx, + }) } pub async fn run(&self) -> Result<()> { @@ -71,7 +83,7 @@ impl<'a> NodeInviter<'a> { } } - async fn _generate_invite( + async fn generate_invite( &self, node: &OrchestratorNode, nonce: [u8; 32], @@ -102,7 +114,7 @@ impl<'a> NodeInviter<'a> { Ok(signature) } - async fn _send_invite(&self, node: &OrchestratorNode) -> Result<(), anyhow::Error> { + async fn send_invite(&self, node: &OrchestratorNode) -> Result<(), anyhow::Error> { if node.worker_p2p_id.is_none() || node.worker_p2p_addresses.is_none() { return Err(anyhow::anyhow!("Node does not have p2p information")); } @@ -120,21 +132,11 @@ impl<'a> NodeInviter<'a> { ) .to_be_bytes(); - let invite_signature = self._generate_invite(node, nonce, expiration).await?; + let invite_signature = self.generate_invite(node, nonce, expiration).await?; let payload = InviteRequest { invite: hex::encode(invite_signature), pool_id: self.pool_id, - master_url: self.url.map(|u| u.to_string()), - master_ip: if self.url.is_none() { - self.host.map(|h| h.to_string()) - } else { - None - }, - master_port: if self.url.is_none() { - self.port.copied() - } else { - None - }, + url: self.url.clone(), timestamp: SystemTime::now() .duration_since(UNIX_EPOCH) .map_err(|e| anyhow::anyhow!("System time error: {}", e))? @@ -145,11 +147,19 @@ impl<'a> NodeInviter<'a> { info!("Sending invite to node: {p2p_id}"); - match self - .p2p_client - .invite_worker(node.address, p2p_id, p2p_addresses, payload) + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let invite = InviteRequestWithMetadata { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + invite: payload, + response_tx, + }; + self.invite_tx + .send(invite) .await - { + .map_err(|_| anyhow::anyhow!("failed to send invite request"))?; + match response_rx.await { Ok(_) => { info!("Successfully invited node"); if let Err(e) = self @@ -182,7 +192,7 @@ impl<'a> NodeInviter<'a> { let invited_nodes = stream::iter(nodes.into_iter().map(|node| async move { info!("Processing node {:?}", node.address); - match self._send_invite(&node).await { + match self.send_invite(&node).await { Ok(_) => { info!("Successfully processed node {:?}", node.address); Ok(()) diff --git a/crates/orchestrator/src/p2p/client.rs b/crates/orchestrator/src/p2p/client.rs deleted file mode 100644 index 39810151..00000000 --- a/crates/orchestrator/src/p2p/client.rs +++ /dev/null @@ -1,102 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use log::{info, warn}; -use shared::models::invite::InviteRequest; -use shared::p2p::{client::P2PClient as SharedP2PClient, messages::P2PMessage}; -use shared::web3::wallet::Wallet; - -pub struct P2PClient { - shared_client: SharedP2PClient, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let shared_client = SharedP2PClient::new(wallet).await?; - Ok(Self { shared_client }) - } - - pub async fn invite_worker( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - invite: InviteRequest, - ) -> Result<()> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::Invite(invite), - 20, - ) - .await?; - - match response { - P2PMessage::InviteResponse { status, error } => { - if status == "ok" { - info!("Successfully invited worker {worker_p2p_id}"); - Ok(()) - } else { - let error_msg = error.unwrap_or_else(|| "Unknown error".to_string()); - warn!("Failed to invite worker {worker_p2p_id}: {error_msg}"); - Err(anyhow::anyhow!("Invite failed: {}", error_msg)) - } - } - _ => Err(anyhow::anyhow!("Unexpected response type for invite")), - } - } - - pub async fn get_task_logs( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::GetTaskLogs, - 20, - ) - .await?; - - match response { - P2PMessage::GetTaskLogsResponse { logs } => { - logs.map_err(|e| anyhow::anyhow!("Failed to get task logs: {}", e)) - } - _ => Err(anyhow::anyhow!( - "Unexpected response type for get_task_logs" - )), - } - } - - pub async fn restart_task( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result<()> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::RestartTask, - 25, - ) - .await?; - - match response { - P2PMessage::RestartTaskResponse { result } => { - result.map_err(|e| anyhow::anyhow!("Failed to restart task: {}", e)) - } - _ => Err(anyhow::anyhow!("Unexpected response type for restart_task")), - } - } -} diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index 63aa2192..f3bf57cf 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -1,5 +1,3 @@ -pub(crate) mod client; - use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use futures::FutureExt; @@ -19,6 +17,7 @@ pub struct Service { } impl Service { + #[allow(clippy::type_complexity)] pub fn new( keypair: Keypair, port: u16, @@ -79,9 +78,8 @@ impl Service { Some(request) = invite_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { - p2p::Response::Invite(resp) => resp, - _ => bail!("unexpected response type for invite request"), + let p2p::Response::Invite(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for invite request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -101,9 +99,8 @@ impl Service { Some(request) = get_task_logs_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { - p2p::Response::GetTaskLogs(resp) => resp, - _ => bail!("unexpected response type for get task logs request"), + let p2p::Response::GetTaskLogs(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for get task logs request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -114,7 +111,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::GetTaskLogs.into(), + request: p2p::Request::GetTaskLogs, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await @@ -123,9 +120,8 @@ impl Service { Some(request) = restart_task_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { - p2p::Response::Restart(resp) => resp, - _ => bail!("unexpected response type for restart task request"), + let p2p::Response::Restart(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for restart task request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -136,7 +132,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::Restart.into(), + request: p2p::Request::Restart, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await From ac923ca8a3739304b8cade5353befbf83cc98687 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:19:12 -0400 Subject: [PATCH 27/40] deps cleanup --- Cargo.lock | 6 ------ crates/orchestrator/Cargo.toml | 17 +++++++---------- crates/validator/Cargo.toml | 2 -- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1bcee1d6..7eac708f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6211,8 +6211,6 @@ dependencies = [ "actix-web-prometheus", "alloy", "anyhow", - "async-trait", - "base64 0.22.1", "chrono", "clap", "env_logger", @@ -6220,12 +6218,10 @@ dependencies = [ "google-cloud-auth 0.18.0", "google-cloud-storage", "hex", - "iroh", "log", "mockito", "p2p", "prometheus 0.14.0", - "rand 0.8.5", "rand 0.9.1", "redis", "redis-test", @@ -9455,10 +9451,8 @@ dependencies = [ "lazy_static", "log", "mockito", - "nalgebra", "p2p", "prometheus 0.14.0", - "rand 0.8.5", "rand 0.9.1", "redis", "redis-test", diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index cf31fdf5..ce733ee6 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -11,21 +11,14 @@ p2p = { workspace = true} shared = { workspace = true } actix-web = { workspace = true } -actix-web-prometheus = "0.1.2" alloy = { workspace = true } anyhow = { workspace = true } -async-trait = "0.1.88" -base64 = "0.22.1" chrono = { workspace = true, features = ["serde"] } clap = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } -google-cloud-auth = "0.18.0" -google-cloud-storage = "0.24.0" hex = { workspace = true } log = { workspace = true } -prometheus = "0.14.0" -rand = "0.9.0" redis = { workspace = true, features = ["tokio-comp"] } redis-test = { workspace = true } reqwest = { workspace = true } @@ -34,11 +27,15 @@ serde_json = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } url = { workspace = true } +uuid = { workspace = true } + +actix-web-prometheus = "0.1.2" +google-cloud-auth = "0.18.0" +google-cloud-storage = "0.24.0" +prometheus = "0.14.0" +rand = "0.9.0" utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web", "debug-embed", "reqwest", "vendored"] } -uuid = { workspace = true } -iroh = { workspace = true } -rand_v8 = { workspace = true } [dev-dependencies] mockito = { workspace = true } diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 76969bb0..4d329921 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -19,9 +19,7 @@ directories = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } -rand_v8 = { workspace = true } log = { workspace = true } -nalgebra = { workspace = true } redis = { workspace = true, features = ["tokio-comp"] } reqwest = { workspace = true } serde = { workspace = true } From f35b0012ef0404a69468bc10ce6b4cf1487481cc Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:26:43 -0400 Subject: [PATCH 28/40] delete unused code --- Cargo.lock | 33 ---- crates/discovery/src/chainsync/sync.rs | 10 +- crates/p2p/src/protocol.rs | 6 + crates/shared/src/models/challenge.rs | 89 ---------- crates/shared/src/models/invite.rs | 20 --- crates/shared/src/models/mod.rs | 2 - crates/shared/src/p2p/client.rs | 237 ------------------------- crates/shared/src/p2p/messages.rs | 101 ----------- crates/shared/src/p2p/mod.rs | 6 - crates/shared/src/p2p/protocol.rs | 5 - crates/shared/src/p2p/service.rs | 1 + crates/validator/src/main.rs | 2 +- 12 files changed, 11 insertions(+), 501 deletions(-) delete mode 100644 crates/shared/src/models/challenge.rs delete mode 100644 crates/shared/src/models/invite.rs delete mode 100644 crates/shared/src/p2p/client.rs delete mode 100644 crates/shared/src/p2p/messages.rs delete mode 100644 crates/shared/src/p2p/protocol.rs diff --git a/Cargo.lock b/Cargo.lock index 153fcfa7..aa99f00e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8010,15 +8010,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "serde_spanned" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" -dependencies = [ - "serde", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8911,26 +8902,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.8.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", -] - [[package]] name = "toml_datetime" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" -dependencies = [ - "serde", -] [[package]] name = "toml_edit" @@ -8939,19 +8915,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap 2.9.0", - "serde", - "serde_spanned", "toml_datetime", - "toml_write", "winnow", ] -[[package]] -name = "toml_write" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" - [[package]] name = "tower" version = "0.5.2" diff --git a/crates/discovery/src/chainsync/sync.rs b/crates/discovery/src/chainsync/sync.rs index 6101c87a..1120d3cb 100644 --- a/crates/discovery/src/chainsync/sync.rs +++ b/crates/discovery/src/chainsync/sync.rs @@ -155,7 +155,7 @@ async fn sync_single_node( })?; let balance = provider.get_balance(node_address).await.map_err(|e| { - error!("Error retrieving balance for node {}: {}", node_address, e); + error!("Error retrieving balance for node {node_address}: {e}"); anyhow::anyhow!("Failed to retrieve node balance") })?; n.latest_balance = Some(balance); @@ -166,8 +166,7 @@ async fn sync_single_node( .await .map_err(|e| { error!( - "Error retrieving node info for provider {} and node {}: {}", - provider_address, node_address, e + "Error retrieving node info for provider {provider_address} and node {node_address}: {e}" ); anyhow::anyhow!("Failed to retrieve node info") })?; @@ -177,10 +176,7 @@ async fn sync_single_node( .get_provider(provider_address) .await .map_err(|e| { - error!( - "Error retrieving provider info for {}: {}", - provider_address, e - ); + error!("Error retrieving provider info for {provider_address}: {e}"); anyhow::anyhow!("Failed to retrieve provider info") })?; diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index ae839cec..0956ef0f 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -35,6 +35,12 @@ impl Protocol { #[derive(Debug, Clone)] pub struct Protocols(HashSet); +impl Default for Protocols { + fn default() -> Self { + Self::new() + } +} + impl Protocols { pub fn new() -> Self { Self(HashSet::new()) diff --git a/crates/shared/src/models/challenge.rs b/crates/shared/src/models/challenge.rs deleted file mode 100644 index 639cc602..00000000 --- a/crates/shared/src/models/challenge.rs +++ /dev/null @@ -1,89 +0,0 @@ -use nalgebra::DMatrix; -use serde::{ - de::{self, Visitor}, - Deserialize, Deserializer, Serialize, Serializer, -}; -use std::fmt; - -#[derive(Debug, Clone)] -pub struct FixedF64(pub f64); - -impl Serialize for FixedF64 { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - // adjust precision as needed - serializer.serialize_str(&format!("{:.12}", self.0)) - } -} - -impl<'de> Deserialize<'de> for FixedF64 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct FixedF64Visitor; - - impl Visitor<'_> for FixedF64Visitor { - type Value = FixedF64; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string representing a fixed precision float") - } - - fn visit_str(self, value: &str) -> Result - where - E: de::Error, - { - value - .parse::() - .map(FixedF64) - .map_err(|_| E::custom(format!("invalid f64: {value}"))) - } - } - - deserializer.deserialize_str(FixedF64Visitor) - } -} - -impl PartialEq for FixedF64 { - fn eq(&self, other: &Self) -> bool { - format!("{:.10}", self.0) == format!("{:.10}", other.0) - } -} - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct ChallengeRequest { - pub rows_a: usize, - pub cols_a: usize, - pub data_a: Vec, - pub rows_b: usize, - pub cols_b: usize, - pub data_b: Vec, - pub timestamp: Option, -} - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct ChallengeResponse { - pub result: Vec, - pub rows: usize, - pub cols: usize, -} - -pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { - // convert FixedF64 to f64 - let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); - let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); - let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); - let b = DMatrix::from_vec(req.rows_b, req.cols_b, data_b); - let c = a * b; - - let data_c: Vec = c.iter().map(|x| FixedF64(*x)).collect(); - - ChallengeResponse { - rows: c.nrows(), - cols: c.ncols(), - result: data_c, - } -} diff --git a/crates/shared/src/models/invite.rs b/crates/shared/src/models/invite.rs deleted file mode 100644 index 08cf2a5e..00000000 --- a/crates/shared/src/models/invite.rs +++ /dev/null @@ -1,20 +0,0 @@ -use serde::Deserialize; -use serde::Serialize; - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct InviteRequest { - pub invite: String, - pub pool_id: u32, - // Either master url or ip and port - pub master_url: Option, - pub master_ip: Option, - pub master_port: Option, - pub timestamp: u64, - pub expiration: [u8; 32], - pub nonce: [u8; 32], -} - -#[derive(Deserialize, Serialize)] -pub struct InviteResponse { - pub status: String, -} diff --git a/crates/shared/src/models/mod.rs b/crates/shared/src/models/mod.rs index 0bbe8968..dea669b3 100644 --- a/crates/shared/src/models/mod.rs +++ b/crates/shared/src/models/mod.rs @@ -1,7 +1,5 @@ pub mod api; -pub mod challenge; pub mod heartbeat; -pub mod invite; pub mod metric; pub mod node; pub mod storage; diff --git a/crates/shared/src/p2p/client.rs b/crates/shared/src/p2p/client.rs deleted file mode 100644 index 54e6de45..00000000 --- a/crates/shared/src/p2p/client.rs +++ /dev/null @@ -1,237 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use iroh::endpoint::{RecvStream, SendStream}; -use iroh::{Endpoint, NodeAddr, NodeId, RelayMode, SecretKey}; -use log::{debug, info}; -use std::str::FromStr; -use std::time::Duration; - -use crate::p2p::messages::{P2PMessage, P2PRequest, P2PResponse}; -use crate::p2p::protocol::PRIME_P2P_PROTOCOL; -use crate::security::request_signer::sign_message; -use crate::web3::wallet::Wallet; -use rand_v8::rngs::OsRng; -use rand_v8::Rng; - -pub struct P2PClient { - endpoint: Endpoint, - node_id: NodeId, - wallet: Wallet, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let mut rng = rand_v8::thread_rng(); - let secret_key = SecretKey::generate(&mut rng); - let node_id = secret_key.public(); - - let endpoint = Endpoint::builder() - .secret_key(secret_key) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .relay_mode(RelayMode::Default) - .discovery_n0() - .bind() - .await?; - - info!("P2P client initialized with node ID: {node_id}"); - - Ok(Self { - endpoint, - node_id, - wallet, - }) - } - - pub fn node_id(&self) -> NodeId { - self.node_id - } - - pub fn endpoint(&self) -> &Endpoint { - &self.endpoint - } - - /// Helper function to write a message with length prefix - async fn write_message(send: &mut SendStream, message: &T) -> Result<()> { - let message_bytes = serde_json::to_vec(message)?; - send.write_all(&(message_bytes.len() as u32).to_be_bytes()) - .await?; - send.write_all(&message_bytes).await?; - Ok(()) - } - - /// Helper function to read a message with length prefix - async fn read_message(recv: &mut RecvStream) -> Result { - let mut len_bytes = [0u8; 4]; - recv.read_exact(&mut len_bytes).await?; - let len = u32::from_be_bytes(len_bytes) as usize; - - let mut message_bytes = vec![0u8; len]; - recv.read_exact(&mut message_bytes).await?; - - let message: T = serde_json::from_slice(&message_bytes)?; - Ok(message) - } - - pub async fn send_request( - &self, - target_p2p_id: &str, - target_addresses: &[String], - target_wallet_address: Address, - message: P2PMessage, - timeout_secs: u64, - ) -> Result { - let timeout_duration = Duration::from_secs(timeout_secs); - - tokio::time::timeout(timeout_duration, async { - self.send_request_inner( - target_p2p_id, - target_addresses, - target_wallet_address, - message, - ) - .await - }) - .await - .map_err(|_| { - anyhow::anyhow!( - "P2P request to {} timed out after {}s", - target_p2p_id, - timeout_secs - ) - })? - } - - async fn send_request_inner( - &self, - target_p2p_id: &str, - target_addresses: &[String], - target_wallet_address: Address, - message: P2PMessage, - ) -> Result { - // Parse target node ID - let node_id = NodeId::from_str(target_p2p_id)?; - - let mut socket_addrs = Vec::new(); - for addr in target_addresses { - if let Ok(socket_addr) = addr.parse() { - socket_addrs.push(socket_addr); - } - } - - if socket_addrs.is_empty() { - return Err(anyhow::anyhow!( - "No valid addresses provided for target node" - )); - } - - // Create node address - let node_addr = NodeAddr::new(node_id).with_direct_addresses(socket_addrs); - - debug!("Connecting to P2P node: {target_p2p_id}"); - - // Connect to the target node - let connection = self.endpoint.connect(node_addr, PRIME_P2P_PROTOCOL).await?; - - let (mut send, mut recv) = connection.open_bi().await?; - - // First request an auth challenge - let challenge_bytes: [u8; 32] = OsRng.gen(); - let challenge_message: String = hex::encode(challenge_bytes); - - let request_auth_challenge = P2PRequest::new(P2PMessage::RequestAuthChallenge { - message: challenge_message.clone(), - }); - Self::write_message(&mut send, &request_auth_challenge).await?; - - // Response contains the auth challenge we have to solve (to show we are the right node) - let auth_challenge_response: P2PResponse = Self::read_message(&mut recv).await?; - let auth_challenge_solution: P2PRequest = match auth_challenge_response.message { - P2PMessage::AuthChallenge { - signed_message, - message, - } => { - // Parse the signature from the server - let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&signed_message) - else { - return Err(anyhow::anyhow!("Failed to parse signature from server")); - }; - - // Recover address from the challenge message that the server signed - let Ok(recovered_address) = - parsed_signature.recover_address_from_msg(&challenge_message) - else { - return Err(anyhow::anyhow!( - "Failed to recover address from server signature" - )); - }; - - // Verify the recovered address matches the expected target wallet address - if recovered_address != target_wallet_address { - return Err(anyhow::anyhow!( - "Server address verification failed: expected {}, got {}", - target_wallet_address, - recovered_address - )); - } - - debug!("Auth challenge received from node: {target_p2p_id}"); - let signature = sign_message(&message, &self.wallet).await.unwrap(); - P2PRequest::new(P2PMessage::AuthSolution { - signed_message: signature, - }) - } - _ => { - return Err(anyhow::anyhow!( - "Expected auth challenge, got different message type" - )); - } - }; - Self::write_message(&mut send, &auth_challenge_solution).await?; - - // Check if we are granted or rejected - let auth_response: P2PResponse = Self::read_message(&mut recv).await?; - match auth_response.message { - P2PMessage::AuthGranted { .. } => { - debug!("Auth granted with node: {target_p2p_id}"); - } - P2PMessage::AuthRejected { .. } => { - debug!("Auth rejected with node: {target_p2p_id}"); - return Err(anyhow::anyhow!( - "Auth rejected with node: {}", - target_p2p_id - )); - } - _ => { - return Err(anyhow::anyhow!( - "Expected auth response, got different message type" - )); - } - } - - // Now send the actual request - let request = P2PRequest::new(message); - Self::write_message(&mut send, &request).await?; - - // Read response - let response: P2PResponse = Self::read_message(&mut recv).await?; - - tokio::time::sleep(Duration::from_millis(50)).await; - - send.finish()?; - - Ok(response.message) - } - - /// Shutdown the P2P client gracefully - pub async fn shutdown(self) -> Result<()> { - info!("Shutting down P2P client with node ID: {}", self.node_id); - self.endpoint.close().await; - Ok(()) - } -} - -impl Drop for P2PClient { - fn drop(&mut self) { - debug!("P2P client dropped for node ID: {}", self.node_id); - } -} diff --git a/crates/shared/src/p2p/messages.rs b/crates/shared/src/p2p/messages.rs deleted file mode 100644 index 1624686a..00000000 --- a/crates/shared/src/p2p/messages.rs +++ /dev/null @@ -1,101 +0,0 @@ -use crate::models::challenge::{ChallengeRequest, ChallengeResponse}; -use crate::models::invite::InviteRequest; -use serde::{Deserialize, Serialize}; -use std::time::SystemTime; - -/// Maximum message size for P2P communication (1MB) -pub const MAX_MESSAGE_SIZE: usize = 1024 * 1024; - -/// P2P message types for validator-worker communication -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(tag = "type", content = "payload")] -pub enum P2PMessage { - /// Request auth challenge from worker to validator - RequestAuthChallenge { message: String }, - - /// Auth challenge from worker to validator - AuthChallenge { - signed_message: String, - message: String, - }, - - /// Auth solution from validator to worker - AuthSolution { signed_message: String }, - - /// Auth granted from worker to validator - AuthGranted {}, - - /// Auth rejected from validator to worker - AuthRejected {}, - - /// Simple ping message for connectivity testing - Ping { timestamp: SystemTime, nonce: u64 }, - - /// Response to ping - Pong { timestamp: SystemTime, nonce: u64 }, - - /// Hardware challenge from validator to worker - HardwareChallenge { - challenge: ChallengeRequest, - timestamp: SystemTime, - }, - - /// Hardware challenge response from worker to validator - HardwareChallengeResponse { - response: ChallengeResponse, - timestamp: SystemTime, - }, - - /// Invite request from orchestrator to worker - Invite(InviteRequest), - - /// Response to invite - InviteResponse { - status: String, - error: Option, - }, - - /// Get task logs from worker - GetTaskLogs, - - /// Response with task logs - GetTaskLogsResponse { logs: Result, String> }, - - /// Restart task on worker - RestartTask, - - /// Response to restart task - RestartTaskResponse { result: Result<(), String> }, -} - -/// P2P request wrapper with ID for tracking -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct P2PRequest { - pub id: String, - pub message: P2PMessage, -} - -/// P2P response wrapper with request ID -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct P2PResponse { - pub request_id: String, - pub message: P2PMessage, -} - -impl P2PRequest { - pub fn new(message: P2PMessage) -> Self { - Self { - id: uuid::Uuid::new_v4().to_string(), - message, - } - } -} - -impl P2PResponse { - pub fn new(request_id: String, message: P2PMessage) -> Self { - Self { - request_id, - message, - } - } -} diff --git a/crates/shared/src/p2p/mod.rs b/crates/shared/src/p2p/mod.rs index cac69a8a..9d0e4016 100644 --- a/crates/shared/src/p2p/mod.rs +++ b/crates/shared/src/p2p/mod.rs @@ -1,9 +1,3 @@ -pub mod client; -pub mod messages; -pub mod protocol; mod service; -pub use client::P2PClient; -pub use protocol::*; - pub use service::*; diff --git a/crates/shared/src/p2p/protocol.rs b/crates/shared/src/p2p/protocol.rs deleted file mode 100644 index 2aab189d..00000000 --- a/crates/shared/src/p2p/protocol.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Protocol ID for Prime P2P communication -pub const PRIME_P2P_PROTOCOL: &[u8] = b"prime-p2p-v1"; - -/// Timeout for P2P requests in seconds -pub const P2P_REQUEST_TIMEOUT: u64 = 30; diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index f5a7bbe3..064dca63 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -133,6 +133,7 @@ struct Context { // this assumes that there is only one outbound request per protocol per peer at a time, // is this a correct assumption? // response channel is for sending the response back to the caller who initiated the request + #[allow(clippy::type_complexity)] ongoing_outbound_requests: Arc>>>, diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index d17f5004..f3b80d4b 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -639,7 +639,7 @@ mod tests { web::{self, post}, HttpResponse, Scope, }; - use shared::models::challenge::{calc_matrix, ChallengeRequest, ChallengeResponse, FixedF64}; + use p2p::{calc_matrix, ChallengeRequest, ChallengeResponse, FixedF64}; async fn handle_challenge(challenge: web::Json) -> HttpResponse { let result = calc_matrix(&challenge); From 2475059bb13d2b91730cbdcdcaf7a4d003a8ad21 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:27:37 -0400 Subject: [PATCH 29/40] no port conflict --- crates/worker/src/cli/command.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 8f358252..2fb8a739 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -128,7 +128,7 @@ pub enum Commands { #[arg(long, default_value = "false")] with_ipfs_upload: bool, - #[arg(long, default_value = "4002")] + #[arg(long, default_value = "5001")] ipfs_port: u16, }, Check {}, From 73300bea87429b21d1a0e2f3844c6df42b93d205 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 20:59:22 -0400 Subject: [PATCH 30/40] rename messages to be more correct --- crates/orchestrator/src/p2p/mod.rs | 8 ++-- crates/p2p/src/lib.rs | 17 ++++--- crates/p2p/src/message/mod.rs | 76 +++++++++++++++--------------- crates/p2p/src/protocol.rs | 16 +++---- crates/shared/src/p2p/service.rs | 24 +++++----- crates/validator/src/p2p/mod.rs | 2 +- crates/worker/src/cli/command.rs | 3 ++ crates/worker/src/p2p/mod.rs | 30 ++++++------ 8 files changed, 91 insertions(+), 85 deletions(-) diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index f3bf57cf..c11ca2bf 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -41,7 +41,7 @@ impl Service { .with_invite() .with_get_task_logs() .with_restart() - .with_validator_authentication(), + .with_authentication(), ) .context("failed to create p2p service")?; Ok(( @@ -120,7 +120,7 @@ impl Service { Some(request) = restart_task_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let p2p::Response::Restart(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + let p2p::Response::RestartTask(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { bail!("unexpected response type for restart task request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; @@ -132,7 +132,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::Restart, + request: p2p::Request::RestartTask, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await @@ -167,5 +167,5 @@ pub struct RestartTaskRequest { pub(crate) worker_wallet_address: alloy::primitives::Address, pub(crate) worker_p2p_id: String, pub(crate) worker_addresses: Vec, - pub(crate) response_tx: tokio::sync::oneshot::Sender, + pub(crate) response_tx: tokio::sync::oneshot::Sender, } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 0ad1e4a5..f5fedad3 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -9,6 +9,7 @@ use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; use tracing::debug; +use tracing::info; mod behaviour; mod message; @@ -134,20 +135,24 @@ impl Node { event = swarm.select_next_some() => { match event { SwarmEvent::NewListenAddr { - listener_id: _, address, + .. } => { debug!("new listen address: {address}"); } SwarmEvent::ExternalAddrConfirmed { address } => { debug!("external address confirmed: {address}"); } + SwarmEvent::ConnectionEstablished { + peer_id, + .. + } => { + info!("connection established with peer {peer_id}"); + } SwarmEvent::ConnectionClosed { peer_id, cause, - endpoint: _, - connection_id: _, - num_established: _, + .. } => { debug!("connection closed with peer {peer_id}: {cause:?}"); } @@ -209,8 +214,8 @@ impl NodeBuilder { self } - pub fn with_validator_authentication(mut self) -> Self { - self.protocols = self.protocols.with_validator_authentication(); + pub fn with_authentication(mut self) -> Self { + self.protocols = self.protocols.with_authentication(); self } diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index dc2403e3..74b09c5a 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -27,11 +27,11 @@ pub enum OutgoingMessage { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Request { - ValidatorAuthentication(ValidatorAuthenticationRequest), + Authentication(AuthenticationRequest), HardwareChallenge(HardwareChallengeRequest), Invite(InviteRequest), GetTaskLogs, - Restart, + RestartTask, General(GeneralRequest), } @@ -46,11 +46,11 @@ impl Request { pub fn protocol(&self) -> Protocol { match self { - Request::ValidatorAuthentication(_) => Protocol::ValidatorAuthentication, + Request::Authentication(_) => Protocol::Authentication, Request::HardwareChallenge(_) => Protocol::HardwareChallenge, Request::Invite(_) => Protocol::Invite, Request::GetTaskLogs => Protocol::GetTaskLogs, - Request::Restart => Protocol::Restart, + Request::RestartTask => Protocol::Restart, Request::General(_) => Protocol::General, } } @@ -58,11 +58,11 @@ impl Request { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Response { - ValidatorAuthentication(ValidatorAuthenticationResponse), + Authentication(AuthenticationResponse), HardwareChallenge(HardwareChallengeResponse), Invite(InviteResponse), GetTaskLogs(GetTaskLogsResponse), - Restart(RestartResponse), + RestartTask(RestartTaskResponse), General(GeneralResponse), } @@ -76,72 +76,72 @@ impl Response { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationRequest { - Initiation(ValidatorAuthenticationInitiationRequest), - Solution(ValidatorAuthenticationSolutionRequest), +pub enum AuthenticationRequest { + Initiation(AuthenticationInitiationRequest), + Solution(AuthenticationSolutionRequest), } -impl From for Request { - fn from(request: ValidatorAuthenticationRequest) -> Self { - Request::ValidatorAuthentication(request) +impl From for Request { + fn from(request: AuthenticationRequest) -> Self { + Request::Authentication(request) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationResponse { - Initiation(ValidatorAuthenticationInitiationResponse), - Solution(ValidatorAuthenticationSolutionResponse), +pub enum AuthenticationResponse { + Initiation(AuthenticationInitiationResponse), + Solution(AuthenticationSolutionResponse), } -impl From for Response { - fn from(response: ValidatorAuthenticationResponse) -> Self { - Response::ValidatorAuthentication(response) +impl From for Response { + fn from(response: AuthenticationResponse) -> Self { + Response::Authentication(response) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidatorAuthenticationInitiationRequest { +pub struct AuthenticationInitiationRequest { pub message: String, } -impl From for Request { - fn from(request: ValidatorAuthenticationInitiationRequest) -> Self { - Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Initiation(request)) +impl From for Request { + fn from(request: AuthenticationInitiationRequest) -> Self { + Request::Authentication(AuthenticationRequest::Initiation(request)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidatorAuthenticationInitiationResponse { +pub struct AuthenticationInitiationResponse { pub signature: String, pub message: String, } -impl From for Response { - fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { - Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) +impl From for Response { + fn from(response: AuthenticationInitiationResponse) -> Self { + Response::Authentication(AuthenticationResponse::Initiation(response)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidatorAuthenticationSolutionRequest { +pub struct AuthenticationSolutionRequest { pub signature: String, } -impl From for Request { - fn from(request: ValidatorAuthenticationSolutionRequest) -> Self { - Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Solution(request)) +impl From for Request { + fn from(request: AuthenticationSolutionRequest) -> Self { + Request::Authentication(AuthenticationRequest::Solution(request)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationSolutionResponse { +pub enum AuthenticationSolutionResponse { Granted, Rejected, } -impl From for Response { - fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { - Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) +impl From for Response { + fn from(response: AuthenticationSolutionResponse) -> Self { + Response::Authentication(AuthenticationResponse::Solution(response)) } } @@ -216,14 +216,14 @@ impl From for Response { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum RestartResponse { +pub enum RestartTaskResponse { Ok, Error(String), } -impl From for Response { - fn from(response: RestartResponse) -> Self { - Response::Restart(response) +impl From for Response { + fn from(response: RestartTaskResponse) -> Self { + Response::RestartTask(response) } } diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index 0956ef0f..f721bea6 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -3,8 +3,8 @@ use std::{collections::HashSet, hash::Hash}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Protocol { - // validator -> worker - ValidatorAuthentication, + // validator or orchestrator -> worker + Authentication, // validator -> worker HardwareChallenge, // orchestrator -> worker @@ -20,9 +20,7 @@ pub enum Protocol { impl Protocol { pub(crate) fn as_stream_protocol(&self) -> StreamProtocol { match self { - Protocol::ValidatorAuthentication => { - StreamProtocol::new("/prime/validator_authentication/1.0.0") - } + Protocol::Authentication => StreamProtocol::new("/prime/authentication/1.0.0"), Protocol::HardwareChallenge => StreamProtocol::new("/prime/hardware_challenge/1.0.0"), Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), @@ -46,8 +44,8 @@ impl Protocols { Self(HashSet::new()) } - pub fn has_validator_authentication(&self) -> bool { - self.0.contains(&Protocol::ValidatorAuthentication) + pub fn has_authentication(&self) -> bool { + self.0.contains(&Protocol::Authentication) } pub fn has_hardware_challenge(&self) -> bool { @@ -70,8 +68,8 @@ impl Protocols { self.0.contains(&Protocol::General) } - pub fn with_validator_authentication(mut self) -> Self { - self.0.insert(Protocol::ValidatorAuthentication); + pub fn with_authentication(mut self) -> Self { + self.0.insert(Protocol::Authentication); self } diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index 064dca63..bd817ea1 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -2,9 +2,9 @@ use crate::web3::wallet::Wallet; use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use p2p::{ + AuthenticationInitiationRequest, AuthenticationResponse, AuthenticationSolutionRequest, IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, Protocol, - Protocols, Response, ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, - ValidatorAuthenticationSolutionRequest, + Protocols, Response, }; use std::collections::HashMap; use std::collections::HashSet; @@ -115,7 +115,7 @@ fn build_p2p_node( NodeBuilder::new() .with_keypair(keypair) .with_port(port) - .with_validator_authentication() + .with_authentication() .with_protocols(protocols) .with_cancellation_token(cancellation_token) .try_build() @@ -233,7 +233,7 @@ async fn handle_outgoing_message( let challenge_bytes: [u8; 32] = OsRng.gen(); let auth_challenge_message: String = hex::encode(challenge_bytes); - let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + let req: p2p::Request = AuthenticationInitiationRequest { message: auth_challenge_message.clone(), } .into(); @@ -288,7 +288,7 @@ async fn handle_incoming_response( context: Context, ) -> Result<()> { match response { - p2p::Response::ValidatorAuthentication(resp) => { + p2p::Response::Authentication(resp) => { log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); handle_validation_authentication_response(from, resp, context) .await @@ -337,7 +337,7 @@ async fn handle_incoming_response( }; let _ = response_tx.send(response); } - p2p::Response::Restart(ref resp) => { + p2p::Response::RestartTask(ref resp) => { if !context.protocols.has_restart() { bail!("received RestartResponse from {from}, but restart protocol is not enabled"); } @@ -370,14 +370,14 @@ async fn handle_incoming_response( async fn handle_validation_authentication_response( from: PeerId, - response: p2p::ValidatorAuthenticationResponse, + response: p2p::AuthenticationResponse, context: Context, ) -> Result<()> { use crate::security::request_signer::sign_message; use std::str::FromStr as _; match response { - ValidatorAuthenticationResponse::Initiation(req) => { + AuthenticationResponse::Initiation(req) => { let ongoing_auth_requests = context.ongoing_auth_requests.read().await; let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { bail!( @@ -408,7 +408,7 @@ async fn handle_validation_authentication_response( log::debug!("auth challenge initiation response received from node: {from}"); let signature = sign_message(&req.message, &context.wallet).await.unwrap(); - let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req: p2p::Request = AuthenticationSolutionRequest { signature }.into(); let req = req.into_outgoing_message(from, vec![]); context .outgoing_messages @@ -416,7 +416,7 @@ async fn handle_validation_authentication_response( .await .context("failed to send outgoing message")?; } - ValidatorAuthenticationResponse::Solution(req) => { + AuthenticationResponse::Solution(req) => { let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { bail!( @@ -425,8 +425,8 @@ async fn handle_validation_authentication_response( }; match req { - p2p::ValidatorAuthenticationSolutionResponse::Granted => {} - p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + p2p::AuthenticationSolutionResponse::Granted => {} + p2p::AuthenticationSolutionResponse::Rejected => { log::debug!("auth challenge rejected by node: {from}"); return Ok(()); } diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index dc6b23e6..24811586 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -30,7 +30,7 @@ impl Service { wallet, Protocols::new() .with_hardware_challenge() - .with_validator_authentication(), + .with_authentication(), ) .context("failed to create p2p service")?; Ok(( diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 2fb8a739..2ff06e8d 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -746,11 +746,14 @@ pub async fn execute_command( let peer_id = p2p_service.peer_id(); node_config.worker_p2p_id = Some(peer_id.to_string()); + let external_p2p_address = + format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port,); node_config.worker_p2p_addresses = Some( p2p_service .listen_addrs() .iter() .map(|addr| addr.to_string()) + .chain(std::iter::once(external_p2p_address)) .collect(), ); tokio::task::spawn(p2p_service.run()); diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 748d1d54..61b682c5 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -113,7 +113,7 @@ fn build_p2p_node( let (node, _, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() .with_keypair(keypair) .with_port(port) - .with_validator_authentication() + .with_authentication() .with_hardware_challenge() .with_invite() .with_get_task_logs() @@ -201,24 +201,24 @@ async fn handle_incoming_request( context: Context, ) -> Result<()> { let resp = match request { - p2p::Request::ValidatorAuthentication(req) => { + p2p::Request::Authentication(req) => { tracing::debug!("handling ValidatorAuthentication request"); match req { - p2p::ValidatorAuthenticationRequest::Initiation(req) => { + p2p::AuthenticationRequest::Initiation(req) => { handle_validator_authentication_initiation_request(from, req, &context) .await .context("failed to handle ValidatorAuthenticationInitiationRequest")? } - p2p::ValidatorAuthenticationRequest::Solution(req) => { + p2p::AuthenticationRequest::Solution(req) => { match handle_validator_authentication_solution_request(from, req, &context) .await { - Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), + Ok(()) => p2p::AuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( "failed to handle ValidatorAuthenticationSolutionRequest: {e:?}" ); - p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() + p2p::AuthenticationSolutionResponse::Rejected.into() } } } @@ -241,7 +241,7 @@ async fn handle_incoming_request( tracing::debug!("handling GetTaskLogs request"); handle_get_task_logs_request(from, &context).await } - p2p::Request::Restart => { + p2p::Request::RestartTask => { tracing::debug!("handling Restart request"); handle_restart_request(from, &context).await } @@ -262,7 +262,7 @@ async fn handle_incoming_request( async fn handle_validator_authentication_initiation_request( from: PeerId, - req: p2p::ValidatorAuthenticationInitiationRequest, + req: p2p::AuthenticationInitiationRequest, context: &Context, ) -> Result { use rand_v8::Rng as _; @@ -283,7 +283,7 @@ async fn handle_validator_authentication_initiation_request( let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; ongoing_auth_challenges.insert(from, challenge_message.clone()); - Ok(p2p::ValidatorAuthenticationInitiationResponse { + Ok(p2p::AuthenticationInitiationResponse { message: challenge_message, signature, } @@ -292,7 +292,7 @@ async fn handle_validator_authentication_initiation_request( async fn handle_validator_authentication_solution_request( from: PeerId, - req: p2p::ValidatorAuthenticationSolutionRequest, + req: p2p::AuthenticationSolutionRequest, context: &Context, ) -> Result<()> { use std::str::FromStr as _; @@ -358,19 +358,19 @@ async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Respon async fn handle_restart_request(from: PeerId, context: &Context) -> Response { let authorized_peers = context.authorized_peers.read().await; if !authorized_peers.contains(&from) { - return p2p::RestartResponse::Error("unauthorized".to_string()).into(); + return p2p::RestartTaskResponse::Error("unauthorized".to_string()).into(); } match context.docker_service.restart_task().await { - Ok(()) => p2p::RestartResponse::Ok.into(), - Err(e) => p2p::RestartResponse::Error(format!("failed to restart task: {e:?}")).into(), + Ok(()) => p2p::RestartTaskResponse::Ok.into(), + Err(e) => p2p::RestartTaskResponse::Error(format!("failed to restart task: {e:?}")).into(), } } fn handle_incoming_response(response: p2p::Response) { // critical developer error if any of these happen, could panic here match response { - p2p::Response::ValidatorAuthentication(_) => { + p2p::Response::Authentication(_) => { tracing::error!("worker should never receive ValidatorAuthentication responses"); } p2p::Response::HardwareChallenge(_) => { @@ -382,7 +382,7 @@ fn handle_incoming_response(response: p2p::Response) { p2p::Response::GetTaskLogs(_) => { tracing::error!("worker should never receive GetTaskLogs responses"); } - p2p::Response::Restart(_) => { + p2p::Response::RestartTask(_) => { tracing::error!("worker should never receive Restart responses"); } p2p::Response::General(_) => { From e135ad463f7eed53146f4293d5a18d3f08dea096 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 23:18:34 -0400 Subject: [PATCH 31/40] add logging --- Cargo.lock | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/behaviour.rs | 9 +++--- crates/p2p/src/lib.rs | 28 +++++++++++++++++- crates/shared/src/p2p/service.rs | 29 ++++++++++++------- crates/validator/src/p2p/mod.rs | 1 + .../src/validators/hardware_challenge.rs | 9 ++++++ crates/worker/src/cli/command.rs | 4 +-- crates/worker/src/p2p/mod.rs | 4 ++- crates/worker/src/utils/logging.rs | 4 --- 10 files changed, 67 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa99f00e..67fc79bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6216,6 +6216,7 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "log", "nalgebra", "serde", "tokio", diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index bb670107..498fbd29 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -13,6 +13,7 @@ serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } tracing = { workspace = true } +log = { workspace = true } [lints] workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index b114b61e..9d92be15 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -6,7 +6,7 @@ use libp2p::connection_limits::ConnectionLimits; use libp2p::identify; use libp2p::identity; use libp2p::kad; -use libp2p::kad::store::MemoryStore; +// use libp2p::kad::store::MemoryStore; use libp2p::mdns; use libp2p::ping; use libp2p::request_response; @@ -27,7 +27,8 @@ pub(crate) struct Behaviour { // discovery mdns: mdns::tokio::Behaviour, - kademlia: kad::Behaviour, + // comment out kademlia for now as it requires bootnodes to be provided + // kademlia: kad::Behaviour, // protocols identify: identify::Behaviour, @@ -113,7 +114,7 @@ impl Behaviour { let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) .context("failed to create mDNS behaviour")?; - let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + // let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); let identify = identify::Behaviour::new( identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) @@ -124,7 +125,7 @@ impl Behaviour { Ok(Self { autonat, connection_limits, - kademlia, + // kademlia, mdns, identify, ping, diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index f5fedad3..896698c8 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -102,13 +102,15 @@ impl Node { loop { tokio::select! { + biased; _ = cancellation_token.cancelled() => { debug!("cancellation token triggered, shutting down node"); break Ok(()); } Some((addrs, res_tx)) = dial_rx.recv() => { + log::info!("dialing addresses: {addrs:?}"); let mut res = Ok(()); - for addr in addrs { + for addr in &addrs { match swarm.dial(addr.clone()) { Ok(_) => {} Err(e) => { @@ -117,15 +119,18 @@ impl Node { } } } + log::info!("finished dialing addresses: {addrs:?}"); let _ = res_tx.send(res); } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, _addrs, request)) => { // TODO: if we're not connected to the peer, we should dial it + log::info!("sending request to peer {peer}: {request:?}"); swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { + log::info!("sending response: {response:?}"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { debug!("failed to send response: {e:?}"); } @@ -359,6 +364,27 @@ mod test { use super::NodeBuilder; use crate::message; + #[tokio::test] + async fn can_dial() { + let (node1, _, _, _) = NodeBuilder::new().with_port(4002).try_build().unwrap(); + let node1_peer_id = node1.peer_id(); + let local_p2p_address: crate::Multiaddr = + format!("/ip4/127.0.0.1/tcp/4002/p2p/{}", node1_peer_id) + .parse() + .expect("can parse valid multiaddr"); + let (node2, dial_tx2, _, _) = NodeBuilder::new().try_build().unwrap(); + tokio::spawn(async move { node1.run().await }); + tokio::spawn(async move { node2.run().await }); + + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx2 + .send((vec![local_p2p_address], res_tx)) + .await + .expect("can send dial request"); + let res = res_rx.await.expect("can receive dial response"); + assert!(res.is_ok(), "dialing node1 should succeed: {res:?}"); + } + #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index bd817ea1..f0d504ca 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -25,7 +25,7 @@ pub struct OutgoingRequest { /// It handles the authentication protocol used before sending /// requests to the worker. pub struct Service { - _node: Node, + node: Node, dial_tx: p2p::DialSender, incoming_messages_rx: Receiver, outgoing_messages_rx: Receiver, @@ -48,7 +48,7 @@ impl Service { Ok(( Self { - _node: node, + node, dial_tx, incoming_messages_rx, outgoing_messages_rx, @@ -63,15 +63,17 @@ impl Service { use futures::StreamExt as _; let Self { - _node, + node, dial_tx, mut incoming_messages_rx, mut outgoing_messages_rx, cancellation_token, context, } = self; + tokio::task::spawn(node.run()); - let mut message_handlers = FuturesUnordered::new(); + let mut incoming_message_handlers = FuturesUnordered::new(); + let mut outgoing_message_handlers = FuturesUnordered::new(); loop { tokio::select! { @@ -79,23 +81,26 @@ impl Service { break; } Some(message) = outgoing_messages_rx.recv() => { - if let Err(e) = handle_outgoing_message(message, dial_tx.clone(), context.clone()) - .await { - log::error!("failed to handle outgoing message: {e}"); - } + let handle = tokio::task::spawn(handle_outgoing_message(message, dial_tx.clone(), context.clone())); + outgoing_message_handlers.push(handle); } Some(message) = incoming_messages_rx.recv() => { let context = context.clone(); let handle = tokio::task::spawn( handle_incoming_message(message, context) ); - message_handlers.push(handle); + incoming_message_handlers.push(handle); } - Some(res) = message_handlers.next() => { + Some(res) = incoming_message_handlers.next() => { if let Err(e) = res { log::error!("failed to handle incoming message: {e}"); } } + Some(res) = outgoing_message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle outgoing message: {e}"); + } + } } } } @@ -200,7 +205,7 @@ async fn handle_outgoing_message( return Ok(()); } - log::debug!("sending validation authentication request to {peer_id}"); + log::info!("sending validation authentication request to {peer_id}"); // first, dial the worker // ensure there's no ongoing challenge @@ -224,10 +229,12 @@ async fn handle_outgoing_message( .send((multiaddrs.clone(), res_tx)) .await .context("failed to send dial request")?; + log::info!("dialing worker {peer_id} with multiaddrs: {multiaddrs:?}"); res_rx .await .context("failed to receive dial response")? .context("failed to dial worker")?; + log::info!("dialed worker {peer_id} with multiaddrs: {multiaddrs:?}"); // create the authentication challenge request message let challenge_bytes: [u8; 32] = OsRng.gen(); diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 24811586..6fa8fac7 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -59,6 +59,7 @@ impl Service { loop { tokio::select! { Some(request) = hardware_challenge_rx.recv() => { + println!("p2p: got hardware challenge"); let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6970355d..5580096e 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -30,6 +30,11 @@ impl HardwareChallenge { .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; + println!( + "Challenging node {} with P2P ID: {} and addresses: {:?}", + node.id, p2p_id, p2p_addresses + ); + // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); let challenge_expected = p2p::calc_matrix(&challenge_matrix); @@ -60,10 +65,14 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; + println!("hardware challenge sent to node {}", node.id); + let resp = response_rx .await .context("failed to receive response from node")?; + println!("response received from node {}: {:?}", node.id, resp); + if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); } else { diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 2ff06e8d..539de1ae 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -747,13 +747,13 @@ pub async fn execute_command( let peer_id = p2p_service.peer_id(); node_config.worker_p2p_id = Some(peer_id.to_string()); let external_p2p_address = - format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port,); + format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port); node_config.worker_p2p_addresses = Some( p2p_service .listen_addrs() .iter() .map(|addr| addr.to_string()) - .chain(std::iter::once(external_p2p_address)) + .chain(vec![external_p2p_address]) .collect(), ); tokio::task::spawn(p2p_service.run()); diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 61b682c5..c8464a7a 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -75,12 +75,14 @@ impl Service { use futures::StreamExt as _; let Self { - node: _, + node, mut incoming_messages, cancellation_token, context, } = self; + tokio::task::spawn(node.run()); + let mut message_handlers = FuturesUnordered::new(); loop { diff --git a/crates/worker/src/utils/logging.rs b/crates/worker/src/utils/logging.rs index 18c8de4b..312d565c 100644 --- a/crates/worker/src/utils/logging.rs +++ b/crates/worker/src/utils/logging.rs @@ -75,10 +75,6 @@ pub fn setup_logging(cli: Option<&Cli>) -> Result<(), Box Date: Fri, 11 Jul 2025 13:50:53 +0200 Subject: [PATCH 32/40] basic version of python sdk with provider and node registration --- Cargo.lock | 157 +++++++++- Cargo.toml | 11 +- Makefile | 9 + crates/dev-utils/examples/compute_pool.rs | 9 +- crates/dev-utils/examples/create_domain.rs | 2 +- crates/dev-utils/examples/eject_node.rs | 6 +- crates/dev-utils/examples/get_node_info.rs | 5 +- crates/dev-utils/examples/invalidate_work.rs | 2 +- crates/dev-utils/examples/mint_ai_token.rs | 4 +- .../examples/set_min_stake_amount.rs | 4 +- .../dev-utils/examples/start_compute_pool.rs | 2 +- crates/dev-utils/examples/submit_work.rs | 2 +- .../examples/test_concurrent_calls.rs | 14 +- crates/discovery/src/api/routes/node.rs | 16 +- crates/discovery/src/chainsync/sync.rs | 10 +- crates/discovery/src/store/redis.rs | 4 +- .../orchestrator/src/api/routes/heartbeat.rs | 2 +- crates/orchestrator/src/api/routes/task.rs | 4 +- crates/orchestrator/src/api/tests/helper.rs | 8 +- crates/orchestrator/src/discovery/monitor.rs | 7 +- .../src/plugins/node_groups/tests.rs | 24 +- crates/orchestrator/src/scheduler/mod.rs | 4 +- crates/orchestrator/src/status_update/mod.rs | 35 ++- crates/orchestrator/src/store/core/redis.rs | 4 +- .../src/store/domains/heartbeat_store.rs | 2 +- .../src/store/domains/metrics_store.rs | 2 +- crates/prime-core/Cargo.toml | 31 ++ crates/prime-core/src/lib.rs | 1 + .../prime-core/src/operations/compute_node.rs | 92 ++++++ crates/prime-core/src/operations/mod.rs | 2 + .../src/operations/provider.rs | 158 +++++----- crates/prime-protocol-py/.gitignore | 24 ++ crates/prime-protocol-py/.python-version | 1 + crates/prime-protocol-py/Cargo.toml | 32 ++ crates/prime-protocol-py/Makefile | 46 +++ crates/prime-protocol-py/README.md | 46 +++ .../prime-protocol-py/examples/basic_usage.py | 24 ++ crates/prime-protocol-py/pyproject.toml | 37 +++ crates/prime-protocol-py/requirements-dev.txt | 3 + crates/prime-protocol-py/setup.sh | 16 + crates/prime-protocol-py/src/client.rs | 294 ++++++++++++++++++ crates/prime-protocol-py/src/error.rs | 21 ++ crates/prime-protocol-py/src/lib.rs | 62 ++++ crates/prime-protocol-py/tests/test_client.py | 29 ++ crates/prime-protocol-py/uv.lock | 7 + crates/shared/src/models/metric.rs | 2 +- .../src/security/auth_signature_middleware.rs | 9 +- crates/shared/src/security/request_signer.rs | 2 +- crates/shared/src/utils/google_cloud.rs | 24 +- crates/shared/src/utils/mod.rs | 2 +- .../implementations/compute_pool_contract.rs | 4 + crates/validator/src/store/redis.rs | 4 +- crates/validator/src/validators/hardware.rs | 2 +- .../synthetic_data/chain_operations.rs | 23 +- .../src/validators/synthetic_data/mod.rs | 17 +- .../validators/synthetic_data/tests/mod.rs | 62 ++-- .../src/validators/synthetic_data/toploc.rs | 5 +- crates/worker/Cargo.toml | 1 + .../src/checks/hardware/interconnect.rs | 2 +- crates/worker/src/checks/hardware/storage.rs | 4 +- crates/worker/src/checks/stun.rs | 2 +- crates/worker/src/cli/command.rs | 24 +- crates/worker/src/docker/taskbridge/bridge.rs | 7 +- .../src/operations/heartbeat/service.rs | 2 +- crates/worker/src/operations/mod.rs | 3 +- .../{compute_node.rs => node_monitor.rs} | 103 +----- crates/worker/src/p2p/service.rs | 6 +- crates/worker/src/utils/p2p.rs | 4 +- 68 files changed, 1213 insertions(+), 376 deletions(-) create mode 100644 crates/prime-core/Cargo.toml create mode 100644 crates/prime-core/src/lib.rs create mode 100644 crates/prime-core/src/operations/compute_node.rs create mode 100644 crates/prime-core/src/operations/mod.rs rename crates/{worker => prime-core}/src/operations/provider.rs (67%) create mode 100644 crates/prime-protocol-py/.gitignore create mode 100644 crates/prime-protocol-py/.python-version create mode 100644 crates/prime-protocol-py/Cargo.toml create mode 100644 crates/prime-protocol-py/Makefile create mode 100644 crates/prime-protocol-py/README.md create mode 100644 crates/prime-protocol-py/examples/basic_usage.py create mode 100644 crates/prime-protocol-py/pyproject.toml create mode 100644 crates/prime-protocol-py/requirements-dev.txt create mode 100755 crates/prime-protocol-py/setup.sh create mode 100644 crates/prime-protocol-py/src/client.rs create mode 100644 crates/prime-protocol-py/src/error.rs create mode 100644 crates/prime-protocol-py/src/lib.rs create mode 100644 crates/prime-protocol-py/tests/test_client.py create mode 100644 crates/prime-protocol-py/uv.lock rename crates/worker/src/operations/{compute_node.rs => node_monitor.rs} (54%) diff --git a/Cargo.lock b/Cargo.lock index 84c4ad45..cfd97e47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4254,6 +4254,12 @@ dependencies = [ "web-time", ] +[[package]] +name = "indoc" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + [[package]] name = "inout" version = "0.1.4" @@ -5528,6 +5534,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -5923,7 +5938,7 @@ dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", - "memoffset", + "memoffset 0.7.1", "pin-utils", ] @@ -6713,6 +6728,47 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "prime-core" +version = "0.1.0" +dependencies = [ + "actix-web", + "alloy", + "alloy-provider", + "anyhow", + "env_logger", + "futures-util", + "hex", + "log", + "rand 0.8.5", + "redis", + "serde", + "serde_json", + "shared", + "subtle", + "tokio", + "tokio-util", + "url", + "uuid", +] + +[[package]] +name = "prime-protocol-py" +version = "0.1.0" +dependencies = [ + "alloy", + "alloy-provider", + "log", + "prime-core", + "pyo3", + "pyo3-log", + "shared", + "thiserror 1.0.69", + "tokio", + "tokio-test", + "url", +] + [[package]] name = "primeorder" version = "0.13.6" @@ -6921,6 +6977,79 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "pyo3" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +dependencies = [ + "indoc", + "libc", + "memoffset 0.9.1", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-log" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45192e5e4a4d2505587e27806c7b710c231c40c56f3bfc19535d0bb25df52264" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + +[[package]] +name = "pyo3-macros" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.101", +] + [[package]] name = "quanta" version = "0.10.1" @@ -8703,6 +8832,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" + [[package]] name = "tempfile" version = "3.14.0" @@ -8902,6 +9037,19 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + [[package]] name = "tokio-tungstenite" version = "0.24.0" @@ -9265,6 +9413,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "universal-hash" version = "0.5.1" @@ -10333,6 +10487,7 @@ dependencies = [ "log", "nalgebra", "nvml-wrapper", + "prime-core", "rand 0.8.5", "rand 0.9.1", "rand_core 0.6.4", diff --git a/Cargo.toml b/Cargo.toml index 00702d19..4b12cea6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,14 @@ members = [ "crates/shared", "crates/orchestrator", "crates/dev-utils", + "crates/prime-protocol-py", + "crates/prime-core", ] resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +prime-core = { path = "crates/prime-core" } actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } @@ -39,7 +42,6 @@ mockito = "1.7.0" iroh = "0.34.1" rand_v8 = { package = "rand", version = "0.8.5", features = ["std"] } rand_core_v6 = { package = "rand_core", version = "0.6.4", features = ["std"] } -ipld-core = "0.4" rust-ipfs = "0.14" cid = "0.11" @@ -55,3 +57,10 @@ manual_let_else = "warn" [workspace.lints.rust] unreachable_pub = "warn" + +[workspace.metadata.rust-analyzer] +# Help rust-analyzer with proc-macros +procMacro.enable = true +procMacro.attributes.enable = true +# Use a separate target directory for rust-analyzer +targetDir = true diff --git a/Makefile b/Makefile index decd07f6..dfc0d0af 100644 --- a/Makefile +++ b/Makefile @@ -268,3 +268,12 @@ deregister-worker: set -a; source ${ENV_FILE}; set +a; \ cargo run --bin worker -- deregister --compute-pool-id $${WORKER_COMPUTE_POOL_ID} --private-key-provider $${PRIVATE_KEY_PROVIDER} --private-key-node $${PRIVATE_KEY_NODE} --rpc-url $${RPC_URL} +# Python Package +.PHONY: python-install +python-install: + @cd crates/prime-protocol-py && make install + +.PHONY: python-test +python-test: + @cd crates/prime-protocol-py && make test + diff --git a/crates/dev-utils/examples/compute_pool.rs b/crates/dev-utils/examples/compute_pool.rs index 2569980c..51658d59 100644 --- a/crates/dev-utils/examples/compute_pool.rs +++ b/crates/dev-utils/examples/compute_pool.rs @@ -68,17 +68,14 @@ async fn main() -> Result<()> { compute_limit, ) .await; - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); let rewards_distributor_address = contracts .compute_pool .get_reward_distributor_address(U256::from(0)) .await .unwrap(); - println!( - "Rewards distributor address: {:?}", - rewards_distributor_address - ); + println!("Rewards distributor address: {rewards_distributor_address:?}"); let rewards_distributor = RewardsDistributor::new( rewards_distributor_address, wallet.provider(), @@ -86,7 +83,7 @@ async fn main() -> Result<()> { ); let rate = U256::from(10000000000000000u64); let tx = rewards_distributor.set_reward_rate(rate).await; - println!("Setting reward rate: {:?}", tx); + println!("Setting reward rate: {tx:?}"); let reward_rate = rewards_distributor.get_reward_rate().await.unwrap(); println!( diff --git a/crates/dev-utils/examples/create_domain.rs b/crates/dev-utils/examples/create_domain.rs index 4365c764..d1da5ea2 100644 --- a/crates/dev-utils/examples/create_domain.rs +++ b/crates/dev-utils/examples/create_domain.rs @@ -59,6 +59,6 @@ async fn main() -> Result<()> { .await; println!("Creating domain: {}", args.domain_name); println!("Validation logic: {}", args.validation_logic); - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/eject_node.rs b/crates/dev-utils/examples/eject_node.rs index e2ed03a3..142aa1cd 100644 --- a/crates/dev-utils/examples/eject_node.rs +++ b/crates/dev-utils/examples/eject_node.rs @@ -52,20 +52,20 @@ async fn main() -> Result<()> { .compute_registry .get_node(provider_address, node_address) .await; - println!("Node info: {:?}", node_info); + println!("Node info: {node_info:?}"); let tx = contracts .compute_pool .eject_node(args.pool_id, node_address) .await; println!("Ejected node {} from pool {}", args.node, args.pool_id); - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); let node_info = contracts .compute_registry .get_node(provider_address, node_address) .await; - println!("Post ejection node info: {:?}", node_info); + println!("Post ejection node info: {node_info:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/get_node_info.rs b/crates/dev-utils/examples/get_node_info.rs index fec5f526..79c7c120 100644 --- a/crates/dev-utils/examples/get_node_info.rs +++ b/crates/dev-utils/examples/get_node_info.rs @@ -55,9 +55,6 @@ async fn main() -> Result<()> { .await .unwrap(); - println!( - "Node Active: {}, Validated: {}, In Pool: {}", - active, validated, is_node_in_pool - ); + println!("Node Active: {active}, Validated: {validated}, In Pool: {is_node_in_pool}"); Ok(()) } diff --git a/crates/dev-utils/examples/invalidate_work.rs b/crates/dev-utils/examples/invalidate_work.rs index 78154b07..c93c8cee 100644 --- a/crates/dev-utils/examples/invalidate_work.rs +++ b/crates/dev-utils/examples/invalidate_work.rs @@ -65,7 +65,7 @@ async fn main() -> Result<()> { "Invalidated work in pool {} with penalty {}", args.pool_id, args.penalty ); - println!("Transaction hash: {:?}", tx); + println!("Transaction hash: {tx:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/mint_ai_token.rs b/crates/dev-utils/examples/mint_ai_token.rs index 5e572b40..bc43b78d 100644 --- a/crates/dev-utils/examples/mint_ai_token.rs +++ b/crates/dev-utils/examples/mint_ai_token.rs @@ -45,9 +45,9 @@ async fn main() -> Result<()> { let amount = U256::from(args.amount) * Unit::ETHER.wei(); let tx = contracts.ai_token.mint(address, amount).await; println!("Minting to address: {}", args.address); - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); let balance = contracts.ai_token.balance_of(address).await; - println!("Balance: {:?}", balance); + println!("Balance: {balance:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/set_min_stake_amount.rs b/crates/dev-utils/examples/set_min_stake_amount.rs index 82644e61..2858f5c7 100644 --- a/crates/dev-utils/examples/set_min_stake_amount.rs +++ b/crates/dev-utils/examples/set_min_stake_amount.rs @@ -36,13 +36,13 @@ async fn main() -> Result<()> { .unwrap(); let min_stake_amount = U256::from(args.min_stake_amount) * Unit::ETHER.wei(); - println!("Min stake amount: {}", min_stake_amount); + println!("Min stake amount: {min_stake_amount}"); let tx = contracts .prime_network .set_stake_minimum(min_stake_amount) .await; - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/start_compute_pool.rs b/crates/dev-utils/examples/start_compute_pool.rs index b11e2b2c..a94c0b6f 100644 --- a/crates/dev-utils/examples/start_compute_pool.rs +++ b/crates/dev-utils/examples/start_compute_pool.rs @@ -41,6 +41,6 @@ async fn main() -> Result<()> { .start_compute_pool(U256::from(args.pool_id)) .await; println!("Started compute pool with id: {}", args.pool_id); - println!("Transaction: {:?}", tx); + println!("Transaction: {tx:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/submit_work.rs b/crates/dev-utils/examples/submit_work.rs index aa3b489c..0fcf20d0 100644 --- a/crates/dev-utils/examples/submit_work.rs +++ b/crates/dev-utils/examples/submit_work.rs @@ -64,7 +64,7 @@ async fn main() -> Result<()> { "Submitted work for node {} in pool {}", args.node, args.pool_id ); - println!("Transaction hash: {:?}", tx); + println!("Transaction hash: {tx:?}"); Ok(()) } diff --git a/crates/dev-utils/examples/test_concurrent_calls.rs b/crates/dev-utils/examples/test_concurrent_calls.rs index 47f7bbea..1bef230a 100644 --- a/crates/dev-utils/examples/test_concurrent_calls.rs +++ b/crates/dev-utils/examples/test_concurrent_calls.rs @@ -38,7 +38,7 @@ async fn main() -> Result<()> { let wallet = Arc::new(Wallet::new(&args.key, Url::parse(&args.rpc_url)?).unwrap()); let price = wallet.provider.get_gas_price().await?; - println!("Gas price: {:?}", price); + println!("Gas price: {price:?}"); let current_nonce = wallet .provider @@ -50,8 +50,8 @@ async fn main() -> Result<()> { .block_id(BlockId::Number(BlockNumberOrTag::Pending)) .await?; - println!("Pending nonce: {:?}", pending_nonce); - println!("Current nonce: {:?}", current_nonce); + println!("Pending nonce: {pending_nonce:?}"); + println!("Current nonce: {current_nonce:?}"); // Unfortunately have to build all contracts atm let contracts = Arc::new( @@ -67,7 +67,7 @@ async fn main() -> Result<()> { let address = Address::from_str(&args.address).unwrap(); let amount = U256::from(args.amount) * Unit::ETHER.wei(); let random = (rand::random::() % 10) + 1; - println!("Random: {:?}", random); + println!("Random: {random:?}"); let contracts_one = contracts.clone(); let wallet_one = wallet.clone(); @@ -80,7 +80,7 @@ async fn main() -> Result<()> { let tx = retry_call(mint_call, 5, wallet_one.provider(), None) .await .unwrap(); - println!("Transaction hash I: {:?}", tx); + println!("Transaction hash I: {tx:?}"); }); let contracts_two = contracts.clone(); @@ -93,11 +93,11 @@ async fn main() -> Result<()> { let tx = retry_call(mint_call_two, 5, wallet_two.provider(), None) .await .unwrap(); - println!("Transaction hash II: {:?}", tx); + println!("Transaction hash II: {tx:?}"); }); let balance = contracts.ai_token.balance_of(address).await.unwrap(); - println!("Balance: {:?}", balance); + println!("Balance: {balance:?}"); tokio::time::sleep(tokio::time::Duration::from_secs(40)).await; Ok(()) } diff --git a/crates/discovery/src/api/routes/node.rs b/crates/discovery/src/api/routes/node.rs index b2cf780f..aa6ca45a 100644 --- a/crates/discovery/src/api/routes/node.rs +++ b/crates/discovery/src/api/routes/node.rs @@ -465,12 +465,10 @@ mod tests { assert_eq!(body.data, "Node registered successfully"); let nodes = app_state.node_store.get_nodes().await; - let nodes = match nodes { - Ok(nodes) => nodes, - Err(_) => { - panic!("Error getting nodes"); - } + let Ok(nodes) = nodes else { + panic!("Error getting nodes"); }; + assert_eq!(nodes.len(), 1); assert_eq!(nodes[0].id, node.id); assert_eq!(nodes[0].last_updated, None); @@ -611,12 +609,10 @@ mod tests { assert_eq!(body.data, "Node registered successfully"); let nodes = app_state.node_store.get_nodes().await; - let nodes = match nodes { - Ok(nodes) => nodes, - Err(_) => { - panic!("Error getting nodes"); - } + let Ok(nodes) = nodes else { + panic!("Error getting nodes"); }; + assert_eq!(nodes.len(), 1); assert_eq!(nodes[0].id, node.id); } diff --git a/crates/discovery/src/chainsync/sync.rs b/crates/discovery/src/chainsync/sync.rs index 6101c87a..1120d3cb 100644 --- a/crates/discovery/src/chainsync/sync.rs +++ b/crates/discovery/src/chainsync/sync.rs @@ -155,7 +155,7 @@ async fn sync_single_node( })?; let balance = provider.get_balance(node_address).await.map_err(|e| { - error!("Error retrieving balance for node {}: {}", node_address, e); + error!("Error retrieving balance for node {node_address}: {e}"); anyhow::anyhow!("Failed to retrieve node balance") })?; n.latest_balance = Some(balance); @@ -166,8 +166,7 @@ async fn sync_single_node( .await .map_err(|e| { error!( - "Error retrieving node info for provider {} and node {}: {}", - provider_address, node_address, e + "Error retrieving node info for provider {provider_address} and node {node_address}: {e}" ); anyhow::anyhow!("Failed to retrieve node info") })?; @@ -177,10 +176,7 @@ async fn sync_single_node( .get_provider(provider_address) .await .map_err(|e| { - error!( - "Error retrieving provider info for {}: {}", - provider_address, e - ); + error!("Error retrieving provider info for {provider_address}: {e}"); anyhow::anyhow!("Failed to retrieve provider info") })?; diff --git a/crates/discovery/src/store/redis.rs b/crates/discovery/src/store/redis.rs index 508815c2..c0a0c36b 100644 --- a/crates/discovery/src/store/redis.rs +++ b/crates/discovery/src/store/redis.rs @@ -45,8 +45,8 @@ impl RedisStore { _ => panic!("Expected TCP connection"), }; - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); + let redis_url = format!("redis://{host}:{port}"); + debug!("Starting test Redis server at {redis_url}"); // Add a small delay to ensure server is ready thread::sleep(Duration::from_millis(100)); diff --git a/crates/orchestrator/src/api/routes/heartbeat.rs b/crates/orchestrator/src/api/routes/heartbeat.rs index a8110e61..4d6261f9 100644 --- a/crates/orchestrator/src/api/routes/heartbeat.rs +++ b/crates/orchestrator/src/api/routes/heartbeat.rs @@ -404,7 +404,7 @@ mod tests { let task = match task.try_into() { Ok(task) => task, - Err(e) => panic!("Failed to convert TaskRequest to Task: {}", e), + Err(e) => panic!("Failed to convert TaskRequest to Task: {e}"), }; let _ = app_state.store_context.task_store.add_task(task).await; diff --git a/crates/orchestrator/src/api/routes/task.rs b/crates/orchestrator/src/api/routes/task.rs index 7cff4b6d..fa167dc7 100644 --- a/crates/orchestrator/src/api/routes/task.rs +++ b/crates/orchestrator/src/api/routes/task.rs @@ -315,8 +315,8 @@ mod tests { // Add tasks in sequence with delays for i in 1..=3 { let task: Task = TaskRequest { - image: format!("test{}", i), - name: format!("test{}", i), + image: format!("test{i}"), + name: format!("test{i}"), ..Default::default() } .try_into() diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index ca2e65c1..a5282b3a 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use url::Url; #[cfg(test)] -pub async fn create_test_app_state() -> Data { +pub(crate) async fn create_test_app_state() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -69,7 +69,7 @@ pub async fn create_test_app_state() -> Data { } #[cfg(test)] -pub async fn create_test_app_state_with_nodegroups() -> Data { +pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -139,7 +139,7 @@ pub async fn create_test_app_state_with_nodegroups() -> Data { } #[cfg(test)] -pub fn setup_contract() -> Contracts { +pub(crate) fn setup_contract() -> Contracts { let coordinator_key = "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"; let rpc_url: Url = Url::parse("http://localhost:8545").unwrap(); let wallet = Wallet::new(coordinator_key, rpc_url).unwrap(); @@ -154,7 +154,7 @@ pub fn setup_contract() -> Contracts { } #[cfg(test)] -pub async fn create_test_app_state_with_metrics() -> Data { +pub(crate) async fn create_test_app_state_with_metrics() -> Data { use shared::utils::MockStorageProvider; use crate::{ diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 56fed833..d1ea3133 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -384,15 +384,12 @@ impl DiscoveryMonitor { if let Some(balance) = discovery_node.latest_balance { if balance == U256::ZERO { - info!( - "Node {} has zero balance, marking as low balance", - node_address - ); + info!("Node {node_address} has zero balance, marking as low balance"); if let Err(e) = self .update_node_status(&node_address, NodeStatus::LowBalance) .await { - error!("Error updating node status: {}", e); + error!("Error updating node status: {e}"); } } } diff --git a/crates/orchestrator/src/plugins/node_groups/tests.rs b/crates/orchestrator/src/plugins/node_groups/tests.rs index a7d73b36..5fc22430 100644 --- a/crates/orchestrator/src/plugins/node_groups/tests.rs +++ b/crates/orchestrator/src/plugins/node_groups/tests.rs @@ -276,9 +276,7 @@ async fn test_group_formation_with_multiple_configs() { let _ = plugin.try_form_new_groups().await; let mut conn = plugin.store.client.get_connection().unwrap(); - let groups: Vec = conn - .keys(format!("{}*", GROUP_KEY_PREFIX).as_str()) - .unwrap(); + let groups: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*").as_str()).unwrap(); assert_eq!(groups.len(), 2); // Verify group was created @@ -1102,7 +1100,7 @@ async fn test_node_cannot_be_in_multiple_groups() { ); // Get all group keys - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); let group_copy = group_keys.clone(); // There should be exactly one group @@ -1167,7 +1165,7 @@ async fn test_node_cannot_be_in_multiple_groups() { let _ = plugin.try_form_new_groups().await; // Get updated group keys - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); // There should now be exactly two groups assert_eq!( @@ -1544,7 +1542,7 @@ async fn test_task_observer() { let _ = store_context.task_store.add_task(task2.clone()).await; let _ = plugin.try_form_new_groups().await; let all_tasks = store_context.task_store.get_all_tasks().await.unwrap(); - println!("All tasks: {:?}", all_tasks); + println!("All tasks: {all_tasks:?}"); assert_eq!(all_tasks.len(), 2); assert!(all_tasks[0].id != all_tasks[1].id); let topologies = get_task_topologies(&task).unwrap(); @@ -1588,7 +1586,7 @@ async fn test_task_observer() { .unwrap(); assert!(group_3.is_some()); let all_tasks = store_context.task_store.get_all_tasks().await.unwrap(); - println!("All tasks: {:?}", all_tasks); + println!("All tasks: {all_tasks:?}"); assert_eq!(all_tasks.len(), 2); // Manually assign the first task to the group to test immediate dissolution let group_3_before = plugin @@ -1615,7 +1613,7 @@ async fn test_task_observer() { .get_node_group(&node_3.address.to_string()) .await .unwrap(); - println!("Group 3 after task deletion: {:?}", group_3); + println!("Group 3 after task deletion: {group_3:?}"); // With new behavior, group should be dissolved immediately when its assigned task is deleted assert!(group_3.is_none()); @@ -1833,7 +1831,7 @@ async fn test_group_formation_priority() { let nodes: Vec<_> = (1..=4) .map(|i| { create_test_node( - &format!("0x{}234567890123456789012345678901234567890", i), + &format!("0x{i}234567890123456789012345678901234567890"), NodeStatus::Healthy, None, ) @@ -1863,7 +1861,7 @@ async fn test_group_formation_priority() { // Verify: Should form one 3-node group + one 1-node group // NOT four 1-node groups let mut conn = plugin.store.client.get_connection().unwrap(); - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); assert_eq!(group_keys.len(), 2, "Should form exactly 2 groups"); // Check group compositions @@ -1944,7 +1942,7 @@ async fn test_multiple_groups_same_configuration() { let nodes: Vec<_> = (1..=6) .map(|i| { create_test_node( - &format!("0x{}234567890123456789012345678901234567890", i), + &format!("0x{i}234567890123456789012345678901234567890"), NodeStatus::Healthy, None, ) @@ -1958,7 +1956,7 @@ async fn test_multiple_groups_same_configuration() { // Verify: Should create 3 groups of 2 nodes each let mut conn = plugin.store.client.get_connection().unwrap(); - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); assert_eq!(group_keys.len(), 3, "Should form exactly 3 groups"); // Verify all groups have exactly 2 nodes and same configuration @@ -2663,7 +2661,7 @@ async fn test_no_merge_when_policy_disabled() { // Create 3 nodes let nodes: Vec<_> = (1..=3) - .map(|i| create_test_node(&format!("0x{:040x}", i), NodeStatus::Healthy, None)) + .map(|i| create_test_node(&format!("0x{i:040x}"), NodeStatus::Healthy, None)) .collect(); for node in &nodes { diff --git a/crates/orchestrator/src/scheduler/mod.rs b/crates/orchestrator/src/scheduler/mod.rs index 711f313f..d5ffa506 100644 --- a/crates/orchestrator/src/scheduler/mod.rs +++ b/crates/orchestrator/src/scheduler/mod.rs @@ -144,12 +144,12 @@ mod tests { ); assert_eq!( env_vars.get("NODE_VAR").unwrap(), - &format!("node-{}", node_address) + &format!("node-{node_address}") ); // Check cmd replacement let cmd = returned_task.cmd.unwrap(); assert_eq!(cmd[0], format!("--task={}", task.id)); - assert_eq!(cmd[1], format!("--node={}", node_address)); + assert_eq!(cmd[1], format!("--node={node_address}")); } } diff --git a/crates/orchestrator/src/status_update/mod.rs b/crates/orchestrator/src/status_update/mod.rs index b2738488..67140cbc 100644 --- a/crates/orchestrator/src/status_update/mod.rs +++ b/crates/orchestrator/src/status_update/mod.rs @@ -372,6 +372,7 @@ async fn process_node( } #[cfg(test)] +#[allow(clippy::unused_async)] async fn is_node_in_pool(_: Contracts, _: u32, _: &OrchestratorNode) -> bool { true } @@ -433,7 +434,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let heartbeat = HeartbeatRequest { address: node.address.to_string(), @@ -451,7 +452,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } let _ = updater.process_nodes().await; @@ -510,7 +511,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -563,7 +564,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -623,7 +624,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } if let Err(e) = app_state .store_context @@ -631,7 +632,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); } let mode = ServerMode::Full; @@ -687,7 +688,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); }; let heartbeat = HeartbeatRequest { @@ -702,7 +703,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } if let Err(e) = app_state .store_context @@ -710,7 +711,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; @@ -772,7 +773,7 @@ mod tests { .set_unhealthy_counter(&node1.address, 1) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); }; if let Err(e) = app_state .store_context @@ -780,7 +781,7 @@ mod tests { .add_node(node1.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let node2 = OrchestratorNode { @@ -797,7 +798,7 @@ mod tests { .add_node(node2.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; @@ -873,7 +874,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } if let Err(e) = app_state .store_context @@ -881,7 +882,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); } let mode = ServerMode::Full; @@ -926,7 +927,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } sleep(Duration::from_secs(5)).await; @@ -960,7 +961,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -1029,7 +1030,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( diff --git a/crates/orchestrator/src/store/core/redis.rs b/crates/orchestrator/src/store/core/redis.rs index 79f57ce8..3b524b33 100644 --- a/crates/orchestrator/src/store/core/redis.rs +++ b/crates/orchestrator/src/store/core/redis.rs @@ -45,8 +45,8 @@ impl RedisStore { _ => panic!("Expected TCP connection"), }; - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); + let redis_url = format!("redis://{host}:{port}"); + debug!("Starting test Redis server at {redis_url}"); // Add a small delay to ensure server is ready thread::sleep(Duration::from_millis(100)); diff --git a/crates/orchestrator/src/store/domains/heartbeat_store.rs b/crates/orchestrator/src/store/domains/heartbeat_store.rs index b2f8138a..8bb43374 100644 --- a/crates/orchestrator/src/store/domains/heartbeat_store.rs +++ b/crates/orchestrator/src/store/domains/heartbeat_store.rs @@ -80,7 +80,7 @@ impl HeartbeatStore { .get_multiplexed_async_connection() .await .map_err(|_| anyhow!("Failed to get connection"))?; - let key = format!("{}:{}", ORCHESTRATOR_UNHEALTHY_COUNTER_KEY, address); + let key = format!("{ORCHESTRATOR_UNHEALTHY_COUNTER_KEY}:{address}"); con.set(key, counter.to_string()) .await .map_err(|_| anyhow!("Failed to set value")) diff --git a/crates/orchestrator/src/store/domains/metrics_store.rs b/crates/orchestrator/src/store/domains/metrics_store.rs index 1a0d79ac..5520860a 100644 --- a/crates/orchestrator/src/store/domains/metrics_store.rs +++ b/crates/orchestrator/src/store/domains/metrics_store.rs @@ -145,7 +145,7 @@ impl MetricsStore { task_id: &str, ) -> Result> { let mut con = self.redis.client.get_multiplexed_async_connection().await?; - let pattern = format!("{}:*", ORCHESTRATOR_NODE_METRICS_STORE); + let pattern = format!("{ORCHESTRATOR_NODE_METRICS_STORE}:*"); // Scan all node keys let mut iter: redis::AsyncIter = con.scan_match(&pattern).await?; diff --git a/crates/prime-core/Cargo.toml b/crates/prime-core/Cargo.toml new file mode 100644 index 00000000..bfcef45e --- /dev/null +++ b/crates/prime-core/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "prime-core" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[lib] +name = "prime_core" +path = "src/lib.rs" + +[dependencies] +shared = { workspace = true } +alloy = { workspace = true } +alloy-provider = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +url = { workspace = true } +actix-web = { workspace = true } +anyhow = { workspace = true } +futures-util = { workspace = true } +hex = { workspace = true } +uuid = { workspace = true } +log = { workspace = true } +tokio = { workspace = true } +tokio-util = { workspace = true } +redis = { workspace = true, features = ["aio", "tokio-comp"] } +rand_v8 = { workspace = true } +env_logger = { workspace = true } +subtle = "2.6.1" diff --git a/crates/prime-core/src/lib.rs b/crates/prime-core/src/lib.rs new file mode 100644 index 00000000..1bf04f8a --- /dev/null +++ b/crates/prime-core/src/lib.rs @@ -0,0 +1 @@ +pub mod operations; diff --git a/crates/prime-core/src/operations/compute_node.rs b/crates/prime-core/src/operations/compute_node.rs new file mode 100644 index 00000000..c294291a --- /dev/null +++ b/crates/prime-core/src/operations/compute_node.rs @@ -0,0 +1,92 @@ +use alloy::{primitives::utils::keccak256 as keccak, primitives::U256, signers::Signer}; +use anyhow::Result; +use shared::web3::wallet::Wallet; +use shared::web3::{contracts::core::builder::Contracts, wallet::WalletProvider}; + +pub struct ComputeNodeOperations<'c> { + provider_wallet: &'c Wallet, + node_wallet: &'c Wallet, + contracts: Contracts, +} + +impl<'c> ComputeNodeOperations<'c> { + pub fn new( + provider_wallet: &'c Wallet, + node_wallet: &'c Wallet, + contracts: Contracts, + ) -> Self { + Self { + provider_wallet, + node_wallet, + contracts, + } + } + + pub async fn check_compute_node_exists(&self) -> Result> { + let compute_node = self + .contracts + .compute_registry + .get_node( + self.provider_wallet.wallet.default_signer().address(), + self.node_wallet.wallet.default_signer().address(), + ) + .await; + + match compute_node { + Ok(_) => Ok(true), + Err(_) => Ok(false), + } + } + + // Returns true if the compute node was added, false if it already exists + pub async fn add_compute_node( + &self, + compute_units: U256, + ) -> Result> { + log::info!("🔄 Adding compute node"); + + if self.check_compute_node_exists().await? { + return Ok(false); + } + + log::info!("Adding compute node"); + let provider_address = self.provider_wallet.wallet.default_signer().address(); + let node_address = self.node_wallet.wallet.default_signer().address(); + let digest = keccak([provider_address.as_slice(), node_address.as_slice()].concat()); + + let signature = self + .node_wallet + .signer + .sign_message(digest.as_slice()) + .await? + .as_bytes(); + + // Create the signature bytes + let add_node_tx = self + .contracts + .prime_network + .add_compute_node(node_address, compute_units, signature.to_vec()) + .await?; + log::info!("Add node tx: {add_node_tx:?}"); + Ok(true) + } + + pub async fn remove_compute_node(&self) -> Result> { + log::info!("🔄 Removing compute node"); + + if !self.check_compute_node_exists().await? { + return Ok(false); + } + + log::info!("Removing compute node"); + let provider_address = self.provider_wallet.wallet.default_signer().address(); + let node_address = self.node_wallet.wallet.default_signer().address(); + let remove_node_tx = self + .contracts + .prime_network + .remove_compute_node(provider_address, node_address) + .await?; + log::info!("Remove node tx: {remove_node_tx:?}"); + Ok(true) + } +} diff --git a/crates/prime-core/src/operations/mod.rs b/crates/prime-core/src/operations/mod.rs new file mode 100644 index 00000000..089315f5 --- /dev/null +++ b/crates/prime-core/src/operations/mod.rs @@ -0,0 +1,2 @@ +pub mod compute_node; +pub mod provider; diff --git a/crates/worker/src/operations/provider.rs b/crates/prime-core/src/operations/provider.rs similarity index 67% rename from crates/worker/src/operations/provider.rs rename to crates/prime-core/src/operations/provider.rs index fb8aba5f..c07f6189 100644 --- a/crates/worker/src/operations/provider.rs +++ b/crates/prime-core/src/operations/provider.rs @@ -1,4 +1,3 @@ -use crate::console::Console; use alloy::primitives::utils::format_ether; use alloy::primitives::{Address, U256}; use log::error; @@ -9,18 +8,14 @@ use std::{fmt, io}; use tokio::time::{sleep, Duration}; use tokio_util::sync::CancellationToken; -pub(crate) struct ProviderOperations { +pub struct ProviderOperations { wallet: Wallet, contracts: Contracts, auto_accept: bool, } impl ProviderOperations { - pub(crate) fn new( - wallet: Wallet, - contracts: Contracts, - auto_accept: bool, - ) -> Self { + pub fn new(wallet: Wallet, contracts: Contracts, auto_accept: bool) -> Self { Self { wallet, contracts, @@ -44,7 +39,7 @@ impl ProviderOperations { } } - pub(crate) fn start_monitoring(&self, cancellation_token: CancellationToken) { + pub fn start_monitoring(&self, cancellation_token: CancellationToken) { let provider_address = self.wallet.wallet.default_signer().address(); let contracts = self.contracts.clone(); @@ -58,12 +53,12 @@ impl ProviderOperations { loop { tokio::select! { _ = cancellation_token.cancelled() => { - Console::info("Monitor", "Shutting down provider status monitor..."); + log::info!("Shutting down provider status monitor..."); break; } _ = async { let Some(stake_manager) = contracts.stake_manager.as_ref() else { - Console::user_error("Cannot start monitoring - stake manager not initialized"); + log::error!("Cannot start monitoring - stake manager not initialized"); return; }; @@ -71,21 +66,21 @@ impl ProviderOperations { match stake_manager.get_stake(provider_address).await { Ok(stake) => { if first_check || stake != last_stake { - Console::info("🔄 Chain Sync - Provider stake", &format_ether(stake)); + log::info!("🔄 Chain Sync - Provider stake: {}", format_ether(stake)); if !first_check { if stake < last_stake { - Console::warning(&format!("Stake decreased - possible slashing detected: From {} to {}", + log::warn!("Stake decreased - possible slashing detected: From {} to {}", format_ether(last_stake), format_ether(stake) - )); + ); if stake == U256::ZERO { - Console::warning("Stake is 0 - you might have to restart the node to increase your stake (if you still have balance left)"); + log::warn!("Stake is 0 - you might have to restart the node to increase your stake (if you still have balance left)"); } } else { - Console::info("🔄 Chain Sync - Stake changed", &format!("From {} to {}", + log::info!("🔄 Chain Sync - Stake increased: From {} to {}", format_ether(last_stake), format_ether(stake) - )); + ); } } last_stake = stake; @@ -102,13 +97,7 @@ impl ProviderOperations { match contracts.ai_token.balance_of(provider_address).await { Ok(balance) => { if first_check || balance != last_balance { - Console::info("🔄 Chain Sync - Balance", &format_ether(balance)); - if !first_check { - Console::info("🔄 Chain Sync - Balance changed", &format!("From {} to {}", - format_ether(last_balance), - format_ether(balance) - )); - } + log::info!("🔄 Chain Sync - Balance: {}", format_ether(balance)); last_balance = balance; } Some(balance) @@ -123,12 +112,12 @@ impl ProviderOperations { match contracts.compute_registry.get_provider(provider_address).await { Ok(provider) => { if first_check || provider.is_whitelisted != last_whitelist_status { - Console::info("🔄 Chain Sync - Whitelist status", &format!("{}", provider.is_whitelisted)); + log::info!("🔄 Chain Sync - Whitelist status: {}", provider.is_whitelisted); if !first_check { - Console::info("🔄 Chain Sync - Whitelist status changed", &format!("From {} to {}", + log::info!("🔄 Chain Sync - Whitelist status changed: {} -> {}", last_whitelist_status, provider.is_whitelisted - )); + ); } last_whitelist_status = provider.is_whitelisted; } @@ -146,7 +135,7 @@ impl ProviderOperations { }); } - pub(crate) async fn check_provider_exists(&self) -> Result { + pub async fn check_provider_exists(&self) -> Result { let address = self.wallet.wallet.default_signer().address(); let provider = self @@ -159,7 +148,7 @@ impl ProviderOperations { Ok(provider.provider_address != Address::default()) } - pub(crate) async fn check_provider_whitelisted(&self) -> Result { + pub async fn check_provider_whitelisted(&self) -> Result { let address = self.wallet.wallet.default_signer().address(); let provider = self @@ -171,29 +160,32 @@ impl ProviderOperations { Ok(provider.is_whitelisted) } - - pub(crate) async fn retry_register_provider( + pub async fn retry_register_provider( &self, stake: U256, max_attempts: u32, - cancellation_token: CancellationToken, + cancellation_token: Option, ) -> Result<(), ProviderError> { - Console::title("Registering Provider"); + log::info!("Registering Provider"); let mut attempts = 0; while attempts < max_attempts || max_attempts == 0 { - Console::progress("Registering provider..."); + log::info!("Registering provider..."); match self.register_provider(stake).await { Ok(_) => { return Ok(()); } Err(e) => match e { ProviderError::NotWhitelisted | ProviderError::InsufficientBalance => { - Console::info("Info", "Retrying in 10 seconds..."); - tokio::select! { - _ = tokio::time::sleep(tokio::time::Duration::from_secs(10)) => {} - _ = cancellation_token.cancelled() => { - return Err(e); + log::info!("Retrying in 10 seconds..."); + if let Some(ref token) = cancellation_token { + tokio::select! { + _ = tokio::time::sleep(tokio::time::Duration::from_secs(10)) => {} + _ = token.cancelled() => { + return Err(e); + } } + } else { + tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; } attempts += 1; continue; @@ -206,7 +198,7 @@ impl ProviderOperations { Err(ProviderError::Other) } - pub(crate) async fn register_provider(&self, stake: U256) -> Result<(), ProviderError> { + pub async fn register_provider(&self, stake: U256) -> Result<(), ProviderError> { let address = self.wallet.wallet.default_signer().address(); let balance: U256 = self .contracts @@ -224,42 +216,39 @@ impl ProviderOperations { let provider_exists = self.check_provider_exists().await?; if !provider_exists { - Console::info("Balance", &format_ether(balance)); - Console::info( - "ETH Balance", + log::info!("Balance: {}", &format_ether(balance)); + log::info!( + "ETH Balance: {}", &format!("{} ETH", format_ether(U256::from(eth_balance))), ); if balance < stake { - Console::user_error(&format!( - "Insufficient balance for stake: {}", - format_ether(stake) - )); + log::error!("Insufficient balance for stake: {}", format_ether(stake)); return Err(ProviderError::InsufficientBalance); } if !self.prompt_user_confirmation(&format!( "Do you want to approve staking {}?", format_ether(stake) )) { - Console::info("Operation cancelled by user", "Staking approval declined"); + log::info!("Operation cancelled by user: Staking approval declined"); return Err(ProviderError::UserCancelled); } - Console::progress("Approving for Stake transaction"); + log::info!("Approving for Stake transaction"); self.contracts .ai_token .approve(stake) .await .map_err(|_| ProviderError::Other)?; - Console::progress("Registering Provider"); + log::info!("Registering Provider"); let Ok(register_tx) = self.contracts.prime_network.register_provider(stake).await else { return Err(ProviderError::Other); }; - Console::info("Registration tx", &format!("{register_tx:?}")); + log::info!("Registration tx: {}", &format!("{register_tx:?}")); } // Get provider details again - cleanup later - Console::progress("Getting provider details"); + log::info!("Getting provider details"); let _ = self .contracts .compute_registry @@ -270,32 +259,29 @@ impl ProviderOperations { let provider_exists = self.check_provider_exists().await?; if !provider_exists { - Console::info("Balance", &format_ether(balance)); - Console::info( - "ETH Balance", + log::info!("Balance: {}", &format_ether(balance)); + log::info!( + "ETH Balance: {}", &format!("{} ETH", format_ether(U256::from(eth_balance))), ); if balance < stake { - Console::user_error(&format!( - "Insufficient balance for stake: {}", - format_ether(stake) - )); + log::error!("Insufficient balance for stake: {}", format_ether(stake)); return Err(ProviderError::InsufficientBalance); } if !self.prompt_user_confirmation(&format!( "Do you want to approve staking {}?", format_ether(stake) )) { - Console::info("Operation cancelled by user", "Staking approval declined"); + log::info!("Operation cancelled by user: Staking approval declined"); return Err(ProviderError::UserCancelled); } - Console::progress("Approving Stake transaction"); + log::info!("Approving Stake transaction"); self.contracts.ai_token.approve(stake).await.map_err(|e| { error!("Failed to approve stake: {e}"); ProviderError::Other })?; - Console::progress("Registering Provider"); + log::info!("Registering Provider"); let register_tx = match self.contracts.prime_network.register_provider(stake).await { Ok(tx) => tx, Err(e) => { @@ -303,7 +289,7 @@ impl ProviderOperations { return Err(ProviderError::Other); } }; - Console::info("Registration tx", &format!("{register_tx:?}")); + log::info!("Registration tx: {register_tx:?}"); } let provider = self @@ -315,23 +301,23 @@ impl ProviderOperations { let provider_exists = provider.provider_address != Address::default(); if !provider_exists { - Console::user_error( - "Provider could not be registered. Please ensure your balance is high enough.", + log::error!( + "Provider could not be registered. Please ensure your balance is high enough." ); return Err(ProviderError::Other); } - Console::success("Provider registered"); + log::info!("Provider registered"); if !provider.is_whitelisted { - Console::user_error("Provider is not whitelisted yet."); + log::error!("Provider is not whitelisted yet."); return Err(ProviderError::NotWhitelisted); } Ok(()) } - pub(crate) async fn increase_stake(&self, additional_stake: U256) -> Result<(), ProviderError> { - Console::title("💰 Increasing Provider Stake"); + pub async fn increase_stake(&self, additional_stake: U256) -> Result<(), ProviderError> { + log::info!("💰 Increasing Provider Stake"); let address = self.wallet.wallet.default_signer().address(); let balance: U256 = self @@ -341,11 +327,14 @@ impl ProviderOperations { .await .map_err(|_| ProviderError::Other)?; - Console::info("Current Balance", &format_ether(balance)); - Console::info("Additional stake amount", &format_ether(additional_stake)); + log::info!("Current Balance: {}", &format_ether(balance)); + log::info!( + "Additional stake amount: {}", + &format_ether(additional_stake) + ); if balance < additional_stake { - Console::user_error("Insufficient balance for stake increase"); + log::error!("Insufficient balance for stake increase"); return Err(ProviderError::Other); } @@ -353,20 +342,20 @@ impl ProviderOperations { "Do you want to approve staking {} additional funds?", format_ether(additional_stake) )) { - Console::info("Operation cancelled by user", "Staking approval declined"); + log::info!("Operation cancelled by user: Staking approval declined"); return Err(ProviderError::UserCancelled); } - Console::progress("Approving additional stake"); + log::info!("Approving additional stake"); let approve_tx = self .contracts .ai_token .approve(additional_stake) .await .map_err(|_| ProviderError::Other)?; - Console::info("Transaction approved", &format!("{approve_tx:?}")); + log::info!("Transaction approved: {}", &format!("{approve_tx:?}")); - Console::progress("Increasing stake"); + log::info!("Increasing stake"); let stake_tx = match self.contracts.prime_network.stake(additional_stake).await { Ok(tx) => tx, Err(e) => { @@ -374,17 +363,15 @@ impl ProviderOperations { return Err(ProviderError::Other); } }; - Console::info( - "Stake increase transaction completed: ", - &format!("{stake_tx:?}"), + log::info!( + "Stake increase transaction completed: {}", + &format!("{stake_tx:?}") ); - Console::success("Provider stake increased successfully"); Ok(()) } - pub(crate) async fn reclaim_stake(&self, amount: U256) -> Result<(), ProviderError> { - Console::progress("Reclaiming stake"); + pub async fn reclaim_stake(&self, amount: U256) -> Result<(), ProviderError> { let reclaim_tx = match self.contracts.prime_network.reclaim_stake(amount).await { Ok(tx) => tx, Err(e) => { @@ -392,17 +379,16 @@ impl ProviderOperations { return Err(ProviderError::Other); } }; - Console::info( - "Stake reclaim transaction completed: ", - &format!("{reclaim_tx:?}"), + log::info!( + "Stake reclaim transaction completed: {}", + &format!("{reclaim_tx:?}") ); - Console::success("Provider stake reclaimed successfully"); Ok(()) } } #[derive(Debug)] -pub(crate) enum ProviderError { +pub enum ProviderError { NotWhitelisted, UserCancelled, Other, diff --git a/crates/prime-protocol-py/.gitignore b/crates/prime-protocol-py/.gitignore new file mode 100644 index 00000000..454f9f33 --- /dev/null +++ b/crates/prime-protocol-py/.gitignore @@ -0,0 +1,24 @@ +# Python +__pycache__/ +*.py[cod] +*.so +*.pyd +*.egg-info/ +dist/ + +# Virtual environments +.venv/ + +# Testing +.pytest_cache/ + +# IDE +.vscode/ +.idea/ + +# Rust/Maturin +target/ +Cargo.lock + +# OS +.DS_Store \ No newline at end of file diff --git a/crates/prime-protocol-py/.python-version b/crates/prime-protocol-py/.python-version new file mode 100644 index 00000000..4b7e4839 --- /dev/null +++ b/crates/prime-protocol-py/.python-version @@ -0,0 +1 @@ +3.11 \ No newline at end of file diff --git a/crates/prime-protocol-py/Cargo.toml b/crates/prime-protocol-py/Cargo.toml new file mode 100644 index 00000000..9441afe1 --- /dev/null +++ b/crates/prime-protocol-py/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "prime-protocol-py" +version = "0.1.0" +authors = ["Prime Protocol"] +edition = "2021" +rust-version = "1.70" + +[lib] +name = "primeprotocol" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.25.1", features = ["extension-module"] } +thiserror = "1.0" +shared = { workspace = true } +prime-core = { workspace = true } +alloy = { workspace = true } +alloy-provider = { workspace = true } +tokio = { version = "1.35", features = ["rt"] } +url = "2.5" +log = { workspace = true } +pyo3-log = "0.12.4" + +[dev-dependencies] +tokio-test = "0.4" + +[profile.release] +opt-level = 3 +lto = true +codegen-units = 1 +strip = true + diff --git a/crates/prime-protocol-py/Makefile b/crates/prime-protocol-py/Makefile new file mode 100644 index 00000000..fe1858d0 --- /dev/null +++ b/crates/prime-protocol-py/Makefile @@ -0,0 +1,46 @@ +.PHONY: install +install: + @command -v uv > /dev/null || (echo "Please install uv first: curl -LsSf https://astral.sh/uv/install.sh | sh" && exit 1) + @./setup.sh # Uses uv for fast package management + +.PHONY: build +build: + @source .venv/bin/activate && maturin develop + @source .venv/bin/activate && uv pip install --force-reinstall -e . + +.PHONY: dev +dev: + @source .venv/bin/activate && maturin develop --watch + +.PHONY: build-release +build-release: + @source .venv/bin/activate && maturin build --release --strip + +.PHONY: test +test: + @source .venv/bin/activate && pytest tests/ -v + +.PHONY: example +example: + @source .venv/bin/activate && python examples/basic_usage.py + +.PHONY: clean +clean: + @rm -rf target/ dist/ *.egg-info .pytest_cache __pycache__ .venv/ + +.PHONY: clear-cache +clear-cache: + @uv cache clean + @echo "uv cache cleared" + +.PHONY: help +help: + @echo "Available commands:" + @echo " make install - Setup environment and install dependencies" + @echo " make build - Build development version" + @echo " make dev - Build with hot reload (watches for changes)" + @echo " make build-release - Build release wheel" + @echo " make test - Run tests" + @echo " make example - Run example script" + @echo " make clean - Clean build artifacts" + @echo " make clear-cache - Clear uv cache" \ No newline at end of file diff --git a/crates/prime-protocol-py/README.md b/crates/prime-protocol-py/README.md new file mode 100644 index 00000000..439218c3 --- /dev/null +++ b/crates/prime-protocol-py/README.md @@ -0,0 +1,46 @@ +# Prime Protocol Python Client + +Python bindings for checking if compute pools exist. + +## Build + +```bash +# Install uv (one-time) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Setup and build +cd crates/prime-protocol-py +make install +``` + +## Usage + +```python +from primeprotocol import PrimeProtocolClient + +client = PrimeProtocolClient("http://localhost:8545") +exists = client.compute_pool_exists(0) +``` + +## Development + +```bash +make build # Build development version +make test # Run tests +make example # Run example +make clean # Clean artifacts +make help # Show all commands +``` + +## Installing in other projects + +```bash +# Build the wheel +make build-release + +# Install with uv (recommended) +uv pip install target/wheels/primeprotocol-*.whl + +# Or install directly from source +uv pip install /path/to/prime-protocol-py/ +``` \ No newline at end of file diff --git a/crates/prime-protocol-py/examples/basic_usage.py b/crates/prime-protocol-py/examples/basic_usage.py new file mode 100644 index 00000000..639eccf7 --- /dev/null +++ b/crates/prime-protocol-py/examples/basic_usage.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Example usage of the Prime Protocol Python client.""" + +import logging +import os +from primeprotocol import PrimeProtocolClient + +FORMAT = '%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s' +logging.basicConfig(format=FORMAT) +logging.getLogger().setLevel(logging.INFO) + + +def main(): + rpc_url = os.getenv("RPC_URL", "http://localhost:8545") + pool_id = os.getenv("POOL_ID", 0) + private_key_provider = os.getenv("PRIVATE_KEY_PROVIDER", None) + private_key_node = os.getenv("PRIVATE_KEY_NODE", None) + + logging.info(f"Connecting to: {rpc_url}") + client = PrimeProtocolClient(pool_id, rpc_url, private_key_provider, private_key_node) + client.start() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/crates/prime-protocol-py/pyproject.toml b/crates/prime-protocol-py/pyproject.toml new file mode 100644 index 00000000..9834d8b4 --- /dev/null +++ b/crates/prime-protocol-py/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "primeprotocol" +description = "Simple Python bindings for Prime Protocol client" +readme = "README.md" +requires-python = ">=3.8" +license = {text = "MIT"} +keywords = ["prime", "protocol"] +authors = [ + {name = "Prime Protocol", email = "jannik@primeintellect.ai"} +] +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dynamic = ["version"] + +[project.urls] +"Homepage" = "https://github.com/primeprotocol/protocol" +"Bug Tracker" = "https://github.com/primeprotocol/protocol/issues" + +[tool.maturin] +features = ["pyo3/extension-module"] +module-name = "primeprotocol" \ No newline at end of file diff --git a/crates/prime-protocol-py/requirements-dev.txt b/crates/prime-protocol-py/requirements-dev.txt new file mode 100644 index 00000000..f2af3c5d --- /dev/null +++ b/crates/prime-protocol-py/requirements-dev.txt @@ -0,0 +1,3 @@ +# Development dependencies +maturin>=1.0,<2.0 +pytest>=7.0 \ No newline at end of file diff --git a/crates/prime-protocol-py/setup.sh b/crates/prime-protocol-py/setup.sh new file mode 100755 index 00000000..7609b236 --- /dev/null +++ b/crates/prime-protocol-py/setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +# Check if uv is installed +if ! command -v uv &> /dev/null; then + echo "Please install uv first: curl -LsSf https://astral.sh/uv/install.sh | sh" + exit 1 +fi + +# Setup environment +uv venv +source .venv/bin/activate +uv pip install -r requirements-dev.txt +maturin develop + +echo "Setup complete." \ No newline at end of file diff --git a/crates/prime-protocol-py/src/client.rs b/crates/prime-protocol-py/src/client.rs new file mode 100644 index 00000000..b4139b7b --- /dev/null +++ b/crates/prime-protocol-py/src/client.rs @@ -0,0 +1,294 @@ +use crate::error::{PrimeProtocolError, Result}; +use alloy::primitives::utils::format_ether; +use alloy::primitives::U256; +use prime_core::operations::compute_node::ComputeNodeOperations; +use prime_core::operations::provider::ProviderOperations; +use shared::web3::contracts::core::builder::{ContractBuilder, Contracts}; +use shared::web3::contracts::structs::compute_pool::PoolStatus; +use shared::web3::wallet::{Wallet, WalletProvider}; +use url::Url; + +pub struct PrimeProtocolClientCore { + rpc_url: String, + compute_pool_id: u64, + private_key_provider: Option, + private_key_node: Option, + auto_accept_transactions: bool, + funding_retry_count: u32, +} + +impl PrimeProtocolClientCore { + pub fn new( + compute_pool_id: u64, + rpc_url: String, + private_key_provider: Option, + private_key_node: Option, + auto_accept_transactions: Option, + funding_retry_count: Option, + ) -> Result { + if rpc_url.is_empty() { + return Err(PrimeProtocolError::InvalidConfig( + "RPC URL cannot be empty".to_string(), + )); + } + + Url::parse(&rpc_url) + .map_err(|_| PrimeProtocolError::InvalidConfig("Invalid RPC URL format".to_string()))?; + + Ok(Self { + rpc_url, + compute_pool_id, + private_key_provider, + private_key_node, + auto_accept_transactions: auto_accept_transactions.unwrap_or(true), + funding_retry_count: funding_retry_count.unwrap_or(10), + }) + } + + pub async fn start_async(&self) -> Result<()> { + let (provider_wallet, node_wallet, contracts) = + self.initialize_blockchain_components().await?; + let pool_info = self.wait_for_active_pool(&contracts).await?; + + log::info!("Pool info: {:?}", pool_info); + + self.ensure_provider_registered(&provider_wallet, &contracts) + .await?; + self.ensure_compute_node_registered(&provider_wallet, &node_wallet, &contracts) + .await?; + + // TODO: Optional - run hardware check? + // TODO: p2p reachable? + + Ok(()) + } + + async fn initialize_blockchain_components( + &self, + ) -> Result<(Wallet, Wallet, Contracts)> { + let private_key_provider = self.get_private_key_provider()?; + let private_key_node = self.get_private_key_node()?; + let rpc_url = Url::parse(&self.rpc_url).unwrap(); + + let provider_wallet = Wallet::new(&private_key_provider, rpc_url.clone()).map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to create provider wallet: {}", e)) + })?; + + let node_wallet = Wallet::new(&private_key_node, rpc_url.clone()).map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to create node wallet: {}", e)) + })?; + + let contracts = ContractBuilder::new(provider_wallet.provider()) + .with_compute_pool() + .with_compute_registry() + .with_ai_token() + .with_prime_network() + .with_stake_manager() + .build() + .map_err(|e| PrimeProtocolError::BlockchainError(e.to_string()))?; + + Ok((provider_wallet, node_wallet, contracts)) + } + + async fn wait_for_active_pool( + &self, + contracts: &Contracts, + ) -> Result { + loop { + match contracts + .compute_pool + .get_pool_info(U256::from(self.compute_pool_id)) + .await + { + Ok(pool) if pool.status == PoolStatus::ACTIVE => return Ok(pool), + Ok(_) => { + log::info!("Pool not active yet, waiting..."); + tokio::time::sleep(tokio::time::Duration::from_secs(15)).await; + } + Err(e) => { + return Err(PrimeProtocolError::BlockchainError(format!( + "Failed to get pool info: {}", + e + ))); + } + } + } + } + async fn ensure_provider_registered( + &self, + provider_wallet: &Wallet, + contracts: &Contracts, + ) -> Result<()> { + let provider_ops = ProviderOperations::new( + provider_wallet.clone(), + contracts.clone(), + self.auto_accept_transactions, + ); + + // Check if provider exists + let provider_exists = provider_ops.check_provider_exists().await.map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to check if provider exists: {}", + e + )) + })?; + + let Some(stake_manager) = contracts.stake_manager.as_ref() else { + return Err(PrimeProtocolError::BlockchainError( + "Stake manager not initialized".to_string(), + )); + }; + + // Check if provider is whitelisted + let is_whitelisted = provider_ops + .check_provider_whitelisted() + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to check provider whitelist status: {}", + e + )) + })?; + + // todo: revisit this + if provider_exists && is_whitelisted { + log::info!("Provider is registered and whitelisted"); + } else { + // For now, we'll use a default compute_units value - this should be configurable + let compute_units = U256::from(1); + + let required_stake = stake_manager + .calculate_stake(compute_units, U256::from(0)) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to calculate required stake: {}", + e + )) + })?; + + log::info!("Required stake: {}", format_ether(required_stake)); + + provider_ops + .retry_register_provider(required_stake, self.funding_retry_count, None) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to register provider: {}", + e + )) + })?; + + log::info!("Provider registered successfully"); + } + + // Get provider's current total compute and stake + let provider_total_compute = contracts + .compute_registry + .get_provider_total_compute(provider_wallet.wallet.default_signer().address()) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to get provider total compute: {}", + e + )) + })?; + + let provider_stake = stake_manager + .get_stake(provider_wallet.wallet.default_signer().address()) + .await + .unwrap_or_default(); + + // For now, we'll use a default compute_units value - this should be configurable + let compute_units = U256::from(1); + + let required_stake = stake_manager + .calculate_stake(compute_units, provider_total_compute) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to calculate required stake: {}", + e + )) + })?; + + if required_stake > provider_stake { + log::info!( + "Provider stake is less than required stake. Required: {} tokens, Current: {} tokens", + format_ether(required_stake), + format_ether(provider_stake) + ); + + provider_ops + .increase_stake(required_stake - provider_stake) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to increase stake: {}", e)) + })?; + + log::info!("Successfully increased stake"); + } + + Ok(()) + } + + async fn ensure_compute_node_registered( + &self, + provider_wallet: &Wallet, + node_wallet: &Wallet, + contracts: &Contracts, + ) -> Result<()> { + let compute_node_ops = + ComputeNodeOperations::new(provider_wallet, node_wallet, contracts.clone()); + + // Check if compute node exists + let compute_node_exists = + compute_node_ops + .check_compute_node_exists() + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to check if compute node exists: {}", + e + )) + })?; + + if compute_node_exists { + log::info!("Compute node is already registered"); + return Ok(()); + } + + // If compute node doesn't exist, register it + // For now, we'll use default compute specs - this should be configurable + compute_node_ops + .add_compute_node(U256::from(1)) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to register compute node: {}", + e + )) + })?; + + log::info!("Compute node registered successfully"); + Ok(()) + } + + fn get_private_key_provider(&self) -> Result { + match &self.private_key_provider { + Some(key) => Ok(key.clone()), + None => std::env::var("PRIVATE_KEY_PROVIDER").map_err(|_| { + PrimeProtocolError::InvalidConfig("PRIVATE_KEY_PROVIDER must be set".to_string()) + }), + } + } + + fn get_private_key_node(&self) -> Result { + match &self.private_key_node { + Some(key) => Ok(key.clone()), + None => std::env::var("PRIVATE_KEY_NODE").map_err(|_| { + PrimeProtocolError::InvalidConfig("PRIVATE_KEY_NODE must be set".to_string()) + }), + } + } +} diff --git a/crates/prime-protocol-py/src/error.rs b/crates/prime-protocol-py/src/error.rs new file mode 100644 index 00000000..cf561595 --- /dev/null +++ b/crates/prime-protocol-py/src/error.rs @@ -0,0 +1,21 @@ +use thiserror::Error; + +/// Result type alias for Prime Protocol operations +pub type Result = std::result::Result; + +/// Errors that can occur in the Prime Protocol client +#[derive(Debug, Error)] +pub enum PrimeProtocolError { + /// Invalid configuration provided + #[error("Invalid configuration: {0}")] + InvalidConfig(String), + + /// Blockchain interaction error + #[error("Blockchain error: {0}")] + BlockchainError(String), + + /// General runtime error + #[error("Runtime error: {0}")] + #[allow(dead_code)] + RuntimeError(String), +} diff --git a/crates/prime-protocol-py/src/lib.rs b/crates/prime-protocol-py/src/lib.rs new file mode 100644 index 00000000..faa72b0c --- /dev/null +++ b/crates/prime-protocol-py/src/lib.rs @@ -0,0 +1,62 @@ +use pyo3::prelude::*; + +mod client; +mod error; + +use client::PrimeProtocolClientCore; + +// todo: We need a manager + validator side to send messages + +/// Prime Protocol Python client +#[pyclass] +pub struct PrimeProtocolClient { + inner: PrimeProtocolClientCore, +} + +#[pymethods] +impl PrimeProtocolClient { + #[new] + #[pyo3(signature = (compute_pool_id, rpc_url, private_key_provider=None, private_key_node=None))] + pub fn new( + compute_pool_id: u64, + rpc_url: String, + private_key_provider: Option, + private_key_node: Option, + ) -> PyResult { + // todo: revisit default arguments here that are currently none + let inner = PrimeProtocolClientCore::new( + compute_pool_id, + rpc_url, + private_key_provider, + private_key_node, + None, + None, + ) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(Self { inner }) + } + + pub fn start(&self) -> PyResult<()> { + // Create a new runtime for this call + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Run the async function + let result = rt.block_on(self.inner.start_async()); + + // Clean shutdown + rt.shutdown_background(); + + result.map_err(|e| PyErr::new::(e.to_string())) + } +} + +#[pymodule] +fn primeprotocol(m: &Bound<'_, PyModule>) -> PyResult<()> { + pyo3_log::init(); + m.add_class::()?; + Ok(()) +} diff --git a/crates/prime-protocol-py/tests/test_client.py b/crates/prime-protocol-py/tests/test_client.py new file mode 100644 index 00000000..57b02400 --- /dev/null +++ b/crates/prime-protocol-py/tests/test_client.py @@ -0,0 +1,29 @@ +"""Basic tests for the Prime Protocol Python client.""" + +import pytest +from primeprotocol import PrimeProtocolClient + + +def test_client_creation(): + """Test that client can be created with valid RPC URL.""" + client = PrimeProtocolClient("http://localhost:8545") + assert client is not None + + +def test_client_creation_with_empty_url(): + """Test that client creation fails with empty RPC URL.""" + with pytest.raises(ValueError): + PrimeProtocolClient("") + + +def test_client_creation_with_invalid_url(): + """Test that client creation fails with invalid RPC URL.""" + with pytest.raises(ValueError): + PrimeProtocolClient("not-a-valid-url") + + +def test_has_compute_pool_exists_method(): + """Test that the client has the compute_pool_exists method.""" + client = PrimeProtocolClient("http://example.com:8545") + assert hasattr(client, 'compute_pool_exists') + assert callable(getattr(client, 'compute_pool_exists')) \ No newline at end of file diff --git a/crates/prime-protocol-py/uv.lock b/crates/prime-protocol-py/uv.lock new file mode 100644 index 00000000..639a70ba --- /dev/null +++ b/crates/prime-protocol-py/uv.lock @@ -0,0 +1,7 @@ +version = 1 +requires-python = ">=3.8" + +[[package]] +name = "primeprotocol" +version = "0.1.0" +source = { editable = "." } diff --git a/crates/shared/src/models/metric.rs b/crates/shared/src/models/metric.rs index 47b27f24..b85c4926 100644 --- a/crates/shared/src/models/metric.rs +++ b/crates/shared/src/models/metric.rs @@ -58,7 +58,7 @@ mod tests { let invalid_values = vec![(f64::INFINITY, "infinite value"), (f64::NAN, "NaN value")]; for (value, case) in invalid_values { let entry = MetricEntry::new(key.clone(), value); - assert!(entry.is_err(), "Should fail for {}", case); + assert!(entry.is_err(), "Should fail for {case}"); } } diff --git a/crates/shared/src/security/auth_signature_middleware.rs b/crates/shared/src/security/auth_signature_middleware.rs index 1c4c1e10..8ba7767e 100644 --- a/crates/shared/src/security/auth_signature_middleware.rs +++ b/crates/shared/src/security/auth_signature_middleware.rs @@ -634,10 +634,10 @@ mod tests { .await; log::info!("Address: {}", wallet.wallet.default_signer().address()); - log::info!("Signature: {}", signature); - log::info!("Nonce: {}", nonce); + log::info!("Signature: {signature}"); + log::info!("Nonce: {nonce}"); let req = test::TestRequest::get() - .uri(&format!("/test?nonce={}", nonce)) + .uri(&format!("/test?nonce={nonce}")) .insert_header(( "x-address", wallet.wallet.default_signer().address().to_string(), @@ -801,8 +801,7 @@ mod tests { // Create multiple addresses let addresses: Vec
= (0..5) .map(|i| { - Address::from_str(&format!("0x{}000000000000000000000000000000000000000", i)) - .unwrap() + Address::from_str(&format!("0x{i}000000000000000000000000000000000000000")).unwrap() }) .collect(); diff --git a/crates/shared/src/security/request_signer.rs b/crates/shared/src/security/request_signer.rs index ff3e9964..c5ea3605 100644 --- a/crates/shared/src/security/request_signer.rs +++ b/crates/shared/src/security/request_signer.rs @@ -143,7 +143,7 @@ mod tests { let signature = sign_request(endpoint, &wallet, Some(&empty_data)) .await .unwrap(); - println!("Signature: {}", signature); + println!("Signature: {signature}"); assert!(signature.starts_with("0x")); assert_eq!(signature.len(), 132); } diff --git a/crates/shared/src/utils/google_cloud.rs b/crates/shared/src/utils/google_cloud.rs index 128259eb..72fae856 100644 --- a/crates/shared/src/utils/google_cloud.rs +++ b/crates/shared/src/utils/google_cloud.rs @@ -194,20 +194,14 @@ mod tests { #[tokio::test] async fn test_generate_mapping_file() { // Check if required environment variables are set - let bucket_name = match std::env::var("S3_BUCKET_NAME") { - Ok(name) => name, - Err(_) => { - println!("Skipping test: BUCKET_NAME not set"); - return; - } + let Ok(bucket_name) = std::env::var("S3_BUCKET_NAME") else { + println!("Skipping test: BUCKET_NAME not set"); + return; }; - let credentials_base64 = match std::env::var("S3_CREDENTIALS") { - Ok(credentials) => credentials, - Err(_) => { - println!("Skipping test: S3_CREDENTIALS not set"); - return; - } + let Ok(credentials_base64) = std::env::var("S3_CREDENTIALS") else { + println!("Skipping test: S3_CREDENTIALS not set"); + return; }; let storage = GcsStorageProvider::new(&bucket_name, &credentials_base64) @@ -219,15 +213,15 @@ mod tests { .generate_mapping_file(&random_sha256, "run_1/file.parquet") .await .unwrap(); - println!("mapping_content: {}", mapping_content); - println!("bucket_name: {}", bucket_name); + println!("mapping_content: {mapping_content}"); + println!("bucket_name: {bucket_name}"); let original_file_name = storage .resolve_mapping_for_sha(&random_sha256) .await .unwrap(); - println!("original_file_name: {}", original_file_name); + println!("original_file_name: {original_file_name}"); assert_eq!(original_file_name, "run_1/file.parquet"); } } diff --git a/crates/shared/src/utils/mod.rs b/crates/shared/src/utils/mod.rs index d4e3f1c9..290f1ae5 100644 --- a/crates/shared/src/utils/mod.rs +++ b/crates/shared/src/utils/mod.rs @@ -119,7 +119,7 @@ mod tests { provider.add_mapping_file("sha256", "file.txt").await; provider.add_file("file.txt", "content").await; let map_file_link = provider.resolve_mapping_for_sha("sha256").await.unwrap(); - println!("map_file_link: {}", map_file_link); + println!("map_file_link: {map_file_link}"); assert_eq!(map_file_link, "file.txt"); assert_eq!( diff --git a/crates/shared/src/web3/contracts/implementations/compute_pool_contract.rs b/crates/shared/src/web3/contracts/implementations/compute_pool_contract.rs index ff0a20ce..b52f96e2 100644 --- a/crates/shared/src/web3/contracts/implementations/compute_pool_contract.rs +++ b/crates/shared/src/web3/contracts/implementations/compute_pool_contract.rs @@ -29,6 +29,7 @@ impl ComputePool

{ .function("getComputePool", &[pool_id.into()])? .call() .await?; + let pool_info_tuple: &[DynSolValue] = pool_info_response.first().unwrap().as_tuple().unwrap(); @@ -60,6 +61,9 @@ impl ComputePool

{ _ => panic!("Unknown status value: {status}"), }; + println!("Mapped status: {mapped_status:?}"); + println!("Returning pool info"); + let pool_info = PoolInfo { pool_id, domain_id, diff --git a/crates/validator/src/store/redis.rs b/crates/validator/src/store/redis.rs index 508815c2..c0a0c36b 100644 --- a/crates/validator/src/store/redis.rs +++ b/crates/validator/src/store/redis.rs @@ -45,8 +45,8 @@ impl RedisStore { _ => panic!("Expected TCP connection"), }; - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); + let redis_url = format!("redis://{host}:{port}"); + debug!("Starting test Redis server at {redis_url}"); // Add a small delay to ensure server is ready thread::sleep(Duration::from_millis(100)); diff --git a/crates/validator/src/validators/hardware.rs b/crates/validator/src/validators/hardware.rs index 00736d34..5a29df32 100644 --- a/crates/validator/src/validators/hardware.rs +++ b/crates/validator/src/validators/hardware.rs @@ -185,7 +185,7 @@ mod tests { let result = validator.validate_nodes(nodes).await; let elapsed = start_time.elapsed(); assert!(elapsed < std::time::Duration::from_secs(11)); - println!("Validation took: {:?}", elapsed); + println!("Validation took: {elapsed:?}"); assert!(result.is_ok()); } diff --git a/crates/validator/src/validators/synthetic_data/chain_operations.rs b/crates/validator/src/validators/synthetic_data/chain_operations.rs index 004c7e45..a0687d18 100644 --- a/crates/validator/src/validators/synthetic_data/chain_operations.rs +++ b/crates/validator/src/validators/synthetic_data/chain_operations.rs @@ -3,7 +3,7 @@ use super::*; impl SyntheticDataValidator { #[cfg(test)] pub fn soft_invalidate_work(&self, work_key: &str) -> Result<(), Error> { - info!("Soft invalidating work: {}", work_key); + info!("Soft invalidating work: {work_key}"); if self.disable_chain_invalidation { info!("Chain invalidation is disabled, skipping work soft invalidation"); @@ -54,7 +54,7 @@ impl SyntheticDataValidator { #[cfg(test)] pub fn invalidate_work(&self, work_key: &str) -> Result<(), Error> { - info!("Invalidating work: {}", work_key); + info!("Invalidating work: {work_key}"); if let Some(metrics) = &self.metrics { metrics.record_work_key_invalidation(); @@ -98,20 +98,27 @@ impl SyntheticDataValidator { } } } - + #[cfg(test)] + #[allow(clippy::unused_async)] pub async fn invalidate_according_to_invalidation_type( &self, work_key: &str, invalidation_type: InvalidationType, ) -> Result<(), Error> { match invalidation_type { - #[cfg(test)] InvalidationType::Soft => self.soft_invalidate_work(work_key), - #[cfg(not(test))] - InvalidationType::Soft => self.soft_invalidate_work(work_key).await, - #[cfg(test)] InvalidationType::Hard => self.invalidate_work(work_key), - #[cfg(not(test))] + } + } + + #[cfg(not(test))] + pub async fn invalidate_according_to_invalidation_type( + &self, + work_key: &str, + invalidation_type: InvalidationType, + ) -> Result<(), Error> { + match invalidation_type { + InvalidationType::Soft => self.soft_invalidate_work(work_key).await, InvalidationType::Hard => self.invalidate_work(work_key).await, } } diff --git a/crates/validator/src/validators/synthetic_data/mod.rs b/crates/validator/src/validators/synthetic_data/mod.rs index ce472c8b..bf8ce6e2 100644 --- a/crates/validator/src/validators/synthetic_data/mod.rs +++ b/crates/validator/src/validators/synthetic_data/mod.rs @@ -237,7 +237,7 @@ impl SyntheticDataValidator { let score: Option = con .zscore("incomplete_groups", group_key) .await - .map_err(|e| Error::msg(format!("Failed to check incomplete tracking: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to check incomplete tracking: {e}")))?; Ok(score.is_some()) } @@ -270,13 +270,10 @@ impl SyntheticDataValidator { let _: () = con .zadd("incomplete_groups", group_key, new_deadline) .await - .map_err(|e| { - Error::msg(format!("Failed to update incomplete group deadline: {}", e)) - })?; + .map_err(|e| Error::msg(format!("Failed to update incomplete group deadline: {e}")))?; debug!( - "Updated deadline for incomplete group {} to {} ({} minutes from now)", - group_key, new_deadline, minutes_from_now + "Updated deadline for incomplete group {group_key} to {new_deadline} ({minutes_from_now} minutes from now)" ); Ok(()) @@ -420,7 +417,7 @@ impl SyntheticDataValidator { let data: Option = con .get(key) .await - .map_err(|e| Error::msg(format!("Failed to get work validation status: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to get work validation status: {e}")))?; match data { Some(data) => { @@ -435,8 +432,7 @@ impl SyntheticDataValidator { reason: None, })), Err(e) => Err(Error::msg(format!( - "Failed to parse work validation data: {}", - e + "Failed to parse work validation data: {e}" ))), } } @@ -1576,8 +1572,7 @@ impl SyntheticDataValidator { .await { error!( - "Failed to update work validation status for {}: {}", - work_key, e + "Failed to update work validation status for {work_key}: {e}" ); } } diff --git a/crates/validator/src/validators/synthetic_data/tests/mod.rs b/crates/validator/src/validators/synthetic_data/tests/mod.rs index a589076f..48aaee85 100644 --- a/crates/validator/src/validators/synthetic_data/tests/mod.rs +++ b/crates/validator/src/validators/synthetic_data/tests/mod.rs @@ -34,7 +34,7 @@ fn setup_test_env() -> Result<(RedisStore, Contracts), Error> { "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", url, ) - .map_err(|e| Error::msg(format!("Failed to create demo wallet: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to create demo wallet: {e}")))?; let contracts = ContractBuilder::new(demo_wallet.provider()) .with_compute_registry() @@ -45,7 +45,7 @@ fn setup_test_env() -> Result<(RedisStore, Contracts), Error> { .with_stake_manager() .with_synthetic_data_validator(Some(Address::ZERO)) .build() - .map_err(|e| Error::msg(format!("Failed to build contracts: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to build contracts: {e}")))?; Ok((store, contracts)) } @@ -197,8 +197,8 @@ async fn test_status_update() -> Result<(), Error> { ) .await .map_err(|e| { - error!("Failed to update work validation status: {}", e); - Error::msg(format!("Failed to update work validation status: {}", e)) + error!("Failed to update work validation status: {e}"); + Error::msg(format!("Failed to update work validation status: {e}")) })?; tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; @@ -206,8 +206,8 @@ async fn test_status_update() -> Result<(), Error> { .get_work_validation_status_from_redis("0x0000000000000000000000000000000000000000") .await .map_err(|e| { - error!("Failed to get work validation status: {}", e); - Error::msg(format!("Failed to get work validation status: {}", e)) + error!("Failed to get work validation status: {e}"); + Error::msg(format!("Failed to get work validation status: {e}")) })?; assert_eq!(status, Some(ValidationResult::Accept)); Ok(()) @@ -344,20 +344,20 @@ async fn test_group_e2e_accept() -> Result<(), Error> { let mock_storage = MockStorageProvider::new(); mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-1-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-1-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA, - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-1-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-1-0-0.parquet"), ) .await; server .mock( "POST", - format!("/validategroup/dataset/samplingn-{}-1-0.parquet", GROUP_ID).as_str(), + format!("/validategroup/dataset/samplingn-{GROUP_ID}-1-0.parquet").as_str(), ) .match_body(mockito::Matcher::Json(serde_json::json!({ "file_shas": [FILE_SHA], @@ -371,7 +371,7 @@ async fn test_group_e2e_accept() -> Result<(), Error> { server .mock( "GET", - format!("/statusgroup/dataset/samplingn-{}-1-0.parquet", GROUP_ID).as_str(), + format!("/statusgroup/dataset/samplingn-{GROUP_ID}-1-0.parquet").as_str(), ) .with_status(200) .with_body(r#"{"status": "accept", "input_flops": 1, "output_flops": 1000}"#) @@ -463,7 +463,7 @@ async fn test_group_e2e_accept() -> Result<(), Error> { metrics_2.contains("validator_work_keys_to_process{pool_id=\"0\",validator_id=\"0\"} 0") ); assert!(metrics_2.contains("toploc_config_name=\"Qwen/Qwen0.6\"")); - assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{}\",pool_id=\"0\",result=\"match\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"match\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1"))); Ok(()) } @@ -490,32 +490,32 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { let mock_storage = MockStorageProvider::new(); mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-1.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-1.parquet"), "file2", ) .await; mock_storage .add_mapping_file( HONEST_FILE_SHA, - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-0.parquet"), ) .await; mock_storage .add_mapping_file( EXCESSIVE_FILE_SHA, - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-1.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-1.parquet"), ) .await; server .mock( "POST", - format!("/validategroup/dataset/samplingn-{}-2-0.parquet", GROUP_ID).as_str(), + format!("/validategroup/dataset/samplingn-{GROUP_ID}-2-0.parquet").as_str(), ) .match_body(mockito::Matcher::Json(serde_json::json!({ "file_shas": [HONEST_FILE_SHA, EXCESSIVE_FILE_SHA], @@ -529,7 +529,7 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { server .mock( "GET", - format!("/statusgroup/dataset/samplingn-{}-2-0.parquet", GROUP_ID).as_str(), + format!("/statusgroup/dataset/samplingn-{GROUP_ID}-2-0.parquet").as_str(), ) .with_status(200) .with_body(r#"{"status": "accept", "input_flops": 1, "output_flops": 2000}"#) @@ -636,12 +636,12 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { assert_eq!(plan_3.group_trigger_tasks.len(), 0); assert_eq!(plan_3.group_status_check_tasks.len(), 0); let metrics_2 = export_metrics().unwrap(); - assert!(metrics_2.contains(&format!("validator_group_validations_total{{group_id=\"{}\",pool_id=\"0\",result=\"accept\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_validations_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"accept\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1"))); assert!( metrics_2.contains("validator_work_keys_to_process{pool_id=\"0\",validator_id=\"0\"} 0") ); assert!(metrics_2.contains("toploc_config_name=\"Qwen/Qwen0.6\"")); - assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{}\",pool_id=\"0\",result=\"mismatch\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"mismatch\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1"))); Ok(()) } @@ -734,26 +734,26 @@ async fn test_incomplete_group_recovery() -> Result<(), Error> { mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-1.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-1.parquet"), "file2", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), ) .await; mock_storage .add_mapping_file( FILE_SHA_2, - &format!("TestModel/dataset/test-{}-2-0-1.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-1.parquet"), ) .await; @@ -800,7 +800,7 @@ async fn test_incomplete_group_recovery() -> Result<(), Error> { assert!(group.is_none(), "Group should be incomplete"); // Check that the incomplete group is being tracked - let group_key = format!("group:{}:2:0", GROUP_ID); + let group_key = format!("group:{GROUP_ID}:2:0"); let is_tracked = validator .is_group_being_tracked_as_incomplete(&group_key) .await?; @@ -847,14 +847,14 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), ) .await; @@ -902,7 +902,7 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> // Manually expire the incomplete group tracking by removing it and simulating expiry // In a real test, you would wait for the actual expiry, but for testing we simulate it - let group_key = format!("group:{}:2:0", GROUP_ID); + let group_key = format!("group:{GROUP_ID}:2:0"); validator.track_incomplete_group(&group_key).await?; // Process groups past grace period (this would normally find groups past deadline) @@ -936,7 +936,7 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> assert_eq!(key_status, Some(ValidationResult::IncompleteGroup)); let metrics = export_metrics().unwrap(); - assert!(metrics.contains(&format!("validator_work_keys_soft_invalidated_total{{group_key=\"group:{}:2:0\",pool_id=\"0\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics.contains(&format!("validator_work_keys_soft_invalidated_total{{group_key=\"group:{GROUP_ID}:2:0\",pool_id=\"0\",validator_id=\"0\"}} 1"))); Ok(()) } @@ -952,14 +952,14 @@ async fn test_incomplete_group_status_tracking() -> Result<(), Error> { mock_storage .add_file( - &format!("TestModel/dataset/test-{}-3-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-3-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-3-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-3-0-0.parquet"), ) .await; @@ -1006,7 +1006,7 @@ async fn test_incomplete_group_status_tracking() -> Result<(), Error> { // Manually process groups past grace period to simulate what would happen // after the grace period expires (we simulate this since we can't wait in tests) - let group_key = format!("group:{}:3:0", GROUP_ID); + let group_key = format!("group:{GROUP_ID}:3:0"); // Manually add the group to tracking and then process it validator.track_incomplete_group(&group_key).await?; diff --git a/crates/validator/src/validators/synthetic_data/toploc.rs b/crates/validator/src/validators/synthetic_data/toploc.rs index 33d9f57f..f5641533 100644 --- a/crates/validator/src/validators/synthetic_data/toploc.rs +++ b/crates/validator/src/validators/synthetic_data/toploc.rs @@ -689,8 +689,7 @@ mod tests { Some(expected_idx) => { assert!( matched, - "Expected file {} to match config {}", - test_file, expected_idx + "Expected file {test_file} to match config {expected_idx}" ); assert_eq!( matched_idx, @@ -701,7 +700,7 @@ mod tests { expected_idx ); } - None => assert!(!matched, "File {} should not match any config", test_file), + None => assert!(!matched, "File {test_file} should not match any config"), } } } diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 18596ba5..c0bebb35 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -8,6 +8,7 @@ workspace = true [dependencies] shared = { workspace = true } +prime-core = { workspace = true} actix-web = { workspace = true } bollard = "0.18.1" clap = { workspace = true } diff --git a/crates/worker/src/checks/hardware/interconnect.rs b/crates/worker/src/checks/hardware/interconnect.rs index 21725686..d87d1819 100644 --- a/crates/worker/src/checks/hardware/interconnect.rs +++ b/crates/worker/src/checks/hardware/interconnect.rs @@ -78,7 +78,7 @@ mod tests { #[tokio::test] async fn test_check_speeds() { let result = InterconnectCheck::check_speeds().await; - println!("Test Result: {:?}", result); + println!("Test Result: {result:?}"); // Verify the result is Ok and contains expected tuple structure assert!(result.is_ok()); diff --git a/crates/worker/src/checks/hardware/storage.rs b/crates/worker/src/checks/hardware/storage.rs index 9509e731..8360993b 100644 --- a/crates/worker/src/checks/hardware/storage.rs +++ b/crates/worker/src/checks/hardware/storage.rs @@ -216,7 +216,7 @@ fn test_or_create_app_directory(path: &str) -> bool { } #[cfg(not(target_os = "linux"))] -pub fn find_largest_storage() -> Option { +pub(crate) fn find_largest_storage() -> Option { None } @@ -233,7 +233,7 @@ pub(crate) fn get_available_space(path: &str) -> Option { } #[cfg(not(target_os = "linux"))] -pub fn get_available_space(_path: &str) -> Option { +pub(crate) fn get_available_space(_path: &str) -> Option { None } diff --git a/crates/worker/src/checks/stun.rs b/crates/worker/src/checks/stun.rs index 5830b49e..734f2795 100644 --- a/crates/worker/src/checks/stun.rs +++ b/crates/worker/src/checks/stun.rs @@ -139,7 +139,7 @@ mod tests { async fn test_get_public_ip() { let stun_check = StunCheck::new(Duration::from_secs(5), 0); let public_ip = stun_check.get_public_ip().await.unwrap(); - println!("Public IP: {}", public_ip); + println!("Public IP: {public_ip}"); assert!(!public_ip.is_empty()); } } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 92de379e..69d2b91f 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -6,9 +6,8 @@ use crate::console::Console; use crate::docker::taskbridge::TaskBridge; use crate::docker::DockerService; use crate::metrics::store::MetricsStore; -use crate::operations::compute_node::ComputeNodeOperations; use crate::operations::heartbeat::service::HeartbeatService; -use crate::operations::provider::ProviderOperations; +use crate::operations::node_monitor::NodeMonitor; use crate::p2p::P2PContext; use crate::p2p::P2PService; use crate::services::discovery::DiscoveryService; @@ -21,6 +20,8 @@ use alloy::signers::local::PrivateKeySigner; use alloy::signers::Signer; use clap::{Parser, Subcommand}; use log::{error, info}; +use prime_core::operations::compute_node::ComputeNodeOperations; +use prime_core::operations::provider::ProviderOperations; use shared::models::node::ComputeRequirements; use shared::models::node::Node; use shared::web3::contracts::core::builder::ContractBuilder; @@ -280,12 +281,10 @@ pub async fn execute_command( let provider_ops_cancellation = cancellation_token.clone(); - let compute_node_state = state.clone(); let compute_node_ops = ComputeNodeOperations::new( &provider_wallet_instance, &node_wallet_instance, contracts.clone(), - compute_node_state, ); let discovery_urls = vec![discovery_url @@ -593,7 +592,7 @@ pub async fn execute_command( .retry_register_provider( required_stake, *funding_retry_count, - cancellation_token.clone(), + Some(cancellation_token.clone()), ) .await { @@ -696,7 +695,7 @@ pub async fn execute_command( let heartbeat = match heartbeat_service.clone() { Ok(service) => service, Err(e) => { - error!("❌ Heartbeat service is not available: {e}"); + error!("❌ Heartbeat service is not available: {e:?}"); std::process::exit(1); } }; @@ -815,8 +814,13 @@ pub async fn execute_command( provider_ops.start_monitoring(provider_ops_cancellation); let pool_id = state.compute_pool_id.clone().unwrap_or("0".to_string()); - if let Err(err) = compute_node_ops.start_monitoring(cancellation_token.clone(), pool_id) - { + let node_monitor = NodeMonitor::new( + provider_wallet_instance.clone(), + node_wallet_instance.clone(), + contracts.clone(), + state.clone(), + ); + if let Err(err) = node_monitor.start_monitoring(cancellation_token.clone(), pool_id) { error!("❌ Failed to start node monitoring: {err}"); std::process::exit(1); } @@ -1021,11 +1025,10 @@ pub async fn execute_command( std::process::exit(1); } }; - let state = Arc::new(SystemState::new(None, true, None)); + /* Initialize dependencies - services, contracts, operations */ - let contracts = ContractBuilder::new(provider_wallet_instance.provider()) .with_compute_registry() .with_ai_token() @@ -1039,7 +1042,6 @@ pub async fn execute_command( &provider_wallet_instance, &node_wallet_instance, contracts.clone(), - state.clone(), ); let provider_ops = diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 65a28f76..1fe85e59 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -565,7 +565,7 @@ mod tests { "test_label2": 20.0, }); let sample_metric = serde_json::to_string(&data)?; - debug!("Sending {:?}", sample_metric); + debug!("Sending {sample_metric:?}"); let msg = format!("{}{}", sample_metric, "\n"); stream.write_all(msg.as_bytes()).await?; stream.flush().await?; @@ -616,7 +616,7 @@ mod tests { "output/input_flops": 2500.0, }); let sample_metric = serde_json::to_string(&json)?; - debug!("Sending {:?}", sample_metric); + debug!("Sending {sample_metric:?}"); let msg = format!("{}{}", sample_metric, "\n"); stream.write_all(msg.as_bytes()).await?; stream.flush().await?; @@ -626,8 +626,7 @@ mod tests { let all_metrics = metrics_store.get_all_metrics().await; assert!( all_metrics.is_empty(), - "Expected metrics to be empty but found: {:?}", - all_metrics + "Expected metrics to be empty but found: {all_metrics:?}" ); bridge_handle.abort(); diff --git a/crates/worker/src/operations/heartbeat/service.rs b/crates/worker/src/operations/heartbeat/service.rs index 0d77d783..ccace01d 100644 --- a/crates/worker/src/operations/heartbeat/service.rs +++ b/crates/worker/src/operations/heartbeat/service.rs @@ -24,7 +24,6 @@ pub(crate) struct HeartbeatService { docker_service: Arc, metrics_store: Arc, } - #[derive(Debug, Clone, thiserror::Error)] pub(crate) enum HeartbeatError { #[error("HTTP request failed")] @@ -32,6 +31,7 @@ pub(crate) enum HeartbeatError { #[error("Service initialization failed")] InitFailed, } + impl HeartbeatService { #[allow(clippy::too_many_arguments)] pub(crate) fn new( diff --git a/crates/worker/src/operations/mod.rs b/crates/worker/src/operations/mod.rs index 193b64ae..d684160a 100644 --- a/crates/worker/src/operations/mod.rs +++ b/crates/worker/src/operations/mod.rs @@ -1,3 +1,2 @@ -pub(crate) mod compute_node; pub(crate) mod heartbeat; -pub(crate) mod provider; +pub(crate) mod node_monitor; diff --git a/crates/worker/src/operations/compute_node.rs b/crates/worker/src/operations/node_monitor.rs similarity index 54% rename from crates/worker/src/operations/compute_node.rs rename to crates/worker/src/operations/node_monitor.rs index 39b18c29..dc8734f2 100644 --- a/crates/worker/src/operations/compute_node.rs +++ b/crates/worker/src/operations/node_monitor.rs @@ -1,5 +1,5 @@ -use crate::{console::Console, state::system_state::SystemState}; -use alloy::{primitives::utils::keccak256 as keccak, primitives::U256, signers::Signer}; +use crate::state::system_state::SystemState; +use alloy::primitives::U256; use anyhow::Result; use shared::web3::wallet::Wallet; use shared::web3::{contracts::core::builder::Contracts, wallet::WalletProvider}; @@ -7,17 +7,17 @@ use std::sync::Arc; use tokio::time::{sleep, Duration}; use tokio_util::sync::CancellationToken; -pub(crate) struct ComputeNodeOperations<'c> { - provider_wallet: &'c Wallet, - node_wallet: &'c Wallet, +pub(crate) struct NodeMonitor { + provider_wallet: Wallet, + node_wallet: Wallet, contracts: Contracts, system_state: Arc, } -impl<'c> ComputeNodeOperations<'c> { +impl NodeMonitor { pub(crate) fn new( - provider_wallet: &'c Wallet, - node_wallet: &'c Wallet, + provider_wallet: Wallet, + node_wallet: Wallet, contracts: Contracts, system_state: Arc, ) -> Self { @@ -43,11 +43,12 @@ impl<'c> ComputeNodeOperations<'c> { let mut last_claimable = None; let mut last_locked = None; let mut first_check = true; + tokio::spawn(async move { loop { tokio::select! { _ = cancellation_token.cancelled() => { - Console::info("Monitor", "Shutting down node status monitor..."); + log::info!("Shutting down node status monitor..."); break; } _ = async { @@ -55,16 +56,15 @@ impl<'c> ComputeNodeOperations<'c> { Ok((active, validated)) => { if first_check || active != last_active { if !first_check { - Console::info("🔄 Chain Sync - Pool membership changed", &format!("From {last_active} to {active}" - )); + log::info!("🔄 Chain Sync - Pool membership changed: From {last_active} to {active}"); } else { - Console::info("🔄 Chain Sync - Node pool membership", &format!("{active}")); + log::info!("🔄 Chain Sync - Node pool membership: {active}"); } last_active = active; } let is_running = system_state.is_running().await; if !active && is_running { - Console::warning("Node is not longer in pool, shutting down heartbeat..."); + log::warn!("Node is not longer in pool, shutting down heartbeat..."); if let Err(e) = system_state.set_running(false, None).await { log::error!("Failed to set running to false: {e:?}"); } @@ -72,10 +72,9 @@ impl<'c> ComputeNodeOperations<'c> { if first_check || validated != last_validated { if !first_check { - Console::info("🔄 Chain Sync - Validation changed", &format!("From {last_validated} to {validated}" - )); + log::info!("🔄 Chain Sync - Validation changed: From {last_validated} to {validated}"); } else { - Console::info("🔄 Chain Sync - Node validation", &format!("{validated}")); + log::info!("🔄 Chain Sync - Node validation: {validated}"); } last_validated = validated; } @@ -92,7 +91,7 @@ impl<'c> ComputeNodeOperations<'c> { last_locked = Some(locked); let claimable_formatted = claimable.to_string().parse::().unwrap_or(0.0) / 10f64.powf(18.0); let locked_formatted = locked.to_string().parse::().unwrap_or(0.0) / 10f64.powf(18.0); - Console::info("Rewards", &format!("{claimable_formatted} claimable, {locked_formatted} locked")); + log::info!("Rewards: {claimable_formatted} claimable, {locked_formatted} locked"); } } Err(e) => { @@ -114,74 +113,4 @@ impl<'c> ComputeNodeOperations<'c> { }); Ok(()) } - - pub(crate) async fn check_compute_node_exists( - &self, - ) -> Result> { - let compute_node = self - .contracts - .compute_registry - .get_node( - self.provider_wallet.wallet.default_signer().address(), - self.node_wallet.wallet.default_signer().address(), - ) - .await; - - match compute_node { - Ok(_) => Ok(true), - Err(_) => Ok(false), - } - } - - // Returns true if the compute node was added, false if it already exists - pub(crate) async fn add_compute_node( - &self, - compute_units: U256, - ) -> Result> { - Console::title("🔄 Adding compute node"); - - if self.check_compute_node_exists().await? { - return Ok(false); - } - - Console::progress("Adding compute node"); - let provider_address = self.provider_wallet.wallet.default_signer().address(); - let node_address = self.node_wallet.wallet.default_signer().address(); - let digest = keccak([provider_address.as_slice(), node_address.as_slice()].concat()); - - let signature = self - .node_wallet - .signer - .sign_message(digest.as_slice()) - .await? - .as_bytes(); - - // Create the signature bytes - let add_node_tx = self - .contracts - .prime_network - .add_compute_node(node_address, compute_units, signature.to_vec()) - .await?; - Console::success(&format!("Add node tx: {add_node_tx:?}")); - Ok(true) - } - - pub(crate) async fn remove_compute_node(&self) -> Result> { - Console::title("🔄 Removing compute node"); - - if !self.check_compute_node_exists().await? { - return Ok(false); - } - - Console::progress("Removing compute node"); - let provider_address = self.provider_wallet.wallet.default_signer().address(); - let node_address = self.node_wallet.wallet.default_signer().address(); - let remove_node_tx = self - .contracts - .prime_network - .remove_compute_node(provider_address, node_address) - .await?; - Console::success(&format!("Remove node tx: {remove_node_tx:?}")); - Ok(true) - } } diff --git a/crates/worker/src/p2p/service.rs b/crates/worker/src/p2p/service.rs index 51a68405..fef5f409 100644 --- a/crates/worker/src/p2p/service.rs +++ b/crates/worker/src/p2p/service.rs @@ -700,10 +700,10 @@ mod tests { .await .unwrap(); - let response_nonce = match response { - P2PMessage::Pong { nonce, .. } => nonce, - _ => panic!("Expected Pong message"), + let P2PMessage::Pong { nonce, .. } = response else { + panic!("Expected Pong message"); }; + let response_nonce = nonce; assert_eq!(response_nonce, random_nonce); } #[tokio::test] diff --git a/crates/worker/src/utils/p2p.rs b/crates/worker/src/utils/p2p.rs index ef07b28c..8fe175c7 100644 --- a/crates/worker/src/utils/p2p.rs +++ b/crates/worker/src/utils/p2p.rs @@ -50,10 +50,10 @@ mod tests { fn test_deterministic_generation() { // Same seed should generate same node_id let seed = generate_random_seed(); - println!("seed: {}", seed); + println!("seed: {seed}"); let result1 = generate_iroh_node_id_from_seed(seed).unwrap(); let result2 = generate_iroh_node_id_from_seed(seed).unwrap(); - println!("result1: {}", result1); + println!("result1: {result1}"); assert_eq!(result1, result2); } From 90931348519e189f0c2797ada0c60bee3da92e65 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Fri, 11 Jul 2025 16:44:13 +0200 Subject: [PATCH 33/40] basic message queue with mock data --- Cargo.lock | 36 +++ crates/prime-protocol-py/Cargo.toml | 6 +- crates/prime-protocol-py/Makefile | 33 +- crates/prime-protocol-py/README.md | 54 +++- .../prime-protocol-py/examples/basic_usage.py | 98 +++++- crates/prime-protocol-py/src/lib.rs | 108 ++++++- crates/prime-protocol-py/src/message_queue.rs | 160 ++++++++++ .../src/utils/json_parser.rs | 8 + crates/prime-protocol-py/src/utils/mod.rs | 1 + .../src/{client.rs => worker.rs} | 291 ++++++++++++------ 10 files changed, 653 insertions(+), 142 deletions(-) create mode 100644 crates/prime-protocol-py/src/message_queue.rs create mode 100644 crates/prime-protocol-py/src/utils/json_parser.rs create mode 100644 crates/prime-protocol-py/src/utils/mod.rs rename crates/prime-protocol-py/src/{client.rs => worker.rs} (50%) diff --git a/Cargo.lock b/Cargo.lock index cfd97e47..f872d960 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6762,7 +6762,11 @@ dependencies = [ "prime-core", "pyo3", "pyo3-log", + "pythonize", + "serde", + "serde_json", "shared", + "test-log", "thiserror 1.0.69", "tokio", "tokio-test", @@ -7050,6 +7054,16 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "pythonize" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597907139a488b22573158793aa7539df36ae863eba300c75f3a0d65fc475e27" +dependencies = [ + "pyo3", + "serde", +] + [[package]] name = "quanta" version = "0.10.1" @@ -8851,6 +8865,28 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "test-log" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e33b98a582ea0be1168eba097538ee8dd4bbe0f2b01b22ac92ea30054e5be7b" +dependencies = [ + "env_logger", + "test-log-macros", + "tracing-subscriber", +] + +[[package]] +name = "test-log-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "451b374529930d7601b1eef8d32bc79ae870b6079b069401709c2a8bf9e75f36" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "thiserror" version = "1.0.69" diff --git a/crates/prime-protocol-py/Cargo.toml b/crates/prime-protocol-py/Cargo.toml index 9441afe1..cbb7b513 100644 --- a/crates/prime-protocol-py/Cargo.toml +++ b/crates/prime-protocol-py/Cargo.toml @@ -16,12 +16,16 @@ shared = { workspace = true } prime-core = { workspace = true } alloy = { workspace = true } alloy-provider = { workspace = true } -tokio = { version = "1.35", features = ["rt"] } +tokio = { version = "1.35", features = ["rt", "rt-multi-thread", "sync", "time", "macros"] } url = "2.5" log = { workspace = true } pyo3-log = "0.12.4" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +pythonize = "0.25" [dev-dependencies] +test-log = "0.2" tokio-test = "0.4" [profile.release] diff --git a/crates/prime-protocol-py/Makefile b/crates/prime-protocol-py/Makefile index fe1858d0..dfb10ac9 100644 --- a/crates/prime-protocol-py/Makefile +++ b/crates/prime-protocol-py/Makefile @@ -4,43 +4,18 @@ install: @./setup.sh # Uses uv for fast package management .PHONY: build -build: +build: install + @uv cache clean @source .venv/bin/activate && maturin develop @source .venv/bin/activate && uv pip install --force-reinstall -e . -.PHONY: dev -dev: - @source .venv/bin/activate && maturin develop --watch - -.PHONY: build-release -build-release: - @source .venv/bin/activate && maturin build --release --strip - -.PHONY: test -test: - @source .venv/bin/activate && pytest tests/ -v - -.PHONY: example -example: - @source .venv/bin/activate && python examples/basic_usage.py - .PHONY: clean clean: @rm -rf target/ dist/ *.egg-info .pytest_cache __pycache__ .venv/ -.PHONY: clear-cache -clear-cache: - @uv cache clean - @echo "uv cache cleared" - .PHONY: help help: @echo "Available commands:" @echo " make install - Setup environment and install dependencies" - @echo " make build - Build development version" - @echo " make dev - Build with hot reload (watches for changes)" - @echo " make build-release - Build release wheel" - @echo " make test - Run tests" - @echo " make example - Run example script" - @echo " make clean - Clean build artifacts" - @echo " make clear-cache - Clear uv cache" \ No newline at end of file + @echo " make build - Build development version (includes install and cache clear)" + @echo " make clean - Clean build artifacts" \ No newline at end of file diff --git a/crates/prime-protocol-py/README.md b/crates/prime-protocol-py/README.md index 439218c3..b72b39db 100644 --- a/crates/prime-protocol-py/README.md +++ b/crates/prime-protocol-py/README.md @@ -1,7 +1,5 @@ # Prime Protocol Python Client -Python bindings for checking if compute pools exist. - ## Build ```bash @@ -15,13 +13,59 @@ make install ## Usage +### Worker Client with Message Queue + +The Worker Client provides a message queue system for handling P2P messages from pool owners and validators. Messages are processed in a FIFO (First-In-First-Out) manner. + ```python -from primeprotocol import PrimeProtocolClient +from primeprotocol import WorkerClient +import asyncio + +# Initialize the worker client +client = WorkerClient( + compute_pool_id=1, + rpc_url="http://localhost:8545", + private_key_provider="your_provider_key", + private_key_node="your_node_key", +) + +# Start the client (registers on-chain and starts message listener) +client.start() -client = PrimeProtocolClient("http://localhost:8545") -exists = client.compute_pool_exists(0) +# Poll for messages in your application loop +async def process_messages(): + while True: + # Get next message from pool owner queue + pool_msg = client.get_pool_owner_message() + if pool_msg: + print(f"Pool owner message: {pool_msg}") + # Process the message... + + # Get next message from validator queue + validator_msg = client.get_validator_message() + if validator_msg: + print(f"Validator message: {validator_msg}") + # Process the message... + + await asyncio.sleep(0.1) + +# Run the message processing loop +asyncio.run(process_messages()) + +# Gracefully shutdown +client.stop() ``` +### Message Queue Features + +- **Background Listener**: Rust protocol listens for P2P messages in the background +- **FIFO Queue**: Messages are processed in the order they are received +- **Message Types**: Separate queues for pool owner, validator, and system messages +- **Mock Mode**: Currently generates mock messages for testing (P2P integration coming soon) +- **Thread-Safe**: Safe to use from async Python code + +See `examples/message_queue_example.py` for a complete working example. + ## Development ```bash diff --git a/crates/prime-protocol-py/examples/basic_usage.py b/crates/prime-protocol-py/examples/basic_usage.py index 639eccf7..66572db7 100644 --- a/crates/prime-protocol-py/examples/basic_usage.py +++ b/crates/prime-protocol-py/examples/basic_usage.py @@ -1,13 +1,70 @@ #!/usr/bin/env python3 """Example usage of the Prime Protocol Python client.""" +import asyncio import logging import os -from primeprotocol import PrimeProtocolClient +import signal +import sys +import time +from typing import Dict, Any, Optional +from primeprotocol import WorkerClient FORMAT = '%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s' logging.basicConfig(format=FORMAT) -logging.getLogger().setLevel(logging.INFO) +logging.getLogger().setLevel(logging.DEBUG) + + +def handle_pool_owner_message(message: Dict[str, Any]) -> None: + """Handle messages from pool owner""" + logging.info(f"Received message from pool owner: {message}") + + if message.get("type") == "inference_request": + prompt = message.get("prompt", "") + # Simulate processing the inference request + response = f"Processed: {prompt}" + + logging.info(f"Processing inference request: {prompt}") + logging.info(f"Generated response: {response}") + + # In a real implementation, you would send the response back + # client.send_response({"type": "inference_response", "result": response}) + else: + logging.info("Sending PONG response") + # client.send_response("PONG") + + +def handle_validator_message(message: Dict[str, Any]) -> None: + """Handle messages from validator""" + logging.info(f"Received message from validator: {message}") + + if message.get("type") == "inference_request": + prompt = message.get("prompt", "") + # Simulate processing the inference request + response = f"Validated: {prompt}" + + logging.info(f"Processing validation request: {prompt}") + logging.info(f"Generated response: {response}") + + # In a real implementation, you would send the response back + # client.send_response({"type": "inference_response", "result": response}) + + +def check_for_messages(client: WorkerClient) -> None: + """Check for new messages from pool owner and validator""" + try: + # Check for pool owner messages + pool_owner_message = client.get_pool_owner_message() + if pool_owner_message: + handle_pool_owner_message(pool_owner_message) + + # Check for validator messages + validator_message = client.get_validator_message() + if validator_message: + handle_validator_message(validator_message) + + except Exception as e: + logging.error(f"Error checking for messages: {e}") def main(): @@ -17,8 +74,41 @@ def main(): private_key_node = os.getenv("PRIVATE_KEY_NODE", None) logging.info(f"Connecting to: {rpc_url}") - client = PrimeProtocolClient(pool_id, rpc_url, private_key_provider, private_key_node) - client.start() + client = WorkerClient(pool_id, rpc_url, private_key_provider, private_key_node) + + def signal_handler(sig, frame): + logging.info("Received interrupt signal, shutting down gracefully...") + try: + client.stop() + logging.info("Client stopped successfully") + except Exception as e: + logging.error(f"Error during shutdown: {e}") + sys.exit(0) + + # Register signal handler for Ctrl+C + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + client.start() + logging.info("Setup completed. Starting message polling loop...") + print("Worker client started. Polling for messages. Press Ctrl+C to stop.") + + # Message polling loop + while True: + check_for_messages(client) + time.sleep(0.1) # Small delay to prevent busy waiting + + except KeyboardInterrupt: + logging.info("Keyboard interrupt received") + signal_handler(signal.SIGINT, None) + except Exception as e: + logging.error(f"Unexpected error: {e}") + try: + client.stop() + except: + pass + sys.exit(1) if __name__ == "__main__": main() \ No newline at end of file diff --git a/crates/prime-protocol-py/src/lib.rs b/crates/prime-protocol-py/src/lib.rs index faa72b0c..b332a9e0 100644 --- a/crates/prime-protocol-py/src/lib.rs +++ b/crates/prime-protocol-py/src/lib.rs @@ -1,20 +1,21 @@ use pyo3::prelude::*; -mod client; mod error; +mod message_queue; +mod utils; +mod worker; -use client::PrimeProtocolClientCore; +use worker::WorkerClientCore; -// todo: We need a manager + validator side to send messages - -/// Prime Protocol Python client +/// Prime Protocol Worker Client - for compute nodes that execute tasks #[pyclass] -pub struct PrimeProtocolClient { - inner: PrimeProtocolClientCore, +pub struct WorkerClient { + inner: WorkerClientCore, + runtime: Option, } #[pymethods] -impl PrimeProtocolClient { +impl WorkerClient { #[new] #[pyo3(signature = (compute_pool_id, rpc_url, private_key_provider=None, private_key_node=None))] pub fn new( @@ -23,8 +24,7 @@ impl PrimeProtocolClient { private_key_provider: Option, private_key_node: Option, ) -> PyResult { - // todo: revisit default arguments here that are currently none - let inner = PrimeProtocolClientCore::new( + let inner = WorkerClientCore::new( compute_pool_id, rpc_url, private_key_provider, @@ -34,29 +34,105 @@ impl PrimeProtocolClient { ) .map_err(|e| PyErr::new::(e.to_string()))?; - Ok(Self { inner }) + Ok(Self { + inner, + runtime: None, + }) } - pub fn start(&self) -> PyResult<()> { + pub fn start(&mut self) -> PyResult<()> { // Create a new runtime for this call - let rt = tokio::runtime::Builder::new_current_thread() + let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build() .map_err(|e| PyErr::new::(e.to_string()))?; // Run the async function let result = rt.block_on(self.inner.start_async()); + println!("system start completed"); - // Clean shutdown - rt.shutdown_background(); + // Store the runtime for future use + self.runtime = Some(rt); result.map_err(|e| PyErr::new::(e.to_string())) } + + pub fn get_pool_owner_message(&self) -> PyResult> { + if let Some(rt) = self.runtime.as_ref() { + Ok(rt.block_on(self.inner.get_message_queue().get_pool_owner_message())) + } else { + Err(PyErr::new::( + "Client not started. Call start() first.".to_string(), + )) + } + } + + pub fn get_validator_message(&self) -> PyResult> { + if let Some(rt) = self.runtime.as_ref() { + Ok(rt.block_on(self.inner.get_message_queue().get_validator_message())) + } else { + Err(PyErr::new::( + "Client not started. Call start() first.".to_string(), + )) + } + } + + pub fn stop(&mut self) -> PyResult<()> { + if let Some(rt) = self.runtime.as_ref() { + rt.block_on(self.inner.stop_async()) + .map_err(|e| PyErr::new::(e.to_string()))?; + } + + // Clean up the runtime + if let Some(rt) = self.runtime.take() { + rt.shutdown_background(); + } + + Ok(()) + } +} + +/// Prime Protocol Orchestrator Client - for managing and distributing tasks +#[pyclass] +pub struct OrchestratorClient { + // TODO: Implement orchestrator-specific functionality +} + +#[pymethods] +impl OrchestratorClient { + #[new] + #[pyo3(signature = (rpc_url, private_key=None))] + pub fn new(rpc_url: String, private_key: Option) -> PyResult { + // TODO: Implement orchestrator initialization + let _ = rpc_url; + let _ = private_key; + Ok(Self {}) + } +} + +/// Prime Protocol Validator Client - for validating task results +#[pyclass] +pub struct ValidatorClient { + // TODO: Implement validator-specific functionality +} + +#[pymethods] +impl ValidatorClient { + #[new] + #[pyo3(signature = (rpc_url, private_key=None))] + pub fn new(rpc_url: String, private_key: Option) -> PyResult { + // TODO: Implement validator initialization + let _ = rpc_url; + let _ = private_key; + Ok(Self {}) + } } #[pymodule] fn primeprotocol(m: &Bound<'_, PyModule>) -> PyResult<()> { pyo3_log::init(); - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/crates/prime-protocol-py/src/message_queue.rs b/crates/prime-protocol-py/src/message_queue.rs new file mode 100644 index 00000000..9af9a687 --- /dev/null +++ b/crates/prime-protocol-py/src/message_queue.rs @@ -0,0 +1,160 @@ +use pyo3::prelude::*; +use serde::{Deserialize, Serialize}; +use std::collections::VecDeque; +use std::sync::Arc; +use tokio::sync::mpsc; +use tokio::sync::Mutex; +use tokio::time::{interval, Duration}; + +use crate::utils::json_parser::json_to_pyobject; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + pub message_type: MessageType, + pub content: serde_json::Value, + pub timestamp: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum MessageType { + PoolOwner, + Validator, +} + +#[derive(Clone)] +pub struct MessageQueue { + pool_owner_queue: Arc>>, + validator_queue: Arc>>, + shutdown_tx: Arc>>>, +} + +impl MessageQueue { + pub fn new() -> Self { + Self { + pool_owner_queue: Arc::new(Mutex::new(VecDeque::new())), + validator_queue: Arc::new(Mutex::new(VecDeque::new())), + shutdown_tx: Arc::new(Mutex::new(None)), + } + } + + /// Start the background message listener + pub(crate) async fn start_listener(&self) -> Result<(), String> { + let (shutdown_tx, mut shutdown_rx) = mpsc::channel::<()>(1); + + // Store the shutdown sender + { + let mut tx_guard = self.shutdown_tx.lock().await; + *tx_guard = Some(shutdown_tx); + } + + let pool_owner_queue = self.pool_owner_queue.clone(); + let validator_queue = self.validator_queue.clone(); + + // Spawn background task to simulate incoming p2p messages + tokio::spawn(async move { + let mut ticker = interval(Duration::from_secs(5)); + let mut counter = 0u64; + + loop { + tokio::select! { + _ = ticker.tick() => { + // Mock pool owner messages + if counter % 2 == 0 { + let message = Message { + message_type: MessageType::PoolOwner, + content: serde_json::json!({ + "type": "inference_request", + "task_id": format!("task_{}", counter), + "prompt": format!("Test prompt {}", counter), + }), + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + }; + + let mut queue = pool_owner_queue.lock().await; + queue.push_back(message); + log::debug!("Added mock pool owner message to queue"); + } + + // Mock validator messages + if counter % 3 == 0 { + let message = Message { + message_type: MessageType::Validator, + content: serde_json::json!({ + "type": "validation_request", + "task_id": format!("validation_{}", counter), + }), + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + }; + + let mut queue = validator_queue.lock().await; + queue.push_back(message); + log::debug!("Added mock validator message to queue"); + } + + counter += 1; + } + _ = shutdown_rx.recv() => { + log::info!("Message listener shutting down"); + break; + } + } + } + }); + + Ok(()) + } + + /// Stop the background listener + #[allow(unused)] + pub(crate) async fn stop_listener(&self) -> Result<(), String> { + if let Some(tx) = self.shutdown_tx.lock().await.take() { + let _ = tx.send(()).await; + } + Ok(()) + } + /// Get the next message from the pool owner queue + pub(crate) async fn get_pool_owner_message(&self) -> Option { + let mut queue = self.pool_owner_queue.lock().await; + queue + .pop_front() + .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + } + + /// Get the next message from the validator queue + pub(crate) async fn get_validator_message(&self) -> Option { + let mut queue = self.validator_queue.lock().await; + queue + .pop_front() + .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + } + + /// Push a message to the appropriate queue (for testing or internal use) + #[allow(unused)] + pub(crate) async fn push_message(&self, message: Message) -> Result<(), String> { + match message.message_type { + MessageType::PoolOwner => { + let mut queue = self.pool_owner_queue.lock().await; + queue.push_back(message); + } + MessageType::Validator => { + let mut queue = self.validator_queue.lock().await; + queue.push_back(message); + } + } + Ok(()) + } + + /// Get queue sizes for monitoring + #[allow(unused)] + pub(crate) async fn get_queue_sizes(&self) -> (usize, usize) { + let pool_owner_size = self.pool_owner_queue.lock().await.len(); + let validator_size = self.validator_queue.lock().await.len(); + (pool_owner_size, validator_size) + } +} diff --git a/crates/prime-protocol-py/src/utils/json_parser.rs b/crates/prime-protocol-py/src/utils/json_parser.rs new file mode 100644 index 00000000..b5ed4aa2 --- /dev/null +++ b/crates/prime-protocol-py/src/utils/json_parser.rs @@ -0,0 +1,8 @@ +use pyo3::prelude::*; +use pythonize::pythonize; + +/// Convert a serde_json::Value to a Python object +pub fn json_to_pyobject(py: Python, value: &serde_json::Value) -> PyObject { + // pythonize handles all the conversion automatically! + pythonize(py, value).unwrap().into() +} diff --git a/crates/prime-protocol-py/src/utils/mod.rs b/crates/prime-protocol-py/src/utils/mod.rs new file mode 100644 index 00000000..3e9394ce --- /dev/null +++ b/crates/prime-protocol-py/src/utils/mod.rs @@ -0,0 +1 @@ +pub mod json_parser; diff --git a/crates/prime-protocol-py/src/client.rs b/crates/prime-protocol-py/src/worker.rs similarity index 50% rename from crates/prime-protocol-py/src/client.rs rename to crates/prime-protocol-py/src/worker.rs index b4139b7b..d7459ba4 100644 --- a/crates/prime-protocol-py/src/client.rs +++ b/crates/prime-protocol-py/src/worker.rs @@ -1,23 +1,26 @@ use crate::error::{PrimeProtocolError, Result}; +use crate::message_queue::MessageQueue; use alloy::primitives::utils::format_ether; -use alloy::primitives::U256; +use alloy::primitives::{Address, U256}; use prime_core::operations::compute_node::ComputeNodeOperations; use prime_core::operations::provider::ProviderOperations; use shared::web3::contracts::core::builder::{ContractBuilder, Contracts}; use shared::web3::contracts::structs::compute_pool::PoolStatus; use shared::web3::wallet::{Wallet, WalletProvider}; +use std::sync::Arc; use url::Url; -pub struct PrimeProtocolClientCore { +pub struct WorkerClientCore { rpc_url: String, compute_pool_id: u64, private_key_provider: Option, private_key_node: Option, auto_accept_transactions: bool, funding_retry_count: u32, + message_queue: Arc, } -impl PrimeProtocolClientCore { +impl WorkerClientCore { pub fn new( compute_pool_id: u64, rpc_url: String, @@ -42,6 +45,7 @@ impl PrimeProtocolClientCore { private_key_node, auto_accept_transactions: auto_accept_transactions.unwrap_or(true), funding_retry_count: funding_retry_count.unwrap_or(10), + message_queue: Arc::new(MessageQueue::new()), }) } @@ -50,15 +54,23 @@ impl PrimeProtocolClientCore { self.initialize_blockchain_components().await?; let pool_info = self.wait_for_active_pool(&contracts).await?; - log::info!("Pool info: {:?}", pool_info); - + log::debug!("Pool info: {:?}", pool_info); + log::debug!("Checking provider"); self.ensure_provider_registered(&provider_wallet, &contracts) .await?; + log::debug!("Checking compute node"); self.ensure_compute_node_registered(&provider_wallet, &node_wallet, &contracts) .await?; - // TODO: Optional - run hardware check? - // TODO: p2p reachable? + log::debug!("blockchain components initialized"); + log::debug!("starting queues"); + + // Start the message queue listener + self.message_queue.start_listener().await.map_err(|e| { + PrimeProtocolError::InvalidConfig(format!("Failed to start message listener: {}", e)) + })?; + + log::debug!("Message queue listener started"); Ok(()) } @@ -114,6 +126,7 @@ impl PrimeProtocolClientCore { } } } + async fn ensure_provider_registered( &self, provider_wallet: &Wallet, @@ -125,22 +138,33 @@ impl PrimeProtocolClientCore { self.auto_accept_transactions, ); - // Check if provider exists - let provider_exists = provider_ops.check_provider_exists().await.map_err(|e| { + let provider_exists = self.check_provider_exists(&provider_ops).await?; + let is_whitelisted = self.check_provider_whitelisted(&provider_ops).await?; + + if provider_exists && is_whitelisted { + log::info!("Provider is registered and whitelisted"); + } else { + self.register_provider_if_needed(&provider_ops, contracts) + .await?; + } + + self.ensure_adequate_stake(&provider_ops, provider_wallet, contracts) + .await?; + + Ok(()) + } + + async fn check_provider_exists(&self, provider_ops: &ProviderOperations) -> Result { + provider_ops.check_provider_exists().await.map_err(|e| { PrimeProtocolError::BlockchainError(format!( "Failed to check if provider exists: {}", e )) - })?; - - let Some(stake_manager) = contracts.stake_manager.as_ref() else { - return Err(PrimeProtocolError::BlockchainError( - "Stake manager not initialized".to_string(), - )); - }; + }) + } - // Check if provider is whitelisted - let is_whitelisted = provider_ops + async fn check_provider_whitelisted(&self, provider_ops: &ProviderOperations) -> Result { + provider_ops .check_provider_whitelisted() .await .map_err(|e| { @@ -148,59 +172,58 @@ impl PrimeProtocolClientCore { "Failed to check provider whitelist status: {}", e )) - })?; - - // todo: revisit this - if provider_exists && is_whitelisted { - log::info!("Provider is registered and whitelisted"); - } else { - // For now, we'll use a default compute_units value - this should be configurable - let compute_units = U256::from(1); - - let required_stake = stake_manager - .calculate_stake(compute_units, U256::from(0)) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!( - "Failed to calculate required stake: {}", - e - )) - })?; - - log::info!("Required stake: {}", format_ether(required_stake)); - - provider_ops - .retry_register_provider(required_stake, self.funding_retry_count, None) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!( - "Failed to register provider: {}", - e - )) - })?; + }) + } - log::info!("Provider registered successfully"); - } + async fn register_provider_if_needed( + &self, + provider_ops: &ProviderOperations, + contracts: &Contracts, + ) -> Result<()> { + let stake_manager = contracts.stake_manager.as_ref().ok_or_else(|| { + PrimeProtocolError::BlockchainError("Stake manager not initialized".to_string()) + })?; + let compute_units = U256::from(1); // TODO: Make configurable - // Get provider's current total compute and stake - let provider_total_compute = contracts - .compute_registry - .get_provider_total_compute(provider_wallet.wallet.default_signer().address()) + let required_stake = stake_manager + .calculate_stake(compute_units, U256::from(0)) .await .map_err(|e| { PrimeProtocolError::BlockchainError(format!( - "Failed to get provider total compute: {}", + "Failed to calculate required stake: {}", e )) })?; - let provider_stake = stake_manager - .get_stake(provider_wallet.wallet.default_signer().address()) + log::info!("Required stake: {}", format_ether(required_stake)); + + provider_ops + .retry_register_provider(required_stake, self.funding_retry_count, None) .await - .unwrap_or_default(); + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to register provider: {}", e)) + })?; + + log::info!("Provider registered successfully"); + Ok(()) + } + + async fn ensure_adequate_stake( + &self, + provider_ops: &ProviderOperations, + provider_wallet: &Wallet, + contracts: &Contracts, + ) -> Result<()> { + let stake_manager = contracts.stake_manager.as_ref().ok_or_else(|| { + PrimeProtocolError::BlockchainError("Stake manager not initialized".to_string()) + })?; + let provider_address = provider_wallet.wallet.default_signer().address(); - // For now, we'll use a default compute_units value - this should be configurable - let compute_units = U256::from(1); + let provider_total_compute = self + .get_provider_total_compute(contracts, provider_address) + .await?; + let provider_stake = self.get_provider_stake(contracts, provider_address).await; + let compute_units = U256::from(1); // TODO: Make configurable let required_stake = stake_manager .calculate_stake(compute_units, provider_total_compute) @@ -213,22 +236,65 @@ impl PrimeProtocolClientCore { })?; if required_stake > provider_stake { - log::info!( - "Provider stake is less than required stake. Required: {} tokens, Current: {} tokens", - format_ether(required_stake), - format_ether(provider_stake) - ); - - provider_ops - .increase_stake(required_stake - provider_stake) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!("Failed to increase stake: {}", e)) - })?; + self.increase_provider_stake(provider_ops, required_stake, provider_stake) + .await?; + } + + Ok(()) + } + + async fn get_provider_total_compute( + &self, + contracts: &Contracts, + provider_address: Address, + ) -> Result { + contracts + .compute_registry + .get_provider_total_compute(provider_address) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to get provider total compute: {}", + e + )) + }) + } - log::info!("Successfully increased stake"); + async fn get_provider_stake( + &self, + contracts: &Contracts, + provider_address: Address, + ) -> U256 { + let stake_manager = contracts.stake_manager.as_ref(); + match stake_manager { + Some(manager) => manager + .get_stake(provider_address) + .await + .unwrap_or_default(), + None => U256::from(0), } + } + async fn increase_provider_stake( + &self, + provider_ops: &ProviderOperations, + required_stake: U256, + current_stake: U256, + ) -> Result<()> { + log::info!( + "Provider stake is less than required stake. Required: {} tokens, Current: {} tokens", + format_ether(required_stake), + format_ether(current_stake) + ); + + provider_ops + .increase_stake(required_stake - current_stake) + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to increase stake: {}", e)) + })?; + + log::info!("Successfully increased stake"); Ok(()) } @@ -241,27 +307,40 @@ impl PrimeProtocolClientCore { let compute_node_ops = ComputeNodeOperations::new(provider_wallet, node_wallet, contracts.clone()); - // Check if compute node exists - let compute_node_exists = - compute_node_ops - .check_compute_node_exists() - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!( - "Failed to check if compute node exists: {}", - e - )) - })?; + let compute_node_exists = self.check_compute_node_exists(&compute_node_ops).await?; if compute_node_exists { log::info!("Compute node is already registered"); return Ok(()); } - // If compute node doesn't exist, register it - // For now, we'll use default compute specs - this should be configurable + self.register_compute_node(&compute_node_ops).await?; + Ok(()) + } + + async fn check_compute_node_exists( + &self, + compute_node_ops: &ComputeNodeOperations<'_>, + ) -> Result { compute_node_ops - .add_compute_node(U256::from(1)) + .check_compute_node_exists() + .await + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!( + "Failed to check if compute node exists: {}", + e + )) + }) + } + + async fn register_compute_node( + &self, + compute_node_ops: &ComputeNodeOperations<'_>, + ) -> Result<()> { + let compute_units = U256::from(1); // TODO: Make configurable + + compute_node_ops + .add_compute_node(compute_units) .await .map_err(|e| { PrimeProtocolError::BlockchainError(format!( @@ -291,4 +370,42 @@ impl PrimeProtocolClientCore { }), } } + + /// Get the shared message queue instance + pub fn get_message_queue(&self) -> Arc { + self.message_queue.clone() + } + + /// Stop the message queue listener + pub async fn stop_async(&self) -> Result<()> { + self.message_queue.stop_listener().await.map_err(|e| { + PrimeProtocolError::InvalidConfig(format!("Failed to stop message listener: {}", e)) + })?; + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use test_log::test; + + #[test(tokio::test)] + async fn test_start_async() { + // standard anvil blockchain keys for local testing + let node_key = "0x7c852118294e51e653712a81e05800f419141751be58f605c371e15141b007a6"; + let provider_key = "0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a"; + + // todo: currently still have to make up the local blockchain incl. smart contract deployments + let worker = WorkerClientCore::new( + 0, + "http://localhost:8545".to_string(), + Some(provider_key.to_string()), + Some(node_key.to_string()), + None, + None, + ) + .unwrap(); + worker.start_async().await.unwrap(); + } } From ecb5b66f527c387c9cc1bbfc8a6f0e845cccde5b Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:09:23 -0400 Subject: [PATCH 34/40] fix tests --- crates/orchestrator/src/api/tests/helper.rs | 12 ++++++------ .../validator/src/validators/hardware_challenge.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index 92b26cce..f4204262 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -46,8 +46,8 @@ pub(crate) async fn create_test_app_state() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), @@ -112,8 +112,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), @@ -175,8 +175,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new("0".to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 5580096e..6d8df988 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -65,13 +65,13 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; - println!("hardware challenge sent to node {}", node.id); + info!("hardware challenge sent to node {}", node.id); let resp = response_rx .await .context("failed to receive response from node")?; - println!("response received from node {}: {:?}", node.id, resp); + info!("response received from node {}: {:?}", node.id, resp); if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); From 4798692eb16acfc738f7433c7e67102d6eb1fc89 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:32:19 -0400 Subject: [PATCH 35/40] remove explicit dialing, messaging now working --- crates/p2p/src/behaviour.rs | 3 +- crates/p2p/src/lib.rs | 62 +++---------------- crates/shared/src/p2p/service.rs | 40 +++--------- .../src/validators/hardware_challenge.rs | 4 -- crates/worker/src/p2p/mod.rs | 2 +- 5 files changed, 18 insertions(+), 93 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 9d92be15..399693b5 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -11,8 +11,8 @@ use libp2p::mdns; use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; +use log::debug; use std::time::Duration; -use tracing::debug; use crate::message::IncomingMessage; use crate::message::{Request, Response}; @@ -155,6 +155,7 @@ impl BehaviourEvent { BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { debug!("received message from peer {peer:?}: {message:?}"); + // if this errors, user dropped their incoming message channel let _ = message_tx.send(IncomingMessage { peer, message }).await; } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 896698c8..f5bc648c 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -7,9 +7,8 @@ use libp2p::yamux; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; +use log::debug; use std::time::Duration; -use tracing::debug; -use tracing::info; mod behaviour; mod message; @@ -25,8 +24,6 @@ pub type ResponseChannel = libp2p::request_response::ResponseChannel; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; pub type Keypair = libp2p::identity::Keypair; -pub type DialSender = - tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -40,9 +37,6 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, - dial_rx: - tokio::sync::mpsc::Receiver<(Vec, tokio::sync::oneshot::Sender>)>, - // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -80,7 +74,6 @@ impl Node { mut swarm, bootnodes, cancellation_token, - mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -107,30 +100,16 @@ impl Node { debug!("cancellation token triggered, shutting down node"); break Ok(()); } - Some((addrs, res_tx)) = dial_rx.recv() => { - log::info!("dialing addresses: {addrs:?}"); - let mut res = Ok(()); - for addr in &addrs { - match swarm.dial(addr.clone()) { - Ok(_) => {} - Err(e) => { - res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); - break; - } - } - } - log::info!("finished dialing addresses: {addrs:?}"); - let _ = res_tx.send(res); - } Some(message) = outgoing_message_rx.recv() => { match message { - OutgoingMessage::Request((peer, _addrs, request)) => { + OutgoingMessage::Request((peer, addrs, request)) => { // TODO: if we're not connected to the peer, we should dial it - log::info!("sending request to peer {peer}: {request:?}"); + for addr in addrs { + swarm.add_peer_address(peer, addr); + } swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { - log::info!("sending response: {response:?}"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { debug!("failed to send response: {e:?}"); } @@ -152,7 +131,7 @@ impl Node { peer_id, .. } => { - info!("connection established with peer {peer_id}"); + debug!("connection established with peer {peer_id}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -282,7 +261,6 @@ impl NodeBuilder { self, ) -> Result<( Node, - DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -324,7 +302,6 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } - let (dial_tx, dial_rx) = tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -334,12 +311,10 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, - dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, - dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -364,34 +339,13 @@ mod test { use super::NodeBuilder; use crate::message; - #[tokio::test] - async fn can_dial() { - let (node1, _, _, _) = NodeBuilder::new().with_port(4002).try_build().unwrap(); - let node1_peer_id = node1.peer_id(); - let local_p2p_address: crate::Multiaddr = - format!("/ip4/127.0.0.1/tcp/4002/p2p/{}", node1_peer_id) - .parse() - .expect("can parse valid multiaddr"); - let (node2, dial_tx2, _, _) = NodeBuilder::new().try_build().unwrap(); - tokio::spawn(async move { node1.run().await }); - tokio::spawn(async move { node2.run().await }); - - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx2 - .send((vec![local_p2p_address], res_tx)) - .await - .expect("can send dial request"); - let res = res_rx.await.expect("can receive dial response"); - assert!(res.is_ok(), "dialing node1 should succeed: {res:?}"); - } - #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index f0d504ca..9223bc3d 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -26,7 +26,6 @@ pub struct OutgoingRequest { /// requests to the worker. pub struct Service { node: Node, - dial_tx: p2p::DialSender, incoming_messages_rx: Receiver, outgoing_messages_rx: Receiver, cancellation_token: CancellationToken, @@ -41,7 +40,7 @@ impl Service { wallet: Wallet, protocols: Protocols, ) -> Result<(Self, Sender)> { - let (node, dial_tx, incoming_messages_rx, outgoing_messages) = + let (node, incoming_messages_rx, outgoing_messages) = build_p2p_node(keypair, port, cancellation_token.clone(), protocols.clone()) .context("failed to build p2p node")?; let (outgoing_messages_tx, outgoing_messages_rx) = tokio::sync::mpsc::channel(100); @@ -49,7 +48,6 @@ impl Service { Ok(( Self { node, - dial_tx, incoming_messages_rx, outgoing_messages_rx, cancellation_token, @@ -64,7 +62,6 @@ impl Service { let Self { node, - dial_tx, mut incoming_messages_rx, mut outgoing_messages_rx, cancellation_token, @@ -81,7 +78,7 @@ impl Service { break; } Some(message) = outgoing_messages_rx.recv() => { - let handle = tokio::task::spawn(handle_outgoing_message(message, dial_tx.clone(), context.clone())); + let handle = tokio::task::spawn(handle_outgoing_message(message, context.clone())); outgoing_message_handlers.push(handle); } Some(message) = incoming_messages_rx.recv() => { @@ -111,12 +108,7 @@ fn build_p2p_node( port: u16, cancellation_token: CancellationToken, protocols: Protocols, -) -> Result<( - Node, - p2p::DialSender, - Receiver, - Sender, -)> { +) -> Result<(Node, Receiver, Sender)> { NodeBuilder::new() .with_keypair(keypair) .with_port(port) @@ -171,11 +163,7 @@ impl Context { } } -async fn handle_outgoing_message( - message: OutgoingRequest, - dial_tx: p2p::DialSender, - context: Context, -) -> Result<()> { +async fn handle_outgoing_message(message: OutgoingRequest, context: Context) -> Result<()> { use rand_v8::rngs::OsRng; use rand_v8::Rng as _; use std::str::FromStr as _; @@ -205,9 +193,6 @@ async fn handle_outgoing_message( return Ok(()); } - log::info!("sending validation authentication request to {peer_id}"); - - // first, dial the worker // ensure there's no ongoing challenge // use write-lock to make this atomic until we finish sending the auth request and writing to the map let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; @@ -217,25 +202,14 @@ async fn handle_outgoing_message( let multiaddrs = multiaddrs .iter() - .filter_map(|addr| p2p::Multiaddr::from_str(addr).ok()?.with_p2p(peer_id).ok()) + .filter_map( + |addr| p2p::Multiaddr::from_str(addr).ok(), /* ?.with_p2p(peer_id).ok()*/ + ) .collect::>(); if multiaddrs.is_empty() { bail!("no valid multiaddrs for peer id {peer_id}"); } - // TODO: we can improve this by checking if we're already connected to the peer before dialing - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx - .send((multiaddrs.clone(), res_tx)) - .await - .context("failed to send dial request")?; - log::info!("dialing worker {peer_id} with multiaddrs: {multiaddrs:?}"); - res_rx - .await - .context("failed to receive dial response")? - .context("failed to dial worker")?; - log::info!("dialed worker {peer_id} with multiaddrs: {multiaddrs:?}"); - // create the authentication challenge request message let challenge_bytes: [u8; 32] = OsRng.gen(); let auth_challenge_message: String = hex::encode(challenge_bytes); diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6d8df988..0c570ce0 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -65,14 +65,10 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; - info!("hardware challenge sent to node {}", node.id); - let resp = response_rx .await .context("failed to receive response from node")?; - info!("response received from node {}: {:?}", node.id, resp); - if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); } else { diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index c8464a7a..94fe10a3 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -112,7 +112,7 @@ fn build_p2p_node( port: u16, cancellation_token: CancellationToken, ) -> Result<(Node, Receiver, Sender)> { - let (node, _, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() + let (node, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() .with_keypair(keypair) .with_port(port) .with_authentication() From f87d5d3f894e72ce30d37c24621475333ecb983c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:33:03 -0400 Subject: [PATCH 36/40] remove println --- crates/validator/src/validators/hardware_challenge.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 0c570ce0..6970355d 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -30,11 +30,6 @@ impl HardwareChallenge { .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; - println!( - "Challenging node {} with P2P ID: {} and addresses: {:?}", - node.id, p2p_id, p2p_addresses - ); - // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); let challenge_expected = p2p::calc_matrix(&challenge_matrix); From d77ef0492dcf882a067f9256b0d078e75f652c77 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:43:12 -0400 Subject: [PATCH 37/40] fix unit tests --- crates/worker/src/cli/command.rs | 12 +++++- crates/worker/src/docker/taskbridge/bridge.rs | 10 ++--- crates/worker/src/state/system_state.rs | 38 ++++++++++--------- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 539de1ae..1e9e5825 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -220,11 +220,19 @@ pub async fn execute_command( ); std::process::exit(1); } - let state = Arc::new(SystemState::new( + let state = match SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, *compute_pool_id, - )); + ) { + Ok(state) => state, + Err(e) => { + error!("❌ Failed to initialize system state: {e}"); + std::process::exit(1); + } + }; + + let state = Arc::new(state); let private_key_provider = if let Some(key) = private_key_provider { Console::warning("Using private key from command line is not recommended. Consider using PRIVATE_KEY_PROVIDER environment variable instead."); diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 80b8aee7..4765ef06 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -473,7 +473,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -506,7 +506,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -541,7 +541,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -590,7 +590,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -639,7 +639,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index bed32693..39955de8 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -1,3 +1,4 @@ +use anyhow::bail; use anyhow::Result; use directories::ProjectDirs; use log::debug; @@ -60,7 +61,7 @@ impl SystemState { state_dir: Option, disable_state_storing: bool, compute_pool_id: u32, - ) -> Self { + ) -> Result { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); let state_path = state_dir @@ -84,7 +85,7 @@ impl SystemState { endpoint = loaded_state.endpoint; p2p_keypair = Some(loaded_state.p2p_keypair); } else { - debug!("Failed to load state from {state_file:?}"); + bail!("failed to load state from {state_file:?}"); } } } @@ -93,7 +94,7 @@ impl SystemState { p2p_keypair = Some(p2p::Keypair::generate_ed25519()); } - Self { + Ok(Self { last_heartbeat: Arc::new(RwLock::new(None)), is_running: Arc::new(RwLock::new(false)), endpoint: Arc::new(RwLock::new(endpoint)), @@ -101,7 +102,7 @@ impl SystemState { disable_state_storing, compute_pool_id, p2p_keypair: p2p_keypair.expect("p2p keypair must be Some at this point"), - } + }) } fn save_state(&self, heartbeat_endpoint: Option) -> Result<()> { @@ -141,8 +142,7 @@ impl SystemState { match serde_json::from_str(&contents) { Ok(state) => return Ok(Some(state)), Err(e) => { - debug!("Error parsing state file: {e}"); - return Ok(None); + bail!("failed to parse state file: {e}"); } } } @@ -232,7 +232,8 @@ mod tests { Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); + ) + .unwrap(); let _ = state .set_running(true, Some("http://localhost:8080/heartbeat".to_string())) .await; @@ -255,30 +256,33 @@ mod tests { let state_file = temp_dir.path().join(STATE_FILENAME); fs::write(&state_file, "invalid_toml_content").expect("Failed to write to state file"); - let state = SystemState::new( + assert!(SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); - assert!(!(state.is_running().await)); - assert_eq!(state.get_heartbeat_endpoint().await, None); + ) + .is_err()); } #[tokio::test] async fn test_load_state() { + let keypair = p2p::Keypair::generate_ed25519(); + let state = PersistedSystemState { + endpoint: Some("http://localhost:8080/heartbeat".to_string()), + p2p_keypair: keypair, + }; + let serialized = serde_json::to_string_pretty(&state).unwrap(); + let temp_dir = setup_test_dir(); let state_file = temp_dir.path().join(STATE_FILENAME); - fs::write( - &state_file, - r#"{"endpoint": "http://localhost:8080/heartbeat"}"#, - ) - .expect("Failed to write to state file"); + fs::write(&state_file, serialized).unwrap(); let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); + ) + .unwrap(); assert_eq!( state.get_heartbeat_endpoint().await, Some("http://localhost:8080/heartbeat".to_string()) From 25a0d6d16a2aa17fb5a4b703f22d51f35d070801 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Fri, 11 Jul 2025 18:26:13 +0200 Subject: [PATCH 38/40] restructure python sdk lib to have pyo bindings in sep. modules --- crates/prime-protocol-py/src/lib.rs | 129 +----------------- .../prime-protocol-py/src/orchestrator/mod.rs | 19 +++ crates/prime-protocol-py/src/validator/mod.rs | 19 +++ .../src/{worker.rs => worker/client.rs} | 2 +- .../src/{ => worker}/message_queue.rs | 0 crates/prime-protocol-py/src/worker/mod.rs | 88 ++++++++++++ .../src/checks/hardware/storage.rs:236:1 | 0 7 files changed, 132 insertions(+), 125 deletions(-) create mode 100644 crates/prime-protocol-py/src/orchestrator/mod.rs create mode 100644 crates/prime-protocol-py/src/validator/mod.rs rename crates/prime-protocol-py/src/{worker.rs => worker/client.rs} (99%) rename crates/prime-protocol-py/src/{ => worker}/message_queue.rs (100%) create mode 100644 crates/prime-protocol-py/src/worker/mod.rs create mode 100644 crates/worker/src/checks/hardware/storage.rs:236:1 diff --git a/crates/prime-protocol-py/src/lib.rs b/crates/prime-protocol-py/src/lib.rs index b332a9e0..0715c33a 100644 --- a/crates/prime-protocol-py/src/lib.rs +++ b/crates/prime-protocol-py/src/lib.rs @@ -1,133 +1,14 @@ +use crate::orchestrator::OrchestratorClient; +use crate::validator::ValidatorClient; +use crate::worker::WorkerClient; use pyo3::prelude::*; mod error; -mod message_queue; +mod orchestrator; mod utils; +mod validator; mod worker; -use worker::WorkerClientCore; - -/// Prime Protocol Worker Client - for compute nodes that execute tasks -#[pyclass] -pub struct WorkerClient { - inner: WorkerClientCore, - runtime: Option, -} - -#[pymethods] -impl WorkerClient { - #[new] - #[pyo3(signature = (compute_pool_id, rpc_url, private_key_provider=None, private_key_node=None))] - pub fn new( - compute_pool_id: u64, - rpc_url: String, - private_key_provider: Option, - private_key_node: Option, - ) -> PyResult { - let inner = WorkerClientCore::new( - compute_pool_id, - rpc_url, - private_key_provider, - private_key_node, - None, - None, - ) - .map_err(|e| PyErr::new::(e.to_string()))?; - - Ok(Self { - inner, - runtime: None, - }) - } - - pub fn start(&mut self) -> PyResult<()> { - // Create a new runtime for this call - let rt = tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .map_err(|e| PyErr::new::(e.to_string()))?; - - // Run the async function - let result = rt.block_on(self.inner.start_async()); - println!("system start completed"); - - // Store the runtime for future use - self.runtime = Some(rt); - - result.map_err(|e| PyErr::new::(e.to_string())) - } - - pub fn get_pool_owner_message(&self) -> PyResult> { - if let Some(rt) = self.runtime.as_ref() { - Ok(rt.block_on(self.inner.get_message_queue().get_pool_owner_message())) - } else { - Err(PyErr::new::( - "Client not started. Call start() first.".to_string(), - )) - } - } - - pub fn get_validator_message(&self) -> PyResult> { - if let Some(rt) = self.runtime.as_ref() { - Ok(rt.block_on(self.inner.get_message_queue().get_validator_message())) - } else { - Err(PyErr::new::( - "Client not started. Call start() first.".to_string(), - )) - } - } - - pub fn stop(&mut self) -> PyResult<()> { - if let Some(rt) = self.runtime.as_ref() { - rt.block_on(self.inner.stop_async()) - .map_err(|e| PyErr::new::(e.to_string()))?; - } - - // Clean up the runtime - if let Some(rt) = self.runtime.take() { - rt.shutdown_background(); - } - - Ok(()) - } -} - -/// Prime Protocol Orchestrator Client - for managing and distributing tasks -#[pyclass] -pub struct OrchestratorClient { - // TODO: Implement orchestrator-specific functionality -} - -#[pymethods] -impl OrchestratorClient { - #[new] - #[pyo3(signature = (rpc_url, private_key=None))] - pub fn new(rpc_url: String, private_key: Option) -> PyResult { - // TODO: Implement orchestrator initialization - let _ = rpc_url; - let _ = private_key; - Ok(Self {}) - } -} - -/// Prime Protocol Validator Client - for validating task results -#[pyclass] -pub struct ValidatorClient { - // TODO: Implement validator-specific functionality -} - -#[pymethods] -impl ValidatorClient { - #[new] - #[pyo3(signature = (rpc_url, private_key=None))] - pub fn new(rpc_url: String, private_key: Option) -> PyResult { - // TODO: Implement validator initialization - let _ = rpc_url; - let _ = private_key; - Ok(Self {}) - } -} - #[pymodule] fn primeprotocol(m: &Bound<'_, PyModule>) -> PyResult<()> { pyo3_log::init(); diff --git a/crates/prime-protocol-py/src/orchestrator/mod.rs b/crates/prime-protocol-py/src/orchestrator/mod.rs new file mode 100644 index 00000000..39f4d915 --- /dev/null +++ b/crates/prime-protocol-py/src/orchestrator/mod.rs @@ -0,0 +1,19 @@ +use pyo3::prelude::*; + +/// Prime Protocol Orchestrator Client - for managing and distributing tasks +#[pyclass] +pub struct OrchestratorClient { + // TODO: Implement orchestrator-specific functionality +} + +#[pymethods] +impl OrchestratorClient { + #[new] + #[pyo3(signature = (rpc_url, private_key=None))] + pub fn new(rpc_url: String, private_key: Option) -> PyResult { + // TODO: Implement orchestrator initialization + let _ = rpc_url; + let _ = private_key; + Ok(Self {}) + } +} diff --git a/crates/prime-protocol-py/src/validator/mod.rs b/crates/prime-protocol-py/src/validator/mod.rs new file mode 100644 index 00000000..ed02939c --- /dev/null +++ b/crates/prime-protocol-py/src/validator/mod.rs @@ -0,0 +1,19 @@ +use pyo3::prelude::*; + +/// Prime Protocol Validator Client - for validating task results +#[pyclass] +pub(crate) struct ValidatorClient { + // TODO: Implement validator-specific functionality +} + +#[pymethods] +impl ValidatorClient { + #[new] + #[pyo3(signature = (rpc_url, private_key=None))] + pub fn new(rpc_url: String, private_key: Option) -> PyResult { + // TODO: Implement validator initialization + let _ = rpc_url; + let _ = private_key; + Ok(Self {}) + } +} diff --git a/crates/prime-protocol-py/src/worker.rs b/crates/prime-protocol-py/src/worker/client.rs similarity index 99% rename from crates/prime-protocol-py/src/worker.rs rename to crates/prime-protocol-py/src/worker/client.rs index d7459ba4..e15ed7c6 100644 --- a/crates/prime-protocol-py/src/worker.rs +++ b/crates/prime-protocol-py/src/worker/client.rs @@ -1,5 +1,5 @@ use crate::error::{PrimeProtocolError, Result}; -use crate::message_queue::MessageQueue; +use crate::worker::message_queue::MessageQueue; use alloy::primitives::utils::format_ether; use alloy::primitives::{Address, U256}; use prime_core::operations::compute_node::ComputeNodeOperations; diff --git a/crates/prime-protocol-py/src/message_queue.rs b/crates/prime-protocol-py/src/worker/message_queue.rs similarity index 100% rename from crates/prime-protocol-py/src/message_queue.rs rename to crates/prime-protocol-py/src/worker/message_queue.rs diff --git a/crates/prime-protocol-py/src/worker/mod.rs b/crates/prime-protocol-py/src/worker/mod.rs new file mode 100644 index 00000000..02f7634f --- /dev/null +++ b/crates/prime-protocol-py/src/worker/mod.rs @@ -0,0 +1,88 @@ +use pyo3::prelude::*; +mod client; +pub(crate) mod message_queue; +pub(crate) use client::WorkerClientCore; + +/// Prime Protocol Worker Client - for compute nodes that execute tasks +#[pyclass] +pub(crate) struct WorkerClient { + inner: WorkerClientCore, + runtime: Option, +} + +#[pymethods] +impl WorkerClient { + #[new] + #[pyo3(signature = (compute_pool_id, rpc_url, private_key_provider=None, private_key_node=None))] + pub fn new( + compute_pool_id: u64, + rpc_url: String, + private_key_provider: Option, + private_key_node: Option, + ) -> PyResult { + let inner = WorkerClientCore::new( + compute_pool_id, + rpc_url, + private_key_provider, + private_key_node, + None, + None, + ) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(Self { + inner, + runtime: None, + }) + } + + pub fn start(&mut self) -> PyResult<()> { + // Create a new runtime for this call + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Run the async function + let result = rt.block_on(self.inner.start_async()); + + // Store the runtime for future use + self.runtime = Some(rt); + + result.map_err(|e| PyErr::new::(e.to_string())) + } + + pub fn get_pool_owner_message(&self) -> PyResult> { + if let Some(rt) = self.runtime.as_ref() { + Ok(rt.block_on(self.inner.get_message_queue().get_pool_owner_message())) + } else { + Err(PyErr::new::( + "Client not started. Call start() first.".to_string(), + )) + } + } + + pub fn get_validator_message(&self) -> PyResult> { + if let Some(rt) = self.runtime.as_ref() { + Ok(rt.block_on(self.inner.get_message_queue().get_validator_message())) + } else { + Err(PyErr::new::( + "Client not started. Call start() first.".to_string(), + )) + } + } + + pub fn stop(&mut self) -> PyResult<()> { + if let Some(rt) = self.runtime.as_ref() { + rt.block_on(self.inner.stop_async()) + .map_err(|e| PyErr::new::(e.to_string()))?; + } + + // Clean up the runtime + if let Some(rt) = self.runtime.take() { + rt.shutdown_background(); + } + + Ok(()) + } +} diff --git a/crates/worker/src/checks/hardware/storage.rs:236:1 b/crates/worker/src/checks/hardware/storage.rs:236:1 new file mode 100644 index 00000000..e69de29b From 0f907f2365cc9607eb93d0cad4ffc69b8c63bdc0 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Fri, 11 Jul 2025 18:58:36 +0200 Subject: [PATCH 39/40] fix async gil issues, add bootstrap cmd to Makefile --- Makefile | 12 ++++ .../prime-protocol-py/examples/basic_usage.py | 18 +++-- crates/prime-protocol-py/src/worker/client.rs | 70 +++++++++++++------ crates/prime-protocol-py/src/worker/mod.rs | 22 +++--- 4 files changed, 87 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index dfc0d0af..5de39578 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,18 @@ up: @# Attach to session @tmux attach-session -t prime-dev +# Start Docker services and deploy contracts only +.PHONY: bootstrap +bootstrap: + @echo "Starting Docker services and deploying contracts..." + @# Start Docker services + @docker compose up -d reth redis --wait --wait-timeout 180 + @# Deploy contracts + @cd smart-contracts && sh deploy.sh && sh deploy_work_validation.sh && cd .. + @# Run setup + @$(MAKE) setup + @echo "Bootstrap complete - Docker services running and contracts deployed" + # Stop development environment .PHONY: down down: diff --git a/crates/prime-protocol-py/examples/basic_usage.py b/crates/prime-protocol-py/examples/basic_usage.py index 66572db7..02b19bd9 100644 --- a/crates/prime-protocol-py/examples/basic_usage.py +++ b/crates/prime-protocol-py/examples/basic_usage.py @@ -12,7 +12,7 @@ FORMAT = '%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s' logging.basicConfig(format=FORMAT) -logging.getLogger().setLevel(logging.DEBUG) +logging.getLogger().setLevel(logging.INFO) def handle_pool_owner_message(message: Dict[str, Any]) -> None: @@ -85,22 +85,30 @@ def signal_handler(sig, frame): logging.error(f"Error during shutdown: {e}") sys.exit(0) - # Register signal handler for Ctrl+C + # Register signal handler for Ctrl+C before starting client signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) try: + logging.info("Starting client... (Press Ctrl+C to interrupt)") client.start() logging.info("Setup completed. Starting message polling loop...") print("Worker client started. Polling for messages. Press Ctrl+C to stop.") # Message polling loop while True: - check_for_messages(client) - time.sleep(0.1) # Small delay to prevent busy waiting + try: + check_for_messages(client) + time.sleep(0.1) # Small delay to prevent busy waiting + except KeyboardInterrupt: + # Handle Ctrl+C during message polling + logging.info("Keyboard interrupt received during polling") + signal_handler(signal.SIGINT, None) + break except KeyboardInterrupt: - logging.info("Keyboard interrupt received") + # Handle Ctrl+C during client startup + logging.info("Keyboard interrupt received during startup") signal_handler(signal.SIGINT, None) except Exception as e: logging.error(f"Unexpected error: {e}") diff --git a/crates/prime-protocol-py/src/worker/client.rs b/crates/prime-protocol-py/src/worker/client.rs index e15ed7c6..db30c0b4 100644 --- a/crates/prime-protocol-py/src/worker/client.rs +++ b/crates/prime-protocol-py/src/worker/client.rs @@ -197,12 +197,23 @@ impl WorkerClientCore { log::info!("Required stake: {}", format_ether(required_stake)); - provider_ops - .retry_register_provider(required_stake, self.funding_retry_count, None) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!("Failed to register provider: {}", e)) - })?; + // Add timeout to prevent hanging on blockchain operations + let register_future = + provider_ops.retry_register_provider(required_stake, self.funding_retry_count, None); + + tokio::time::timeout( + tokio::time::Duration::from_secs(300), // 5 minute timeout + register_future, + ) + .await + .map_err(|_| { + PrimeProtocolError::BlockchainError( + "Provider registration timed out after 5 minutes".to_string(), + ) + })? + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to register provider: {}", e)) + })?; log::info!("Provider registered successfully"); Ok(()) @@ -287,12 +298,22 @@ impl WorkerClientCore { format_ether(current_stake) ); - provider_ops - .increase_stake(required_stake - current_stake) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!("Failed to increase stake: {}", e)) - })?; + // Add timeout to prevent hanging on stake increase operations + let stake_future = provider_ops.increase_stake(required_stake - current_stake); + + tokio::time::timeout( + tokio::time::Duration::from_secs(300), // 5 minute timeout + stake_future, + ) + .await + .map_err(|_| { + PrimeProtocolError::BlockchainError( + "Stake increase timed out after 5 minutes".to_string(), + ) + })? + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to increase stake: {}", e)) + })?; log::info!("Successfully increased stake"); Ok(()) @@ -339,15 +360,22 @@ impl WorkerClientCore { ) -> Result<()> { let compute_units = U256::from(1); // TODO: Make configurable - compute_node_ops - .add_compute_node(compute_units) - .await - .map_err(|e| { - PrimeProtocolError::BlockchainError(format!( - "Failed to register compute node: {}", - e - )) - })?; + // Add timeout to prevent hanging on compute node registration + let register_future = compute_node_ops.add_compute_node(compute_units); + + tokio::time::timeout( + tokio::time::Duration::from_secs(300), // 5 minute timeout + register_future, + ) + .await + .map_err(|_| { + PrimeProtocolError::BlockchainError( + "Compute node registration timed out after 5 minutes".to_string(), + ) + })? + .map_err(|e| { + PrimeProtocolError::BlockchainError(format!("Failed to register compute node: {}", e)) + })?; log::info!("Compute node registered successfully"); Ok(()) diff --git a/crates/prime-protocol-py/src/worker/mod.rs b/crates/prime-protocol-py/src/worker/mod.rs index 02f7634f..b28e2216 100644 --- a/crates/prime-protocol-py/src/worker/mod.rs +++ b/crates/prime-protocol-py/src/worker/mod.rs @@ -36,15 +36,15 @@ impl WorkerClient { }) } - pub fn start(&mut self) -> PyResult<()> { + pub fn start(&mut self, py: Python) -> PyResult<()> { // Create a new runtime for this call let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build() .map_err(|e| PyErr::new::(e.to_string()))?; - // Run the async function - let result = rt.block_on(self.inner.start_async()); + // Run the async function with GIL released + let result = py.allow_threads(|| rt.block_on(self.inner.start_async())); // Store the runtime for future use self.runtime = Some(rt); @@ -52,9 +52,11 @@ impl WorkerClient { result.map_err(|e| PyErr::new::(e.to_string())) } - pub fn get_pool_owner_message(&self) -> PyResult> { + pub fn get_pool_owner_message(&self, py: Python) -> PyResult> { if let Some(rt) = self.runtime.as_ref() { - Ok(rt.block_on(self.inner.get_message_queue().get_pool_owner_message())) + Ok(py.allow_threads(|| { + rt.block_on(self.inner.get_message_queue().get_pool_owner_message()) + })) } else { Err(PyErr::new::( "Client not started. Call start() first.".to_string(), @@ -62,9 +64,11 @@ impl WorkerClient { } } - pub fn get_validator_message(&self) -> PyResult> { + pub fn get_validator_message(&self, py: Python) -> PyResult> { if let Some(rt) = self.runtime.as_ref() { - Ok(rt.block_on(self.inner.get_message_queue().get_validator_message())) + Ok(py.allow_threads(|| { + rt.block_on(self.inner.get_message_queue().get_validator_message()) + })) } else { Err(PyErr::new::( "Client not started. Call start() first.".to_string(), @@ -72,9 +76,9 @@ impl WorkerClient { } } - pub fn stop(&mut self) -> PyResult<()> { + pub fn stop(&mut self, py: Python) -> PyResult<()> { if let Some(rt) = self.runtime.as_ref() { - rt.block_on(self.inner.stop_async()) + py.allow_threads(|| rt.block_on(self.inner.stop_async())) .map_err(|e| PyErr::new::(e.to_string()))?; } From 291ced4cc590a84c45b018251247161b9fa3da3f Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Fri, 11 Jul 2025 19:34:26 +0200 Subject: [PATCH 40/40] cleanup message queue setup --- .../prime-protocol-py/src/orchestrator/mod.rs | 36 ++++ .../src/utils/message_queue.rs | 152 +++++++++++++++++ crates/prime-protocol-py/src/utils/mod.rs | 3 +- .../src/validator/message_queue.rs | 46 +++++ crates/prime-protocol-py/src/validator/mod.rs | 93 ++++++++++- .../src/worker/message_queue.rs | 158 +++++------------- crates/prime-protocol-py/src/worker/mod.rs | 1 - 7 files changed, 366 insertions(+), 123 deletions(-) create mode 100644 crates/prime-protocol-py/src/utils/message_queue.rs create mode 100644 crates/prime-protocol-py/src/validator/message_queue.rs diff --git a/crates/prime-protocol-py/src/orchestrator/mod.rs b/crates/prime-protocol-py/src/orchestrator/mod.rs index 39f4d915..c610ea6f 100644 --- a/crates/prime-protocol-py/src/orchestrator/mod.rs +++ b/crates/prime-protocol-py/src/orchestrator/mod.rs @@ -16,4 +16,40 @@ impl OrchestratorClient { let _ = private_key; Ok(Self {}) } + + pub fn list_validated_nodes(&self) -> PyResult> { + // TODO: Implement orchestrator node listing + Ok(vec![]) + } + + pub fn list_nodes_from_chain(&self) -> PyResult> { + // TODO: Implement orchestrator node listing from chain + Ok(vec![]) + } + + // pub fn get_node_details(&self, node_id: String) -> PyResult> { + // // TODO: Implement orchestrator node details fetching + // Ok(None) + // } + + // pub fn get_node_details_from_chain(&self, node_id: String) -> PyResult> { + // // TODO: Implement orchestrator node details fetching from chain + // Ok(None) + // } + + // pub fn send_invite_to_node(&self, node_id: String) -> PyResult<()> { + // // TODO: Implement orchestrator node invite sending + // Ok(()) + // } + + // pub fn send_request_to_node(&self, node_id: String, request: String) -> PyResult<()> { + // // TODO: Implement orchestrator node request sending + // Ok(()) + // } + + // // TODO: Sender of this message? + // pub fn read_message(&self) -> PyResult> { + // // TODO: Implement orchestrator message reading + // Ok(None) + // } } diff --git a/crates/prime-protocol-py/src/utils/message_queue.rs b/crates/prime-protocol-py/src/utils/message_queue.rs new file mode 100644 index 00000000..43153cb1 --- /dev/null +++ b/crates/prime-protocol-py/src/utils/message_queue.rs @@ -0,0 +1,152 @@ +use pyo3::prelude::*; +use serde::{Deserialize, Serialize}; +use std::collections::VecDeque; +use std::sync::Arc; +use tokio::sync::mpsc; +use tokio::sync::Mutex; +use tokio::time::{interval, Duration}; + +use crate::utils::json_parser::json_to_pyobject; + +/// Generic message that can be sent between components +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + pub content: serde_json::Value, + pub timestamp: u64, + pub sender: Option, +} + +/// Simple message queue for handling messages +#[derive(Clone)] +pub struct MessageQueue { + queue: Arc>>, + max_size: Option, + shutdown_tx: Arc>>>, +} + +impl MessageQueue { + /// Create a new message queue + pub fn new(max_size: Option) -> Self { + Self { + queue: Arc::new(Mutex::new(VecDeque::new())), + max_size, + shutdown_tx: Arc::new(Mutex::new(None)), + } + } + + /// Push a message to the queue + pub async fn push_message(&self, message: Message) -> Result<(), String> { + let mut queue = self.queue.lock().await; + + // Check max size if configured + if let Some(max_size) = self.max_size { + if queue.len() >= max_size { + return Err(format!("Queue is full (max size: {})", max_size)); + } + } + + queue.push_back(message); + Ok(()) + } + + /// Get the next message from the queue + pub async fn get_message(&self) -> Option { + let mut queue = self.queue.lock().await; + + queue + .pop_front() + .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + } + + /// Get all messages from the queue (draining it) + pub async fn get_all_messages(&self) -> Vec { + let mut queue = self.queue.lock().await; + + let messages: Vec = queue.drain(..).collect(); + messages + .into_iter() + .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + .collect() + } + + /// Peek at the next message without removing it + pub async fn peek_message(&self) -> Option { + let queue = self.queue.lock().await; + + queue + .front() + .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + } + + /// Get the size of the queue + pub async fn get_queue_size(&self) -> usize { + let queue = self.queue.lock().await; + queue.len() + } + + /// Clear the queue + pub async fn clear(&self) -> Result<(), String> { + let mut queue = self.queue.lock().await; + queue.clear(); + Ok(()) + } + + /// Start a mock message listener (for testing/development) + pub async fn start_mock_listener(&self, frequency: u64) -> Result<(), String> { + let (shutdown_tx, mut shutdown_rx) = mpsc::channel::<()>(1); + + // Store the shutdown sender + { + let mut tx_guard = self.shutdown_tx.lock().await; + *tx_guard = Some(shutdown_tx); + } + + let queue_clone = self.queue.clone(); + + // Spawn background task to simulate incoming messages + tokio::spawn(async move { + let mut ticker = interval(Duration::from_secs(1)); + let mut counter = 0u64; + + loop { + tokio::select! { + _ = ticker.tick() => { + if counter % frequency == 0 { + let message = Message { + content: serde_json::json!({ + "type": "mock_message", + "id": format!("mock_{}", counter), + "data": format!("Mock data #{}", counter), + }), + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + sender: Some("mock_listener".to_string()), + }; + + let mut queue = queue_clone.lock().await; + queue.push_back(message); + log::debug!("Added mock message to queue"); + } + counter += 1; + } + _ = shutdown_rx.recv() => { + log::info!("Mock message listener shutting down"); + break; + } + } + } + }); + + Ok(()) + } + + /// Stop the mock listener + pub async fn stop_listener(&self) -> Result<(), String> { + if let Some(tx) = self.shutdown_tx.lock().await.take() { + let _ = tx.send(()).await; + } + Ok(()) + } +} diff --git a/crates/prime-protocol-py/src/utils/mod.rs b/crates/prime-protocol-py/src/utils/mod.rs index 3e9394ce..da6afad7 100644 --- a/crates/prime-protocol-py/src/utils/mod.rs +++ b/crates/prime-protocol-py/src/utils/mod.rs @@ -1 +1,2 @@ -pub mod json_parser; +pub(crate) mod json_parser; +pub(crate) mod message_queue; diff --git a/crates/prime-protocol-py/src/validator/message_queue.rs b/crates/prime-protocol-py/src/validator/message_queue.rs new file mode 100644 index 00000000..72f1b468 --- /dev/null +++ b/crates/prime-protocol-py/src/validator/message_queue.rs @@ -0,0 +1,46 @@ +use crate::utils::message_queue::{Message, MessageQueue as GenericMessageQueue}; +use pyo3::prelude::*; + +/// Validator-specific message queue for incoming validation results +#[derive(Clone)] +pub struct MessageQueue { + inner: GenericMessageQueue, +} + +impl MessageQueue { + /// Create a new validator message queue for validation results + pub fn new() -> Self { + let inner = GenericMessageQueue::new(None); + + Self { inner } + } + + /// Get the next validation result from nodes + pub async fn get_validation_result(&self) -> Option { + self.inner.get_message().await + } + + /// Push a validation result (for testing or internal use) + pub async fn push_validation_result(&self, content: serde_json::Value) -> Result<(), String> { + let message = Message { + content, + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + sender: None, // Will be set to the node ID when implemented + }; + + self.inner.push_message(message).await + } + + /// Get the number of pending validation results + pub async fn get_queue_size(&self) -> usize { + self.inner.get_queue_size().await + } + + /// Clear all validation results (use with caution) + pub async fn clear(&self) -> Result<(), String> { + self.inner.clear().await + } +} diff --git a/crates/prime-protocol-py/src/validator/mod.rs b/crates/prime-protocol-py/src/validator/mod.rs index ed02939c..6890e799 100644 --- a/crates/prime-protocol-py/src/validator/mod.rs +++ b/crates/prime-protocol-py/src/validator/mod.rs @@ -1,9 +1,29 @@ use pyo3::prelude::*; +pub(crate) mod message_queue; +use self::message_queue::MessageQueue; + +/// Node details for validator operations +#[pyclass] +#[derive(Clone)] +pub(crate) struct NodeDetails { + #[pyo3(get)] + pub address: String, +} + +#[pymethods] +impl NodeDetails { + #[new] + pub fn new(address: String) -> Self { + Self { address } + } +} + /// Prime Protocol Validator Client - for validating task results #[pyclass] pub(crate) struct ValidatorClient { - // TODO: Implement validator-specific functionality + message_queue: MessageQueue, + runtime: Option, } #[pymethods] @@ -14,6 +34,75 @@ impl ValidatorClient { // TODO: Implement validator initialization let _ = rpc_url; let _ = private_key; - Ok(Self {}) + + Ok(Self { + message_queue: MessageQueue::new(), + runtime: None, + }) + } + + /// Initialize the validator client and start listening for messages + pub fn start(&mut self, py: Python) -> PyResult<()> { + // Create a new runtime for this validator + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Store the runtime for future use + self.runtime = Some(rt); + + Ok(()) + } + + pub fn list_nodes(&self) -> PyResult> { + // TODO: Implement validator node listing from chain that are not yet validated + Ok(vec![]) + } + + pub fn fetch_node_details(&self, node_id: String) -> PyResult> { + // TODO: Implement validator node details fetching + Ok(None) + } + + pub fn mark_node_as_validated(&self, node_id: String) -> PyResult<()> { + // TODO: Implement validator node marking as validated + Ok(()) + } + + pub fn send_request_to_node(&self, node_id: String, request: String) -> PyResult<()> { + // TODO: Implement validator node request sending + Ok(()) + } + + pub fn send_request_to_node_address( + &self, + node_address: String, + request: String, + ) -> PyResult<()> { + // TODO: Implement validator node request sending to specific address + let _ = node_address; + let _ = request; + Ok(()) + } + + /// Get the latest validation result from the internal message queue + pub fn get_latest_message(&self, py: Python) -> PyResult> { + if let Some(rt) = self.runtime.as_ref() { + Ok(py.allow_threads(|| rt.block_on(self.message_queue.get_validation_result()))) + } else { + Err(PyErr::new::( + "Validator not started. Call start() first.".to_string(), + )) + } + } + + /// Get the number of pending validation results + pub fn get_queue_size(&self, py: Python) -> PyResult { + if let Some(rt) = self.runtime.as_ref() { + Ok(py.allow_threads(|| rt.block_on(self.message_queue.get_queue_size()))) + } else { + Ok(0) + } } } diff --git a/crates/prime-protocol-py/src/worker/message_queue.rs b/crates/prime-protocol-py/src/worker/message_queue.rs index 9af9a687..167fde05 100644 --- a/crates/prime-protocol-py/src/worker/message_queue.rs +++ b/crates/prime-protocol-py/src/worker/message_queue.rs @@ -1,160 +1,80 @@ +use crate::utils::message_queue::{Message, MessageQueue as GenericMessageQueue}; use pyo3::prelude::*; -use serde::{Deserialize, Serialize}; -use std::collections::VecDeque; -use std::sync::Arc; -use tokio::sync::mpsc; -use tokio::sync::Mutex; -use tokio::time::{interval, Duration}; -use crate::utils::json_parser::json_to_pyobject; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - pub message_type: MessageType, - pub content: serde_json::Value, - pub timestamp: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub enum MessageType { +/// Queue types for the worker message queue +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum QueueType { PoolOwner, Validator, } +/// Worker-specific message queue with predefined queue types #[derive(Clone)] pub struct MessageQueue { - pool_owner_queue: Arc>>, - validator_queue: Arc>>, - shutdown_tx: Arc>>>, + pool_owner_queue: GenericMessageQueue, + validator_queue: GenericMessageQueue, } impl MessageQueue { + /// Create a new worker message queue with pool_owner and validator queues pub fn new() -> Self { Self { - pool_owner_queue: Arc::new(Mutex::new(VecDeque::new())), - validator_queue: Arc::new(Mutex::new(VecDeque::new())), - shutdown_tx: Arc::new(Mutex::new(None)), + pool_owner_queue: GenericMessageQueue::new(None), + validator_queue: GenericMessageQueue::new(None), } } - /// Start the background message listener + /// Start the background message listener for worker pub(crate) async fn start_listener(&self) -> Result<(), String> { - let (shutdown_tx, mut shutdown_rx) = mpsc::channel::<()>(1); - - // Store the shutdown sender - { - let mut tx_guard = self.shutdown_tx.lock().await; - *tx_guard = Some(shutdown_tx); - } - - let pool_owner_queue = self.pool_owner_queue.clone(); - let validator_queue = self.validator_queue.clone(); - - // Spawn background task to simulate incoming p2p messages - tokio::spawn(async move { - let mut ticker = interval(Duration::from_secs(5)); - let mut counter = 0u64; - - loop { - tokio::select! { - _ = ticker.tick() => { - // Mock pool owner messages - if counter % 2 == 0 { - let message = Message { - message_type: MessageType::PoolOwner, - content: serde_json::json!({ - "type": "inference_request", - "task_id": format!("task_{}", counter), - "prompt": format!("Test prompt {}", counter), - }), - timestamp: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs(), - }; - - let mut queue = pool_owner_queue.lock().await; - queue.push_back(message); - log::debug!("Added mock pool owner message to queue"); - } - - // Mock validator messages - if counter % 3 == 0 { - let message = Message { - message_type: MessageType::Validator, - content: serde_json::json!({ - "type": "validation_request", - "task_id": format!("validation_{}", counter), - }), - timestamp: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs(), - }; - - let mut queue = validator_queue.lock().await; - queue.push_back(message); - log::debug!("Added mock validator message to queue"); - } - - counter += 1; - } - _ = shutdown_rx.recv() => { - log::info!("Message listener shutting down"); - break; - } - } - } - }); - + // Start mock listeners with different frequencies + // pool_owner messages every 2 seconds, validator messages every 3 seconds + self.pool_owner_queue.start_mock_listener(2).await?; + self.validator_queue.start_mock_listener(3).await?; Ok(()) } /// Stop the background listener - #[allow(unused)] pub(crate) async fn stop_listener(&self) -> Result<(), String> { - if let Some(tx) = self.shutdown_tx.lock().await.take() { - let _ = tx.send(()).await; - } + self.pool_owner_queue.stop_listener().await?; + self.validator_queue.stop_listener().await?; Ok(()) } + /// Get the next message from the pool owner queue pub(crate) async fn get_pool_owner_message(&self) -> Option { - let mut queue = self.pool_owner_queue.lock().await; - queue - .pop_front() - .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + self.pool_owner_queue.get_message().await } /// Get the next message from the validator queue pub(crate) async fn get_validator_message(&self) -> Option { - let mut queue = self.validator_queue.lock().await; - queue - .pop_front() - .map(|msg| Python::with_gil(|py| json_to_pyobject(py, &msg.content))) + self.validator_queue.get_message().await } /// Push a message to the appropriate queue (for testing or internal use) - #[allow(unused)] - pub(crate) async fn push_message(&self, message: Message) -> Result<(), String> { - match message.message_type { - MessageType::PoolOwner => { - let mut queue = self.pool_owner_queue.lock().await; - queue.push_back(message); - } - MessageType::Validator => { - let mut queue = self.validator_queue.lock().await; - queue.push_back(message); - } + pub(crate) async fn push_message( + &self, + queue_type: QueueType, + content: serde_json::Value, + ) -> Result<(), String> { + let message = Message { + content, + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + sender: Some("worker".to_string()), + }; + + match queue_type { + QueueType::PoolOwner => self.pool_owner_queue.push_message(message).await, + QueueType::Validator => self.validator_queue.push_message(message).await, } - Ok(()) } /// Get queue sizes for monitoring - #[allow(unused)] pub(crate) async fn get_queue_sizes(&self) -> (usize, usize) { - let pool_owner_size = self.pool_owner_queue.lock().await.len(); - let validator_size = self.validator_queue.lock().await.len(); + let pool_owner_size = self.pool_owner_queue.get_queue_size().await; + let validator_size = self.validator_queue.get_queue_size().await; (pool_owner_size, validator_size) } } diff --git a/crates/prime-protocol-py/src/worker/mod.rs b/crates/prime-protocol-py/src/worker/mod.rs index b28e2216..a308df12 100644 --- a/crates/prime-protocol-py/src/worker/mod.rs +++ b/crates/prime-protocol-py/src/worker/mod.rs @@ -2,7 +2,6 @@ use pyo3::prelude::*; mod client; pub(crate) mod message_queue; pub(crate) use client::WorkerClientCore; - /// Prime Protocol Worker Client - for compute nodes that execute tasks #[pyclass] pub(crate) struct WorkerClient {