From ceac7cd90bd9e40934a6f49a1740f747916ecc33 Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Fri, 13 Feb 2026 02:30:05 +0000 Subject: [PATCH] Fix firedancer-dev bench Fixes bench-zen4-128core. Does 600k TPS on my box, needs further tuning. - Introduce HTTP code 503 for when RPC is not yet ready (no bank), fixes crash in bencho - Enable sandbox when watch is disabled (seems to work fine) - Add support for `firedancer-dev mem --topo bench` - Drop bench genesis account configs to avoid hardcoded limit in fd_genesis_parse --- src/app/fddev/commands/bench.c | 3 +- src/app/firedancer-dev/commands/bench.c | 10 +---- .../firedancer/config/bench-zen3-32core.toml | 2 +- .../firedancer/config/bench-zen4-128core.toml | 2 +- src/app/shared/fd_action.h | 1 + src/app/shared_dev/commands/bench/bench.c | 39 +++++++++++++------ src/app/shared_dev/commands/bench/bench.h | 3 +- src/app/shared_dev/commands/bench/fd_bencho.c | 3 +- src/app/shared_dev/rpc_client/fd_rpc_client.c | 5 ++- src/app/shared_dev/rpc_client/fd_rpc_client.h | 32 ++++++++------- src/discof/rpc/fd_rpc_tile.c | 6 ++- src/waltz/http/fd_http_server.c | 3 ++ 12 files changed, 66 insertions(+), 43 deletions(-) diff --git a/src/app/fddev/commands/bench.c b/src/app/fddev/commands/bench.c index 58d6cb5f19f..a9a7aadff1d 100644 --- a/src/app/fddev/commands/bench.c +++ b/src/app/fddev/commands/bench.c @@ -20,7 +20,7 @@ agave_thread_main( void * _args ) { void fddev_bench_cmd_fn( args_t * args, config_t * config ) { - bench_cmd_fn( args, config, 0 ); + bench_cmd_fn( args, config ); pthread_t agave; pthread_create( &agave, NULL, agave_thread_main, (void *)config ); @@ -32,6 +32,7 @@ fddev_bench_cmd_fn( args_t * args, action_t fd_action_bench = { .name = "bench", .args = bench_cmd_args, + .topo = bench_topo, .fn = fddev_bench_cmd_fn, .perm = dev_cmd_perm, .is_local_cluster = 1, diff --git a/src/app/firedancer-dev/commands/bench.c b/src/app/firedancer-dev/commands/bench.c index 1a66a662d8b..ef5a47dc69e 100644 --- 
a/src/app/firedancer-dev/commands/bench.c +++ b/src/app/firedancer-dev/commands/bench.c @@ -3,16 +3,10 @@ #include -static void -bench_cmd_topo( config_t * config ) { - config->development.sandbox = 0; - config->development.no_clone = 1; -} - void firedancer_dev_bench_cmd_fn( args_t * args, config_t * config ) { - bench_cmd_fn( args, config, 1 ); + bench_cmd_fn( args, config ); /* Sleep parent thread forever, Ctrl+C will terminate. */ for(;;) pause(); @@ -21,9 +15,9 @@ firedancer_dev_bench_cmd_fn( args_t * args, action_t fd_action_bench = { .name = "bench", .args = bench_cmd_args, + .topo = bench_topo, .fn = firedancer_dev_bench_cmd_fn, .perm = dev_cmd_perm, - .topo = bench_cmd_topo, .is_local_cluster = 1, .description = "Test validator TPS benchmark" }; diff --git a/src/app/firedancer/config/bench-zen3-32core.toml b/src/app/firedancer/config/bench-zen3-32core.toml index 9d3d0184d66..a9d49b124e2 100644 --- a/src/app/firedancer/config/bench-zen3-32core.toml +++ b/src/app/firedancer/config/bench-zen3-32core.toml @@ -17,7 +17,7 @@ sign_tile_count = 2 [development.genesis] - fund_initial_accounts = 32768 + fund_initial_accounts = 1000 [development.bench] benchg_tile_count = 12 diff --git a/src/app/firedancer/config/bench-zen4-128core.toml b/src/app/firedancer/config/bench-zen4-128core.toml index 65634780b94..d05aa6e2d79 100644 --- a/src/app/firedancer/config/bench-zen4-128core.toml +++ b/src/app/firedancer/config/bench-zen4-128core.toml @@ -12,7 +12,7 @@ sign_tile_count = 2 [development.genesis] - fund_initial_accounts = 32768 + fund_initial_accounts = 1000 [development.bench] benchg_tile_count = 20 diff --git a/src/app/shared/fd_action.h b/src/app/shared/fd_action.h index e346782632f..ab563665ca6 100644 --- a/src/app/shared/fd_action.h +++ b/src/app/shared/fd_action.h @@ -104,6 +104,7 @@ union fdctl_args { ulong benchg; ulong benchs; int no_quic; + int no_watch; int transaction_mode; float contending_fraction; float cu_price_spread; diff --git 
a/src/app/shared_dev/commands/bench/bench.c b/src/app/shared_dev/commands/bench/bench.c index 0fb9e51d1df..a14e9e4abe1 100644 --- a/src/app/shared_dev/commands/bench/bench.c +++ b/src/app/shared_dev/commands/bench/bench.c @@ -32,7 +32,8 @@ void bench_cmd_args( int * pargc, char *** pargv, args_t * args ) { - args->load.no_quic = fd_env_strip_cmdline_contains( pargc, pargv, "--no-quic" ); + args->load.no_quic = fd_env_strip_cmdline_contains( pargc, pargv, "--no-quic" ); + args->load.no_watch = fd_env_strip_cmdline_contains( pargc, pargv, "--no-watch" ); } void @@ -49,7 +50,6 @@ add_bench_topo( fd_topo_t * topo, uint send_to_ip_addr, ushort rpc_port, uint rpc_ip_addr, - int no_quic, int reserve_agave_cores ) { fd_topob_wksp( topo, "bench" ); @@ -101,7 +101,6 @@ add_bench_topo( fd_topo_t * topo, benchs->benchs.send_to_ip_addr = send_to_ip_addr; benchs->benchs.send_to_port = send_to_port; benchs->benchs.conn_cnt = conn_cnt; - benchs->benchs.no_quic = no_quic; } fd_topob_tile_out( topo, "bencho", 0UL, "bencho_out", 0UL ); @@ -123,13 +122,11 @@ add_bench_topo( fd_topo_t * topo, extern int * fd_log_private_shared_lock; void -bench_cmd_fn( args_t * args, - config_t * config, - int watch ) { +fd_topo_initialize( config_t * config ); - ushort dest_port = fd_ushort_if( args->load.no_quic, - config->tiles.quic.regular_transaction_listen_port, - config->tiles.quic.quic_transaction_listen_port ); +void +bench_topo( config_t * config ) { + fd_topo_initialize( config ); ushort rpc_port; uint rpc_ip_addr; @@ -166,12 +163,26 @@ bench_cmd_fn( args_t * args, config->development.genesis.fund_initial_accounts, 0, 0.0f, 0.0f, config->layout.quic_tile_count, - dest_port, + config->tiles.quic.quic_transaction_listen_port, config->net.ip_addr, rpc_port, rpc_ip_addr, - args->load.no_quic, !config->is_firedancer ); +} + +void +bench_cmd_fn( args_t * args, + config_t * config ) { + + if( args->load.no_quic ) { + ushort port = config->tiles.quic.regular_transaction_listen_port; + ulong 
benchs_tile_cnt = fd_topo_tile_name_cnt( &config->topo, "benchs" ); + for( ulong i=0UL; i<benchs_tile_cnt; i++ ) { + fd_topo_tile_t * benchs = &config->topo.tiles[ fd_topo_find_tile( &config->topo, "benchs", i ) ]; + benchs->benchs.no_quic = 1; + benchs->benchs.send_to_port = port; + } + } args_t configure_args = { .configure.command = CONFIGURE_CMD_INIT, @@ -193,7 +204,11 @@ bench_cmd_fn( args_t * args, fd_log_private_shared_lock[ 1 ] = 0; fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_WRITE, FD_TOPO_CORE_DUMP_LEVEL_DISABLED ); - if( watch ) { + if( !args->load.no_watch ) { + /* watch incompatible with sandbox */ + config->development.sandbox = 0; + config->development.no_clone = 1; + int pipefd[2]; if( FD_UNLIKELY( pipe2( pipefd, O_NONBLOCK ) ) ) FD_LOG_ERR(( "pipe2() failed (%i-%s)", errno, fd_io_strerror( errno ) )); diff --git a/src/app/shared_dev/commands/bench/bench.h b/src/app/shared_dev/commands/bench/bench.h index a0373596b6a..7d225013cd6 100644 --- a/src/app/shared_dev/commands/bench/bench.h +++ b/src/app/shared_dev/commands/bench/bench.h @@ -6,7 +6,8 @@ FD_PROTOTYPES_BEGIN -void bench_cmd_fn( args_t * args, config_t * config, int watch ); +void bench_topo( config_t * config ); +void bench_cmd_fn( args_t * args, config_t * config ); void bench_cmd_args( int * pargc, char *** pargv, args_t * args ); void diff --git a/src/app/shared_dev/commands/bench/fd_bencho.c b/src/app/shared_dev/commands/bench/fd_bencho.c index 6acd0744558..2845358b1b0 100644 --- a/src/app/shared_dev/commands/bench/fd_bencho.c +++ b/src/app/shared_dev/commands/bench/fd_bencho.c @@ -74,7 +74,8 @@ service_block_hash( fd_bencho_ctx_t * ctx, return did_work; } - if( FD_UNLIKELY( fd_log_wallclock()<ctx->rpc_ready_deadline && response->status==FD_RPC_CLIENT_ERR_NETWORK ) ) { + if( FD_UNLIKELY( ( fd_log_wallclock()<ctx->rpc_ready_deadline && response->status==FD_RPC_CLIENT_ERR_NETWORK ) || + response->status==FD_RPC_CLIENT_ERR_UNAVAILABLE ) ) { /* RPC server not yet responding, give it some more time... 
*/ ctx->blockhash_state = FD_BENCHO_STATE_WAIT; ctx->blockhash_deadline = fd_log_wallclock() + 100L * 1000L * 1000L; /* 100 millis to retry */ diff --git a/src/app/shared_dev/rpc_client/fd_rpc_client.c b/src/app/shared_dev/rpc_client/fd_rpc_client.c index 706a9718e4b..09917f81cc3 100644 --- a/src/app/shared_dev/rpc_client/fd_rpc_client.c +++ b/src/app/shared_dev/rpc_client/fd_rpc_client.c @@ -218,7 +218,10 @@ parse_response( char * response, if( FD_UNLIKELY( -2==http_len ) ) return FD_RPC_CLIENT_PENDING; else if( FD_UNLIKELY( -1==http_len ) ) return FD_RPC_CLIENT_ERR_MALFORMED; - if( FD_UNLIKELY( status!=200 ) ) return FD_RPC_CLIENT_ERR_MALFORMED; + if( FD_UNLIKELY( status!=200 ) ) { + if( status==503 ) return FD_RPC_CLIENT_ERR_UNAVAILABLE; + return FD_RPC_CLIENT_ERR_MALFORMED; + } ulong content_length = fd_rpc_phr_content_length( headers, num_headers ); if( FD_UNLIKELY( content_length==ULONG_MAX ) ) return FD_RPC_CLIENT_ERR_MALFORMED; diff --git a/src/app/shared_dev/rpc_client/fd_rpc_client.h b/src/app/shared_dev/rpc_client/fd_rpc_client.h index 02120e24e0d..cf4c5fb145c 100644 --- a/src/app/shared_dev/rpc_client/fd_rpc_client.h +++ b/src/app/shared_dev/rpc_client/fd_rpc_client.h @@ -11,13 +11,14 @@ interoperability. It is not fuzzed or hardened, and should not be used in any code that matters. 
*/ -#define FD_RPC_CLIENT_SUCCESS (0) -#define FD_RPC_CLIENT_PENDING (-1) -#define FD_RPC_CLIENT_ERR_NOT_FOUND (-2) -#define FD_RPC_CLIENT_ERR_TOO_LARGE (-3) -#define FD_RPC_CLIENT_ERR_TOO_MANY (-4) -#define FD_RPC_CLIENT_ERR_MALFORMED (-5) -#define FD_RPC_CLIENT_ERR_NETWORK (-6) +#define FD_RPC_CLIENT_SUCCESS (0) +#define FD_RPC_CLIENT_PENDING (-1) +#define FD_RPC_CLIENT_ERR_NOT_FOUND (-2) +#define FD_RPC_CLIENT_ERR_TOO_LARGE (-3) +#define FD_RPC_CLIENT_ERR_TOO_MANY (-4) +#define FD_RPC_CLIENT_ERR_MALFORMED (-5) +#define FD_RPC_CLIENT_ERR_NETWORK (-6) +#define FD_RPC_CLIENT_ERR_UNAVAILABLE (-7) #define FD_RPC_CLIENT_ALIGN (8UL) #define FD_RPC_CLIENT_FOOTPRINT (273424UL) @@ -58,14 +59,15 @@ FD_PROTOTYPES_BEGIN FD_FN_CONST static inline char const * fd_rpc_client_strerror( long err ) { switch( err ) { - case FD_RPC_CLIENT_SUCCESS: return "Success"; - case FD_RPC_CLIENT_PENDING: return "Pending"; - case FD_RPC_CLIENT_ERR_NOT_FOUND: return "Not found"; - case FD_RPC_CLIENT_ERR_TOO_LARGE: return "Request too large"; - case FD_RPC_CLIENT_ERR_TOO_MANY: return "Too many requests in flight"; - case FD_RPC_CLIENT_ERR_MALFORMED: return "Malformed response"; - case FD_RPC_CLIENT_ERR_NETWORK: return "Network error"; - default: return "Unknown error"; + case FD_RPC_CLIENT_SUCCESS: return "Success"; + case FD_RPC_CLIENT_PENDING: return "Pending"; + case FD_RPC_CLIENT_ERR_NOT_FOUND: return "Not found"; + case FD_RPC_CLIENT_ERR_TOO_LARGE: return "Request too large"; + case FD_RPC_CLIENT_ERR_TOO_MANY: return "Too many requests in flight"; + case FD_RPC_CLIENT_ERR_MALFORMED: return "Malformed response"; + case FD_RPC_CLIENT_ERR_NETWORK: return "Network error"; + case FD_RPC_CLIENT_ERR_UNAVAILABLE: return "RPC server unavailable"; + default: return "Unknown error"; } } diff --git a/src/discof/rpc/fd_rpc_tile.c b/src/discof/rpc/fd_rpc_tile.c index a19182c5f4e..014cfe0d3f5 100644 --- a/src/discof/rpc/fd_rpc_tile.c +++ b/src/discof/rpc/fd_rpc_tile.c @@ -529,7 +529,7 @@ 
fd_rpc_validate_config( fd_rpc_tile_t * ctx, _bank_idx = ctx->finalized_idx; } if( FD_UNLIKELY( _bank_idx==ULONG_MAX ) ) { - *res = (fd_http_server_response_t){ .status = 500 }; + *res = (fd_http_server_response_t){ .status = 503 }; return 0; } *bank_idx = _bank_idx; @@ -1077,7 +1077,9 @@ static fd_http_server_response_t getLatestBlockhash( fd_rpc_tile_t * ctx, cJSON const * id, cJSON const * params ) { - if( FD_UNLIKELY( ctx->processed_idx==ULONG_MAX || ctx->banks[ ctx->processed_idx ].slot==ULONG_MAX ) ) return (fd_http_server_response_t){ .status = 500 }; + if( FD_UNLIKELY( ctx->processed_idx==ULONG_MAX || ctx->banks[ ctx->processed_idx ].slot==ULONG_MAX ) ) { + return (fd_http_server_response_t){ .status = 503 }; + } fd_http_server_response_t response; if( FD_UNLIKELY( !fd_rpc_validate_params( ctx, id, params, 0, 1, &response ) ) ) return response; diff --git a/src/waltz/http/fd_http_server.c b/src/waltz/http/fd_http_server.c index 948413e524b..7864bc00784 100644 --- a/src/waltz/http/fd_http_server.c +++ b/src/waltz/http/fd_http_server.c @@ -877,6 +877,9 @@ write_conn_http( fd_http_server_t * http, case 500: FD_TEST( fd_cstr_printf_check( header_buf, sizeof( header_buf ), &response_len, "HTTP/1.1 500 Internal Server Error\r\nContent-Length: 0\r\n" ) ); break; + case 503: + FD_TEST( fd_cstr_printf_check( header_buf, sizeof( header_buf ), &response_len, "HTTP/1.1 503 Service Unavailable\r\nContent-Length: 0\r\n" ) ); + break; default: FD_TEST( fd_cstr_printf_check( header_buf, sizeof( header_buf ), &response_len, "HTTP/1.1 500 Internal Server Error\r\nContent-Length: 0\r\n" ) ); break;