From 6f94aaae10fe853bd09aad27ee2443ba5942dd1a Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Thu, 12 Feb 2026 02:27:11 +0000 Subject: [PATCH] replay: make dispatcher depth configurable Make replay dispatcher depth configurable to save memory. fd_rdisp_new takes a considerable amount of time with MSan (30s) with the previous default. Decreases live client depth to 1M and backtest depth to 8K. --- src/app/firedancer-dev/commands/backtest.c | 1 + src/app/firedancer/config/default.toml | 7 ++ src/app/firedancer/topology.c | 1 + src/app/shared/fd_config.h | 4 + src/app/shared/fd_config_parse.c | 1 + src/disco/topo/fd_topo.h | 1 + src/discof/replay/fd_replay_tile.c | 6 +- src/discof/replay/fd_sched.c | 113 +++++++++++++++------ src/discof/replay/fd_sched.h | 28 ++++- 9 files changed, 123 insertions(+), 39 deletions(-) diff --git a/src/app/firedancer-dev/commands/backtest.c b/src/app/firedancer-dev/commands/backtest.c index 1ba7842d9fb..4ef03e948e5 100644 --- a/src/app/firedancer-dev/commands/backtest.c +++ b/src/app/firedancer-dev/commands/backtest.c @@ -520,6 +520,7 @@ extern int * fd_log_private_shared_lock; static void backtest_cmd_topo( config_t * config ) { + config->firedancer.development.replay.scheduler_depth = 8192UL; backtest_topo( config ); } diff --git a/src/app/firedancer/config/default.toml b/src/app/firedancer/config/default.toml index 64df51024fd..71a7f05dfc2 100644 --- a/src/app/firedancer/config/default.toml +++ b/src/app/firedancer/config/default.toml @@ -1944,3 +1944,10 @@ telemetry = true # the layout will be ignored. [development.snapshots] disable_lthash_verification = true + + [development.replay] + # scheduler_depth controls the number of transactions tracked by + # the replay scheduler. Larger values can result in faster + # replay times (particularly during heavy forking) at the cost + # of greater memory use. 
+ scheduler_depth = 1048576 diff --git a/src/app/firedancer/topology.c b/src/app/firedancer/topology.c index 69a9cfcda72..10e40d74ebf 100644 --- a/src/app/firedancer/topology.c +++ b/src/app/firedancer/topology.c @@ -1534,6 +1534,7 @@ fd_topo_configure_tile( fd_topo_tile_t * tile, tile->replay.expected_shred_version = config->consensus.expected_shred_version; tile->replay.wait_for_vote_to_start_leader = config->consensus.wait_for_vote_to_start_leader; + tile->replay.sched_depth = config->firedancer.development.replay.scheduler_depth; tile->replay.max_live_slots = config->firedancer.runtime.max_live_slots; tile->replay.write_delay_slots = config->firedancer.vinyl.write_delay_slots; diff --git a/src/app/shared/fd_config.h b/src/app/shared/fd_config.h index f633d4d9947..9857ab18cd5 100644 --- a/src/app/shared/fd_config.h +++ b/src/app/shared/fd_config.h @@ -170,6 +170,10 @@ struct fd_configf { struct { int hard_fork_fatal; + + struct { + ulong scheduler_depth; + } replay; } development; struct { diff --git a/src/app/shared/fd_config_parse.c b/src/app/shared/fd_config_parse.c index bf406a001a7..ae6576a82b1 100644 --- a/src/app/shared/fd_config_parse.c +++ b/src/app/shared/fd_config_parse.c @@ -126,6 +126,7 @@ fd_config_extract_podf( uchar * pod, CFG_POP ( uint, snapshots.min_download_speed_mibs ); CFG_POP ( bool, development.hard_fork_fatal ); + CFG_POP ( ulong, development.replay.scheduler_depth ); return config; } diff --git a/src/disco/topo/fd_topo.h b/src/disco/topo/fd_topo.h index e4321e1b0dd..f379333b6bb 100644 --- a/src/disco/topo/fd_topo.h +++ b/src/disco/topo/fd_topo.h @@ -397,6 +397,7 @@ struct fd_topo_tile { int wait_for_vote_to_start_leader; ulong heap_size_gib; + ulong sched_depth; ulong max_live_slots; ulong write_delay_slots; diff --git a/src/discof/replay/fd_replay_tile.c b/src/discof/replay/fd_replay_tile.c index 0cb7f0373a2..bd17403b27c 100644 --- a/src/discof/replay/fd_replay_tile.c +++ b/src/discof/replay/fd_replay_tile.c @@ -449,7 +449,7 @@ 
scratch_footprint( fd_topo_tile_t const * tile ) { l = FD_LAYOUT_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) ); l = FD_LAYOUT_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) ); l = FD_LAYOUT_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) ); - l = FD_LAYOUT_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.max_live_slots ) ); + l = FD_LAYOUT_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) ); l = FD_LAYOUT_APPEND( l, fd_vinyl_req_pool_align(), fd_vinyl_req_pool_footprint( 1UL, 1UL ) ); l = FD_LAYOUT_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() ); l = FD_LAYOUT_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() ); @@ -2500,7 +2500,7 @@ unprivileged_init( fd_topo_t * topo, void * block_id_map_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) ); void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) ); void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) ); - void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.max_live_slots ) ); + void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) ); void * vinyl_req_pool_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_req_pool_align(), fd_vinyl_req_pool_footprint( 1UL, 1UL ) ); void * vote_tracker_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() ); void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() ); @@ -2626,7 +2626,7 @@ unprivileged_init( fd_topo_t * topo, ctx->reasm = fd_reasm_join( fd_reasm_new( reasm_mem, tile->replay.fec_max, ctx->reasm_seed ) ); FD_TEST( 
ctx->reasm ); - ctx->sched = fd_sched_join( fd_sched_new( sched_mem, tile->replay.max_live_slots, ctx->exec_cnt ), tile->replay.max_live_slots ); + ctx->sched = fd_sched_join( fd_sched_new( sched_mem, tile->replay.sched_depth, tile->replay.max_live_slots, ctx->exec_cnt ) ); FD_TEST( ctx->sched ); FD_TEST( fd_vinyl_req_pool_new( vinyl_req_pool_mem, 1UL, 1UL ) ); diff --git a/src/discof/replay/fd_sched.c b/src/discof/replay/fd_sched.c index c95e8e3f590..083fe476969 100644 --- a/src/discof/replay/fd_sched.c +++ b/src/discof/replay/fd_sched.c @@ -10,7 +10,6 @@ #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */ #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */ -#define FD_SCHED_MAX_DEPTH (FD_RDISP_MAX_DEPTH>>2) #define FD_SCHED_MAX_STAGING_LANES_LOG (2) #define FD_SCHED_MAX_STAGING_LANES (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG) FD_STATIC_ASSERT( FD_SCHED_MAX_STAGING_LANES<=sizeof(ulong), resize buffer for res ); /* NOTE(review): this span was garbled in extraction; the shift expression and static assert are reconstructed -- verify against upstream fd_sched.c */ #define FD_SCHED_MAX_TXN_PER_FEC ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */ +FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits ); +FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH, limits ); + #define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */ #define FD_SCHED_OK (0) @@ -215,6 +217,7 @@ struct fd_sched { ulong print_buf_sz; fd_sched_metrics_t metrics[ 1 ]; ulong canary; /* == FD_SCHED_MAGIC */ + ulong depth; /* Immutable. */ ulong block_cnt_max; /* Immutable. */ ulong exec_cnt; /* Immutable. */ long txn_in_flight_last_tick; @@ -232,8 +235,8 @@ struct fd_sched { ulong staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is not set in the bitset. 
*/ ulong txn_pool_free_cnt; - fd_txn_p_t txn_pool[ FD_SCHED_MAX_DEPTH ]; - uint txn_idx_to_parse_idx[ FD_SCHED_MAX_DEPTH ]; + fd_txn_p_t * txn_pool; + uint * txn_idx_to_parse_idx; ulong tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */ txn_bitset_t exec_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */ txn_bitset_t sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */ @@ -483,13 +486,13 @@ print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * FD_FN_UNUSED static void print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) { fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n", - block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, FD_SCHED_MAX_DEPTH, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob ); + block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, 
block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob ); } FD_FN_UNUSED static void print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) { fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, prev_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n", - block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, FD_SCHED_MAX_DEPTH, block->block_pool_max_popcnt, sched->block_cnt_max, block->prev_tick_hashcnt, 
block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob ); + block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->prev_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob ); } FD_FN_UNUSED static void @@ -509,7 +512,7 @@ print_metrics( fd_sched_t * sched ) { FD_FN_UNUSED static void print_sched( fd_sched_t * sched ) { fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n", - sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->txn_pool_free_cnt, FD_SCHED_MAX_DEPTH, sched->block_pool_popcnt, 
sched->block_cnt_max ); + sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max ); fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx ); if( active_block ) print_block_debug( sched, active_block ); for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) { @@ -540,35 +543,73 @@ handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) { /* Public functions. */ -ulong fd_sched_align( void ) { +ulong +fd_sched_align( void ) { return fd_ulong_max( alignof(fd_sched_t), fd_ulong_max( fd_rdisp_align(), fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */ } ulong -fd_sched_footprint( ulong block_cnt_max ) { +fd_sched_footprint( ulong depth, + ulong block_cnt_max ) { + if( FD_UNLIKELY( depthFD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */ + if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */ ulong l = FD_LAYOUT_INIT; - l = FD_LAYOUT_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); - l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( FD_SCHED_MAX_DEPTH, block_cnt_max ) ); /* dispatcher */ - l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); /* block pool */ - l = FD_LAYOUT_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) ); + l = FD_LAYOUT_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); + l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */ + l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); /* block pool */ + l = FD_LAYOUT_APPEND( l, ref_q_align(), 
ref_q_footprint( block_cnt_max ) ); + l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) ); /* txn_pool */ + l = FD_LAYOUT_APPEND( l, sizeof(uint), depth*sizeof(uint) ); /* txn_idx_to_parse_idx */ return FD_LAYOUT_FINI( l, fd_sched_align() ); } void * -fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt ) { - FD_TEST( exec_cnt && exec_cnt<=FD_SCHED_MAX_EXEC_TILE_CNT ); +fd_sched_new( void * mem, + ulong depth, + ulong block_cnt_max, + ulong exec_cnt ) { + + if( FD_UNLIKELY( !mem ) ) { + FD_LOG_WARNING(( "NULL mem" )); + return NULL; + } + + if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) { + FD_LOG_WARNING(( "misaligned mem (%p)", mem )); + return NULL; + } + + if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) { /* NOTE(review): was depth<32UL, inconsistent with fd_sched_footprint and the documented range [FD_SCHED_MIN_DEPTH,FD_SCHED_MAX_DEPTH] */ + FD_LOG_WARNING(( "bad depth (%lu)", depth )); + return NULL; + } + + if( FD_UNLIKELY( !block_cnt_max ) ) { + FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max )); + return NULL; + } + + if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) { + FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt )); + return NULL; + } FD_SCRATCH_ALLOC_INIT( l, mem ); - fd_sched_t * sched = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); - void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( FD_SCHED_MAX_DEPTH, block_cnt_max ) ); - void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); - void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) ); + fd_sched_t * sched = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); + void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) ); + void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); + void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) 
); + fd_txn_p_t * txn_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) ); + uint * txn_idx_to_parse_idx = FD_SCRATCH_ALLOC_APPEND( l, sizeof(uint), depth*sizeof(uint) ); FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() ); + sched->txn_pool = txn_pool; + sched->txn_idx_to_parse_idx = txn_idx_to_parse_idx; + ulong seed = ((ulong)fd_tickcount()) ^ FD_SCHED_MAGIC; - fd_rdisp_new( _rdisp, FD_SCHED_MAX_DEPTH, block_cnt_max, seed ); + fd_rdisp_new( _rdisp, depth, block_cnt_max, seed ); fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool; for( ulong i=0; i<block_cnt_max; i++ ) { /* NOTE(review): loop body lost in extraction (initializes block pool entry i) -- restore from upstream fd_sched.c */ } sched->txn_in_flight_last_tick = LONG_MAX; sched->canary = FD_SCHED_MAGIC; + sched->depth = depth; sched->block_cnt_max = block_cnt_max; sched->exec_cnt = exec_cnt; sched->root_idx = ULONG_MAX; @@ -593,7 +635,7 @@ fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt ) { sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt ); sched->poh_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt ); - sched->txn_pool_free_cnt = FD_SCHED_MAX_DEPTH-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */ + sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */ txn_bitset_new( sched->exec_done_set ); txn_bitset_new( sched->sigverify_done_set ); @@ -607,17 +649,24 @@ fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt ) { } fd_sched_t * -fd_sched_join( void * mem, ulong block_cnt_max ) { - fd_sched_t * sched = (fd_sched_t *)mem; +fd_sched_join( void * mem ) { + + if( FD_UNLIKELY( !mem ) ) { + FD_LOG_WARNING(( "NULL mem" )); + return NULL; + } + + fd_sched_t * sched = (fd_sched_t *)mem; + ulong depth = sched->depth; + ulong block_cnt_max = sched->block_cnt_max; FD_TEST( sched->canary==FD_SCHED_MAGIC ); - FD_TEST( sched->block_cnt_max==block_cnt_max ); FD_SCRATCH_ALLOC_INIT( l, mem ); - /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); - void * _rdisp = 
FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( FD_SCHED_MAX_DEPTH, block_cnt_max ) ); - void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); - void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) ); + /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) ); + void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) ); + void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); + void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) ); FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() ); sched->rdisp = fd_rdisp_join( _rdisp ); @@ -793,7 +842,7 @@ fd_sched_fec_ingest( fd_sched_t * sched, } } - block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, FD_SCHED_MAX_DEPTH-sched->txn_pool_free_cnt ); + block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt ); block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt ); if( FD_UNLIKELY( block->dying ) ) { @@ -957,7 +1006,7 @@ fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) { FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx )); } - block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, FD_SCHED_MAX_DEPTH-sched->txn_pool_free_cnt ); + block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt ); block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt ); if( FD_UNLIKELY( !block->block_start_signaled ) ) { @@ -1161,7 +1210,7 @@ fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong ex case FD_SCHED_TT_TXN_EXEC: case 
FD_SCHED_TT_TXN_SIGVERIFY: { (void)data; - FD_TEST( txn_idx<FD_SCHED_MAX_DEPTH ); + FD_TEST( txn_idx<sched->depth ); bank_idx = sched->tile_to_bank_idx[ exec_idx ]; break; } @@ -1189,7 +1238,7 @@ fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong ex block->slot, block->parent_slot )); } - block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, FD_SCHED_MAX_DEPTH-sched->txn_pool_free_cnt ); + block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt ); block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt ); int exec_tile_idx = (int)exec_idx; @@ -1588,7 +1637,7 @@ add_block( fd_sched_t * sched, block->mblk_tick_cnt = 0U; block->mblk_unhashed_cnt = 0U; block->hashcnt = 0UL; - block->txn_pool_max_popcnt = FD_SCHED_MAX_DEPTH-sched->txn_pool_free_cnt; + block->txn_pool_max_popcnt = sched->depth - sched->txn_pool_free_cnt; block->block_pool_max_popcnt = sched->block_pool_popcnt; mblk_in_progress_bitset_full( block->mblk_in_progress_pool_free_bitset ); diff --git a/src/discof/replay/fd_sched.h b/src/discof/replay/fd_sched.h index d56e692fa3c..75d6d09d09a 100644 --- a/src/discof/replay/fd_sched.h +++ b/src/discof/replay/fd_sched.h @@ -50,6 +50,9 @@ more ingest, more ready, more done ... ... */ +#define FD_SCHED_MIN_DEPTH 478 +#define FD_SCHED_MAX_DEPTH FD_RDISP_MAX_DEPTH + struct fd_sched; typedef struct fd_sched fd_sched_t; @@ -154,16 +157,33 @@ FD_PROTOTYPES_BEGIN /* fd_sched_{align,footprint} return the required alignment and footprint in bytes for a region of memory to be used as a scheduler. + footprint silently returns 0 if params are invalid (thus convenient + to validate params). + + depth controls the reorder buffer transaction count (~1 million + recommended for live replay, ~10k recommended for async replay). block_cnt_max is the maximum number of blocks that will be tracked by the scheduler. 
*/ -ulong fd_sched_align ( void ); -ulong fd_sched_footprint( ulong block_cnt_max ); + +ulong +fd_sched_align( void ); + +ulong +fd_sched_footprint( ulong depth, /* in [FD_SCHED_MIN_DEPTH,FD_SCHED_MAX_DEPTH] */ + ulong block_cnt_max ); /* >= 1 */ + +/* fd_sched_new creates a sched object backed by the given memory region + (conforming to align() and footprint()). Returns NULL if any + parameter is invalid. */ void * -fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt ); +fd_sched_new( void * mem, + ulong depth, + ulong block_cnt_max, + ulong exec_cnt ); fd_sched_t * -fd_sched_join( void * mem, ulong block_cnt_max ); +fd_sched_join( void * mem ); /* Add the data in the FEC set to the scheduler. If is_last_fec is 1, then this is the last FEC set in the block. Transactions may span