Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/apply-load-max-sac-tps.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ APPLY_LOAD_BL_LAST_BATCH_LEDGERS = 0
# Minimal core config boilerplate

RUN_STANDALONE=true
PARALLEL_LEDGER_APPLY=false
NODE_IS_VALIDATOR=true
UNSAFE_QUORUM=true
NETWORK_PASSPHRASE="Standalone Network ; February 2017"
Expand Down
4 changes: 4 additions & 0 deletions src/bucket/test/BucketManagerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,10 @@ TEST_CASE_VERSIONS(
cfg.MAX_CONCURRENT_SUBPROCESSES = 1;
cfg.ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING = true;
cfg.ARTIFICIALLY_PESSIMIZE_MERGES_FOR_TESTING = true;
// Test loop calls forgetUnreferencedBuckets and
// setNextLedgerEntryBatchForBucketTesting while ledgers close
// automatically, which races with background apply.
cfg.PARALLEL_LEDGER_APPLY = false;
stellar::historytestutils::TmpDirHistoryConfigurator tcfg;
cfg = tcfg.configure(cfg, true);

Expand Down
5 changes: 5 additions & 0 deletions src/herder/test/HerderTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5282,6 +5282,11 @@ externalize(SecretKey const& sk, LedgerManager& lm, HerderImpl& herder,
xdr::xvector<UpgradeType, 6>{}, sk);
herder.getHerderSCPDriver().valueExternalized(ledgerSeq,
xdr::xdr_to_opaque(sv));
// With background apply, crank until the ledger is fully applied
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
app.getClock().crank(true);
Comment on lines +5285 to +5288
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helper now contains an unbounded while loop that will crank forever if ledger apply stalls. Since this is used by multiple tests, it’s especially important to add a deadline/timeout and fail fast with a clear error when ledgerSeq isn’t reached.

Suggested change
// With background apply, crank until the ledger is fully applied
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
app.getClock().crank(true);
// With background apply, crank until the ledger is fully applied,
// but fail fast if it does not close within a reasonable time.
auto timeout = app.getClock().now() + std::chrono::seconds(5);
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
app.getClock().crank(true);
REQUIRE(app.getClock().now() < timeout);

Copilot uses AI. Check for mistakes.
}
}

TEST_CASE("do not flood invalid transactions", "[herder]")
Expand Down
6 changes: 6 additions & 0 deletions src/herder/test/TransactionQueueTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3069,6 +3069,12 @@ TEST_CASE("remove applied", "[herder][transactionqueue]")
app->getConfig().NODE_SEED);
herder.getHerderSCPDriver().valueExternalized(ledgerSeq,
xdr::xdr_to_opaque(sv));

// With background apply, crank until the ledger is fully applied
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
Comment on lines +3073 to +3075
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This unbounded crank loop can hang the test indefinitely if the ledger apply never completes. Consider adding a deadline (e.g., auto timeout = clock.now() + ...; REQUIRE(clock.now() < timeout);) or using a helper that waits with a bounded timeout so failures don’t wedge the test runner.

Suggested change
// With background apply, crank until the ledger is fully applied
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
// With background apply, crank until the ledger is fully applied
auto const timeout =
clock.now() + std::chrono::seconds(10);
while (lm.getLastClosedLedgerNum() < ledgerSeq)
{
REQUIRE(clock.now() < timeout);

Copilot uses AI. Check for mistakes.
clock.crank(true);
}
}

REQUIRE(tq.getTransactions({}).size() == 1);
Expand Down
14 changes: 14 additions & 0 deletions src/history/test/HistoryTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,14 @@ TEST_CASE_VERSIONS(

while (hm.getPublishQueueCount() != 1)
{
// With background apply, wait for any in-progress
// ledger close to finish before writing to the shared
// test-entry vectors that finalizeLedgerTxnChanges
// reads on the apply thread.
while (lm.isApplying())
{
clock.crank(true);
}
auto lcl = lm.getLastClosedLedgerHeader();
lcl.header.ledgerSeq += 1;
// Generate entries excluding soroban types to avoid worrying
Expand Down Expand Up @@ -1975,6 +1983,12 @@ TEST_CASE("Introduce and fix gap without starting catchup",
// Fill in the second gap. All buffered ledgers should be applied, but we
// wait for another ledger to close to get in sync
catchupSimulation.externalizeLedger(herder, nextLedger + 4);

// With background apply, crank until all queued ledgers are applied
while (lm.getLastClosedLedgerNum() < nextLedger + 5)
{
app->getClock().crank(true);
}
Comment on lines +1987 to +1991
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This unbounded crank loop can hang the test indefinitely if something goes wrong and LCL never advances. Since this file already uses testutil::crankUntil elsewhere, consider switching to a bounded crankUntil/deadline-based wait that fails with a clear message on timeout.

Suggested change
// With background apply, crank until all queued ledgers are applied
while (lm.getLastClosedLedgerNum() < nextLedger + 5)
{
app->getClock().crank(true);
}
// With background apply, crank until all queued ledgers are applied.
REQUIRE(testutil::crankUntil(app->getClock(), [&]() {
return lm.getLastClosedLedgerNum() >= nextLedger + 5;
}));

Copilot uses AI. Check for mistakes.
REQUIRE(lm.isSynced());
REQUIRE(lam.getLargestLedgerSeqHeard() == lm.getLastClosedLedgerNum());
REQUIRE(!lam.isCatchupInitialized());
Expand Down
8 changes: 8 additions & 0 deletions src/invariant/test/ConservationOfLumensTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,11 @@ TEST_CASE(
{
auto cfg = getTestConfig();
cfg.INVARIANT_CHECKS = {"ConservationOfLumens"};
// This test directly modifies LedgerTxnRoot header (totalCoins), which
// creates a hash mismatch between the DB header and what SCP externalized.
// This is incompatible with background apply where the cross-check runs on
// a thread that doesn't have access to the cached LCL header.
cfg.PARALLEL_LEDGER_APPLY = false;

SorobanTest test(cfg);

Expand Down Expand Up @@ -378,6 +383,9 @@ TEST_CASE("ConservationOfLumens snapshot invariant detects bucket corruption",
auto cfg = getTestConfig();
cfg.INVARIANT_CHECKS = {}; // Disable automatic invariant checks because we
// will invoke it manually
// This test directly modifies LedgerTxnRoot header (totalCoins), which is
// incompatible with background apply (see comment in the test above).
cfg.PARALLEL_LEDGER_APPLY = false;

VirtualClock clock;
auto app = createTestApplication<BucketTestUtils::BucketTestApplication>(
Expand Down
11 changes: 11 additions & 0 deletions src/ledger/LedgerManagerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,17 @@ LedgerManagerImpl::applyLedger(LedgerCloseData const& ledgerData,
CLOG_ERROR(Ledger, "{}", xdrToCerealString(prevHeader, "Full LCL"));
CLOG_ERROR(Ledger, "{}", POSSIBLY_CORRUPTED_LOCAL_DATA);

#ifdef BUILD_TESTS
if (!threadIsMain())
{
throw std::runtime_error(
"txset mismatch on background apply thread. This usually means "
"a test directly modified the LedgerTxnRoot header (e.g. "
"totalCoins). Set cfg.PARALLEL_LEDGER_APPLY = false for such "
"tests.");
}
#endif

throw std::runtime_error("txset mismatch");
}

Expand Down
7 changes: 7 additions & 0 deletions src/ledger/test/LedgerCloseMetaStreamTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,13 @@ TEST_CASE("METADATA_DEBUG_LEDGERS works", "[metadebug]")
{
// Generate just enough meta to not trigger garbage collection
closeLedgers(cfg.METADATA_DEBUG_LEDGERS);

// Drain any remaining background apply before stopping, so the
// debug tx set file and LCL are consistent when we read them.
while (lm.isApplying())
{
clock.crank(true);
}
app->gracefulStop();

// Verify presence of the latest debug tx set
Expand Down
9 changes: 9 additions & 0 deletions src/main/ApplicationImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,15 @@ ApplicationImpl::manualClose(std::optional<uint32_t> const& manualLedgerSeq,

if (mConfig.RUN_STANDALONE)
{
// With background apply, triggerNextLedger posts work to the
// apply thread. Crank until the ledger is fully applied and
// LCL has advanced.
while (getLedgerManager().getLastClosedLedgerNum() <
targetLedgerSeq)
{
getClock().crank(true);
}
Comment on lines +990 to +997
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This introduces an unbounded while loop that will crank forever if the ledger never advances (e.g., apply thread stuck or an exception prevented completion). To avoid hanging the manualclose command, add a timeout/deadline and throw a descriptive error if targetLedgerSeq isn’t reached in time.

Copilot uses AI. Check for mistakes.

auto const newLedgerSeq =
getLedgerManager().getLastClosedLedgerNum();
if (newLedgerSeq != targetLedgerSeq)
Expand Down
3 changes: 3 additions & 0 deletions src/main/CommandLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,9 @@ runApplyLoad(CommandLineArgs const& args)

// Apply Load may exceed TX_SET byte size limits, so ignore them
config.IGNORE_MESSAGE_LIMITS_FOR_TESTING = true;

// Always use background ledger close for max-sac-tps
config.PARALLEL_LEDGER_APPLY = true;
}

VirtualClock clock(VirtualClock::REAL_TIME);
Expand Down
21 changes: 5 additions & 16 deletions src/main/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1966,19 +1966,10 @@ Config::processConfig(std::shared_ptr<cpptoml::table> t)

if (PARALLEL_LEDGER_APPLY && !parallelLedgerClose())
{
if (RUN_STANDALONE)
{
LOG_WARNING(DEFAULT_LOG, "RUN_STANDALONE is enabled, disabling "
"PARALLEL_LEDGER_APPLY");
PARALLEL_LEDGER_APPLY = false;
}
else
{
std::string msg =
"Invalid configuration: PARALLEL_LEDGER_APPLY "
"does not support in-memory database modes.";
throw std::runtime_error(msg);
}
LOG_WARNING(DEFAULT_LOG,
"PARALLEL_LEDGER_APPLY is not supported with "
"in-memory SQLite, disabling.");
PARALLEL_LEDGER_APPLY = false;
Comment on lines 1967 to +1972
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Behavior change: previously, enabling PARALLEL_LEDGER_APPLY with an in-memory DB would throw unless RUN_STANDALONE was set; now it silently disables parallel apply for all modes. This can mask misconfiguration outside tests (users may think parallel apply is active). Consider preserving the previous behavior (throw when not RUN_STANDALONE, or at least when not BUILD_TESTS) and only auto-disable in standalone/testing contexts.

Copilot uses AI. Check for mistakes.
}

if (INVARIANT_EXTRA_CHECKS && NODE_IS_VALIDATOR)
Expand Down Expand Up @@ -2547,9 +2538,7 @@ Config::allBucketsInMemory() const
bool
Config::parallelLedgerClose() const
{
// Standalone mode expects synchronous ledger application
return PARALLEL_LEDGER_APPLY && !RUN_STANDALONE &&
DATABASE.value != "sqlite3://:memory:";
return PARALLEL_LEDGER_APPLY && DATABASE.value != "sqlite3://:memory:";
}

void
Expand Down
7 changes: 5 additions & 2 deletions src/overlay/test/TCPPeerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,10 @@ TEST_CASE("TCPPeer read malformed messages", "[overlay]")
v11SecretKey.getPublicKey());
s->startAllNodes();
s->stopOverlayTick();
s->crankForAtLeast(std::chrono::seconds(5), false);
// Use generous timeouts: ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING
// adds 300ms per postOnMainThread callback. With background ledger
// apply, there are additional callbacks that each incur this sleep.
s->crankForAtLeast(std::chrono::seconds(15), false);
auto p0 = n0->getOverlayManager().getConnectedPeer(
PeerBareAddress{"127.0.0.1", n1->getConfig().PEER_PORT});

Expand Down Expand Up @@ -193,7 +196,7 @@ TEST_CASE("TCPPeer read malformed messages", "[overlay]")
return !p0->isConnectedForTesting() &&
!p1->isConnectedForTesting();
},
std::chrono::seconds(10), false);
std::chrono::seconds(30), false);
REQUIRE(!p0->isConnectedForTesting());
REQUIRE(!p1->isConnectedForTesting());
REQUIRE(p1->getDropReason() == dropReason);
Expand Down
4 changes: 4 additions & 0 deletions src/transactions/test/InflationTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,10 @@ TEST_CASE_VERSIONS("inflation total coins", "[tx][inflation]")
return;
}

// This test directly modifies LedgerTxnRoot header (ledgerVersion), which
// creates a hash mismatch incompatible with background apply.
cfg.PARALLEL_LEDGER_APPLY = false;

// The math in this test assumes that every tx will be charged the fee it
// specifies, but this isn't true for v11, so start from V0 and update the
// version in the header
Expand Down
20 changes: 17 additions & 3 deletions src/util/Timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,12 +388,26 @@ VirtualClock::crank(bool block)

// Subtract out any timer cancellations from the above two steps.
progressCount -= nRealTimerCancelEvents;

if (mMode == VIRTUAL_TIME && progressCount == 0 &&
mBackgroundWorkCount.load() == 0)
{
// If we did nothing and we're in virtual mode, we're idle and can
// skip time forward, dispatching all timers at the next time-step.
progressCount += advanceToNext();
// Check if there are pending actions from background threads
// before deciding to advance virtual time. Without this check,
// we might spuriously jump time forward while background apply
// results are waiting in the pending queue.
bool hasPendingActions = false;
{
std::lock_guard<std::mutex> guard(mPendingActionQueueMutex);
hasPendingActions = !mPendingActionQueue.empty();
}
if (!hasPendingActions)
{
// If we did nothing and we're in virtual mode, we're idle
// and can skip time forward, dispatching all timers at the
// next time-step.
progressCount += advanceToNext();
}
}
}

Expand Down
Loading