From e9faba8d981d18bd1472685785608ef7c5aa4018 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 7 Jan 2026 17:22:09 +0500 Subject: [PATCH 1/6] Fix hash table corruption from removing entries during iteration The relmetacache_flush function was calling hash_search with HASH_REMOVE while iterating through RelMetaCache using hash_seq_search. This corrupts the iterator state because removing an entry deallocates memory that the iterator references, reorganizes bucket chains that invalidate the iterator's position, and may trigger dynamic resizing. The result is segmentation faults from accessing freed memory, skipped entries when the iterator loses its position, or infinite loops from corrupted chain pointers. Fixed by collecting all relids in a first pass, then removing them in a second pass after iteration completes. The relmetacache_prune function had the same issue when removing invalid cache entries during iteration. It was calling hash_search with HASH_REMOVE on entries marked as invalid while hash_seq_search was actively scanning the hash table. This causes identical corruption problems as the iterator's cached bucket index and chain pointers become stale when entries are physically removed from the table. Fixed using the same two-pass pattern: collect relids of invalid entries during iteration, then remove them after the scan finishes. The reset_channel_stats function was removing all statistics entries from SpockHash while iterating with hash_seq_search. This violated the fundamental rule that hash tables must not be modified during sequential scans. The iterator maintains pointers to the current entry and the next entry to visit, both of which become dangling pointers when hash_search removes the current entry. Under concurrent load this causes crashes with hash table corrupted errors and potential loss of statistics data. Fixed by copying all keys during iteration, then removing entries in a separate loop after the iterator completes its scan. --- src/spock_functions.c | 73 ++++++++++++++++++++++- src/spock_output_plugin.c | 118 ++++++++++++++++++++++++++++++++------ 2 files changed, 171 insertions(+), 20 deletions(-) diff --git a/src/spock_functions.c b/src/spock_functions.c index 34724ab5..84086c47 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3023,6 +3023,10 @@ reset_channel_stats(PG_FUNCTION_ARGS) { HASH_SEQ_STATUS hash_seq; spockStatsEntry *entry; + spockStatsKey *keys_to_remove; + int num_entries; + int i; + int idx; if (!SpockCtx || !SpockHash) ereport(ERROR, @@ -3031,10 +3035,73 @@ reset_channel_stats(PG_FUNCTION_ARGS) LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE); - hash_seq_init(&hash_seq, SpockHash); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + /* + * Reset all channel statistics by removing all entries from SpockHash. + * + * CRITICAL: We must not call hash_search(HASH_REMOVE) during iteration + * with hash_seq_search(). The PostgreSQL hash table API maintains + * iteration state in HASH_SEQ_STATUS, which includes: + * + * 1. Current bucket index being scanned + * 2. Pointer to current entry in the bucket's chain + * 3. 
Pointer to next entry to visit + * + * When hash_search(HASH_REMOVE) is called, it physically removes the + * entry from the hash table, which may: + * + * a) Free the memory pointed to by the iterator's current entry pointer + * b) Reorganize the bucket chain, invalidating the next-entry pointer + * c) Trigger dynamic hash table resizing (if bucket count changes) + * d) Modify bucket links that the iterator depends on for traversal + * + * Any of these consequences breaks the iterator's invariants, leading to: + * - Segmentation faults (accessing freed entry memory) + * - Skipped entries (iterator loses track of position) + * - Duplicate visits (iterator revisits reorganized entries) + * - Infinite loops (corrupted chain pointers creating cycles) + * + * The correct and safe pattern used here is: + * + * Pass 1 (Iteration): Scan the entire hash table with hash_seq_search() + * while the structure is stable, collecting all keys + * into a temporary array. The keys are copied (by + * value), so we have no dangling pointers. + * + * Pass 2 (Removal): After iteration completes and the iterator is no + * longer active, remove all entries using the + * collected keys. Hash table modifications are now + * safe because no iteration is in progress. + * + * Note: We hold LW_EXCLUSIVE lock during both passes to prevent + * concurrent modifications by other processes, ensuring atomicity of the + * reset operation. The two-pass approach only protects against corruption + * from our own modifications during iteration, not concurrent access + * (which the lock handles). + */ + num_entries = hash_get_num_entries(SpockHash); + if (num_entries > 0) { - hash_search(SpockHash, &entry->key, HASH_REMOVE, NULL); + /* + * Allocate space for all keys. We copy the entire spockStatsKey + * structure (containing dboid, subid, relid) for each entry. + */ + keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey)); + idx = 0; + + /* First pass: collect all keys while iterating over stable hash table */ + hash_seq_init(&hash_seq, SpockHash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + keys_to_remove[idx++] = entry->key; + } + + /* Second pass: remove all entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL); + } + + pfree(keys_to_remove); } LWLockRelease(SpockCtx->lock); diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index 68e8526d..f930db20 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1568,19 +1568,60 @@ relmetacache_flush(void) { HASH_SEQ_STATUS status; struct SPKRelMetaCacheEntry *hentry; + Oid *relids; + int num_entries; + int i; + int idx; + + if (RelMetaCache == NULL) + return; + + /* + * We must not call hash_search(HASH_REMOVE) while iterating through the + * hash table with hash_seq_search(), because removing entries corrupts + * the internal iterator state maintained by HASH_SEQ_STATUS. The hash + * table implementation uses a bucket-based structure, and when an entry + * is removed, it may cause bucket reorganization or entry relocation that + * invalidates the iterator's current position. This can lead to: + * + * 1. Iterator skipping entries that should be visited + * 2. Iterator visiting the same entry multiple times + * 3. Iterator dereferencing freed memory (crash) + * 4. 
Infinite loops or premature iteration termination + * + * The correct pattern is to collect all keys/OIDs to be removed during + * the first pass (iteration), then perform all removals in a second pass + * after the iteration completes. This ensures the iterator completes its + * scan over a stable hash table structure. + * + * We allocate an array sized to the current number of entries, which is + * safe because we're removing all entries (no entry will be skipped). + * The palloc is temporary and freed at function exit. + */ + num_entries = hash_get_num_entries(RelMetaCache); + if (num_entries == 0) + return; - if (RelMetaCache != NULL) + relids = (Oid *) palloc(num_entries * sizeof(Oid)); + idx = 0; + + /* First pass: collect all relids while iterating */ + hash_seq_init(&status, RelMetaCache); + while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { - hash_seq_init(&status, RelMetaCache); + relids[idx++] = hentry->relid; + } - while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } + /* Second pass: remove all entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + if (hash_search(RelMetaCache, + (void *) &relids[i], + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); } + + pfree(relids); } /* @@ -1594,27 +1635,70 @@ relmetacache_prune(void) { HASH_SEQ_STATUS status; struct SPKRelMetaCacheEntry *hentry; + Oid *relids_to_remove; + int num_entries; + int i; + int idx; /* - * Since the pruning can be expensive, do it only if ig we invalidated at + * Since the pruning can be expensive, do it only if we invalidated at * least half of initial cache size. */ if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE / 2) return; - hash_seq_init(&status, RelMetaCache); + /* + * We must not call hash_search(HASH_REMOVE) while iterating through the + * hash table with hash_seq_search(), because removing entries corrupts + * the internal iterator state maintained by HASH_SEQ_STATUS. The hash + * table uses bucket-based open addressing with chaining, and when an + * entry is removed, it may trigger: + * + * 1. Bucket chain reorganization (moving entries within the chain) + * 2. Bucket splitting or merging (if dynamic hashing is enabled) + * 3. Memory deallocation of the removed entry + * + * Any of these operations can invalidate the iterator's cached position + * (current bucket index and chain pointer), leading to undefined behavior + * such as visiting wrong entries, skipping valid entries, or accessing + * freed memory resulting in segmentation faults. + * + * The standard PostgreSQL pattern is a two-pass approach: first, iterate + * through the hash table and collect keys of entries to be removed; + * second, after iteration completes, remove all collected entries. This + * guarantees iterator stability because the hash table structure remains + * unchanged during iteration. + * + * We allocate space for num_entries even though we'll only remove invalid + * entries. This wastes some memory temporarily but simplifies the code + * and avoids a second pass to count invalid entries. The array is freed + * immediately after removal completes. 
+ */ + num_entries = hash_get_num_entries(RelMetaCache); + if (num_entries == 0) + return; + + relids_to_remove = (Oid *) palloc(num_entries * sizeof(Oid)); + idx = 0; + /* First pass: collect relids of invalid entries while iterating */ + hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { if (!hentry->is_valid) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } + relids_to_remove[idx++] = hentry->relid; + } + + /* Second pass: remove all invalid entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + if (hash_search(RelMetaCache, + (void *) &relids_to_remove[i], + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); } + pfree(relids_to_remove); InvalidRelMetaCacheCnt = 0; } From d9b71af400a17f2fa761a79cc77288b1d31e5756 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 19:41:11 +0500 Subject: [PATCH 2/6] Reduce verbosity of hash table iterator corruption comments The previous commit added extensive documentation explaining the hash table iterator corruption issue. While thorough, these comments were overly verbose for production code. This commit condenses them to be more concise while preserving the essential information: the problem (calling hash_search HASH_REMOVE during hash_seq_search corrupts the iterator state), the consequences (crashes, skipped entries), and the solution (two-pass approach: collect keys, then remove). Simplified comments in relmetacache_flush, relmetacache_prune, and reset_channel_stats functions without changing any code logic. --- src/spock_functions.c | 53 ++++------------------------------ src/spock_output_plugin.c | 61 ++++++++------------------------------- 2 files changed, 18 insertions(+), 96 deletions(-) diff --git a/src/spock_functions.c b/src/spock_functions.c index 84086c47..e4b8908d 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3036,66 +3036,25 @@ reset_channel_stats(PG_FUNCTION_ARGS) LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE); /* - * Reset all channel statistics by removing all entries from SpockHash. - * - * CRITICAL: We must not call hash_search(HASH_REMOVE) during iteration - * with hash_seq_search(). The PostgreSQL hash table API maintains - * iteration state in HASH_SEQ_STATUS, which includes: - * - * 1. Current bucket index being scanned - * 2. Pointer to current entry in the bucket's chain - * 3. Pointer to next entry to visit - * - * When hash_search(HASH_REMOVE) is called, it physically removes the - * entry from the hash table, which may: - * - * a) Free the memory pointed to by the iterator's current entry pointer - * b) Reorganize the bucket chain, invalidating the next-entry pointer - * c) Trigger dynamic hash table resizing (if bucket count changes) - * d) Modify bucket links that the iterator depends on for traversal - * - * Any of these consequences breaks the iterator's invariants, leading to: - * - Segmentation faults (accessing freed entry memory) - * - Skipped entries (iterator loses track of position) - * - Duplicate visits (iterator revisits reorganized entries) - * - Infinite loops (corrupted chain pointers creating cycles) - * - * The correct and safe pattern used here is: - * - * Pass 1 (Iteration): Scan the entire hash table with hash_seq_search() - * while the structure is stable, collecting all keys - * into a temporary array. The keys are copied (by - * value), so we have no dangling pointers. 
- * - * Pass 2 (Removal): After iteration completes and the iterator is no - * longer active, remove all entries using the - * collected keys. Hash table modifications are now - * safe because no iteration is in progress. - * - * Note: We hold LW_EXCLUSIVE lock during both passes to prevent - * concurrent modifications by other processes, ensuring atomicity of the - * reset operation. The two-pass approach only protects against corruption - * from our own modifications during iteration, not concurrent access - * (which the lock handles). + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries invalidates the iterator state (bucket chain + * pointers, memory references), causing crashes or skipped entries. + * Use two-pass approach: collect keys during iteration, then remove. */ num_entries = hash_get_num_entries(SpockHash); if (num_entries > 0) { - /* - * Allocate space for all keys. We copy the entire spockStatsKey - * structure (containing dboid, subid, relid) for each entry. - */ keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey)); idx = 0; - /* First pass: collect all keys while iterating over stable hash table */ + /* First pass: collect keys */ hash_seq_init(&hash_seq, SpockHash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { keys_to_remove[idx++] = entry->key; } - /* Second pass: remove all entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL); diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index f930db20..f33e5219 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1577,26 +1577,10 @@ relmetacache_flush(void) return; /* - * We must not call hash_search(HASH_REMOVE) while iterating through the - * hash table with hash_seq_search(), because removing entries corrupts - * the internal iterator state maintained by HASH_SEQ_STATUS. The hash - * table implementation uses a bucket-based structure, and when an entry - * is removed, it may cause bucket reorganization or entry relocation that - * invalidates the iterator's current position. This can lead to: - * - * 1. Iterator skipping entries that should be visited - * 2. Iterator visiting the same entry multiple times - * 3. Iterator dereferencing freed memory (crash) - * 4. Infinite loops or premature iteration termination - * - * The correct pattern is to collect all keys/OIDs to be removed during - * the first pass (iteration), then perform all removals in a second pass - * after the iteration completes. This ensures the iterator completes its - * scan over a stable hash table structure. - * - * We allocate an array sized to the current number of entries, which is - * safe because we're removing all entries (no entry will be skipped). - * The palloc is temporary and freed at function exit. + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries corrupts the iterator state (bucket pointers, + * freed memory), causing crashes or skipped entries. Use two-pass + * approach: collect relids during iteration, then remove. 
*/ num_entries = hash_get_num_entries(RelMetaCache); if (num_entries == 0) @@ -1605,14 +1589,14 @@ relmetacache_flush(void) relids = (Oid *) palloc(num_entries * sizeof(Oid)); idx = 0; - /* First pass: collect all relids while iterating */ + /* First pass: collect relids */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { relids[idx++] = hentry->relid; } - /* Second pass: remove all entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, @@ -1648,31 +1632,10 @@ relmetacache_prune(void) return; /* - * We must not call hash_search(HASH_REMOVE) while iterating through the - * hash table with hash_seq_search(), because removing entries corrupts - * the internal iterator state maintained by HASH_SEQ_STATUS. The hash - * table uses bucket-based open addressing with chaining, and when an - * entry is removed, it may trigger: - * - * 1. Bucket chain reorganization (moving entries within the chain) - * 2. Bucket splitting or merging (if dynamic hashing is enabled) - * 3. Memory deallocation of the removed entry - * - * Any of these operations can invalidate the iterator's cached position - * (current bucket index and chain pointer), leading to undefined behavior - * such as visiting wrong entries, skipping valid entries, or accessing - * freed memory resulting in segmentation faults. - * - * The standard PostgreSQL pattern is a two-pass approach: first, iterate - * through the hash table and collect keys of entries to be removed; - * second, after iteration completes, remove all collected entries. This - * guarantees iterator stability because the hash table structure remains - * unchanged during iteration. - * - * We allocate space for num_entries even though we'll only remove invalid - * entries. This wastes some memory temporarily but simplifies the code - * and avoids a second pass to count invalid entries. The array is freed - * immediately after removal completes. + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries corrupts the iterator state (bucket pointers, + * freed memory), causing crashes or skipped entries. Use two-pass + * approach: collect relids of invalid entries, then remove. */ num_entries = hash_get_num_entries(RelMetaCache); if (num_entries == 0) @@ -1681,7 +1644,7 @@ relmetacache_prune(void) relids_to_remove = (Oid *) palloc(num_entries * sizeof(Oid)); idx = 0; - /* First pass: collect relids of invalid entries while iterating */ + /* First pass: collect invalid entries */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { @@ -1689,7 +1652,7 @@ relmetacache_prune(void) relids_to_remove[idx++] = hentry->relid; } - /* Second pass: remove all invalid entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, From 0e40427a0dfb3aaab57cab08f25237beb0d52e5a Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 19:43:50 +0500 Subject: [PATCH 3/6] Remove redundant memory context switches in hash table operations The hash_create call was configured with HASH_CONTEXT flag and ctl.hcxt = RelMetaCacheContext, which instructs the hash table API to perform all allocations in the specified context automatically. 
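For reference, a minimal sketch of the dynahash behavior relied on here,
mirroring the setup in relmetacache_init() (it is assumed, as in the
existing code, that hash_flags also carries HASH_ELEM and HASH_CONTEXT):

    HASHCTL		ctl;
    int			hash_flags = HASH_ELEM | HASH_BLOBS | HASH_CONTEXT;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(Oid);
    ctl.entrysize = sizeof(struct SPKRelMetaCacheEntry);
    ctl.hcxt = RelMetaCacheContext;

    /*
     * With HASH_CONTEXT set, hash_create() allocates the table, and all
     * entries created later by hash_search(HASH_ENTER), under ctl.hcxt,
     * regardless of CurrentMemoryContext.
     */
    RelMetaCache = hash_create("spock relation metadata cache",
                               RELMETACACHE_INITIAL_SIZE,
                               &ctl, hash_flags);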
The explicit MemoryContextSwitchTo calls around hash_create and hash_search were therefore redundant and have been removed. This simplifies the code by removing 2 variable declarations and 4 lines of context switch boilerplate. The hash table operations will continue to use RelMetaCacheContext for all allocations as configured. --- src/spock_output_plugin.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index f33e5219..17e6544d 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1490,8 +1490,6 @@ relmetacache_init(MemoryContext decoding_context) if (RelMetaCache == NULL) { - MemoryContext old_ctxt; - RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, "spock output relmetacache", ALLOCSET_DEFAULT_SIZES); @@ -1504,11 +1502,9 @@ relmetacache_init(MemoryContext decoding_context) ctl.entrysize = sizeof(struct SPKRelMetaCacheEntry); ctl.hcxt = RelMetaCacheContext; hash_flags |= HASH_BLOBS; - old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); RelMetaCache = hash_create("spock relation metadata cache", RELMETACACHE_INITIAL_SIZE, &ctl, hash_flags); - (void) MemoryContextSwitchTo(old_ctxt); Assert(RelMetaCache != NULL); @@ -1532,14 +1528,11 @@ relmetacache_get_relation(struct SpockOutputData *data, { struct SPKRelMetaCacheEntry *hentry; bool found; - MemoryContext old_mctx; /* Find cached function info, creating if not found */ - old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); hentry = (struct SPKRelMetaCacheEntry *) hash_search(RelMetaCache, (void *) (&RelationGetRelid(rel)), HASH_ENTER, &found); - (void) MemoryContextSwitchTo(old_mctx); /* If not found or not valid, it can't be cached. */ if (!found || !hentry->is_valid) From fc68b2859754f20c38787095dccd09d4d3ea5253 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 22:32:32 +0500 Subject: [PATCH 4/6] Fix hash table corruption issues in shared memory 1. Missing NULL checks after hash_search(HASH_ENTER) on HASH_FIXED_SIZE tables. When SpockHash or SpockGroupHash reach capacity, hash_search returns NULL instead of expanding the table. Dereferencing NULL causes segmentation faults. Added NULL checks in handle_stats_counter(), spock_group_attach(), and spock_group_progress_update(). 2. Race condition in handle_stats_counter() where the function releases LW_SHARED, checks hash fullness without lock, then reacquires LW_EXCLUSIVE. Between these locks, concurrent processes can fill the table, causing HASH_ENTER to return NULL unexpectedly. The added NULL check handles this TOCTOU (time-of-check-to-time-of-use) race. 3. Lock mismatch in get_channel_stats() causing torn reads of 64-bit counter values. The reader held only LW_SHARED while writers used per-entry spinlocks (entry->mutex). Added spinlock acquisition in reader to ensure atomic access and prevent data races that violate the C memory model. All three fixes prevent crashes from NULL pointer dereference and ensure data consistency for statistics counters in shared memory hash tables. 
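To illustrate fix 3, the writer/reader pairing looks roughly as follows.
The writer body is a sketch of what handle_stats_counter() does per the
description above, not a verbatim excerpt; the counter index (typ) and
increment (ntup) are assumed from that function's signature:

    /* Writer (handle_stats_counter): update under the per-entry spinlock */
    SpinLockAcquire(&entry->mutex);
    entry->counter[typ] += ntup;
    SpinLockRelease(&entry->mutex);

    /*
     * Reader (get_channel_stats): take the same spinlock so a 64-bit
     * counter cannot be observed half-updated on platforms where such
     * loads are not atomic.
     */
    SpinLockAcquire(&entry->mutex);
    for (j = 0; j < SPOCK_STATS_NUM_COUNTERS; j++)
        values[i++] = Int64GetDatum(entry->counter[j]);
    SpinLockRelease(&entry->mutex);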
--- src/spock_functions.c | 8 ++++++++ src/spock_group.c | 25 +++++++++++++++++++++++++ src/spock_worker.c | 12 ++++++++++++ 3 files changed, 45 insertions(+) diff --git a/src/spock_functions.c b/src/spock_functions.c index e4b8908d..54e57195 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3002,8 +3002,16 @@ get_channel_stats(PG_FUNCTION_ARGS) values[i++] = ObjectIdGetDatum(entry->key.subid); values[i++] = ObjectIdGetDatum(entry->key.relid); + /* + * Acquire spinlock before reading counter values to prevent torn + * reads. The writer (handle_stats_counter) uses entry->mutex to + * protect counter updates, so we must use the same lock for reads + * to ensure atomic access to 64-bit counter values. + */ + SpinLockAcquire(&entry->mutex); for (j = 0; j < SPOCK_STATS_NUM_COUNTERS; j++) values[i++] = Int64GetDatum(entry->counter[j]); + SpinLockRelease(&entry->mutex); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } diff --git a/src/spock_group.c b/src/spock_group.c index 41ae2fc0..b6d19272 100644 --- a/src/spock_group.c +++ b/src/spock_group.c @@ -188,6 +188,19 @@ spock_group_attach(Oid dbid, Oid node_id, Oid remote_node_id) bool found; e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this + * to prevent dereferencing NULL pointer. + */ + if (e == NULL) + { + elog(ERROR, "SpockGroupHash is full, cannot attach to group " + "(dbid=%u, node_id=%u, remote_node_id=%u)", + dbid, node_id, remote_node_id); + return NULL; + } + if (!found) { /* initialize key values; Other entries will be updated later */ @@ -307,6 +320,18 @@ spock_group_progress_update(const SpockApplyProgress *sap) key = make_key(sap->key.dbid, sap->key.node_id, sap->key.remote_node_id); e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this + * to prevent dereferencing NULL pointer. + */ + if (e == NULL) + { + elog(WARNING, "SpockGroupHash is full, cannot update progress for group " + "(dbid=%u, node_id=%u, remote_node_id=%u)", + sap->key.dbid, sap->key.node_id, sap->key.remote_node_id); + return false; + } + if (!found) /* New Entry */ { /* Initialize key values; Other entries will be updated later */ diff --git a/src/spock_worker.c b/src/spock_worker.c index fc8857ac..f665f2bb 100644 --- a/src/spock_worker.c +++ b/src/spock_worker.c @@ -1100,6 +1100,18 @@ handle_stats_counter(Relation relation, Oid subid, spockStatsType typ, int ntup) entry = (spockStatsEntry *) hash_search(SpockHash, &key, HASH_ENTER, &found); + + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for + * this to prevent dereferencing NULL pointer. + */ + if (entry == NULL) + { + LWLockRelease(SpockCtx->lock); + elog(WARNING, "SpockHash is full, cannot add stats entry"); + spock_stats_hash_full = true; + return; + } } if (!found) From e0c13208d1dbd2ed77a570f1cbd747c16527dd9d Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Mon, 12 Jan 2026 23:08:31 +0500 Subject: [PATCH 5/6] Simplify hash table flush to match PostgreSQL pattern Replace the two-pass approach (collect keys then remove) with the simpler pattern used in PostgreSQL core code: iterate once and remove entries during iteration. This matches the style used in ShippableCache and other similar hash table flush operations in PostgreSQL. 
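The single-pass form relies on a documented dynahash guarantee: the caller
may delete the entry most recently returned by hash_seq_search() without
invalidating the scan. A minimal sketch of the pattern (MyHash, MyEntry,
and the key field are placeholder names):

    HASH_SEQ_STATUS status;
    MyEntry    *entry;

    hash_seq_init(&status, MyHash);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        /*
         * Deleting the just-returned entry is explicitly allowed;
         * deleting any other entry, or inserting, while the scan is
         * open is not.
         */
        if (hash_search(MyHash, &entry->key, HASH_REMOVE, NULL) == NULL)
            elog(ERROR, "hash table corrupted");
    }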
The previous implementation collected keys in a first pass and removed the
entries in a second pass to avoid modifying the table mid-scan; since every
removal here targets the entry the scan just returned, that indirection is
unnecessary, and the single-pass form is safe when flushing all entries.
---
 src/spock_functions.c     | 37 ++++++++++---------------------------
 src/spock_output_plugin.c | 32 ++++++--------------------------
 2 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/src/spock_functions.c b/src/spock_functions.c
index 08b7a13a..04a39bb9 100644
--- a/src/spock_functions.c
+++ b/src/spock_functions.c
@@ -3031,10 +3031,6 @@ reset_channel_stats(PG_FUNCTION_ARGS)
 {
 	HASH_SEQ_STATUS hash_seq;
 	spockStatsEntry *entry;
-	spockStatsKey *keys_to_remove;
-	int			num_entries;
-	int			i;
-	int			idx;
 
 	if (!SpockCtx || !SpockHash)
 		ereport(ERROR,
@@ -3044,31 +3040,18 @@ reset_channel_stats(PG_FUNCTION_ARGS)
 	LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE);
 
 	/*
-	 * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration
-	 * because removing entries invalidates the iterator state (bucket chain
-	 * pointers, memory references), causing crashes or skipped entries.
-	 * Use two-pass approach: collect keys during iteration, then remove.
+	 * In principle we could reset only specific channel statistics; but that
+	 * would be more complicated, and it's probably not worth the trouble.
+	 * So for now, just reset all entries.
 	 */
-	num_entries = hash_get_num_entries(SpockHash);
-	if (num_entries > 0)
+	hash_seq_init(&hash_seq, SpockHash);
+	while ((entry = hash_seq_search(&hash_seq)) != NULL)
 	{
-		keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey));
-		idx = 0;
-
-		/* First pass: collect keys */
-		hash_seq_init(&hash_seq, SpockHash);
-		while ((entry = hash_seq_search(&hash_seq)) != NULL)
-		{
-			keys_to_remove[idx++] = entry->key;
-		}
-
-		/* Second pass: remove entries */
-		for (i = 0; i < idx; i++)
-		{
-			hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL);
-		}
-
-		pfree(keys_to_remove);
+		if (hash_search(SpockHash,
+						&entry->key,
+						HASH_REMOVE,
+						NULL) == NULL)
+			elog(ERROR, "hash table corrupted");
 	}
 
 	LWLockRelease(SpockCtx->lock);
diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c
index 1fab8ef6..0d404628 100644
--- a/src/spock_output_plugin.c
+++ b/src/spock_output_plugin.c
@@ -1559,44 +1559,24 @@ relmetacache_flush(void)
 {
 	HASH_SEQ_STATUS status;
 	struct SPKRelMetaCacheEntry *hentry;
-	Oid		   *relids;
-	int			num_entries;
-	int			i;
-	int			idx;
 
 	if (RelMetaCache == NULL)
 		return;
 
 	/*
-	 * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration
-	 * because removing entries corrupts the iterator state (bucket pointers,
-	 * freed memory), causing crashes or skipped entries. Use two-pass
-	 * approach: collect relids during iteration, then remove.
+	 * In principle we could flush only cache entries relating to specific
+	 * relations; but that would be more complicated, and it's probably not
+	 * worth the trouble. So for now, just flush all entries.
*/ - num_entries = hash_get_num_entries(RelMetaCache); - if (num_entries == 0) - return; - - relids = (Oid *) palloc(num_entries * sizeof(Oid)); - idx = 0; - - /* First pass: collect relids */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) - { - relids[idx++] = hentry->relid; - } - - /* Second pass: remove entries */ - for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, - (void *) &relids[i], - HASH_REMOVE, NULL) == NULL) + (void *) &hentry->relid, + HASH_REMOVE, + NULL) == NULL) elog(ERROR, "hash table corrupted"); } - - pfree(relids); } /* From b18eec202b01bfd3046b7faaddeb06060a5dfa8b Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Tue, 13 Jan 2026 21:01:19 +0500 Subject: [PATCH 6/6] Fix undeclared variable 'e' in spock_group functions --- src/spock_group.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spock_group.c b/src/spock_group.c index c721ad4e..a16de662 100644 --- a/src/spock_group.c +++ b/src/spock_group.c @@ -215,13 +215,13 @@ spock_group_attach(Oid dbid, Oid node_id, Oid remote_node_id) SpockGroupEntry *entry; bool found; - e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + entry = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); /* * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this * to prevent dereferencing NULL pointer. */ - if (e == NULL) + if (entry == NULL) { elog(ERROR, "SpockGroupHash is full, cannot attach to group " "(dbid=%u, node_id=%u, remote_node_id=%u)", @@ -373,7 +373,7 @@ spock_group_progress_update(const SpockApplyProgress *sap) * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this * to prevent dereferencing NULL pointer. */ - if (e == NULL) + if (entry == NULL) { elog(WARNING, "SpockGroupHash is full, cannot update progress for group " "(dbid=%u, node_id=%u, remote_node_id=%u)",