From e9faba8d981d18bd1472685785608ef7c5aa4018 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 7 Jan 2026 17:22:09 +0500 Subject: [PATCH 1/6] Fix hash table corruption from removing entries during iteration The relmetacache_flush function was calling hash_search with HASH_REMOVE while iterating through RelMetaCache using hash_seq_search. This corrupts the iterator state because removing an entry deallocates memory that the iterator references, reorganizes bucket chains that invalidate the iterator's position, and may trigger dynamic resizing. The result is segmentation faults from accessing freed memory, skipped entries when the iterator loses its position, or infinite loops from corrupted chain pointers. Fixed by collecting all relids in a first pass, then removing them in a second pass after iteration completes. The relmetacache_prune function had the same issue when removing invalid cache entries during iteration. It was calling hash_search with HASH_REMOVE on entries marked as invalid while hash_seq_search was actively scanning the hash table. This causes identical corruption problems as the iterator's cached bucket index and chain pointers become stale when entries are physically removed from the table. Fixed using the same two-pass pattern: collect relids of invalid entries during iteration, then remove them after the scan finishes. The reset_channel_stats function was removing all statistics entries from SpockHash while iterating with hash_seq_search. This violated the fundamental rule that hash tables must not be modified during sequential scans. The iterator maintains pointers to the current entry and the next entry to visit, both of which become dangling pointers when hash_search removes the current entry. Under concurrent load this causes crashes with hash table corrupted errors and potential loss of statistics data. Fixed by copying all keys during iteration, then removing entries in a separate loop after the iterator completes its scan. --- src/spock_functions.c | 73 ++++++++++++++++++++++- src/spock_output_plugin.c | 118 ++++++++++++++++++++++++++++++++------ 2 files changed, 171 insertions(+), 20 deletions(-) diff --git a/src/spock_functions.c b/src/spock_functions.c index 34724ab5..84086c47 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3023,6 +3023,10 @@ reset_channel_stats(PG_FUNCTION_ARGS) { HASH_SEQ_STATUS hash_seq; spockStatsEntry *entry; + spockStatsKey *keys_to_remove; + int num_entries; + int i; + int idx; if (!SpockCtx || !SpockHash) ereport(ERROR, @@ -3031,10 +3035,73 @@ reset_channel_stats(PG_FUNCTION_ARGS) LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE); - hash_seq_init(&hash_seq, SpockHash); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + /* + * Reset all channel statistics by removing all entries from SpockHash. + * + * CRITICAL: We must not call hash_search(HASH_REMOVE) during iteration + * with hash_seq_search(). The PostgreSQL hash table API maintains + * iteration state in HASH_SEQ_STATUS, which includes: + * + * 1. Current bucket index being scanned + * 2. Pointer to current entry in the bucket's chain + * 3. 
Pointer to next entry to visit + * + * When hash_search(HASH_REMOVE) is called, it physically removes the + * entry from the hash table, which may: + * + * a) Free the memory pointed to by the iterator's current entry pointer + * b) Reorganize the bucket chain, invalidating the next-entry pointer + * c) Trigger dynamic hash table resizing (if bucket count changes) + * d) Modify bucket links that the iterator depends on for traversal + * + * Any of these consequences breaks the iterator's invariants, leading to: + * - Segmentation faults (accessing freed entry memory) + * - Skipped entries (iterator loses track of position) + * - Duplicate visits (iterator revisits reorganized entries) + * - Infinite loops (corrupted chain pointers creating cycles) + * + * The correct and safe pattern used here is: + * + * Pass 1 (Iteration): Scan the entire hash table with hash_seq_search() + * while the structure is stable, collecting all keys + * into a temporary array. The keys are copied (by + * value), so we have no dangling pointers. + * + * Pass 2 (Removal): After iteration completes and the iterator is no + * longer active, remove all entries using the + * collected keys. Hash table modifications are now + * safe because no iteration is in progress. + * + * Note: We hold LW_EXCLUSIVE lock during both passes to prevent + * concurrent modifications by other processes, ensuring atomicity of the + * reset operation. The two-pass approach only protects against corruption + * from our own modifications during iteration, not concurrent access + * (which the lock handles). + */ + num_entries = hash_get_num_entries(SpockHash); + if (num_entries > 0) { - hash_search(SpockHash, &entry->key, HASH_REMOVE, NULL); + /* + * Allocate space for all keys. We copy the entire spockStatsKey + * structure (containing dboid, subid, relid) for each entry. + */ + keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey)); + idx = 0; + + /* First pass: collect all keys while iterating over stable hash table */ + hash_seq_init(&hash_seq, SpockHash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + keys_to_remove[idx++] = entry->key; + } + + /* Second pass: remove all entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL); + } + + pfree(keys_to_remove); } LWLockRelease(SpockCtx->lock); diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index 68e8526d..f930db20 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1568,19 +1568,60 @@ relmetacache_flush(void) { HASH_SEQ_STATUS status; struct SPKRelMetaCacheEntry *hentry; + Oid *relids; + int num_entries; + int i; + int idx; + + if (RelMetaCache == NULL) + return; + + /* + * We must not call hash_search(HASH_REMOVE) while iterating through the + * hash table with hash_seq_search(), because removing entries corrupts + * the internal iterator state maintained by HASH_SEQ_STATUS. The hash + * table implementation uses a bucket-based structure, and when an entry + * is removed, it may cause bucket reorganization or entry relocation that + * invalidates the iterator's current position. This can lead to: + * + * 1. Iterator skipping entries that should be visited + * 2. Iterator visiting the same entry multiple times + * 3. Iterator dereferencing freed memory (crash) + * 4. 
Infinite loops or premature iteration termination + * + * The correct pattern is to collect all keys/OIDs to be removed during + * the first pass (iteration), then perform all removals in a second pass + * after the iteration completes. This ensures the iterator completes its + * scan over a stable hash table structure. + * + * We allocate an array sized to the current number of entries, which is + * safe because we're removing all entries (no entry will be skipped). + * The palloc is temporary and freed at function exit. + */ + num_entries = hash_get_num_entries(RelMetaCache); + if (num_entries == 0) + return; - if (RelMetaCache != NULL) + relids = (Oid *) palloc(num_entries * sizeof(Oid)); + idx = 0; + + /* First pass: collect all relids while iterating */ + hash_seq_init(&status, RelMetaCache); + while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { - hash_seq_init(&status, RelMetaCache); + relids[idx++] = hentry->relid; + } - while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } + /* Second pass: remove all entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + if (hash_search(RelMetaCache, + (void *) &relids[i], + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); } + + pfree(relids); } /* @@ -1594,27 +1635,70 @@ relmetacache_prune(void) { HASH_SEQ_STATUS status; struct SPKRelMetaCacheEntry *hentry; + Oid *relids_to_remove; + int num_entries; + int i; + int idx; /* - * Since the pruning can be expensive, do it only if ig we invalidated at + * Since the pruning can be expensive, do it only if we invalidated at * least half of initial cache size. */ if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE / 2) return; - hash_seq_init(&status, RelMetaCache); + /* + * We must not call hash_search(HASH_REMOVE) while iterating through the + * hash table with hash_seq_search(), because removing entries corrupts + * the internal iterator state maintained by HASH_SEQ_STATUS. The hash + * table uses bucket-based open addressing with chaining, and when an + * entry is removed, it may trigger: + * + * 1. Bucket chain reorganization (moving entries within the chain) + * 2. Bucket splitting or merging (if dynamic hashing is enabled) + * 3. Memory deallocation of the removed entry + * + * Any of these operations can invalidate the iterator's cached position + * (current bucket index and chain pointer), leading to undefined behavior + * such as visiting wrong entries, skipping valid entries, or accessing + * freed memory resulting in segmentation faults. + * + * The standard PostgreSQL pattern is a two-pass approach: first, iterate + * through the hash table and collect keys of entries to be removed; + * second, after iteration completes, remove all collected entries. This + * guarantees iterator stability because the hash table structure remains + * unchanged during iteration. + * + * We allocate space for num_entries even though we'll only remove invalid + * entries. This wastes some memory temporarily but simplifies the code + * and avoids a second pass to count invalid entries. The array is freed + * immediately after removal completes. 
+ */ + num_entries = hash_get_num_entries(RelMetaCache); + if (num_entries == 0) + return; + + relids_to_remove = (Oid *) palloc(num_entries * sizeof(Oid)); + idx = 0; + /* First pass: collect relids of invalid entries while iterating */ + hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { if (!hentry->is_valid) - { - if (hash_search(RelMetaCache, - (void *) &hentry->relid, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } + relids_to_remove[idx++] = hentry->relid; + } + + /* Second pass: remove all invalid entries now that iteration is complete */ + for (i = 0; i < idx; i++) + { + if (hash_search(RelMetaCache, + (void *) &relids_to_remove[i], + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); } + pfree(relids_to_remove); InvalidRelMetaCacheCnt = 0; } From d9b71af400a17f2fa761a79cc77288b1d31e5756 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 19:41:11 +0500 Subject: [PATCH 2/6] Reduce verbosity of hash table iterator corruption comments The previous commit added extensive documentation explaining the hash table iterator corruption issue. While thorough, these comments were overly verbose for production code. This commit condenses them to be more concise while preserving the essential information: the problem (calling hash_search HASH_REMOVE during hash_seq_search corrupts the iterator state), the consequences (crashes, skipped entries), and the solution (two-pass approach: collect keys, then remove). Simplified comments in relmetacache_flush, relmetacache_prune, and reset_channel_stats functions without changing any code logic. --- src/spock_functions.c | 53 ++++------------------------------ src/spock_output_plugin.c | 61 ++++++++------------------------------- 2 files changed, 18 insertions(+), 96 deletions(-) diff --git a/src/spock_functions.c b/src/spock_functions.c index 84086c47..e4b8908d 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3036,66 +3036,25 @@ reset_channel_stats(PG_FUNCTION_ARGS) LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE); /* - * Reset all channel statistics by removing all entries from SpockHash. - * - * CRITICAL: We must not call hash_search(HASH_REMOVE) during iteration - * with hash_seq_search(). The PostgreSQL hash table API maintains - * iteration state in HASH_SEQ_STATUS, which includes: - * - * 1. Current bucket index being scanned - * 2. Pointer to current entry in the bucket's chain - * 3. Pointer to next entry to visit - * - * When hash_search(HASH_REMOVE) is called, it physically removes the - * entry from the hash table, which may: - * - * a) Free the memory pointed to by the iterator's current entry pointer - * b) Reorganize the bucket chain, invalidating the next-entry pointer - * c) Trigger dynamic hash table resizing (if bucket count changes) - * d) Modify bucket links that the iterator depends on for traversal - * - * Any of these consequences breaks the iterator's invariants, leading to: - * - Segmentation faults (accessing freed entry memory) - * - Skipped entries (iterator loses track of position) - * - Duplicate visits (iterator revisits reorganized entries) - * - Infinite loops (corrupted chain pointers creating cycles) - * - * The correct and safe pattern used here is: - * - * Pass 1 (Iteration): Scan the entire hash table with hash_seq_search() - * while the structure is stable, collecting all keys - * into a temporary array. The keys are copied (by - * value), so we have no dangling pointers. 
- * - * Pass 2 (Removal): After iteration completes and the iterator is no - * longer active, remove all entries using the - * collected keys. Hash table modifications are now - * safe because no iteration is in progress. - * - * Note: We hold LW_EXCLUSIVE lock during both passes to prevent - * concurrent modifications by other processes, ensuring atomicity of the - * reset operation. The two-pass approach only protects against corruption - * from our own modifications during iteration, not concurrent access - * (which the lock handles). + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries invalidates the iterator state (bucket chain + * pointers, memory references), causing crashes or skipped entries. + * Use two-pass approach: collect keys during iteration, then remove. */ num_entries = hash_get_num_entries(SpockHash); if (num_entries > 0) { - /* - * Allocate space for all keys. We copy the entire spockStatsKey - * structure (containing dboid, subid, relid) for each entry. - */ keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey)); idx = 0; - /* First pass: collect all keys while iterating over stable hash table */ + /* First pass: collect keys */ hash_seq_init(&hash_seq, SpockHash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { keys_to_remove[idx++] = entry->key; } - /* Second pass: remove all entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL); diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index f930db20..f33e5219 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1577,26 +1577,10 @@ relmetacache_flush(void) return; /* - * We must not call hash_search(HASH_REMOVE) while iterating through the - * hash table with hash_seq_search(), because removing entries corrupts - * the internal iterator state maintained by HASH_SEQ_STATUS. The hash - * table implementation uses a bucket-based structure, and when an entry - * is removed, it may cause bucket reorganization or entry relocation that - * invalidates the iterator's current position. This can lead to: - * - * 1. Iterator skipping entries that should be visited - * 2. Iterator visiting the same entry multiple times - * 3. Iterator dereferencing freed memory (crash) - * 4. Infinite loops or premature iteration termination - * - * The correct pattern is to collect all keys/OIDs to be removed during - * the first pass (iteration), then perform all removals in a second pass - * after the iteration completes. This ensures the iterator completes its - * scan over a stable hash table structure. - * - * We allocate an array sized to the current number of entries, which is - * safe because we're removing all entries (no entry will be skipped). - * The palloc is temporary and freed at function exit. + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries corrupts the iterator state (bucket pointers, + * freed memory), causing crashes or skipped entries. Use two-pass + * approach: collect relids during iteration, then remove. 
*/ num_entries = hash_get_num_entries(RelMetaCache); if (num_entries == 0) @@ -1605,14 +1589,14 @@ relmetacache_flush(void) relids = (Oid *) palloc(num_entries * sizeof(Oid)); idx = 0; - /* First pass: collect all relids while iterating */ + /* First pass: collect relids */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { relids[idx++] = hentry->relid; } - /* Second pass: remove all entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, @@ -1648,31 +1632,10 @@ relmetacache_prune(void) return; /* - * We must not call hash_search(HASH_REMOVE) while iterating through the - * hash table with hash_seq_search(), because removing entries corrupts - * the internal iterator state maintained by HASH_SEQ_STATUS. The hash - * table uses bucket-based open addressing with chaining, and when an - * entry is removed, it may trigger: - * - * 1. Bucket chain reorganization (moving entries within the chain) - * 2. Bucket splitting or merging (if dynamic hashing is enabled) - * 3. Memory deallocation of the removed entry - * - * Any of these operations can invalidate the iterator's cached position - * (current bucket index and chain pointer), leading to undefined behavior - * such as visiting wrong entries, skipping valid entries, or accessing - * freed memory resulting in segmentation faults. - * - * The standard PostgreSQL pattern is a two-pass approach: first, iterate - * through the hash table and collect keys of entries to be removed; - * second, after iteration completes, remove all collected entries. This - * guarantees iterator stability because the hash table structure remains - * unchanged during iteration. - * - * We allocate space for num_entries even though we'll only remove invalid - * entries. This wastes some memory temporarily but simplifies the code - * and avoids a second pass to count invalid entries. The array is freed - * immediately after removal completes. + * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration + * because removing entries corrupts the iterator state (bucket pointers, + * freed memory), causing crashes or skipped entries. Use two-pass + * approach: collect relids of invalid entries, then remove. */ num_entries = hash_get_num_entries(RelMetaCache); if (num_entries == 0) @@ -1681,7 +1644,7 @@ relmetacache_prune(void) relids_to_remove = (Oid *) palloc(num_entries * sizeof(Oid)); idx = 0; - /* First pass: collect relids of invalid entries while iterating */ + /* First pass: collect invalid entries */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) { @@ -1689,7 +1652,7 @@ relmetacache_prune(void) relids_to_remove[idx++] = hentry->relid; } - /* Second pass: remove all invalid entries now that iteration is complete */ + /* Second pass: remove entries */ for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, From 0e40427a0dfb3aaab57cab08f25237beb0d52e5a Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 19:43:50 +0500 Subject: [PATCH 3/6] Remove redundant memory context switches in hash table operations The hash_create call was configured with HASH_CONTEXT flag and ctl.hcxt = RelMetaCacheContext, which instructs the hash table API to perform all allocations in the specified context automatically. 
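For reference, a minimal sketch of the dynahash behavior relied on here,
mirroring the setup in relmetacache_init() (it is assumed, as in the
existing code, that hash_flags also carries HASH_ELEM and HASH_CONTEXT):

    HASHCTL		ctl;
    int			hash_flags = HASH_ELEM | HASH_BLOBS | HASH_CONTEXT;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(Oid);
    ctl.entrysize = sizeof(struct SPKRelMetaCacheEntry);
    ctl.hcxt = RelMetaCacheContext;

    /*
     * With HASH_CONTEXT set, hash_create() allocates the table, and all
     * entries created later by hash_search(HASH_ENTER), under ctl.hcxt,
     * regardless of CurrentMemoryContext.
     */
    RelMetaCache = hash_create("spock relation metadata cache",
                               RELMETACACHE_INITIAL_SIZE,
                               &ctl, hash_flags);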
The explicit MemoryContextSwitchTo calls around hash_create and hash_search were therefore redundant and have been removed. This simplifies the code by removing 2 variable declarations and 4 lines of context switch boilerplate. The hash table operations will continue to use RelMetaCacheContext for all allocations as configured. --- src/spock_output_plugin.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c index f33e5219..17e6544d 100644 --- a/src/spock_output_plugin.c +++ b/src/spock_output_plugin.c @@ -1490,8 +1490,6 @@ relmetacache_init(MemoryContext decoding_context) if (RelMetaCache == NULL) { - MemoryContext old_ctxt; - RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext, "spock output relmetacache", ALLOCSET_DEFAULT_SIZES); @@ -1504,11 +1502,9 @@ relmetacache_init(MemoryContext decoding_context) ctl.entrysize = sizeof(struct SPKRelMetaCacheEntry); ctl.hcxt = RelMetaCacheContext; hash_flags |= HASH_BLOBS; - old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext); RelMetaCache = hash_create("spock relation metadata cache", RELMETACACHE_INITIAL_SIZE, &ctl, hash_flags); - (void) MemoryContextSwitchTo(old_ctxt); Assert(RelMetaCache != NULL); @@ -1532,14 +1528,11 @@ relmetacache_get_relation(struct SpockOutputData *data, { struct SPKRelMetaCacheEntry *hentry; bool found; - MemoryContext old_mctx; /* Find cached function info, creating if not found */ - old_mctx = MemoryContextSwitchTo(RelMetaCacheContext); hentry = (struct SPKRelMetaCacheEntry *) hash_search(RelMetaCache, (void *) (&RelationGetRelid(rel)), HASH_ENTER, &found); - (void) MemoryContextSwitchTo(old_mctx); /* If not found or not valid, it can't be cached. */ if (!found || !hentry->is_valid) From fc68b2859754f20c38787095dccd09d4d3ea5253 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 8 Jan 2026 22:32:32 +0500 Subject: [PATCH 4/6] Fix hash table corruption issues in shared memory 1. Missing NULL checks after hash_search(HASH_ENTER) on HASH_FIXED_SIZE tables. When SpockHash or SpockGroupHash reach capacity, hash_search returns NULL instead of expanding the table. Dereferencing NULL causes segmentation faults. Added NULL checks in handle_stats_counter(), spock_group_attach(), and spock_group_progress_update(). 2. Race condition in handle_stats_counter() where the function releases LW_SHARED, checks hash fullness without lock, then reacquires LW_EXCLUSIVE. Between these locks, concurrent processes can fill the table, causing HASH_ENTER to return NULL unexpectedly. The added NULL check handles this TOCTOU (time-of-check-to-time-of-use) race. 3. Lock mismatch in get_channel_stats() causing torn reads of 64-bit counter values. The reader held only LW_SHARED while writers used per-entry spinlocks (entry->mutex). Added spinlock acquisition in reader to ensure atomic access and prevent data races that violate the C memory model. All three fixes prevent crashes from NULL pointer dereference and ensure data consistency for statistics counters in shared memory hash tables. 
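To illustrate fix 3, the writer/reader pairing looks roughly as follows.
The writer body is a sketch of what handle_stats_counter() does per the
description above, not a verbatim excerpt; the counter index (typ) and
increment (ntup) are assumed from that function's signature:

    /* Writer (handle_stats_counter): update under the per-entry spinlock */
    SpinLockAcquire(&entry->mutex);
    entry->counter[typ] += ntup;
    SpinLockRelease(&entry->mutex);

    /*
     * Reader (get_channel_stats): take the same spinlock so a 64-bit
     * counter cannot be observed half-updated on platforms where such
     * loads are not atomic.
     */
    SpinLockAcquire(&entry->mutex);
    for (j = 0; j < SPOCK_STATS_NUM_COUNTERS; j++)
        values[i++] = Int64GetDatum(entry->counter[j]);
    SpinLockRelease(&entry->mutex);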
--- src/spock_functions.c | 8 ++++++++ src/spock_group.c | 25 +++++++++++++++++++++++++ src/spock_worker.c | 12 ++++++++++++ 3 files changed, 45 insertions(+) diff --git a/src/spock_functions.c b/src/spock_functions.c index e4b8908d..54e57195 100644 --- a/src/spock_functions.c +++ b/src/spock_functions.c @@ -3002,8 +3002,16 @@ get_channel_stats(PG_FUNCTION_ARGS) values[i++] = ObjectIdGetDatum(entry->key.subid); values[i++] = ObjectIdGetDatum(entry->key.relid); + /* + * Acquire spinlock before reading counter values to prevent torn + * reads. The writer (handle_stats_counter) uses entry->mutex to + * protect counter updates, so we must use the same lock for reads + * to ensure atomic access to 64-bit counter values. + */ + SpinLockAcquire(&entry->mutex); for (j = 0; j < SPOCK_STATS_NUM_COUNTERS; j++) values[i++] = Int64GetDatum(entry->counter[j]); + SpinLockRelease(&entry->mutex); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } diff --git a/src/spock_group.c b/src/spock_group.c index 41ae2fc0..b6d19272 100644 --- a/src/spock_group.c +++ b/src/spock_group.c @@ -188,6 +188,19 @@ spock_group_attach(Oid dbid, Oid node_id, Oid remote_node_id) bool found; e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this + * to prevent dereferencing NULL pointer. + */ + if (e == NULL) + { + elog(ERROR, "SpockGroupHash is full, cannot attach to group " + "(dbid=%u, node_id=%u, remote_node_id=%u)", + dbid, node_id, remote_node_id); + return NULL; + } + if (!found) { /* initialize key values; Other entries will be updated later */ @@ -307,6 +320,18 @@ spock_group_progress_update(const SpockApplyProgress *sap) key = make_key(sap->key.dbid, sap->key.node_id, sap->key.remote_node_id); e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this + * to prevent dereferencing NULL pointer. + */ + if (e == NULL) + { + elog(WARNING, "SpockGroupHash is full, cannot update progress for group " + "(dbid=%u, node_id=%u, remote_node_id=%u)", + sap->key.dbid, sap->key.node_id, sap->key.remote_node_id); + return false; + } + if (!found) /* New Entry */ { /* Initialize key values; Other entries will be updated later */ diff --git a/src/spock_worker.c b/src/spock_worker.c index fc8857ac..f665f2bb 100644 --- a/src/spock_worker.c +++ b/src/spock_worker.c @@ -1100,6 +1100,18 @@ handle_stats_counter(Relation relation, Oid subid, spockStatsType typ, int ntup) entry = (spockStatsEntry *) hash_search(SpockHash, &key, HASH_ENTER, &found); + + /* + * HASH_FIXED_SIZE hash tables can return NULL when full. Check for + * this to prevent dereferencing NULL pointer. + */ + if (entry == NULL) + { + LWLockRelease(SpockCtx->lock); + elog(WARNING, "SpockHash is full, cannot add stats entry"); + spock_stats_hash_full = true; + return; + } } if (!found) From e0c13208d1dbd2ed77a570f1cbd747c16527dd9d Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Mon, 12 Jan 2026 23:08:31 +0500 Subject: [PATCH 5/6] Simplify hash table flush to match PostgreSQL pattern Replace the two-pass approach (collect keys then remove) with the simpler pattern used in PostgreSQL core code: iterate once and remove entries during iteration. This matches the style used in ShippableCache and other similar hash table flush operations in PostgreSQL. 
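The single-pass form relies on a documented dynahash guarantee: the caller
may delete the entry most recently returned by hash_seq_search() without
invalidating the scan. A minimal sketch of the pattern (MyHash, MyEntry,
and the key field are placeholder names):

    HASH_SEQ_STATUS status;
    MyEntry    *entry;

    hash_seq_init(&status, MyHash);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        /*
         * Deleting the just-returned entry is explicitly allowed;
         * deleting any other entry, or inserting, while the scan is
         * open is not.
         */
        if (hash_search(MyHash, &entry->key, HASH_REMOVE, NULL) == NULL)
            elog(ERROR, "hash table corrupted");
    }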
The previous implementation collected keys in a first pass and removed the
entries in a second pass to avoid modifying the table mid-scan; since every
removal here targets the entry the scan just returned, that indirection is
unnecessary, and the single-pass form is safe when flushing all entries.
---
 src/spock_functions.c     | 37 ++++++++++---------------------------
 src/spock_output_plugin.c | 32 ++++++--------------------------
 2 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/src/spock_functions.c b/src/spock_functions.c
index 08b7a13a..04a39bb9 100644
--- a/src/spock_functions.c
+++ b/src/spock_functions.c
@@ -3031,10 +3031,6 @@ reset_channel_stats(PG_FUNCTION_ARGS)
 {
 	HASH_SEQ_STATUS hash_seq;
 	spockStatsEntry *entry;
-	spockStatsKey *keys_to_remove;
-	int			num_entries;
-	int			i;
-	int			idx;
 
 	if (!SpockCtx || !SpockHash)
 		ereport(ERROR,
@@ -3044,31 +3040,18 @@ reset_channel_stats(PG_FUNCTION_ARGS)
 	LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE);
 
 	/*
-	 * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration
-	 * because removing entries invalidates the iterator state (bucket chain
-	 * pointers, memory references), causing crashes or skipped entries.
-	 * Use two-pass approach: collect keys during iteration, then remove.
+	 * In principle we could reset only specific channel statistics; but that
+	 * would be more complicated, and it's probably not worth the trouble.
+	 * So for now, just reset all entries.
 	 */
-	num_entries = hash_get_num_entries(SpockHash);
-	if (num_entries > 0)
+	hash_seq_init(&hash_seq, SpockHash);
+	while ((entry = hash_seq_search(&hash_seq)) != NULL)
 	{
-		keys_to_remove = (spockStatsKey *) palloc(num_entries * sizeof(spockStatsKey));
-		idx = 0;
-
-		/* First pass: collect keys */
-		hash_seq_init(&hash_seq, SpockHash);
-		while ((entry = hash_seq_search(&hash_seq)) != NULL)
-		{
-			keys_to_remove[idx++] = entry->key;
-		}
-
-		/* Second pass: remove entries */
-		for (i = 0; i < idx; i++)
-		{
-			hash_search(SpockHash, &keys_to_remove[i], HASH_REMOVE, NULL);
-		}
-
-		pfree(keys_to_remove);
+		if (hash_search(SpockHash,
+						&entry->key,
+						HASH_REMOVE,
+						NULL) == NULL)
+			elog(ERROR, "hash table corrupted");
 	}
 
 	LWLockRelease(SpockCtx->lock);
diff --git a/src/spock_output_plugin.c b/src/spock_output_plugin.c
index 1fab8ef6..0d404628 100644
--- a/src/spock_output_plugin.c
+++ b/src/spock_output_plugin.c
@@ -1559,44 +1559,24 @@ relmetacache_flush(void)
 {
 	HASH_SEQ_STATUS status;
 	struct SPKRelMetaCacheEntry *hentry;
-	Oid		   *relids;
-	int			num_entries;
-	int			i;
-	int			idx;
 
 	if (RelMetaCache == NULL)
 		return;
 
 	/*
-	 * Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration
-	 * because removing entries corrupts the iterator state (bucket pointers,
-	 * freed memory), causing crashes or skipped entries. Use two-pass
-	 * approach: collect relids during iteration, then remove.
+	 * In principle we could flush only cache entries relating to specific
+	 * relations; but that would be more complicated, and it's probably not
+	 * worth the trouble. So for now, just flush all entries.
*/ - num_entries = hash_get_num_entries(RelMetaCache); - if (num_entries == 0) - return; - - relids = (Oid *) palloc(num_entries * sizeof(Oid)); - idx = 0; - - /* First pass: collect relids */ hash_seq_init(&status, RelMetaCache); while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL) - { - relids[idx++] = hentry->relid; - } - - /* Second pass: remove entries */ - for (i = 0; i < idx; i++) { if (hash_search(RelMetaCache, - (void *) &relids[i], - HASH_REMOVE, NULL) == NULL) + (void *) &hentry->relid, + HASH_REMOVE, + NULL) == NULL) elog(ERROR, "hash table corrupted"); } - - pfree(relids); } /* From b18eec202b01bfd3046b7faaddeb06060a5dfa8b Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Tue, 13 Jan 2026 21:01:19 +0500 Subject: [PATCH 6/6] Fix undeclared variable 'e' in spock_group functions --- src/spock_group.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spock_group.c b/src/spock_group.c index c721ad4e..a16de662 100644 --- a/src/spock_group.c +++ b/src/spock_group.c @@ -215,13 +215,13 @@ spock_group_attach(Oid dbid, Oid node_id, Oid remote_node_id) SpockGroupEntry *entry; bool found; - e = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); + entry = (SpockGroupEntry *) hash_search(SpockGroupHash, &key, HASH_ENTER, &found); /* * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this * to prevent dereferencing NULL pointer. */ - if (e == NULL) + if (entry == NULL) { elog(ERROR, "SpockGroupHash is full, cannot attach to group " "(dbid=%u, node_id=%u, remote_node_id=%u)", @@ -373,7 +373,7 @@ spock_group_progress_update(const SpockApplyProgress *sap) * HASH_FIXED_SIZE hash tables can return NULL when full. Check for this * to prevent dereferencing NULL pointer. */ - if (e == NULL) + if (entry == NULL) { elog(WARNING, "SpockGroupHash is full, cannot update progress for group " "(dbid=%u, node_id=%u, remote_node_id=%u)",