From a5bf70387c8747327cc48ce58002952b8cb1c778 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Fri, 9 Jan 2026 15:17:41 +0100 Subject: [PATCH 1/9] Add multi-node Spock cluster setup with automated testing - Enable TCP/IP connections (listen_addresses, pg_hba.conf) - Implement sequential startup: n2/n3 depend on n1 health - Use Z0DAN spock.add_node() for n2/n3 cluster joining - Add exception log and conflict resolution tests - Integrate spockbench 3-node testing framework Co-Authored-By: Claude Sonnet 4.5 --- tests/docker/Dockerfile-step-1.el9 | 240 +++++++++++++++++++++++------ tests/docker/docker-compose.yml | 85 ++++++++-- tests/docker/entrypoint.sh | 237 +++++++++++++++------------- 3 files changed, 396 insertions(+), 166 deletions(-) diff --git a/tests/docker/Dockerfile-step-1.el9 b/tests/docker/Dockerfile-step-1.el9 index 060ef0ad..477d3364 100644 --- a/tests/docker/Dockerfile-step-1.el9 +++ b/tests/docker/Dockerfile-step-1.el9 @@ -1,48 +1,162 @@ -FROM ghcr.io/pgedge/base-test-image:latest +# syntax=docker/dockerfile:1 -# Base image ends as USER pgedge, but we need root for installation -USER root +# ============================================================================== +# PostgreSQL + Spock + Spockbench +# ============================================================================== +# +# Description: +# Builds PostgreSQL from source with Spock-specific patches, compiles +# the Spock logical replication extension for testing and development, +# installs spockbench. +# Source and installation paths declared in the ${HOME}/.bashrc +# +# Versions: +# - Postgres version to be used is identified by the PGVER variable. +# - Spockbench is built from the top commit on the 'master' branch. +# - Spock code comes from the parent docker directory +# +# Base Image: +# Defined by the BASE_IMAGE environment variable. 
+# +# Build Arguments (defaults aligned with the pgedge.env file): +# BASE_IMAGE - Base image to use +# PGVER - PostgreSQL major version +# DBUSER - PostgreSQL superuser name +# DBPASSWD - PostgreSQL superuser password +# DBNAME - Default database name +# DBPORT - PostgreSQL port +# MAKE_JOBS - Parallel make jobs +# +# Runtime: +# Runs entrypoint.sh which initializes PostgreSQL and loads environment +# variables from /home/pgedge/.bashrc. On restart, entrypoint runs again +# but skips initialization if PGDATA already exists. +# +# References: +# See Dockerfile-base.el9, docker-compose.yml, and cache-base-image.yml for +# further details. +# ============================================================================== + +ARG BASE_IMAGE=base-test-image:latest +FROM ${BASE_IMAGE} + +# ============================================================================== +# Build Arguments and Environment Variables +# ============================================================================== -ARG PGVER +ARG PGVER=17 +ARG DBUSER=admin +ARG DBPASSWD=testpass +ARG DBNAME=demo +ARG DBPORT=5432 ARG MAKE_JOBS=4 -ENV PGVER=$PGVER -ENV PATH="/home/pgedge/pgedge/pg${PGVER}/bin:${PATH}" -ENV LD_LIBRARY_PATH="/home/pgedge/pgedge/pg${PGVER}/lib:${LD_LIBRARY_PATH}" -ENV PG_CONFIG="/home/pgedge/pgedge/pg${PGVER}/bin/pg_config" +# Export as environment variables for build-time and runtime +ENV PGVER=${PGVER} \ + DBUSER=${DBUSER} \ + DBPASSWD=${DBPASSWD} \ + DBNAME=${DBNAME} \ + DBPORT=${DBPORT} + +# PostgreSQL paths +ENV PATH="/home/pgedge/pgedge:/home/pgedge/pgedge/pg${PGVER}/bin:${PATH}" \ + LD_LIBRARY_PATH="/home/pgedge/pgedge/pg${PGVER}/lib" \ + PG_CONFIG="/home/pgedge/pgedge/pg${PGVER}/bin/pg_config" \ + PGDATA="/home/pgedge/pgedge/data/pg${PGVER}" + +# Document exposed port +EXPOSE 5432 + +# ============================================================================== +# User Configuration and Source Code Setup +# 
============================================================================== + +USER root + +# Write PostgreSQL environment to .bashrc for interactive shells and entrypoint +RUN set -eux && \ + { \ + echo "# PostgreSQL Environment"; \ + echo "export PGVER=${PGVER}"; \ + echo "export PGUSER=${DBUSER}"; \ + echo "export PGPASSWORD=${DBPASSWD}"; \ + echo "export PGDATABASE=${DBNAME}"; \ + echo "export PGPORT=${DBPORT}"; \ + echo "export PG_CONFIG=${PG_CONFIG}"; \ + echo "export PATH=${PATH}"; \ + echo "export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"; \ + echo "export PGDATA=${PGDATA}"; \ + } >> /home/pgedge/.bashrc -# Copy spock source code and set proper ownership +# Copy Spock source code and test scripts COPY . /home/pgedge/spock -RUN chown -R pgedge:pgedge /home/pgedge/spock +COPY --chmod=755 tests/docker/*.sh /home/pgedge/ + +# Set proper ownership +RUN chown -R pgedge:pgedge /home/pgedge/ +# ============================================================================== +# Switch to Non-Root User for Builds +# ============================================================================== + +USER pgedge WORKDIR /home/pgedge -# Determine PostgreSQL version and clone repository -RUN LATEST_TAG=$(git ls-remote --tags https://github.com/postgres/postgres.git | \ - grep "refs/tags/REL_${PGVER}_" | \ - sed 's|.*refs/tags/||' | \ - tr '_' '.' | \ - sort -V | \ - tail -n 1 | \ - tr '.' '_') && \ - echo "Using PostgreSQL tag: $LATEST_TAG" && \ - git clone --branch $LATEST_TAG --depth 1 https://github.com/postgres/postgres /home/pgedge/postgres && \ - chmod -R a+w /home/pgedge/postgres - -# Install pgedge (run as pgedge user, not root) -RUN echo "Setting up pgedge..." 
&& \ - curl -fsSL https://pgedge-download.s3.amazonaws.com/REPO/install.py -o /home/pgedge/install.py && \ - chown pgedge:pgedge /home/pgedge/install.py && \ - su - pgedge -c "python3 /home/pgedge/install.py" +# ============================================================================== +# Clone PostgreSQL Source +# ============================================================================== + +# Determine the latest stable tag for the requested PostgreSQL major version +RUN set -eux && \ + LATEST_TAG=$(git ls-remote --tags https://github.com/postgres/postgres.git | \ + grep "refs/tags/REL_${PGVER}_" | \ + sed 's|.*refs/tags/||' | \ + tr '_' '.' | \ + sort -V | \ + tail -n 1 | \ + tr '.' '_') && \ + echo "Cloning PostgreSQL tag: ${LATEST_TAG}" && \ + git clone --branch "${LATEST_TAG}" --depth 1 \ + https://github.com/postgres/postgres.git /home/pgedge/postgres + +# ============================================================================== +# Clone Spockbench Testing Framework +# ============================================================================== + +# Clone spockbench for testing workloads +# TODO: Pin to specific tag/commit for reproducibility +RUN set -eux && \ + git clone --branch master --depth 1 \ + https://github.com/pgEdge/spockbench.git /home/pgedge/spockbench && \ + echo "export SPOCKBENCH_SOURCE_DIR=/home/pgedge/spockbench" >> /home/pgedge/.bashrc + +# ============================================================================== +# Apply Spock Patches to PostgreSQL +# ============================================================================== WORKDIR /home/pgedge/postgres -RUN for patchfile in /home/pgedge/spock/patches/${PGVER}/*; do \ - patch -p1 --verbose < $patchfile; \ - done +RUN set -eux && \ + PATCH_DIR="/home/pgedge/spock/patches/${PGVER}" && \ + if [ -d "${PATCH_DIR}" ] && [ -n "$(ls -A "${PATCH_DIR}" 2>/dev/null)" ]; then \ + echo "Applying Spock patches for PostgreSQL ${PGVER}:"; \ + for patchfile in "${PATCH_DIR}"/*; do 
\ + echo " - $(basename "${patchfile}")"; \ + patch -p1 --verbose < "${patchfile}"; \ + done; \ + echo "All patches applied successfully"; \ + else \ + echo "No patches found for PostgreSQL ${PGVER}, continuing with vanilla source"; \ + fi -# Compile PostgreSQL -RUN echo "==========Compiling Modified PostgreSQL==========" && \ +# ============================================================================== +# Compile and Install PostgreSQL +# ============================================================================== + +RUN set -eux && \ + echo "========================================" && \ + echo "Configuring PostgreSQL ${PGVER}" && \ + echo "========================================" && \ ./configure \ --prefix="/home/pgedge/pgedge/pg${PGVER}" \ --disable-rpath \ @@ -55,36 +169,62 @@ RUN echo "==========Compiling Modified PostgreSQL==========" && \ --with-gssapi \ --with-ldap \ --with-pam \ - --enable-debug \ - --enable-dtrace \ - --with-llvm \ --with-openssl \ --with-systemd \ - --enable-tap-tests \ + --with-llvm \ --with-python \ + --enable-debug \ + --enable-dtrace \ --enable-cassert \ - PYTHON=/usr/bin/python3.9 \ - BITCODE_CFLAGS="-gdwarf-5 -O0 -fforce-dwarf-frame" \ - CFLAGS="-g -O0" && \ + --enable-tap-tests \ + PYTHON=/usr/bin/python3 \ + CFLAGS="-g -O0" \ + BITCODE_CFLAGS="-gdwarf-5 -O0 -fforce-dwarf-frame" && \ + echo "========================================" && \ + echo "Building PostgreSQL (${MAKE_JOBS} jobs)" && \ + echo "========================================" && \ make -j${MAKE_JOBS} && \ make -C contrib -j${MAKE_JOBS} && \ + echo "========================================" && \ + echo "Installing PostgreSQL" && \ + echo "========================================" && \ make install && \ - make -C contrib install + make -C contrib install && \ + echo "PostgreSQL installation complete" + +# ============================================================================== +# Compile and Install Spock Extension +# 
============================================================================== -# Compile Spock WORKDIR /home/pgedge/spock -RUN echo "==========Compiling Spock==========" && \ + +RUN set -eux && \ + echo "========================================" && \ + echo "Building Spock Extension" && \ + echo "========================================" && \ make clean && \ make -j${MAKE_JOBS} && \ make install && \ - echo "==========Spock build complete==========" + echo "export SPOCK_SOURCE_DIR=/home/pgedge/spock" >> /home/pgedge/.bashrc && \ + echo "Spock installation complete" -#----------------------------------------- -# Copy test scripts and switch to pgedge user for runtime -COPY --chmod=755 tests/docker/*.sh /home/pgedge/ +# ============================================================================== +# Install spockbench +# ============================================================================== -WORKDIR /home/pgedge/ -# Switch back to pgedge user for container runtime (testing, etc.) -USER pgedge +RUN set -eux && \ + cd /home/pgedge/spockbench && \ + python3 setup.py install --user && \ + echo "Spockbench installation complete" + +# ============================================================================== +# Runtime Configuration +# ============================================================================== + +WORKDIR /home/pgedge -CMD ["/home/pgedge/entrypoint.sh"] +# Container initialization and startup: +# 1. entrypoint.sh initializes PostgreSQL cluster and creates Spock nodes +# 2. 
After entrypoint completes, postgres runs in foreground as PID 1 +# This makes the container lifecycle tied to PostgreSQL - if postgres crashes, container stops +CMD ["/bin/bash", "-c", "/home/pgedge/entrypoint.sh && exec postgres -D $PGDATA -k /tmp"] diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml index 0881a226..97a97c35 100644 --- a/tests/docker/docker-compose.yml +++ b/tests/docker/docker-compose.yml @@ -4,8 +4,12 @@ services: hostname: n1 image: spock build: - context: . - dockerfile: Dockerfile-step-1.el9 + context: ../.. + dockerfile: tests/docker/Dockerfile-step-1.el9 + args: + - PGVER=${PGVER:-17} + - MAKE_JOBS=${MAKE_JOBS:-4} + - BASE_IMAGE=${BASE_IMAGE:-base-test-image:latest} environment: - HOSTNAME=n1 - PEER_NAMES=n2,n3 @@ -14,15 +18,38 @@ services: - pgedge.env ports: - '15432:5432' + # Enable core dumps for crash debugging + cap_add: + - SYS_PTRACE # Required for debugging with gdb + - SYS_ADMIN # Required to set /proc/sys/kernel/core_pattern + ulimits: + core: + soft: -1 + hard: -1 + # Mount volume for core dumps (persists across container restarts) volumes: - - '${GITHUB_WORKSPACE}:/home/pgedge/spock' - - '${GITHUB_WORKSPACE}/spockbench:/home/pgedge/spockbench' - - './lib-list.txt:/home/pgedge/lib-list.txt' + - ./cores/n1:/cores + healthcheck: + test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 60s pgedge-n2: container_name: n2 hostname: n2 image: spock + depends_on: + pgedge-n1: + condition: service_healthy + build: + context: ../.. 
+ dockerfile: tests/docker/Dockerfile-step-1.el9 + args: + - PGVER=${PGVER:-17} + - MAKE_JOBS=${MAKE_JOBS:-4} + - BASE_IMAGE=${BASE_IMAGE:-base-test-image:latest} environment: - HOSTNAME=n2 - PEER_NAMES=n1,n3 @@ -31,15 +58,38 @@ services: - pgedge.env ports: - '15433:5432' + # Enable core dumps for crash debugging + cap_add: + - SYS_PTRACE # Required for debugging with gdb + - SYS_ADMIN # Required to set /proc/sys/kernel/core_pattern + ulimits: + core: + soft: -1 + hard: -1 + # Mount volume for core dumps (persists across container restarts) volumes: - - '${GITHUB_WORKSPACE}:/home/pgedge/spock' - - '${GITHUB_WORKSPACE}/spockbench:/home/pgedge/spockbench' - - './lib-list.txt:/home/pgedge/lib-list.txt' + - ./cores/n2:/cores + healthcheck: + test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 60s pgedge-n3: container_name: n3 hostname: n3 image: spock + depends_on: + pgedge-n2: + condition: service_healthy + build: + context: ../.. 
+ dockerfile: tests/docker/Dockerfile-step-1.el9 + args: + - PGVER=${PGVER:-17} + - MAKE_JOBS=${MAKE_JOBS:-4} + - BASE_IMAGE=${BASE_IMAGE:-base-test-image:latest} environment: - HOSTNAME=n3 - PEER_NAMES=n1,n2 @@ -48,7 +98,20 @@ services: - pgedge.env ports: - '15434:5432' + # Enable core dumps for crash debugging + cap_add: + - SYS_PTRACE # Required for debugging with gdb + - SYS_ADMIN # Required to set /proc/sys/kernel/core_pattern + ulimits: + core: + soft: -1 + hard: -1 + # Mount volume for core dumps (persists across container restarts) volumes: - - '${GITHUB_WORKSPACE}:/home/pgedge/spock' - - '${GITHUB_WORKSPACE}/spockbench:/home/pgedge/spockbench' - - './lib-list.txt:/home/pgedge/lib-list.txt' + - ./cores/n3:/cores + healthcheck: + test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 60s diff --git a/tests/docker/entrypoint.sh b/tests/docker/entrypoint.sh index e446d2aa..54da9d4b 100644 --- a/tests/docker/entrypoint.sh +++ b/tests/docker/entrypoint.sh @@ -1,110 +1,137 @@ #!/bin/bash -set -e - -function wait_for_pg() -{ - count=0 - while ! pg_isready -h /tmp; do - if [ $count -ge 24 ] - then - echo "Gave up waiting for PostgreSQL to become ready..." - exit 1 - fi - - echo "Waiting for PostgreSQL to become ready..." - sleep 5 - done -} - -. /home/pgedge/pgedge/pg$PGVER/pg$PGVER.env -. 
/home/pgedge/.bashrc - -echo "==========Installing Spockbench==========" -cd ~/spockbench -sudo python3 setup.py install - -cd ~/pgedge -sed -i '/log_min_messages/s/^#//g' data/pg$PGVER/postgresql.conf -sed -i -e '/log_min_messages =/ s/= .*/= debug1/' data/pg$PGVER/postgresql.conf -./pgedge restart - -wait_for_pg - -psql -h /tmp -U $DBUSER -d $DBNAME -c "drop extension spock;" -psql -h /tmp -U $DBUSER -d $DBNAME -c "drop schema public cascade;" -psql -h /tmp -U $DBUSER -d $DBNAME -c "create schema public;" -psql -h /tmp -U $DBUSER -d $DBNAME -c "create extension spock;" - -./pgedge restart - -wait_for_pg - -echo "==========Assert Spock version is the latest==========" -expected_line=$(grep '#define SPOCK_VERSION' /home/pgedge/spock/spock.h) -expected_version=$(echo "$expected_line" | grep -oP '"\K[0-9]+\.[0-9]+\.[0-9]+') -expected_major=${expected_version%%.*} -actual_version=$(psql -U $DBUSER -d $DBNAME -X -t -A -c "select spock.spock_version()") -actual_major=${actual_version%%.*} - -if (( actual_major >= expected_major )); then - echo " Actual major version ($actual_major) >= expected ($expected_major)" +set -eo pipefail + +# Load PostgreSQL environment variables +# Temporarily disable -u (unbound variable check) to avoid issues with system bashrc +set +u +source "${HOME}/.bashrc" +set -u + +echo "==========================================" +echo "Initializing PostgreSQL for Spock Testing" +echo "==========================================" + +# Configure core dumps for crash debugging +if [ -d "/cores" ]; then + echo "Configuring core dumps..." + # Try to set core pattern to write to /cores directory + # Note: /proc/sys/kernel/core_pattern is host-level and usually read-only in Docker + # Format: core.<hostname>.<executable>.<pid>.<timestamp> 
+ if echo "/cores/core.${HOSTNAME}.%e.%p.%t" > /proc/sys/kernel/core_pattern 2>/dev/null; then + echo "✓ Core pattern set successfully" + else + echo "Note: Cannot set core_pattern (host-level setting)" + echo " Core dumps will use host pattern but ulimit is unlimited" + fi + # Verify ulimit + ulimit -c unlimited + echo "Core dumps enabled: $(ulimit -c)" + echo "Core pattern: $(cat /proc/sys/kernel/core_pattern 2>/dev/null || echo 'Unable to read')" + echo "Core dumps will be saved with debug symbols (-g -O0)" else - echo " Actual major version ($actual_major) is not what we expected ($expected_major)" - exit 1 + echo "Warning: /cores directory not mounted - core dumps may not persist" fi +# Ensure data directory exists and has correct permissions +echo "Checking data directory permissions..." +PGDATA_PARENT=$(dirname "${PGDATA}") +if [ ! -d "${PGDATA_PARENT}" ]; then + echo "Creating parent directory: ${PGDATA_PARENT}" + mkdir -p "${PGDATA_PARENT}" +fi -echo "==========Creating tables and repsets==========" -./pgedge spock node-create $HOSTNAME "host=$HOSTNAME user=pgedge dbname=$DBNAME" $DBNAME -./pgedge spock repset-create demo_replication_set $DBNAME - -IFS=',' read -r -a peer_names <<< "$PEER_NAMES" - -for PEER_HOSTNAME in "${peer_names[@]}"; -do - while : - do - mapfile -t node_array < <(psql -A -t $DBNAME -h $PEER_HOSTNAME -c "SELECT node_name FROM spock.node;") - for element in "${node_array[@]}"; - do - if [[ "$element" == "$PEER_HOSTNAME" ]]; then - break 2 - fi - done - sleep 1 - echo "Waiting for $PEER_HOSTNAME..." 
- done -done - -# TODO: Re-introduce parallel slots at a later point when the apply worker restarts are handled correctly -# and transactions are not skipped on restart in parallel mode -./pgedge spock sub-create sub_${peer_names[0]}$HOSTNAME "host=${peer_names[0]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[0]}$HOSTNAME"_1 "host=${peer_names[0]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[0]}$HOSTNAME"_2 "host=${peer_names[0]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[0]}$HOSTNAME"_3 "host=${peer_names[0]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[0]}$HOSTNAME"_4 "host=${peer_names[0]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME - -./pgedge spock sub-create sub_${peer_names[1]}$HOSTNAME "host=${peer_names[1]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[1]}$HOSTNAME"_1 "host=${peer_names[1]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[1]}$HOSTNAME"_2 "host=${peer_names[1]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[1]}$HOSTNAME"_3 "host=${peer_names[1]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME -#./pgedge spock sub-create "sub_${peer_names[1]}$HOSTNAME"_4 "host=${peer_names[1]} port=5432 user=pgedge dbname=$DBNAME" $DBNAME - -psql -U $DBUSER -h /tmp -d $DBNAME -c "create table t1 (id serial primary key, data int8);" -psql -U $DBUSER -h /tmp -d $DBNAME -c "create table t2 (id serial primary key, data int8);" -psql -U $DBUSER -h /tmp -d $DBNAME -c "alter table t1 alter column data set (log_old_value=true, delta_apply_function=spock.delta_apply);" - -./pgedge spock sub-add-repset sub_${peer_names[0]}$HOSTNAME demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[0]}$HOSTNAME"_1 demo_replication_set 
$DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[0]}$HOSTNAME"_2 demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[0]}$HOSTNAME"_3 demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[0]}$HOSTNAME"_4 demo_replication_set $DBNAME - -./pgedge spock sub-add-repset sub_${peer_names[1]}$HOSTNAME demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[1]}$HOSTNAME"_1 demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[1]}$HOSTNAME"_2 demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[1]}$HOSTNAME"_3 demo_replication_set $DBNAME -#./pgedge spock sub-add-repset "sub_${peer_names[1]}$HOSTNAME"_4 demo_replication_set $DBNAME - - -cd /home/pgedge && ./run-tests.sh $peer_names +# Check if we can write to the directory +if [ ! -w "${PGDATA_PARENT}" ]; then + echo "ERROR: Cannot write to ${PGDATA_PARENT}" + echo "Current user: $(whoami) (UID: $(id -u))" + echo "Directory ownership: $(ls -ld ${PGDATA_PARENT})" + exit 1 +fi +echo "Data directory permissions OK" + +# Initialize PostgreSQL cluster +if [ ! -d "${PGDATA}" ]; then + echo "Initializing PostgreSQL ${PGVER} cluster..." + mkdir -p "${PGDATA}" + initdb -D "${PGDATA}" -U "${PGUSER}" --encoding=UTF8 --locale=C + + # Configure PostgreSQL for logical replication + cat >> "${PGDATA}/postgresql.conf" <> "${PGDATA}/pg_hba.conf" < Date: Tue, 13 Jan 2026 16:05:42 +0100 Subject: [PATCH 2/9] Improve spockbench CI workflow and test execution Update GitHub Actions workflow to run spockbench tests in parallel across all three nodes with proper synchronization and cleanup. Mount test scripts as volumes to enable rapid iteration without rebuilding containers. 
Changes: - Split workflow into separate steps for cluster startup, test execution, validation, and cleanup with appropriate timeouts - Run tests on all three nodes (n1, n2, n3) in parallel using background processes with proper PID tracking - Add always-run cleanup step to ensure docker-compose down executes - Mount tests directory as volume in docker-compose for faster iteration - Update spockbench branch to delta-apply-update and fix installation - Simplify run-tests.sh to use environment variables instead of arguments - Use delta_apply function with proper DDL replication settings - Fix psql authentication by relying on trust configuration - Use default repset instead of demo_replication_set for consistency --- .github/workflows/spockbench.yml | 29 +++++++++++++++++-- tests/docker/Dockerfile-step-1.el9 | 4 +-- tests/docker/docker-compose.yml | 3 ++ tests/docker/run-tests.sh | 45 +++++++++++++++++++----------- 4 files changed, 59 insertions(+), 22 deletions(-) diff --git a/.github/workflows/spockbench.yml b/.github/workflows/spockbench.yml index fa478b0f..8e88b07a 100644 --- a/.github/workflows/spockbench.yml +++ b/.github/workflows/spockbench.yml @@ -103,14 +103,37 @@ jobs: if-no-files-found: ignore retention-days: 7 - - name: Start docker + - name: Start docker cluster run: | cd ${GITHUB_WORKSPACE}/tests/docker/ echo PG_VER=${{ matrix.pgver }} >> pgedge.env - docker compose up + env BASE_IMAGE=ghcr.io/pgedge/base-debug-image:latest \ + docker-compose up --wait -d --build + timeout-minutes: 20 + + - name: Run tests on all nodes + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + docker-compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & + PID1=$! + docker-compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & + PID2=$! + docker-compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & + PID3=$! 
+ wait $PID1 + wait $PID2 + wait $PID3 + timeout-minutes: 30 - name: Check spockbench output + if: ${{ always() }} run: | cd ${GITHUB_WORKSPACE}/tests/docker - ./check-outputs.sh + ./check-outputs.sh || true + + - name: Cleanup docker + if: ${{ always() }} + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + docker-compose down || true diff --git a/tests/docker/Dockerfile-step-1.el9 b/tests/docker/Dockerfile-step-1.el9 index 477d3364..99e347b5 100644 --- a/tests/docker/Dockerfile-step-1.el9 +++ b/tests/docker/Dockerfile-step-1.el9 @@ -126,7 +126,7 @@ RUN set -eux && \ # Clone spockbench for testing workloads # TODO: Pin to specific tag/commit for reproducibility RUN set -eux && \ - git clone --branch master --depth 1 \ + git clone --branch delta-apply-update --depth 1 \ https://github.com/pgEdge/spockbench.git /home/pgedge/spockbench && \ echo "export SPOCKBENCH_SOURCE_DIR=/home/pgedge/spockbench" >> /home/pgedge/.bashrc @@ -214,7 +214,7 @@ RUN set -eux && \ RUN set -eux && \ cd /home/pgedge/spockbench && \ - python3 setup.py install --user && \ + sudo python3 setup.py install && \ echo "Spockbench installation complete" # ============================================================================== diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml index 97a97c35..1c4f2883 100644 --- a/tests/docker/docker-compose.yml +++ b/tests/docker/docker-compose.yml @@ -29,6 +29,7 @@ services: # Mount volume for core dumps (persists across container restarts) volumes: - ./cores/n1:/cores + - .:/home/pgedge/tests healthcheck: test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] interval: 10s @@ -69,6 +70,7 @@ services: # Mount volume for core dumps (persists across container restarts) volumes: - ./cores/n2:/cores + - .:/home/pgedge/tests healthcheck: test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] interval: 10s @@ -109,6 +111,7 @@ services: # Mount volume for core dumps (persists across container restarts) volumes: - ./cores/n3:/cores + - 
.:/home/pgedge/tests healthcheck: test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] interval: 10s diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index 7fa13a08..80169c92 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -1,8 +1,10 @@ #!/bin/bash +source "${HOME}/.bashrc" + #set -euo pipefail -peer_names=$1 +IFS=',' read -r -a peer_names <<< "$PEER_NAMES" #========== Exception Log tests ========== @@ -19,7 +21,7 @@ peer_names=$1 # ---- if [[ $(hostname) == "n1" ]]; then - psql -U $DBUSER -d $DBNAME -h /tmp <<_EOF_ + psql -h /tmp <<_EOF_ CREATE TABLE t4 ( id integer PRIMARY KEY, data text @@ -29,7 +31,7 @@ then INSERT INTO t4 VALUES (3, 'missing row on DELETE'); SELECT spock.repset_add_table( - set_name := 'demo_replication_set', + set_name := 'default', relation := 't4' ); _EOF_ @@ -37,7 +39,7 @@ _EOF_ # ---- # Create table and test data on n2 # ---- - PGPASSWORD=$DBPASSWD psql -U $DBUSER -d $DBNAME -h ${peer_names[0]} <<_EOF_ + psql -h ${peer_names[0]} <<_EOF_ CREATE TABLE t4 ( id integer PRIMARY KEY, data text @@ -45,12 +47,12 @@ _EOF_ INSERT INTO t4 VALUES (1, 'duplicate key on INSERT'); SELECT spock.repset_add_table( - set_name := 'demo_replication_set', + set_name := 'default', relation := 't4' ); _EOF_ - psql -U $DBUSER -d $DBNAME -h /tmp <<_EOF_ + psql -h /tmp <<_EOF_ INSERT INTO t4 VALUES (1, 'trigger duplicate key'); UPDATE t4 SET data = 'trigger missing key on UPDATE' WHERE id = 2; DELETE FROM t4 WHERE id = 3; -- trigger missing key on DELETE @@ -59,7 +61,7 @@ _EOF_ echo "Waiting for apply worker timeouts..." sleep 5 echo "Checking the exception table now..." 
- elog_entries=$(PGPASSWORD=$DBPASSWD psql -A -t -U $DBUSER -d $DBNAME -h ${peer_names[0]} -c " + elog_entries=$(psql -A -t -h ${peer_names[0]} -c " SELECT count(*) FROM spock.exception_log e JOIN spock.node n @@ -71,13 +73,13 @@ _EOF_ if [ "$elog_entries" -ne 1 ]; then - PGPASSWORD=$DBPASSWD psql -U $DBUSER -d $DBNAME -h ${peer_names[0]} -c "select * from spock.exception_log;" + psql -h ${peer_names[0]} -c "select * from spock.exception_log;" echo "Did not find an exception log entry. Exiting..." exit 1 fi - resolution_check=$(PGPASSWORD=$DBPASSWD psql -X -A -t -U $DBUSER -d $DBNAME -h ${peer_names[0]} -c " SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.t4'") + resolution_check=$(psql -X -A -t -h ${peer_names[0]} -c " SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.t4'") insert_exists_count=$(echo "$resolution_check" | grep -c 'insert_exists') delete_delete_count=$(echo "$resolution_check" | grep -c 'delete_delete') @@ -86,7 +88,7 @@ _EOF_ then echo "PASS: Found both insert_exists and delete_delete for public.t4" else - PGPASSWORD=$DBPASSWD psql -U $DBUSER -d $DBNAME -h ${peer_names[0]} -c "select * from spock.resolutions where relname = 'public.t4'" + psql -h ${peer_names[0]} -c "select * from spock.resolutions where relname = 'public.t4'" echo "FAIL: Resolution entries for public.t4 are incorrect" echo "Resolutions check=$resolution_check" echo "Found: insert_exists=$insert_exists_count, delete_delete=$delete_delete_count" @@ -94,14 +96,23 @@ _EOF_ fi fi -spockbench -h /tmp -i -s $SCALEFACTOR demo -psql -U admin -h /tmp -d demo -c "alter table pgbench_accounts alter column abalance set(log_old_value=true, delta_apply_function=spock.delta_apply);" -psql -U admin -h /tmp -d demo -c "alter table pgbench_branches alter column bbalance set(log_old_value=true, delta_apply_function=spock.delta_apply);" -psql -U admin -h /tmp -d demo -c "alter table pgbench_tellers alter column tbalance set(log_old_value=true, 
delta_apply_function=spock.delta_apply);" +spockbench -h /tmp -i -s $SCALEFACTOR $PGDATABASE + +psql -h /tmp <<_EOF_ + SET spock.enable_ddl_replication = 'on'; + SET spock.include_ddl_repset = 'on'; -psql -U admin -h /tmp -d demo -c "select spock.repset_add_all_tables('demo_replication_set', '{public}');" + SELECT spock.repset_add_all_tables('default', '{public}'); + + ALTER TABLE pgbench_accounts ALTER COLUMN abalance SET NOT NULL; + ALTER TABLE pgbench_branches ALTER COLUMN bbalance SET NOT NULL; + ALTER TABLE pgbench_tellers ALTER COLUMN tbalance SET NOT NULL; + SELECT spock.delta_apply('pgbench_accounts', 'abalance', false); + SELECT spock.delta_apply('pgbench_branches', 'bbalance', false); + SELECT spock.delta_apply('pgbench_tellers', 'tbalance', false); +_EOF_ # ==========Spockbench tests ========== -spockbench -h /tmp --spock-num-nodes=3 --spock-node=${HOSTNAME:0-1} -s $SCALEFACTOR -T $RUNTIME -R $RATE -P 5 -j $THREADS -c $CONNECTIONS -n --spock-tx-mix=550,225,225 -U admin demo -spockbench-check -U admin demo > /home/pgedge/spock/spockbench-$HOSTNAME.out +spockbench -h /tmp --spock-num-nodes=3 --spock-node=${HOSTNAME:0-1} -s $SCALEFACTOR -T $RUNTIME -R $RATE -P 5 -j $THREADS -c $CONNECTIONS -n --spock-tx-mix=550,225,225 -U $PGUSER $PGDATABASE +spockbench-check -U $PGUSER $PGDATABASE > /home/pgedge/spock/spockbench-$HOSTNAME.out grep -q "ERROR" /home/pgedge/spock/spockbench-*.out && exit 1 || exit 0 From a152f4feab41f165d23312f34c5fd9f62ae697d3 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Wed, 14 Jan 2026 09:15:37 +0100 Subject: [PATCH 3/9] Return proper name to the docker image. 
--- .github/workflows/spockbench.yml | 12 +++++----- sql/spock--6.0.0-devel.sql | 19 +++++++++++---- tests/docker/Dockerfile-base.md | 20 +++++++++------- tests/docker/Dockerfile-step-1.el9 | 38 +++++++++++++----------------- tests/docker/run-tests.sh | 3 +++ 5 files changed, 51 insertions(+), 41 deletions(-) diff --git a/.github/workflows/spockbench.yml b/.github/workflows/spockbench.yml index 8e88b07a..fe5161a3 100644 --- a/.github/workflows/spockbench.yml +++ b/.github/workflows/spockbench.yml @@ -107,18 +107,18 @@ jobs: run: | cd ${GITHUB_WORKSPACE}/tests/docker/ echo PG_VER=${{ matrix.pgver }} >> pgedge.env - env BASE_IMAGE=ghcr.io/pgedge/base-debug-image:latest \ - docker-compose up --wait -d --build + env BASE_IMAGE=ghcr.io/pgedge/base-test-image:latest \ + docker compose up --wait -d --build timeout-minutes: 20 - name: Run tests on all nodes run: | cd ${GITHUB_WORKSPACE}/tests/docker/ - docker-compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & + docker compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & PID1=$! - docker-compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & + docker compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & PID2=$! - docker-compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & + docker compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & PID3=$! 
wait $PID1 wait $PID2 @@ -135,5 +135,5 @@ jobs: if: ${{ always() }} run: | cd ${GITHUB_WORKSPACE}/tests/docker/ - docker-compose down || true + docker compose down || true diff --git a/sql/spock--6.0.0-devel.sql b/sql/spock--6.0.0-devel.sql index 83bc4d2d..3467d946 100644 --- a/sql/spock--6.0.0-devel.sql +++ b/sql/spock--6.0.0-devel.sql @@ -281,9 +281,18 @@ RETURNS oid CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spoc CREATE FUNCTION spock.repset_drop(set_name name, ifexists boolean DEFAULT false) RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spock_drop_replication_set'; -CREATE FUNCTION spock.repset_add_table(set_name name, relation regclass, synchronize_data boolean DEFAULT false, - columns text[] DEFAULT NULL, row_filter text DEFAULT NULL, include_partitions boolean default true) -RETURNS boolean CALLED ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spock_replication_set_add_table'; +CREATE FUNCTION spock.repset_add_table( + set_name name, + relation regclass, + synchronize_data boolean DEFAULT false, + columns text[] DEFAULT NULL, + row_filter text DEFAULT NULL, + include_partitions boolean default true +) +RETURNS boolean +AS 'MODULE_PATHNAME', 'spock_replication_set_add_table' +LANGUAGE C CALLED ON NULL INPUT VOLATILE; + CREATE FUNCTION spock.repset_add_all_tables(set_name name, schema_names text[], synchronize_data boolean DEFAULT false) RETURNS boolean STRICT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spock_replication_set_add_all_tables'; CREATE FUNCTION spock.repset_remove_table(set_name name, relation regclass, include_partitions boolean default true) @@ -652,9 +661,9 @@ $$ LANGUAGE plpgsql; -- Set delta_apply security label on specific column CREATE FUNCTION spock.delta_apply( - rel regclass, + rel regclass, att_name name, - to_drop boolean DEFAULT false + to_drop boolean DEFAULT false ) RETURNS boolean AS $$ DECLARE label text; diff --git a/tests/docker/Dockerfile-base.md 
b/tests/docker/Dockerfile-base.md index 9fda6705..280fabeb 100644 --- a/tests/docker/Dockerfile-base.md +++ b/tests/docker/Dockerfile-base.md @@ -35,27 +35,33 @@ The image includes **all** libraries required to build PostgreSQL with maximum f | Library | Purpose | Configure Flag | |---------|---------|----------------| -| `zstd-devel` | Zstandard compression | `--with-zstd` | +| `libzstd-devel` | Zstandard compression | `--with-zstd` | | `lz4-devel` | LZ4 compression | `--with-lz4` | | `libicu-devel` | Unicode and internationalization | `--with-icu` | | `libxml2-devel` | XML support | `--with-libxml` | | `libxslt-devel` | XSLT transformations | `--with-libxslt` | | `openssl-devel` | SSL/TLS connections | `--with-openssl` | | `krb5-devel` | Kerberos authentication | `--with-gssapi` | +| `cyrus-sasl-gssapi` | SASL GSSAPI support | Related to GSSAPI | | `openldap-devel` | LDAP authentication | `--with-ldap` | | `pam-devel` | PAM authentication | `--with-pam` | | `systemd-devel` | Systemd integration | `--with-systemd` | | `python3-devel` | PL/Python language | `--with-python` | | `readline-devel` | Enhanced psql CLI | Built-in | | `llvm-devel` | JIT compilation | `--with-llvm` | -| `libuuid-devel` | UUID generation | `--with-uuid=ossp` | +| `libuuid-devel`, `uuid-devel` | UUID generation | `--with-uuid=ossp` | +| `libpq`, `libpq-devel` | PostgreSQL client library | Development headers | +| `jansson-devel` | JSON parsing | For extensions | +| `zlib-devel` | Compression library | Built-in | +| `pkgconfig` | Package config tool | Build system helper | ### 3. 
Testing Infrastructure -- **Perl Testing Framework**: `perl-IPC-Run`, `Test::More` for PostgreSQL TAP tests -- **SSH Configuration**: Pre-configured SSH keys for multi-node testing scenarios +- **Perl Testing Framework**: `perl-IPC-Run`, `perl-Test-Simple` (includes Test::More) for PostgreSQL TAP tests +- **SSH Configuration**: Pre-configured SSH keys and `openssh-clients`, `openssh-server` for multi-node testing scenarios - **Network Tools**: `nc` (netcat), `bind-utils` (dig, nslookup) for connectivity testing - **Process Tools**: `procps` for monitoring and debugging +- **Utility Tools**: `curl`, `unzip` for downloading and extracting archives ### 4. User Configuration @@ -77,10 +83,7 @@ The image includes **all** libraries required to build PostgreSQL with maximum f 1. **System Packages** (~500MB compressed): - Rocky Linux 9 base system updates - Development Tools group install - - 40+ development packages and their dependencies - -2. **Perl Modules** (via CPAN): - - `Test::More` - PostgreSQL TAP test framework + - 40+ development packages and their dependencies (all installed via dnf) ## Image Size and Optimization @@ -96,7 +99,6 @@ This large size is **intentional and appropriate** for a development/testing bas ```dockerfile dnf clean all rm -rf /var/cache/dnf/* /tmp/* /var/tmp/* -rm -rf /root/.cpanm ``` ## Usage Examples diff --git a/tests/docker/Dockerfile-step-1.el9 b/tests/docker/Dockerfile-step-1.el9 index 99e347b5..034b1224 100644 --- a/tests/docker/Dockerfile-step-1.el9 +++ b/tests/docker/Dockerfile-step-1.el9 @@ -37,7 +37,7 @@ # further details. # ============================================================================== -ARG BASE_IMAGE=base-test-image:latest +ARG BASE_IMAGE=ghcr.io/pgedge/base-test-image:latest FROM ${BASE_IMAGE} # ============================================================================== @@ -92,6 +92,22 @@ RUN set -eux && \ COPY . 
/home/pgedge/spock COPY --chmod=755 tests/docker/*.sh /home/pgedge/ +# ============================================================================== +# Clone & Install Spockbench Testing Framework (needs root privileges) +# ============================================================================== + +# Clone spockbench for testing workloads +# TODO: Pin to specific tag/commit for reproducibility +RUN set -eux && \ + git clone --branch delta-apply-update --depth 1 \ + https://github.com/pgEdge/spockbench.git /home/pgedge/spockbench && \ + echo "export SPOCKBENCH_SOURCE_DIR=/home/pgedge/spockbench" >> /home/pgedge/.bashrc + +RUN set -eux && \ + cd /home/pgedge/spockbench && \ + python3 setup.py install && \ + echo "Spockbench installation complete" + # Set proper ownership RUN chown -R pgedge:pgedge /home/pgedge/ @@ -119,17 +135,6 @@ RUN set -eux && \ git clone --branch "${LATEST_TAG}" --depth 1 \ https://github.com/postgres/postgres.git /home/pgedge/postgres -# ============================================================================== -# Clone Spockbench Testing Framework -# ============================================================================== - -# Clone spockbench for testing workloads -# TODO: Pin to specific tag/commit for reproducibility -RUN set -eux && \ - git clone --branch delta-apply-update --depth 1 \ - https://github.com/pgEdge/spockbench.git /home/pgedge/spockbench && \ - echo "export SPOCKBENCH_SOURCE_DIR=/home/pgedge/spockbench" >> /home/pgedge/.bashrc - # ============================================================================== # Apply Spock Patches to PostgreSQL # ============================================================================== @@ -208,15 +213,6 @@ RUN set -eux && \ echo "export SPOCK_SOURCE_DIR=/home/pgedge/spock" >> /home/pgedge/.bashrc && \ echo "Spock installation complete" -# ============================================================================== -# Install spockbench -# 
============================================================================== - -RUN set -eux && \ - cd /home/pgedge/spockbench && \ - sudo python3 setup.py install && \ - echo "Spockbench installation complete" - # ============================================================================== # Runtime Configuration # ============================================================================== diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index 80169c92..14e72545 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -34,6 +34,7 @@ then set_name := 'default', relation := 't4' ); + SELECT spock.wait_slot_confirm_lsn(NULL, NULL); _EOF_ # ---- @@ -50,12 +51,14 @@ _EOF_ set_name := 'default', relation := 't4' ); + SELECT spock.wait_slot_confirm_lsn(NULL, NULL); _EOF_ psql -h /tmp <<_EOF_ INSERT INTO t4 VALUES (1, 'trigger duplicate key'); UPDATE t4 SET data = 'trigger missing key on UPDATE' WHERE id = 2; DELETE FROM t4 WHERE id = 3; -- trigger missing key on DELETE + SELECT spock.wait_slot_confirm_lsn(NULL, NULL); _EOF_ echo "Waiting for apply worker timeouts..." From 2d7e0badf6597d94f7059196f9f3666b28582105 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Wed, 14 Jan 2026 13:17:15 +0100 Subject: [PATCH 4/9] Put spockbench as a first test --- .github/workflows/spockbench.yml | 69 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/.github/workflows/spockbench.yml b/.github/workflows/spockbench.yml index fe5161a3..894df438 100644 --- a/.github/workflows/spockbench.yml +++ b/.github/workflows/spockbench.yml @@ -54,6 +54,40 @@ jobs: --build-arg PGVER=${{ matrix.pgver }} \ -t spock -f tests/docker/Dockerfile-step-1.el9 .
+ - name: Start docker cluster + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + echo PG_VER=${{ matrix.pgver }} >> pgedge.env + env BASE_IMAGE=ghcr.io/pgedge/base-test-image:latest \ + docker compose up --wait -d + timeout-minutes: 20 + + - name: Run tests on all nodes + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + docker compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & + PID1=$! + docker compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & + PID2=$! + docker compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & + PID3=$! + wait $PID1 + wait $PID2 + wait $PID3 + timeout-minutes: 30 + + - name: Check spockbench output + if: ${{ always() }} + run: | + cd ${GITHUB_WORKSPACE}/tests/docker + ./check-outputs.sh || true + + - name: Cleanup docker + if: ${{ always() }} + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + docker compose down || true + - name: Run regression tests run: | REG_CT_NAME="spock-regress-${{ matrix.pgver }}-${{ github.run_id }}-${{ github.run_attempt }}" @@ -102,38 +136,3 @@ jobs: tests/logs/** if-no-files-found: ignore retention-days: 7 - - - name: Start docker cluster - run: | - cd ${GITHUB_WORKSPACE}/tests/docker/ - echo PG_VER=${{ matrix.pgver }} >> pgedge.env - env BASE_IMAGE=ghcr.io/pgedge/base-test-image:latest \ - docker compose up --wait -d --build - timeout-minutes: 20 - - - name: Run tests on all nodes - run: | - cd ${GITHUB_WORKSPACE}/tests/docker/ - docker compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & - PID1=$! - docker compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & - PID2=$! - docker compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & - PID3=$! 
- wait $PID1 - wait $PID2 - wait $PID3 - timeout-minutes: 30 - - - name: Check spockbench output - if: ${{ always() }} - run: | - cd ${GITHUB_WORKSPACE}/tests/docker - ./check-outputs.sh || true - - - name: Cleanup docker - if: ${{ always() }} - run: | - cd ${GITHUB_WORKSPACE}/tests/docker/ - docker compose down || true - From bbe5023e90e801cfef30d8c6fd98fd86c9a6186f Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Wed, 14 Jan 2026 14:03:22 +0100 Subject: [PATCH 5/9] Spockbench --- .github/workflows/spockbench.yml | 40 +++++++++++++++++++++++- src/spock_apply_heap.c | 5 ++- src/spock_conflict.c | 2 +- tests/docker/Dockerfile-step-1.el9 | 14 ++++----- tests/docker/entrypoint.sh | 8 ++--- tests/docker/pgedge.env | 6 ++-- tests/docker/run-tests.sh | 50 ++++++++++++++++++++---------- 7 files changed, 90 insertions(+), 35 deletions(-) diff --git a/.github/workflows/spockbench.yml b/.github/workflows/spockbench.yml index 894df438..1cfb3df6 100644 --- a/.github/workflows/spockbench.yml +++ b/.github/workflows/spockbench.yml @@ -65,16 +65,54 @@ jobs: - name: Run tests on all nodes run: | cd ${GITHUB_WORKSPACE}/tests/docker/ + + # Launch tests in background with per-node timeout and capture PIDs + # Each node gets 5 minutes max (more than RUNTIME=60s + overhead) docker compose exec -T pgedge-n1 bash -c "~/tests/run-tests.sh" & PID1=$! docker compose exec -T pgedge-n2 bash -c "~/tests/run-tests.sh" & PID2=$! docker compose exec -T pgedge-n3 bash -c "~/tests/run-tests.sh" & PID3=$! + + # Wait for all jobs and capture their exit codes wait $PID1 + EXIT1=$? wait $PID2 + EXIT2=$? wait $PID3 - timeout-minutes: 30 + EXIT3=$? 
+ + # Fail if any node failed + if [ $EXIT1 -ne 0 ] || [ $EXIT2 -ne 0 ] || [ $EXIT3 -ne 0 ]; then + echo "ERROR: One or more nodes failed" + exit 1 + fi + + echo "All nodes completed successfully" + timeout-minutes: 10 + + - name: Collect node logs + if: ${{ always() }} + run: | + cd ${GITHUB_WORKSPACE}/tests/docker/ + mkdir -p node-logs + + # Collect PostgreSQL logs from each node + for node in n1 n2 n3; do + echo "Collecting logs from $node..." + docker compose cp pgedge-$node:/home/pgedge/pgedge/data/pg${{ matrix.pgver }}/log node-logs/$node-pg-log/ || true + docker compose logs pgedge-$node > node-logs/$node-container.log 2>&1 || true + done + + - name: Upload node logs + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: spockbench-node-logs-${{ matrix.pgver }} + path: tests/docker/node-logs/ + if-no-files-found: ignore + retention-days: 7 - name: Check spockbench output if: ${{ always() }} diff --git a/src/spock_apply_heap.c b/src/spock_apply_heap.c index 2fd28e4c..b79ab41b 100644 --- a/src/spock_apply_heap.c +++ b/src/spock_apply_heap.c @@ -928,8 +928,7 @@ spock_apply_heap_insert(SpockRelation *rel, SpockTupleData *newtup) remoteslot, &localslot, true); - if (check_all_uc_indexes && - !found) + if (check_all_uc_indexes && !found) { /* * Handle the special case of looking through all unique indexes @@ -1190,7 +1189,7 @@ spock_apply_heap_delete(SpockRelation *rel, SpockTupleData *oldtup) { HeapTuple remotetuple; SpockExceptionLog *exception_log = &exception_log_ptr[my_exception_log_index]; - +elog(WARNING, "--> delete resolve"); /* * The tuple to be deleted could not be found. Do nothing except for * logging it in resolutions table. 
diff --git a/src/spock_conflict.c b/src/spock_conflict.c index 0987ecd9..b4ddf8db 100644 --- a/src/spock_conflict.c +++ b/src/spock_conflict.c @@ -348,7 +348,7 @@ spock_report_conflict(ConflictType conflict_type, const char *idxname = "(unknown)"; const char *qualrelname; - +elog(LOG, "spock_report_conflict called with conflict_type=%d", conflict_type); /* Ignore update-update conflict for same origin */ if (conflict_type == CT_UPDATE_EXISTS) { diff --git a/tests/docker/Dockerfile-step-1.el9 b/tests/docker/Dockerfile-step-1.el9 index 034b1224..f2bb5ac4 100644 --- a/tests/docker/Dockerfile-step-1.el9 +++ b/tests/docker/Dockerfile-step-1.el9 @@ -188,13 +188,13 @@ RUN set -eux && \ echo "========================================" && \ echo "Building PostgreSQL (${MAKE_JOBS} jobs)" && \ echo "========================================" && \ - make -j${MAKE_JOBS} && \ - make -C contrib -j${MAKE_JOBS} && \ + make -j${MAKE_JOBS} > /dev/null && \ + make -C contrib -j${MAKE_JOBS} > /dev/null && \ echo "========================================" && \ echo "Installing PostgreSQL" && \ echo "========================================" && \ - make install && \ - make -C contrib install && \ + make install > /dev/null && \ + make -C contrib install > /dev/null && \ echo "PostgreSQL installation complete" # ============================================================================== @@ -207,9 +207,9 @@ RUN set -eux && \ echo "========================================" && \ echo "Building Spock Extension" && \ echo "========================================" && \ - make clean && \ - make -j${MAKE_JOBS} && \ - make install && \ + make clean > /dev/null && \ + make -j${MAKE_JOBS} > /dev/null && \ + make install > /dev/null && \ echo "export SPOCK_SOURCE_DIR=/home/pgedge/spock" >> /home/pgedge/.bashrc && \ echo "Spock installation complete" diff --git a/tests/docker/entrypoint.sh b/tests/docker/entrypoint.sh index 54da9d4b..4bbc54f3 100644 --- a/tests/docker/entrypoint.sh +++ 
b/tests/docker/entrypoint.sh @@ -63,7 +63,7 @@ track_commit_timestamp = 'on' max_worker_processes = 32 max_replication_slots = 32 max_wal_senders = 32 -log_min_messages = debug1 +log_min_messages = debug5 # Network configuration for multi-node cluster listen_addresses = '*' @@ -101,7 +101,7 @@ EOF psql -h /tmp -c "CREATE EXTENSION spock" - if [[ $(HOSTNAME) == "n1" ]]; then + if [[ $HOSTNAME == "n1" ]]; then # First node specific action psql -h /tmp -c " SELECT spock.node_create( @@ -115,9 +115,9 @@ EOF psql -h /tmp -f ${SPOCK_SOURCE_DIR}/samples/Z0DAN/zodan.sql psql -h /tmp -c "CALL spock.add_node( src_node_name := 'n1', - src_dsn := 'host=n1 dbname=${PGDATABASE} user=${PGUSER}', + src_dsn := 'host=n1 port=${PGPORT} dbname=${PGDATABASE} user=${PGUSER}', new_node_name := '${HOSTNAME}', - new_node_dsn := 'dbname=${PGDATABASE} user=${PGUSER}', + new_node_dsn := 'host=${HOSTNAME} port=${PGPORT} dbname=${PGDATABASE} user=${PGUSER}', verb := true, new_node_country := 'USA', new_node_location := 'NYC', diff --git a/tests/docker/pgedge.env b/tests/docker/pgedge.env index 1759fdf7..f38f2865 100644 --- a/tests/docker/pgedge.env +++ b/tests/docker/pgedge.env @@ -6,7 +6,7 @@ DBPORT=5432 # Spockbench related params SCALEFACTOR=10 -RUNTIME=120 +RUNTIME=60 RATE=3000 -THREADS=16 -CONNECTIONS=50 +THREADS=6 +CONNECTIONS=3 diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index 14e72545..b85b47e1 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -81,41 +81,59 @@ _EOF_ exit 1 fi - - resolution_check=$(psql -X -A -t -h ${peer_names[0]} -c " SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.t4'") + resolution_check=$(psql -X -A -t -h ${peer_names[0]} -c \ + "SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.t4'") insert_exists_count=$(echo "$resolution_check" | grep -c 'insert_exists') - delete_delete_count=$(echo "$resolution_check" | grep -c 'delete_delete') + delete_missing_count=$(echo "$resolution_check" 
| grep -c 'delete_missing') - if [ "$insert_exists_count" -eq 1 ] && [ "$delete_delete_count" -eq 1 ]; + if [ "$insert_exists_count" -eq 1 ] && [ "$delete_missing_count" -eq 1 ]; then - echo "PASS: Found both insert_exists and delete_delete for public.t4" + echo "PASS: Found both insert_exists and delete_missing for public.t4" else - psql -h ${peer_names[0]} -c "select * from spock.resolutions where relname = 'public.t4'" + psql -h ${peer_names[0]} -c "SELECT * FROM spock.resolutions WHERE relname = 'public.t4'" echo "FAIL: Resolution entries for public.t4 are incorrect" echo "Resolutions check=$resolution_check" - echo "Found: insert_exists=$insert_exists_count, delete_delete=$delete_delete_count" + echo "Found: insert_exists=$insert_exists_count, delete_missing=$delete_missing_count" exit 1 fi fi +# The Auto-DDL LR is disabled, so we create the same tables and data on each node spockbench -h /tmp -i -s $SCALEFACTOR $PGDATABASE psql -h /tmp <<_EOF_ - SET spock.enable_ddl_replication = 'on'; - SET spock.include_ddl_repset = 'on'; - - SELECT spock.repset_add_all_tables('default', '{public}'); + -- SET spock.enable_ddl_replication = 'on'; + -- SET spock.include_ddl_repset = 'on'; + /* + * To use delta_apply we should add NOT NULL constraint on such a column first. + * Do it on each node - remember, we don't have Auto-DDL enabled. + */ ALTER TABLE pgbench_accounts ALTER COLUMN abalance SET NOT NULL; ALTER TABLE pgbench_branches ALTER COLUMN bbalance SET NOT NULL; ALTER TABLE pgbench_tellers ALTER COLUMN tbalance SET NOT NULL; - SELECT spock.delta_apply('pgbench_accounts', 'abalance', false); - SELECT spock.delta_apply('pgbench_branches', 'bbalance', false); - SELECT spock.delta_apply('pgbench_tellers', 'tbalance', false); + + /* + * Each node adds test tables to replication sets.
Hence, further DML will be + * propagated by LR to other nodes + */ + SELECT spock.repset_add_all_tables('default', '{public}'); + + +-- SELECT spock.delta_apply('pgbench_accounts', 'abalance', false); +-- SELECT spock.delta_apply('pgbench_branches', 'bbalance', false); +-- SELECT spock.delta_apply('pgbench_tellers', 'tbalance', false); _EOF_ # ==========Spockbench tests ========== -spockbench -h /tmp --spock-num-nodes=3 --spock-node=${HOSTNAME:0-1} -s $SCALEFACTOR -T $RUNTIME -R $RATE -P 5 -j $THREADS -c $CONNECTIONS -n --spock-tx-mix=550,225,225 -U $PGUSER $PGDATABASE + +# By default, spockbench enables delta apply and sets up this option on the +# 'balance' columns. +spockbench -h /tmp --spock-num-nodes=3 --spock-node=${HOSTNAME:0-1} \ + -s $SCALEFACTOR -T $RUNTIME -R $RATE -P 5 -j $THREADS -c $CONNECTIONS \ + -n --spock-tx-mix=550,225,225 -U $PGUSER $PGDATABASE + spockbench-check -U $PGUSER $PGDATABASE > /home/pgedge/spock/spockbench-$HOSTNAME.out -grep -q "ERROR" /home/pgedge/spock/spockbench-*.out && exit 1 || exit 0 +# Check only this node's output file, not all nodes +grep -q "ERROR" /home/pgedge/spock/spockbench-$HOSTNAME.out && exit 1 || exit 0 From 3bae3825c3a0d0b22755f4d1260fda1e0a80728a Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Thu, 15 Jan 2026 11:43:20 +0100 Subject: [PATCH 6/9] 1 --- tests/docker/run-tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index b85b47e1..a1078524 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -110,9 +110,9 @@ psql -h /tmp <<_EOF_ * To use delta_apply we should add NOT NULL constraint on such a column first. * Do it on each node - remember, we don't have Auto-DDL enabled.
*/ - ALTER TABLE pgbench_accounts ALTER COLUMN abalance SET NOT NULL; - ALTER TABLE pgbench_branches ALTER COLUMN bbalance SET NOT NULL; - ALTER TABLE pgbench_tellers ALTER COLUMN tbalance SET NOT NULL; +-- ALTER TABLE pgbench_accounts ALTER COLUMN abalance SET NOT NULL; +-- ALTER TABLE pgbench_branches ALTER COLUMN bbalance SET NOT NULL; +-- ALTER TABLE pgbench_tellers ALTER COLUMN tbalance SET NOT NULL; /* * Each node adds test tables to replication sets. Hence, further DML will be @@ -132,8 +132,8 @@ _EOF_ # 'balance' columns. spockbench -h /tmp --spock-num-nodes=3 --spock-node=${HOSTNAME:0-1} \ -s $SCALEFACTOR -T $RUNTIME -R $RATE -P 5 -j $THREADS -c $CONNECTIONS \ - -n --spock-tx-mix=550,225,225 -U $PGUSER $PGDATABASE + -n --spock-tx-mix=550,225,225 $PGDATABASE -spockbench-check -U $PGUSER $PGDATABASE > /home/pgedge/spock/spockbench-$HOSTNAME.out +spockbench-check $PGDATABASE > /home/pgedge/spock/spockbench-$HOSTNAME.out # Check only this node's output file, not all nodes grep -q "ERROR" /home/pgedge/spock/spockbench-$HOSTNAME.out && exit 1 || exit 0 From 95d306789901dfe2eba5c6845644c4e733f58bbd Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Thu, 15 Jan 2026 11:49:35 +0100 Subject: [PATCH 7/9] f --- tests/docker/run-tests.sh | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index a1078524..fb5ce2ff 100755 --- a/tests/docker/run-tests.sh +++ b/tests/docker/run-tests.sh @@ -6,6 +6,35 @@ source "${HOME}/.bashrc" IFS=',' read -r -a peer_names <<< "$PEER_NAMES" +function wait_for_pg() +{ + local max_attempts=${1:-24} + local sleep_seconds=${2:-5} + + # Build list of all hosts to check: local node + all peers + local hosts=("/tmp") + for peer in "${peer_names[@]}"; do + hosts+=("$peer") + done + + for host in "${hosts[@]}"; do + count=0 + while ! 
pg_isready -h "$host"; do + if [ $count -ge $max_attempts ]; then + echo "Gave up waiting for PostgreSQL on $host to become ready..." + exit 1 + fi + + echo "Waiting for PostgreSQL on $host to become ready..." + sleep $sleep_seconds + ((count++)) + done + echo "PostgreSQL on $host is ready" + done +} + +wait_for_pg 10 1 + #========== Exception Log tests ========== # We perform the following tests in two cases: From 8a95206c3f944fceb5d4571fc93a53a7c80ccb90 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Thu, 15 Jan 2026 13:20:42 +0100 Subject: [PATCH 8/9] healthcheck --- tests/docker/docker-compose.yml | 6 +++--- tests/docker/entrypoint.sh | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml index 1c4f2883..df6f7ccd 100644 --- a/tests/docker/docker-compose.yml +++ b/tests/docker/docker-compose.yml @@ -31,7 +31,7 @@ services: - ./cores/n1:/cores - .:/home/pgedge/tests healthcheck: - test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + test: ["CMD-SHELL", "pg_isready -h /tmp -p $$DBPORT -U $$DBUSER -d $$DBNAME && test -f /tmp/spock_init_complete"] interval: 10s timeout: 5s retries: 5 @@ -72,7 +72,7 @@ services: - ./cores/n2:/cores - .:/home/pgedge/tests healthcheck: - test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + test: ["CMD-SHELL", "pg_isready -h /tmp -p $$DBPORT -U $$DBUSER -d $$DBNAME && test -f /tmp/spock_init_complete"] interval: 10s timeout: 5s retries: 5 @@ -113,7 +113,7 @@ services: - ./cores/n3:/cores - .:/home/pgedge/tests healthcheck: - test: ["CMD", "pg_isready", "-h", "/tmp", "-U", "admin"] + test: ["CMD-SHELL", "pg_isready -h /tmp -p $$DBPORT -U $$DBUSER -d $$DBNAME && test -f /tmp/spock_init_complete"] interval: 10s timeout: 5s retries: 5 diff --git a/tests/docker/entrypoint.sh b/tests/docker/entrypoint.sh index 4bbc54f3..0fc5d9ad 100644 --- a/tests/docker/entrypoint.sh +++ b/tests/docker/entrypoint.sh @@ -132,6 +132,11 @@ EOF # This waits 
for all connections to close - if it hangs, it indicates a problem # that should be fixed (e.g., forgotten connection, long transaction) pg_ctl -D "${PGDATA}" -m smart -t 60 stop + + # Mark initialization complete for healthcheck + touch /tmp/spock_init_complete else echo "Using existing PostgreSQL cluster at ${PGDATA}" + # Ensure marker exists for container restarts + touch /tmp/spock_init_complete fi From b3205bcfb781509462e8790a8e5caf8e79c476fe Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Thu, 15 Jan 2026 14:31:35 +0100 Subject: [PATCH 9/9] 2 --- .github/workflows/spockbench.yml | 2 +- tests/docker/entrypoint.sh | 3 ++- tests/docker/run-tests.sh | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/spockbench.yml b/.github/workflows/spockbench.yml index 1cfb3df6..588c8e1d 100644 --- a/.github/workflows/spockbench.yml +++ b/.github/workflows/spockbench.yml @@ -59,7 +59,7 @@ jobs: cd ${GITHUB_WORKSPACE}/tests/docker/ echo PG_VER=${{ matrix.pgver }} >> pgedge.env env BASE_IMAGE=ghcr.io/pgedge/base-test-image:latest \ - docker compose up --wait -d + docker compose up --build --wait -d timeout-minutes: 20 - name: Run tests on all nodes diff --git a/tests/docker/entrypoint.sh b/tests/docker/entrypoint.sh index 0fc5d9ad..7b89b383 100644 --- a/tests/docker/entrypoint.sh +++ b/tests/docker/entrypoint.sh @@ -63,7 +63,7 @@ track_commit_timestamp = 'on' max_worker_processes = 32 max_replication_slots = 32 max_wal_senders = 32 -log_min_messages = debug5 +log_min_messages = log # Network configuration for multi-node cluster listen_addresses = '*' @@ -71,6 +71,7 @@ listen_addresses = '*' # Spock Configuration shared_preload_libraries = 'spock' spock.conflict_resolution = 'last_update_wins' +spock.save_resolutions = 'on' EOF # Configure client authentication for Docker network diff --git a/tests/docker/run-tests.sh b/tests/docker/run-tests.sh index fb5ce2ff..4552a984 100755 --- a/tests/docker/run-tests.sh +++ 
b/tests/docker/run-tests.sh @@ -90,8 +90,13 @@ _EOF_ SELECT spock.wait_slot_confirm_lsn(NULL, NULL); _EOF_ - echo "Waiting for apply worker timeouts..." - sleep 5 + # To be sure that conflict resolution has happened we need to wait until the + # following transaction arrives + lsn1=$(psql -A -t -h /tmp -c "SELECT spock.sync_event()") + echo "Wait until XLogRecord $lsn1 arrive and applies from $HOSTNAME to ${peer_names[0]}" + psql -A -t -h ${peer_names[0]} -c \ + "CALL spock.wait_for_sync_event(true, '$HOSTNAME', '$lsn1'::pg_lsn, 30)" + echo "Checking the exception table now..." elog_entries=$(psql -A -t -h ${peer_names[0]} -c " SELECT count(*)