From acb742bdc4126e7ba2be62b439fe47e5148ad7c8 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 2 Jul 2025 07:17:55 -0400 Subject: [PATCH 1/6] dev setup v12 --- .clang-format | 71 ++ .clangd | 89 ++ .envrc | 6 + .gitignore | 8 + .idea/.gitignore | 8 + .idea/editor.xml | 580 +++++++++++++ .idea/inspectionProfiles/Project_Default.xml | 7 + .idea/misc.xml | 18 + .idea/prettier.xml | 6 + .idea/vcs.xml | 6 + .vscode/launch.json | 22 + .vscode/settings.json | 5 + flake.lock | 78 ++ flake.nix | 45 + pg-aliases.sh | 300 +++++++ shell.nix | 814 +++++++++++++++++++ src/test/regress/pg_regress.c | 2 +- src/tools/pgindent/pgindent | 2 +- 18 files changed, 2065 insertions(+), 2 deletions(-) create mode 100644 .clang-format create mode 100644 .clangd create mode 100644 .envrc create mode 100644 .idea/.gitignore create mode 100644 .idea/editor.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/prettier.xml create mode 100644 .idea/vcs.xml create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 pg-aliases.sh create mode 100644 shell.nix diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000..2f786ac8eef05 --- /dev/null +++ b/.clang-format @@ -0,0 +1,71 @@ +# the official .clang-format style for https://github.com/taocpp +# +# clang-format-4.0 -i -style=file $(find -name '[^.]*.[hc]pp') + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -3 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false 
+AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: true + AfterControlStatement: false + AfterEnum : true + AfterFunction : true + AfterNamespace : true + AfterStruct : true + AfterUnion : true + BeforeCatch : true + BeforeElse : true + IndentBraces : false +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakStringLiterals: false +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 0 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 3 +ContinuationIndentWidth: 3 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: true +IndentWidth: 3 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +PointerAlignment: Left +ReflowComments: false +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: Never +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: true +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: true +SpacesInParentheses: true +SpacesInSquareBrackets: true +TabWidth: 8 +UseTab: Never diff --git a/.clangd b/.clangd new file mode 100644 index 0000000000000..500c5d0d258d6 --- /dev/null +++ b/.clangd @@ -0,0 +1,89 @@ +Diagnostics: + MissingIncludes: None +InlayHints: + Enabled: true + ParameterNames: true + DeducedTypes: true +CompileFlags: + CompilationDatabase: build/ # Search build/ directory for compile_commands.json + Remove: [ -Werror ] + Add: + - -DDEBUG + - -DLOCAL + - -DPGDLLIMPORT= + - -DPIC + - -O2 + - -Wall + - -Wcast-function-type + - -Wconversion + - -Wdeclaration-after-statement + - -Wendif-labels + - -Werror=vla + - -Wextra 
+ - -Wfloat-equal + - -Wformat-security + - -Wimplicit-fallthrough=3 + - -Wmissing-format-attribute + - -Wmissing-prototypes + - -Wno-format-truncation + - -Wno-sign-conversion + - -Wno-stringop-truncation + - -Wno-unused-const-variable + - -Wpointer-arith + - -Wshadow + - -Wshadow=compatible-local + - -fPIC + - -fexcess-precision=standard + - -fno-strict-aliasing + - -fvisibility=hidden + - -fwrapv + - -g + - -std=c11 + - -I. + - -I../../../../src/include +# gcc -E -v -xc++ /dev/null +# - -I/nix/store/l2sgvfcyqc1bgnzpz86qw5pjq99j8vlw-libtool-2.5.4/include +# - -I/nix/store/n087ac9g368fbl6h57a2mdd741lshzrc-file-5.46-dev/include +# - -I/nix/store/p7z72c2s722pbw31jmm3y0nwypksb5fj-gnumake-4.4.1/include +# - -I/nix/store/wzwlizg15dwh6x0h3ckjmibdblfkfdzf-flex-2.6.4/include +# - -I/nix/store/8nh579b2yl3sz2yfwyjc9ksb0jb7kwf5-libxslt-1.1.43-dev/include +# - -I/nix/store/cisb0723v3pgp74f2lj07z5d6w3j77sl-libxml2-2.13.8-dev/include +# - -I/nix/store/245c5yscaxyxi49fz9ys1i1apy5s2igz-valgrind-3.24.0-dev/include +# - -I/nix/store/nmxr110602fvajr9ax8d65ac1g40vx1a-curl-8.13.0-dev/include +# - -I/nix/store/slqvy0fgnwmvaq3bxmrvqclph8x909i2-brotli-1.1.0-dev/include +# - -I/nix/store/lchvccw6zl1z1wmhqayixcjcqyhqvyj7-krb5-1.21.3-dev/include +# - -I/nix/store/hybw3vnacqmm68fskbcchrbmj0h4ffv2-nghttp2-1.65.0-dev/include +# - -I/nix/store/2m0s7qxq2kgclyh6cfbflpxm65aga2h4-libidn2-2.3.8-dev/include +# - -I/nix/store/kcgqglb4iax0zh5jlrxmjdik93wlgsrq-openssl-3.4.1-dev/include +# - -I/nix/store/8mlcjg5js2r0zrpdjlfaxax6hyvppgz5-libpsl-0.21.5-dev/include +# - -I/nix/store/1nygjgimkj4wnmydzd6brsw6m0rd7gmx-libssh2-1.11.1-dev/include +# - -I/nix/store/cbdvjyn19y77m8l06n089x30v7irqz3j-zlib-1.3.1-dev/include +# - -I/nix/store/x10zhllc0rhk1s1mhjvsrzvbg55802gj-zstd-1.5.7-dev/include +# - -I/nix/store/8w718rm43x7z73xhw9d6vh8s4snrq67h-python3-3.12.10/include +# - -I/nix/store/1lrgn56jw2yww4bxj0frpgvahqh9i7gl-perf-linux-6.12.35/include +# - -I/nix/store/j87n5xqfj6c03633g7l95lfjq5ynml13-gdb-16.2/include +# 
- -I/nix/store/ih8dkkw9r7zx5fxg3arh53qc9zs422d1-llvm-21.1.0-dev/include +# - -I/nix/store/rz4bmcm8dwsy7ylx6rhffkwkqn6n8srn-ncurses-6.5-dev/include +# - -I/nix/store/29mcvdnd9s6sp46cjmqm0pfg4xs56rik-zlib-1.3.1-dev/include +# - -I/nix/store/42288hw25sc2gchgc5jp4wfgwisa0nxm-lldb-21.1.0-dev/include +# - -I/nix/store/wpfdp7vzd7h7ahnmp4rvxfcklg4viknl-tcl-8.6.15/include +# - -I/nix/store/4sq2x2770k0xrjshdi6piqrazqjfi5s4-readline-8.2p13-dev/include +# - -I/nix/store/myw381bc9yqd709hpray9lp7l98qmlm1-ncurses-6.5-dev/include +# - -I/nix/store/dvhx24q4icrig4q1v1lp7kzi3izd5jmb-icu4c-76.1-dev/include +# - -I/nix/store/7ld4hdn561a4vkk5hrkdhq8r6rxw8shl-lz4-1.10.0-dev/include +# - -I/nix/store/fnzbi6b8q79faggzj53paqi7igr091w0-util-linux-minimal-2.41-dev/include +# - -I/nix/store/vrdwlbzr74ibnzcli2yl1nxg9jqmr237-linux-pam-1.6.1/include +# - -I/nix/store/qizipyz9y17nr4w4gmxvwd3x4k0bp2rh-libxcrypt-4.4.38/include +# - -I/nix/store/7z8illxfqr4mvwh4l3inik6vdh12jx09-numactl-2.0.18-dev/include +# - -I/nix/store/f6lmz5inbk7qjc79099q4jvgzih7zbhy-openldap-2.6.9-dev/include +# - -I/nix/store/28vmjd90wzd6gij5a1nfj4nqaw191cfg-liburing-2.9-dev/include +# - -I/nix/store/75cyhmjxzx8z7v2z8vrmrydwraf00wyi-libselinux-3.8.1-dev/include +# - -I/nix/store/r25srliigrrv5q3n7y8ms6z10spvjcd9-glibc-2.40-66-dev/include +# - -I/nix/store/ldp1izmflvc74bd4n2svhrd5xrz61wyi-lld-21.1.0-dev/include +# - -I/nix/store/wd5cm50kmlw8n9mq6l1mkvpp8g443a1g-compiler-rt-libc-21.1.0-dev/include +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322/ +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322//x86_64-unknown-linux-gnu +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322//backward +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/lib/gcc/x86_64-unknown-linux-gnu/14.2.1/include +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include +# - 
-I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/lib/gcc/x86_64-unknown-linux-gnu/14.2.1/include-fixed diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000000..33b05aab1c50c --- /dev/null +++ b/.envrc @@ -0,0 +1,6 @@ +watch_file flake.nix +use flake + +#export MESON_EXTRA_SETUP="-Db_coverage=true" +#export GENINFO_OPTIONS="--ignore-errors inconsistent,gcov" +#export LCOV_OPTIONS="--ignore-errors inconsistent,gcov" diff --git a/.gitignore b/.gitignore index 4e911395fe3ba..8e429d66ca41f 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,11 @@ lib*.pc /Release/ /tmp_install/ /portlock/ + +build/ +install/ +test-db/ +.direnv/ +.cache/ +.history + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000000000..13566b81b018a --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000000000..1f0ef49b4faf4 --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,580 @@ + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000000000..9c69411050eac --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000000000..53624c9e1f9ab --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,18 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/prettier.xml b/.idea/prettier.xml new file mode 100644 index 0000000000000..b0c1c68fbbad6 --- /dev/null +++ b/.idea/prettier.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 
0000000000000..35eb1ddfbbc02 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000000000..f5d97424c5047 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Attach Postgres", + "type": "cppdbg", + "request": "attach", + "program": "${workspaceRoot}/install/bin/postgres", + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000..cc8a64fa9fa85 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "syscache.h": "c" + } +} \ No newline at end of file diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000000..a609589066525 --- /dev/null +++ b/flake.lock @@ -0,0 +1,78 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1751211869, + "narHash": "sha256-1Cu92i1KSPbhPCKxoiVG5qnoRiKTgR5CcGSRyLpOd7Y=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "b43c397f6c213918d6cfe6e3550abfe79b5d1c51", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + 
"type": "github" + } + }, + "nixpkgs-unstable": { + "locked": { + "lastModified": 1757651841, + "narHash": "sha256-Lh9QoMzTjY/O4LqNwcm6s/WSYStDmCH6f3V/izwlkHc=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "ad4e6dd68c30bc8bd1860a27bc6f0c485bd7f3b6", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "nixpkgs-unstable": "nixpkgs-unstable" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000000..aae6d54c4c8cf --- /dev/null +++ b/flake.nix @@ -0,0 +1,45 @@ +{ + description = "PostgreSQL development environment"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + nixpkgs-unstable.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + nixpkgs-unstable, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem ( + system: let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + pkgs-unstable = import nixpkgs-unstable { + inherit system; + config.allowUnfree = true; + }; + + shellConfig = import ./shell.nix {inherit pkgs pkgs-unstable system;}; + in { + formatter = pkgs.alejandra; + devShells = { + default = shellConfig.devShell; + gcc = shellConfig.devShell; + clang = shellConfig.clangDevShell; + gcc-musl = shellConfig.muslDevShell; + clang-musl = shellConfig.clangMuslDevShell; + }; + + packages = { + inherit (shellConfig) gdbConfig flameGraphScript pgbenchScript; + }; 
+
+        environment.localBinInPath = true;
+      }
+    );
+}
diff --git a/pg-aliases.sh b/pg-aliases.sh
new file mode 100644
index 0000000000000..221ccdd773846
--- /dev/null
+++ b/pg-aliases.sh
@@ -0,0 +1,347 @@
+# PostgreSQL Development Aliases
+#
+# Source this file from an interactive shell. The commands below read
+# PG_SOURCE_DIR, PG_BUILD_DIR, PG_INSTALL_DIR, PG_DATA_DIR, PG_BENCH_DIR,
+# PG_FLAME_DIR, GDBINIT, PERL_CORE_DIR, MESON_EXTRA_SETUP and CC.
+
+# Build system management
+
+# Remove the build directory when the compiler named in its
+# compile_commands.json differs from the current $CC, then record the
+# current compiler in "$PG_BUILD_DIR/.compiler_used".
+# Returns 1 without touching anything when PG_BUILD_DIR is empty.
+pg_clean_for_compiler() {
+    local build_dir="$PG_BUILD_DIR"
+    local current_compiler last_compiler
+
+    # An empty path would turn the redirect below into "/.compiler_used".
+    if [ -z "$build_dir" ]; then
+        echo "Error: PG_BUILD_DIR is not set" >&2
+        return 1
+    fi
+
+    # Quoted so a CC value containing spaces is passed as a single operand.
+    current_compiler="$(basename -- "${CC:-unknown}")"
+
+    if [ -f "$build_dir/compile_commands.json" ]; then
+        last_compiler=$(grep -o '/[^/]*/bin/[gc]cc\|/[^/]*/bin/clang' "$build_dir/compile_commands.json" | head -1 | xargs basename 2>/dev/null || echo "unknown")
+
+        if [ "$last_compiler" != "$current_compiler" ] && [ "$last_compiler" != "unknown" ]; then
+            echo "Detected compiler change from $last_compiler to $current_compiler"
+            echo "Cleaning build directory..."
+            rm -rf "$build_dir"
+            mkdir -p "$build_dir"
+        fi
+    fi
+
+    mkdir -p "$build_dir"
+    echo "$current_compiler" >"$build_dir/.compiler_used"
+}
+
+# Core PostgreSQL commands
+
+# Configure the meson build in "$PG_BUILD_DIR" for the tree in "$PG_SOURCE_DIR".
+# This is a function, not an alias: "return" is only valid inside a function
+# or a sourced script, so in an alias the guard below could not stop meson.
+# Drop any alias of the same name first so re-sourcing still parses.
+unalias pg-setup 2>/dev/null || true
+pg-setup() {
+    if [ -z "$PERL_CORE_DIR" ]; then
+        echo "Error: Could not find perl CORE directory" >&2
+        return 1
+    fi
+
+    pg_clean_for_compiler || return 1
+
+    echo "=== PostgreSQL Build Configuration ==="
+    echo "Compiler: $CC"
+    echo "LLVM: $(llvm-config --version 2>/dev/null || echo 'disabled')"
+    echo "Source: $PG_SOURCE_DIR"
+    echo "Build: $PG_BUILD_DIR"
+    echo "Install: $PG_INSTALL_DIR"
+    echo "======================================"
+    # --fatal-meson-warnings
+
+    # MESON_EXTRA_SETUP stays unquoted so an empty value adds no argument.
+    env CFLAGS="-I$PERL_CORE_DIR $CFLAGS" \
+        LDFLAGS="-L$PERL_CORE_DIR -lperl $LDFLAGS" \
+        meson setup $MESON_EXTRA_SETUP \
+        --reconfigure \
+        -Doptimization=g \
+        -Ddebug=true \
+        -Db_sanitize=none \
+        -Db_lundef=false \
+        -Dlz4=enabled \
+        -Dzstd=enabled \
+        -Dllvm=disabled \
+        -Dplperl=enabled \
+        -Dplpython=enabled \
+        -Dpltcl=enabled \
+        -Dlibxml=enabled \
+        -Duuid=e2fs \
+        -Dlibxslt=enabled \
+        -Dssl=openssl \
+        -Dldap=disabled \
+        -Dcassert=true \
+        -Dtap_tests=enabled \
+        -Dinjection_points=true \
+        -Ddocs_pdf=enabled \
+        -Ddocs_html_style=website \
+        --prefix="$PG_INSTALL_DIR" \
+        "$PG_BUILD_DIR" \
+        "$PG_SOURCE_DIR"
+}
+
+# Uses PG_BUILD_DIR like the other build commands instead of a fixed "build/".
+alias pg-compdb='compdb -p "$PG_BUILD_DIR" list > compile_commands.json'
+alias pg-build='meson compile -C "$PG_BUILD_DIR"'
+alias pg-install='meson install -C "$PG_BUILD_DIR"'
+alias pg-test='meson test -q --print-errorlogs -C "$PG_BUILD_DIR"'
+
+# Clean commands
+alias pg-clean='ninja -C "$PG_BUILD_DIR" clean'
+alias pg-full-clean='rm -rf "$PG_BUILD_DIR" "$PG_INSTALL_DIR" && echo "Build and install directories cleaned"'
+
+# Database management
+alias pg-init='rm -rf "$PG_DATA_DIR" && "$PG_INSTALL_DIR/bin/initdb" --debug --no-clean "$PG_DATA_DIR"'
+alias pg-start='"$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR" -k "$PG_DATA_DIR"'
+alias pg-stop='pkill -f "postgres.*-D.*$PG_DATA_DIR" || true'
+alias pg-restart='pg-stop && sleep 2 && pg-start'
+alias pg-status='pgrep -f "postgres.*-D.*$PG_DATA_DIR" && echo "PostgreSQL is running" || echo "PostgreSQL is not running"'
+
+# Client connections
+alias pg-psql='"$PG_INSTALL_DIR/bin/psql" -h "$PG_DATA_DIR" postgres'
+alias pg-createdb='"$PG_INSTALL_DIR/bin/createdb" -h "$PG_DATA_DIR"'
+alias pg-dropdb='"$PG_INSTALL_DIR/bin/dropdb" -h "$PG_DATA_DIR"'
+
+# Debugging
+alias pg-debug-gdb='gdb -x "$GDBINIT" "$PG_INSTALL_DIR/bin/postgres"'
+alias pg-debug-lldb='lldb "$PG_INSTALL_DIR/bin/postgres"'
+alias pg-debug='
+    if command -v gdb >/dev/null 2>&1; then
+        pg-debug-gdb
+    elif command -v lldb >/dev/null 2>&1; then
+        pg-debug-lldb
+    else
+        echo "No debugger available (gdb or lldb required)"
+    fi'
+
+# Attach to running process
+alias pg-attach-gdb='
+    PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)
+    if [ -n "$PG_PID" ]; then
+        echo "Attaching GDB to PostgreSQL process $PG_PID"
+        gdb -x "$GDBINIT" -p "$PG_PID"
+    else
+        echo "No PostgreSQL process found"
+    fi'
+
+alias pg-attach-lldb='
+    PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)
+    if [ -n "$PG_PID" ]; then
+        echo "Attaching LLDB to PostgreSQL process $PG_PID"
+        lldb -p "$PG_PID"
+    else
+        echo "No PostgreSQL process found"
+    fi'
+
+alias pg-attach='
+    if command -v gdb >/dev/null 2>&1; then
+        pg-attach-gdb
+    elif command -v lldb >/dev/null 2>&1; then
+        pg-attach-lldb
+    else
+        echo "No debugger available (gdb or lldb required)"
+    fi'
+
+# Performance profiling and analysis
+alias pg-valgrind='valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"'
+alias pg-strace='strace -f -o /tmp/postgres.strace "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"'
+
+# Flame graph generation
+alias pg-flame='pg-flame-generate'
+alias pg-flame-30='pg-flame-generate 30'
+alias pg-flame-60='pg-flame-generate 60'
+alias pg-flame-120='pg-flame-generate 120'
+
+# Custom flame graph with specific duration and output
+pg-flame-custom() {
+    local duration=${1:-30}
+    local output_dir=${2:-$PG_FLAME_DIR}
+    echo "Generating flame graph for ${duration}s, output to: $output_dir"
+    pg-flame-generate "$duration" "$output_dir"
+}
+
+# Benchmarking with pgbench
+alias pg-bench='pg-bench-run'
+alias pg-bench-quick='pg-bench-run 5 1 100 1 30 select-only'
+alias pg-bench-standard='pg-bench-run 10 2 1000 10 60 tpcb-like'
+alias pg-bench-heavy='pg-bench-run 50 4 5000 100 300 tpcb-like'
+alias pg-bench-readonly='pg-bench-run 20 4 2000 50 120 select-only'
+
+# Custom benchmark function
+pg-bench-custom() {
+    local clients=${1:-10}
+    local threads=${2:-2}
+    local transactions=${3:-1000}
+    local scale=${4:-10}
+    local duration=${5:-60}
+    local test_type=${6:-tpcb-like}
+
+    echo "Running custom benchmark:"
+    echo " Clients: $clients, Threads: $threads"
+    echo " Transactions: $transactions, Scale: $scale"
+    echo " Duration: ${duration}s, Type: $test_type"
+
+    pg-bench-run "$clients" "$threads" "$transactions" "$scale" "$duration" "$test_type"
+}
+
+# Benchmark with flame graph
+pg-bench-flame() {
+    local duration=${1:-60}
+    local clients=${2:-10}
+    local scale=${3:-10}
+
+    echo "Running benchmark with flame graph generation"
+    echo "Duration: ${duration}s, Clients: $clients, Scale: $scale"
+
+    # Start benchmark in background
+    pg-bench-run "$clients" 2 1000 "$scale" "$duration" tpcb-like &
+    local bench_pid=$!
+
+    # Wait a bit for benchmark to start
+    sleep 5
+
+    # Generate flame graph for most of the benchmark duration
+    local flame_duration=$((duration - 10))
+    if [ $flame_duration -gt 10 ]; then
+        pg-flame-generate "$flame_duration" &
+        local flame_pid=$!
+    fi
+
+    # Wait for benchmark to complete
+    wait $bench_pid
+
+    # Wait for flame graph if it was started
+    if [ -n "${flame_pid:-}" ]; then
+        wait $flame_pid
+    fi
+
+    echo "Benchmark and flame graph generation completed"
+}
+
+# Performance monitoring
+alias pg-perf='perf top -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)'
+alias pg-htop='htop -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | tr "\n" "," | sed "s/,$//")'
+
+# System performance stats during PostgreSQL operation
+# Runs iostat and vmstat for <duration> seconds (default 30), one sample per
+# second, and writes both logs into "$PG_BENCH_DIR".
+pg-stats() {
+    local duration=${1:-30}
+    local stamp iostat_pid vmstat_pid
+
+    # The redirects below fail when the results directory does not exist yet.
+    mkdir -p "$PG_BENCH_DIR" || return 1
+    stamp=$(date +%Y%m%d_%H%M%S)
+
+    echo "Collecting system stats for ${duration}s..."
+
+    iostat -x 1 "$duration" >"$PG_BENCH_DIR/iostat_$stamp.log" &
+    iostat_pid=$!
+    vmstat 1 "$duration" >"$PG_BENCH_DIR/vmstat_$stamp.log" &
+    vmstat_pid=$!
+
+    # Wait only for the two collectors, not for unrelated background jobs.
+    wait "$iostat_pid" "$vmstat_pid"
+    echo "System stats saved to $PG_BENCH_DIR"
+}
+
+# Development helpers
+
+# Run pgindent on the .c/.h files changed relative to <since> (default HEAD),
+# then report whitespace errors with "git diff --check".
+pg-format() {
+    local since=${1:-HEAD}
+    local pgindent="$PG_SOURCE_DIR/src/tools/pgindent/pgindent"
+    local repo_root modified_files file
+
+    if [ ! -f "$pgindent" ]; then
+        echo "Error: pgindent not found at $pgindent"
+        return 1
+    fi
+
+    # git prints paths relative to the top of the work tree; resolve them
+    # against it so the -f test also works from a subdirectory.
+    repo_root=$(git rev-parse --show-toplevel) || return 1
+    modified_files=$(git diff --name-only "${since}" | grep -E "\.c$|\.h$")
+
+    if [ -z "$modified_files" ]; then
+        echo "No modified .c or .h files found"
+        return 0
+    fi
+
+    echo "Formatting modified files with pgindent:"
+    # Read the list on fd 3 so pgindent cannot consume it from stdin.
+    while IFS= read -r file <&3; do
+        if [ -f "$repo_root/$file" ]; then
+            echo " Formatting: $file"
+            "$pgindent" "$repo_root/$file"
+        else
+            echo " Warning: File not found: $file"
+        fi
+    done 3<<<"$modified_files"
+
+    echo "Checking files for whitespace:"
+    git diff --check "${since}"
+}
+
+alias pg-tidy='find "$PG_SOURCE_DIR" -name "*.c" | head -10 | xargs clang-tidy'
+
+# Log management
+alias pg-log='tail -f "$PG_DATA_DIR/log/postgresql-$(date +%Y-%m-%d).log" 2>/dev/null || echo "No log file found"'
+alias pg-log-errors='grep -i error "$PG_DATA_DIR/log/"*.log 2>/dev/null || echo "No error logs found"'
+
+# Build logs
+alias pg-build-log='cat "$PG_BUILD_DIR/meson-logs/meson-log.txt"'
+alias pg-build-errors='grep -i error "$PG_BUILD_DIR/meson-logs/meson-log.txt" 2>/dev/null || echo "No build errors found"'
+
+# Results viewing
+alias pg-bench-results='ls -la "$PG_BENCH_DIR" && echo "Latest results:" && tail -20 "$PG_BENCH_DIR"/results_*.txt 2>/dev/null | tail -20'
+alias pg-flame-results='ls -la "$PG_FLAME_DIR" && echo "Open flame graphs with: firefox $PG_FLAME_DIR/*.svg"'
+
+# Clean up old results
+pg-clean-results() {
+    local days=${1:-7}
+    echo "Cleaning benchmark and flame graph results older than $days days..."
+    find "$PG_BENCH_DIR" -type f -mtime "+$days" -delete 2>/dev/null || true
+    find "$PG_FLAME_DIR" -type f -mtime "+$days" -delete 2>/dev/null || true
+    echo "Cleanup completed"
+}
+
+# Information
+alias pg-info='
+    echo "=== PostgreSQL Development Environment ==="
+    echo "Source: $PG_SOURCE_DIR"
+    echo "Build: $PG_BUILD_DIR"
+    echo "Install: $PG_INSTALL_DIR"
+    echo "Data: $PG_DATA_DIR"
+    echo "Benchmarks: $PG_BENCH_DIR"
+    echo "Flame graphs: $PG_FLAME_DIR"
+    echo "Compiler: $CC"
+    echo ""
+    echo "Available commands:"
+    echo " Setup: pg-setup, pg-build, pg-install"
+    echo " Database: pg-init, pg-start, pg-stop, pg-psql"
+    echo " Debug: pg-debug, pg-attach, pg-valgrind"
+    echo " Performance: pg-flame, pg-bench, pg-perf"
+    echo " Benchmarks: pg-bench-quick, pg-bench-standard, pg-bench-heavy"
+    echo " Flame graphs: pg-flame-30, pg-flame-60, pg-flame-custom"
+    echo " Combined: pg-bench-flame"
+    echo " Results: pg-bench-results, pg-flame-results"
+    echo " Logs: pg-log, pg-build-log"
+    echo " Clean: pg-clean, pg-full-clean, pg-clean-results"
+    echo " Code quality: pg-format, pg-tidy"
+    echo "=========================================="'
+
+echo "PostgreSQL aliases loaded. Run 'pg-info' for available commands."
diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000000..e503425eb9475 --- /dev/null +++ b/shell.nix @@ -0,0 +1,814 @@ +{ + pkgs, + pkgs-unstable, + system, +}: let + # Use LLVM for modern PostgreSQL development + llvmPkgs = pkgs-unstable.llvmPackages_21; + + # Configuration constants + config = { + pgSourceDir = "$PWD"; + pgBuildDir = "$PWD/build"; + pgInstallDir = "$PWD/install"; + pgDataDir = "/tmp/test-db-$(basename $PWD)"; + pgBenchDir = "/tmp/pgbench-results-$(basename $PWD)"; + pgFlameDir = "/tmp/flame-graphs-$(basename $PWD)"; + }; + + # Single dependency function that can be used for all environments + getPostgreSQLDeps = muslLibs: + with pkgs; + [ + # Build system (always use host tools) + pkgs-unstable.meson + pkgs-unstable.ninja + pkg-config + autoconf + libtool + git + which + binutils + gnumake + + # Parser/lexer tools + bison + flex + + # Perl with required packages + (perl.withPackages (ps: with ps; [IPCRun])) + + # Documentation + docbook_xml_dtd_45 + docbook-xsl-nons + libxslt + libxml2 + fop + + # Development tools (always use host tools) + coreutils + shellcheck + ripgrep + valgrind + curl + uv + pylint + black + lcov + strace + ltrace + perf-tools + linuxPackages.perf + flamegraph + htop + iotop + sysstat + ccache + cppcheck + compdb + + # GCC/GDB +# pkgs-unstable.gcc15 + gcc + gdb + + # LLVM toolchain + llvmPkgs.llvm + llvmPkgs.llvm.dev + llvmPkgs.clang-tools + llvmPkgs.lldb + + # Language support + (python3.withPackages (ps: with ps; [requests browser-cookie3])) + tcl + ] + ++ ( + if muslLibs + then [ + # Musl target libraries for cross-compilation + pkgs.pkgsMusl.readline + pkgs.pkgsMusl.zlib + pkgs.pkgsMusl.openssl + pkgs.pkgsMusl.icu + pkgs.pkgsMusl.lz4 + pkgs.pkgsMusl.zstd + pkgs.pkgsMusl.libuuid + pkgs.pkgsMusl.libkrb5 + pkgs.pkgsMusl.linux-pam + pkgs.pkgsMusl.libxcrypt + ] + else [ + # Glibc target libraries + readline + zlib + openssl + icu + lz4 + zstd + libuuid + libkrb5 + linux-pam + libxcrypt + numactl + 
openldap + liburing + libselinux + glibc + glibc.dev + ] + ); + + # GDB configuration for PostgreSQL debugging + gdbConfig = pkgs.writeText "gdbinit-postgres" '' + # PostgreSQL-specific GDB configuration + + # Pretty-print PostgreSQL data structures + define print_node + if $arg0 + printf "Node type: %s\n", nodeTagNames[$arg0->type] + print *$arg0 + else + printf "NULL node\n" + end + end + document print_node + Print a PostgreSQL Node with type information + Usage: print_node + end + + define print_list + set $list = (List*)$arg0 + if $list + printf "List length: %d\n", $list->length + set $cell = $list->head + set $i = 0 + while $cell && $i < $list->length + printf " [%d]: ", $i + print_node $cell->data.ptr_value + set $cell = $cell->next + set $i = $i + 1 + end + else + printf "NULL list\n" + end + end + document print_list + Print a PostgreSQL List structure + Usage: print_list + end + + define print_query + set $query = (Query*)$arg0 + if $query + printf "Query type: %d, command type: %d\n", $query->querySource, $query->commandType + print *$query + else + printf "NULL query\n" + end + end + document print_query + Print a PostgreSQL Query structure + Usage: print_query + end + + define print_relcache + set $rel = (Relation)$arg0 + if $rel + printf "Relation: %s.%s (OID: %u)\n", $rel->rd_rel->relnamespace, $rel->rd_rel->relname.data, $rel->rd_id + printf " natts: %d, relkind: %c\n", $rel->rd_rel->relnatts, $rel->rd_rel->relkind + else + printf "NULL relation\n" + end + end + document print_relcache + Print relation cache entry information + Usage: print_relcache + end + + define print_tupdesc + set $desc = (TupleDesc)$arg0 + if $desc + printf "TupleDesc: %d attributes\n", $desc->natts + set $i = 0 + while $i < $desc->natts + set $attr = $desc->attrs[$i] + printf " [%d]: %s (type: %u, len: %d)\n", $i, $attr->attname.data, $attr->atttypid, $attr->attlen + set $i = $i + 1 + end + else + printf "NULL tuple descriptor\n" + end + end + document print_tupdesc + Print 
tuple descriptor information + Usage: print_tupdesc + end + + define print_slot + set $slot = (TupleTableSlot*)$arg0 + if $slot + printf "TupleTableSlot: %s\n", $slot->tts_ops->name + printf " empty: %d, shouldFree: %d\n", $slot->tts_empty, $slot->tts_shouldFree + if $slot->tts_tupleDescriptor + print_tupdesc $slot->tts_tupleDescriptor + end + else + printf "NULL slot\n" + end + end + document print_slot + Print tuple table slot information + Usage: print_slot + end + + # Memory context debugging + define print_mcxt + set $context = (MemoryContext)$arg0 + if $context + printf "MemoryContext: %s\n", $context->name + printf " type: %s, parent: %p\n", $context->methods->name, $context->parent + printf " total: %zu, free: %zu\n", $context->mem_allocated, $context->freep - $context->freeptr + else + printf "NULL memory context\n" + end + end + document print_mcxt + Print memory context information + Usage: print_mcxt + end + + # Process debugging + define print_proc + set $proc = (PGPROC*)$arg0 + if $proc + printf "PGPROC: pid=%d, database=%u\n", $proc->pid, $proc->databaseId + printf " waiting: %d, waitStatus: %d\n", $proc->waiting, $proc->waitStatus + else + printf "NULL process\n" + end + end + document print_proc + Print process information + Usage: print_proc + end + + # Set useful defaults + set print pretty on + set print object on + set print static-members off + set print vtbl on + set print demangle on + set demangle-style gnu-v3 + set print sevenbit-strings off + set history save on + set history size 1000 + set history filename ~/.gdb_history_postgres + + # Common breakpoints for PostgreSQL debugging + define pg_break_common + break elog + break errfinish + break ExceptionalCondition + break ProcessInterrupts + end + document pg_break_common + Set common PostgreSQL debugging breakpoints + end + + printf "PostgreSQL GDB configuration loaded.\n" + printf "Available commands: print_node, print_list, print_query, print_relcache,\n" + printf " print_tupdesc, 
print_slot, print_mcxt, print_proc, pg_break_common\n" + ''; + + # Flame graph generation script + flameGraphScript = pkgs.writeScriptBin "pg-flame-generate" '' + #!${pkgs.bash}/bin/bash + set -euo pipefail + + DURATION=''${1:-30} + OUTPUT_DIR=''${2:-${config.pgFlameDir}} + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + + mkdir -p "$OUTPUT_DIR" + + echo "Generating flame graph for PostgreSQL (duration: ''${DURATION}s)" + + # Find PostgreSQL processes + PG_PIDS=$(pgrep -f "postgres.*-D.*${config.pgDataDir}" || true) + + if [ -z "$PG_PIDS" ]; then + echo "Error: No PostgreSQL processes found" + exit 1 + fi + + echo "Found PostgreSQL processes: $PG_PIDS" + + # Record perf data + PERF_DATA="$OUTPUT_DIR/perf_$TIMESTAMP.data" + echo "Recording perf data to $PERF_DATA" + + ${pkgs.linuxPackages.perf}/bin/perf record \ + -F 997 \ + -g \ + --call-graph dwarf \ + -p "$(echo $PG_PIDS | tr ' ' ',')" \ + -o "$PERF_DATA" \ + sleep "$DURATION" + + # Generate flame graph + FLAME_SVG="$OUTPUT_DIR/postgres_flame_$TIMESTAMP.svg" + echo "Generating flame graph: $FLAME_SVG" + + ${pkgs.linuxPackages.perf}/bin/perf script -i "$PERF_DATA" | \ + ${pkgs.flamegraph}/bin/stackcollapse-perf.pl | \ + ${pkgs.flamegraph}/bin/flamegraph.pl \ + --title "PostgreSQL Flame Graph ($TIMESTAMP)" \ + --width 1200 \ + --height 800 \ + > "$FLAME_SVG" + + echo "Flame graph generated: $FLAME_SVG" + echo "Perf data saved: $PERF_DATA" + + # Generate summary report + REPORT="$OUTPUT_DIR/report_$TIMESTAMP.txt" + echo "Generating performance report: $REPORT" + + { + echo "PostgreSQL Performance Analysis Report" + echo "Generated: $(date)" + echo "Duration: ''${DURATION}s" + echo "Processes: $PG_PIDS" + echo "" + echo "=== Top Functions ===" + ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio --sort comm,dso,symbol | head -50 + echo "" + echo "=== Call Graph ===" + ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio -g --sort comm,dso,symbol | head -100 + } > "$REPORT" + + echo "Report generated: 
$REPORT" + echo "" + echo "Files created:" + echo " Flame graph: $FLAME_SVG" + echo " Perf data: $PERF_DATA" + echo " Report: $REPORT" + ''; + + # pgbench wrapper script + pgbenchScript = pkgs.writeScriptBin "pg-bench-run" '' + #!${pkgs.bash}/bin/bash + set -euo pipefail + + # Default parameters + CLIENTS=''${1:-10} + THREADS=''${2:-2} + TRANSACTIONS=''${3:-1000} + SCALE=''${4:-10} + DURATION=''${5:-60} + TEST_TYPE=''${6:-tpcb-like} + + OUTPUT_DIR="${config.pgBenchDir}" + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + + mkdir -p "$OUTPUT_DIR" + + echo "=== PostgreSQL Benchmark Configuration ===" + echo "Clients: $CLIENTS" + echo "Threads: $THREADS" + echo "Transactions: $TRANSACTIONS" + echo "Scale factor: $SCALE" + echo "Duration: ''${DURATION}s" + echo "Test type: $TEST_TYPE" + echo "Output directory: $OUTPUT_DIR" + echo "============================================" + + # Check if PostgreSQL is running + if ! pgrep -f "postgres.*-D.*${config.pgDataDir}" >/dev/null; then + echo "Error: PostgreSQL is not running. Start it with 'pg-start'" + exit 1 + fi + + PGBENCH="${config.pgInstallDir}/bin/pgbench" + PSQL="${config.pgInstallDir}/bin/psql" + CREATEDB="${config.pgInstallDir}/bin/createdb" + DROPDB="${config.pgInstallDir}/bin/dropdb" + + DB_NAME="pgbench_test_$TIMESTAMP" + RESULTS_FILE="$OUTPUT_DIR/results_$TIMESTAMP.txt" + LOG_FILE="$OUTPUT_DIR/pgbench_$TIMESTAMP.log" + + echo "Creating test database: $DB_NAME" + "$CREATEDB" -h "${config.pgDataDir}" "$DB_NAME" || { + echo "Failed to create database" + exit 1 + } + + # Initialize pgbench tables + echo "Initializing pgbench tables (scale factor: $SCALE)" + "$PGBENCH" -h "${config.pgDataDir}" -i -s "$SCALE" "$DB_NAME" || { + echo "Failed to initialize pgbench tables" + "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + exit 1 + } + + # Run benchmark based on test type + echo "Running benchmark..." 
+ + case "$TEST_TYPE" in + "tpcb-like"|"default") + BENCH_ARGS="" + ;; + "select-only") + BENCH_ARGS="-S" + ;; + "simple-update") + BENCH_ARGS="-N" + ;; + "read-write") + BENCH_ARGS="-b select-only@70 -b tpcb-like@30" + ;; + *) + echo "Unknown test type: $TEST_TYPE" + echo "Available types: tpcb-like, select-only, simple-update, read-write" + "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + exit 1 + ;; + esac + + { + echo "PostgreSQL Benchmark Results" + echo "Generated: $(date)" + echo "Test type: $TEST_TYPE" + echo "Clients: $CLIENTS, Threads: $THREADS" + echo "Transactions: $TRANSACTIONS, Duration: ''${DURATION}s" + echo "Scale factor: $SCALE" + echo "Database: $DB_NAME" + echo "" + echo "=== System Information ===" + echo "CPU: $(nproc) cores" + echo "Memory: $(free -h | grep '^Mem:' | awk '{print $2}')" + echo "Compiler: $CC" + echo "PostgreSQL version: $("$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -t -c "SELECT version();" | head -1)" + echo "" + echo "=== Benchmark Results ===" + } > "$RESULTS_FILE" + + # Run the actual benchmark + "$PGBENCH" \ + -h "${config.pgDataDir}" \ + -c "$CLIENTS" \ + -j "$THREADS" \ + -T "$DURATION" \ + -P 5 \ + --log \ + --log-prefix="$OUTPUT_DIR/pgbench_$TIMESTAMP" \ + $BENCH_ARGS \ + "$DB_NAME" 2>&1 | tee -a "$RESULTS_FILE" + + # Collect additional statistics + { + echo "" + echo "=== Database Statistics ===" + "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c " + SELECT + schemaname, + relname, + n_tup_ins as inserts, + n_tup_upd as updates, + n_tup_del as deletes, + n_live_tup as live_tuples, + n_dead_tup as dead_tuples + FROM pg_stat_user_tables; + " + + echo "" + echo "=== Index Statistics ===" + "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c " + SELECT + schemaname, + relname, + indexrelname, + idx_scan, + idx_tup_read, + idx_tup_fetch + FROM pg_stat_user_indexes; + " + } >> "$RESULTS_FILE" + + # Clean up + echo "Cleaning up test database: $DB_NAME" + "$DROPDB" -h 
"${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + + echo "" + echo "Benchmark completed!" + echo "Results saved to: $RESULTS_FILE" + echo "Transaction logs: $OUTPUT_DIR/pgbench_$TIMESTAMP*" + + # Show summary + echo "" + echo "=== Quick Summary ===" + grep -E "(tps|latency)" "$RESULTS_FILE" | tail -5 + ''; + + # Development shell (GCC + glibc) + devShell = pkgs.mkShell { + name = "postgresql-dev"; + buildInputs = + (getPostgreSQLDeps false) + ++ [ + flameGraphScript + pgbenchScript + ]; + + shellHook = let + icon = "f121"; + in '' + # History configuration + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + # Clean environment + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + # Essential tools in PATH + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Ccache configuration + export PATH=${pkgs.ccache}/bin:$PATH + export CCACHE_COMPILERCHECK=content + export CCACHE_DIR=$HOME/.ccache/pg/$(basename $PWD) + mkdir -p "$CCACHE_DIR" + + # LLVM configuration + export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config" + export PATH="${llvmPkgs.llvm}/bin:$PATH" + export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH" + export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm" + export LLVM_ROOT="${llvmPkgs.llvm}" + + # Development tools in PATH + export PATH=${pkgs.clang-tools}/bin:$PATH + export PATH=${pkgs.cppcheck}/bin:$PATH + + # PostgreSQL Development CFLAGS + # -DRELCACHE_FORCE_RELEASE -DCATCACHE_FORCE_RELEASE -fno-omit-frame-pointer -fno-stack-protector -DUSE_VALGRIND + export CFLAGS="" + export CXXFLAGS="" + + # Python UV + UV_PYTHON_DOWNLOADS=never + + # GCC configuration (default compiler) + export CC="${pkgs.gcc}/bin/gcc" + export CXX="${pkgs.gcc}/bin/g++" + + # PostgreSQL environment + export 
PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + # GDB configuration + export GDBINIT="${gdbConfig}" + + # Performance tools in PATH + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + # Create output directories + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + # Compiler verification + echo "Environment configured:" + echo " Compiler: $CC" + echo " libc: glibc" + echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')" + + # Load PostgreSQL development aliases + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + else + echo "Warning: pg-aliases.sh not found in current directory" + fi + + echo "" + echo "PostgreSQL Development Environment Ready (GCC + glibc)" + echo "Run 'pg-info' for available commands" + ''; + }; + + # Clang + glibc variant + clangDevShell = pkgs.mkShell { + name = "postgresql-clang-glibc"; + buildInputs = + (getPostgreSQLDeps false) + ++ [ + llvmPkgs.clang + llvmPkgs.lld + llvmPkgs.compiler-rt + flameGraphScript + pgbenchScript + ]; + + shellHook = let + icon = "f121"; + in '' + # History configuration + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + # Clean environment + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + # Essential tools in PATH + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Ccache configuration + export PATH=${pkgs.ccache}/bin:$PATH + export CCACHE_COMPILERCHECK=content + export CCACHE_DIR=$HOME/.ccache_pg_dev_clang + mkdir -p 
"$CCACHE_DIR" + + # LLVM configuration + export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config" + export PATH="${llvmPkgs.llvm}/bin:$PATH" + export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH" + export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm" + export LLVM_ROOT="${llvmPkgs.llvm}" + + # Development tools in PATH + export PATH=${pkgs.clang-tools}/bin:$PATH + export PATH=${pkgs.cppcheck}/bin:$PATH + + # Clang + glibc configuration - use system linker instead of LLD for compatibility + export CC="${llvmPkgs.clang}/bin/clang" + export CXX="${llvmPkgs.clang}/bin/clang++" + + # Use system linker and standard runtime + #export CFLAGS="" + #export CXXFLAGS="" + #export LDFLAGS="" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + # GDB configuration + export GDBINIT="${gdbConfig}" + + # Performance tools in PATH + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + # Create output directories + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + # Compiler verification + echo "Environment configured:" + echo " Compiler: $CC" + echo " libc: glibc" + echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')" + + # Load PostgreSQL development aliases + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + else + echo "Warning: pg-aliases.sh not found in current directory" + fi + + echo "" + echo "PostgreSQL Development Environment Ready (Clang + glibc)" + echo "Run 'pg-info' for available commands" + ''; + }; + + # GCC + musl variant (cross-compilation) + muslDevShell = pkgs.mkShell { + name = "postgresql-gcc-musl"; + buildInputs = + (getPostgreSQLDeps true) + ++ [ + pkgs.gcc + 
flameGraphScript + pgbenchScript + ]; + + shellHook = '' + # Same base configuration as main shell + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + + # Cross-compilation to musl + export CC="${pkgs.gcc}/bin/gcc" + export CXX="${pkgs.gcc}/bin/g++" + + # Point to musl libraries for linking + export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig" + export CFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export CXXFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export LDFLAGS="-L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -static-libgcc" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + export GDBINIT="${gdbConfig}" + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + echo "GCC + musl environment configured" + echo " Compiler: $CC" + echo " LibC: musl (cross-compilation)" + + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + fi + + echo "PostgreSQL Development Environment Ready (GCC + musl)" + ''; + }; + + # Clang + musl variant (cross-compilation) + clangMuslDevShell = pkgs.mkShell { + name = "postgresql-clang-musl"; + buildInputs = + (getPostgreSQLDeps true) + ++ [ + llvmPkgs.clang + llvmPkgs.lld + flameGraphScript + pgbenchScript + ]; + + shellHook = let + 
icon = "f121"; + in '' + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Cross-compilation to musl with clang + export CC="${llvmPkgs.clang}/bin/clang" + export CXX="${llvmPkgs.clang}/bin/clang++" + + # Point to musl libraries for linking + export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig" + export CFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export CXXFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export LDFLAGS="--target=x86_64-linux-musl -L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -fuse-ld=lld" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + export GDBINIT="${gdbConfig}" + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + echo "Clang + musl environment configured" + echo " Compiler: $CC" + echo " LibC: musl (cross-compilation)" + + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + fi + + echo "PostgreSQL Development Environment Ready (Clang + musl)" + ''; + }; +in { + inherit devShell clangDevShell muslDevShell 
clangMuslDevShell gdbConfig flameGraphScript pgbenchScript; +} diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 61c035a39834a..6661559b684c6 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -1232,7 +1232,7 @@ spawn_process(const char *cmdline) char *cmdline2; cmdline2 = psprintf("exec %s", cmdline); - execl(shellprog, shellprog, "-c", cmdline2, (char *) NULL); + execlp(shellprog, shellprog, "-c", cmdline2, (char *) NULL); /* Not using the normal bail() here as we want _exit */ bail_noatexit("could not exec \"%s\": %m", shellprog); } diff --git a/src/tools/pgindent/pgindent b/src/tools/pgindent/pgindent index b7d718089248e..e1febc2c7b295 100755 --- a/src/tools/pgindent/pgindent +++ b/src/tools/pgindent/pgindent @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Copyright (c) 2021-2025, PostgreSQL Global Development Group From 7215f1bb295a1f818f2950b70ced002f2b9ea432 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 27 Aug 2025 10:48:15 -0400 Subject: [PATCH 2/6] Prune heap page on INSERT when the page is considered full Exercise the existing heapam page pruning code on the INSERT paths to more proactively reorder heap pages and potentially free up space. 
--- src/backend/access/heap/heapam.c | 18 ++++++++++++++++-- src/include/storage/bufpage.h | 11 +++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index ed0c0c2dc9f48..fc5942a27137f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2243,7 +2243,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, END_CRIT_SECTION(); - UnlockReleaseBuffer(buffer); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + /* Consider pruning the page if it's getting full */ + if (PageIsFull(BufferGetPage(buffer))) + heap_page_prune_opt(relation, buffer); + + ReleaseBuffer(buffer); + if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); @@ -2658,7 +2665,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); } - UnlockReleaseBuffer(buffer); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + /* Consider pruning the page if it's getting full */ + if (PageIsFull(BufferGetPage(buffer))) + heap_page_prune_opt(relation, buffer); + + ReleaseBuffer(buffer); + ndone += nthispage; /* diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index aeb67c498c59f..e158765e5a6c6 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -14,6 +14,7 @@ #ifndef BUFPAGE_H #define BUFPAGE_H +#include "access/transam.h" #include "access/xlogdefs.h" #include "storage/block.h" #include "storage/item.h" @@ -415,6 +416,8 @@ PageIsFull(const PageData *page) { return ((const PageHeaderData *) page)->pd_flags & PD_PAGE_FULL; } + + static inline void PageSetFull(Page page) { @@ -508,4 +511,12 @@ extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); extern void PageSetChecksumInplace(Page page, BlockNumber blkno); +static inline bool +PageHasPrunable(const PageData *page) +{ + return 
(PageHasFreeLinePointers(page) || + PageGetFreeSpace(page) < BLCKSZ / 4 || + PageGetMaxOffsetNumber(page) > 21); +} + #endif /* BUFPAGE_H */ From 4c71cf248745ced7658ccdf03e411cf9a7a3f056 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 27 Aug 2025 13:51:34 -0400 Subject: [PATCH 3/6] Proactively prune a page when the new tuple could fit. heap_page_prune_opt() doesn't take into account the size of the tuple being inserted: it only checks whether the page has less free space than the minimum threshold, and doesn't consider whether pruning could make enough space for a specific tuple. Consider pruning if the current free space plus potential space from pruning could accommodate the tuple, even if the page doesn't meet the normal threshold. --- src/backend/access/heap/heapam.c | 12 +++++++++--- src/backend/access/heap/heapam_handler.c | 4 ++-- src/backend/access/heap/pruneheap.c | 21 ++++++++++++++++++--- src/include/access/heapam.h | 2 +- src/include/storage/bufpage.h | 1 + 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fc5942a27137f..bd639afb2813d 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -569,7 +569,7 @@ heap_prepare_pagescan(TableScanDesc sscan) /* * Prune and repair fragmentation for the whole page, if possible. 
*/ - heap_page_prune_opt(scan->rs_base.rs_rd, buffer); + heap_page_prune_opt(scan->rs_base.rs_rd, buffer, 0); /* * We must hold share lock on the buffer content while examining tuple @@ -2247,7 +2247,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* Consider pruning the page if it's getting full */ if (PageIsFull(BufferGetPage(buffer))) - heap_page_prune_opt(relation, buffer); + heap_page_prune_opt(relation, buffer, heaptup->t_len); ReleaseBuffer(buffer); @@ -2669,7 +2669,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* Consider pruning the page if it's getting full */ if (PageIsFull(BufferGetPage(buffer))) - heap_page_prune_opt(relation, buffer); + heap_page_prune_opt(relation, buffer, heaptuples[ndone]->t_len); ReleaseBuffer(buffer); @@ -3833,6 +3833,12 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, newtupsize = MAXALIGN(newtup->t_len); + if (newtupsize > pagefree && PageHasPrunable(page)) + { + heap_page_prune_opt(relation, buffer, newtupsize); + pagefree = PageGetHeapFreeSpace(page); + } + if (need_toast || newtupsize > pagefree) { TransactionId xmax_lock_old_tuple; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index bcbac844bb669..fc6364b142433 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -138,7 +138,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, * Prune page, but only if we weren't already on this page */ if (prev_buf != hscan->xs_cbuf) - heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf); + heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf, 0); } /* Obtain share-lock on the buffer so we can examine visibility */ @@ -2517,7 +2517,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan, /* * Prune and repair fragmentation for the whole page, if possible. 
*/ - heap_page_prune_opt(scan->rs_rd, buffer); + heap_page_prune_opt(scan->rs_rd, buffer, 0); /* * We must hold share lock on the buffer content while examining tuple diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index d8ea0c78f77cf..cad1531dfa8c5 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/heapam.h" +#include "storage/bufpage.h" #include "access/heapam_xlog.h" #include "access/htup_details.h" #include "access/multixact.h" @@ -187,10 +188,14 @@ static void page_verify_redirects(Page page); * Note: this is called quite often. It's important that it fall out quickly * if there's not any use in pruning. * + * If tuple_len is provided (> 0), the function will consider pruning even + * if the page doesn't meet the normal free space threshold, as long as + * pruning could potentially make room for a tuple of that size. + * * Caller must have pin on the buffer, and must *not* have a lock on it. */ void -heap_page_prune_opt(Relation relation, Buffer buffer) +heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) { Page page = BufferGetPage(buffer); TransactionId prune_xid; @@ -228,6 +233,10 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * for a new tuple version, or when free space falls below the relation's * fill-factor target (but not less than 10%). * + * If a specific tuple length is provided, we also consider pruning if the + * current free space plus potential space from pruning could accommodate + * the tuple, even if the page doesn't meet the normal threshold. + * * Checking free space here is questionable since we aren't holding any * lock on the buffer; in the worst case we could get a bogus answer. 
It's * unlikely to be *seriously* wrong, though, since reading either pd_lower @@ -239,7 +248,11 @@ heap_page_prune_opt(Relation relation, Buffer buffer) HEAP_DEFAULT_FILLFACTOR); minfree = Max(minfree, BLCKSZ / 10); - if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) + /* + * Check if we should prune based on normal criteria or tuple-specific + * needs + */ + if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree || tuple_len > 0) { /* OK, try to get exclusive buffer lock */ if (!ConditionalLockBufferForCleanup(buffer)) @@ -250,7 +263,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * page's free space, and recheck the heuristic about whether to * prune. */ - if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) + if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree || + (tuple_len > 0 && PageGetHeapFreeSpace(page) < MAXALIGN(tuple_len) && + PageHasPrunable(page))) { OffsetNumber dummy_off_loc; PruneFreezeResult presult; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index a1de400b9a531..dd4352ff12e40 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -366,7 +366,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel, /* in heap/pruneheap.c */ struct GlobalVisState; -extern void heap_page_prune_opt(Relation relation, Buffer buffer); +extern void heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len); extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index e158765e5a6c6..f6aaa0704bab2 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -405,6 +405,7 @@ PageSetHasFreeLinePointers(Page page) { ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES; } + static inline void PageClearHasFreeLinePointers(Page page) { From a794a00cd53542ce5b47dff35939943fa192ef29 Mon Sep 17 00:00:00 2001 
From: Greg Burd Date: Tue, 23 Sep 2025 13:08:06 -0400 Subject: [PATCH 4/6] Prune when possible during scan of heap pages Add scan-time heap page pruning functionality and a mechanism to track page modifications during scans and re-evaluate pruning eligibility. This addresses the issue where multiple updates during a scan don't trigger heap page pruning until the next scan. --- src/backend/access/heap/heapam.c | 48 ++++++++++++++++++++++++++++++-- src/include/access/heapam.h | 6 ++++ src/include/storage/bufpage.h | 7 ++++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index bd639afb2813d..f7285519c3b9a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -446,6 +446,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) scan->rs_ntuples = 0; scan->rs_cindex = 0; + /* Initialize scan-time pruning tracking */ + scan->rs_page_updates = 0; + scan->rs_page_pruned = false; + scan->rs_last_pruned_block = InvalidBlockNumber; + /* * Initialize to ForwardScanDirection because it is most common and * because heap scans go forward before going backward (e.g. CURSORs). 
@@ -925,6 +930,10 @@ heapgettup(HeapScanDesc scan, Assert(BufferGetBlockNumber(scan->rs_cbuf) == scan->rs_cblock); + /* Reset page tracking for new page */ + scan->rs_page_updates = 0; + scan->rs_page_pruned = false; + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); page = heapgettup_start_page(scan, dir, &linesleft, &lineoff); continue_page: @@ -942,7 +951,12 @@ heapgettup(HeapScanDesc scan, ItemId lpp = PageGetItemId(page, lineoff); if (!ItemIdIsNormal(lpp)) + { + /* Track dead line pointers as potential modifications */ + if (ItemIdIsDead(lpp)) + scan->rs_page_updates++; continue; + } tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp); tuple->t_len = ItemIdGetLength(lpp); @@ -972,10 +986,27 @@ heapgettup(HeapScanDesc scan, } /* - * if we get here, it means we've exhausted the items on this page and - * it's time to move to the next. + * Before moving to next page, check if current page needs scan-time + * pruning. This addresses the issue where multiple updates during a + * scan don't trigger pruning until the next scan. */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + if (scan->rs_page_updates > 0 && + scan->rs_cblock != scan->rs_last_pruned_block && + PageNeedsScanPruning(page, scan->rs_page_updates)) + { + /* Attempt opportunistic pruning */ + heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0); + + /* Mark this block as pruned to avoid repeated attempts */ + scan->rs_last_pruned_block = scan->rs_cblock; + scan->rs_page_pruned = true; + } + + /* + * If we get here, it means we've exhausted the items on this page and + * it's time to move to the next. 
+ */ } /* end of scan */ @@ -1042,6 +1073,10 @@ heapgettup_pagemode(HeapScanDesc scan, Assert(BufferGetBlockNumber(scan->rs_cbuf) == scan->rs_cblock); + /* Reset page tracking for new page */ + scan->rs_page_updates = 0; + scan->rs_page_pruned = false; + /* prune the page and determine visible tuple offsets */ heap_prepare_pagescan((TableScanDesc) scan); page = BufferGetPage(scan->rs_cbuf); @@ -1079,6 +1114,15 @@ heapgettup_pagemode(HeapScanDesc scan, } } + /* Before ending scan, check if current page needs scan-time pruning */ + if (BufferIsValid(scan->rs_cbuf) && scan->rs_page_updates > 0 && + scan->rs_cblock != scan->rs_last_pruned_block && + PageNeedsScanPruning(BufferGetPage(scan->rs_cbuf), scan->rs_page_updates)) + { + heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0); + scan->rs_last_pruned_block = scan->rs_cblock; + } + /* end of scan */ if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index dd4352ff12e40..9be965fa654d0 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -97,6 +97,12 @@ typedef struct HeapScanDescData uint32 rs_cindex; /* current tuple's index in vistuples */ uint32 rs_ntuples; /* number of visible tuples on page */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + + /* scan-time pruning tracking */ + int rs_page_updates; /* count of updates/deletes on current + * page */ + bool rs_page_pruned; /* whether current page was already pruned */ + BlockNumber rs_last_pruned_block; /* last block we attempted pruning on */ } HeapScanDescData; typedef struct HeapScanDescData *HeapScanDesc; diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index f6aaa0704bab2..b4a4504c5643f 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -418,7 +418,6 @@ PageIsFull(const PageData *page) return ((const PageHeaderData *) page)->pd_flags & PD_PAGE_FULL; } - static inline 
void PageSetFull(Page page) { @@ -520,4 +519,10 @@ PageHasPrunable(const PageData *page) PageGetMaxOffsetNumber(page) > 21); } +static inline bool +PageNeedsScanPruning(const PageData *page, int modifications) +{ + return modifications >= 3 && PageHasPrunable(page); +} + #endif /* BUFPAGE_H */ From 6d5e8221c60fdf65e7f4bc733fa75e5068e2d4c4 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Tue, 23 Sep 2025 13:12:44 -0400 Subject: [PATCH 5/6] Mark pages prunable during insert operations for abort cleanup Set PageSetPrunable during heap_insert() and heap_multi_insert() to enable opportunistic cleanup of dead tuples from aborted transactions. Previously, aborted insert tuples would remain until the next vacuum cycle if no other modifications occurred on the page. --- src/backend/access/heap/heapam.c | 38 ++++++++++++++++++++------- src/backend/access/heap/heapam_xlog.c | 8 ++++++ src/include/access/heapam_xlog.h | 2 ++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f7285519c3b9a..6e52773e2aeda 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2194,14 +2194,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } /* - * XXX Should we set PageSetPrunable on this page ? - * - * The inserting transaction may eventually abort thus making this tuple - * DEAD and hence available for pruning. Though we don't want to optimize - * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the - * aborted tuple will never be pruned until next vacuum is triggered. - * - * If you do add PageSetPrunable here, add it in heap_xlog_insert too. + * If the inserting transaction aborts, this tuple will become DEAD and + * can be pruned during subsequent page accesses rather than waiting for + * the next vacuum cycle. 
This is beneficial for pages that don't + * experience other modifications (UPDATEs/DELETEs) which would normally + * trigger the prunable marking. */ MarkBufferDirty(buffer); @@ -2237,6 +2234,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self); xlrec.flags = 0; + xlrec.prune_xid = InvalidTransactionId; + if (IsTransactionState() && TransactionIdIsNormal(xid)) + xlrec.prune_xid = xid; if (all_visible_cleared) xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; if (options & HEAP_INSERT_SPECULATIVE) @@ -2287,6 +2287,15 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, END_CRIT_SECTION(); + /* + * Mark the page prunable so this tuple can be cleaned up if the inserting + * transaction aborts; redo does the same using the logged prune_xid. + * NOTE(review): this runs after MarkBufferDirty() and END_CRIT_SECTION(); + * confirm that modifying the page at this point is intended. + */ + if (IsTransactionState() && TransactionIdIsNormal(xid)) + PageSetPrunable(BufferGetPage(buffer), xid); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Consider pruning the page if it's getting full */ @@ -2571,7 +2580,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, PageSetAllVisible(page); /* - * XXX Should we set PageSetPrunable on this page ? See heap_insert() + * Similar to heap_insert() we set the page as prunable and record the + * prune_xid for recovery. 
*/ MarkBufferDirty(buffer); @@ -2614,6 +2624,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, Assert(!(all_visible_cleared && all_frozen_set)); xlrec->flags = 0; + xlrec->prune_xid = InvalidTransactionId; + if (IsTransactionState() && TransactionIdIsNormal(xid)) + xlrec->prune_xid = xid; if (all_visible_cleared) xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED; if (all_frozen_set) @@ -2709,6 +2722,13 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); } + /* + * Similar to heap_insert() we mark the page prunable and have + * recorded prune_xid in the WAL. + */ + if (IsTransactionState() && TransactionIdIsNormal(xid)) + PageSetPrunable(BufferGetPage(buffer), xid); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Consider pruning the page if it's getting full */ diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index cf843277938de..dc928ca34b852 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -505,6 +505,10 @@ heap_xlog_insert(XLogReaderState *record) freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + if (TransactionIdIsValid(xlrec->prune_xid) && + TransactionIdIsNormal(xlrec->prune_xid)) + PageSetPrunable(page, xlrec->prune_xid); + PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) @@ -648,6 +652,10 @@ heap_xlog_multi_insert(XLogReaderState *record) freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + if (TransactionIdIsValid(xlrec->prune_xid) && + TransactionIdIsNormal(xlrec->prune_xid)) + PageSetPrunable(page, xlrec->prune_xid); + PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index d4c0625b63228..83d69d0b979bd 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ 
-159,6 +159,7 @@ typedef struct xl_heap_header /* This is what we need to know about insert */ typedef struct xl_heap_insert { + TransactionId prune_xid; /* record xid for pruning purposes */ OffsetNumber offnum; /* inserted tuple's offset */ uint8 flags; @@ -180,6 +181,7 @@ typedef struct xl_heap_insert */ typedef struct xl_heap_multi_insert { + TransactionId prune_xid; /* record xid for pruning purposes */ uint8 flags; uint16 ntuples; OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; From 2284b3bcba39bd1e76c89af1246be39399145336 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 24 Sep 2025 14:00:11 -0400 Subject: [PATCH 6/6] DO NOT COMMIT - TESTING CODE --- out.log | 583 +++++++++++++++ post_setup.sql | 25 + prune_test.sql | 34 + results.log | 67 ++ run_test.sh | 138 ++++ setup.sql | 158 ++++ src/backend/access/heap/heapam.c | 15 +- src/backend/access/heap/heapam_handler.c | 4 +- src/backend/access/heap/pruneheap.c | 139 +++- src/backend/utils/adt/pgstatfuncs.c | 53 ++ src/backend/utils/misc/guc_parameters.dat | 6 + src/include/access/heapam.h | 44 +- src/include/access/tableam.h | 2 +- src/include/catalog/pg_proc.dat | 5 + src/include/utils/builtins.h | 2 + test.log | 870 ++++++++++++++++++++++ test.sql | 455 +++++++++++ 17 files changed, 2589 insertions(+), 11 deletions(-) create mode 100644 out.log create mode 100644 post_setup.sql create mode 100644 prune_test.sql create mode 100644 results.log create mode 100755 run_test.sh create mode 100644 setup.sql create mode 100644 test.log create mode 100644 test.sql diff --git a/out.log b/out.log new file mode 100644 index 0000000000000..109808fdca134 --- /dev/null +++ b/out.log @@ -0,0 +1,583 @@ +Null display is "[null]". +Timing is on. +Expanded display is used automatically. +Welcome to PostgreSQL! + +Type :version to see the PostgreSQL version. + +Type :extensions to see the available extensions. + +Type \q to exit. 
+ +SET +Time: 0.485 ms +SET +Time: 0.102 ms +ALTER SYSTEM +Time: 0.304 ms + pg_reload_conf +---------------- + t +(1 row) + +Time: 0.551 ms +psql:test.sql:9: NOTICE: 42710: extension "pageinspect" already exists, skipping +LOCATION: CreateExtension, extension.c:1988 +CREATE EXTENSION +Time: 0.206 ms +DROP FUNCTION +Time: 1.324 ms +DROP FUNCTION +Time: 1.703 ms +CREATE FUNCTION +Time: 2.192 ms +CREATE FUNCTION +Time: 1.003 ms + pg_stat_reset +--------------- + +(1 row) + +Time: 0.405 ms +=== PHASE 1: Setup with Massive Tuples === +CREATE TABLE +Time: 6.849 ms +ALTER TABLE +Time: 0.441 ms +CREATE INDEX +Time: 0.849 ms +INSERT 0 20 +Time: 2.709 ms +psql:test.sql:164: WARNING: 25P01: there is no transaction in progress +LOCATION: EndTransactionBlock, xact.c:4164 +COMMIT +Time: 0.068 ms + phase | context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +----------+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + BASELINE | SCAN_OPPORTUNISTIC | 506 | 4 | 79 | 15472 | 3906 | 4 | 364 | 53 | 85 | 0 | 0 | 0.79 | 7.719 + BASELINE | UPDATE_FULL_PAGE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | INSERT_SPACE_CHECK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | SCAN_END | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | MULTI_INSERT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(5 rows) + +Time: 3.167 ms +=== PHASE 2: Create Dead Tuples in Separate Transactions === +BEGIN +Time: 0.055 ms +UPDATE 10 +Time: 1.329 ms +COMMIT +Time: 0.070 ms +--- After First Dead Tuple Creation --- + context | calls_total | pages_pruned 
| tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 525 | 4 | 79 | 15472 | 3943 | 4 | 376 | 53 | 86 | 6 | 0 | 0.76 | 7.510 + UPDATE_FULL_PAGE | 10 | 0 | 0 | 0 | 15 | 0 | 0 | 0 | 0 | 10 | 0 | 0.00 | 1.500 +(2 rows) + +Time: 0.142 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 531 | 0.75 | INVALID_XACT_XID | 380 | 71.56 + UPDATE_FULL_PAGE | 10 | 0.00 | LOCK_FAILED | 10 | 100.00 +(2 rows) + +Time: 0.633 ms +BEGIN +Time: 0.061 ms +UPDATE 10 +Time: 1.255 ms +COMMIT +Time: 0.074 ms +--- After Second Dead Tuple Creation --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 535 | 5 | 89 | 18456 | 4075 | 5 | 380 | 56 | 88 | 6 | 0 | 0.93 | 7.617 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +Time: 0.139 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct 
+--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 535 | 0.93 | INVALID_XACT_XID | 380 | 71.03 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +Time: 0.141 ms +psql:test.sql:201: INFO: 00000: vacuuming "postgres.public.ultimate_prune_test" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:201: INFO: 00000: finished vacuuming "postgres.public.ultimate_prune_test": index scans: 1 +pages: 0 removed, 2 remain, 2 scanned (100.00% of total), 0 eagerly scanned +tuples: 10 removed, 20 remain, 0 are dead but not yet removable +removable cutoff: 867, which was 0 XIDs old when operation ended +new relfrozenxid: 865, which is 4 XIDs ahead of previous value +frozen: 0 pages from table (0.00% of total) had 0 tuples frozen +visibility map: 2 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan needed: 1 pages from table (50.00% of total) had 9 dead item identifiers removed +index "ultimate_prune_test_pkey": pages: 2 in total, 0 newly deleted, 0 currently deleted, 0 reusable +index "idx_ultimate_status": pages: 2 in total, 0 newly deleted, 0 currently deleted, 0 reusable +avg read rate: 0.000 MB/s, avg write rate: 122.709 MB/s +buffer usage: 40 hits, 0 reads, 3 dirtied +WAL usage: 11 records, 3 full page images, 25912 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:201: INFO: 00000: vacuuming "postgres.pg_toast.pg_toast_16536" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:201: INFO: 00000: finished vacuuming "postgres.pg_toast.pg_toast_16536": index scans: 0 +pages: 0 removed, 0 remain, 0 scanned (100.00% of total), 0 eagerly scanned +tuples: 0 removed, 0 remain, 0 are dead but not yet removable +removable cutoff: 867, which was 0 XIDs old when operation ended +new relfrozenxid: 867, which is 6 XIDs ahead of previous value +frozen: 0 
pages from table (100.00% of total) had 0 tuples frozen +visibility map: 0 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (100.00% of total) had 0 dead item identifiers removed +avg read rate: 122.070 MB/s, avg write rate: 0.000 MB/s +buffer usage: 27 hits, 1 reads, 0 dirtied +WAL usage: 1 records, 0 full page images, 258 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:201: INFO: 00000: analyzing "public.ultimate_prune_test" +LOCATION: do_analyze_rel, analyze.c:320 +psql:test.sql:201: INFO: 00000: "ultimate_prune_test": scanned 2 of 2 pages, containing 20 live rows and 0 dead rows; 20 rows in sample, 20 estimated total rows +LOCATION: acquire_sample_rows, analyze.c:1344 +psql:test.sql:201: INFO: 00000: finished analyzing table "postgres.public.ultimate_prune_test" +avg read rate: 0.000 MB/s, avg write rate: 15.625 MB/s +buffer usage: 185 hits, 0 reads, 2 dirtied +WAL usage: 14 records, 2 full page images, 6871 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: do_analyze_rel, analyze.c:841 +VACUUM +Time: 1.017 ms +--- After VACUUM --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 636 | 5 | 89 | 18456 | 4147 | 5 | 440 | 56 | 129 | 6 | 0 | 0.79 | 6.520 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +Time: 0.164 ms + context | 
calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 636 | 0.79 | INVALID_XACT_XID | 440 | 69.18 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +Time: 0.149 ms +=== PHASE 3: Extreme INSERT Test === +CREATE TABLE +Time: 0.876 ms +ALTER TABLE +Time: 0.171 ms +INSERT 0 1 +Time: 0.253 ms +psql:test.sql:220: WARNING: 25P01: there is no transaction in progress +LOCATION: EndTransactionBlock, xact.c:4164 +COMMIT +Time: 0.064 ms +BEGIN +Time: 0.047 ms +UPDATE 1 +Time: 0.166 ms +COMMIT +Time: 0.056 ms +psql:test.sql:228: INFO: 00000: vacuuming "postgres.public.insert_mega_test" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:228: INFO: 00000: finished vacuuming "postgres.public.insert_mega_test": index scans: 0 +pages: 0 removed, 1 remain, 1 scanned (100.00% of total), 0 eagerly scanned +tuples: 1 removed, 1 remain, 0 are dead but not yet removable +removable cutoff: 872, which was 0 XIDs old when operation ended +new relfrozenxid: 871, which is 3 XIDs ahead of previous value +frozen: 0 pages from table (0.00% of total) had 0 tuples frozen +visibility map: 1 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (0.00% of total) had 0 dead item identifiers removed +avg read rate: 0.000 MB/s, avg write rate: 274.123 MB/s +buffer usage: 19 hits, 0 reads, 4 dirtied +WAL usage: 6 records, 4 full page images, 33343 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:228: INFO: 00000: vacuuming "postgres.pg_toast.pg_toast_16552" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:228: INFO: 00000: finished vacuuming "postgres.pg_toast.pg_toast_16552": index scans: 0 +pages: 0 removed, 0 remain, 0 scanned (100.00% of total), 0 
eagerly scanned +tuples: 0 removed, 0 remain, 0 are dead but not yet removable +removable cutoff: 872, which was 0 XIDs old when operation ended +new relfrozenxid: 872, which is 4 XIDs ahead of previous value +frozen: 0 pages from table (100.00% of total) had 0 tuples frozen +visibility map: 0 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (100.00% of total) had 0 dead item identifiers removed +avg read rate: 137.061 MB/s, avg write rate: 0.000 MB/s +buffer usage: 25 hits, 1 reads, 0 dirtied +WAL usage: 1 records, 0 full page images, 258 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:228: INFO: 00000: analyzing "public.insert_mega_test" +LOCATION: do_analyze_rel, analyze.c:320 +psql:test.sql:228: INFO: 00000: "insert_mega_test": scanned 1 of 1 pages, containing 1 live rows and 0 dead rows; 1 rows in sample, 1 estimated total rows +LOCATION: acquire_sample_rows, analyze.c:1344 +psql:test.sql:228: INFO: 00000: finished analyzing table "postgres.public.insert_mega_test" +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 22 hits, 0 reads, 0 dirtied +WAL usage: 4 records, 0 full page images, 406 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: do_analyze_rel, analyze.c:841 +VACUUM +Time: 0.495 ms +INSERT 0 1 +Time: 0.259 ms +--- After INSERT Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us 
+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 734 | 8 | 144 | 22176 | 4217 | 8 | 450 | 72 | 198 | 6 | 0 | 1.09 | 5.745 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +Time: 0.150 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 734 | 1.09 | INVALID_XACT_XID | 450 | 61.31 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +Time: 0.152 ms +DROP TABLE +Time: 0.684 ms +=== PHASE 4: UPDATE Test with Extreme Sizes === +BEGIN +Time: 0.062 ms +UPDATE 10 +Time: 1.297 ms +COMMIT +Time: 0.068 ms +psql:test.sql:254: INFO: 00000: vacuuming "postgres.public.ultimate_prune_test" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:254: INFO: 00000: finished vacuuming "postgres.public.ultimate_prune_test": index scans: 0 +pages: 0 removed, 2 remain, 2 scanned (100.00% of total), 0 eagerly scanned +tuples: 10 removed, 20 remain, 0 are dead but not yet removable +removable cutoff: 876, which was 0 XIDs old when operation ended +new relfrozenxid: 866, which is 1 XIDs ahead of previous value +frozen: 0 pages from table (0.00% of total) had 0 tuples frozen +visibility map: 2 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (0.00% of total) had 0 dead item identifiers removed +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 26 hits, 0 reads, 0 dirtied +WAL usage: 6 records, 0 full page images, 676 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, 
vacuumlazy.c:1145 +psql:test.sql:254: INFO: 00000: vacuuming "postgres.pg_toast.pg_toast_16536" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:254: INFO: 00000: finished vacuuming "postgres.pg_toast.pg_toast_16536": index scans: 0 +pages: 0 removed, 0 remain, 0 scanned (100.00% of total), 0 eagerly scanned +tuples: 0 removed, 0 remain, 0 are dead but not yet removable +removable cutoff: 876, which was 0 XIDs old when operation ended +new relfrozenxid: 876, which is 9 XIDs ahead of previous value +frozen: 0 pages from table (100.00% of total) had 0 tuples frozen +visibility map: 0 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (100.00% of total) had 0 dead item identifiers removed +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 12 hits, 0 reads, 0 dirtied +WAL usage: 1 records, 0 full page images, 258 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:254: INFO: 00000: analyzing "public.ultimate_prune_test" +LOCATION: do_analyze_rel, analyze.c:320 +psql:test.sql:254: INFO: 00000: "ultimate_prune_test": scanned 2 of 2 pages, containing 20 live rows and 0 dead rows; 20 rows in sample, 20 estimated total rows +LOCATION: acquire_sample_rows, analyze.c:1344 +psql:test.sql:254: INFO: 00000: finished analyzing table "postgres.public.ultimate_prune_test" +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 40 hits, 0 reads, 0 dirtied +WAL usage: 7 records, 0 full page images, 1020 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: do_analyze_rel, analyze.c:841 +VACUUM +Time: 0.453 ms +BEGIN +Time: 0.060 ms +UPDATE 5 +Time: 0.973 ms +COMMIT +Time: 0.071 ms +--- After UPDATE Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | 
exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 824 | 9 | 162 | 24640 | 4251 | 9 | 461 | 77 | 271 | 6 | 0 | 1.09 | 5.159 + UPDATE_FULL_PAGE | 28 | 0 | 0 | 0 | 43 | 0 | 3 | 15 | 0 | 10 | 0 | 0.00 | 1.536 +(2 rows) + +Time: 0.150 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 824 | 1.09 | INVALID_XACT_XID | 461 | 55.95 + UPDATE_FULL_PAGE | 28 | 0.00 | NO_REMOVABLE_XIDS | 15 | 53.57 +(2 rows) + +Time: 0.141 ms +=== PHASE 5: Scan Pressure Test === +BEGIN +Time: 0.052 ms +UPDATE 10 +Time: 1.703 ms +COMMIT +Time: 0.070 ms +psql:test.sql:285: INFO: 00000: vacuuming "postgres.public.ultimate_prune_test" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:285: INFO: 00000: finished vacuuming "postgres.public.ultimate_prune_test": index scans: 1 +pages: 0 removed, 3 remain, 3 scanned (100.00% of total), 0 eagerly scanned +tuples: 10 removed, 20 remain, 0 are dead but not yet removable +removable cutoff: 879, which was 0 XIDs old when operation ended +new relfrozenxid: 875, which is 9 XIDs ahead of previous value +frozen: 0 pages from table (0.00% of total) had 0 tuples frozen +visibility map: 3 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan needed: 2 pages from table (66.67% of total) had 5 dead item identifiers removed +index "ultimate_prune_test_pkey": pages: 2 in total, 0 newly deleted, 0 currently deleted, 0 reusable +index "idx_ultimate_status": pages: 2 in total, 0 newly deleted, 0 
currently deleted, 0 reusable +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 39 hits, 0 reads, 0 dirtied +WAL usage: 10 records, 0 full page images, 870 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:285: INFO: 00000: vacuuming "postgres.pg_toast.pg_toast_16536" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:285: INFO: 00000: finished vacuuming "postgres.pg_toast.pg_toast_16536": index scans: 0 +pages: 0 removed, 0 remain, 0 scanned (100.00% of total), 0 eagerly scanned +tuples: 0 removed, 0 remain, 0 are dead but not yet removable +removable cutoff: 879, which was 0 XIDs old when operation ended +new relfrozenxid: 879, which is 3 XIDs ahead of previous value +frozen: 0 pages from table (100.00% of total) had 0 tuples frozen +visibility map: 0 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (100.00% of total) had 0 dead item identifiers removed +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 12 hits, 0 reads, 0 dirtied +WAL usage: 1 records, 0 full page images, 258 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:285: INFO: 00000: analyzing "public.ultimate_prune_test" +LOCATION: do_analyze_rel, analyze.c:320 +psql:test.sql:285: INFO: 00000: "ultimate_prune_test": scanned 3 of 3 pages, containing 20 live rows and 0 dead rows; 20 rows in sample, 20 estimated total rows +LOCATION: acquire_sample_rows, analyze.c:1344 +psql:test.sql:285: INFO: 00000: finished analyzing table "postgres.public.ultimate_prune_test" +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 41 hits, 0 reads, 0 dirtied +WAL usage: 8 records, 0 full page images, 1014 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: 
do_analyze_rel, analyze.c:841 +VACUUM +Time: 0.590 ms + count +------- + 10 +(1 row) + +Time: 0.286 ms + count +------- + 10 +(1 row) + +Time: 0.126 ms + avg +------------------------ + 10390.0000000000000000 +(1 row) + +Time: 0.647 ms +SET +Time: 0.056 ms +SET +Time: 0.046 ms + count +------- + 20 +(1 row) + +Time: 0.496 ms + count +------- + 5 +(1 row) + +Time: 0.148 ms +RESET +Time: 0.094 ms +RESET +Time: 0.093 ms +--- After SCAN Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 962 | 10 | 194 | 30552 | 4432 | 10 | 567 | 81 | 298 | 6 | 0 | 1.04 | 4.607 + UPDATE_FULL_PAGE | 39 | 0 | 0 | 0 | 71 | 0 | 5 | 23 | 0 | 11 | 0 | 0.00 | 1.821 +(2 rows) + +Time: 0.164 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 962 | 1.04 | INVALID_XACT_XID | 567 | 58.94 + UPDATE_FULL_PAGE | 39 | 0.00 | NO_REMOVABLE_XIDS | 23 | 58.97 +(2 rows) + +Time: 0.146 ms +=== PHASE 6: Multi-Insert Test === +INSERT 0 20 +Time: 2.848 ms +--- After MULTI_INSERT Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us 
+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 962 | 10 | 194 | 30552 | 4432 | 10 | 567 | 81 | 298 | 6 | 0 | 1.04 | 4.607 + UPDATE_FULL_PAGE | 39 | 0 | 0 | 0 | 71 | 0 | 5 | 23 | 0 | 11 | 0 | 0.00 | 1.821 +(2 rows) + +Time: 0.138 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 962 | 1.04 | INVALID_XACT_XID | 567 | 58.94 + UPDATE_FULL_PAGE | 39 | 0.00 | NO_REMOVABLE_XIDS | 23 | 58.97 +(2 rows) + +Time: 0.133 ms +=== PHASE 7: Ultimate Stress Test === +BEGIN +Time: 0.054 ms +UPDATE 20 +Time: 3.228 ms +COMMIT +Time: 0.069 ms +BEGIN +Time: 0.047 ms +UPDATE 20 +Time: 2.787 ms +COMMIT +Time: 0.069 ms +INSERT 0 20 +Time: 3.189 ms +--- After STRESS Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 999 | 11 | 195 | 31056 | 4490 | 11 | 569 | 113 | 299 | 6 | 1 | 1.10 | 4.494 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 121 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 | 1.532 +(2 rows) + +Time: 0.146 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct 
+--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 999 | 1.10 | INVALID_XACT_XID | 569 | 56.96 + UPDATE_FULL_PAGE | 79 | 0.00 | NO_REMOVABLE_XIDS | 57 | 72.15 +(2 rows) + +Time: 0.139 ms +=== PHASE 8: Force Extreme Conditions === +CREATE TABLE +Time: 0.997 ms +ALTER TABLE +Time: 0.173 ms +INSERT 0 1 +Time: 0.216 ms +psql:test.sql:369: WARNING: 25P01: there is no transaction in progress +LOCATION: EndTransactionBlock, xact.c:4164 +COMMIT +Time: 0.067 ms +BEGIN +Time: 0.057 ms +UPDATE 1 +Time: 0.238 ms +COMMIT +Time: 0.060 ms +psql:test.sql:377: INFO: 00000: vacuuming "postgres.public.force_pruning_test" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:377: INFO: 00000: finished vacuuming "postgres.public.force_pruning_test": index scans: 0 +pages: 0 removed, 1 remain, 1 scanned (100.00% of total), 0 eagerly scanned +tuples: 1 removed, 1 remain, 0 are dead but not yet removable +removable cutoff: 888, which was 0 XIDs old when operation ended +new relfrozenxid: 887, which is 3 XIDs ahead of previous value +frozen: 0 pages from table (0.00% of total) had 0 tuples frozen +visibility map: 1 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (0.00% of total) had 0 dead item identifiers removed +avg read rate: 0.000 MB/s, avg write rate: 260.417 MB/s +buffer usage: 19 hits, 0 reads, 4 dirtied +WAL usage: 6 records, 4 full page images, 33343 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:377: INFO: 00000: vacuuming "postgres.pg_toast.pg_toast_16561" +LOCATION: heap_vacuum_rel, vacuumlazy.c:818 +psql:test.sql:377: INFO: 00000: finished vacuuming "postgres.pg_toast.pg_toast_16561": index scans: 0 +pages: 0 removed, 0 remain, 0 scanned (100.00% of total), 0 eagerly scanned +tuples: 0 removed, 0 remain, 0 are dead but not yet 
removable +removable cutoff: 888, which was 0 XIDs old when operation ended +new relfrozenxid: 888, which is 4 XIDs ahead of previous value +frozen: 0 pages from table (100.00% of total) had 0 tuples frozen +visibility map: 0 pages set all-visible, 0 pages set all-frozen (0 were all-visible) +index scan not needed: 0 pages from table (100.00% of total) had 0 dead item identifiers removed +avg read rate: 124.008 MB/s, avg write rate: 0.000 MB/s +buffer usage: 25 hits, 1 reads, 0 dirtied +WAL usage: 1 records, 0 full page images, 258 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: heap_vacuum_rel, vacuumlazy.c:1145 +psql:test.sql:377: INFO: 00000: analyzing "public.force_pruning_test" +LOCATION: do_analyze_rel, analyze.c:320 +psql:test.sql:377: INFO: 00000: "force_pruning_test": scanned 1 of 1 pages, containing 1 live rows and 0 dead rows; 1 rows in sample, 1 estimated total rows +LOCATION: acquire_sample_rows, analyze.c:1344 +psql:test.sql:377: INFO: 00000: finished analyzing table "postgres.public.force_pruning_test" +avg read rate: 0.000 MB/s, avg write rate: 0.000 MB/s +buffer usage: 22 hits, 0 reads, 0 dirtied +WAL usage: 4 records, 0 full page images, 406 bytes, 0 buffers full +system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +LOCATION: do_analyze_rel, analyze.c:841 +VACUUM +Time: 0.471 ms +INSERT 0 1 +Time: 0.273 ms +--- After FORCE PRUNING Test --- + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 1102 | 11 | 
195 | 31056 | 4500 | 11 | 574 | 113 | 397 | 6 | 1 | 1.00 | 4.083 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 121 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 | 1.532 +(2 rows) + +Time: 0.146 ms + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 1102 | 1.00 | INVALID_XACT_XID | 574 | 52.09 + UPDATE_FULL_PAGE | 79 | 0.00 | NO_REMOVABLE_XIDS | 57 | 72.15 +(2 rows) + +Time: 0.140 ms +DROP TABLE +Time: 0.573 ms +=== PHASE 9: Final Comprehensive Analysis === + context | calls_total | pages_pruned | tuples_pruned | space_freed | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct +--------------------+-------------+--------------+---------------+-------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------ + SCAN_OPPORTUNISTIC | 1149 | 11 | 195 | 31056 | 11 | 574 | 113 | 444 | 6 | 1 | 0.96 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 +(2 rows) + +Time: 0.163 ms + report_type | context | main_failure_reason | failure_count | failure_pct | success_rate_pct +---------------------+--------------------+---------------------+---------------+-------------+------------------ + EXIT_REASON_SUMMARY | SCAN_OPPORTUNISTIC | INVALID_XACT_XID | 574 | 49.96 | 0.96 + EXIT_REASON_SUMMARY | UPDATE_FULL_PAGE | NO_REMOVABLE_XIDS | 57 | 72.15 | 0.00 +(2 rows) + +Time: 0.156 ms + report_type | context | main_failure_reason | recommendation +-----------------+--------------------+---------------------+------------------------------------------------------------ + RECOMMENDATIONS | SCAN_OPPORTUNISTIC | INVALID_XACT_XID | Check prune_xid setting and page header + RECOMMENDATIONS | UPDATE_FULL_PAGE | NO_REMOVABLE_XIDS | Need more 
transaction churn or longer waits for visibility +(2 rows) + +Time: 0.209 ms + schemaname | relname | n_tup_ins | n_tup_upd | n_tup_hot_upd | n_dead_tup | hot_update_pct | table_size | heap_size +------------+---------------------+-----------+-----------+---------------+------------+----------------+------------+----------- + public | ultimate_prune_test | 0 | 0 | 0 | 0 | 0 | 136 kB | 64 kB +(1 row) + +Time: 2.278 ms +=== Test Complete === +ALTER SYSTEM +Time: 0.132 ms + pg_reload_conf +---------------- + t +(1 row) + +Time: 0.105 ms +DROP TABLE +Time: 0.928 ms
diff --git a/post_setup.sql b/post_setup.sql
new file mode 100644
index 0000000000000..d8031c3c73417
--- /dev/null
+++ b/post_setup.sql
@@ -0,0 +1,34 @@
+-- Make sure prune tracking is on for this session (it may have been reset).
+SET enable_heap_prune_tracking = on;
+
+-- Keep autovacuum off the pgbench tables so dead tuples stay around for the
+-- pruning tests.
+ALTER TABLE pgbench_accounts SET (autovacuum_enabled = false);
+ALTER TABLE pgbench_branches SET (autovacuum_enabled = false);
+ALTER TABLE pgbench_tellers SET (autovacuum_enabled = false);
+ALTER TABLE pgbench_history SET (autovacuum_enabled = false);
+
+-- Extra columns that no index covers; updates touching only these columns
+-- are the intended HOT-update targets.
+ALTER TABLE pgbench_accounts
+    ADD COLUMN IF NOT EXISTS last_updated timestamp DEFAULT now(),
+    ADD COLUMN IF NOT EXISTS update_count int DEFAULT 0,
+    ADD COLUMN IF NOT EXISTS notes text DEFAULT 'initial';
+
+-- Give notes a much larger default to create page pressure.
+ALTER TABLE pgbench_accounts
+    ALTER COLUMN notes SET DEFAULT repeat('initial_data', 100);
+
+-- Partial index on abalance, limited to rows with abalance > 1000.
+CREATE INDEX IF NOT EXISTS idx_accounts_high_balance
+    ON pgbench_accounts (abalance)
+    WHERE abalance > 1000;
+
+-- Start from fresh statistics once setup is done.
+SELECT pg_stat_reset();
+
+-- Show the initial state.
+SELECT
+    'POST_SETUP_BASELINE' AS phase,
+    *
+FROM capture_prune_stats();
diff --git a/prune_test.sql b/prune_test.sql
new file mode 100644
index 0000000000000..b06e7995a116e
--- /dev/null
+++ b/prune_test.sql
@@ -0,0 +1,40 @@
+\set aid random(1, 100000 * :scale)
+\set note_suffix random(1, 1000)
+\set large_data_size random(50, 200)
+\set v2_chunks :large_data_size + 10
+\set v3_chunks :large_data_size + 20
+
+-- One transaction: rewrite a random account row three times with a growing
+-- notes payload (v2_chunks / v3_chunks are the larger repeat counts), then
+-- update its next three neighbours, to create dead tuples and page pressure.
+BEGIN;
+
+-- First rewrite of the target row.
+UPDATE pgbench_accounts
+SET notes = 'v1_' || :note_suffix || '_' || repeat('data_chunk_', :large_data_size),
+    update_count = update_count + 1,
+    last_updated = now()
+WHERE aid = :aid;
+
+-- Second rewrite, larger payload.
+UPDATE pgbench_accounts
+SET notes = 'v2_' || :note_suffix || '_' || repeat('more_data_chunk_', :v2_chunks),
+    update_count = update_count + 2,
+    last_updated = now()
+WHERE aid = :aid;
+
+-- Third rewrite, largest payload.
+UPDATE pgbench_accounts
+SET notes = 'v3_' || :note_suffix || '_' || repeat('final_data_chunk_', :v3_chunks),
+    update_count = update_count + 3,
+    last_updated = now()
+WHERE aid = :aid;
+
+-- The three following aids get one update each for extra page pressure.
+UPDATE pgbench_accounts
+SET notes = 'neighbor_' || :note_suffix || '_' || repeat('neighbor_data_', :large_data_size),
+    update_count = update_count + 1,
+    last_updated = now()
+WHERE aid BETWEEN :aid + 1 AND :aid + 3;
+
+COMMIT;
diff --git a/results.log b/results.log new file mode 100644 index 0000000000000..de72172764318 --- /dev/null +++ b/results.log @@ -0,0 +1,67 @@ + phase | context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +----------+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + BASELINE | 
SCAN_OPPORTUNISTIC | 153 | 0 | 0 | 0 | 0 | 0 | 150 | 0 | 3 | 0 | 0 | 0.00 | 0.000 + BASELINE | UPDATE_FULL_PAGE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | INSERT_SPACE_CHECK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | SCAN_END | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | MULTI_INSERT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(5 rows) + + phase | context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +------------------------+--------------------+-------------+------------------+---------------------+---------------+------------- + BASELINE_EXIT_ANALYSIS | SCAN_OPPORTUNISTIC | 160 | 0.00 | INVALID_XACT_XID | 157 | 98.13 +(1 row) + + phase | context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +----------+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + MID_TEST | SCAN_OPPORTUNISTIC | 154 | 0 | 0 | 0 | 1 | 0 | 151 | 0 | 3 | 0 | 0 | 0.00 | 0.006 + MID_TEST | UPDATE_FULL_PAGE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + MID_TEST | INSERT_SPACE_CHECK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + MID_TEST | SCAN_END | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + MID_TEST | MULTI_INSERT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(5 rows) + + phase | context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +------------------------+--------------------+-------------+------------------+---------------------+---------------+------------- + MID_TEST_EXIT_ANALYSIS | SCAN_OPPORTUNISTIC | 
160 | 0.00 | INVALID_XACT_XID | 157 | 98.13 +(1 row) + + phase | context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +-----------+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + POST_TEST | SCAN_OPPORTUNISTIC | 153 | 0 | 0 | 0 | 1 | 0 | 150 | 0 | 3 | 0 | 0 | 0.00 | 0.007 + POST_TEST | UPDATE_FULL_PAGE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + POST_TEST | INSERT_SPACE_CHECK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + POST_TEST | SCAN_END | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + POST_TEST | MULTI_INSERT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(5 rows) + + phase | context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +-------------------------+--------------------+-------------+------------------+---------------------+---------------+------------- + POST_TEST_EXIT_ANALYSIS | SCAN_OPPORTUNISTIC | 160 | 0.00 | INVALID_XACT_XID | 157 | 98.13 +(1 row) + + report_type | context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +---------------------+--------------------+-------------+------------------+---------------------+---------------+------------- + EXIT_REASON_SUMMARY | SCAN_OPPORTUNISTIC | 160 | 0.00 | INVALID_XACT_XID | 157 | 98.13 +(1 row) + + report_type | context | main_failure_reason | recommendation | failure_pct +-----------------+--------------------+---------------------+---------------------------------------------------------------------------+------------- + RECOMMENDATIONS | SCAN_OPPORTUNISTIC | INVALID_XACT_XID | Check 
prune_xid setting and page header - may need transaction visibility | 98.58 +(1 row) + + schemaname | relname | n_tup_ins | n_tup_upd | n_tup_hot_upd | hot_update_pct | table_size +------------+------------------+-----------+-----------+---------------+----------------+------------ + public | pgbench_accounts | 0 | 7786123 | 6686906 | 85.88 | 1932 MB +(1 row) + + relpages | reltuples | n_dead_tup | last_vacuum | last_autovacuum | heap_size +----------+-----------+------------+-------------+-----------------+----------- + 81968 | 5e+06 | 1355592 | | | 1820 MB +(1 row) + + context | calls_total | pages_pruned | tuples_pruned | space_freed | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 153 | 0 | 0 | 0 | 0 | 150 | 0 | 3 | 0 | 0 | 0.00 | 0.144 +(1 row) +
diff --git a/run_test.sh b/run_test.sh
new file mode 100755
index 0000000000000..2f64f3e036f65
--- /dev/null
+++ b/run_test.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+#
+# Heap-pruning benchmark driver: loads the helper functions, initialises
+# pgbench, pre-loads updates, runs a HOT-update workload and a TPC-B
+# workload, and writes statistics snapshots to results.log.
+#
+# Required environment:
+#   PG_INSTALL_DIR  prefix containing bin/psql and bin/pgbench
+#   PG_DATA_DIR     value passed to -h (host name or socket directory)
+#
+# NOTE(review): the prune counters appear to be kept in a per-backend
+# array, so each psql call below presumably reports only its own session
+# and not the pgbench clients -- confirm before trusting the snapshots.
+
+set -euo pipefail
+#set -x
+
+: "${PG_INSTALL_DIR:?PG_INSTALL_DIR must be set}"
+: "${PG_DATA_DIR:?PG_DATA_DIR must be set}"
+
+# Arrays keep arguments with spaces intact.  ON_ERROR_STOP makes psql exit
+# non-zero when a statement in a -f script fails, so 'set -e' notices it.
+PSQL=("$PG_INSTALL_DIR/bin/psql" -X -v ON_ERROR_STOP=1 -h "$PG_DATA_DIR" -d postgres)
+PGBENCH=("$PG_INSTALL_DIR/bin/pgbench" -h "$PG_DATA_DIR")
+
+PGBENCH_SCALE=50
+PGBENCH_CLIENTS=8
+PGBENCH_JOBS=4
+PGBENCH_TIME=120
+
+echo "=== Setting up test environment ==="
+"${PSQL[@]}" -f setup.sql
+
+echo "=== Setup the benchmark ==="
+"${PGBENCH[@]}" -i -s "$PGBENCH_SCALE" postgres
+
+echo "=== Post-setup configuration ==="
+"${PSQL[@]}" -f post_setup.sql
+
+echo "=== Pre-loading data to create page pressure ==="
+"${PSQL[@]}" -c "
+-- Create more update activity to fill pages with large tuples
+-- (floor() keeps every random aid within 1 .. 100000 * scale)
+UPDATE pgbench_accounts
+SET notes = 'preload_' || (random() * 10000)::int || '_' || repeat('large_data_chunk', 50),
+    update_count = update_count + 1,
+    last_updated = now()
+WHERE aid IN (
+    SELECT floor(random() * (100000 * $PGBENCH_SCALE))::int + 1
+    FROM generate_series(1, 20000)
+);
+
+-- Create some dead tuples by updating again
+UPDATE pgbench_accounts
+SET notes = 'dead_preload_' || (random() * 10000)::int || '_' || repeat('dead_data_chunk', 60),
+    update_count = update_count + 2
+WHERE aid IN (
+    SELECT floor(random() * (100000 * $PGBENCH_SCALE))::int + 1
+    FROM generate_series(1, 10000)
+);
+"
+
+echo "=== Capturing baseline statistics ==="
+"${PSQL[@]}" -c "SELECT 'BASELINE' as phase, * FROM capture_prune_stats();" >results.log
+"${PSQL[@]}" -c "SELECT 'BASELINE_EXIT_ANALYSIS' as phase, * FROM analyze_exit_reasons();" >>results.log
+
+echo "=== Running pgbench HOT update workload ==="
+"${PGBENCH[@]}" -c "$PGBENCH_CLIENTS" -j "$PGBENCH_JOBS" -T "$PGBENCH_TIME" -f prune_test.sql -P 10 postgres
+
+echo "=== Capturing mid-test statistics ==="
+"${PSQL[@]}" -c "SELECT 'MID_TEST' as phase, * FROM capture_prune_stats();" >>results.log
+"${PSQL[@]}" -c "SELECT 'MID_TEST_EXIT_ANALYSIS' as phase, * FROM analyze_exit_reasons();" >>results.log
+
+echo "=== Running standard TPC-B workload for comparison ==="
+"${PGBENCH[@]}" -c "$PGBENCH_CLIENTS" -j "$PGBENCH_JOBS" -T 30 -P 10 postgres
+
+echo "=== Capturing post-test statistics ==="
+"${PSQL[@]}" -c "SELECT 'POST_TEST' as phase, * FROM capture_prune_stats();" >>results.log
+"${PSQL[@]}" -c "SELECT 'POST_TEST_EXIT_ANALYSIS' as phase, * FROM analyze_exit_reasons();" >>results.log
+
+echo "=== Exit Reason Analysis and Recommendations ==="
+"${PSQL[@]}" -c "
+SELECT
+    'EXIT_REASON_SUMMARY' as report_type,
+    context,
+    calls_total,
+    success_rate_pct,
+    main_failure_reason,
+    failure_count,
+    failure_pct
+FROM analyze_exit_reasons()
+ORDER BY calls_total DESC;
+" >>results.log
+
+"${PSQL[@]}" -c "
+SELECT
+    'RECOMMENDATIONS' as report_type,
+    context,
+    main_failure_reason,
+    recommendation,
+    failure_pct
+FROM get_pruning_recommendations()
+ORDER BY failure_pct DESC;
+" >>results.log
+
+echo "=== HOT Update Effectiveness ==="
+"${PSQL[@]}" -c "
+SELECT
+    schemaname, relname,
+    n_tup_ins, n_tup_upd, n_tup_hot_upd,
+    CASE WHEN n_tup_upd > 0
+         THEN round(100.0 * n_tup_hot_upd / n_tup_upd, 2)
+         ELSE 0
+    END as hot_update_pct,
+    pg_size_pretty(pg_total_relation_size(relid)) as table_size
+FROM pg_stat_user_tables
+WHERE relname = 'pgbench_accounts';
+" >>results.log
+
+echo "=== Page-level analysis ==="
+"${PSQL[@]}" -c "
+-- Join on the relation OID: a join on relname would also match same-named
+-- relations in other schemas.
+SELECT
+    c.relpages,
+    c.reltuples,
+    st.n_dead_tup,
+    st.last_vacuum,
+    st.last_autovacuum,
+    pg_size_pretty(pg_relation_size(st.relid)) as heap_size
+FROM pg_stat_user_tables AS st
+JOIN pg_class AS c ON c.oid = st.relid
+WHERE st.relname = 'pgbench_accounts';
+" >>results.log
+
+echo "=== Detailed Statistics Breakdown ==="
+"${PSQL[@]}" -c "
+SELECT
+    context,
+    calls_total,
+    pages_pruned,
+    tuples_pruned,
+    space_freed,
+    exit_success,
+    exit_invalid_xact_xid,
+    exit_no_removable_xids,
+    exit_page_not_prunable,
+    exit_lock_failed,
+    exit_other,
+    prune_success_rate_pct,
+    avg_time_per_call_us
+FROM capture_prune_stats()
+WHERE calls_total > 0
+ORDER BY calls_total DESC;
+" >>results.log
+
+echo "=== Test complete. 
Results in results.log ==="
+cat results.log
diff --git a/setup.sql b/setup.sql
new file mode 100644
index 0000000000000..b79f9f1981511
--- /dev/null
+++ b/setup.sql
@@ -0,0 +1,176 @@
+-- Enable tracking
+SET enable_heap_prune_tracking = on;
+SET log_min_messages = debug2;
+
+-- Drop every function defined below so that a changed RETURNS TABLE
+-- signature never blocks CREATE OR REPLACE.
+DROP FUNCTION IF EXISTS get_pruning_recommendations();
+DROP FUNCTION IF EXISTS analyze_exit_reasons();
+DROP FUNCTION IF EXISTS capture_prune_stats();
+
+-- capture_prune_stats(): one row per context returned by
+-- pg_stat_get_heap_prune_stats(), ordered by calls_total descending, with
+-- two derived columns:
+--   prune_success_rate_pct = 100 * pages_pruned / calls_total
+--   avg_time_per_call_us   = time_spent_us / calls_total
+-- Both derived columns are 0 when calls_total is 0.
+CREATE OR REPLACE FUNCTION capture_prune_stats()
+RETURNS TABLE(
+    context text,
+    calls_total bigint,
+    pages_pruned bigint,
+    tuples_pruned bigint,
+    space_freed bigint,
+    time_spent_us bigint,
+    exit_success bigint,
+    exit_invalid_xact_xid bigint,
+    exit_no_removable_xids bigint,
+    exit_page_not_prunable bigint,
+    exit_lock_failed bigint,
+    exit_other bigint,
+    prune_success_rate_pct numeric,
+    avg_time_per_call_us numeric
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT
+        s.context,
+        s.calls_total,
+        s.pages_pruned,
+        s.tuples_pruned,
+        s.space_freed,
+        s.time_spent_us,
+        s.exit_success,
+        s.exit_invalid_xact_xid,
+        s.exit_no_removable_xids,
+        s.exit_page_not_prunable,
+        s.exit_lock_failed,
+        s.exit_other,
+        CASE WHEN s.calls_total > 0
+             THEN round(100.0 * s.pages_pruned / s.calls_total, 2)
+             ELSE 0
+        END AS prune_success_rate_pct,
+        CASE WHEN s.calls_total > 0
+             THEN round(s.time_spent_us::numeric / s.calls_total, 3)
+             ELSE 0
+        END AS avg_time_per_call_us
+    FROM pg_stat_get_heap_prune_stats() AS s(
+        context text,
+        calls_total bigint,
+        pages_pruned bigint,
+        tuples_pruned bigint,
+        space_freed bigint,
+        time_spent_us bigint,
+        exit_success bigint,
+        exit_invalid_xact_xid bigint,
+        exit_no_removable_xids bigint,
+        exit_page_not_prunable bigint,
+        exit_lock_failed bigint,
+        exit_other bigint
+    )
+    ORDER BY s.calls_total DESC;
+END;
+$$ LANGUAGE plpgsql;
+
+-- analyze_exit_reasons(): for each context with at least one call, report
+-- the share of calls that exited with success and the most frequent
+-- non-success exit reason.  Ties go to the first reason in this order:
+-- INVALID_XACT_XID, NO_REMOVABLE_XIDS, PAGE_NOT_PRUNABLE, LOCK_FAILED, OTHER.
+-- When no non-success exit was recorded the reason is 'NONE' with count 0.
+CREATE OR REPLACE FUNCTION analyze_exit_reasons()
+RETURNS TABLE(
+    context text,
+    calls_total bigint,
+    success_rate_pct numeric,
+    main_failure_reason text,
+    failure_count bigint,
+    failure_pct numeric
+) AS $$
+BEGIN
+    RETURN QUERY
+    WITH exit_analysis AS (
+        SELECT
+            s.context AS ctx,
+            s.calls_total AS total_calls,
+            s.exit_invalid_xact_xid,
+            s.exit_no_removable_xids,
+            s.exit_page_not_prunable,
+            s.exit_lock_failed,
+            s.exit_other,
+            CASE WHEN s.calls_total > 0
+                 THEN round(100.0 * s.exit_success / s.calls_total, 2)
+                 ELSE 0
+            END AS success_rate,
+            GREATEST(
+                s.exit_invalid_xact_xid,
+                s.exit_no_removable_xids,
+                s.exit_page_not_prunable,
+                s.exit_lock_failed,
+                s.exit_other
+            ) AS max_failure_count
+        FROM capture_prune_stats() AS s
+        WHERE s.calls_total > 0
+    ),
+    failure_reasons AS (
+        SELECT
+            ea.ctx,
+            ea.total_calls,
+            ea.success_rate,
+            CASE
+                -- Without this branch a context with zero failures would be
+                -- labelled INVALID_XACT_XID, because 0 >= 0 matches first.
+                WHEN ea.max_failure_count = 0 THEN 'NONE'
+                WHEN ea.exit_invalid_xact_xid = ea.max_failure_count THEN 'INVALID_XACT_XID'
+                WHEN ea.exit_no_removable_xids = ea.max_failure_count THEN 'NO_REMOVABLE_XIDS'
+                WHEN ea.exit_page_not_prunable = ea.max_failure_count THEN 'PAGE_NOT_PRUNABLE'
+                WHEN ea.exit_lock_failed = ea.max_failure_count THEN 'LOCK_FAILED'
+                ELSE 'OTHER'
+            END AS main_reason,
+            ea.max_failure_count
+        FROM exit_analysis AS ea
+    )
+    SELECT
+        fr.ctx,
+        fr.total_calls,
+        fr.success_rate,
+        fr.main_reason,
+        fr.max_failure_count,
+        CASE WHEN fr.total_calls > 0
+             THEN round(100.0 * fr.max_failure_count / fr.total_calls, 2)
+             ELSE 0
+        END AS failure_percentage
+    FROM failure_reasons AS fr
+    ORDER BY fr.total_calls DESC;
+END;
+$$ LANGUAGE plpgsql;
+
+-- get_pruning_recommendations(): map each context's main failure reason
+-- from analyze_exit_reasons() to a short tuning hint.
+CREATE OR REPLACE FUNCTION get_pruning_recommendations()
+RETURNS TABLE(
+    context text,
+    main_failure_reason text,
+    recommendation text,
+    failure_pct numeric
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT
+        ar.context,
+        ar.main_failure_reason,
+        CASE ar.main_failure_reason
+            WHEN 'NONE' THEN 'No failed prune attempts recorded - no action needed'
+            WHEN 'INVALID_XACT_XID' THEN 'Check prune_xid setting and page header - may need transaction visibility'
+            WHEN 'NO_REMOVABLE_XIDS' THEN 'Need more transaction churn or longer waits for visibility - try VACUUM or wait'
+            WHEN 'PAGE_NOT_PRUNABLE' THEN 'Check PageHasPrunable() logic - page may not have dead tuples'
+            WHEN 'LOCK_FAILED' THEN 'Increase lock acquisition attempts or reduce contention'
+            ELSE 'Investigate other failure causes in logs'
+        END AS recommendation,
+        ar.failure_pct
+    FROM analyze_exit_reasons() AS ar
+    WHERE ar.calls_total > 0;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Reset statistics
+SELECT pg_stat_reset();
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 6e52773e2aeda..fb0f3886092f4 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -106,6 +106,9 @@ static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy); +/* GUC variable */ +bool enable_heap_prune_tracking = false; +HeapPruneStats prune_stats_by_context[6] = {0}; /* * Each tuple lock mode has a corresponding heavyweight lock, and one or two @@ -574,7 +577,7 @@ heap_prepare_pagescan(TableScanDesc sscan) /* * Prune and repair fragmentation for the whole page, if possible. 
*/ - heap_page_prune_opt(scan->rs_base.rs_rd, buffer, 0); + heap_page_prune_opt(scan->rs_base.rs_rd, buffer, 0, PRUNE_CONTEXT_PREPARE_PAGESCAN); /* * We must hold share lock on the buffer content while examining tuple @@ -996,7 +999,7 @@ heapgettup(HeapScanDesc scan, PageNeedsScanPruning(page, scan->rs_page_updates)) { /* Attempt opportunistic pruning */ - heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0); + heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0, PRUNE_CONTEXT_SCAN_OPPORTUNISTIC); /* Mark this block as pruned to avoid repeated attempts */ scan->rs_last_pruned_block = scan->rs_cblock; @@ -1119,7 +1122,7 @@ heapgettup_pagemode(HeapScanDesc scan, scan->rs_cblock != scan->rs_last_pruned_block && PageNeedsScanPruning(BufferGetPage(scan->rs_cbuf), scan->rs_page_updates)) { - heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0); + heap_page_prune_opt(scan->rs_base.rs_rd, scan->rs_cbuf, 0, PRUNE_CONTEXT_SCAN_END); scan->rs_last_pruned_block = scan->rs_cblock; } @@ -2300,7 +2303,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* Consider pruning the page if it's getting full */ if (PageIsFull(BufferGetPage(buffer))) - heap_page_prune_opt(relation, buffer, heaptup->t_len); + heap_page_prune_opt(relation, buffer, heaptup->t_len, PRUNE_CONTEXT_INSERT_SPACE_CHECK); ReleaseBuffer(buffer); @@ -2733,7 +2736,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* Consider pruning the page if it's getting full */ if (PageIsFull(BufferGetPage(buffer))) - heap_page_prune_opt(relation, buffer, heaptuples[ndone]->t_len); + heap_page_prune_opt(relation, buffer, heaptuples[ndone]->t_len, PRUNE_CONTEXT_MULTI_INSERT); ReleaseBuffer(buffer); @@ -3899,7 +3902,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, if (newtupsize > pagefree && PageHasPrunable(page)) { - heap_page_prune_opt(relation, buffer, newtupsize); + heap_page_prune_opt(relation, buffer, newtupsize, 
PRUNE_CONTEXT_UPDATE_FULL_PAGE); pagefree = PageGetHeapFreeSpace(page); } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index fc6364b142433..63b3f131d8ae4 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -138,7 +138,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, * Prune page, but only if we weren't already on this page */ if (prev_buf != hscan->xs_cbuf) - heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf, 0); + heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf, 0, PRUNE_CONTEXT_SCAN_OPPORTUNISTIC); } /* Obtain share-lock on the buffer so we can examine visibility */ @@ -2517,7 +2517,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan, /* * Prune and repair fragmentation for the whole page, if possible. */ - heap_page_prune_opt(scan->rs_rd, buffer, 0); + heap_page_prune_opt(scan->rs_rd, buffer, 0, PRUNE_CONTEXT_SCAN_OPPORTUNISTIC); /* * We must hold share lock on the buffer content while examining tuple diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index cad1531dfa8c5..6582f24acc544 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -177,6 +177,7 @@ static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetN static void page_verify_redirects(Page page); +extern bool enable_heap_prune_tracking; /* * Optionally prune and repair fragmentation in the specified page. @@ -195,12 +196,38 @@ static void page_verify_redirects(Page page); * Caller must have pin on the buffer, and must *not* have a lock on it. 
*/ void -heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) +heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len, HeapPruneContext context) { Page page = BufferGetPage(buffer); TransactionId prune_xid; GlobalVisState *vistest; Size minfree; + Size freespace_before = 0; + Size freespace_after = 0; + instr_time start_time, + end_time; + uint64 hot_updates = 0; + HeapPruneExitReason exit_reason = HEAP_PRUNE_EXIT_SUCCESS; + + static const char *context_names[] = { + "UPDATE_FULL_PAGE", + "INSERT_SPACE_CHECK", + "SCAN_OPPORTUNISTIC", + "SCAN_END", + "MULTI_INSERT", + "PREPARE_PAGESCAN" + }; + + /* Track statistics if enabled */ + if (enable_heap_prune_tracking) + { + INSTR_TIME_SET_CURRENT(start_time); + prune_stats_by_context[context].calls_total++; + freespace_before = PageGetHeapFreeSpace(page); + /* Get HOT updates on relation */ + if (relation->pgstat_info) + hot_updates = relation->pgstat_info->counts.tuples_hot_updated; + } /* * We can't write WAL in recovery mode, so there's no point trying to @@ -208,7 +235,15 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) * soon anyway, so this is no particular loss. 
*/ if (RecoveryInProgress()) + { + if (enable_heap_prune_tracking) + { + exit_reason = HEAP_PRUNE_EXIT_RECOVERY_IN_PROGRESS; + elog(DEBUG2, "heap_page_prune_opt: context=%s RETURNed RecoveryInProgress", context_names[context]); + goto exit_with_reason; + } return; + } /* * First check whether there's any chance there's something to prune, @@ -217,7 +252,15 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) */ prune_xid = ((PageHeader) page)->pd_prune_xid; if (!TransactionIdIsValid(prune_xid)) + { + if (enable_heap_prune_tracking) + { + exit_reason = HEAP_PRUNE_EXIT_INVALID_XACT_XID; + elog(DEBUG2, "heap_page_prune_opt: context=%s RETURNed !TransactionIdIsValid", context_names[context]); + goto exit_with_reason; + } return; + } /* * Check whether prune_xid indicates that there may be dead rows that can @@ -226,7 +269,15 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) vistest = GlobalVisTestFor(relation); if (!GlobalVisTestIsRemovableXid(vistest, prune_xid)) + { + if (enable_heap_prune_tracking) + { + exit_reason = HEAP_PRUNE_EXIT_NO_REMOVABLE_XIDS; + elog(DEBUG2, "heap_page_prune_opt: context=%s RETURNed !GlobalVisTestIsRemovableXid", context_names[context]); + goto exit_with_reason; + } return; + } /* * We prune when a previous UPDATE failed to find enough space on the page @@ -256,7 +307,15 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) { /* OK, try to get exclusive buffer lock */ if (!ConditionalLockBufferForCleanup(buffer)) + { + if (enable_heap_prune_tracking) + { + exit_reason = HEAP_PRUNE_EXIT_LOCK_FAILED; + elog(DEBUG2, "heap_page_prune_opt: RETURNed !ConditionalLockBufferForCleanup"); + goto exit_with_reason; + } return; + } /* * Now that we have buffer lock, get accurate information about the @@ -295,6 +354,38 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) if (presult.ndeleted > presult.nnewlpdead) pgstat_update_heap_dead_tuples(relation, presult.ndeleted - 
presult.nnewlpdead); + + + /* Update statistics */ + if (enable_heap_prune_tracking) + { + int tuples_pruned = presult.ndeleted; + + freespace_after = PageGetHeapFreeSpace(page); + + if (tuples_pruned > 0) + { + exit_reason = HEAP_PRUNE_EXIT_SUCCESS; + prune_stats_by_context[context].pages_pruned++; + prune_stats_by_context[context].tuples_pruned += tuples_pruned; + prune_stats_by_context[context].space_freed += (freespace_after - freespace_before); + + elog(DEBUG2, "heap_page_prune_opt: context=%s, tuples_pruned=%d, space_freed=%zu, " + "tuple_len=%zu, hot_updates=%lu, relation=%s", + context_names[context], tuples_pruned, + (freespace_after - freespace_before), tuple_len, + hot_updates, RelationGetRelationName(relation)); + } + else + { + exit_reason = HEAP_PRUNE_EXIT_OTHER; + elog(DEBUG3, "heap_page_prune_opt: context=%s, no pruning done, freespace_before=%zu, " + "tuple_len=%zu, relation=%s", + context_names[context], freespace_before, tuple_len, + RelationGetRelationName(relation)); + } + + } } /* And release buffer lock */ @@ -306,6 +397,52 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len) * free space should be reused by UPDATEs to *this* page. 
*/ } + + else if (enable_heap_prune_tracking) + { + /* Called but no pruning needed */ + exit_reason = HEAP_PRUNE_EXIT_PAGE_NOT_PRUNABLE; + elog(DEBUG3, "heap_page_prune_opt: context=%s, no pruning needed, freespace=%zu, " + "tuple_len=%zu, relation=%s", + context_names[context], freespace_before, tuple_len, + RelationGetRelationName(relation)); + } + + +exit_with_reason: + if (enable_heap_prune_tracking) + { + /* Record timing */ + INSTR_TIME_SET_CURRENT(end_time); + INSTR_TIME_SUBTRACT(end_time, start_time); + prune_stats_by_context[context].time_spent_us += INSTR_TIME_GET_MICROSEC(end_time); + + /* Record exit reason */ + switch (exit_reason) + { + case HEAP_PRUNE_EXIT_SUCCESS: + prune_stats_by_context[context].exit_success++; + break; + case HEAP_PRUNE_EXIT_RECOVERY_IN_PROGRESS: + prune_stats_by_context[context].exit_recover_in_progress++; + break; + case HEAP_PRUNE_EXIT_INVALID_XACT_XID: + prune_stats_by_context[context].exit_invalid_xact_xid++; + break; + case HEAP_PRUNE_EXIT_NO_REMOVABLE_XIDS: + prune_stats_by_context[context].exit_no_removable_xids++; + break; + case HEAP_PRUNE_EXIT_LOCK_FAILED: + prune_stats_by_context[context].exit_lock_failed++; + break; + case HEAP_PRUNE_EXIT_PAGE_NOT_PRUNABLE: + prune_stats_by_context[context].exit_page_not_prunable++; + break; + default: + prune_stats_by_context[context].exit_other++; + break; + } + } } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index c756c2bebaaa0..cc89546583096 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -31,6 +31,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/timestamp.h" +#include "access/heapam.h" #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) @@ -2164,6 +2165,58 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS) PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * pg_stat_get_heap_prune_stats - return heap pruning 
statistics
+ *
+ * Returns one row per HeapPruneContext, built from prune_stats_by_context[].
+ * The pg_proc entry declares the result as plain "record", so callers supply
+ * the 12-column definition list themselves.
+ *
+ * NOTE(review): exit_recover_in_progress is maintained by
+ * heap_page_prune_opt() but has no output column here; adding one would
+ * change the row type callers spell out.  The prorows estimate ('5') in
+ * pg_proc.dat should also be revisited now that all six contexts are
+ * returned.
+ */
+Datum
+pg_stat_get_heap_prune_stats(PG_FUNCTION_ARGS)
+{
+	TupleDesc	tupdesc;
+	Datum		values[12];
+	bool		nulls[12];
+	HeapTuple	tuple;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	/* Display names, indexed by HeapPruneContext (same order as the enum) */
+	static const char *context_names[] = {
+		"UPDATE_FULL_PAGE",
+		"INSERT_SPACE_CHECK",
+		"SCAN_OPPORTUNISTIC",
+		"SCAN_END",
+		"MULTI_INSERT",
+		"PREPARE_PAGESCAN",
+	};
+
+	/* A context added to the stats array must get a name here as well */
+	StaticAssertStmt(lengthof(context_names) == lengthof(prune_stats_by_context),
+					 "context_names[] is out of sync with prune_stats_by_context[]");
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+
+	/* No column is ever NULL, so the null flags are cleared once up front */
+	memset(nulls, 0, sizeof(nulls));
+
+	/*
+	 * Walk every context.  The bound used to be a hard-coded 5, which
+	 * silently dropped the sixth entry (PREPARE_PAGESCAN).
+	 */
+	for (int i = 0; i < lengthof(context_names); i++)
+	{
+		const HeapPruneStats *stats = &prune_stats_by_context[i];
+
+		/* Column order must match the definition list used by callers */
+		values[0] = CStringGetTextDatum(context_names[i]);
+		values[1] = Int64GetDatum(stats->calls_total);
+		values[2] = Int64GetDatum(stats->pages_pruned);
+		values[3] = Int64GetDatum(stats->tuples_pruned);
+		values[4] = Int64GetDatum(stats->space_freed);
+		values[5] = Int64GetDatum(stats->time_spent_us);
+		values[6] = Int64GetDatum(stats->exit_success);
+		values[7] = Int64GetDatum(stats->exit_invalid_xact_xid);
+		values[8] = Int64GetDatum(stats->exit_no_removable_xids);
+		values[9] = Int64GetDatum(stats->exit_page_not_prunable);
+		values[10] = Int64GetDatum(stats->exit_lock_failed);
+		values[11] = Int64GetDatum(stats->exit_other);
+
+		tuple = heap_form_tuple(tupdesc, values, nulls);
+		tuplestore_puttuple(rsinfo->setResult, tuple);
+	}
+
+	return (Datum) 0;
+}
+
 /*
  * Get the subscription statistics for the given subscription. If the
  * subscription statistics is not available, return all-zeros stats.
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 6bc6be13d2ad2..e00b8d2899508 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -3475,4 +3475,10 @@ assign_hook => 'assign_io_method', }, +{ name => 'enable_heap_prune_tracking', type => 'bool', context => 'PGC_USERSET', group => 'DEVELOPER_OPTIONS', + short_desc => 'Enables tracking of heap pruning statistics.', + variable => 'enable_heap_prune_tracking', + boot_val => 'true', +}, + ] diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 9be965fa654d0..25993d2234503 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -32,6 +32,47 @@ #include "utils/relcache.h" #include "utils/snapshot.h" +/* Heap pruning statistics: HeapPruneContext is passed to heap_page_prune_opt() and indexes prune_stats_by_context[] */ +typedef enum HeapPruneContext +{ + PRUNE_CONTEXT_UPDATE_FULL_PAGE, /* Update on full page (new) */ + PRUNE_CONTEXT_INSERT_SPACE_CHECK, /* Insert with space check (new) */ + PRUNE_CONTEXT_SCAN_OPPORTUNISTIC, /* Scan-time opportunistic (new) */ + PRUNE_CONTEXT_SCAN_END, /* End of scan cleanup (new) */ + PRUNE_CONTEXT_MULTI_INSERT, /* Multi-insert operation (new) */ + PRUNE_CONTEXT_PREPARE_PAGESCAN /* Prepare scan (pre-existing); keep last, it sizes prune_stats_by_context[] */ +} HeapPruneContext; + +typedef enum HeapPruneExitReason /* outcome of one heap_page_prune_opt() call; each value has an exit_* counter in HeapPruneStats */ +{ + HEAP_PRUNE_EXIT_SUCCESS = 0, /* at least one tuple was pruned */ + HEAP_PRUNE_EXIT_RECOVERY_IN_PROGRESS, + HEAP_PRUNE_EXIT_INVALID_XACT_XID, + HEAP_PRUNE_EXIT_NO_REMOVABLE_XIDS, + HEAP_PRUNE_EXIT_LOCK_FAILED, + HEAP_PRUNE_EXIT_PAGE_NOT_PRUNABLE, /* reached the prune check but no pruning was needed */ + HEAP_PRUNE_EXIT_OTHER /* pruning ran but removed nothing; also the fallback bucket */ +} HeapPruneExitReason; + +typedef struct HeapPruneStats /* counters for one HeapPruneContext */ +{ + uint64 calls_total; /* Total calls to heap_page_prune_opt */ + uint64 pages_pruned; /* Pages that had items pruned */ + uint64 tuples_pruned; /* Total tuples pruned */ + uint64 space_freed; /* Total space freed (bytes) */ + uint64 time_spent_us; /* Total time spent in microseconds */ + + /* Exit reason counters, one per HeapPruneExitReason */ + int64 exit_success; + int64 exit_recover_in_progress; /* NOTE(review): not returned by pg_stat_get_heap_prune_stats() */
+ int64 exit_invalid_xact_xid; + int64 exit_no_removable_xids; + int64 exit_lock_failed; + int64 exit_page_not_prunable; + int64 exit_other; +} HeapPruneStats; +extern PGDLLIMPORT HeapPruneStats prune_stats_by_context[PRUNE_CONTEXT_PREPARE_PAGESCAN + 1]; /* one slot per HeapPruneContext */ + /* "options" flag bits for heap_insert */ #define HEAP_INSERT_SKIP_FSM TABLE_INSERT_SKIP_FSM @@ -372,7 +413,8 @@ extern TransactionId heap_index_delete_tuples(Relation rel, /* in heap/pruneheap.c */ struct GlobalVisState; -extern void heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len); +extern void heap_page_prune_opt(Relation relation, Buffer buffer, Size tuple_len, + HeapPruneContext context); extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 77eb41eb6dc99..ced445414491b 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -32,7 +32,7 @@ /* GUCs */ extern PGDLLIMPORT char *default_table_access_method; extern PGDLLIMPORT bool synchronize_seqscans; - +extern PGDLLIMPORT bool enable_heap_prune_tracking; struct BulkInsertStateData; struct IndexInfo; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 01eba3b5a1909..f19b9abf362a2 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12588,4 +12588,9 @@ proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', prosrc => 'pg_get_aios' }, +{ oid => '9999', descr => 'statistics for heap pruning operations', + proname => 'pg_stat_get_heap_prune_stats', prorows => '5', proisstrict => 'f', + proretset => 't', provolatile => 's', proparallel => 'r', + prorettype => 'record', proargtypes => '', + prosrc => 'pg_stat_get_heap_prune_stats' }, ] diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index ce6285a2c0376..03bddf2d23606 100644 ---
a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -136,4 +136,6 @@ extern int32 type_maximum_size(Oid type_oid, int32 typemod); /* quote.c */ extern char *quote_literal_cstr(const char *rawstr); +extern Datum pg_stat_get_heap_prune_stats(PG_FUNCTION_ARGS); + #endif /* BUILTINS_H */ diff --git a/test.log b/test.log new file mode 100644 index 0000000000000..388ac179b92b2 --- /dev/null +++ b/test.log @@ -0,0 +1,870 @@ +/******** QUERY *********/ +SET enable_heap_prune_tracking = on; +/************************/ + +SET +/******** QUERY *********/ +SET log_min_messages = debug2; +/************************/ + +SET +/******** QUERY *********/ +ALTER SYSTEM SET autovacuum = off; +/************************/ + +ALTER SYSTEM +/******** QUERY *********/ +SELECT pg_reload_conf(); +/************************/ + + pg_reload_conf +---------------- + t +(1 row) + +/******** QUERY *********/ +CREATE EXTENSION IF NOT EXISTS pageinspect; +/************************/ + +CREATE EXTENSION +/******** QUERY *********/ +DROP FUNCTION IF EXISTS capture_prune_stats(); +/************************/ + +DROP FUNCTION +/******** QUERY *********/ +DROP FUNCTION IF EXISTS analyze_exit_reasons(); +/************************/ + +DROP FUNCTION +/******** QUERY *********/ +CREATE OR REPLACE FUNCTION capture_prune_stats() +RETURNS TABLE( + context text, + calls_total bigint, + pages_pruned bigint, + tuples_pruned bigint, + space_freed bigint, + time_spent_us bigint, + exit_success bigint, + exit_invalid_xact_xid bigint, + exit_no_removable_xids bigint, + exit_page_not_prunable bigint, + exit_lock_failed bigint, + exit_other bigint, + prune_success_rate_pct numeric, + avg_time_per_call_us numeric +) AS $$ +BEGIN + RETURN QUERY + SELECT + s.context, + s.calls_total, + s.pages_pruned, + s.tuples_pruned, + s.space_freed, + s.time_spent_us, + s.exit_success, + s.exit_invalid_xact_xid, + s.exit_no_removable_xids, + s.exit_page_not_prunable, + s.exit_lock_failed, + s.exit_other, + CASE WHEN 
s.calls_total > 0 + THEN round(100.0 * s.pages_pruned / s.calls_total, 2) + ELSE 0 + END as prune_success_rate_pct, + CASE WHEN s.calls_total > 0 + THEN round(s.time_spent_us::numeric / s.calls_total, 3) + ELSE 0 + END as avg_time_per_call_us + FROM pg_stat_get_heap_prune_stats() AS s( + context text, + calls_total bigint, + pages_pruned bigint, + tuples_pruned bigint, + space_freed bigint, + time_spent_us bigint, + exit_success bigint, + exit_invalid_xact_xid bigint, + exit_no_removable_xids bigint, + exit_page_not_prunable bigint, + exit_lock_failed bigint, + exit_other bigint + ) + ORDER BY s.calls_total DESC; +END; +$$ LANGUAGE plpgsql; +/************************/ + +CREATE FUNCTION +/******** QUERY *********/ +CREATE OR REPLACE FUNCTION analyze_exit_reasons() +RETURNS TABLE( + context text, + calls_total bigint, + success_rate_pct numeric, + main_failure_reason text, + failure_count bigint, + failure_pct numeric +) AS $$ +BEGIN + RETURN QUERY + WITH exit_analysis AS ( + SELECT + s.context as ctx, + s.calls_total as total_calls, + s.exit_success, + s.exit_invalid_xact_xid, + s.exit_no_removable_xids, + s.exit_page_not_prunable, + s.exit_lock_failed, + s.exit_other, + CASE WHEN s.calls_total > 0 + THEN round(100.0 * s.exit_success / s.calls_total, 2) + ELSE 0 + END as success_rate + FROM capture_prune_stats() s + WHERE s.calls_total > 0 + ), + failure_reasons AS ( + SELECT + ea.ctx, + ea.total_calls, + ea.success_rate, + CASE + WHEN ea.exit_invalid_xact_xid >= GREATEST(ea.exit_no_removable_xids, ea.exit_page_not_prunable, ea.exit_lock_failed, ea.exit_other) + THEN 'INVALID_XACT_XID' + WHEN ea.exit_no_removable_xids >= GREATEST(ea.exit_invalid_xact_xid, ea.exit_page_not_prunable, ea.exit_lock_failed, ea.exit_other) + THEN 'NO_REMOVABLE_XIDS' + WHEN ea.exit_page_not_prunable >= GREATEST(ea.exit_invalid_xact_xid, ea.exit_no_removable_xids, ea.exit_lock_failed, ea.exit_other) + THEN 'PAGE_NOT_PRUNABLE' + WHEN ea.exit_lock_failed >= GREATEST(ea.exit_invalid_xact_xid, 
ea.exit_no_removable_xids, ea.exit_page_not_prunable, ea.exit_other) + THEN 'LOCK_FAILED' + ELSE 'OTHER' + END as main_reason, + GREATEST(ea.exit_invalid_xact_xid, ea.exit_no_removable_xids, ea.exit_page_not_prunable, ea.exit_lock_failed, ea.exit_other) as max_failure_count + FROM exit_analysis ea + ) + SELECT + fr.ctx, + fr.total_calls, + fr.success_rate, + fr.main_reason, + fr.max_failure_count, + CASE WHEN fr.total_calls > 0 + THEN round(100.0 * fr.max_failure_count / fr.total_calls, 2) + ELSE 0 + END as failure_percentage + FROM failure_reasons fr + ORDER BY fr.total_calls DESC; +END; +$$ LANGUAGE plpgsql; +/************************/ + +CREATE FUNCTION +/******** QUERY *********/ +SELECT pg_stat_reset(); +/************************/ + + pg_stat_reset +--------------- + +(1 row) + +/******** QUERY *********/ +CREATE TABLE ultimate_prune_test ( + id serial PRIMARY KEY, + data text, + status varchar(20) DEFAULT 'active', + counter bigint DEFAULT 0, + -- Make tuples absolutely massive - ~6KB each + mega_padding1 text DEFAULT repeat('AAAAAAAA', 750), -- 6KB + mega_padding2 text DEFAULT repeat('BBBBBBBB', 750), -- 6KB + mega_padding3 text DEFAULT repeat('CCCCCCCC', 750) -- 6KB +); +/************************/ + +CREATE TABLE +/******** QUERY *********/ +ALTER TABLE ultimate_prune_test SET (autovacuum_enabled = false); +/************************/ + +ALTER TABLE +/******** QUERY *********/ +CREATE INDEX idx_ultimate_status ON ultimate_prune_test(status); +/************************/ + +CREATE INDEX +/******** QUERY *********/ +INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3) +SELECT + 'initial_' || i, + repeat('INIT1_' || i, 750), + repeat('INIT2_' || i, 750), + repeat('INIT3_' || i, 750) +FROM generate_series(1, 20) i; +/************************/ + +INSERT 0 20 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +SELECT 'BASELINE' as phase, * FROM capture_prune_stats(); 
+/************************/ + + phase | context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +----------+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + BASELINE | SCAN_OPPORTUNISTIC | 506 | 4 | 79 | 15472 | 3906 | 4 | 364 | 53 | 85 | 0 | 0 | 0.79 | 7.719 + BASELINE | UPDATE_FULL_PAGE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | INSERT_SPACE_CHECK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | SCAN_END | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 + BASELINE | MULTI_INSERT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(5 rows) + +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'dead_v1_' || id, + counter = counter + 1000, + mega_padding1 = repeat('DEAD1_V1_' || id, 750), + mega_padding2 = repeat('DEAD2_V1_' || id, 750), + mega_padding3 = repeat('DEAD3_V1_' || id, 750) +WHERE id BETWEEN 1 AND 10; +/************************/ + +UPDATE 10 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us 
+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 525 | 4 | 79 | 15472 | 3943 | 4 | 376 | 53 | 86 | 6 | 0 | 0.76 | 7.510 + UPDATE_FULL_PAGE | 10 | 0 | 0 | 0 | 15 | 0 | 0 | 0 | 0 | 10 | 0 | 0.00 | 1.500 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 531 | 0.75 | INVALID_XACT_XID | 380 | 71.56 + UPDATE_FULL_PAGE | 10 | 0.00 | LOCK_FAILED | 10 | 100.00 +(2 rows) + +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'dead_v2_' || id, + counter = counter + 2000, + mega_padding1 = repeat('DEAD1_V2_' || id, 750), + mega_padding2 = repeat('DEAD2_V2_' || id, 750), + mega_padding3 = repeat('DEAD3_V2_' || id, 750) +WHERE id BETWEEN 11 AND 20; +/************************/ + +UPDATE 10 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us 
+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 535 | 5 | 89 | 18456 | 4075 | 5 | 380 | 56 | 88 | 6 | 0 | 0.93 | 7.617 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 535 | 0.93 | INVALID_XACT_XID | 380 | 71.03 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +/******** QUERY *********/ +VACUUM (ANALYZE, VERBOSE) ultimate_prune_test; +/************************/ + +VACUUM +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 636 | 5 | 89 | 18456 | 4147 | 5 | 440 | 56 | 129 | 6 | 0 | 0.79 | 6.520 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | 
failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 636 | 0.79 | INVALID_XACT_XID | 440 | 69.18 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +/******** QUERY *********/ +CREATE TABLE insert_mega_test ( + id serial, + huge_data text DEFAULT repeat('XXXXXXXX', 1500) -- 12KB per tuple +); +/************************/ + +CREATE TABLE +/******** QUERY *********/ +ALTER TABLE insert_mega_test SET (autovacuum_enabled = false); +/************************/ + +ALTER TABLE +/******** QUERY *********/ +INSERT INTO insert_mega_test (huge_data) +VALUES (repeat('HUGE_INITIAL', 1500)); +/************************/ + +INSERT 0 1 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE insert_mega_test SET huge_data = repeat('DEAD_HUGE', 1500); +/************************/ + +UPDATE 1 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +VACUUM (ANALYZE, VERBOSE) insert_mega_test; +/************************/ + +VACUUM +/******** QUERY *********/ +INSERT INTO insert_mega_test (huge_data) +VALUES (repeat('TRIGGER_INSERT_PRUNING', 1500)); +/************************/ + +INSERT 0 1 +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us 
+--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 734 | 8 | 144 | 22176 | 4217 | 8 | 450 | 72 | 198 | 6 | 0 | 1.09 | 5.745 + UPDATE_FULL_PAGE | 20 | 0 | 0 | 0 | 29 | 0 | 1 | 9 | 0 | 10 | 0 | 0.00 | 1.450 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 734 | 1.09 | INVALID_XACT_XID | 450 | 61.31 + UPDATE_FULL_PAGE | 20 | 0.00 | LOCK_FAILED | 10 | 50.00 +(2 rows) + +/******** QUERY *********/ +DROP TABLE insert_mega_test; +/************************/ + +DROP TABLE +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'more_dead_' || id, + counter = counter + 10000, + mega_padding1 = repeat('MOREDEAD1_' || id, 800), + mega_padding2 = repeat('MOREDEAD2_' || id, 800), + mega_padding3 = repeat('MOREDEAD3_' || id, 800) +WHERE id BETWEEN 1 AND 10; +/************************/ + +UPDATE 10 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +VACUUM (ANALYZE, VERBOSE) ultimate_prune_test; +/************************/ + +VACUUM +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'GIGANTIC_UPDATE_' || repeat('X', 500) || '_' || id, + counter = counter + 100000, + mega_padding1 = repeat('GIGANTIC1_' || id, 1000), + mega_padding2 = repeat('GIGANTIC2_' || id, 1000), + mega_padding3 = repeat('GIGANTIC3_' || id, 1000) +WHERE id BETWEEN 1 AND 5; 
+/************************/ + +UPDATE 5 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 824 | 9 | 162 | 24640 | 4251 | 9 | 461 | 77 | 271 | 6 | 0 | 1.09 | 5.159 + UPDATE_FULL_PAGE | 28 | 0 | 0 | 0 | 43 | 0 | 3 | 15 | 0 | 10 | 0 | 0.00 | 1.536 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 824 | 1.09 | INVALID_XACT_XID | 461 | 55.95 + UPDATE_FULL_PAGE | 28 | 0.00 | NO_REMOVABLE_XIDS | 15 | 53.57 +(2 rows) + +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'scan_dead_' || id, + counter = counter + 1000000, + mega_padding1 = repeat('SCANDEAD1_' || id, 900), + mega_padding2 = repeat('SCANDEAD2_' || id, 900), + mega_padding3 = repeat('SCANDEAD3_' || id, 900) +WHERE id BETWEEN 11 AND 20; +/************************/ + +UPDATE 10 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +VACUUM (ANALYZE, VERBOSE) ultimate_prune_test; +/************************/ + +VACUUM +/******** 
QUERY *********/ +SELECT count(*) FROM ultimate_prune_test WHERE data LIKE 'scan_dead_%'; +/************************/ + + count +------- + 10 +(1 row) + +/******** QUERY *********/ +SELECT count(*) FROM ultimate_prune_test WHERE counter > 500000; +/************************/ + + count +------- + 10 +(1 row) + +/******** QUERY *********/ +SELECT avg(length(mega_padding1)) FROM ultimate_prune_test; +/************************/ + + avg +------------------------ + 10390.0000000000000000 +(1 row) + +/******** QUERY *********/ +SET enable_indexscan = off; +/************************/ + +SET +/******** QUERY *********/ +SET enable_bitmapscan = off; +/************************/ + +SET +/******** QUERY *********/ +SELECT count(*) FROM ultimate_prune_test WHERE status = 'active'; +/************************/ + + count +------- + 20 +(1 row) + +/******** QUERY *********/ +SELECT count(*) FROM ultimate_prune_test WHERE length(data) > 50; +/************************/ + + count +------- + 5 +(1 row) + +/******** QUERY *********/ +RESET enable_indexscan; +/************************/ + +RESET +/******** QUERY *********/ +RESET enable_bitmapscan; +/************************/ + +RESET +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 962 | 10 | 194 | 30552 | 4432 | 10 | 567 | 81 | 298 | 6 | 0 | 1.04 | 4.607 + UPDATE_FULL_PAGE | 39 | 0 | 0 | 0 | 71 | 0 | 5 | 23 | 0 | 11 | 0 | 0.00 | 1.821 +(2 
rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 962 | 1.04 | INVALID_XACT_XID | 567 | 58.94 + UPDATE_FULL_PAGE | 39 | 0.00 | NO_REMOVABLE_XIDS | 23 | 58.97 +(2 rows) + +/******** QUERY *********/ +INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3) +SELECT + 'bulk_mega_' || i, + repeat('BULKMEGA1_' || i, 900), + repeat('BULKMEGA2_' || i, 900), + repeat('BULKMEGA3_' || i, 900) +FROM generate_series(21, 40) i; +/************************/ + +INSERT 0 20 +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 962 | 10 | 194 | 30552 | 4432 | 10 | 567 | 81 | 298 | 6 | 0 | 1.04 | 4.607 + UPDATE_FULL_PAGE | 39 | 0 | 0 | 0 | 71 | 0 | 5 | 23 | 0 | 11 | 0 | 0.00 | 1.821 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 962 | 1.04 | INVALID_XACT_XID | 567 | 58.94 + UPDATE_FULL_PAGE | 39 | 0.00 | 
NO_REMOVABLE_XIDS | 23 | 58.97 +(2 rows) + +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'stress1_' || id, + counter = counter + 10000000, + mega_padding1 = repeat('STRESS1A_' || id, 1200), + mega_padding2 = repeat('STRESS1B_' || id, 1200), + mega_padding3 = repeat('STRESS1C_' || id, 1200) +WHERE id BETWEEN 1 AND 20; +/************************/ + +UPDATE 20 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE ultimate_prune_test SET + data = 'stress2_' || id, + counter = counter + 20000000, + mega_padding1 = repeat('STRESS2A_' || id, 1200), + mega_padding2 = repeat('STRESS2B_' || id, 1200), + mega_padding3 = repeat('STRESS2C_' || id, 1200) +WHERE id BETWEEN 21 AND 40; +/************************/ + +UPDATE 20 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3) +SELECT + 'stress_insert_' || i, + repeat('STRESSINS1_' || i, 1200), + repeat('STRESSINS2_' || i, 1200), + repeat('STRESSINS3_' || i, 1200) +FROM generate_series(41, 60) i; +/************************/ + +INSERT 0 20 +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + 
SCAN_OPPORTUNISTIC | 999 | 11 | 195 | 31056 | 4490 | 11 | 569 | 113 | 299 | 6 | 1 | 1.10 | 4.494 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 121 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 | 1.532 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 999 | 1.10 | INVALID_XACT_XID | 569 | 56.96 + UPDATE_FULL_PAGE | 79 | 0.00 | NO_REMOVABLE_XIDS | 57 | 72.15 +(2 rows) + +/******** QUERY *********/ +CREATE TABLE force_pruning_test ( + id serial, + data text DEFAULT repeat('Z', 8000) -- 8KB per tuple, 1 per page +); +/************************/ + +CREATE TABLE +/******** QUERY *********/ +ALTER TABLE force_pruning_test SET (autovacuum_enabled = false); +/************************/ + +ALTER TABLE +/******** QUERY *********/ +INSERT INTO force_pruning_test (data) VALUES (repeat('FIRST', 2000)); +/************************/ + +INSERT 0 1 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +BEGIN; +/************************/ + +BEGIN +/******** QUERY *********/ +UPDATE force_pruning_test SET data = repeat('DEAD_FIRST', 2000) WHERE id = 1; +/************************/ + +UPDATE 1 +/******** QUERY *********/ +COMMIT; +/************************/ + +COMMIT +/******** QUERY *********/ +VACUUM (ANALYZE, VERBOSE) force_pruning_test; +/************************/ + +VACUUM +/******** QUERY *********/ +INSERT INTO force_pruning_test (data) VALUES (repeat('MUST_TRIGGER_PRUNING', 2000)); +/************************/ + +INSERT 0 1 +/******** QUERY *********/ +SELECT * FROM capture_prune_stats() WHERE calls_total > 0; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | time_spent_us | exit_success | exit_invalid_xact_xid | 
exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct | avg_time_per_call_us +--------------------+-------------+--------------+---------------+-------------+---------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------+---------------------- + SCAN_OPPORTUNISTIC | 1102 | 11 | 195 | 31056 | 4500 | 11 | 574 | 113 | 397 | 6 | 1 | 1.00 | 4.083 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 121 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 | 1.532 +(2 rows) + +/******** QUERY *********/ +SELECT * FROM analyze_exit_reasons(); +/************************/ + + context | calls_total | success_rate_pct | main_failure_reason | failure_count | failure_pct +--------------------+-------------+------------------+---------------------+---------------+------------- + SCAN_OPPORTUNISTIC | 1102 | 1.00 | INVALID_XACT_XID | 574 | 52.09 + UPDATE_FULL_PAGE | 79 | 0.00 | NO_REMOVABLE_XIDS | 57 | 72.15 +(2 rows) + +/******** QUERY *********/ +DROP TABLE force_pruning_test; +/************************/ + +DROP TABLE +/******** QUERY *********/ +SELECT + context, + calls_total, + pages_pruned, + tuples_pruned, + space_freed, + exit_success, + exit_invalid_xact_xid, + exit_no_removable_xids, + exit_page_not_prunable, + exit_lock_failed, + exit_other, + prune_success_rate_pct +FROM capture_prune_stats() +WHERE calls_total > 0 +ORDER BY calls_total DESC; +/************************/ + + context | calls_total | pages_pruned | tuples_pruned | space_freed | exit_success | exit_invalid_xact_xid | exit_no_removable_xids | exit_page_not_prunable | exit_lock_failed | exit_other | prune_success_rate_pct +--------------------+-------------+--------------+---------------+-------------+--------------+-----------------------+------------------------+------------------------+------------------+------------+------------------------ + SCAN_OPPORTUNISTIC | 1149 | 11 | 195 | 31056 | 11 
| 574 | 113 | 444 | 6 | 1 | 0.96 + UPDATE_FULL_PAGE | 79 | 0 | 0 | 0 | 0 | 11 | 57 | 0 | 11 | 0 | 0.00 +(2 rows) + +/******** QUERY *********/ +SELECT + 'EXIT_REASON_SUMMARY' as report_type, + context, + main_failure_reason, + failure_count, + failure_pct, + success_rate_pct +FROM analyze_exit_reasons() +ORDER BY calls_total DESC; +/************************/ + + report_type | context | main_failure_reason | failure_count | failure_pct | success_rate_pct +---------------------+--------------------+---------------------+---------------+-------------+------------------ + EXIT_REASON_SUMMARY | SCAN_OPPORTUNISTIC | INVALID_XACT_XID | 574 | 49.96 | 0.96 + EXIT_REASON_SUMMARY | UPDATE_FULL_PAGE | NO_REMOVABLE_XIDS | 57 | 72.15 | 0.00 +(2 rows) + +/******** QUERY *********/ +WITH recommendations AS ( + SELECT + context, + main_failure_reason, + CASE main_failure_reason + WHEN 'INVALID_XACT_XID' THEN 'Check prune_xid setting and page header' + WHEN 'NO_REMOVABLE_XIDS' THEN 'Need more transaction churn or longer waits for visibility' + WHEN 'PAGE_NOT_PRUNABLE' THEN 'Check PageHasPrunable() logic and page conditions' + WHEN 'LOCK_FAILED' THEN 'Increase lock acquisition attempts or use different strategy' + ELSE 'Investigate other failure causes' + END as recommendation + FROM analyze_exit_reasons() + WHERE calls_total > 0 +) +SELECT + 'RECOMMENDATIONS' as report_type, + context, + main_failure_reason, + recommendation +FROM recommendations; +/************************/ + + report_type | context | main_failure_reason | recommendation +-----------------+--------------------+---------------------+------------------------------------------------------------ + RECOMMENDATIONS | SCAN_OPPORTUNISTIC | INVALID_XACT_XID | Check prune_xid setting and page header + RECOMMENDATIONS | UPDATE_FULL_PAGE | NO_REMOVABLE_XIDS | Need more transaction churn or longer waits for visibility +(2 rows) + +/******** QUERY *********/ +SELECT + schemaname, relname, n_tup_ins, n_tup_upd, n_tup_hot_upd, 
-- =============================================================================
-- Heap-prune statistics exercise script (run with psql).
--
-- Drives a series of INSERT / UPDATE / VACUUM / scan workloads against scratch
-- tables and, after each phase, prints the counters returned by
-- pg_stat_get_heap_prune_stats() through two helper functions:
--   * capture_prune_stats()  - raw counters plus derived rates
--   * analyze_exit_reasons() - dominant non-success exit reason per context
--
-- The script changes server-wide configuration with ALTER SYSTEM (autovacuum is
-- switched off for the duration and the override is removed at the end), so it
-- needs a role allowed to run ALTER SYSTEM and should only be used on a
-- disposable development instance.
-- =============================================================================

-- Enable tracking and detailed logging
SET enable_heap_prune_tracking = on;
SET log_min_messages = debug2;

-- Disable autovacuum globally so background vacuum does not race the test
ALTER SYSTEM SET autovacuum = off;
SELECT pg_reload_conf();

CREATE EXTENSION IF NOT EXISTS pageinspect;

-- Drop existing functions (required when the RETURNS TABLE shape changed,
-- which CREATE OR REPLACE alone cannot handle)
DROP FUNCTION IF EXISTS capture_prune_stats();
DROP FUNCTION IF EXISTS analyze_exit_reasons();

-- Leftovers from an aborted earlier run would make the CREATE TABLEs fail
DROP TABLE IF EXISTS ultimate_prune_test;
DROP TABLE IF EXISTS insert_mega_test;
DROP TABLE IF EXISTS force_pruning_test;

-- -----------------------------------------------------------------------------
-- capture_prune_stats()
--
-- Returns one row per pruning context from pg_stat_get_heap_prune_stats(),
-- ordered by calls_total descending, with two derived columns:
--   prune_success_rate_pct = 100 * pages_pruned  / calls_total (2 decimals)
--   avg_time_per_call_us   =       time_spent_us / calls_total (3 decimals)
-- Both derived columns are 0 when calls_total is 0.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION capture_prune_stats()
RETURNS TABLE(
    context text,
    calls_total bigint,
    pages_pruned bigint,
    tuples_pruned bigint,
    space_freed bigint,
    time_spent_us bigint,
    exit_success bigint,
    exit_invalid_xact_xid bigint,
    exit_no_removable_xids bigint,
    exit_page_not_prunable bigint,
    exit_lock_failed bigint,
    exit_other bigint,
    prune_success_rate_pct numeric,
    avg_time_per_call_us numeric
) AS $$
BEGIN
    -- Every column reference is qualified with "s." so it cannot be confused
    -- with the identically named output parameters of this function.
    RETURN QUERY
    SELECT
        s.context,
        s.calls_total,
        s.pages_pruned,
        s.tuples_pruned,
        s.space_freed,
        s.time_spent_us,
        s.exit_success,
        s.exit_invalid_xact_xid,
        s.exit_no_removable_xids,
        s.exit_page_not_prunable,
        s.exit_lock_failed,
        s.exit_other,
        CASE WHEN s.calls_total > 0
             THEN round(100.0 * s.pages_pruned / s.calls_total, 2)
             ELSE 0
        END AS prune_success_rate_pct,
        CASE WHEN s.calls_total > 0
             THEN round(s.time_spent_us::numeric / s.calls_total, 3)
             ELSE 0
        END AS avg_time_per_call_us
    FROM pg_stat_get_heap_prune_stats() AS s(
        context text,
        calls_total bigint,
        pages_pruned bigint,
        tuples_pruned bigint,
        space_freed bigint,
        time_spent_us bigint,
        exit_success bigint,
        exit_invalid_xact_xid bigint,
        exit_no_removable_xids bigint,
        exit_page_not_prunable bigint,
        exit_lock_failed bigint,
        exit_other bigint
    )
    ORDER BY s.calls_total DESC;
END;
$$ LANGUAGE plpgsql;

-- -----------------------------------------------------------------------------
-- analyze_exit_reasons()
--
-- For every context with calls_total > 0, reports:
--   success_rate_pct    = 100 * exit_success / calls_total (2 decimals)
--   main_failure_reason = the non-success exit counter with the highest value;
--                         ties resolve in the order INVALID_XACT_XID,
--                         NO_REMOVABLE_XIDS, PAGE_NOT_PRUNABLE, LOCK_FAILED,
--                         OTHER; 'NONE' when all of those counters are zero
--   failure_count       = value of that highest counter
--   failure_pct         = 100 * failure_count / calls_total (2 decimals)
-- Rows are ordered by calls_total descending.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION analyze_exit_reasons()
RETURNS TABLE(
    context text,
    calls_total bigint,
    success_rate_pct numeric,
    main_failure_reason text,
    failure_count bigint,
    failure_pct numeric
) AS $$
BEGIN
    RETURN QUERY
    -- One row per active context, with the largest failure counter precomputed
    -- so the CASE below compares against a single value.
    WITH exit_analysis AS (
        SELECT
            s.context AS ctx,
            s.calls_total AS total_calls,
            s.exit_invalid_xact_xid AS n_invalid_xact_xid,
            s.exit_no_removable_xids AS n_no_removable_xids,
            s.exit_page_not_prunable AS n_page_not_prunable,
            s.exit_lock_failed AS n_lock_failed,
            s.exit_other AS n_other,
            CASE WHEN s.calls_total > 0
                 THEN round(100.0 * s.exit_success / s.calls_total, 2)
                 ELSE 0
            END AS success_rate,
            GREATEST(
                s.exit_invalid_xact_xid,
                s.exit_no_removable_xids,
                s.exit_page_not_prunable,
                s.exit_lock_failed,
                s.exit_other
            ) AS max_failure_count
        FROM capture_prune_stats() AS s
        WHERE s.calls_total > 0
    )
    SELECT
        ea.ctx,
        ea.total_calls,
        ea.success_rate,
        CASE
            -- Without this guard a context with zero failures would be
            -- labelled INVALID_XACT_XID, because 0 equals the maximum of 0s.
            WHEN ea.max_failure_count = 0 THEN 'NONE'
            WHEN ea.n_invalid_xact_xid = ea.max_failure_count THEN 'INVALID_XACT_XID'
            WHEN ea.n_no_removable_xids = ea.max_failure_count THEN 'NO_REMOVABLE_XIDS'
            WHEN ea.n_page_not_prunable = ea.max_failure_count THEN 'PAGE_NOT_PRUNABLE'
            WHEN ea.n_lock_failed = ea.max_failure_count THEN 'LOCK_FAILED'
            ELSE 'OTHER'
        END AS main_reason,
        ea.max_failure_count,
        CASE WHEN ea.total_calls > 0
             THEN round(100.0 * ea.max_failure_count / ea.total_calls, 2)
             ELSE 0
        END AS failure_percentage
    FROM exit_analysis AS ea
    ORDER BY ea.total_calls DESC;
END;
$$ LANGUAGE plpgsql;

SELECT pg_stat_reset();

\echo '=== PHASE 1: Setup with Massive Tuples ==='

CREATE TABLE ultimate_prune_test (
    id serial PRIMARY KEY,
    data text,
    status varchar(20) DEFAULT 'active',
    counter bigint DEFAULT 0,
    -- Three padding columns of 8 * 750 = 6000 characters each (~18KB per row).
    -- NOTE(review): the padding is highly repetitive, so it is presumably
    -- compressed and/or moved out of line by TOAST; verify that heap pages
    -- really fill up the way the later phases expect.
    mega_padding1 text DEFAULT repeat('AAAAAAAA', 750),  -- 6KB
    mega_padding2 text DEFAULT repeat('BBBBBBBB', 750),  -- 6KB
    mega_padding3 text DEFAULT repeat('CCCCCCCC', 750)   -- 6KB
);

ALTER TABLE ultimate_prune_test SET (autovacuum_enabled = false);
CREATE INDEX idx_ultimate_status ON ultimate_prune_test(status);

-- Insert fewer rows but much larger.
-- Explicit BEGIN/COMMIT keeps the load in its own transaction whether or not
-- psql AUTOCOMMIT is on (a bare COMMIT only produced a warning).
BEGIN;
INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3)
SELECT
    'initial_' || i,
    repeat('INIT1_' || i, 750),
    repeat('INIT2_' || i, 750),
    repeat('INIT3_' || i, 750)
FROM generate_series(1, 20) i;  -- Only 20 rows but each is ~18KB
COMMIT;

SELECT 'BASELINE' as phase, * FROM capture_prune_stats();

\echo '=== PHASE 2: Create Dead Tuples in Separate Transactions ==='

-- Transaction 1: Create dead tuples
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'dead_v1_' || id,
    counter = counter + 1000,
    mega_padding1 = repeat('DEAD1_V1_' || id, 750),
    mega_padding2 = repeat('DEAD2_V1_' || id, 750),
    mega_padding3 = repeat('DEAD3_V1_' || id, 750)
WHERE id BETWEEN 1 AND 10;
COMMIT;

\echo '--- After First Dead Tuple Creation ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

-- Transaction 2: More dead tuples
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'dead_v2_' || id,
    counter = counter + 2000,
    mega_padding1 = repeat('DEAD1_V2_' || id, 750),
    mega_padding2 = repeat('DEAD2_V2_' || id, 750),
    mega_padding3 = repeat('DEAD3_V2_' || id, 750)
WHERE id BETWEEN 11 AND 20;
COMMIT;

\echo '--- After Second Dead Tuple Creation ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

-- Force visibility with VACUUM
VACUUM (ANALYZE, VERBOSE) ultimate_prune_test;

\echo '--- After VACUUM ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

\echo '=== PHASE 3: Extreme INSERT Test ==='

CREATE TABLE insert_mega_test (
    id serial,
    huge_data text DEFAULT repeat('XXXXXXXX', 1500)  -- 12KB per tuple
);

ALTER TABLE insert_mega_test SET (autovacuum_enabled = false);

-- Insert only 1 tuple to nearly fill a page
BEGIN;
INSERT INTO insert_mega_test (huge_data)
VALUES (repeat('HUGE_INITIAL', 1500));
COMMIT;

-- Create dead tuple (no WHERE on purpose: the table holds a single row)
BEGIN;
UPDATE insert_mega_test SET huge_data = repeat('DEAD_HUGE', 1500);
COMMIT;

-- Force visibility
VACUUM (ANALYZE, VERBOSE) insert_mega_test;

-- Now insert - should trigger INSERT_SPACE_CHECK
INSERT INTO insert_mega_test (huge_data)
VALUES (repeat('TRIGGER_INSERT_PRUNING', 1500));

\echo '--- After INSERT Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

DROP TABLE insert_mega_test;

\echo '=== PHASE 4: UPDATE Test with Extreme Sizes ==='

-- Create more dead tuples
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'more_dead_' || id,
    counter = counter + 10000,
    mega_padding1 = repeat('MOREDEAD1_' || id, 800),
    mega_padding2 = repeat('MOREDEAD2_' || id, 800),
    mega_padding3 = repeat('MOREDEAD3_' || id, 800)
WHERE id BETWEEN 1 AND 10;
COMMIT;

-- Force visibility
VACUUM (ANALYZE, VERBOSE) ultimate_prune_test;

-- Large updates that should trigger UPDATE_FULL_PAGE
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'GIGANTIC_UPDATE_' || repeat('X', 500) || '_' || id,
    counter = counter + 100000,
    mega_padding1 = repeat('GIGANTIC1_' || id, 1000),
    mega_padding2 = repeat('GIGANTIC2_' || id, 1000),
    mega_padding3 = repeat('GIGANTIC3_' || id, 1000)
WHERE id BETWEEN 1 AND 5;
COMMIT;

\echo '--- After UPDATE Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

\echo '=== PHASE 5: Scan Pressure Test ==='

-- Create more dead tuples for scanning
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'scan_dead_' || id,
    counter = counter + 1000000,
    mega_padding1 = repeat('SCANDEAD1_' || id, 900),
    mega_padding2 = repeat('SCANDEAD2_' || id, 900),
    mega_padding3 = repeat('SCANDEAD3_' || id, 900)
WHERE id BETWEEN 11 AND 20;
COMMIT;

-- Force visibility
VACUUM (ANALYZE, VERBOSE) ultimate_prune_test;

-- Intensive scanning to trigger SCAN_OPPORTUNISTIC
SELECT count(*) FROM ultimate_prune_test WHERE data LIKE 'scan_dead_%';
SELECT count(*) FROM ultimate_prune_test WHERE counter > 500000;
SELECT avg(length(mega_padding1)) FROM ultimate_prune_test;

-- Force sequential scans
SET enable_indexscan = off;
SET enable_bitmapscan = off;
SELECT count(*) FROM ultimate_prune_test WHERE status = 'active';
SELECT count(*) FROM ultimate_prune_test WHERE length(data) > 50;
RESET enable_indexscan;
RESET enable_bitmapscan;

\echo '--- After SCAN Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

\echo '=== PHASE 6: Multi-Insert Test ==='

-- Bulk insert with massive tuples
INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3)
SELECT
    'bulk_mega_' || i,
    repeat('BULKMEGA1_' || i, 900),
    repeat('BULKMEGA2_' || i, 900),
    repeat('BULKMEGA3_' || i, 900)
FROM generate_series(21, 40) i;

\echo '--- After MULTI_INSERT Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

\echo '=== PHASE 7: Ultimate Stress Test ==='

-- Create extreme conditions without DO blocks
-- Round 1: Massive updates
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'stress1_' || id,
    counter = counter + 10000000,
    mega_padding1 = repeat('STRESS1A_' || id, 1200),
    mega_padding2 = repeat('STRESS1B_' || id, 1200),
    mega_padding3 = repeat('STRESS1C_' || id, 1200)
WHERE id BETWEEN 1 AND 20;
COMMIT;

-- Round 2: More massive updates
BEGIN;
UPDATE ultimate_prune_test SET
    data = 'stress2_' || id,
    counter = counter + 20000000,
    mega_padding1 = repeat('STRESS2A_' || id, 1200),
    mega_padding2 = repeat('STRESS2B_' || id, 1200),
    mega_padding3 = repeat('STRESS2C_' || id, 1200)
WHERE id BETWEEN 21 AND 40;
COMMIT;

-- Round 3: Inserts
INSERT INTO ultimate_prune_test (data, mega_padding1, mega_padding2, mega_padding3)
SELECT
    'stress_insert_' || i,
    repeat('STRESSINS1_' || i, 1200),
    repeat('STRESSINS2_' || i, 1200),
    repeat('STRESSINS3_' || i, 1200)
FROM generate_series(41, 60) i;

\echo '--- After STRESS Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

\echo '=== PHASE 8: Force Extreme Conditions ==='

-- Create table guaranteed to need pruning
CREATE TABLE force_pruning_test (
    id serial,
    data text DEFAULT repeat('Z', 8000)  -- 8KB per tuple, 1 per page
);

ALTER TABLE force_pruning_test SET (autovacuum_enabled = false);

-- Insert exactly 1 tuple (should fill most of a page)
BEGIN;
INSERT INTO force_pruning_test (data) VALUES (repeat('FIRST', 2000));
COMMIT;

-- Make it dead
BEGIN;
UPDATE force_pruning_test SET data = repeat('DEAD_FIRST', 2000) WHERE id = 1;
COMMIT;

-- Force visibility
VACUUM (ANALYZE, VERBOSE) force_pruning_test;

-- This insert MUST trigger pruning
INSERT INTO force_pruning_test (data) VALUES (repeat('MUST_TRIGGER_PRUNING', 2000));

\echo '--- After FORCE PRUNING Test ---'
SELECT * FROM capture_prune_stats() WHERE calls_total > 0;
SELECT * FROM analyze_exit_reasons();

DROP TABLE force_pruning_test;

\echo '=== PHASE 9: Final Comprehensive Analysis ==='

-- Final statistics with detailed exit reason analysis
SELECT
    context,
    calls_total,
    pages_pruned,
    tuples_pruned,
    space_freed,
    exit_success,
    exit_invalid_xact_xid,
    exit_no_removable_xids,
    exit_page_not_prunable,
    exit_lock_failed,
    exit_other,
    prune_success_rate_pct
FROM capture_prune_stats()
WHERE calls_total > 0
ORDER BY calls_total DESC;

-- Exit reason summary
SELECT
    'EXIT_REASON_SUMMARY' as report_type,
    context,
    main_failure_reason,
    failure_count,
    failure_pct,
    success_rate_pct
FROM analyze_exit_reasons()
ORDER BY calls_total DESC;

-- Recommendations based on failure patterns
WITH recommendations AS (
    SELECT
        context,
        calls_total,
        main_failure_reason,
        CASE main_failure_reason
            WHEN 'NONE' THEN 'No failed prune attempts recorded'
            WHEN 'INVALID_XACT_XID' THEN 'Check prune_xid setting and page header'
            WHEN 'NO_REMOVABLE_XIDS' THEN 'Need more transaction churn or longer waits for visibility'
            WHEN 'PAGE_NOT_PRUNABLE' THEN 'Check PageHasPrunable() logic and page conditions'
            WHEN 'LOCK_FAILED' THEN 'Increase lock acquisition attempts or use different strategy'
            ELSE 'Investigate other failure causes'
        END as recommendation
    FROM analyze_exit_reasons()
    WHERE calls_total > 0
)
SELECT
    'RECOMMENDATIONS' as report_type,
    context,
    main_failure_reason,
    recommendation
FROM recommendations
-- Explicit ordering keeps the report stable from run to run
ORDER BY calls_total DESC, context;

-- Table statistics.
-- Ask the backend to flush its pending counters first; otherwise the view can
-- still show values that predate this session's recent activity.
SELECT pg_stat_force_next_flush();

SELECT
    schemaname, relname, n_tup_ins, n_tup_upd, n_tup_hot_upd, n_dead_tup,
    CASE WHEN n_tup_upd > 0 THEN round(100.0 * n_tup_hot_upd / n_tup_upd, 2) ELSE 0 END as hot_update_pct,
    -- Pass the OID rather than a concatenated name, which would fail for
    -- identifiers that need quoting
    pg_size_pretty(pg_total_relation_size(relid)) as table_size,
    pg_size_pretty(pg_relation_size(relid)) as heap_size
FROM pg_stat_user_tables
WHERE relname = 'ultimate_prune_test';

\echo '=== Test Complete ==='

-- Remove the override written at the top instead of pinning autovacuum = on
-- in postgresql.auto.conf
ALTER SYSTEM RESET autovacuum;
SELECT pg_reload_conf();

DROP TABLE IF EXISTS ultimate_prune_test;