From a48896d6ff6d21f4bf836a0a17ef0b0a908ea0b2 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Wed, 2 Jul 2025 07:17:55 -0400 Subject: [PATCH 1/5] dev setup v16 --- .clang-format | 71 ++ .clangd | 89 ++ .envrc | 9 + .gdbinit | 15 + .gitignore | 8 + .idea/.gitignore | 8 + .idea/editor.xml | 580 +++++++++++++ .idea/inspectionProfiles/Project_Default.xml | 7 + .idea/misc.xml | 18 + .idea/prettier.xml | 6 + .idea/vcs.xml | 6 + .vscode/launch.json | 22 + .vscode/settings.json | 5 + flake.lock | 78 ++ flake.nix | 45 + glibc-no-fortify-warning.patch | 24 + pg-aliases.sh | 304 +++++++ shell.nix | 820 +++++++++++++++++++ src/test/regress/pg_regress.c | 2 +- src/tools/pgindent/pgindent | 2 +- 20 files changed, 2117 insertions(+), 2 deletions(-) create mode 100644 .clang-format create mode 100644 .clangd create mode 100644 .envrc create mode 100644 .gdbinit create mode 100644 .idea/.gitignore create mode 100644 .idea/editor.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/prettier.xml create mode 100644 .idea/vcs.xml create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 glibc-no-fortify-warning.patch create mode 100644 pg-aliases.sh create mode 100644 shell.nix diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000..2f786ac8eef05 --- /dev/null +++ b/.clang-format @@ -0,0 +1,71 @@ +# the official .clang-format style for https://github.com/taocpp +# +# clang-format-4.0 -i -style=file $(find -name '[^.]*.[hc]pp') + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -3 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: true + AfterControlStatement: false + AfterEnum : true + AfterFunction : true + AfterNamespace : true + AfterStruct : true + AfterUnion : true + BeforeCatch : true + BeforeElse : true + IndentBraces : false +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakStringLiterals: false +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 0 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 3 +ContinuationIndentWidth: 3 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: true +IndentWidth: 3 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +PointerAlignment: Left +ReflowComments: false +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: Never +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: true +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: true +SpacesInParentheses: true +SpacesInSquareBrackets: true +TabWidth: 8 
+UseTab: Never diff --git a/.clangd b/.clangd new file mode 100644 index 0000000000000..500c5d0d258d6 --- /dev/null +++ b/.clangd @@ -0,0 +1,89 @@ +Diagnostics: + MissingIncludes: None +InlayHints: + Enabled: true + ParameterNames: true + DeducedTypes: true +CompileFlags: + CompilationDatabase: build/ # Search build/ directory for compile_commands.json + Remove: [ -Werror ] + Add: + - -DDEBUG + - -DLOCAL + - -DPGDLLIMPORT= + - -DPIC + - -O2 + - -Wall + - -Wcast-function-type + - -Wconversion + - -Wdeclaration-after-statement + - -Wendif-labels + - -Werror=vla + - -Wextra + - -Wfloat-equal + - -Wformat-security + - -Wimplicit-fallthrough=3 + - -Wmissing-format-attribute + - -Wmissing-prototypes + - -Wno-format-truncation + - -Wno-sign-conversion + - -Wno-stringop-truncation + - -Wno-unused-const-variable + - -Wpointer-arith + - -Wshadow + - -Wshadow=compatible-local + - -fPIC + - -fexcess-precision=standard + - -fno-strict-aliasing + - -fvisibility=hidden + - -fwrapv + - -g + - -std=c11 + - -I. + - -I../../../../src/include +# gcc -E -v -xc++ /dev/null +# - -I/nix/store/l2sgvfcyqc1bgnzpz86qw5pjq99j8vlw-libtool-2.5.4/include +# - -I/nix/store/n087ac9g368fbl6h57a2mdd741lshzrc-file-5.46-dev/include +# - -I/nix/store/p7z72c2s722pbw31jmm3y0nwypksb5fj-gnumake-4.4.1/include +# - -I/nix/store/wzwlizg15dwh6x0h3ckjmibdblfkfdzf-flex-2.6.4/include +# - -I/nix/store/8nh579b2yl3sz2yfwyjc9ksb0jb7kwf5-libxslt-1.1.43-dev/include +# - -I/nix/store/cisb0723v3pgp74f2lj07z5d6w3j77sl-libxml2-2.13.8-dev/include +# - -I/nix/store/245c5yscaxyxi49fz9ys1i1apy5s2igz-valgrind-3.24.0-dev/include +# - -I/nix/store/nmxr110602fvajr9ax8d65ac1g40vx1a-curl-8.13.0-dev/include +# - -I/nix/store/slqvy0fgnwmvaq3bxmrvqclph8x909i2-brotli-1.1.0-dev/include +# - -I/nix/store/lchvccw6zl1z1wmhqayixcjcqyhqvyj7-krb5-1.21.3-dev/include +# - -I/nix/store/hybw3vnacqmm68fskbcchrbmj0h4ffv2-nghttp2-1.65.0-dev/include +# - -I/nix/store/2m0s7qxq2kgclyh6cfbflpxm65aga2h4-libidn2-2.3.8-dev/include +# - -I/nix/store/kcgqglb4iax0zh5jlrxmjdik93wlgsrq-openssl-3.4.1-dev/include +# - -I/nix/store/8mlcjg5js2r0zrpdjlfaxax6hyvppgz5-libpsl-0.21.5-dev/include +# - -I/nix/store/1nygjgimkj4wnmydzd6brsw6m0rd7gmx-libssh2-1.11.1-dev/include +# - -I/nix/store/cbdvjyn19y77m8l06n089x30v7irqz3j-zlib-1.3.1-dev/include +# - -I/nix/store/x10zhllc0rhk1s1mhjvsrzvbg55802gj-zstd-1.5.7-dev/include +# - -I/nix/store/8w718rm43x7z73xhw9d6vh8s4snrq67h-python3-3.12.10/include +# - -I/nix/store/1lrgn56jw2yww4bxj0frpgvahqh9i7gl-perf-linux-6.12.35/include +# - -I/nix/store/j87n5xqfj6c03633g7l95lfjq5ynml13-gdb-16.2/include +# - -I/nix/store/ih8dkkw9r7zx5fxg3arh53qc9zs422d1-llvm-21.1.0-dev/include +# - -I/nix/store/rz4bmcm8dwsy7ylx6rhffkwkqn6n8srn-ncurses-6.5-dev/include +# - -I/nix/store/29mcvdnd9s6sp46cjmqm0pfg4xs56rik-zlib-1.3.1-dev/include +# - -I/nix/store/42288hw25sc2gchgc5jp4wfgwisa0nxm-lldb-21.1.0-dev/include +# - -I/nix/store/wpfdp7vzd7h7ahnmp4rvxfcklg4viknl-tcl-8.6.15/include +# - -I/nix/store/4sq2x2770k0xrjshdi6piqrazqjfi5s4-readline-8.2p13-dev/include +# - -I/nix/store/myw381bc9yqd709hpray9lp7l98qmlm1-ncurses-6.5-dev/include +# - -I/nix/store/dvhx24q4icrig4q1v1lp7kzi3izd5jmb-icu4c-76.1-dev/include +# - -I/nix/store/7ld4hdn561a4vkk5hrkdhq8r6rxw8shl-lz4-1.10.0-dev/include +# - -I/nix/store/fnzbi6b8q79faggzj53paqi7igr091w0-util-linux-minimal-2.41-dev/include +# - -I/nix/store/vrdwlbzr74ibnzcli2yl1nxg9jqmr237-linux-pam-1.6.1/include +# - -I/nix/store/qizipyz9y17nr4w4gmxvwd3x4k0bp2rh-libxcrypt-4.4.38/include +# - 
-I/nix/store/7z8illxfqr4mvwh4l3inik6vdh12jx09-numactl-2.0.18-dev/include +# - -I/nix/store/f6lmz5inbk7qjc79099q4jvgzih7zbhy-openldap-2.6.9-dev/include +# - -I/nix/store/28vmjd90wzd6gij5a1nfj4nqaw191cfg-liburing-2.9-dev/include +# - -I/nix/store/75cyhmjxzx8z7v2z8vrmrydwraf00wyi-libselinux-3.8.1-dev/include +# - -I/nix/store/r25srliigrrv5q3n7y8ms6z10spvjcd9-glibc-2.40-66-dev/include +# - -I/nix/store/ldp1izmflvc74bd4n2svhrd5xrz61wyi-lld-21.1.0-dev/include +# - -I/nix/store/wd5cm50kmlw8n9mq6l1mkvpp8g443a1g-compiler-rt-libc-21.1.0-dev/include +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322/ +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322//x86_64-unknown-linux-gnu +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include/c++/14.2.1.20250322//backward +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/lib/gcc/x86_64-unknown-linux-gnu/14.2.1/include +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/include +# - -I/nix/store/9ds850ifd4jwcccpp3v14818kk74ldf2-gcc-14.2.1.20250322/lib/gcc/x86_64-unknown-linux-gnu/14.2.1/include-fixed diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000000..c2f6763607e61 --- /dev/null +++ b/.envrc @@ -0,0 +1,9 @@ +watch_file flake.nix +use flake + +#export MESON_EXTRA_SETUP="-Db_coverage=true" +#export GENINFO_OPTIONS="--ignore-errors inconsistent,gcov" +#export LCOV_OPTIONS="--ignore-errors inconsistent,gcov" + +export CFLAGS="-Wall -Wextra -Wconversion -Wdouble-promotion -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion -fsanitize-trap --werror" +# -fsanitize=undefined,address,undefined,thread diff --git a/.gdbinit b/.gdbinit new file mode 100644 index 0000000000000..f2593d3326984 --- /dev/null +++ b/.gdbinit @@ -0,0 +1,15 @@ +set tui tab-width 4 +set tui mouse-events off + + +b tts_heap_check_idx_attrs +b ExecCheckTupleForChanges +b ExecOpenIndicies +b ExecInsertIndexTuples +b heap_update + +#b fork_process +#b ParallelWorkerMain +#set follow-fork-mode child +#b initdb.c:3105 + diff --git a/.gitignore b/.gitignore index 4e911395fe3ba..8e429d66ca41f 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,11 @@ lib*.pc /Release/ /tmp_install/ /portlock/ + +build/ +install/ +test-db/ +.direnv/ +.cache/ +.history + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000000000..13566b81b018a --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000000000..1f0ef49b4faf4 --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,580 @@ + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000000000..9c69411050eac --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000000000..53624c9e1f9ab --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,18 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/prettier.xml b/.idea/prettier.xml new file mode 100644 index 0000000000000..b0c1c68fbbad6 --- /dev/null +++ b/.idea/prettier.xml @@ -0,0 +1,6 @@ + + + + + \ 
No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000000000..35eb1ddfbbc02 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000000000..f5d97424c5047 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Attach Postgres", + "type": "cppdbg", + "request": "attach", + "program": "${workspaceRoot}/install/bin/postgres", + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000..cc8a64fa9fa85 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "syscache.h": "c" + } +} \ No newline at end of file diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000000..a609589066525 --- /dev/null +++ b/flake.lock @@ -0,0 +1,78 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1751211869, + "narHash": "sha256-1Cu92i1KSPbhPCKxoiVG5qnoRiKTgR5CcGSRyLpOd7Y=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "b43c397f6c213918d6cfe6e3550abfe79b5d1c51", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-unstable": { + "locked": { + "lastModified": 1757651841, + "narHash": "sha256-Lh9QoMzTjY/O4LqNwcm6s/WSYStDmCH6f3V/izwlkHc=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "ad4e6dd68c30bc8bd1860a27bc6f0c485bd7f3b6", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "nixpkgs-unstable": "nixpkgs-unstable" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000000..709d13737ee5a --- /dev/null +++ b/flake.nix @@ -0,0 +1,45 @@ +{ + description = "PostgreSQL development environment"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + nixpkgs-unstable.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + nixpkgs-unstable, + flake-utils, + }: + flake-utils.lib.eachDefaultSystem ( + system: let + pkgs = import nixpkgs { + inherit 
system; + config.allowUnfree = true; + }; + pkgs-unstable = import nixpkgs-unstable { + inherit system; + config.allowUnfree = true; + }; + + shellConfig = import ./shell.nix {inherit pkgs pkgs-unstable system;}; + in { + formatter = pkgs.alejandra; + devShells = { + default = shellConfig.devShell; + gcc = shellConfig.devShell; + clang = shellConfig.clangDevShell; + gcc-musl = shellConfig.muslDevShell; + clang-musl = shellConfig.clangMuslDevShell; + }; + + packages = { + inherit (shellConfig) gdbConfig flameGraphScript pgbenchScript; + }; + + environment.localBinInPath = true; + } + ); +} diff --git a/glibc-no-fortify-warning.patch b/glibc-no-fortify-warning.patch new file mode 100644 index 0000000000000..681e678e67ee3 --- /dev/null +++ b/glibc-no-fortify-warning.patch @@ -0,0 +1,24 @@ +From 130c231020f97e5eb878cc9fdb2bd9b186a5aa04 Mon Sep 17 00:00:00 2001 +From: Greg Burd +Date: Fri, 24 Oct 2025 11:58:24 -0400 +Subject: [PATCH] no warnings with -O0 and fortify source please + +--- + include/features.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/features.h b/include/features.h +index 673c4036..a02c8a3f 100644 +--- a/include/features.h ++++ b/include/features.h +@@ -432,7 +432,6 @@ + + #if defined _FORTIFY_SOURCE && _FORTIFY_SOURCE > 0 + # if !defined __OPTIMIZE__ || __OPTIMIZE__ <= 0 +-# warning _FORTIFY_SOURCE requires compiling with optimization (-O) + # elif !__GNUC_PREREQ (4, 1) + # warning _FORTIFY_SOURCE requires GCC 4.1 or later + # elif _FORTIFY_SOURCE > 2 && (__glibc_clang_prereq (9, 0) \ +-- +2.50.1 + diff --git a/pg-aliases.sh b/pg-aliases.sh new file mode 100644 index 0000000000000..d0b5ed16eb0e0 --- /dev/null +++ b/pg-aliases.sh @@ -0,0 +1,304 @@ +# PostgreSQL Development Aliases + +# Build system management +pg_clean_for_compiler() { + local current_compiler="$(basename $CC)" + local build_dir="$PG_BUILD_DIR" + + if [ -f "$build_dir/compile_commands.json" ]; then + local last_compiler=$(grep -o '/[^/]*/bin/[gc]cc\|/[^/]*/bin/clang' "$build_dir/compile_commands.json" | head -1 | xargs basename 2>/dev/null || echo "unknown") + + if [ "$last_compiler" != "$current_compiler" ] && [ "$last_compiler" != "unknown" ]; then + echo "Detected compiler change from $last_compiler to $current_compiler" + echo "Cleaning build directory..." 
+ rm -rf "$build_dir" + mkdir -p "$build_dir" + fi + fi + + mkdir -p "$build_dir" + echo "$current_compiler" >"$build_dir/.compiler_used" +} + +# Core PostgreSQL commands +alias pg-setup=' + if [ -z "$PERL_CORE_DIR" ]; then + echo "Error: Could not find perl CORE directory" >&2 + return 1 + fi + + pg_clean_for_compiler + + echo "=== PostgreSQL Build Configuration ===" + echo "Compiler: $CC" + echo "LLVM: $(llvm-config --version 2>/dev/null || echo 'disabled')" + echo "Source: $PG_SOURCE_DIR" + echo "Build: $PG_BUILD_DIR" + echo "Install: $PG_INSTALL_DIR" + echo "======================================" + # --fatal-meson-warnings + + env CFLAGS="-I$PERL_CORE_DIR $CFLAGS" \ + LDFLAGS="-L$PERL_CORE_DIR -lperl $LDFLAGS" \ + meson setup $MESON_EXTRA_SETUP \ + --reconfigure \ + -Db_coverage=false \ + -Db_lundef=false \ + -Dcassert=true \ + -Ddebug=true \ + -Ddocs_html_style=website \ + -Ddocs_pdf=enabled \ + -Dicu=enabled \ + -Dinjection_points=true \ + -Dldap=enabled \ + -Dlibcurl=enabled \ + -Dlibxml=enabled \ + -Dlibxslt=enabled \ + -Dllvm=auto \ + -Dlz4=enabled \ + -Dnls=enabled \ + -Doptimization=0 \ + -Dplperl=enabled \ + -Dplpython=enabled \ + -Dpltcl=enabled \ + -Dreadline=enabled \ + -Dssl=openssl \ + -Dtap_tests=enabled \ + -Duuid=e2fs \ + -Dzstd=enabled \ + --prefix="$PG_INSTALL_DIR" \ + "$PG_BUILD_DIR" \ + "$PG_SOURCE_DIR"' + +alias pg-compdb='compdb -p build/ list > compile_commands.json' +alias pg-build='meson compile -C "$PG_BUILD_DIR"' +alias pg-install='meson install -C "$PG_BUILD_DIR"' +alias pg-test='meson test -q --print-errorlogs -C "$PG_BUILD_DIR"' + +# Clean commands +alias pg-clean='ninja -C "$PG_BUILD_DIR" clean' +alias pg-full-clean='rm -rf "$PG_BUILD_DIR" "$PG_INSTALL_DIR" && echo "Build and install directories cleaned"' + +# Database management +alias pg-init='rm -rf "$PG_DATA_DIR" && "$PG_INSTALL_DIR/bin/initdb" --debug --no-clean "$PG_DATA_DIR"' +alias pg-start='"$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR" -k "$PG_DATA_DIR"' +alias pg-stop='pkill -f "postgres.*-D.*$PG_DATA_DIR" || true' +alias pg-restart='pg-stop && sleep 2 && pg-start' +alias pg-status='pgrep -f "postgres.*-D.*$PG_DATA_DIR" && echo "PostgreSQL is running" || echo "PostgreSQL is not running"' + +# Client connections +alias pg-psql='"$PG_INSTALL_DIR/bin/psql" -h "$PG_DATA_DIR" postgres' +alias pg-createdb='"$PG_INSTALL_DIR/bin/createdb" -h "$PG_DATA_DIR"' +alias pg-dropdb='"$PG_INSTALL_DIR/bin/dropdb" -h "$PG_DATA_DIR"' + +# Debugging +alias pg-debug-gdb='gdb -x "$GDBINIT" "$PG_INSTALL_DIR/bin/postgres"' +alias pg-debug-lldb='lldb "$PG_INSTALL_DIR/bin/postgres"' +alias pg-debug=' + if command -v gdb >/dev/null 2>&1; then + pg-debug-gdb + elif command -v lldb >/dev/null 2>&1; then + pg-debug-lldb + else + echo "No debugger available (gdb or lldb required)" + fi' + +# Attach to running process +alias pg-attach-gdb=' + PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1) + if [ -n "$PG_PID" ]; then + echo "Attaching GDB to PostgreSQL process $PG_PID" + gdb -x "$GDBINIT" -p "$PG_PID" + else + echo "No PostgreSQL process found" + fi' + +alias pg-attach-lldb=' + PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1) + if [ -n "$PG_PID" ]; then + echo "Attaching LLDB to PostgreSQL process $PG_PID" + lldb -p "$PG_PID" + else + echo "No PostgreSQL process found" + fi' + +alias pg-attach=' + if command -v gdb >/dev/null 2>&1; then + pg-attach-gdb + elif command -v lldb >/dev/null 2>&1; then + pg-attach-lldb + else + echo "No debugger available (gdb or lldb required)" + fi' + +# Performance profiling 
and analysis +alias pg-valgrind='valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"' +alias pg-strace='strace -f -o /tmp/postgres.strace "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"' + +# Flame graph generation +alias pg-flame='pg-flame-generate' +alias pg-flame-30='pg-flame-generate 30' +alias pg-flame-60='pg-flame-generate 60' +alias pg-flame-120='pg-flame-generate 120' + +# Custom flame graph with specific duration and output +pg-flame-custom() { + local duration=${1:-30} + local output_dir=${2:-$PG_FLAME_DIR} + echo "Generating flame graph for ${duration}s, output to: $output_dir" + pg-flame-generate "$duration" "$output_dir" +} + +# Benchmarking with pgbench +alias pg-bench='pg-bench-run' +alias pg-bench-quick='pg-bench-run 5 1 100 1 30 select-only' +alias pg-bench-standard='pg-bench-run 10 2 1000 10 60 tpcb-like' +alias pg-bench-heavy='pg-bench-run 50 4 5000 100 300 tpcb-like' +alias pg-bench-readonly='pg-bench-run 20 4 2000 50 120 select-only' + +# Custom benchmark function +pg-bench-custom() { + local clients=${1:-10} + local threads=${2:-2} + local transactions=${3:-1000} + local scale=${4:-10} + local duration=${5:-60} + local test_type=${6:-tpcb-like} + + echo "Running custom benchmark:" + echo " Clients: $clients, Threads: $threads" + echo " Transactions: $transactions, Scale: $scale" + echo " Duration: ${duration}s, Type: $test_type" + + pg-bench-run "$clients" "$threads" "$transactions" "$scale" "$duration" "$test_type" +} + +# Benchmark with flame graph +pg-bench-flame() { + local duration=${1:-60} + local clients=${2:-10} + local scale=${3:-10} + + echo "Running benchmark with flame graph generation" + echo "Duration: ${duration}s, Clients: $clients, Scale: $scale" + + # Start benchmark in background + pg-bench-run "$clients" 2 1000 "$scale" "$duration" tpcb-like & + local bench_pid=$! + + # Wait a bit for benchmark to start + sleep 5 + + # Generate flame graph for most of the benchmark duration + local flame_duration=$((duration - 10)) + if [ $flame_duration -gt 10 ]; then + pg-flame-generate "$flame_duration" & + local flame_pid=$! + fi + + # Wait for benchmark to complete + wait $bench_pid + + # Wait for flame graph if it was started + if [ -n "${flame_pid:-}" ]; then + wait $flame_pid + fi + + echo "Benchmark and flame graph generation completed" +} + +# Performance monitoring +alias pg-perf='perf top -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)' +alias pg-htop='htop -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | tr "\n" "," | sed "s/,$//")' + +# System performance stats during PostgreSQL operation +pg-stats() { + local duration=${1:-30} + echo "Collecting system stats for ${duration}s..." + + iostat -x 1 "$duration" >"$PG_BENCH_DIR/iostat_$(date +%Y%m%d_%H%M%S).log" & + vmstat 1 "$duration" >"$PG_BENCH_DIR/vmstat_$(date +%Y%m%d_%H%M%S).log" & + + wait + echo "System stats saved to $PG_BENCH_DIR" +} + +# Development helpers +pg-format() { + local since=${1:-HEAD} + + if [ ! 
-f "$PG_SOURCE_DIR/src/tools/pgindent/pgindent" ]; then + echo "Error: pgindent not found at $PG_SOURCE_DIR/src/tools/pgindent/pgindent" + else + + modified_files=$(git diff --name-only "${since}" | grep -E "\.c$|\.h$") + + if [ -z "$modified_files" ]; then + echo "No modified .c or .h files found" + else + + echo "Formatting modified files with pgindent:" + for file in $modified_files; do + if [ -f "$file" ]; then + echo " Formatting: $file" + "$PG_SOURCE_DIR/src/tools/pgindent/pgindent" "$file" + else + echo " Warning: File not found: $file" + fi + done + + echo "Checking files for whitespace:" + git diff --check "${since}" + fi + fi +} + +alias pg-tidy='find "$PG_SOURCE_DIR" -name "*.c" | head -10 | xargs clang-tidy' + +# Log management +alias pg-log='tail -f "$PG_DATA_DIR/log/postgresql-$(date +%Y-%m-%d).log" 2>/dev/null || echo "No log file found"' +alias pg-log-errors='grep -i error "$PG_DATA_DIR/log/"*.log 2>/dev/null || echo "No error logs found"' + +# Build logs +alias pg-build-log='cat "$PG_BUILD_DIR/meson-logs/meson-log.txt"' +alias pg-build-errors='grep -i error "$PG_BUILD_DIR/meson-logs/meson-log.txt" 2>/dev/null || echo "No build errors found"' + +# Results viewing +alias pg-bench-results='ls -la "$PG_BENCH_DIR" && echo "Latest results:" && tail -20 "$PG_BENCH_DIR"/results_*.txt 2>/dev/null | tail -20' +alias pg-flame-results='ls -la "$PG_FLAME_DIR" && echo "Open flame graphs with: firefox $PG_FLAME_DIR/*.svg"' + +# Clean up old results +pg-clean-results() { + local days=${1:-7} + echo "Cleaning benchmark and flame graph results older than $days days..." + find "$PG_BENCH_DIR" -type f -mtime +$days -delete 2>/dev/null || true + find "$PG_FLAME_DIR" -type f -mtime +$days -delete 2>/dev/null || true + echo "Cleanup completed" +} + +# Information +alias pg-info=' + echo "=== PostgreSQL Development Environment ===" + echo "Source: $PG_SOURCE_DIR" + echo "Build: $PG_BUILD_DIR" + echo "Install: $PG_INSTALL_DIR" + echo "Data: $PG_DATA_DIR" + echo "Benchmarks: $PG_BENCH_DIR" + echo "Flame graphs: $PG_FLAME_DIR" + echo "Compiler: $CC" + echo "" + echo "Available commands:" + echo " Setup: pg-setup, pg-build, pg-install" + echo " Database: pg-init, pg-start, pg-stop, pg-psql" + echo " Debug: pg-debug, pg-attach, pg-valgrind" + echo " Performance: pg-flame, pg-bench, pg-perf" + echo " Benchmarks: pg-bench-quick, pg-bench-standard, pg-bench-heavy" + echo " Flame graphs: pg-flame-30, pg-flame-60, pg-flame-custom" + echo " Combined: pg-bench-flame" + echo " Results: pg-bench-results, pg-flame-results" + echo " Logs: pg-log, pg-build-log" + echo " Clean: pg-clean, pg-full-clean, pg-clean-results" + echo " Code quality: pg-format, pg-tidy" + echo "=========================================="' + +echo "PostgreSQL aliases loaded. Run 'pg-info' for available commands." 
diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000000..130d5b21986b2 --- /dev/null +++ b/shell.nix @@ -0,0 +1,820 @@ +{ + pkgs, + pkgs-unstable, + system, +}: let + # Create a patched glibc only for the dev shell + patchedGlibc = pkgs.glibc.overrideAttrs (oldAttrs: { + patches = (oldAttrs.patches or []) ++ [ + ./glibc-no-fortify-warning.patch + ]; + }); + + llvmPkgs = pkgs-unstable.llvmPackages_21; + + # Configuration constants + config = { + pgSourceDir = "$PWD"; + pgBuildDir = "$PWD/build"; + pgInstallDir = "$PWD/install"; + pgDataDir = "/tmp/test-db-$(basename $PWD)"; + pgBenchDir = "/tmp/pgbench-results-$(basename $PWD)"; + pgFlameDir = "/tmp/flame-graphs-$(basename $PWD)"; + }; + + # Single dependency function that can be used for all environments + getPostgreSQLDeps = muslLibs: + with pkgs; + [ + # Build system (always use host tools) + pkgs-unstable.meson + pkgs-unstable.ninja + pkg-config + autoconf + libtool + git + which + binutils + gnumake + + # Parser/lexer tools + bison + flex + + # Documentation + docbook_xml_dtd_45 + docbook-xsl-nons + fop + gettext + libxslt + libxml2 + + # Development tools (always use host tools) + coreutils + shellcheck + ripgrep + valgrind + curl + uv + pylint + black + lcov + strace + ltrace + perf-tools + linuxPackages.perf + flamegraph + htop + iotop + sysstat + ccache + cppcheck + compdb + + # GCC/GDB +# pkgs-unstable.gcc15 + gcc + gdb + + # LLVM toolchain + llvmPkgs.llvm + llvmPkgs.llvm.dev + llvmPkgs.clang-tools + llvmPkgs.lldb + + # Language support + (perl.withPackages (ps: with ps; [IPCRun])) + (python3.withPackages (ps: with ps; [requests browser-cookie3])) + tcl + ] + ++ ( + if muslLibs + then [ + # Musl target libraries for cross-compilation + pkgs.pkgsMusl.readline + pkgs.pkgsMusl.zlib + pkgs.pkgsMusl.openssl + pkgs.pkgsMusl.icu + pkgs.pkgsMusl.lz4 + pkgs.pkgsMusl.zstd + pkgs.pkgsMusl.libuuid + pkgs.pkgsMusl.libkrb5 + pkgs.pkgsMusl.linux-pam + pkgs.pkgsMusl.libxcrypt + ] + else [ + # Glibc target libraries + readline + zlib + openssl + icu + lz4 + zstd + libuuid + libkrb5 + linux-pam + libxcrypt + numactl + openldap + liburing + libselinux + patchedGlibc + glibcInfo + glibc.dev + ] + ); + + # GDB configuration for PostgreSQL debugging + gdbConfig = pkgs.writeText "gdbinit-postgres" '' + # PostgreSQL-specific GDB configuration + + # Pretty-print PostgreSQL data structures + define print_node + if $arg0 + printf "Node type: %s\n", nodeTagNames[$arg0->type] + print *$arg0 + else + printf "NULL node\n" + end + end + document print_node + Print a PostgreSQL Node with type information + Usage: print_node + end + + define print_list + set $list = (List*)$arg0 + if $list + printf "List length: %d\n", $list->length + set $cell = $list->head + set $i = 0 + while $cell && $i < $list->length + printf " [%d]: ", $i + print_node $cell->data.ptr_value + set $cell = $cell->next + set $i = $i + 1 + end + else + printf "NULL list\n" + end + end + document print_list + Print a PostgreSQL List structure + Usage: print_list + end + + define print_query + set $query = (Query*)$arg0 + if $query + printf "Query type: %d, command type: %d\n", $query->querySource, $query->commandType + print *$query + else + printf "NULL query\n" + end + end + document print_query + Print a PostgreSQL Query structure + Usage: print_query + end + + define print_relcache + set $rel = (Relation)$arg0 + if $rel + printf "Relation: %s.%s (OID: %u)\n", $rel->rd_rel->relnamespace, $rel->rd_rel->relname.data, $rel->rd_id + printf " natts: %d, relkind: %c\n", 
$rel->rd_rel->relnatts, $rel->rd_rel->relkind + else + printf "NULL relation\n" + end + end + document print_relcache + Print relation cache entry information + Usage: print_relcache + end + + define print_tupdesc + set $desc = (TupleDesc)$arg0 + if $desc + printf "TupleDesc: %d attributes\n", $desc->natts + set $i = 0 + while $i < $desc->natts + set $attr = $desc->attrs[$i] + printf " [%d]: %s (type: %u, len: %d)\n", $i, $attr->attname.data, $attr->atttypid, $attr->attlen + set $i = $i + 1 + end + else + printf "NULL tuple descriptor\n" + end + end + document print_tupdesc + Print tuple descriptor information + Usage: print_tupdesc + end + + define print_slot + set $slot = (TupleTableSlot*)$arg0 + if $slot + printf "TupleTableSlot: %s\n", $slot->tts_ops->name + printf " empty: %d, shouldFree: %d\n", $slot->tts_empty, $slot->tts_shouldFree + if $slot->tts_tupleDescriptor + print_tupdesc $slot->tts_tupleDescriptor + end + else + printf "NULL slot\n" + end + end + document print_slot + Print tuple table slot information + Usage: print_slot + end + + # Memory context debugging + define print_mcxt + set $context = (MemoryContext)$arg0 + if $context + printf "MemoryContext: %s\n", $context->name + printf " type: %s, parent: %p\n", $context->methods->name, $context->parent + printf " total: %zu, free: %zu\n", $context->mem_allocated, $context->freep - $context->freeptr + else + printf "NULL memory context\n" + end + end + document print_mcxt + Print memory context information + Usage: print_mcxt + end + + # Process debugging + define print_proc + set $proc = (PGPROC*)$arg0 + if $proc + printf "PGPROC: pid=%d, database=%u\n", $proc->pid, $proc->databaseId + printf " waiting: %d, waitStatus: %d\n", $proc->waiting, $proc->waitStatus + else + printf "NULL process\n" + end + end + document print_proc + Print process information + Usage: print_proc + end + + # Set useful defaults + set print pretty on + set print object on + set print static-members off + set print vtbl on + set print demangle on + set demangle-style gnu-v3 + set print sevenbit-strings off + set history save on + set history size 1000 + set history filename ~/.gdb_history_postgres + + # Common breakpoints for PostgreSQL debugging + define pg_break_common + break elog + break errfinish + break ExceptionalCondition + break ProcessInterrupts + end + document pg_break_common + Set common PostgreSQL debugging breakpoints + end + + printf "PostgreSQL GDB configuration loaded.\n" + printf "Available commands: print_node, print_list, print_query, print_relcache,\n" + printf " print_tupdesc, print_slot, print_mcxt, print_proc, pg_break_common\n" + ''; + + # Flame graph generation script + flameGraphScript = pkgs.writeScriptBin "pg-flame-generate" '' + #!${pkgs.bash}/bin/bash + set -euo pipefail + + DURATION=''${1:-30} + OUTPUT_DIR=''${2:-${config.pgFlameDir}} + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + + mkdir -p "$OUTPUT_DIR" + + echo "Generating flame graph for PostgreSQL (duration: ''${DURATION}s)" + + # Find PostgreSQL processes + PG_PIDS=$(pgrep -f "postgres.*-D.*${config.pgDataDir}" || true) + + if [ -z "$PG_PIDS" ]; then + echo "Error: No PostgreSQL processes found" + exit 1 + fi + + echo "Found PostgreSQL processes: $PG_PIDS" + + # Record perf data + PERF_DATA="$OUTPUT_DIR/perf_$TIMESTAMP.data" + echo "Recording perf data to $PERF_DATA" + + ${pkgs.linuxPackages.perf}/bin/perf record \ + -F 997 \ + -g \ + --call-graph dwarf \ + -p "$(echo $PG_PIDS | tr ' ' ',')" \ + -o "$PERF_DATA" \ + sleep "$DURATION" + + # Generate flame graph + 
FLAME_SVG="$OUTPUT_DIR/postgres_flame_$TIMESTAMP.svg" + echo "Generating flame graph: $FLAME_SVG" + + ${pkgs.linuxPackages.perf}/bin/perf script -i "$PERF_DATA" | \ + ${pkgs.flamegraph}/bin/stackcollapse-perf.pl | \ + ${pkgs.flamegraph}/bin/flamegraph.pl \ + --title "PostgreSQL Flame Graph ($TIMESTAMP)" \ + --width 1200 \ + --height 800 \ + > "$FLAME_SVG" + + echo "Flame graph generated: $FLAME_SVG" + echo "Perf data saved: $PERF_DATA" + + # Generate summary report + REPORT="$OUTPUT_DIR/report_$TIMESTAMP.txt" + echo "Generating performance report: $REPORT" + + { + echo "PostgreSQL Performance Analysis Report" + echo "Generated: $(date)" + echo "Duration: ''${DURATION}s" + echo "Processes: $PG_PIDS" + echo "" + echo "=== Top Functions ===" + ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio --sort comm,dso,symbol | head -50 + echo "" + echo "=== Call Graph ===" + ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio -g --sort comm,dso,symbol | head -100 + } > "$REPORT" + + echo "Report generated: $REPORT" + echo "" + echo "Files created:" + echo " Flame graph: $FLAME_SVG" + echo " Perf data: $PERF_DATA" + echo " Report: $REPORT" + ''; + + # pgbench wrapper script + pgbenchScript = pkgs.writeScriptBin "pg-bench-run" '' + #!${pkgs.bash}/bin/bash + set -euo pipefail + + # Default parameters + CLIENTS=''${1:-10} + THREADS=''${2:-2} + TRANSACTIONS=''${3:-1000} + SCALE=''${4:-10} + DURATION=''${5:-60} + TEST_TYPE=''${6:-tpcb-like} + + OUTPUT_DIR="${config.pgBenchDir}" + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + + mkdir -p "$OUTPUT_DIR" + + echo "=== PostgreSQL Benchmark Configuration ===" + echo "Clients: $CLIENTS" + echo "Threads: $THREADS" + echo "Transactions: $TRANSACTIONS" + echo "Scale factor: $SCALE" + echo "Duration: ''${DURATION}s" + echo "Test type: $TEST_TYPE" + echo "Output directory: $OUTPUT_DIR" + echo "============================================" + + # Check if PostgreSQL is running + if ! pgrep -f "postgres.*-D.*${config.pgDataDir}" >/dev/null; then + echo "Error: PostgreSQL is not running. Start it with 'pg-start'" + exit 1 + fi + + PGBENCH="${config.pgInstallDir}/bin/pgbench" + PSQL="${config.pgInstallDir}/bin/psql" + CREATEDB="${config.pgInstallDir}/bin/createdb" + DROPDB="${config.pgInstallDir}/bin/dropdb" + + DB_NAME="pgbench_test_$TIMESTAMP" + RESULTS_FILE="$OUTPUT_DIR/results_$TIMESTAMP.txt" + LOG_FILE="$OUTPUT_DIR/pgbench_$TIMESTAMP.log" + + echo "Creating test database: $DB_NAME" + "$CREATEDB" -h "${config.pgDataDir}" "$DB_NAME" || { + echo "Failed to create database" + exit 1 + } + + # Initialize pgbench tables + echo "Initializing pgbench tables (scale factor: $SCALE)" + "$PGBENCH" -h "${config.pgDataDir}" -i -s "$SCALE" "$DB_NAME" || { + echo "Failed to initialize pgbench tables" + "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + exit 1 + } + + # Run benchmark based on test type + echo "Running benchmark..." 
+ + case "$TEST_TYPE" in + "tpcb-like"|"default") + BENCH_ARGS="" + ;; + "select-only") + BENCH_ARGS="-S" + ;; + "simple-update") + BENCH_ARGS="-N" + ;; + "read-write") + BENCH_ARGS="-b select-only@70 -b tpcb-like@30" + ;; + *) + echo "Unknown test type: $TEST_TYPE" + echo "Available types: tpcb-like, select-only, simple-update, read-write" + "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + exit 1 + ;; + esac + + { + echo "PostgreSQL Benchmark Results" + echo "Generated: $(date)" + echo "Test type: $TEST_TYPE" + echo "Clients: $CLIENTS, Threads: $THREADS" + echo "Transactions: $TRANSACTIONS, Duration: ''${DURATION}s" + echo "Scale factor: $SCALE" + echo "Database: $DB_NAME" + echo "" + echo "=== System Information ===" + echo "CPU: $(nproc) cores" + echo "Memory: $(free -h | grep '^Mem:' | awk '{print $2}')" + echo "Compiler: $CC" + echo "PostgreSQL version: $("$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -t -c "SELECT version();" | head -1)" + echo "" + echo "=== Benchmark Results ===" + } > "$RESULTS_FILE" + + # Run the actual benchmark + "$PGBENCH" \ + -h "${config.pgDataDir}" \ + -c "$CLIENTS" \ + -j "$THREADS" \ + -T "$DURATION" \ + -P 5 \ + --log \ + --log-prefix="$OUTPUT_DIR/pgbench_$TIMESTAMP" \ + $BENCH_ARGS \ + "$DB_NAME" 2>&1 | tee -a "$RESULTS_FILE" + + # Collect additional statistics + { + echo "" + echo "=== Database Statistics ===" + "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c " + SELECT + schemaname, + relname, + n_tup_ins as inserts, + n_tup_upd as updates, + n_tup_del as deletes, + n_live_tup as live_tuples, + n_dead_tup as dead_tuples + FROM pg_stat_user_tables; + " + + echo "" + echo "=== Index Statistics ===" + "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c " + SELECT + schemaname, + relname, + indexrelname, + idx_scan, + idx_tup_read, + idx_tup_fetch + FROM pg_stat_user_indexes; + " + } >> "$RESULTS_FILE" + + # Clean up + echo "Cleaning up test database: $DB_NAME" + "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true + + echo "" + echo "Benchmark completed!" 
+ echo "Results saved to: $RESULTS_FILE" + echo "Transaction logs: $OUTPUT_DIR/pgbench_$TIMESTAMP*" + + # Show summary + echo "" + echo "=== Quick Summary ===" + grep -E "(tps|latency)" "$RESULTS_FILE" | tail -5 + ''; + + # Development shell (GCC + glibc) + devShell = pkgs.mkShell { + name = "postgresql-dev"; + buildInputs = + (getPostgreSQLDeps false) + ++ [ + flameGraphScript + pgbenchScript + ]; + + shellHook = let + icon = "f121"; + in '' + # History configuration + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + # Clean environment + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + # Essential tools in PATH + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Ccache configuration + export PATH=${pkgs.ccache}/bin:$PATH + export CCACHE_COMPILERCHECK=content + export CCACHE_DIR=$HOME/.ccache/pg/$(basename $PWD) + mkdir -p "$CCACHE_DIR" + + # LLVM configuration + export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config" + export PATH="${llvmPkgs.llvm}/bin:$PATH" + export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH" + export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm" + export LLVM_ROOT="${llvmPkgs.llvm}" + + # Development tools in PATH + export PATH=${pkgs.clang-tools}/bin:$PATH + export PATH=${pkgs.cppcheck}/bin:$PATH + + # PosgreSQL Development CFLAGS + # -DRELCACHE_FORCE_RELEASE -DCATCACHE_FORCE_RELEASE -fno-omit-frame-pointer -fno-stack-protector -DUSE_VALGRIND + export CFLAGS="" + export CXXFLAGS="" + + # Python UV + UV_PYTHON_DOWNLOADS=never + + # GCC configuration (default compiler) + export CC="${pkgs.gcc}/bin/gcc" + export CXX="${pkgs.gcc}/bin/g++" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + # GDB configuration + export GDBINIT="${gdbConfig}" + + # Performance tools in PATH + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + # Create output directories + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + # Compiler verification + echo "Environment configured:" + echo " Compiler: $CC" + echo " libc: glibc" + echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')" + + # Load PostgreSQL development aliases + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + else + echo "Warning: pg-aliases.sh not found in current directory" + fi + + echo "" + echo "PostgreSQL Development Environment Ready (GCC + glibc)" + echo "Run 'pg-info' for available commands" + ''; + }; + + # Clang + glibc variant + clangDevShell = pkgs.mkShell { + name = "postgresql-clang-glibc"; + buildInputs = + (getPostgreSQLDeps false) + ++ [ + llvmPkgs.clang + llvmPkgs.lld + llvmPkgs.compiler-rt + flameGraphScript + pgbenchScript + ]; + + shellHook = let + icon = "f121"; + in '' + # History configuration + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + # Clean environment + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + # Essential tools in PATH + export 
PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Ccache configuration + export PATH=${pkgs.ccache}/bin:$PATH + export CCACHE_COMPILERCHECK=content + export CCACHE_DIR=$HOME/.ccache_pg_dev_clang + mkdir -p "$CCACHE_DIR" + + # LLVM configuration + export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config" + export PATH="${llvmPkgs.llvm}/bin:$PATH" + export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH" + export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm" + export LLVM_ROOT="${llvmPkgs.llvm}" + + # Development tools in PATH + export PATH=${pkgs.clang-tools}/bin:$PATH + export PATH=${pkgs.cppcheck}/bin:$PATH + + # Clang + glibc configuration - use system linker instead of LLD for compatibility + export CC="${llvmPkgs.clang}/bin/clang" + export CXX="${llvmPkgs.clang}/bin/clang++" + + # Use system linker and standard runtime + #export CFLAGS="" + #export CXXFLAGS="" + #export LDFLAGS="" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + # GDB configuration + export GDBINIT="${gdbConfig}" + + # Performance tools in PATH + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + # Create output directories + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + # Compiler verification + echo "Environment configured:" + echo " Compiler: $CC" + echo " libc: glibc" + echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')" + + # Load PostgreSQL development aliases + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + else + echo "Warning: pg-aliases.sh not found in current directory" + fi + + echo "" + echo "PostgreSQL Development Environment Ready (Clang + glibc)" + echo "Run 'pg-info' for available commands" + ''; + }; + + # GCC + musl variant (cross-compilation) + muslDevShell = pkgs.mkShell { + name = "postgresql-gcc-musl"; + buildInputs = + (getPostgreSQLDeps true) + ++ [ + pkgs.gcc + flameGraphScript + pgbenchScript + ]; + + shellHook = '' + # Same base configuration as main shell + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + + # Cross-compilation to musl + export CC="${pkgs.gcc}/bin/gcc" + export CXX="${pkgs.gcc}/bin/g++" + + # Point to musl libraries for linking + export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig" + export CFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export CXXFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export LDFLAGS="-L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -static-libgcc" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export 
PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + export GDBINIT="${gdbConfig}" + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + echo "GCC + musl environment configured" + echo " Compiler: $CC" + echo " LibC: musl (cross-compilation)" + + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + fi + + echo "PostgreSQL Development Environment Ready (GCC + musl)" + ''; + }; + + # Clang + musl variant (cross-compilation) + clangMuslDevShell = pkgs.mkShell { + name = "postgresql-clang-musl"; + buildInputs = + (getPostgreSQLDeps true) + ++ [ + llvmPkgs.clang + llvmPkgs.lld + flameGraphScript + pgbenchScript + ]; + + shellHook = let + icon = "f121"; + in '' + export HISTFILE=.history + export HISTSIZE=1000000 + export HISTFILESIZE=1000000 + + unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH + + export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH" + export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]" + + # Cross-compilation to musl with clang + export CC="${llvmPkgs.clang}/bin/clang" + export CXX="${llvmPkgs.clang}/bin/clang++" + + # Point to musl libraries for linking + export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig" + export CFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export CXXFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include" + export LDFLAGS="--target=x86_64-linux-musl -L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -fuse-ld=lld" + + # PostgreSQL environment + export PG_SOURCE_DIR="${config.pgSourceDir}" + export PG_BUILD_DIR="${config.pgBuildDir}" + export PG_INSTALL_DIR="${config.pgInstallDir}" + export PG_DATA_DIR="${config.pgDataDir}" + export PG_BENCH_DIR="${config.pgBenchDir}" + export PG_FLAME_DIR="${config.pgFlameDir}" + export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d) + + export GDBINIT="${gdbConfig}" + export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH" + + mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR" + + echo "Clang + musl environment configured" + echo " Compiler: $CC" + echo " LibC: musl (cross-compilation)" + + if [ -f ./pg-aliases.sh ]; then + source ./pg-aliases.sh + fi + + echo "PostgreSQL Development Environment Ready (Clang + musl)" + ''; + }; +in { + inherit devShell clangDevShell muslDevShell clangMuslDevShell gdbConfig flameGraphScript pgbenchScript; +} diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index efc41fca2ba79..0a8fc7f181c25 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -1232,7 +1232,7 @@ spawn_process(const char *cmdline) char *cmdline2; cmdline2 = psprintf("exec %s", cmdline); - execl(shellprog, shellprog, "-c", cmdline2, (char *) NULL); + execlp(shellprog, shellprog, "-c", cmdline2, (char *) NULL); /* Not using the normal bail() here as we want _exit */ bail_noatexit("could not exec \"%s\": %m", shellprog); } diff --git a/src/tools/pgindent/pgindent b/src/tools/pgindent/pgindent index d14da3f01a995..18ef572a8be6c 100755 --- 
a/src/tools/pgindent/pgindent +++ b/src/tools/pgindent/pgindent @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Copyright (c) 2021-2025, PostgreSQL Global Development Group From ce82432bf329bc95c25dc608ac9072854b2225d7 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 2 Nov 2025 11:36:20 -0500 Subject: [PATCH 2/5] Reorganize heap update logic This commit refactors the interaction between heap_tuple_update(), heap_update(), and simple_heap_update() to improve code organization and flexibility. The changes are functionally equivalent to the previous implementation and have no performance impact. The primary motivation is to prepare for upcoming modifications to how and where modified attributes are identified during the update path, particularly for catalog updates. As part of this reorganization, the handling of replica identity key attributes has been adjusted. Instead of fetching a second copy of the bitmap during an update operation, the caller is now required to provide it. This change applies to both heap_update() and heap_delete(). No user-visible changes. --- src/backend/access/heap/heapam.c | 568 +++++++++++------------ src/backend/access/heap/heapam_handler.c | 117 ++++- src/include/access/heapam.h | 24 +- 3 files changed, 410 insertions(+), 299 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4d382a04338e6..30847db1fe332 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -39,18 +39,24 @@ #include "access/syncscan.h" #include "access/valid.h" #include "access/visibilitymap.h" +#include "access/xact.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/pg_database.h" #include "catalog/pg_database_d.h" #include "commands/vacuum.h" +#include "nodes/bitmapset.h" #include "pgstat.h" #include "port/pg_bitutils.h" +#include "storage/bufmgr.h" +#include "storage/itemptr.h" #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/procarray.h" #include "utils/datum.h" #include "utils/injection_point.h" #include "utils/inval.h" +#include "utils/relcache.h" #include "utils/spccache.h" #include "utils/syscache.h" @@ -62,16 +68,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared); #ifdef USE_ASSERT_CHECKING -static void check_lock_if_inplace_updateable_rel(Relation relation, - const ItemPointerData *otid, - HeapTuple newtup); static void check_inplace_rel_lock(HeapTuple oldtup); #endif -static Bitmapset *HeapDetermineColumnsInfo(Relation relation, - Bitmapset *interesting_cols, - Bitmapset *external_cols, - HeapTuple oldtup, HeapTuple newtup, - bool *has_external); static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock); @@ -103,10 +101,10 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status static void index_delete_sort(TM_IndexDeleteOp *delstate); static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); -static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, +static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, + Bitmapset *rid_attrs, bool key_required, bool *copy); - /* * Each tuple lock mode has a corresponding heavyweight lock, and one or two * corresponding MultiXactStatuses (one 
to merely lock tuples, another one to @@ -2814,6 +2812,7 @@ heap_delete(Relation relation, const ItemPointerData *tid, Buffer buffer; Buffer vmbuffer = InvalidBuffer; TransactionId new_xmax; + Bitmapset *rid_attrs; uint16 new_infomask, new_infomask2; bool have_tuple_lock = false; @@ -2826,6 +2825,8 @@ heap_delete(Relation relation, const ItemPointerData *tid, AssertHasSnapshotForToast(relation); + rid_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY); + /* * Forbid this during a parallel operation, lest it allocate a combo CID. * Other workers might need that combo CID for visibility checks, and we @@ -3029,6 +3030,7 @@ heap_delete(Relation relation, const ItemPointerData *tid, UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); + bms_free(rid_attrs); return result; } @@ -3050,7 +3052,10 @@ heap_delete(Relation relation, const ItemPointerData *tid, * Compute replica identity tuple before entering the critical section so * we don't PANIC upon a memory allocation failure. */ - old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied); + old_key_tuple = ExtractReplicaIdentity(relation, &tp, rid_attrs, + true, &old_key_copied); + bms_free(rid_attrs); + rid_attrs = NULL; /* * If this is the first possibly-multixact-able operation in the current @@ -3262,7 +3267,10 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid) * heap_update - replace a tuple * * See table_tuple_update() for an explanation of the parameters, except that - * this routine directly takes a tuple rather than a slot. + * this routine directly takes a heap tuple rather than a slot. + * + * It's required that the caller has acquired the pin and lock on the buffer. + * That lock and pin will be managed here, not in the caller. * * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last @@ -3270,30 +3278,21 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid) * generated by another transaction). 
*/ TM_Result -heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, - CommandId cid, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes) +heap_update(Relation relation, HeapTupleData *oldtup, + HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, + Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, + Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs, + Bitmapset *mix_attrs, Buffer *vmbuffer, + bool rep_id_key_required, TU_UpdateIndexes *update_indexes) { TM_Result result; TransactionId xid = GetCurrentTransactionId(); - Bitmapset *hot_attrs; - Bitmapset *sum_attrs; - Bitmapset *key_attrs; - Bitmapset *id_attrs; - Bitmapset *interesting_attrs; - Bitmapset *modified_attrs; - ItemId lp; - HeapTupleData oldtup; HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; - Page page; - BlockNumber block; MultiXactStatus mxact_status; - Buffer buffer, - newbuf, - vmbuffer = InvalidBuffer, + Buffer newbuf, vmbuffer_new = InvalidBuffer; bool need_toast; Size newtupsize, @@ -3307,7 +3306,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, bool all_visible_cleared_new = false; bool checked_lockers; bool locker_remains; - bool id_has_external = false; TransactionId xmax_new_tuple, xmax_old_tuple; uint16 infomask_old_tuple, @@ -3315,144 +3313,13 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, infomask_new_tuple, infomask2_new_tuple; - Assert(ItemPointerIsValid(otid)); - - /* Cheap, simplistic check that the tuple matches the rel's rowtype. */ - Assert(HeapTupleHeaderGetNatts(newtup->t_data) <= - RelationGetNumberOfAttributes(relation)); - + Assert(BufferIsLockedByMe(buffer)); + Assert(ItemIdIsNormal(lp)); AssertHasSnapshotForToast(relation); - /* - * Forbid this during a parallel operation, lest it allocate a combo CID. - * Other workers might need that combo CID for visibility checks, and we - * have no provision for broadcasting it to them. - */ - if (IsInParallelMode()) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TRANSACTION_STATE), - errmsg("cannot update tuples during a parallel operation"))); - -#ifdef USE_ASSERT_CHECKING - check_lock_if_inplace_updateable_rel(relation, otid, newtup); -#endif - - /* - * Fetch the list of attributes to be checked for various operations. - * - * For HOT considerations, this is wasted effort if we fail to update or - * have to put the new tuple on a different page. But we must compute the - * list before obtaining buffer lock --- in the worst case, if we are - * doing an update on one of the relevant system catalogs, we could - * deadlock if we try to fetch the list later. In any case, the relcache - * caches the data so this is usually pretty cheap. - * - * We also need columns used by the replica identity and columns that are - * considered the "key" of rows in the table. - * - * Note that we get copies of each bitmap, so we need not worry about - * relcache flush happening midway through. 
- */ - hot_attrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_HOT_BLOCKING); - sum_attrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_SUMMARIZED); - key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); - id_attrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_IDENTITY_KEY); - interesting_attrs = NULL; - interesting_attrs = bms_add_members(interesting_attrs, hot_attrs); - interesting_attrs = bms_add_members(interesting_attrs, sum_attrs); - interesting_attrs = bms_add_members(interesting_attrs, key_attrs); - interesting_attrs = bms_add_members(interesting_attrs, id_attrs); - - block = ItemPointerGetBlockNumber(otid); - INJECTION_POINT("heap_update-before-pin", NULL); - buffer = ReadBuffer(relation, block); - page = BufferGetPage(buffer); - - /* - * Before locking the buffer, pin the visibility map page if it appears to - * be necessary. Since we haven't got the lock yet, someone else might be - * in the middle of changing this, so we'll need to recheck after we have - * the lock. - */ - if (PageIsAllVisible(page)) - visibilitymap_pin(relation, block, &vmbuffer); - - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid)); - - /* - * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring - * we see LP_NORMAL here. When the otid origin is a syscache, we may have - * neither a pin nor a snapshot. Hence, we may see other LP_ states, each - * of which indicates concurrent pruning. - * - * Failing with TM_Updated would be most accurate. However, unlike other - * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and - * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted - * does matter to SQL statements UPDATE and MERGE, those SQL statements - * hold a snapshot that ensures LP_NORMAL. Hence, the choice between - * TM_Updated and TM_Deleted affects only the wording of error messages. - * Settle on TM_Deleted, for two reasons. First, it avoids complicating - * the specification of when tmfd->ctid is valid. Second, it creates - * error log evidence that we took this branch. - * - * Since it's possible to see LP_UNUSED at otid, it's also possible to see - * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an - * unrelated row, we'll fail with "duplicate key value violates unique". - * XXX if otid is the live, newer version of the newtup row, we'll discard - * changes originating in versions of this catalog row after the version - * the caller got from syscache. See syscache-update-pruned.spec. - */ - if (!ItemIdIsNormal(lp)) - { - Assert(RelationSupportsSysCache(RelationGetRelid(relation))); - - UnlockReleaseBuffer(buffer); - Assert(!have_tuple_lock); - if (vmbuffer != InvalidBuffer) - ReleaseBuffer(vmbuffer); - tmfd->ctid = *otid; - tmfd->xmax = InvalidTransactionId; - tmfd->cmax = InvalidCommandId; - *update_indexes = TU_None; - - bms_free(hot_attrs); - bms_free(sum_attrs); - bms_free(key_attrs); - bms_free(id_attrs); - /* modified_attrs not yet initialized */ - bms_free(interesting_attrs); - return TM_Deleted; - } - - /* - * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work - * properly. 
- */ - oldtup.t_tableOid = RelationGetRelid(relation); - oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); - oldtup.t_len = ItemIdGetLength(lp); - oldtup.t_self = *otid; - - /* the new tuple is ready, except for this: */ + /* The new tuple is ready, except for this */ newtup->t_tableOid = RelationGetRelid(relation); - /* - * Determine columns modified by the update. Additionally, identify - * whether any of the unmodified replica identity key attributes in the - * old tuple is externally stored or not. This is required because for - * such attributes the flattened value won't be WAL logged as part of the - * new tuple so we must include it as part of the old_key_tuple. See - * ExtractReplicaIdentity. - */ - modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs, - id_attrs, &oldtup, - newtup, &id_has_external); - /* * If we're not updating any "key" column, we can grab a weaker lock type. * This allows for more concurrency when we are running simultaneously @@ -3464,7 +3331,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * is updates that don't manipulate key columns, not those that * serendipitously arrive at the same key values. */ - if (!bms_overlap(modified_attrs, key_attrs)) + if (!bms_overlap(mix_attrs, pk_attrs)) { *lockmode = LockTupleNoKeyExclusive; mxact_status = MultiXactStatusNoKeyUpdate; @@ -3488,17 +3355,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, key_intact = false; } - /* - * Note: beyond this point, use oldtup not otid to refer to old tuple. - * otid may very well point at newtup->t_self, which we will overwrite - * with the new tuple's location, so there's great risk of confusion if we - * use otid anymore. - */ - l2: checked_lockers = false; locker_remains = false; - result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer); + result = HeapTupleSatisfiesUpdate(oldtup, cid, buffer); /* see below about the "no wait" case */ Assert(result != TM_BeingModified || wait); @@ -3530,8 +3390,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, */ /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); - infomask = oldtup.t_data->t_infomask; + xwait = HeapTupleHeaderGetRawXmax(oldtup->t_data); + infomask = oldtup->t_data->t_infomask; /* * Now we have to do something about the existing locker. If it's a @@ -3571,13 +3431,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * requesting a lock and already have one; avoids deadlock). */ if (!current_is_member) - heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode, + heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode, LockWaitBlock, &have_tuple_lock); /* wait for multixact */ MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask, - relation, &oldtup.t_self, XLTW_Update, - &remain); + relation, &oldtup->t_self, XLTW_Update, &remain); checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); @@ -3587,9 +3446,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * could update this tuple before we get to this point. Check * for xmax change, and start over if so. 
*/ - if (xmax_infomask_changed(oldtup.t_data->t_infomask, + if (xmax_infomask_changed(oldtup->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), + !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup->t_data), xwait)) goto l2; } @@ -3614,8 +3473,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * before this one, which are important to keep in case this * subxact aborts. */ - if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) - update_xact = HeapTupleGetUpdateXid(oldtup.t_data); + if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup->t_data->t_infomask)) + update_xact = HeapTupleGetUpdateXid(oldtup->t_data); else update_xact = InvalidTransactionId; @@ -3656,9 +3515,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * lock. */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode, + heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode, LockWaitBlock, &have_tuple_lock); - XactLockTableWait(xwait, relation, &oldtup.t_self, + XactLockTableWait(xwait, relation, &oldtup->t_self, XLTW_Update); checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); @@ -3668,20 +3527,20 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * other xact could update this tuple before we get to this point. * Check for xmax change, and start over if so. */ - if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || + if (xmax_infomask_changed(oldtup->t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, - HeapTupleHeaderGetRawXmax(oldtup.t_data))) + HeapTupleHeaderGetRawXmax(oldtup->t_data))) goto l2; /* Otherwise check if it committed or aborted */ - UpdateXmaxHintBits(oldtup.t_data, buffer, xwait); - if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) + UpdateXmaxHintBits(oldtup->t_data, buffer, xwait); + if (oldtup->t_data->t_infomask & HEAP_XMAX_INVALID) can_continue = true; } if (can_continue) result = TM_Ok; - else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid)) + else if (!ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid)) result = TM_Updated; else result = TM_Deleted; @@ -3694,39 +3553,33 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, result == TM_Updated || result == TM_Deleted || result == TM_BeingModified); - Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); + Assert(!(oldtup->t_data->t_infomask & HEAP_XMAX_INVALID)); Assert(result != TM_Updated || - !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid)); + !ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid)); } if (crosscheck != InvalidSnapshot && result == TM_Ok) { /* Perform additional check for transaction-snapshot mode RI updates */ - if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer)) + if (!HeapTupleSatisfiesVisibility(oldtup, crosscheck, buffer)) result = TM_Updated; } if (result != TM_Ok) { - tmfd->ctid = oldtup.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); + tmfd->ctid = oldtup->t_data->t_ctid; + tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup->t_data); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); + tmfd->cmax = HeapTupleHeaderGetCmax(oldtup->t_data); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); if (have_tuple_lock) - UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode); - if (vmbuffer != InvalidBuffer) - ReleaseBuffer(vmbuffer); + 
UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode); + if (*vmbuffer != InvalidBuffer) + ReleaseBuffer(*vmbuffer); *update_indexes = TU_None; - bms_free(hot_attrs); - bms_free(sum_attrs); - bms_free(key_attrs); - bms_free(id_attrs); - bms_free(modified_attrs); - bms_free(interesting_attrs); return result; } @@ -3739,10 +3592,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * tuple has been locked or updated under us, but hopefully it won't * happen very often. */ - if (vmbuffer == InvalidBuffer && PageIsAllVisible(page)) + if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page)) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - visibilitymap_pin(relation, block, &vmbuffer); + visibilitymap_pin(relation, block, vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto l2; } @@ -3753,9 +3606,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), - oldtup.t_data->t_infomask, - oldtup.t_data->t_infomask2, + compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data), + oldtup->t_data->t_infomask, + oldtup->t_data->t_infomask2, xid, *lockmode, true, &xmax_old_tuple, &infomask_old_tuple, &infomask2_old_tuple); @@ -3767,12 +3620,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * tuple. (In rare cases that might also be InvalidTransactionId and yet * not have the HEAP_XMAX_INVALID bit set; that's fine.) */ - if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) || - HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) || + if ((oldtup->t_data->t_infomask & HEAP_XMAX_INVALID) || + HEAP_LOCKED_UPGRADED(oldtup->t_data->t_infomask) || (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else - xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup->t_data); if (!TransactionIdIsValid(xmax_new_tuple)) { @@ -3787,7 +3640,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * Note that since we're doing an update, the only possibility is that * the lockers had FOR KEY SHARE lock. */ - if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) + if (oldtup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) { GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple, &infomask2_new_tuple); @@ -3815,7 +3668,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * Replace cid with a combo CID if necessary. Note that we already put * the plain cid into the new tuple. 
*/ - HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); + HeapTupleHeaderAdjustCmax(oldtup->t_data, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit @@ -3832,12 +3685,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, relation->rd_rel->relkind != RELKIND_MATVIEW) { /* toast table entries should never be recursively toasted */ - Assert(!HeapTupleHasExternal(&oldtup)); + Assert(!HeapTupleHasExternal(oldtup)); Assert(!HeapTupleHasExternal(newtup)); need_toast = false; } else - need_toast = (HeapTupleHasExternal(&oldtup) || + need_toast = (HeapTupleHasExternal(oldtup) || HeapTupleHasExternal(newtup) || newtup->t_len > TOAST_TUPLE_THRESHOLD); @@ -3870,9 +3723,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * updating, because the potentially created multixact would otherwise * be wrong. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), - oldtup.t_data->t_infomask, - oldtup.t_data->t_infomask2, + compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data), + oldtup->t_data->t_infomask, + oldtup->t_data->t_infomask2, xid, *lockmode, false, &xmax_lock_old_tuple, &infomask_lock_old_tuple, &infomask2_lock_old_tuple); @@ -3882,18 +3735,18 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ - oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); - oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; - HeapTupleClearHotUpdated(&oldtup); + oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); + oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; + HeapTupleClearHotUpdated(oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); - oldtup.t_data->t_infomask |= infomask_lock_old_tuple; - oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; - HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleHeaderSetXmax(oldtup->t_data, xmax_lock_old_tuple); + oldtup->t_data->t_infomask |= infomask_lock_old_tuple; + oldtup->t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ - oldtup.t_data->t_ctid = oldtup.t_self; + oldtup->t_data->t_ctid = oldtup->t_self; /* * Clear all-frozen bit on visibility map if needed. We could @@ -3902,7 +3755,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * worthwhile. */ if (PageIsAllVisible(page) && - visibilitymap_clear(relation, block, vmbuffer, + visibilitymap_clear(relation, block, *vmbuffer, VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; @@ -3916,10 +3769,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self); + xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); xlrec.xmax = xmax_lock_old_tuple; - xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask, - oldtup.t_data->t_infomask2); + xlrec.infobits_set = compute_infobits(oldtup->t_data->t_infomask, + oldtup->t_data->t_infomask2); xlrec.flags = cleared_all_frozen ? 
XLH_LOCK_ALL_FROZEN_CLEARED : 0; XLogRegisterData(&xlrec, SizeOfHeapLock); @@ -3941,7 +3794,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, if (need_toast) { /* Note we always use WAL and FSM during updates */ - heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0); + heaptup = heap_toast_insert_or_update(relation, newtup, oldtup, 0); newtupsize = MAXALIGN(heaptup->t_len); } else @@ -3977,20 +3830,20 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, /* It doesn't fit, must use RelationGetBufferForTuple. */ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, buffer, 0, NULL, - &vmbuffer_new, &vmbuffer, + &vmbuffer_new, vmbuffer, 0); /* We're all done. */ break; } /* Acquire VM page pin if needed and we don't have it. */ - if (vmbuffer == InvalidBuffer && PageIsAllVisible(page)) - visibilitymap_pin(relation, block, &vmbuffer); + if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page)) + visibilitymap_pin(relation, block, vmbuffer); /* Re-acquire the lock on the old tuple's page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* Re-check using the up-to-date free space */ pagefree = PageGetHeapFreeSpace(page); if (newtupsize > pagefree || - (vmbuffer == InvalidBuffer && PageIsAllVisible(page))) + (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))) { /* * Rats, it doesn't fit anymore, or somebody just now set the @@ -4028,7 +3881,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * will include checking the relation level, there is no benefit to a * separate check for the new tuple. */ - CheckForSerializableConflictIn(relation, &oldtup.t_self, + CheckForSerializableConflictIn(relation, &oldtup->t_self, BufferGetBlockNumber(buffer)); /* @@ -4036,7 +3889,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * has enough space for the new tuple. If they are the same buffer, only * one pin is held. */ - if (newbuf == buffer) { /* @@ -4044,7 +3896,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * to do a HOT update. Check if any of the index columns have been * changed. */ - if (!bms_overlap(modified_attrs, hot_attrs)) + if (!bms_overlap(mix_attrs, hot_attrs)) { use_hot_update = true; @@ -4055,7 +3907,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * indexes if the columns were updated, or we may fail to detect * e.g. value bound changes in BRIN minmax indexes. */ - if (bms_overlap(modified_attrs, sum_attrs)) + if (bms_overlap(mix_attrs, sum_attrs)) summarized_update = true; } } @@ -4072,10 +3924,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * logged. Pass old key required as true only if the replica identity key * columns are modified or it has external data. 
*/ - old_key_tuple = ExtractReplicaIdentity(relation, &oldtup, - bms_overlap(modified_attrs, id_attrs) || - id_has_external, - &old_key_copied); + old_key_tuple = ExtractReplicaIdentity(relation, oldtup, rid_attrs, + rep_id_key_required, &old_key_copied); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -4097,7 +3947,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, if (use_hot_update) { /* Mark the old tuple as HOT-updated */ - HeapTupleSetHotUpdated(&oldtup); + HeapTupleSetHotUpdated(oldtup); /* And mark the new tuple as heap-only */ HeapTupleSetHeapOnly(heaptup); /* Mark the caller's copy too, in case different from heaptup */ @@ -4106,7 +3956,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, else { /* Make sure tuples are correctly marked as not-HOT */ - HeapTupleClearHotUpdated(&oldtup); + HeapTupleClearHotUpdated(oldtup); HeapTupleClearHeapOnly(heaptup); HeapTupleClearHeapOnly(newtup); } @@ -4115,17 +3965,17 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, /* Clear obsolete visibility flags, possibly set by ourselves above... */ - oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); - oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; + oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); + oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); - oldtup.t_data->t_infomask |= infomask_old_tuple; - oldtup.t_data->t_infomask2 |= infomask2_old_tuple; - HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleHeaderSetXmax(oldtup->t_data, xmax_old_tuple); + oldtup->t_data->t_infomask |= infomask_old_tuple; + oldtup->t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo); /* record address of new tuple in t_ctid of old one */ - oldtup.t_data->t_ctid = heaptup->t_self; + oldtup->t_data->t_ctid = heaptup->t_self; /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */ if (PageIsAllVisible(BufferGetPage(buffer))) @@ -4133,7 +3983,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, all_visible_cleared = true; PageClearAllVisible(BufferGetPage(buffer)); visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer, VISIBILITYMAP_VALID_BITS); + *vmbuffer, VISIBILITYMAP_VALID_BITS); } if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf))) { @@ -4158,12 +4008,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, */ if (RelationIsAccessibleInLogicalDecoding(relation)) { - log_heap_new_cid(relation, &oldtup); + log_heap_new_cid(relation, oldtup); log_heap_new_cid(relation, heaptup); } recptr = log_heap_update(relation, buffer, - newbuf, &oldtup, heaptup, + newbuf, oldtup, heaptup, old_key_tuple, all_visible_cleared, all_visible_cleared_new); @@ -4188,7 +4038,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * both tuple versions in one call to inval.c so we can avoid redundant * sinval messages.) 
*/ - CacheInvalidateHeapTuple(relation, &oldtup, heaptup); + CacheInvalidateHeapTuple(relation, oldtup, heaptup); /* Now we can release the buffer(s) */ if (newbuf != buffer) @@ -4196,14 +4046,14 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, ReleaseBuffer(buffer); if (BufferIsValid(vmbuffer_new)) ReleaseBuffer(vmbuffer_new); - if (BufferIsValid(vmbuffer)) - ReleaseBuffer(vmbuffer); + if (BufferIsValid(*vmbuffer)) + ReleaseBuffer(*vmbuffer); /* * Release the lmgr tuple lock, if we had it. */ if (have_tuple_lock) - UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode); + UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode); pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer); @@ -4236,13 +4086,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, if (old_key_tuple != NULL && old_key_copied) heap_freetuple(old_key_tuple); - bms_free(hot_attrs); - bms_free(sum_attrs); - bms_free(key_attrs); - bms_free(id_attrs); - bms_free(modified_attrs); - bms_free(interesting_attrs); - return TM_Ok; } @@ -4251,7 +4094,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, * Confirm adequate lock held during heap_update(), per rules from * README.tuplock section "Locking to write inplace-updated tables". */ -static void +void check_lock_if_inplace_updateable_rel(Relation relation, const ItemPointerData *otid, HeapTuple newtup) @@ -4423,7 +4266,7 @@ heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2, * listed as interesting) of the old tuple is a member of external_cols and is * stored externally. */ -static Bitmapset * +Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, @@ -4506,25 +4349,175 @@ HeapDetermineColumnsInfo(Relation relation, } /* - * simple_heap_update - replace a tuple - * - * This routine may be used to update a tuple when concurrent updates of - * the target tuple are not expected (for example, because we have a lock - * on the relation associated with the tuple). Any failure is reported - * via ereport(). + * This routine may be used to update a tuple when concurrent updates of the + * target tuple are not expected (for example, because we have a lock on the + * relation associated with the tuple). Any failure is reported via ereport(). */ void -simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup, +simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple, TU_UpdateIndexes *update_indexes) { TM_Result result; TM_FailureData tmfd; LockTupleMode lockmode; + Buffer buffer; + Buffer vmbuffer = InvalidBuffer; + Page page; + BlockNumber block; + Bitmapset *hot_attrs, + *sum_attrs, + *pk_attrs, + *rid_attrs, + *mix_attrs, + *idx_attrs; + ItemId lp; + HeapTupleData oldtup; + bool rep_id_key_required = false; + + Assert(ItemPointerIsValid(otid)); + + /* Cheap, simplistic check that the tuple matches the rel's rowtype. */ + Assert(HeapTupleHeaderGetNatts(tuple->t_data) <= + RelationGetNumberOfAttributes(relation)); + + /* + * Forbid this during a parallel operation, lest it allocate a combo CID. + * Other workers might need that combo CID for visibility checks, and we + * have no provision for broadcasting it to them. 
+ */ + if (IsInParallelMode()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot update tuples during a parallel operation"))); + +#ifdef USE_ASSERT_CHECKING + check_lock_if_inplace_updateable_rel(relation, otid, tuple); +#endif + + /* + * Fetch the list of attributes to be checked for various operations. + * + * For HOT considerations, this is wasted effort if we fail to update or + * have to put the new tuple on a different page. But we must compute the + * list before obtaining buffer lock --- in the worst case, if we are + * doing an update on one of the relevant system catalogs, we could + * deadlock if we try to fetch the list later. In any case, the relcache + * caches the data so this is usually pretty cheap. + * + * We also need columns used by the replica identity and columns that are + * considered the "key" of rows in the table. + * + * Note that we get copies of each bitmap, so we need not worry about + * relcache flush happening midway through. + */ + hot_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_HOT_BLOCKING); + sum_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_SUMMARIZED); + pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); + rid_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_IDENTITY_KEY); + + idx_attrs = bms_copy(hot_attrs); + idx_attrs = bms_add_members(idx_attrs, sum_attrs); + idx_attrs = bms_add_members(idx_attrs, pk_attrs); + idx_attrs = bms_add_members(idx_attrs, rid_attrs); + + block = ItemPointerGetBlockNumber(otid); + INJECTION_POINT("heap_update-before-pin", NULL); + buffer = ReadBuffer(relation, block); + page = BufferGetPage(buffer); + + /* + * Before locking the buffer, pin the visibility map page if it appears to + * be necessary. Since we haven't got the lock yet, someone else might be + * in the middle of changing this, so we'll need to recheck after we have + * the lock. + */ + if (PageIsAllVisible(page)) + visibilitymap_pin(relation, block, &vmbuffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid)); + + /* + * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring + * we see LP_NORMAL here. When the otid origin is a syscache, we may have + * neither a pin nor a snapshot. Hence, we may see other LP_ states, each + * of which indicates concurrent pruning. + * + * Failing with TM_Updated would be most accurate. However, unlike other + * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and + * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted + * does matter to SQL statements UPDATE and MERGE, those SQL statements + * hold a snapshot that ensures LP_NORMAL. Hence, the choice between + * TM_Updated and TM_Deleted affects only the wording of error messages. + * Settle on TM_Deleted, for two reasons. First, it avoids complicating + * the specification of when tmfd->ctid is valid. Second, it creates + * error log evidence that we took this branch. + * + * Since it's possible to see LP_UNUSED at otid, it's also possible to see + * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an + * unrelated row, we'll fail with "duplicate key value violates unique". + * XXX if otid is the live, newer version of the newtup row, we'll discard + * changes originating in versions of this catalog row after the version + * the caller got from syscache. See syscache-update-pruned.spec. 
+ */ + if (!ItemIdIsNormal(lp)) + { + Assert(RelationSupportsSysCache(RelationGetRelid(relation))); + + UnlockReleaseBuffer(buffer); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + *update_indexes = TU_None; + + bms_free(hot_attrs); + bms_free(sum_attrs); + bms_free(pk_attrs); + bms_free(rid_attrs); + bms_free(idx_attrs); + /* mix_attrs not yet initialized */ + + elog(ERROR, "tuple concurrently deleted"); + + return; + } + + /* + * Partially construct the oldtup for HeapDetermineColumnsInfo to work and + * then pass that on to heap_update. + */ + oldtup.t_tableOid = RelationGetRelid(relation); + oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + oldtup.t_len = ItemIdGetLength(lp); + oldtup.t_self = *otid; + + mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs, + &oldtup, tuple, &rep_id_key_required); + + /* + * We'll need to WAL log the replica identity attributes if either they + * overlap with the modified indexed attributes or, as we've checked for + * just now in HeapDetermineColumnsInfo, they were unmodified external + * indexed attributes. + */ + rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs); + + result = heap_update(relation, &oldtup, tuple, GetCurrentCommandId(true), + InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode, + buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs, + rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required, + update_indexes); + + bms_free(hot_attrs); + bms_free(sum_attrs); + bms_free(pk_attrs); + bms_free(rid_attrs); + bms_free(mix_attrs); + bms_free(idx_attrs); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &tmfd, &lockmode, update_indexes); switch (result) { case TM_SelfModified: @@ -9164,12 +9157,11 @@ log_heap_new_cid(Relation relation, HeapTuple tup) * the same tuple that was passed in. */ static HeapTuple -ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, - bool *copy) +ExtractReplicaIdentity(Relation relation, HeapTuple tp, Bitmapset *rid_attrs, + bool key_required, bool *copy) { TupleDesc desc = RelationGetDescr(relation); char replident = relation->rd_rel->relreplident; - Bitmapset *idattrs; HeapTuple key_tuple; bool nulls[MaxHeapAttributeNumber]; Datum values[MaxHeapAttributeNumber]; @@ -9200,17 +9192,13 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, if (!key_required) return NULL; - /* find out the replica identity columns */ - idattrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_IDENTITY_KEY); - /* * If there's no defined replica identity columns, treat as !key_required. * (This case should not be reachable from heap_update, since that should * calculate key_required accurately. But heap_delete just passes * constant true for key_required, so we can hit this case in deletes.) */ - if (bms_is_empty(idattrs)) + if (bms_is_empty(rid_attrs)) return NULL; /* @@ -9223,7 +9211,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, for (int i = 0; i < desc->natts; i++) { if (bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber, - idattrs)) + rid_attrs)) Assert(!nulls[i]); else nulls[i] = true; @@ -9232,8 +9220,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, key_tuple = heap_form_tuple(desc, values, nulls); *copy = true; - bms_free(idattrs); - /* * If the tuple, which by here only contains indexed columns, still has * toasted columns, force them to be inlined. 
This is somewhat unlikely diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index bcbac844bb669..1cf9a18775d6d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -44,6 +44,7 @@ #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/rel.h" static void reform_and_rewrite_tuple(HeapTuple tuple, @@ -312,23 +313,133 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart); } - static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes) { + bool rep_id_key_required = false; bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + HeapTupleData oldtup; + Buffer buffer; + Buffer vmbuffer = InvalidBuffer; + Page page; + BlockNumber block; + ItemId lp; + Bitmapset *hot_attrs, + *sum_attrs, + *pk_attrs, + *rid_attrs, + *mix_attrs, + *idx_attrs; TM_Result result; + Assert(ItemPointerIsValid(otid)); + + /* Cheap, simplistic check that the tuple matches the rel's rowtype. */ + Assert(HeapTupleHeaderGetNatts(tuple->t_data) <= + RelationGetNumberOfAttributes(relation)); + + /* + * Forbid this during a parallel operation, lest it allocate a combo CID. + * Other workers might need that combo CID for visibility checks, and we + * have no provision for broadcasting it to them. + */ + if (IsInParallelMode()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot update tuples during a parallel operation"))); + +#ifdef USE_ASSERT_CHECKING + check_lock_if_inplace_updateable_rel(relation, otid, tuple); +#endif + + /* + * Fetch the list of attributes to be checked for various operations. + * + * For HOT considerations, this is wasted effort if we fail to update or + * have to put the new tuple on a different page. But we must compute the + * list before obtaining buffer lock --- in the worst case, if we are + * doing an update on one of the relevant system catalogs, we could + * deadlock if we try to fetch the list later. In any case, the relcache + * caches the data so this is usually pretty cheap. + * + * We also need columns used by the replica identity and columns that are + * considered the "key" of rows in the table. + * + * Note that we get copies of each bitmap, so we need not worry about + * relcache flush happening midway through. + */ + hot_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_HOT_BLOCKING); + sum_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_SUMMARIZED); + pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); + rid_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_IDENTITY_KEY); + + idx_attrs = bms_copy(hot_attrs); + idx_attrs = bms_add_members(idx_attrs, sum_attrs); + idx_attrs = bms_add_members(idx_attrs, pk_attrs); + idx_attrs = bms_add_members(idx_attrs, rid_attrs); + + block = ItemPointerGetBlockNumber(otid); + INJECTION_POINT("heap_update-before-pin", NULL); + buffer = ReadBuffer(relation, block); + page = BufferGetPage(buffer); + + /* + * Before locking the buffer, pin the visibility map page if it appears to + * be necessary. 
Since we haven't got the lock yet, someone else might be + * in the middle of changing this, so we'll need to recheck after we have + * the lock. + */ + if (PageIsAllVisible(page)) + visibilitymap_pin(relation, block, &vmbuffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid)); + + Assert(ItemIdIsNormal(lp)); + + /* + * Partially construct the oldtup for HeapDetermineColumnsInfo to work and + * then pass that on to heap_update. + */ + oldtup.t_tableOid = RelationGetRelid(relation); + oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + oldtup.t_len = ItemIdGetLength(lp); + oldtup.t_self = *otid; + + mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs, + &oldtup, tuple, &rep_id_key_required); + + /* + * We'll need to WAL log the replica identity attributes if either they + * overlap with the modified indexed attributes or, as we've checked for + * just now in HeapDetermineColumnsInfo, they were unmodified external + * indexed attributes. + */ + rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs); + /* Update the tuple with table oid */ slot->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slot->tts_tableOid; - result = heap_update(relation, otid, tuple, cid, crosscheck, wait, - tmfd, lockmode, update_indexes); + result = heap_update(relation, &oldtup, tuple, cid, crosscheck, wait, tmfd, lockmode, + buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs, + rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required, update_indexes); + + bms_free(hot_attrs); + bms_free(sum_attrs); + bms_free(pk_attrs); + bms_free(rid_attrs); + bms_free(mix_attrs); + bms_free(idx_attrs); + ItemPointerCopy(&tuple->t_self, &slot->tts_tid); /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 632c4332a8c34..2f9a2b069cd00 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -364,11 +364,13 @@ extern TM_Result heap_delete(Relation relation, const ItemPointerData *tid, TM_FailureData *tmfd, bool changingPart); extern void heap_finish_speculative(Relation relation, const ItemPointerData *tid); extern void heap_abort_speculative(Relation relation, const ItemPointerData *tid); -extern TM_Result heap_update(Relation relation, const ItemPointerData *otid, - HeapTuple newtup, - CommandId cid, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes); +extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup, + HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, + Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, + Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs, + Bitmapset *mix_attrs, Buffer *vmbuffer, + bool rep_id_key_required, TU_UpdateIndexes *update_indexes); extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, @@ -430,6 +432,18 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused); +/* in heap/heapam.c */ +extern Bitmapset *HeapDetermineColumnsInfo(Relation relation, + Bitmapset *interesting_cols, + Bitmapset *external_cols, + HeapTuple oldtup, HeapTuple newtup, + bool *has_external); +#ifdef USE_ASSERT_CHECKING +extern void check_lock_if_inplace_updateable_rel(Relation relation, + const 
ItemPointerData *otid, + HeapTuple newtup); +#endif + /* in heap/vacuumlazy.c */ extern void heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy);
From f7f174b96a9d0c87d017e6caa33fc8e19e103d21 Mon Sep 17 00:00:00 2001
From: Greg Burd
Date: Sun, 26 Oct 2025 10:49:25 -0400
Subject: [PATCH 3/5] Track changed indexed columns in the executor during UPDATEs

Refactor executor update logic to determine which indexed columns have actually changed during an UPDATE operation rather than leaving this up to HeapDetermineColumnsInfo in heap_update. This enables the comparison to happen without taking a lock on the page and opens the door to reuse in other code paths.

Because heap_update now requires the caller to provide the modified indexed columns, simple_heap_update has become a tad more complex. It is frequently called from CatalogTupleUpdate, which either updates heap tuples via their form or by using heap_modify_tuple. In both cases the caller does know the modified set of attributes, but sadly those attributes are lost before being provided to simple_heap_update. Because of that, the "simple" path has to retain the old HeapDetermineColumnsInfo logic (for now). For that to work it was necessary to split up the (overly large) heap_update call itself. This moves a bit of what existed in heap_update up into simple_heap_update and heap_tuple_update. Ideally this will be cleaned up once the CatalogTupleUpdate paths all record modified attributes correctly; when that happens the "simple" path can be simplified again.

ExecCheckIndexedAttrsForChanges replaces HeapDetermineColumnsInfo and tts_attr_equal replaces heap_attr_equal, changing the test for equality when calling into heap_tuple_update (but not simple_heap_update). In the past we used datumIsEqual(), essentially a binary comparison using memcmp(); now the comparison code in tts_attr_equal uses the type-specific equality function when available and falls back to datumIsEqual() when not. This change in equality testing has some intended implications and opens the door for more HOT updates (foreshadowing). For instance, indexes with collation information allow more HOT updates when the index is specified to be case insensitive.

This change forced some logic changes in execReplication on the update paths, as it is now required to have knowledge of the set of attributes that are both changed and referenced by indexes. Luckily, this is available within calls to slot_modify_data(), where LogicalRepTupleData is processed and carries the set of updated attributes. In this case, rather than using ExecCheckIndexedAttrsForChanges, we can preserve what slot_modify_data() identifies as the modified set and then intersect that with the set of indexed attributes on the relation to get the correct set of modified indexed attributes required by heap_update().
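As a rough illustration of the intended effect on HOT updates (the collation, table, and index names below are hypothetical and not part of this patch), a value change that compares equal under a case-insensitive collation is no longer treated as a modification of the indexed column, because tts_attr_equal consults the type's equality operator with the column's collation instead of comparing bytes:

    CREATE COLLATION case_insensitive
        (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
    CREATE TABLE t (id int PRIMARY KEY, name text COLLATE case_insensitive);
    CREATE INDEX t_name_idx ON t (name);

    -- 'Alice' and 'ALICE' compare equal under the collation, so the name
    -- column is not reported as changed and the update stays eligible for
    -- HOT; the old memcmp()-style datumIsEqual() check would have flagged
    -- it as an index-key change.
    UPDATE t SET name = 'ALICE' WHERE id = 1;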
--- src/backend/access/heap/heapam.c | 12 +- src/backend/access/heap/heapam_handler.c | 72 +++++-- src/backend/access/table/tableam.c | 5 +- src/backend/executor/execMain.c | 1 + src/backend/executor/execReplication.c | 7 + src/backend/executor/nodeModifyTable.c | 247 ++++++++++++++++++++++- src/backend/nodes/bitmapset.c | 4 + src/backend/replication/logical/worker.c | 72 ++++++- src/backend/utils/cache/relcache.c | 15 ++ src/include/access/tableam.h | 8 +- src/include/executor/executor.h | 5 + src/include/nodes/execnodes.h | 1 + src/include/utils/rel.h | 1 + src/include/utils/relcache.h | 1 + 14 files changed, 415 insertions(+), 36 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 30847db1fe332..d9fb390efceb1 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3278,12 +3278,12 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid) * generated by another transaction). */ TM_Result -heap_update(Relation relation, HeapTupleData *oldtup, - HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, - Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, - Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs, - Bitmapset *mix_attrs, Buffer *vmbuffer, +heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup, + CommandId cid, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, LockTupleMode *lockmode, + Buffer buffer, Page page, BlockNumber block, ItemId lp, + Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs, + Bitmapset *rid_attrs, Bitmapset *mix_attrs, Buffer *vmbuffer, bool rep_id_key_required, TU_UpdateIndexes *update_indexes) { TM_Result result; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 1cf9a18775d6d..ef08e1d3e1024 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -315,9 +315,12 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, - CommandId cid, Snapshot snapshot, Snapshot crosscheck, - bool wait, TM_FailureData *tmfd, - LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes) + CommandId cid, Snapshot snapshot, + Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, + LockTupleMode *lockmode, + Bitmapset *mix_attrs, + TU_UpdateIndexes *update_indexes) { bool rep_id_key_required = false; bool shouldFree = true; @@ -332,7 +335,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, *sum_attrs, *pk_attrs, *rid_attrs, - *mix_attrs, *idx_attrs; TM_Result result; @@ -414,16 +416,61 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; - mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs, - &oldtup, tuple, &rep_id_key_required); - /* - * We'll need to WAL log the replica identity attributes if either they - * overlap with the modified indexed attributes or, as we've checked for - * just now in HeapDetermineColumnsInfo, they were unmodified external - * indexed attributes. + * We'll need to include the replica identity key when either the identity + * key attributes overlap with the modified index attributes or when the + * replica identity attributes are stored externally. 
This is required + * because for such attributes the flattened value won't be WAL logged as + * part of the new tuple so we must determine if we need to extract and + * include them as part of the old_key_tuple (see ExtractReplicaIdentity). */ - rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs); + rep_id_key_required = bms_overlap(mix_attrs, rid_attrs); + if (!rep_id_key_required) + { + Bitmapset *attrs; + TupleDesc tupdesc = RelationGetDescr(relation); + int attidx = -1; + + /* + * We don't own idx_attrs so we'll copy it and remove the modified set + * to reduce the attributes we need to test in the while loop and + * avoid a two branches in the loop. + */ + attrs = bms_difference(idx_attrs, mix_attrs); + attrs = bms_int_members(attrs, rid_attrs); + + while ((attidx = bms_next_member(attrs, attidx)) >= 0) + { + /* + * attidx is zero-based, attrnum is the normal attribute number + */ + AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber; + Datum value; + bool isnull; + + /* + * System attributes are not added into interesting_attrs in + * relcache + */ + Assert(attrnum > 0); + + value = heap_getattr(&oldtup, attrnum, tupdesc, &isnull); + + /* No need to check attributes that can't be stored externally */ + if (isnull || + TupleDescCompactAttr(tupdesc, attrnum - 1)->attlen != -1) + continue; + + /* Check if the old tuple's attribute is stored externally */ + if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value))) + { + rep_id_key_required = true; + break; + } + } + + bms_free(attrs); + } /* Update the tuple with table oid */ slot->tts_tableOid = RelationGetRelid(relation); @@ -437,7 +484,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, bms_free(sum_attrs); bms_free(pk_attrs); bms_free(rid_attrs); - bms_free(mix_attrs); bms_free(idx_attrs); ItemPointerCopy(&tuple->t_self, &slot->tts_tid); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 1e099febdc8ca..a3c1b40653eaf 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -367,6 +367,7 @@ void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, + Bitmapset *modified_indexed_cols, TU_UpdateIndexes *update_indexes) { TM_Result result; @@ -377,7 +378,9 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, GetCurrentCommandId(true), snapshot, InvalidSnapshot, true /* wait for commit */ , - &tmfd, &lockmode, update_indexes); + &tmfd, &lockmode, + modified_indexed_cols, + update_indexes); switch (result) { diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 27c9eec697b18..6b7b6bc801952 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1282,6 +1282,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, /* The following fields are set later if needed */ resultRelInfo->ri_RowIdAttNo = 0; resultRelInfo->ri_extraUpdatedCols = NULL; + resultRelInfo->ri_ChangedIndexedCols = NULL; resultRelInfo->ri_projectNew = NULL; resultRelInfo->ri_newTupleSlot = NULL; resultRelInfo->ri_oldTupleSlot = NULL; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index def32774c90db..2709e2db0f2b4 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -32,6 +32,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "utils/snapmgr.h" #include 
"utils/syscache.h" #include "utils/typcache.h" @@ -936,7 +937,13 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, if (rel->rd_rel->relispartition) ExecPartitionCheck(resultRelInfo, slot, estate, true); + /* + * We're not going to call ExecCheckIndexedAttrsForChanges here + * because we've already identified the changes earlier on thanks to + * slot_modify_data. + */ simple_table_tuple_update(rel, tid, slot, estate->es_snapshot, + resultRelInfo->ri_ChangedIndexedCols, &update_indexes); conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index e44f12238864f..0a40e87e3275a 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -17,6 +17,7 @@ * ExecModifyTable - retrieve the next tuple from the node * ExecEndModifyTable - shut down the ModifyTable node * ExecReScanModifyTable - rescan the ModifyTable node + * ExecCheckIndexedAttrsForChanges - find set of updated indexed columns * * NOTES * The ModifyTable node receives input from its outerPlan, which is @@ -54,11 +55,14 @@ #include "access/htup_details.h" #include "access/tableam.h" +#include "access/tupconvert.h" +#include "access/tupdesc.h" #include "access/xact.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" #include "executor/nodeModifyTable.h" +#include "executor/tuptable.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" @@ -68,7 +72,9 @@ #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/float.h" #include "utils/injection_point.h" +#include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -177,6 +183,219 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context, bool canSetTag); +/* + * Compare two datums using the type's default equality operator. + * + * Returns true if the values are equal according to the type's equality + * operator, false otherwise. Falls back to binary comparison if no + * type-specific operator is available. + * + * This function uses the TypeCache infrastructure which caches operator + * lookups for efficiency. + */ +bool +tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen, + Datum value1, Datum value2) +{ + TypeCacheEntry *typentry; + + LOCAL_FCINFO(fcinfo, 2); + Datum result; + + /* + * Fast path for common types to avoid even the type cache lookup. These + * types have simple equality semantics. 
+ */ + switch (typid) + { + case INT2OID: + return DatumGetInt16(value1) == DatumGetInt16(value2); + case INT4OID: + return DatumGetInt32(value1) == DatumGetInt32(value2); + case INT8OID: + return DatumGetInt64(value1) == DatumGetInt64(value2); + case FLOAT4OID: + return !float4_cmp_internal(DatumGetFloat4(value1), DatumGetFloat4(value2)); + case FLOAT8OID: + return !float8_cmp_internal(DatumGetFloat8(value1), DatumGetFloat8(value2)); + case BOOLOID: + return DatumGetBool(value1) == DatumGetBool(value2); + case OIDOID: + case REGPROCOID: + case REGPROCEDUREOID: + case REGOPEROID: + case REGOPERATOROID: + case REGCLASSOID: + case REGTYPEOID: + case REGROLEOID: + case REGNAMESPACEOID: + case REGCONFIGOID: + case REGDICTIONARYOID: + return DatumGetObjectId(value1) == DatumGetObjectId(value2); + case CHAROID: + return DatumGetChar(value1) == DatumGetChar(value2); + default: + /* Continue to type cache lookup */ + break; + } + + /* + * Look up the type's equality operator using the type cache. Request both + * the operator OID and the function info for efficiency. + */ + typentry = lookup_type_cache(typid, + TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO); + + /* + * If no equality operator is available, fall back to binary comparison. + * This handles types that don't have proper equality operators defined. + */ + if (!OidIsValid(typentry->eq_opr)) + return datumIsEqual(value1, value2, typbyval, typlen); + + /* + * Use the cached function info if available, otherwise look it up. The + * type cache keeps this around so subsequent calls are fast. + */ + if (typentry->eq_opr_finfo.fn_addr == NULL) + { + Oid eq_proc = get_opcode(typentry->eq_opr); + + if (!OidIsValid(eq_proc)) + /* Shouldn't happen, but fall back to binary comparison */ + return datumIsEqual(value1, value2, typbyval, typlen); + + fmgr_info_cxt(eq_proc, &typentry->eq_opr_finfo, + CacheMemoryContext); + } + + /* Set up function call */ + InitFunctionCallInfoData(*fcinfo, &typentry->eq_opr_finfo, 2, + collation, NULL, NULL); + + fcinfo->args[0].value = value1; + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = value2; + fcinfo->args[1].isnull = false; + + /* Invoke the equality operator */ + result = FunctionCallInvoke(fcinfo); + + /* + * If the function returned NULL (shouldn't happen for equality ops), + * treat as not equal for safety. + */ + if (fcinfo->isnull) + return false; + + return DatumGetBool(result); +} + +/* + * Determine which updated attributes actually changed values between old and + * new tuples and are referenced by indexes on the relation. + * + * Returns a Bitmapset of attribute offsets (0-based, adjusted by + * FirstLowInvalidHeapAttributeNumber) or NULL if no attributes changed. + */ +Bitmapset * +ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo, + TupleTableSlot *tts_old, + TupleTableSlot *tts_new) +{ + Relation relation = relinfo->ri_RelationDesc; + TupleDesc tupdesc = RelationGetDescr(relation); + Bitmapset *indexed_attrs; + Bitmapset *modified = NULL; + int attidx; + + /* If no indexes, we're done */ + if (relinfo->ri_NumIndices == 0) + return NULL; + + /* + * Get the set of index key attributes. This includes summarizing, + * expression indexes and attributes mentioned in the predicate of a + * partition but not those in INCLUDING. 
+ */ + indexed_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_INDEXED); + Assert(!bms_is_empty(indexed_attrs)); + + /* + * NOTE: It is important to scan all indexed attributes in the tuples + * because ExecGetAllUpdatedCols won't include columns that may have been + * modified via heap_modify_tuple_by_col which is the case in + * tsvector_update_trigger. + */ + attidx = -1; + while ((attidx = bms_next_member(indexed_attrs, attidx)) >= 0) + { + /* attidx is zero-based, attrnum is the normal attribute number */ + AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber; + Form_pg_attribute attr; + bool oldnull, + newnull; + Datum oldval, + newval; + + /* + * If it's a whole-tuple reference, record as modified. It's not + * really worth supporting this case, since it could only succeed + * after a no-op update, which is hardly a case worth optimizing for. + */ + if (attrnum == 0) + { + modified = bms_add_member(modified, attidx); + continue; + } + + /* + * Likewise, include in the modified set any system attribute other + * than tableOID; we cannot expect these to be consistent in a HOT + * chain, or even to be set correctly yet in the new tuple. + */ + if (attrnum < 0) + { + if (attrnum != TableOidAttributeNumber) + modified = bms_add_member(modified, attidx); + continue; + } + + /* Extract values from both slots */ + oldval = slot_getattr(tts_old, attrnum, &oldnull); + newval = slot_getattr(tts_new, attrnum, &newnull); + + /* If one value is NULL and the other is not, they are not equal */ + if (oldnull != newnull) + { + modified = bms_add_member(modified, attidx); + continue; + } + + /* If both are NULL, consider them equal */ + if (oldnull) + continue; + + /* Get attribute metadata */ + Assert(attrnum > 0 && attrnum <= tupdesc->natts); + attr = TupleDescAttr(tupdesc, attrnum - 1); + + /* Compare using type-specific equality operator */ + if (!tts_attr_equal(attr->atttypid, + attr->attcollation, + attr->attbyval, + attr->attlen, + oldval, + newval)) + modified = bms_add_member(modified, attidx); + } + + bms_free(indexed_attrs); + + return modified; +} + /* * Verify that the tuples to be produced by INSERT match the * target relation's rowtype @@ -2170,8 +2389,8 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo, */ static TM_Result ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, - bool canSetTag, UpdateContext *updateCxt) + ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot, + TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt) { EState *estate = context->estate; Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -2293,6 +2512,16 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (resultRelationDesc->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); + /* + * Identify which, if any, indexed attributes were modified here so that + * we might reuse it in a few places. 
+ */ + bms_free(resultRelInfo->ri_ChangedIndexedCols); + resultRelInfo->ri_ChangedIndexedCols = NULL; + + resultRelInfo->ri_ChangedIndexedCols = + ExecCheckIndexedAttrsForChanges(resultRelInfo, oldSlot, slot); + /* * replace the heap tuple * @@ -2308,6 +2537,7 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, estate->es_crosscheck_snapshot, true /* wait for commit */ , &context->tmfd, &updateCxt->lockmode, + resultRelInfo->ri_ChangedIndexedCols, &updateCxt->updateIndexes); return result; @@ -2526,8 +2756,9 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ redo_act: lockedtid = *tupleid; - result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot, - canSetTag, &updateCxt); + + result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, oldSlot, + slot, canSetTag, &updateCxt); /* * If ExecUpdateAct reports that a cross-partition update was done, @@ -3224,8 +3455,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, Assert(oldtuple == NULL); result = ExecUpdateAct(context, resultRelInfo, tupleid, - NULL, newslot, canSetTag, - &updateCxt); + NULL, resultRelInfo->ri_oldTupleSlot, + newslot, canSetTag, &updateCxt); /* * As in ExecUpdate(), if ExecUpdateAct() reports that a @@ -3250,6 +3481,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, tupleid, NULL, newslot); mtstate->mt_merge_updated += 1; } + break; case CMD_DELETE: @@ -4356,7 +4588,7 @@ ExecModifyTable(PlanState *pstate) * For UPDATE/DELETE/MERGE, fetch the row identity info for the tuple * to be updated/deleted/merged. For a heap relation, that's a TID; * otherwise we may have a wholerow junk attr that carries the old - * tuple in toto. Keep this in step with the part of + * tuple in total. Keep this in step with the part of * ExecInitModifyTable that sets up ri_RowIdAttNo. */ if (operation == CMD_UPDATE || operation == CMD_DELETE || @@ -4532,6 +4764,7 @@ ExecModifyTable(PlanState *pstate) /* Now apply the update. 
*/ slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple, oldSlot, slot, node->canSetTag); + if (tuplock) UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid, InplaceUpdateTupleLock); diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 7b1e9d94103f8..c522971a37c22 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -238,6 +238,10 @@ bms_make_singleton(int x) void bms_free(Bitmapset *a) { +#if USE_ASSERT_CHECKING + Assert(bms_is_valid_set(a)); +#endif + if (a) pfree(a); } diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 93970c6af2948..b363eaa49ccec 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -243,6 +243,8 @@ */ #include "postgres.h" +#include "access/sysattr.h" +#include "nodes/bitmapset.h" #include #include @@ -275,7 +277,6 @@ #include "replication/logicalrelation.h" #include "replication/logicalworker.h" #include "replication/origin.h" -#include "replication/slot.h" #include "replication/walreceiver.h" #include "replication/worker_internal.h" #include "rewrite/rewriteHandler.h" @@ -291,6 +292,7 @@ #include "utils/memutils.h" #include "utils/pg_lsn.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "utils/rls.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -1110,15 +1112,18 @@ slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, * "slot" is filled with a copy of the tuple in "srcslot", replacing * columns provided in "tupleData" and leaving others as-is. * + * Returns a bitmap of the modified columns. + * * Caution: unreplaced pass-by-ref columns in "slot" will point into the * storage for "srcslot". This is OK for current usage, but someday we may * need to materialize "slot" at the end to make it independent of "srcslot". */ -static void +static Bitmapset * slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot, LogicalRepRelMapEntry *rel, LogicalRepTupleData *tupleData) { + Bitmapset *modified = NULL; int natts = slot->tts_tupleDescriptor->natts; int i; @@ -1195,6 +1200,28 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot, slot->tts_isnull[i] = true; } + /* + * Determine if the replicated value changed the local value by + * comparing slots. This is a subset of + * ExecCheckIndexedAttrsForChanges. 
+ */ + if (srcslot->tts_isnull[i] != slot->tts_isnull[i]) + { + /* One is NULL, the other is not so the value changed */ + modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber); + } + else if (!srcslot->tts_isnull[i]) + { + /* Both are not NULL, compare their values */ + if (!tts_attr_equal(att->atttypid, + att->attcollation, + att->attbyval, + att->attlen, + srcslot->tts_values[i], + slot->tts_values[i])) + modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber); + } + /* Reset attnum for error callback */ apply_error_callback_arg.remote_attnum = -1; } @@ -1202,6 +1229,8 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot, /* And finally, declare that "slot" contains a valid virtual tuple */ ExecStoreVirtualTuple(slot); + + return modified; } /* @@ -2918,6 +2947,7 @@ apply_handle_update_internal(ApplyExecutionData *edata, ConflictTupleInfo conflicttuple = {0}; bool found; MemoryContext oldctx; + Bitmapset *indexed = NULL; EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL); ExecOpenIndices(relinfo, false); @@ -2934,6 +2964,8 @@ apply_handle_update_internal(ApplyExecutionData *edata, */ if (found) { + Bitmapset *modified = NULL; + /* * Report the conflict if the tuple was modified by a different * origin. @@ -2957,15 +2989,29 @@ apply_handle_update_internal(ApplyExecutionData *edata, /* Process and store remote tuple in the slot */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - slot_modify_data(remoteslot, localslot, relmapentry, newtup); + modified = slot_modify_data(remoteslot, localslot, relmapentry, newtup); MemoryContextSwitchTo(oldctx); + /* + * Normally we'd call ExecCheckIndexedAttrForChanges but here we have + * the record of changed columns in the replication state, so let's + * use that instead. + */ + indexed = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, + INDEX_ATTR_BITMAP_INDEXED); + + bms_free(relinfo->ri_ChangedIndexedCols); + relinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed); + bms_free(indexed); + EvalPlanQualSetSlot(&epqstate, remoteslot); InitConflictIndexes(relinfo); - /* Do the actual update. */ + /* First check privileges */ TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_UPDATE); + + /* Then do the actual update. */ ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot, remoteslot); } @@ -3455,6 +3501,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, bool found; EPQState epqstate; ConflictTupleInfo conflicttuple = {0}; + Bitmapset *modified = NULL; + Bitmapset *indexed; /* Get the matching local tuple from the partition. */ found = FindReplTupleInLocalRel(edata, partrel, @@ -3523,8 +3571,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, * remoteslot_part. */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - slot_modify_data(remoteslot_part, localslot, part_entry, - newtup); + modified = slot_modify_data(remoteslot_part, localslot, part_entry, + newtup); MemoryContextSwitchTo(oldctx); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL); @@ -3549,6 +3597,18 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, EvalPlanQualSetSlot(&epqstate, remoteslot_part); TargetPrivilegesCheck(partrelinfo->ri_RelationDesc, ACL_UPDATE); + + /* + * Normally we'd call ExecCheckIndexedAttrForChanges but + * here we have the record of changed columns in the + * replication state, so let's use that instead. 
+ */ + indexed = RelationGetIndexAttrBitmap(partrelinfo->ri_RelationDesc, + INDEX_ATTR_BITMAP_INDEXED); + bms_free(partrelinfo->ri_ChangedIndexedCols); + partrelinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed); + bms_free(indexed); + ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate, localslot, remoteslot_part); } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 915d0bc908423..32825596be1d8 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2482,6 +2482,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) bms_free(relation->rd_idattr); bms_free(relation->rd_hotblockingattr); bms_free(relation->rd_summarizedattr); + bms_free(relation->rd_indexedattr); if (relation->rd_pubdesc) pfree(relation->rd_pubdesc); if (relation->rd_options) @@ -5283,6 +5284,7 @@ RelationGetIndexPredicate(Relation relation) * index (empty if FULL) * INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT * INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes + * INDEX_ATTR_BITMAP_INDEXED Columns referenced by indexes * * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that * we can include system attributes (e.g., OID) in the bitmap representation. @@ -5307,6 +5309,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) Bitmapset *idindexattrs; /* columns in the replica identity */ Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */ Bitmapset *summarizedattrs; /* columns with summarizing indexes */ + Bitmapset *indexedattrs; /* columns referenced by indexes */ List *indexoidlist; List *newindexoidlist; Oid relpkindex; @@ -5329,6 +5332,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return bms_copy(relation->rd_hotblockingattr); case INDEX_ATTR_BITMAP_SUMMARIZED: return bms_copy(relation->rd_summarizedattr); + case INDEX_ATTR_BITMAP_INDEXED: + return bms_copy(relation->rd_indexedattr); default: elog(ERROR, "unknown attrKind %u", attrKind); } @@ -5373,6 +5378,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) idindexattrs = NULL; hotblockingattrs = NULL; summarizedattrs = NULL; + indexedattrs = NULL; foreach(l, indexoidlist) { Oid indexOid = lfirst_oid(l); @@ -5505,10 +5511,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) bms_free(idindexattrs); bms_free(hotblockingattrs); bms_free(summarizedattrs); + bms_free(indexedattrs); goto restart; } + /* Combine all index attributes */ + indexedattrs = bms_union(hotblockingattrs, summarizedattrs); + /* Don't leak the old values of these bitmaps, if any */ relation->rd_attrsvalid = false; bms_free(relation->rd_keyattr); @@ -5521,6 +5531,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relation->rd_hotblockingattr = NULL; bms_free(relation->rd_summarizedattr); relation->rd_summarizedattr = NULL; + bms_free(relation->rd_indexedattr); + relation->rd_indexedattr = NULL; /* * Now save copies of the bitmaps in the relcache entry. 
We intentionally @@ -5535,6 +5547,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relation->rd_idattr = bms_copy(idindexattrs); relation->rd_hotblockingattr = bms_copy(hotblockingattrs); relation->rd_summarizedattr = bms_copy(summarizedattrs); + relation->rd_indexedattr = bms_copy(indexedattrs); relation->rd_attrsvalid = true; MemoryContextSwitchTo(oldcxt); @@ -5551,6 +5564,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return hotblockingattrs; case INDEX_ATTR_BITMAP_SUMMARIZED: return summarizedattrs; + case INDEX_ATTR_BITMAP_INDEXED: + return indexedattrs; default: elog(ERROR, "unknown attrKind %u", attrKind); return NULL; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 2fa790b6bf549..f6237949bd26c 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -549,6 +549,7 @@ typedef struct TableAmRoutine bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, + Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes); /* see table_tuple_lock() for reference about parameters */ @@ -1512,12 +1513,12 @@ static inline TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes) + Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes) { return rel->rd_tableam->tuple_update(rel, otid, slot, cid, snapshot, crosscheck, - wait, tmfd, - lockmode, update_indexes); + wait, tmfd, lockmode, + updated_cols, update_indexes); } /* @@ -2020,6 +2021,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot); extern void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, + Bitmapset *modified_indexe_attrs, TU_UpdateIndexes *update_indexes); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index fa2b657fb2ffb..993dc0e6cedd2 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -800,5 +800,10 @@ extern ResultRelInfo *ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache); +extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *resultRelInfo, + TupleTableSlot *tts_old, + TupleTableSlot *tts_new); +extern bool tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen, + Datum value1, Datum value2); #endif /* EXECUTOR_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 64ff6996431eb..ae858cc90ed8b 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -498,6 +498,7 @@ typedef struct ResultRelInfo Bitmapset *ri_extraUpdatedCols; /* true if the above has been computed */ bool ri_extraUpdatedCols_valid; + Bitmapset *ri_ChangedIndexedCols; /* Projection to generate new tuple in an INSERT/UPDATE */ ProjectionInfo *ri_projectNew; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 80286076a111a..b23a7306e69c2 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -164,6 +164,7 @@ typedef struct RelationData Bitmapset *rd_idattr; /* included in replica identity index */ Bitmapset *rd_hotblockingattr; /* cols blocking HOT update */ Bitmapset *rd_summarizedattr; /* cols indexed by summarizing indexes */ + Bitmapset *rd_indexedattr; /* all cols referenced by indexes */ PublicationDesc *rd_pubdesc; /* publication descriptor, or NULL */ 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 3561c6bef0bfc..d3fbb8b093a50 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -71,6 +71,7 @@ typedef enum IndexAttrBitmapKind INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_SUMMARIZED, + INDEX_ATTR_BITMAP_INDEXED, } IndexAttrBitmapKind; extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation, From 62e45d279d1fd3fbe93eac4f7f04b0833e6f9fe9 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 31 Oct 2025 14:55:25 -0400 Subject: [PATCH 4/5] Replace index_unchanged_by_update with ri_ChangedIndexedCols In execIndexing on updates we'd like to pass a hint to the indexing code when the indexed attributes are unchanged. This commit replaces the now redundant code in index_unchanged_by_update with the same information found earlier in the update path. --- src/backend/catalog/toasting.c | 2 - src/backend/executor/execIndexing.c | 156 +--------------------------- src/backend/nodes/makefuncs.c | 2 - src/include/nodes/execnodes.h | 4 - 4 files changed, 1 insertion(+), 163 deletions(-) diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 874a8fc89adb3..5d819bda54a2a 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -300,8 +300,6 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo->ii_Unique = true; indexInfo->ii_NullsNotDistinct = false; indexInfo->ii_ReadyForInserts = true; - indexInfo->ii_CheckedUnchanged = false; - indexInfo->ii_IndexUnchanged = false; indexInfo->ii_Concurrent = false; indexInfo->ii_BrokenHotChain = false; indexInfo->ii_ParallelWorkers = 0; diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index dd323c9b9fd42..09306515b56f0 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -139,11 +139,6 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, static bool index_recheck_constraint(Relation index, const Oid *constr_procs, const Datum *existing_values, const bool *existing_isnull, const Datum *new_values); -static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo, - EState *estate, IndexInfo *indexInfo, - Relation indexRelation); -static bool index_expression_changed_walker(Node *node, - Bitmapset *allUpdatedCols); static void ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval, char typtype, Oid atttypid); @@ -441,10 +436,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * index. If we're being called as part of an UPDATE statement, * consider if the 'indexUnchanged' = true hint should be passed. */ - indexUnchanged = update && index_unchanged_by_update(resultRelInfo, - estate, - indexInfo, - indexRelation); + indexUnchanged = update && bms_is_empty(resultRelInfo->ri_ChangedIndexedCols); satisfiesConstraint = index_insert(indexRelation, /* index relation */ @@ -999,152 +991,6 @@ index_recheck_constraint(Relation index, const Oid *constr_procs, return true; } -/* - * Check if ExecInsertIndexTuples() should pass indexUnchanged hint. - * - * When the executor performs an UPDATE that requires a new round of index - * tuples, determine if we should pass 'indexUnchanged' = true hint for one - * single index. 
- */ -static bool -index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate, - IndexInfo *indexInfo, Relation indexRelation) -{ - Bitmapset *updatedCols; - Bitmapset *extraUpdatedCols; - Bitmapset *allUpdatedCols; - bool hasexpression = false; - List *idxExprs; - - /* - * Check cache first - */ - if (indexInfo->ii_CheckedUnchanged) - return indexInfo->ii_IndexUnchanged; - indexInfo->ii_CheckedUnchanged = true; - - /* - * Check for indexed attribute overlap with updated columns. - * - * Only do this for key columns. A change to a non-key column within an - * INCLUDE index should not be counted here. Non-key column values are - * opaque payload state to the index AM, a little like an extra table TID. - * - * Note that row-level BEFORE triggers won't affect our behavior, since - * they don't affect the updatedCols bitmaps generally. It doesn't seem - * worth the trouble of checking which attributes were changed directly. - */ - updatedCols = ExecGetUpdatedCols(resultRelInfo, estate); - extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate); - for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++) - { - int keycol = indexInfo->ii_IndexAttrNumbers[attr]; - - if (keycol <= 0) - { - /* - * Skip expressions for now, but remember to deal with them later - * on - */ - hasexpression = true; - continue; - } - - if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber, - updatedCols) || - bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber, - extraUpdatedCols)) - { - /* Changed key column -- don't hint for this index */ - indexInfo->ii_IndexUnchanged = false; - return false; - } - } - - /* - * When we get this far and index has no expressions, return true so that - * index_insert() call will go on to pass 'indexUnchanged' = true hint. - * - * The _absence_ of an indexed key attribute that overlaps with updated - * attributes (in addition to the total absence of indexed expressions) - * shows that the index as a whole is logically unchanged by UPDATE. - */ - if (!hasexpression) - { - indexInfo->ii_IndexUnchanged = true; - return true; - } - - /* - * Need to pass only one bms to expression_tree_walker helper function. - * Avoid allocating memory in common case where there are no extra cols. - */ - if (!extraUpdatedCols) - allUpdatedCols = updatedCols; - else - allUpdatedCols = bms_union(updatedCols, extraUpdatedCols); - - /* - * We have to work slightly harder in the event of indexed expressions, - * but the principle is the same as before: try to find columns (Vars, - * actually) that overlap with known-updated columns. - * - * If we find any matching Vars, don't pass hint for index. Otherwise - * pass hint. - */ - idxExprs = RelationGetIndexExpressions(indexRelation); - hasexpression = index_expression_changed_walker((Node *) idxExprs, - allUpdatedCols); - list_free(idxExprs); - if (extraUpdatedCols) - bms_free(allUpdatedCols); - - if (hasexpression) - { - indexInfo->ii_IndexUnchanged = false; - return false; - } - - /* - * Deliberately don't consider index predicates. We should even give the - * hint when result rel's "updated tuple" has no corresponding index - * tuple, which is possible with a partial index (provided the usual - * conditions are met). - */ - indexInfo->ii_IndexUnchanged = true; - return true; -} - -/* - * Indexed expression helper for index_unchanged_by_update(). - * - * Returns true when Var that appears within allUpdatedCols located. 
- */ -static bool -index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols) -{ - if (node == NULL) - return false; - - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, - allUpdatedCols)) - { - /* Var was updated -- indicates that we should not hint */ - return true; - } - - /* Still haven't found a reason to not pass the hint */ - return false; - } - - return expression_tree_walker(node, index_expression_changed_walker, - allUpdatedCols); -} - /* * ExecWithoutOverlapsNotEmpty - raise an error if the tuple has an empty * range or multirange in the given attribute. diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index e2d9e9be41a65..d69dc090aa417 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -845,8 +845,6 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions, n->ii_Unique = unique; n->ii_NullsNotDistinct = nulls_not_distinct; n->ii_ReadyForInserts = isready; - n->ii_CheckedUnchanged = false; - n->ii_IndexUnchanged = false; n->ii_Concurrent = concurrent; n->ii_Summarizing = summarizing; n->ii_WithoutOverlaps = withoutoverlaps; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ae858cc90ed8b..4cedbd8acf69e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -202,10 +202,6 @@ typedef struct IndexInfo bool ii_NullsNotDistinct; /* is it valid for inserts? */ bool ii_ReadyForInserts; - /* IndexUnchanged status determined yet? */ - bool ii_CheckedUnchanged; - /* aminsert hint, cached for retail inserts */ - bool ii_IndexUnchanged; /* are we doing a concurrent index build? */ bool ii_Concurrent; /* did we detect any broken HOT chains? */ From 94e88c71880ecb2f62706183765ecefaf6e10b5c Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 26 Oct 2025 10:49:25 -0400 Subject: [PATCH 5/5] Enable HOT updates for expression and partial indexes Currently, PostgreSQL conservatively prevents HOT (Heap-Only Tuple) updates whenever any indexed column changes, even if the indexed portion of that column remains identical. This is overly restrictive for expression indexes (where f(column) might not change even when column changes) and partial indexes (where both old and new tuples might fall outside the predicate). Finally, index AMs play no role in deciding when they need a new index entry on update, the rules regarding that are based on binary equality and the HEAP's model for MVCC and related HOT optimization. Here we open that door a bit so as to enable more nuanced control over the process. This enables index AMs that require binary equality (as is the case for nbtree) to do that without disallowing type-specific equality checking for other indexes. This patch introduces several improvements to enable HOT updates in these cases: Add amcomparedatums() callback to IndexAmRoutine. This allows index access methods like GIN to provide custom logic for comparing datums by extracting and comparing index keys rather than comparing the raw datums. GIN indexes now implement gincomparedatums() which extracts keys from both datums and compares the resulting key sets. Also, as mentioned earlier nbtree implements this API and uses datumIsEqual() for equality so that the manner in which it deduplicates TIDs on page split doesn't have to change. 
This is not a required API; when not implemented, the executor compares TupleTableSlot datums for equality using type-specific operators, taking collation into account, so that an update from "Apple" to "APPLE" on a case-insensitive index can now be HOT.

ExecCheckIndexedAttrsForChanges() is rewritten to find the set of modified indexed attributes that trigger new index tuples on update. For partial indexes, this checks whether both old and new tuples satisfy or fail the predicate. For expression indexes, this uses type-specific equality operators to compare computed values. For extraction-based indexes (GIN/RUM) that implement amcomparedatums(), it uses that callback.

Importantly, table access methods can still signal via TU_UpdateIndexes whether all, none, or only summarizing indexes should be updated. While the executor layer now owns determining what has changed due to an update and is interested in updating only the minimum number of indexes possible, the table AM can override that while performing table_tuple_update(), which is what heap does. While this signal is very specific to how the heap implements MVCC and its HOT optimization, we'll leave replacing that for another day.

This optimization trades some new overhead for the potential for more updates to use the HOT-optimized path and avoid index and heap bloat. It should significantly improve update performance for tables with expression indexes, partial indexes, and GIN/GiST indexes on complex data types like JSONB and tsvector, while maintaining correct index semantics. The small additional overhead of type-specific equality checking should be washed out by the benefit of updating indexes fewer times. One notable trade-off is that there are more calls to FormIndexDatum() as a result. Caching these might reduce some of that overhead, but not all. This led to a change in how often the expressions in the spec update test emit notice messages, but it does not affect correctness. 
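To make the new hook concrete, here is a minimal sketch of an index AM wiring up the amcomparedatums() callback introduced by this patch. Only the callback signature is taken from the patch (see gincomparedatums() and hashcomparedatums() in the diffs below); the AM and function name examplecomparedatums() are hypothetical, and the datumIsEqual() fallback simply mirrors the conservative binary-equality behavior described above for nbtree-style AMs.

    #include "postgres.h"

    #include "access/amapi.h"
    #include "utils/datum.h"
    #include "utils/rel.h"

    /*
     * Report whether the old and new values would produce the same entry in
     * this index.  Returning true lets the executor treat the attribute as
     * unchanged for this index, keeping the update eligible for HOT.
     *
     * "attnum" is the 1-based index attribute number, as passed by the
     * executor in this patch.  This sketch falls back to binary equality;
     * AMs like GIN can instead extract and compare index keys here.
     */
    static bool
    examplecomparedatums(Relation index, int attnum,
                         Datum old_datum, bool old_isnull,
                         Datum new_datum, bool new_isnull)
    {
        Form_pg_attribute attr;

        /* NULLness must match before the values can be considered equal */
        if (old_isnull != new_isnull)
            return false;
        if (old_isnull)
            return true;

        attr = TupleDescAttr(RelationGetDescr(index), attnum - 1);
        return datumIsEqual(old_datum, new_datum,
                            attr->attbyval, attr->attlen);
    }

    /*
     * In the AM's handler function the callback is registered next to the
     * existing ones, e.g.:
     *
     *     amroutine->amcomparedatums = examplecomparedatums;
     */

An extraction-based AM would replace the datumIsEqual() call with key extraction and key-set comparison (as gincomparedatums() does below), so that an update touching only non-indexed parts of a JSONB document still qualifies for HOT.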
--- src/backend/access/brin/brin.c | 1 + src/backend/access/gin/ginutil.c | 92 +- src/backend/access/hash/hash.c | 44 + src/backend/access/heap/heapam.c | 10 +- src/backend/access/heap/heapam_handler.c | 6 +- src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/table/tableam.c | 4 +- src/backend/bootstrap/bootstrap.c | 8 + src/backend/catalog/index.c | 54 + src/backend/catalog/indexing.c | 16 +- src/backend/catalog/toasting.c | 4 + src/backend/executor/execIndexing.c | 45 +- src/backend/executor/nodeModifyTable.c | 496 ++++- src/backend/nodes/makefuncs.c | 4 + src/include/access/amapi.h | 28 + src/include/access/gin.h | 3 + src/include/access/heapam.h | 6 +- src/include/access/nbtree.h | 4 + src/include/access/tableam.h | 8 +- src/include/catalog/index.h | 1 + src/include/executor/executor.h | 12 +- src/include/nodes/execnodes.h | 19 + .../expected/insert-conflict-specconflict.out | 20 + .../regress/expected/heap_hot_updates.out | 1922 +++++++++++++++++ src/test/regress/parallel_schedule | 6 + src/test/regress/sql/heap_hot_updates.sql | 1325 ++++++++++++ src/tools/pgindent/typedefs.list | 1 + 27 files changed, 4015 insertions(+), 125 deletions(-) create mode 100644 src/test/regress/expected/heap_hot_updates.out create mode 100644 src/test/regress/sql/heap_hot_updates.sql diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index cb3331921cbfd..36e639552e62b 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -290,6 +290,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amproperty = NULL; amroutine->ambuildphasename = NULL; amroutine->amvalidate = brinvalidate; + amroutine->amcomparedatums = NULL; amroutine->amadjustmembers = NULL; amroutine->ambeginscan = brinbeginscan; amroutine->amrescan = brinrescan; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 78f7b7a2495cf..8e31ec21c1c94 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -26,6 +26,7 @@ #include "storage/indexfsm.h" #include "utils/builtins.h" #include "utils/index_selfuncs.h" +#include "utils/memutils.h" #include "utils/rel.h" #include "utils/typcache.h" @@ -78,6 +79,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amproperty = NULL; amroutine->ambuildphasename = ginbuildphasename; amroutine->amvalidate = ginvalidate; + amroutine->amcomparedatums = gincomparedatums; amroutine->amadjustmembers = ginadjustmembers; amroutine->ambeginscan = ginbeginscan; amroutine->amrescan = ginrescan; @@ -477,13 +479,6 @@ cmpEntries(const void *a, const void *b, void *arg) return res; } - -/* - * Extract the index key values from an indexable item - * - * The resulting key values are sorted, and any duplicates are removed. - * This avoids generating redundant index entries. - */ Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, @@ -729,3 +724,86 @@ ginbuildphasename(int64 phasenum) return NULL; } } + +/* + * gincomparedatums - Compare datums to determine if they produce identical keys + * + * This function extracts keys from both old_datum and new_datum using the + * opclass's extractValue function, then compares the extracted key arrays. + * Returns true if the key sets are identical (same keys, same counts). + * + * This enables HOT updates for GIN indexes when the indexed portions of a + * value haven't changed, even if the value itself has changed. + * + * Example: JSONB column with GIN index. 
If an update changes a non-indexed + * key in the JSONB document, the extracted keys are identical and we can + * do a HOT update. + */ +bool +gincomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull) +{ + GinState ginstate; + Datum *old_keys; + Datum *new_keys; + GinNullCategory *old_categories; + GinNullCategory *new_categories; + int32 old_nkeys; + int32 new_nkeys; + MemoryContext tmpcontext; + MemoryContext oldcontext; + bool result = true; + + /* Handle NULL cases */ + if (old_isnull != new_isnull) + return false; + if (old_isnull) + return true; + + /* Create temporary context for extraction work */ + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "GIN datum comparison", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + initGinState(&ginstate, index); + + /* Extract keys from both datums using existing GIN infrastructure */ + old_keys = ginExtractEntries(&ginstate, attnum, old_datum, old_isnull, + &old_nkeys, &old_categories); + new_keys = ginExtractEntries(&ginstate, attnum, new_datum, new_isnull, + &new_nkeys, &new_categories); + + /* Different number of keys, definitely different */ + if (old_nkeys != new_nkeys) + { + result = false; + goto cleanup; + } + + /* + * Compare the sorted key arrays element-by-element. Since both arrays are + * already sorted by ginExtractEntries, we can do a simple O(n) + * comparison. + */ + for (int i = 0; i < old_nkeys; i++) + { + int cmp = ginCompareEntries(&ginstate, attnum, + old_keys[i], old_categories[i], + new_keys[i], new_categories[i]); + + if (cmp != 0) + { + result = false; + break; + } + } + +cleanup: + /* Clean up */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(tmpcontext); + + return result; +} diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 53061c819fbf0..91371dfdacdb0 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -50,6 +50,10 @@ static void hashbuildCallback(Relation index, void *state); +static bool hashcomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); + /* * Hash handler function: return IndexAmRoutine with access method parameters * and callbacks. @@ -98,6 +102,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amproperty = NULL; amroutine->ambuildphasename = NULL; amroutine->amvalidate = hashvalidate; + amroutine->amcomparedatums = hashcomparedatums; amroutine->amadjustmembers = hashadjustmembers; amroutine->ambeginscan = hashbeginscan; amroutine->amrescan = hashrescan; @@ -944,3 +949,42 @@ hashtranslatecmptype(CompareType cmptype, Oid opfamily) return HTEqualStrategyNumber; return InvalidStrategy; } + +/* + * hashcomparedatums - Compare datums to determine if they produce identical keys + * + * Returns true if the hash values are identical (index doesn't need update). + */ +bool +hashcomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull) +{ + uint32 old_hashkey; + uint32 new_hashkey; + + /* If both are NULL, they're equal */ + if (old_isnull && new_isnull) + return true; + + /* If NULL status differs, they're not equal */ + if (old_isnull != new_isnull) + return false; + + /* + * _hash_datum2hashkey() is used because we know this can't be a cross + * type comparison. 
+ */ + old_hashkey = _hash_datum2hashkey(index, old_datum); + new_hashkey = _hash_datum2hashkey(index, new_datum); + + /* + * If hash keys are identical, the index entry would be the same. Return + * true to indicate no index update needed. + * + * Note: Hash collisions are rare but possible. If hash(x) == hash(y) but + * x != y, the hash index still treats them identically, so we correctly + * return true. + */ + return (old_hashkey == new_hashkey); +} diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d9fb390efceb1..3e88bdbbda86a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3283,7 +3283,7 @@ heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup, TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs, - Bitmapset *rid_attrs, Bitmapset *mix_attrs, Buffer *vmbuffer, + Bitmapset *rid_attrs, const Bitmapset *mix_attrs, Buffer *vmbuffer, bool rep_id_key_required, TU_UpdateIndexes *update_indexes) { TM_Result result; @@ -4352,8 +4352,9 @@ HeapDetermineColumnsInfo(Relation relation, * This routine may be used to update a tuple when concurrent updates of the * target tuple are not expected (for example, because we have a lock on the * relation associated with the tuple). Any failure is reported via ereport(). + * Returns the set of modified indexed attributes. */ -void +Bitmapset * simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple, TU_UpdateIndexes *update_indexes) { @@ -4482,7 +4483,7 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup elog(ERROR, "tuple concurrently deleted"); - return; + return NULL; } /* @@ -4515,7 +4516,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup bms_free(sum_attrs); bms_free(pk_attrs); bms_free(rid_attrs); - bms_free(mix_attrs); bms_free(idx_attrs); switch (result) @@ -4541,6 +4541,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup elog(ERROR, "unrecognized heap_update status: %u", result); break; } + + return mix_attrs; } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ef08e1d3e1024..7527809ec0802 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -319,7 +319,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *mix_attrs, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes) { bool rep_id_key_required = false; @@ -407,10 +407,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, Assert(ItemIdIsNormal(lp)); - /* - * Partially construct the oldtup for HeapDetermineColumnsInfo to work and - * then pass that on to heap_update. 
- */ oldtup.t_tableOid = RelationGetRelid(relation); oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index fdff960c13022..e435f0d5db49e 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -155,6 +155,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amproperty = btproperty; amroutine->ambuildphasename = btbuildphasename; amroutine->amvalidate = btvalidate; + amroutine->amcomparedatums = NULL; amroutine->amadjustmembers = btadjustmembers; amroutine->ambeginscan = btbeginscan; amroutine->amrescan = btrescan; diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index a3c1b40653eaf..15f0dd7aa2857 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -367,7 +367,7 @@ void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - Bitmapset *modified_indexed_cols, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes) { TM_Result result; @@ -379,7 +379,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, snapshot, InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode, - modified_indexed_cols, + mix_attrs, update_indexes); switch (result) diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index fc8638c1b61b6..329c110d0bfd3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -961,10 +961,18 @@ index_register(Oid heap, newind->il_info->ii_Expressions = copyObject(indexInfo->ii_Expressions); newind->il_info->ii_ExpressionsState = NIL; + /* expression attrs will likely be null, but may as well copy it */ + newind->il_info->ii_ExpressionsAttrs = + copyObject(indexInfo->ii_ExpressionsAttrs); /* predicate will likely be null, but may as well copy it */ newind->il_info->ii_Predicate = copyObject(indexInfo->ii_Predicate); newind->il_info->ii_PredicateState = NULL; + /* predicate attrs will likely be null, but may as well copy it */ + newind->il_info->ii_PredicateAttrs = + copyObject(indexInfo->ii_PredicateAttrs); + newind->il_info->ii_CheckedPredicate = false; + newind->il_info->ii_PredicateSatisfied = false; /* no exclusion constraints at bootstrap time, so no need to copy */ Assert(indexInfo->ii_ExclusionOps == NULL); Assert(indexInfo->ii_ExclusionProcs == NULL); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5d9db167e5950..e88db7e919b8b 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -27,6 +27,7 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/relscan.h" +#include "access/sysattr.h" #include "access/tableam.h" #include "access/toast_compression.h" #include "access/transam.h" @@ -58,6 +59,7 @@ #include "commands/trigger.h" #include "executor/executor.h" #include "miscadmin.h" +#include "nodes/execnodes.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -2414,6 +2416,58 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) * ---------------------------------------------------------------- */ +/* ---------------- + * BuildUpdateIndexInfo + * + * For expression indexes updates may not change the indexed value allowing + * for a HOT update. Add information to the IndexInfo to allow for checking + * if the indexed value has changed. 
+ * + * Do this processing here rather than in BuildIndexInfo() to not incur the + * overhead in the common non-expression cases. + * ---------------- + */ +void +BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo) +{ + for (int j = 0; j < resultRelInfo->ri_NumIndices; j++) + { + int i; + int indnatts; + Bitmapset *attrs = NULL; + IndexInfo *ii = resultRelInfo->ri_IndexRelationInfo[j]; + + indnatts = ii->ii_NumIndexAttrs; + + /* Collect key attributes used by the index, key and including */ + for (i = 0; i < indnatts; i++) + { + AttrNumber attnum = ii->ii_IndexAttrNumbers[i]; + + if (attnum != 0) + attrs = bms_add_member(attrs, attnum - FirstLowInvalidHeapAttributeNumber); + } + + /* Collect attributes used in the expression */ + if (ii->ii_Expressions) + pull_varattnos((Node *) ii->ii_Expressions, + resultRelInfo->ri_RangeTableIndex, + &ii->ii_ExpressionsAttrs); + + /* Collect attributes used in the predicate */ + if (ii->ii_Predicate) + pull_varattnos((Node *) ii->ii_Predicate, + resultRelInfo->ri_RangeTableIndex, + &ii->ii_PredicateAttrs); + + /* Combine key, including, and expression attributes, but not predicate */ + ii->ii_IndexedAttrs = bms_union(attrs, ii->ii_ExpressionsAttrs); + + /* All indexes should index *something*! */ + Assert(!bms_is_empty(ii->ii_IndexedAttrs)); + } +} + /* ---------------- * BuildIndexInfo * Construct an IndexInfo record for an open index diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 004c5121000fe..a361c21549012 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -102,7 +102,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple, * Get information from the state structure. Fall out if nothing to do. */ numIndexes = indstate->ri_NumIndices; - if (numIndexes == 0) + if (numIndexes == 0 || updateIndexes == TU_None) return; relationDescs = indstate->ri_IndexRelationDescs; indexInfoArray = indstate->ri_IndexRelationInfo; @@ -314,15 +314,18 @@ CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup) { CatalogIndexState indstate; TU_UpdateIndexes updateIndexes = TU_All; + Bitmapset *updatedAttrs; CatalogTupleCheckConstraints(heapRel, tup); indstate = CatalogOpenIndexes(heapRel); - simple_heap_update(heapRel, otid, tup, &updateIndexes); - + updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs; CatalogIndexInsert(indstate, tup, updateIndexes); + CatalogCloseIndexes(indstate); + bms_free(updatedAttrs); } /* @@ -338,12 +341,15 @@ CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTu CatalogIndexState indstate) { TU_UpdateIndexes updateIndexes = TU_All; + Bitmapset *updatedAttrs; CatalogTupleCheckConstraints(heapRel, tup); - simple_heap_update(heapRel, otid, tup, &updateIndexes); - + updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs; CatalogIndexInsert(indstate, tup, updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = NULL; + bms_free(updatedAttrs); } /* diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 5d819bda54a2a..c665aa744b3bf 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -292,8 +292,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo->ii_IndexAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; + 
indexInfo->ii_ExpressionsAttrs = NULL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NULL; + indexInfo->ii_PredicateAttrs = NULL; + indexInfo->ii_CheckedPredicate = false; + indexInfo->ii_PredicateSatisfied = false; indexInfo->ii_ExclusionOps = NULL; indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 09306515b56f0..c051babf91d15 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -109,11 +109,15 @@ #include "access/genam.h" #include "access/relscan.h" #include "access/tableam.h" +#include "access/sysattr.h" #include "access/xact.h" #include "catalog/index.h" #include "executor/executor.h" +#include "nodes/bitmapset.h" +#include "nodes/execnodes.h" #include "nodes/nodeFuncs.h" #include "storage/lmgr.h" +#include "utils/datum.h" #include "utils/injection_point.h" #include "utils/multirangetypes.h" #include "utils/rangetypes.h" @@ -319,8 +323,8 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, Relation heapRelation; IndexInfo **indexInfoArray; ExprContext *econtext; - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; + Datum loc_values[INDEX_MAX_KEYS]; + bool loc_isnull[INDEX_MAX_KEYS]; Assert(ItemPointerIsValid(tupleid)); @@ -344,13 +348,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, /* Arrange for econtext's scan tuple to be the tuple under test */ econtext->ecxt_scantuple = slot; - /* - * for each index, form and insert the index tuple - */ + /* Insert into each index that needs updating */ for (i = 0; i < numIndices; i++) { Relation indexRelation = relationDescs[i]; IndexInfo *indexInfo; + Datum *values; + bool *isnull; bool applyNoDupErr; IndexUniqueCheck checkUnique; bool indexUnchanged; @@ -367,7 +371,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, /* * Skip processing of non-summarizing indexes if we only update - * summarizing indexes + * summarizing indexes or if this index is unchanged. */ if (onlySummarizing && !indexInfo->ii_Summarizing) continue; @@ -388,8 +392,15 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, indexInfo->ii_PredicateState = predicate; } + /* Check the index predicate if we haven't done so earlier on */ + if (!indexInfo->ii_CheckedPredicate) + { + indexInfo->ii_PredicateSatisfied = ExecQual(predicate, econtext); + indexInfo->ii_CheckedPredicate = true; + } + /* Skip this index-update if the predicate isn't satisfied */ - if (!ExecQual(predicate, econtext)) + if (!indexInfo->ii_PredicateSatisfied) continue; } @@ -397,11 +408,10 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. */ - FormIndexDatum(indexInfo, - slot, - estate, - values, - isnull); + FormIndexDatum(indexInfo, slot, estate, loc_values, loc_isnull); + + values = loc_values; + isnull = loc_isnull; /* Check whether to apply noDupErr to this index */ applyNoDupErr = noDupErr && @@ -436,7 +446,9 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * index. If we're being called as part of an UPDATE statement, * consider if the 'indexUnchanged' = true hint should be passed. 
*/ - indexUnchanged = update && bms_is_empty(resultRelInfo->ri_ChangedIndexedCols); + indexUnchanged = update && + !bms_overlap(indexInfo->ii_IndexedAttrs, + resultRelInfo->ri_ChangedIndexedCols); satisfiesConstraint = index_insert(indexRelation, /* index relation */ @@ -605,7 +617,12 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, checkedIndex = true; /* Check for partial index */ - if (indexInfo->ii_Predicate != NIL) + if (indexInfo->ii_CheckedPredicate && !indexInfo->ii_PredicateSatisfied) + { + /* We've already checked and the predicate wasn't satisfied. */ + continue; + } + else if (indexInfo->ii_Predicate != NIL) { ExprState *predicate; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0a40e87e3275a..fb1ef526a6c9f 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -54,10 +54,13 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/attnum.h" +#include "access/sysattr.h" #include "access/tableam.h" #include "access/tupconvert.h" #include "access/tupdesc.h" #include "access/xact.h" +#include "catalog/index.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -76,6 +79,7 @@ #include "utils/injection_point.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "utils/snapmgr.h" @@ -246,6 +250,10 @@ tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen, typentry = lookup_type_cache(typid, TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO); + /* Use the type's collation if none provided */ + if (collation == -1) + collation = typentry->typcollation; + /* * If no equality operator is available, fall back to binary comparison. * This handles types that don't have proper equality operators defined. @@ -292,108 +300,415 @@ tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen, } /* - * Determine which updated attributes actually changed values between old and - * new tuples and are referenced by indexes on the relation. + * ExecCheckIndexedAttrsForChanges + * + * Determine which indexes need updating by finding the set of modified + * indexed attributes. + * + * For expression indexes and indexes which implement the amcomparedatums() + * index AM API we'll need to form index datum and compare each attribute to + * see if any actually changed. + * + * For expression indexes the result of the expression might not change at all, + * this is common with JSONB columns, which require expression indexes. It is + * is commonplace to index one or more fields within a document and perform + * updates to the document while leaving the indexed fields unchanged. These + * updates don't necessitate index updates. + * + * Partial indexes won't trigger index updates when the old/new tuples are both + * outside of the predicate range. A transition into or out of the predicate + * does require an index update. + * + * Indexes that support index-only scans (IOS) should return the value that + * is the binary equavalent of what is in the table. For that reason we must + * use datumIsEqual() when deciding if an index update is required or not. + * + * All other indexes require testing old/new datum for equality, we now test + * with a type-specific equality operator and fall back to datumIsEqual() + * when that isn't possible. + * + * For a BTREE index (nbtree) their is an additional reason to use binary + * comparison for equality. 
TID deduplication on page split in nbtree uses + * binary comparison. + * + * The goal is for the executor to know, ahead of calling into the table AM to + * process the update and before calling into the index AM for inserting new + * index tuples, which attributes in the new TupleTableSlot, if any, truely + * necessitate a new index tuple. * - * Returns a Bitmapset of attribute offsets (0-based, adjusted by - * FirstLowInvalidHeapAttributeNumber) or NULL if no attributes changed. + * Returns a Bitmapset of attributes that intersects with indexes which require + * a new index tuple. */ Bitmapset * ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo, - TupleTableSlot *tts_old, - TupleTableSlot *tts_new) + EState *estate, + TupleTableSlot *old_tts, + TupleTableSlot *new_tts) { Relation relation = relinfo->ri_RelationDesc; TupleDesc tupdesc = RelationGetDescr(relation); - Bitmapset *indexed_attrs; - Bitmapset *modified = NULL; - int attidx; + Bitmapset *mix_attrs = NULL; /* If no indexes, we're done */ if (relinfo->ri_NumIndices == 0) return NULL; /* - * Get the set of index key attributes. This includes summarizing, - * expression indexes and attributes mentioned in the predicate of a - * partition but not those in INCLUDING. + * NOTE: Expression and predicates that are observed to change will have + * all their attributes added into the m_attrs set knowing that some of + * those might not have changed. Take for instance an index on (a + b) + * followed by an index on (b) with an update that changes only the value + * of 'a'. We'll add both 'a' and 'b' to the m_attrs set then later when + * reviewing the second index add 'b' to the u_attrs (unchanged) set. In + * the end, we'll remove all the unchanged from the m_attrs and get our + * desired result. */ - indexed_attrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_INDEXED); - Assert(!bms_is_empty(indexed_attrs)); - /* - * NOTE: It is important to scan all indexed attributes in the tuples - * because ExecGetAllUpdatedCols won't include columns that may have been - * modified via heap_modify_tuple_by_col which is the case in - * tsvector_update_trigger. 
- */ - attidx = -1; - while ((attidx = bms_next_member(indexed_attrs, attidx)) >= 0) + /* Find the indexes that reference this attribute */ + for (int i = 0; i < relinfo->ri_NumIndices; i++) { - /* attidx is zero-based, attrnum is the normal attribute number */ - AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber; - Form_pg_attribute attr; - bool oldnull, - newnull; - Datum oldval, - newval; + Relation index = relinfo->ri_IndexRelationDescs[i]; + IndexAmRoutine *amroutine = index->rd_indam; + IndexInfo *indexInfo = relinfo->ri_IndexRelationInfo[i]; + Bitmapset *m_attrs = NULL; /* (possibly) modified key attributes */ + Bitmapset *p_attrs = NULL; /* (possibly) modified predicate + * attributes */ + Bitmapset *u_attrs = NULL; /* unmodified attributes */ + Bitmapset *pre_attrs = indexInfo->ii_PredicateAttrs; + bool has_expressions = (indexInfo->ii_Expressions != NIL); + bool has_am_compare = (amroutine->amcomparedatums != NULL); + bool supports_ios = (amroutine->amcanreturn != NULL); + bool is_partial = (indexInfo->ii_Predicate != NIL); + TupleTableSlot *save_scantuple; + ExprContext *econtext = GetPerTupleExprContext(estate); + Datum old_values[INDEX_MAX_KEYS]; + bool old_isnull[INDEX_MAX_KEYS]; + Datum new_values[INDEX_MAX_KEYS]; + bool new_isnull[INDEX_MAX_KEYS]; + + /* If we've reviewed all the attributes on this index, move on */ + if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs)) + continue; - /* - * If it's a whole-tuple reference, record as modified. It's not - * really worth supporting this case, since it could only succeed - * after a no-op update, which is hardly a case worth optimizing for. - */ - if (attrnum == 0) + /* Checking partial at this point isn't viable when we're serializable */ + if (is_partial && IsolationIsSerializable()) { - modified = bms_add_member(modified, attidx); - continue; + p_attrs = bms_add_members(p_attrs, pre_attrs); + } + /* Check partial index predicate */ + else if (is_partial) + { + ExprState *pstate; + bool old_qualifies, + new_qualifies; + + if (!indexInfo->ii_CheckedPredicate) + pstate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + else + pstate = indexInfo->ii_PredicateState; + + save_scantuple = econtext->ecxt_scantuple; + + econtext->ecxt_scantuple = old_tts; + old_qualifies = ExecQual(pstate, econtext); + + econtext->ecxt_scantuple = new_tts; + new_qualifies = ExecQual(pstate, econtext); + + econtext->ecxt_scantuple = save_scantuple; + + indexInfo->ii_CheckedPredicate = true; + indexInfo->ii_PredicateState = pstate; + indexInfo->ii_PredicateSatisfied = new_qualifies; + + /* Both outside predicate, index doesn't need update */ + if (!old_qualifies && !new_qualifies) + continue; + + /* A transition means we need to update the index */ + if (old_qualifies != new_qualifies) + p_attrs = bms_copy(pre_attrs); + + /* + * When both are within the predicate we must update this index, + * but only if one of the index key attributes changed. + */ } /* - * Likewise, include in the modified set any system attribute other - * than tableOID; we cannot expect these to be consistent in a HOT - * chain, or even to be set correctly yet in the new tuple. + * Expression indexes, or an index that has a comparison function, + * requires us to form index datums and compare. We've done all we + * can to avoid this overhead, now it's time to bite the bullet and + * get it done. + * + * XXX: Caching the values/isnull might be a win and avoid one of the + * added calls to FormIndexDatum(). 
*/ - if (attrnum < 0) + if (has_expressions || has_am_compare) { - if (attrnum != TableOidAttributeNumber) - modified = bms_add_member(modified, attidx); - continue; - } + save_scantuple = econtext->ecxt_scantuple; - /* Extract values from both slots */ - oldval = slot_getattr(tts_old, attrnum, &oldnull); - newval = slot_getattr(tts_new, attrnum, &newnull); + /* Evaluate expressions (if any) to get base datums */ + econtext->ecxt_scantuple = old_tts; + FormIndexDatum(indexInfo, old_tts, estate, old_values, old_isnull); - /* If one value is NULL and the other is not, they are not equal */ - if (oldnull != newnull) - { - modified = bms_add_member(modified, attidx); - continue; + econtext->ecxt_scantuple = new_tts; + FormIndexDatum(indexInfo, new_tts, estate, new_values, new_isnull); + + econtext->ecxt_scantuple = save_scantuple; + + /* Compare the index key datums for equality */ + for (int j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++) + { + AttrNumber rel_attrnum = indexInfo->ii_IndexAttrNumbers[j]; + int rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber; + int nth_expr = 0; + bool values_equal = false; + + /* + * We can't skip attributes that we've already identified as + * triggering an index update because we may have added an + * attribute from an expression index that didn't change but + * the expression did, and that unchanged attribute is + * referenced in a subsequent index where we will discover + * that fact. + */ + + /* A change to/from NULL, record this attribute */ + if (old_isnull[j] != new_isnull[j]) + { + /* Expressions will have rel_attrnum == 0 */ + if (rel_attrnum == 0) + m_attrs = bms_add_members(m_attrs, indexInfo->ii_ExpressionsAttrs); + else + m_attrs = bms_add_member(m_attrs, rel_attridx); + continue; + } + + /* Both NULL, no change */ + if (old_isnull[j]) + { + if (rel_attrnum != 0) + u_attrs = bms_add_member(u_attrs, rel_attridx); + + continue; + } + + /* + * Use index AM's comparison function if present when + * comparing the index datum formed when creating an index + * key. + */ + if (has_am_compare) + { + /* + * NOTE: For AM comparison, pass the 1-based index + * attribute number. The AM's compare function expects the + * same numbering as used internally by the AM. + */ + values_equal = amroutine->amcomparedatums(index, j + 1, + old_values[j], old_isnull[j], + new_values[j], new_isnull[j]); + } + else + { + /* Non-zero attribute means not an expression */ + if (rel_attrnum != 0) + { + if (supports_ios) + { + CompactAttribute *attr = TupleDescCompactAttr(tupdesc, rel_attrnum - 1); + + values_equal = datumIsEqual(old_values[j], + new_values[j], + attr->attbyval, + attr->attlen); + } + else + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, rel_attrnum - 1); + + /* + * Compare using type-specific equality which at + * this point is the relation's type because + * FormIndexDatum() will populate the values/nulls + * but won't transform them into the final values + * destined for the index tuple, that's left to + * index_form_tuple() which we don't call (on + * purpose). + */ + values_equal = tts_attr_equal(attr->atttypid, + attr->attcollation, + attr->attbyval, + attr->attlen, + old_values[j], + new_values[j]); + } + } + else + { + /* + * An expression on an indexed attribute without a + * custom AM comparison function. In this case, because + * indexes will store the result of the expression's + * evaluation, we can test for equality using the + * expression's result type.
This allows for JSONB + * and custom type equality tests, which may not be + * the same as binary equality, to be in effect. The + * result stored in the index and used in index-only + * scans will be valid as it is the expression's + * result, which shouldn't change given the same + * input. + * + * At this point the expression's type is what is + * required when testing for equality, not the index's + * type, because the value created by FormIndexDatum() + * is the expression's result. Later on in + * index_form_tuple() an index may transform the value + * when forming its key (as is the case with HASH), + * but at this point the Datum is the expression's + * result type. + */ + Oid expr_type_oid; + int16 typlen; + bool typbyval; + Expr *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr); + + Assert(expr != NULL); + + /* Get type OID from the expression */ + expr_type_oid = exprType((Node *) expr); + + /* Get type information from the OID */ + get_typlenbyval(expr_type_oid, &typlen, &typbyval); + + values_equal = tts_attr_equal(expr_type_oid, + -1, /* use TBD expr type */ + typbyval, + typlen, + old_values[j], + new_values[j]); + } + } + + if (!values_equal) + { + /* Expressions will have rel_attrnum == 0 */ + if (rel_attrnum == 0) + m_attrs = bms_add_members(m_attrs, indexInfo->ii_ExpressionsAttrs); + else + m_attrs = bms_add_member(m_attrs, rel_attridx); + } + else + { + if (rel_attrnum != 0) + u_attrs = bms_add_member(u_attrs, rel_attridx); + } + + if (rel_attrnum == 0) + nth_expr++; + } } + else + { + /* + * Here we know that we're reviewing an index that doesn't have a + * partial predicate, doesn't use expressions, and doesn't have an + * amcomparedatums() implementation. If this index supports IOS + * we need to use binary comparison; if not, type-specific + * comparison will provide a more accurate result.
+ */ - /* If both are NULL, consider them equal */ - if (oldnull) - continue; + /* Compare the index key datums for equality */ + for (int j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++) + { + AttrNumber rel_attrnum; + int rel_attridx; + bool values_equal = false; + bool old_null, + new_null; + Datum old_val, + new_val; - /* Get attribute metadata */ - Assert(attrnum > 0 && attrnum <= tupdesc->natts); - attr = TupleDescAttr(tupdesc, attrnum - 1); - - /* Compare using type-specific equality operator */ - if (!tts_attr_equal(attr->atttypid, - attr->attcollation, - attr->attbyval, - attr->attlen, - oldval, - newval)) - modified = bms_add_member(modified, attidx); - } + rel_attrnum = indexInfo->ii_IndexAttrNumbers[j]; + rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber; + + /* Zero would mean expression, something we don't expect here */ + Assert(rel_attrnum > 0 && rel_attrnum <= tupdesc->natts); + + /* Extract values from both slots for this attribute */ + old_val = slot_getattr(old_tts, rel_attrnum, &old_null); + new_val = slot_getattr(new_tts, rel_attrnum, &new_null); - bms_free(indexed_attrs); + /* + * If one value is NULL and the other is not, they are not + * equal + */ + if (old_null != new_null) + { + m_attrs = bms_add_member(m_attrs, rel_attridx); + continue; + } + + /* If both are NULL, consider them equal */ + if (old_null) + { + u_attrs = bms_add_member(u_attrs, rel_attridx); + continue; + } + + if (supports_ios) + { + CompactAttribute *attr = TupleDescCompactAttr(tupdesc, rel_attrnum - 1); + + values_equal = datumIsEqual(old_val, + new_val, + attr->attbyval, + attr->attlen); + } + else + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, rel_attrnum - 1); + + /* + * Compare using type-specific equality which at this + * point is the relation's type because FormIndexDatum() + * will populate the values/nulls but won't transform them + * into the final values destined for the index tuple, + * that's left to index_form_tuple() which we don't call + * (on purpose). + */ + values_equal = tts_attr_equal(attr->atttypid, + attr->attcollation, + attr->attbyval, + attr->attlen, + old_val, + new_val); + } + + if (!values_equal) + m_attrs = bms_add_member(m_attrs, rel_attridx); + else + u_attrs = bms_add_member(u_attrs, rel_attridx); + } + } + + /* + * Here we know all the attributes we thought might be modified and + * all those we know haven't been. Take the difference and add it to + * the modified indexed attributes set. + */ + m_attrs = bms_del_members(m_attrs, u_attrs); + p_attrs = bms_del_members(p_attrs, u_attrs); + mix_attrs = bms_add_members(mix_attrs, m_attrs); + mix_attrs = bms_add_members(mix_attrs, p_attrs); + + bms_free(m_attrs); + bms_free(u_attrs); + bms_free(p_attrs); + } - return modified; + return mix_attrs; } /* @@ -2397,6 +2712,9 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, bool partition_constraint_failed; TM_Result result; + /* The set of modified indexed attributes that trigger new index entries */ + Bitmapset *mix_attrs = NULL; + updateCxt->crossPartUpdate = false; /* @@ -2519,13 +2837,32 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, bms_free(resultRelInfo->ri_ChangedIndexedCols); resultRelInfo->ri_ChangedIndexedCols = NULL; - resultRelInfo->ri_ChangedIndexedCols = - ExecCheckIndexedAttrsForChanges(resultRelInfo, oldSlot, slot); + /* + * During updates we'll need a bit more information in IndexInfo but we've + * delayed adding it until here. 
We check to ensure that there are + * indexes, that something has changed that is indexed, and that the first + * index doesn't yet have ii_IndexedAttrs set as a way to ensure we only + * build this when needed and only once. We don't build this in + * ExecOpenIndices() as it is unnecessary overhead when not performing an + * update. + */ + if (resultRelInfo->ri_NumIndices > 0 && + bms_is_empty(resultRelInfo->ri_IndexRelationInfo[0]->ii_IndexedAttrs)) + BuildUpdateIndexInfo(resultRelInfo); + + /* + * Next up we need to find out the set of indexed attributes that have + * changed in value and should trigger a new index tuple. We could start + * with the set of updated columns via ExecGetUpdatedCols(), but if we do + * we will overlook attributes directly modified by heap_modify_tuple() + * which are not known to ExecGetUpdatedCols(). + */ + mix_attrs = ExecCheckIndexedAttrsForChanges(resultRelInfo, estate, oldSlot, slot); /* - * replace the heap tuple + * Call into the table AM to update the heap tuple. * - * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that + * NOTE: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that * the row to be updated is visible to that snapshot, and throw a * can't-serialize error if not. This is a special-case behavior needed * for referential integrity updates in transaction-snapshot mode @@ -2537,9 +2874,12 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, estate->es_crosscheck_snapshot, true /* wait for commit */ , &context->tmfd, &updateCxt->lockmode, - resultRelInfo->ri_ChangedIndexedCols, + mix_attrs, &updateCxt->updateIndexes); + Assert(bms_is_empty(resultRelInfo->ri_ChangedIndexedCols)); + resultRelInfo->ri_ChangedIndexedCols = mix_attrs; + return result; } @@ -2557,7 +2897,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, ModifyTableState *mtstate = context->mtstate; List *recheckIndexes = NIL; - /* insert index entries for tuple if necessary */ + /* Insert index entries for tuple if necessary */ if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None)) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, slot, context->estate, diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index d69dc090aa417..e9a53b95caf1e 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -855,10 +855,14 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions, /* expressions */ n->ii_Expressions = expressions; n->ii_ExpressionsState = NIL; + n->ii_ExpressionsAttrs = NULL; /* predicates */ n->ii_Predicate = predicates; n->ii_PredicateState = NULL; + n->ii_PredicateAttrs = NULL; + n->ii_CheckedPredicate = false; + n->ii_PredicateSatisfied = false; /* exclusion constraints */ n->ii_ExclusionOps = NULL; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 63dd41c1f21bf..9bdf73eda59f4 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -211,6 +211,33 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan); /* restore marked scan position */ typedef void (*amrestrpos_function) (IndexScanDesc scan); +/* + * amcomparedatums - Compare datums to determine if index update is needed + * + * This function compares old_datum and new_datum to determine if they would + * produce different index entries. For extraction-based indexes (GIN, RUM), + * this should: + * 1. Extract keys from old_datum using the opclass's extractValue function + * 2.
Extract keys from new_datum using the opclass's extractValue function + * 3. Compare the two sets of keys using appropriate equality operators + * 4. Return true if the sets are equal (no index update needed) + * + * The comparison should account for: + * - Different numbers of extracted keys + * - NULL values + * - Type-specific equality (not just binary equality) + * - Opclass parameters (e.g., path in bson_rum_single_path_ops) + * + * For the DocumentDB example with path='a', this would extract values at + * path 'a' from both old and new BSON documents and compare them using + * BSON's equality operator. + */ +/* identify if updated datums would produce one or more index entries */ +typedef bool (*amcomparedatums_function) (Relation indexRelation, + int attno, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); + /* * Callback function signatures - for parallel index scans. */ @@ -313,6 +340,7 @@ typedef struct IndexAmRoutine amendscan_function amendscan; ammarkpos_function ammarkpos; /* can be NULL */ amrestrpos_function amrestrpos; /* can be NULL */ + amcomparedatums_function amcomparedatums; /* can be NULL */ /* interface functions to support parallel index scans */ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */ diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 13ea91922efc5..2f265f4816c32 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -100,6 +100,9 @@ extern PGDLLIMPORT int gin_pending_list_limit; extern void ginGetStats(Relation index, GinStatsData *stats); extern void ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build); +extern bool gincomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 2f9a2b069cd00..5783dbebff04c 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -369,7 +369,7 @@ extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup, TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs, - Bitmapset *mix_attrs, Buffer *vmbuffer, + const Bitmapset *mix_attrs, Buffer *vmbuffer, bool rep_id_key_required, TU_UpdateIndexes *update_indexes); extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, @@ -404,8 +404,8 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, const ItemPointerData *tid); -extern void simple_heap_update(Relation relation, const ItemPointerData *otid, - HeapTuple tup, TU_UpdateIndexes *update_indexes); +extern Bitmapset *simple_heap_update(Relation relation, const ItemPointerData *otid, + HeapTuple tup, TU_UpdateIndexes *update_indexes); extern TransactionId heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 16be5c7a9c158..42bd329eaad32 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1210,6 +1210,10 @@ extern int btgettreeheight(Relation rel); extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily); extern 
StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily); +extern bool btcomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); + /* * prototypes for internal functions in nbtree.c diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index f6237949bd26c..d94dfc9b41d23 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -549,7 +549,7 @@ typedef struct TableAmRoutine bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *updated_cols, + const Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes); /* see table_tuple_lock() for reference about parameters */ @@ -1513,12 +1513,12 @@ static inline TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes) + const Bitmapset *mix_cols, TU_UpdateIndexes *update_indexes) { return rel->rd_tableam->tuple_update(rel, otid, slot, cid, snapshot, crosscheck, wait, tmfd, lockmode, - updated_cols, update_indexes); + mix_cols, update_indexes); } /* @@ -2021,7 +2021,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot); extern void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - Bitmapset *modified_indexe_attrs, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes); diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index dda95e54903eb..8d364f8b30f4e 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -132,6 +132,7 @@ extern bool CompareIndexInfo(const IndexInfo *info1, const IndexInfo *info2, const AttrMap *attmap); extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii); +extern void BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo); extern void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 993dc0e6cedd2..a19585ba06561 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -739,6 +739,11 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate); */ extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); +extern Bitmapset *ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo, + Bitmapset *mix_attrs, + EState *estate, + TupleTableSlot *old_tts, + TupleTableSlot *new_tts); extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool update, @@ -800,9 +805,10 @@ extern ResultRelInfo *ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache); -extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *resultRelInfo, - TupleTableSlot *tts_old, - TupleTableSlot *tts_new); +extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo, + EState *estate, + TupleTableSlot *old_tts, + TupleTableSlot *new_tts); extern bool tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen, Datum value1, Datum value2); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 4cedbd8acf69e..1259897282ede 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -174,15 +174,29 @@ typedef struct 
IndexInfo */ AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS]; + /* + * All key, expression, summarizing, and partition attributes referenced by + * this index + */ + Bitmapset *ii_IndexedAttrs; + /* expr trees for expression entries, or NIL if none */ List *ii_Expressions; /* list of Expr */ /* exec state for expressions, or NIL if none */ List *ii_ExpressionsState; /* list of ExprState */ + /* attributes exclusively referenced by expression indexes */ + Bitmapset *ii_ExpressionsAttrs; /* partial-index predicate, or NIL if none */ List *ii_Predicate; /* list of Expr */ /* exec state for expressions, or NIL if none */ ExprState *ii_PredicateState; + /* attributes referenced by the predicate */ + Bitmapset *ii_PredicateAttrs; + /* partial-index predicate checked yet? */ + bool ii_CheckedPredicate; + /* amupdate hint used to avoid rechecking predicate */ + bool ii_PredicateSatisfied; /* Per-column exclusion operators, or NULL if none */ Oid *ii_ExclusionOps; /* array with one entry per column */ @@ -494,6 +508,11 @@ typedef struct ResultRelInfo Bitmapset *ri_extraUpdatedCols; /* true if the above has been computed */ bool ri_extraUpdatedCols_valid; + + /* + * For UPDATE, a Bitmapset of the attributes that are both indexed and have + * changed in value. + */ Bitmapset *ri_ChangedIndexedCols; /* Projection to generate new tuple in an INSERT/UPDATE */ diff --git a/src/test/isolation/expected/insert-conflict-specconflict.out b/src/test/isolation/expected/insert-conflict-specconflict.out index e34a821c403c7..54b3981918c7d 100644 --- a/src/test/isolation/expected/insert-conflict-specconflict.out +++ b/src/test/isolation/expected/insert-conflict-specconflict.out @@ -80,6 +80,10 @@ pg_advisory_unlock t (1 row) +s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 2 +s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 2 s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 s1: NOTICE: acquiring advisory lock on 2 s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 @@ -172,6 +176,10 @@ pg_advisory_unlock t (1 row) +s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 +s2: NOTICE: acquiring advisory lock on 2 +s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 +s2: NOTICE: acquiring advisory lock on 2 s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 s2: NOTICE: acquiring advisory lock on 2 s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 @@ -369,6 +377,10 @@ key|data step s1_commit: COMMIT; s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 s2: NOTICE: acquiring advisory lock on 2 +s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 +s2: NOTICE: acquiring advisory lock on 2 +s2: NOTICE: blurt_and_lock_123() called for k1 in session 2 +s2: NOTICE: acquiring advisory lock on 2 step s2_upsert: <...
completed> step controller_show: SELECT * FROM upserttest; key|data @@ -530,6 +542,14 @@ isolation/insert-conflict-specconflict/s2|transactionid|ExclusiveLock|t step s2_commit: COMMIT; s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 s1: NOTICE: acquiring advisory lock on 2 +s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 2 +s1: NOTICE: blurt_and_lock_123() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 2 +s1: NOTICE: blurt_and_lock_4() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 4 +s1: NOTICE: blurt_and_lock_4() called for k1 in session 1 +s1: NOTICE: acquiring advisory lock on 4 step s1_upsert: <... completed> step s1_noop: step controller_show: SELECT * FROM upserttest; diff --git a/src/test/regress/expected/heap_hot_updates.out b/src/test/regress/expected/heap_hot_updates.out new file mode 100644 index 0000000000000..f6bd8b18af8ce --- /dev/null +++ b/src/test/regress/expected/heap_hot_updates.out @@ -0,0 +1,1922 @@ +-- ================================================================ +-- Test Suite for Heap-only (HOT) Updates +-- ================================================================ +-- Setup: Create function to measure HOT updates +CREATE OR REPLACE FUNCTION check_hot_updates( + expected INT, + p_table_name TEXT DEFAULT 't', + p_schema_name TEXT DEFAULT current_schema() +) +RETURNS TABLE ( + table_name TEXT, + total_updates BIGINT, + hot_updates BIGINT, + hot_update_percentage NUMERIC, + matches_expected BOOLEAN +) +LANGUAGE plpgsql +AS $$ +DECLARE + v_relid oid; + v_qualified_name TEXT; + v_hot_updates BIGINT; + v_updates BIGINT; + v_xact_hot_updates BIGINT; + v_xact_updates BIGINT; +BEGIN + -- Force statistics update + PERFORM pg_stat_force_next_flush(); + + -- Get table OID + v_qualified_name := quote_ident(p_schema_name) || '.' 
|| quote_ident(p_table_name); + v_relid := v_qualified_name::regclass; + + IF v_relid IS NULL THEN + RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name; + END IF; + + -- Get cumulative + transaction stats + v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0); + v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0); + v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0); + v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0); + + v_hot_updates := v_hot_updates + v_xact_hot_updates; + v_updates := v_updates + v_xact_updates; + + RETURN QUERY + SELECT + p_table_name::TEXT, + v_updates::BIGINT, + v_hot_updates::BIGINT, + CASE WHEN v_updates > 0 + THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2) + ELSE 0 + END, + (v_hot_updates = expected)::BOOLEAN; +END; +$$; +CREATE COLLATION case_insensitive ( + provider = libc, + locale = 'C' +); +-- ================================================================ +-- Basic JSONB Expression Index +-- ================================================================ +CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}'); +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update indexed JSONB field - should NOT be HOT +UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update non-indexed field again - should be HOT +UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 2 | 66.67 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- JSONB Expression Index an some including columns +-- ================================================================ +CREATE TABLE t(id INT PRIMARY KEY, docs JSONB, status TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}', 'ok'); +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET status = 'not ok' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 2 | 100.00 | t 
+(1 row) + +DROP TABLE t; +-- ================================================================ +-- Partial Index with Predicate Transitions +-- ================================================================ +CREATE TABLE t(id INT, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_value_idx ON t(value) WHERE value > 10; +INSERT INTO t VALUES (1, 5); +-- Both outside predicate - should be HOT +UPDATE t SET value = 8 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET value = 15 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Both inside predicate, value changes - should NOT be HOT +UPDATE t SET value = 20 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Transition out of predicate - should NOT be HOT +UPDATE t SET value = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 1 | 25.00 | t +(1 row) + +-- Both outside predicate again - should be HOT +UPDATE t SET value = 3 WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 5 | 2 | 40.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Expression Index with Partial Predicate +-- ================================================================ +CREATE TABLE t(docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((docs->'status')) + WHERE (docs->'priority')::int > 5; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3}'); +-- Both outside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 4}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 10}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Inside predicate, status changes - should NOT be HOT +UPDATE t SET docs = '{"status": "active", "priority": 10}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t 
+(1 row) + +-- Inside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "active", "priority": 8}'; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 2 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN Index on JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}'); +-- Change tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Change tags again - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 0 | 0.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN Index with Unchanged Keys +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create GIN index on specific path +CREATE INDEX t_gin_idx ON t USING gin((data->'tags')); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}'); +-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change indexed tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN with jsonb_path_ops +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops); +INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}'); +-- 
Change value at different path - keys changed, NOT HOT +UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Multi-Column Expression Index +-- ================================================================ +CREATE TABLE t(id INT, a INT, b INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t(id, abs(a), abs(b)); +INSERT INTO t VALUES (1, -5, -10); +-- Change sign but not abs value - should be HOT +UPDATE t SET a = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change abs value - should NOT be HOT +UPDATE t SET b = -15 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Change id - should NOT be HOT +UPDATE t SET id = 2 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Mixed Index Types (BRIN + Expression) +-- ================================================================ +CREATE TABLE t(id INT, value INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_idx ON t USING brin(value); +CREATE INDEX t_expr_idx ON t((data->'status')); +INSERT INTO t VALUES (1, 100, '{"status": "active"}'); +-- Update only BRIN column - should be HOT +UPDATE t SET value = 200 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update only expression column - should NOT be HOT +UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update both - should NOT be HOT +UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Expression with COLLATION and BTREE (nbtree) index +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + name TEXT COLLATE case_insensitive +) WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX 
t_lower_idx ON t USING BTREE (name COLLATE case_insensitive); +INSERT INTO t VALUES (1, 'ALICE'); +-- Change case but not value - should NOT be HOT in BTREE +UPDATE t SET name = 'Alice' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Change to new value - should NOT be HOT +UPDATE t SET name = 'BOB' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Array Expression Index +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_array_len_idx ON t(array_length(tags, 1)); +INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']); +-- Same length, different elements - should be HOT +UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Different length - should NOT be HOT +UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Nested JSONB Expression and JSONB equality '->' (not '->>') +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_nested_idx ON t((data->'user'->'name')); +INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}'); +-- Change nested non-indexed field - should be HOT +UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change nested indexed field - should NOT be HOT +UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Complex Predicate on Multiple JSONB Fields +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((data->'status')) + WHERE (data->'priority')::int > 5 + AND (data->'active')::boolean = true; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}'); +-- Outside predicate (priority too low) - should be HOT +UPDATE t SET data = '{"status": 
"done", "priority": 3, "active": true}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Inside predicate, change to outside (active = false) - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN Array Index - Order Insensitive Extraction +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + data JSONB +) WITH (autovacuum_enabled = off, fillfactor = 70); +-- GIN index on JSONB array (extracts all elements) +CREATE INDEX t_items_gin ON t USING GIN ((data->'items')); +INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}'); +-- Update: Reorder array elements +-- JSONB equality: NOT equal (different arrays) +-- GIN extraction: Same elements extracted (might allow HOT if not careful) +UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update: Add/remove element +UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- TOASTed Values in Expression Index +-- ================================================================ +CREATE TABLE t(id INT, large_text TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10)); +INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier'); +-- Change end of string, prefix unchanged - should be HOT +UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change prefix - should NOT be HOT +UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- 
================================================================ +-- TEST: GIN with TOASTed TEXT (tsvector) +-- ================================================================ +CREATE TABLE t(id INT, content TEXT, search_vec tsvector) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create trigger to maintain tsvector +CREATE TRIGGER tsvectorupdate_toast + BEFORE INSERT OR UPDATE ON t + FOR EACH ROW EXECUTE FUNCTION + tsvector_update_trigger(search_vec, 'pg_catalog.english', content); +CREATE INDEX t_gin ON t USING gin(search_vec); +-- Insert with large content (will be TOASTed) +INSERT INTO t (id, content) VALUES + (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000)); +-- Verify initial state +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important'); + count +------- + 1 +(1 row) + +-- Expected: 1 row +-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time +-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed. +-- This means the comparison sees old tsvector vs. trigger-modified tsvector, +-- not the natural progression. HOT won't happen because the trigger changed +-- the indexed column. +-- Update: Even though content keywords unchanged, trigger still fires +UPDATE t +SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT) +-- This is actually correct behavior - the trigger updated an indexed column +-- Update: Change indexed keywords +UPDATE t +SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (index keys changed) +-- Verify query correctness +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical'); + count +------- + 1 +(1 row) + +-- Expected: 1 row +DROP TABLE t CASCADE; +-- ================================================================ +-- TEST: GIN with TOASTed JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin((data->'tags')); +-- Insert with TOASTed JSONB +INSERT INTO t (id, data) VALUES + (1, jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('x', 10000) + )); +-- Update: Change large_field, tags unchanged - should be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Expected: 1 HOT update +-- Update: Change tags - should NOT be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "sql"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name 
| total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Expected: Still 1 HOT +-- Verify correctness +SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb; + count +------- + 0 +(1 row) + +-- Expected: 0 rows +SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb; + count +------- + 1 +(1 row) + +-- Expected: 1 row +DROP TABLE t CASCADE; +-- ================================================================ +-- TEST: GIN with Array of Large Strings +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin(tags); +-- Insert with large array elements (might be TOASTed) +INSERT INTO t (id, tags) VALUES + (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]); +-- Update: Change to different large values - NOT HOT +UPDATE t +SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (keys actually changed) +-- Update: Keep same tag values, just reorder - SHOULD BE HOT +-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3] +-- extract to the same sorted key set ['tag3','tag4']) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Expected: 1 HOT (GIN keys semantically identical) +-- Update: Remove an element - NOT HOT (keys changed) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Expected: Still 1 HOT (not this one) +DROP TABLE t CASCADE; +-- ================================================================ +-- BRIN Index with Partial Predicate +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + value INT, + description TEXT +) WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100; +INSERT INTO t VALUES (1, 50, 'below range'); +-- Test 1: Outside predicate +UPDATE t SET description = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Validate: Predicate query returns 0 rows +SELECT COUNT(*) as cnt FROM t WHERE value > 100; + cnt +----- + 0 +(1 row) + +-- Test 2: Transition into predicate +UPDATE t SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t 
| 2 | 2 | 100.00 | t +(1 row) + +-- Validate: Predicate query returns 1 row with correct value +SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100; + cnt | max_val +-----+--------- + 1 | 150 +(1 row) + +-- Test 3: Inside predicate, value changes +UPDATE t SET value = 160, description = 'updated again' WHERE id = 1; +SELECT * FROM check_hot_updates(3); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 3 | 100.00 | t +(1 row) + +-- Validate: Updated value (160) is returned +SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100; + cnt | max_val +-----+--------- + 1 | 160 +(1 row) + +-- Test 4: Transition out of predicate +UPDATE t SET value = 50 WHERE id = 1; +SELECT * FROM check_hot_updates(4); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 4 | 100.00 | t +(1 row) + +SELECT COUNT(*) as cnt FROM t WHERE value > 100; + cnt +----- + 0 +(1 row) + +SELECT id, value, description FROM t; + id | value | description +----+-------+--------------- + 1 | 50 | updated again +(1 row) + +DROP TABLE t CASCADE; +-- ================================================================ +-- HASH Index (Simple Column) +-- ================================================================ +CREATE TABLE t(id INT, code VARCHAR(20), description TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_idx ON t USING hash(code); +INSERT INTO t VALUES (1, 'CODE001', 'initial'); +-- Update non-indexed column - should be HOT +UPDATE t SET description = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update indexed column - HASH index requires update, NOT HOT +UPDATE t SET code = 'CODE002' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update both - NOT HOT +UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Back to original code - NOT HOT (different hash bucket location) +UPDATE t SET code = 'CODE001' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 1 | 25.00 | t +(1 row) + +DROP TABLE t CASCADE; +-- ================================================================ +-- HASH Index on Expression +-- ================================================================ +CREATE TABLE t(id INT, email TEXT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email)); +INSERT INTO t VALUES (1, 'Alice@Example.com', '{"status": "new"}'); +-- Update non-indexed field - should 
be HOT +UPDATE t SET data = '{"status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update email with case change only (same lowercase) - should be HOT +UPDATE t SET email = 'alice@example.com' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 2 | 100.00 | t +(1 row) + +-- Update email to different lowercase - NOT HOT +UPDATE t SET email = 'bob@example.com' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 2 | 66.67 | t +(1 row) + +DROP TABLE t CASCADE; +-- ================================================================ +-- HASH Index on JSONB Field +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash((data->'category')); +INSERT INTO t VALUES (1, '{"category": "books", "title": "PostgreSQL Guide"}'); +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET data = '{"category": "books", "title": "PostgreSQL Handbook"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update indexed JSONB field - NOT HOT +UPDATE t SET data = '{"category": "videos", "title": "PostgreSQL Handbook"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update both - NOT HOT +UPDATE t SET data = '{"category": "courses", "title": "PostgreSQL Basics"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t CASCADE; +-- ================================================================ +-- Multiple HASH Indexes +-- ================================================================ +CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash(category); +CREATE INDEX t_hash_status_idx ON t USING hash(status); +INSERT INTO t VALUES (1, 'electronics', 'active', 100); +-- Update non-indexed column - should be HOT +UPDATE t SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update one indexed column - NOT HOT +UPDATE t SET category = 'books' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | 
total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update other indexed column - NOT HOT +UPDATE t SET status = 'inactive' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Update both indexed columns - NOT HOT +UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 1 | 25.00 | t +(1 row) + +DROP TABLE t CASCADE; +-- ================================================================ +-- BRIN vs HASH Comparison +-- ================================================================ +CREATE TABLE t_brin(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE TABLE t_hash(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_value_idx ON t_brin USING brin(value); +CREATE INDEX t_hash_value_idx ON t_hash USING hash(value); +INSERT INTO t_brin VALUES (1, 100, 'initial'); +INSERT INTO t_hash VALUES (1, 100, 'initial'); +-- Same update on both - different HOT behavior expected +-- BRIN: might allow HOT (range summary unchanged) +-- HASH: blocks HOT (hash bucket changed) +UPDATE t_brin SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't_brin'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t_brin | 1 | 1 | 100.00 | t +(1 row) + +-- Expected: 1 HOT (BRIN allows it for single row) +UPDATE t_hash SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't_hash'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t_hash | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (HASH blocks it) +DROP TABLE t_brin CASCADE; +DROP TABLE t_hash CASCADE; +-- ================================================================ +-- HASH Index with NULL Values +-- ================================================================ +CREATE TABLE t(id INT, category VARCHAR, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash(category); +INSERT INTO t VALUES (1, 'electronics', 'initial'); +-- Update indexed column to NULL - NOT HOT (hash value changed) +UPDATE t SET category = NULL WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT +-- Update indexed column from NULL to value - NOT HOT +UPDATE t SET category = 'books' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT +-- Update non-indexed 
column - should be HOT +UPDATE t SET data = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Expected: 1 HOT +DROP TABLE t CASCADE; +-- ================================================================ +-- BRIN on JSONB Field +-- ================================================================ +CREATE TABLE t(id INT, metrics JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- BRIN doesn't directly support JSONB, but we can test on expression +CREATE INDEX t_brin_count_idx ON t USING brin( + CAST(metrics->>'count' AS INTEGER) +); +INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}'); +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Expected: 1 HOT +-- Update indexed field - BRIN allows HOT for single row +UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 2 | 100.00 | t +(1 row) + +-- Expected: 2 HOT (BRIN permits single-row updates) +DROP TABLE t CASCADE; +-- ================================================================ +-- Mixed BRIN + HASH on Same Table +-- ================================================================ +CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp); +CREATE INDEX t_hash_category_idx ON t USING hash(category); +INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial'); +-- Update non-indexed column - should be HOT +UPDATE t SET data = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Expected: 1 HOT +-- Update BRIN indexed column - allows HOT +UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 2 | 100.00 | t +(1 row) + +-- Expected: 2 HOT +-- Update HASH indexed column - blocks HOT +UPDATE t SET category = 'videos' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 2 | 66.67 | t +(1 row) + +-- Expected: 2 HOT (HASH blocks it) +-- Update price (non-indexed) - should be HOT +UPDATE t SET price = 39.99 WHERE id = 1; +SELECT * FROM check_hot_updates(3); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected 
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+-- Expected: 3 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+-- Update impacts the index on the whole document attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Two indexes on a JSONB document, one partial
+-- ================================================================
+CREATE TABLE t (docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+INSERT INTO t (docs) VALUES ('{"a": 0, "b": 0}');
+INSERT INTO t (docs) SELECT jsonb_build_object('b', n) FROM generate_series(100, 10000) as n;
+CREATE INDEX t_idx_a ON t ((docs->'a'));
+CREATE INDEX t_idx_b ON t ((docs->'b')) WHERE (docs->'b')::numeric > 9;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+-- Leave 'a' unchanged but modify 'b' to a value outside of the index predicate.
+-- This should be a HOT update because neither index is changed.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 1) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Check to make sure that the index does not contain a value for 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+------
+(0 rows)
+
+-- Leave 'a' unchanged but modify 'b' to a value within the index predicate.
+-- This represents a change for field 'b' from unindexed to indexed and so
+-- this should not take the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 10) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+-------------------
+ {"a": 0, "b": 10}
+(1 row)
+
+-- This update modifies the value of 'a', an indexed field, so it also cannot
+-- be a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 1, 'b', 10) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- This update changes both 'a' and 'b' to new values, so it cannot use the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 12) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+-------------------
+ {"a": 2, "b": 12}
+(1 row)
+
+-- This update changes 'b' to a value outside its predicate, requiring that
+-- we remove it from the index. That's a transition that can't be done
+-- during a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 1) WHERE (docs->'b')::numeric = 12;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 5 | 1 | 20.00 | t
+(1 row)
+
+-- Check to make sure that the index no longer contains the value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+------
+(0 rows)
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+-- ================================================================
+-- Tests to check expression indexes
+-- ================================================================
+CREATE TABLE t(a INT, b INT) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx_a ON t(abs(a)) WHERE abs(a) > 10;
+CREATE INDEX t_idx_b ON t(abs(b));
+INSERT INTO t VALUES (-1, -1), (-2, -2), (-3, -3), (-4, -4), (-5, -5);
+INSERT INTO t SELECT m, n FROM generate_series(-10000, -10) AS m, abs(m) AS n;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+-- The indexed value of b hasn't changed, this should be a HOT update.
+-- (-5, -5) -> (-5, 5)
+UPDATE t SET b = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ QUERY PLAN
+------------------------------------------------
+ Index Scan using t_idx_b on t
+ Index Cond: ((abs(b) < 10) AND (abs(b) > 0))
+(2 rows)
+
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ b
+----
+ -1
+ -2
+ -3
+ -4
+ 5
+(5 rows)
+
+-- Now that we're not checking the predicate of the partial index, this
+-- update of a from -5 to 5 should be HOT: we ignore the predicate, check
+-- the expression, and find it unchanged.
+-- (-5, 5) -> (5, 5)
+UPDATE t SET a = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- This update moves a into the partial index and should not
+-- be HOT. Let's make sure of that and check the index as well.
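+-- (Aside, not executed by this test: heap-only tuples can also be observed
+-- directly with the contrib/pageinspect extension. The bit masks below are the
+-- standard HEAP_HOT_UPDATED (0x4000) and HEAP_ONLY_TUPLE (0x8000) t_infomask2
+-- flags; this is only an illustrative sketch, not part of the expected output.)
+-- CREATE EXTENSION IF NOT EXISTS pageinspect;
+-- SELECT lp, t_ctid,
+--        (t_infomask2 & 16384) <> 0 AS hot_updated,
+--        (t_infomask2 & 32768) <> 0 AS heap_only
+--   FROM heap_page_items(get_raw_page('t', 0));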
+-- (-4, -4) -> (-11, -4)
+UPDATE t SET a = -11 WHERE a = -4;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | -4
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(6 rows)
+
+-- (-11, -4) -> (11, -4)
+UPDATE t SET a = 11 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ 11 | -4
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(6 rows)
+
+-- (11, -4) -> (-4, -4)
+UPDATE t SET a = -4 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 5 | 3 | 60.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(5 rows)
+
+-- This update of a from 5 to -1 is HOT despite that attribute
+-- being indexed because the before and after values for the
+-- partial index predicate are outside the index definition.
+-- (5, 5) -> (-1, 5)
+UPDATE t SET a = -1 WHERE a = 5;
+SELECT * FROM check_hot_updates(4);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 6 | 4 | 66.67 | t
+(1 row)
+
+-- This update sets a to -2 on the row where b = -2 (a is already -2); it will
+-- be HOT because the before/after values of a are both outside the predicate
+-- of the partial index.
+-- (-2, -2) -> (-2, -2)
+UPDATE t SET a = -2 WHERE b = -2;
+SELECT * FROM check_hot_updates(5);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 7 | 5 | 71.43 | t
+(1 row)
+
+-- The indexed value for b isn't changing, this should be HOT.
+-- (-2, -2) -> (-2, 2)
+UPDATE t SET b = 2 WHERE b = -2;
+SELECT * FROM check_hot_updates(6);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 8 | 6 | 75.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ QUERY PLAN
+------------------------------------------------
+ Index Scan using t_idx_b on t
+ Index Cond: ((abs(b) < 10) AND (abs(b) > 0))
+(2 rows)
+
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ b
+----
+ -1
+ 2
+ -3
+ -4
+ 5
+(5 rows)
+
+SELECT * FROM t where a > -10 AND a < 10;
+ a | b
+----+----
+ -1 | -1
+ -3 | -3
+ -1 | 5
+ -4 | -4
+ -2 | 2
+(5 rows)
+
+-- Before and after values for a are outside the predicate of the index,
+-- and because we check for that these updates should be HOT.
+-- (-1, -1) -> (5, -1)
+-- (-1, 5) -> (5, 5)
+UPDATE t SET a = 5 WHERE a = -1;
+SELECT * FROM check_hot_updates(8);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 10 | 8 | 80.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(5 rows)
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+-- ================================================================
+-- JSONB with a partial index: key on one field, predicate on another
+-- ================================================================
+CREATE TABLE t(docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'a')) WHERE (docs->'b')::integer = 1;
+INSERT INTO t VALUES ('{"a": 1, "b": 1}');
+EXPLAIN (COSTS OFF) SELECT * FROM t;
+ QUERY PLAN
+---------------
+ Seq Scan on t
+(1 row)
+
+SELECT * FROM t;
+ docs
+------------------
+ {"a": 1, "b": 1}
+(1 row)
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ QUERY PLAN
+----------------------------------
+ Index Scan using t_docs_idx on t
+(1 row)
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ docs
+------------------
+ {"a": 1, "b": 1}
+(1 row)
+
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 0 | 0 | 0 | t
+(1 row)
+
+UPDATE t SET docs='{"a": 1, "b": 0}';
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ docs
+------
+(0 rows)
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Tests for multi-column indexes
+-- ================================================================
+CREATE TABLE t(id INT, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t(id, (docs->'a'));
+INSERT INTO t VALUES
(1, '{"a": 1, "b": 1}'); +SET SESSION enable_seqscan = OFF; +SET SESSION enable_bitmapscan = OFF; +EXPLAIN (COSTS OFF) SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0; + QUERY PLAN +------------------------------------------------ + Index Scan using t_docs_idx on t + Index Cond: (id > 0) + Filter: (((docs -> 'a'::text))::integer > 0) +(3 rows) + +SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0; + id | docs +----+------------------ + 1 | {"a": 1, "b": 1} +(1 row) + +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 0 | 0 | 0 | t +(1 row) + +-- Changing the id attribute which is an indexed attribute should +-- prevent HOT updates. +UPDATE t SET id = 2; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0; + id | docs +----+------------------ + 2 | {"a": 1, "b": 1} +(1 row) + +-- Changing the docs->'a' field in the indexed attribute 'docs' +-- should prevent HOT updates. +UPDATE t SET docs='{"a": -2, "b": 1}'; +SELECT * FROM check_hot_updates(0); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0; + id | docs +----+------------------- + 2 | {"a": -2, "b": 1} +(1 row) + +-- Leaving the docs->'a' attribute unchanged means that the expression +-- is unchanged and because the 'id' attribute isn't in the modified +-- set the indexed tuple is unchanged, this can go HOT. +UPDATE t SET docs='{"a": -2, "b": 2}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0; + id | docs +----+------------------- + 2 | {"a": -2, "b": 2} +(1 row) + +-- Here we change the 'id' attribute and the 'docs' attribute setting +-- the expression docs->'a' to a new value, this cannot be a HOT update. +UPDATE t SET id = 3, docs='{"a": 3, "b": 3}'; +SELECT * FROM check_hot_updates(1); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 1 | 25.00 | t +(1 row) + +SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0; + id | docs +----+------------------ + 3 | {"a": 3, "b": 3} +(1 row) + +SET SESSION enable_seqscan = ON; +SET SESSION enable_bitmapscan = ON; +DROP TABLE t CASCADE; +-- ================================================================ +-- Relation with unique constraint, partial index +-- ================================================================ +CREATE TABLE users ( + user_id serial primary key, + name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL, + EXCLUDE USING btree (lower(email) WITH =) +); +-- Add some data to the table and then update it in ways that should and should +-- not be HOT updates. 
+INSERT INTO users (name, email) VALUES
+('user1', 'user1@example.com'),
+('user2', 'user2@example.com'),
+('taken', 'taken@EXAMPLE.com'),
+('you', 'you@domain.com'),
+('taken', 'taken@domain.com');
+-- Should fail because of the exclusion constraint on lower(email).
+UPDATE users SET email = 'user1@example.com' WHERE email = 'user2@example.com';
+ERROR: conflicting key value violates exclusion constraint "users_lower_excl"
+DETAIL: Key (lower(email::text))=(user1@example.com) conflicts with existing key (lower(email::text))=(user1@example.com).
+SELECT * FROM check_hot_updates(0, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Should succeed because the email column is not being updated and should go HOT.
+UPDATE users SET name = 'foo' WHERE email = 'user1@example.com';
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Create a partial index on the email column; the updates below exercise it.
+CREATE INDEX idx_users_email_no_example ON users (lower(email)) WHERE lower(email) LIKE '%@example.com%';
+-- An update that changes the email column but not the indexed portion of it and falls outside the constraint.
+-- Shouldn't be a HOT update because of the exclusion constraint.
+UPDATE users SET email = 'you+2@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 3 | 1 | 33.33 | t
+(1 row)
+
+-- An update that changes the email column but not the indexed portion of it and falls within the constraint.
+-- Again, should fail the constraint and fail to be a HOT update.
+UPDATE users SET email = 'taken@domain.com' WHERE name = 'you';
+ERROR: conflicting key value violates exclusion constraint "users_lower_excl"
+DETAIL: Key (lower(email::text))=(taken@domain.com) conflicts with existing key (lower(email::text))=(taken@domain.com).
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE users CASCADE;
+-- ================================================================
+-- Constraints spoiling HOT updates, this time with a range.
+-- ================================================================
+CREATE TABLE events (
+    id serial primary key,
+    name VARCHAR(255) NOT NULL,
+    event_time tstzrange,
+    constraint no_screening_time_overlap exclude using gist (
+        event_time WITH &&
+    )
+);
+-- Add two non-overlapping events.
+INSERT INTO events (id, event_time, name)
+VALUES
+    (1, '["2023-01-01 19:00:00", "2023-01-01 20:45:00"]', 'event1'),
+    (2, '["2023-01-01 21:00:00", "2023-01-01 21:45:00"]', 'event2');
+-- Update the first event to overlap with the second, should fail the constraint and not be HOT.
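+-- (Aside, not executed by this test: an exclusion constraint is backed by a
+-- regular index, here a GiST index on event_time, so any change to event_time
+-- must insert into that index and cannot take the HOT path. The supporting
+-- index is recorded in pg_constraint.conindid, e.g.)
+-- SELECT conname, conindid::regclass
+--   FROM pg_constraint
+--  WHERE conrelid = 'events'::regclass AND contype = 'x';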
+UPDATE events SET event_time = '["2023-01-01 20:00:00", "2023-01-01 21:45:00"]' WHERE id = 1;
+ERROR: conflicting key value violates exclusion constraint "no_screening_time_overlap"
+DETAIL: Key (event_time)=(["Sun Jan 01 20:00:00 2023 PST","Sun Jan 01 21:45:00 2023 PST"]) conflicts with existing key (event_time)=(["Sun Jan 01 21:00:00 2023 PST","Sun Jan 01 21:45:00 2023 PST"]).
+SELECT * FROM check_hot_updates(0, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Update the first event to not overlap with the second, again not HOT due to the constraint.
+UPDATE events SET event_time = '["2023-01-01 22:00:00", "2023-01-01 22:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Update only the first event's name, leaving event_time alone; this time we're HOT because the constrained column isn't modified.
+UPDATE events SET name = 'new name here' WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE events CASCADE;
+-- ================================================================
+-- Ensure that only the modified summarizing indexes are updated.
+-- ================================================================
+CREATE TABLE ex (id SERIAL primary key, att1 JSONB, att2 text, att3 text, att4 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+CREATE INDEX ex_expr2_idx ON ex USING btree((att1->'a'));
+CREATE INDEX ex_expr3_idx ON ex USING btree((att1->'b'));
+CREATE INDEX ex_expr4_idx ON ex USING btree((att1->'c'));
+CREATE INDEX ex_sumr2_idx ON ex USING BRIN(att3);
+CREATE INDEX ex_sumr3_idx ON ex USING BRIN(att4);
+CREATE INDEX ex_expr5_idx ON ex USING btree((att1->'d'));
+INSERT INTO ex (att1, att2) VALUES ('{"data": []}'::json, 'nothing special');
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+--------------+-----------------+------+------
+ 1 | {"data": []} | nothing special | |
+(1 row)
+
+-- Update att2 and att4; both are BRIN/summarizing indexes, so this should be a HOT update and
+-- only update two of the three summarizing indexes.
+UPDATE ex SET att2 = 'special indeed', att4 = 'whatever';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 1 | 1 | 100.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+--------------+----------------+------+----------
+ 1 | {"data": []} | special indeed | | whatever
+(1 row)
+
+-- Update att1 and att2; only one is BRIN/summarizing, so this should NOT be a HOT update.
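+-- (Aside, not executed by this test: check_hot_updates() aggregates essentially
+-- the same counters that the standard statistics view exposes, so an equivalent
+-- manual check would be e.g.)
+-- SELECT relname, n_tup_upd, n_tup_hot_upd
+--   FROM pg_stat_user_tables
+--  WHERE relname = 'ex';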
+UPDATE ex SET att1 = att1 || '{"data": "howdy"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 2 | 1 | 50.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+---------------------+------+----------
+ 1 | {"data": "howdy"} | special, so special | | whatever
+(1 row)
+
+-- Update att2, att3, and att4; all are BRIN/summarizing indexes, so this should be a HOT update
+-- and yet still update all three summarizing indexes.
+UPDATE ex SET att2 = 'a', att3 = 'b', att4 = 'c';
+SELECT * FROM check_hot_updates(2, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 3 | 2 | 66.67 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+------+------+------
+ 1 | {"data": "howdy"} | a | b | c
+(1 row)
+
+-- Update att1, att2, and att3; every indexed value that actually changes is covered only by
+-- BRIN/summarizing indexes, so this should be a HOT update and yet still update the modified
+-- summarizing indexes.
+UPDATE ex SET att1 = '{"data": "howdy"}', att2 = 'd', att3 = 'e';
+SELECT * FROM check_hot_updates(3, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 4 | 3 | 75.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+------+------+------
+ 1 | {"data": "howdy"} | d | e | c
+(1 row)
+
+DROP TABLE ex CASCADE;
+-- ================================================================
+-- Don't update unmodified summarizing indexes but do allow HOT
+-- ================================================================
+CREATE TABLE ex (att1 JSONB, att2 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+INSERT INTO ex VALUES ('{"data": []}', 'nothing special');
+-- Update the unindexed value of att1; this should be a HOT update and should not
+-- need to update the summarizing index.
+UPDATE ex SET att1 = att1 || '{"status": "stalemate"}';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update the indexed value of att2, a summarized value, this is a summarized
+-- only update and should use the HOT path while still triggering an update to
+-- the summarizing BRIN index.
+UPDATE ex SET att2 = 'special indeed';
+SELECT * FROM check_hot_updates(2, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 2 | 2 | 100.00 | t
+(1 row)
+
+-- The update to att1 doesn't change the indexed value while the update to att2 does,
+-- this again is a summarized only update and should use the HOT path as well as
+-- trigger an update to the BRIN index.
+UPDATE ex SET att1 = att1 || '{"status": "checkmate"}', att2 = 'special, so special'; +SELECT * FROM check_hot_updates(3, 'ex'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + ex | 3 | 3 | 100.00 | t +(1 row) + +-- This updates both indexes, the expression index on att1 and the summarizing +-- index on att2. This should not be a HOT update because there are modified +-- indexes and only some are summarized, not all. This should force all +-- indexes to be updated. +UPDATE ex SET att1 = att1 || '{"data": [1,2,3]}', att2 = 'do you want to play a game?'; +SELECT * FROM check_hot_updates(3, 'ex'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + ex | 4 | 3 | 75.00 | t +(1 row) + +DROP TABLE ex CASCADE; +-- ================================================================ +-- Ensure custom type equality operators are used +-- ================================================================ +CREATE TYPE my_custom_type AS (val int); +-- Comparison functions (returns boolean) +CREATE FUNCTION my_custom_lt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val < b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +CREATE FUNCTION my_custom_le(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val <= b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +CREATE FUNCTION my_custom_eq(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val = b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +CREATE FUNCTION my_custom_ge(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val >= b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +CREATE FUNCTION my_custom_gt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val > b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +CREATE FUNCTION my_custom_ne(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val != b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +-- Comparison function (returns -1, 0, 1) +CREATE FUNCTION my_custom_cmp(a my_custom_type, b my_custom_type) RETURNS int AS $$ +BEGIN + IF a.val < b.val THEN + RETURN -1; + ELSIF a.val > b.val THEN + RETURN 1; + ELSE + RETURN 0; + END IF; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +-- Create the operators +CREATE OPERATOR < ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_lt, + COMMUTATOR = >, + NEGATOR = >= +); +CREATE OPERATOR <= ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_le, + COMMUTATOR = >=, + NEGATOR = > +); +CREATE OPERATOR = ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_eq, + COMMUTATOR = =, + NEGATOR = <> +); +CREATE OPERATOR >= ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_ge, + COMMUTATOR = <=, + NEGATOR = < +); +CREATE OPERATOR > ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_gt, + COMMUTATOR = <, + NEGATOR = <= +); +CREATE OPERATOR <> ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_ne, + COMMUTATOR = <>, + NEGATOR = = +); +-- Create the operator class (including the support function) +CREATE OPERATOR CLASS my_custom_ops + DEFAULT FOR TYPE my_custom_type USING btree AS + OPERATOR 
1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + FUNCTION 1 my_custom_cmp(my_custom_type, my_custom_type); +-- Create the table +CREATE TABLE my_table ( + id int, + custom_val my_custom_type +); +-- Insert some data +INSERT INTO my_table (id, custom_val) VALUES +(1, ROW(3)::my_custom_type), +(2, ROW(1)::my_custom_type), +(3, ROW(4)::my_custom_type), +(4, ROW(2)::my_custom_type); +-- Create a function to use when indexing +CREATE OR REPLACE FUNCTION abs_val(val my_custom_type) RETURNS int AS $$ +BEGIN + RETURN abs(val.val); +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; +-- Create the index +CREATE INDEX idx_custom_val_abs ON my_table (abs_val(custom_val)); +-- Update 1 +UPDATE my_table SET custom_val = ROW(5)::my_custom_type WHERE id = 1; +SELECT * FROM check_hot_updates(0, 'my_table'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + my_table | 1 | 0 | 0.00 | t +(1 row) + +-- Update 2 +UPDATE my_table SET custom_val = ROW(0)::my_custom_type WHERE custom_val < ROW(3)::my_custom_type; +SELECT * FROM check_hot_updates(0, 'my_table'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + my_table | 3 | 0 | 0.00 | t +(1 row) + +-- Update 3 +UPDATE my_table SET custom_val = ROW(6)::my_custom_type WHERE id = 3; +SELECT * FROM check_hot_updates(0, 'my_table'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + my_table | 4 | 0 | 0.00 | t +(1 row) + +-- Update 4 +UPDATE my_table SET id = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 'my_table'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + my_table | 5 | 1 | 20.00 | t +(1 row) + +-- Query using the index +SELECT * FROM my_table WHERE abs_val(custom_val) = 6; + id | custom_val +----+------------ + 3 | (6) +(1 row) + +-- Clean up test case +DROP TABLE my_table CASCADE; +DROP OPERATOR CLASS my_custom_ops USING btree CASCADE; +DROP OPERATOR < (my_custom_type, my_custom_type); +DROP OPERATOR <= (my_custom_type, my_custom_type); +DROP OPERATOR = (my_custom_type, my_custom_type); +DROP OPERATOR >= (my_custom_type, my_custom_type); +DROP OPERATOR > (my_custom_type, my_custom_type); +DROP OPERATOR <> (my_custom_type, my_custom_type); +DROP FUNCTION my_custom_lt(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_le(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_eq(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_ge(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_gt(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_ne(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_cmp(my_custom_type, my_custom_type); +DROP FUNCTION abs_val(my_custom_type); +DROP TYPE my_custom_type CASCADE; +-- Cleanup +DROP FUNCTION check_hot_updates(int, text, text); +DROP COLLATION case_insensitive; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index cc6d799bceaf0..f3db9270fe6b8 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -125,6 +125,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # 
---------- test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate + +# ---------- +# Another group of parallel tests, these focused on heap HOT updates +# ---------- +test: heap_hot_updates + # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage diff --git a/src/test/regress/sql/heap_hot_updates.sql b/src/test/regress/sql/heap_hot_updates.sql new file mode 100644 index 0000000000000..8d5510989df0c --- /dev/null +++ b/src/test/regress/sql/heap_hot_updates.sql @@ -0,0 +1,1325 @@ +-- ================================================================ +-- Test Suite for Heap-only (HOT) Updates +-- ================================================================ + +-- Setup: Create function to measure HOT updates +CREATE OR REPLACE FUNCTION check_hot_updates( + expected INT, + p_table_name TEXT DEFAULT 't', + p_schema_name TEXT DEFAULT current_schema() +) +RETURNS TABLE ( + table_name TEXT, + total_updates BIGINT, + hot_updates BIGINT, + hot_update_percentage NUMERIC, + matches_expected BOOLEAN +) +LANGUAGE plpgsql +AS $$ +DECLARE + v_relid oid; + v_qualified_name TEXT; + v_hot_updates BIGINT; + v_updates BIGINT; + v_xact_hot_updates BIGINT; + v_xact_updates BIGINT; +BEGIN + -- Force statistics update + PERFORM pg_stat_force_next_flush(); + + -- Get table OID + v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name); + v_relid := v_qualified_name::regclass; + + IF v_relid IS NULL THEN + RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name; + END IF; + + -- Get cumulative + transaction stats + v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0); + v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0); + v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0); + v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0); + + v_hot_updates := v_hot_updates + v_xact_hot_updates; + v_updates := v_updates + v_xact_updates; + + RETURN QUERY + SELECT + p_table_name::TEXT, + v_updates::BIGINT, + v_hot_updates::BIGINT, + CASE WHEN v_updates > 0 + THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2) + ELSE 0 + END, + (v_hot_updates = expected)::BOOLEAN; +END; +$$; + +CREATE COLLATION case_insensitive ( + provider = libc, + locale = 'C' +); + +-- ================================================================ +-- Basic JSONB Expression Index +-- ================================================================ +CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}'); + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update indexed JSONB field - should NOT be HOT +UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update non-indexed field again - should be HOT +UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +DROP TABLE t; + +-- ================================================================ +-- JSONB Expression Index an some including columns +-- 
================================================================ +CREATE TABLE t(id INT PRIMARY KEY, docs JSONB, status TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}', 'ok'); + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET status = 'not ok' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +DROP TABLE t; + +-- ================================================================ +-- Partial Index with Predicate Transitions +-- ================================================================ +CREATE TABLE t(id INT, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_value_idx ON t(value) WHERE value > 10; +INSERT INTO t VALUES (1, 5); + +-- Both outside predicate - should be HOT +UPDATE t SET value = 8 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET value = 15 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Both inside predicate, value changes - should NOT be HOT +UPDATE t SET value = 20 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Transition out of predicate - should NOT be HOT +UPDATE t SET value = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Both outside predicate again - should be HOT +UPDATE t SET value = 3 WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +DROP TABLE t; + +-- ================================================================ +-- Expression Index with Partial Predicate +-- ================================================================ +CREATE TABLE t(docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((docs->'status')) + WHERE (docs->'priority')::int > 5; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3}'); + +-- Both outside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 4}'; +SELECT * FROM check_hot_updates(1); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 10}'; +SELECT * FROM check_hot_updates(1); + +-- Inside predicate, status changes - should NOT be HOT +UPDATE t SET docs = '{"status": "active", "priority": 10}'; +SELECT * FROM check_hot_updates(1); + +-- Inside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "active", "priority": 8}'; +SELECT * FROM check_hot_updates(2); + +DROP TABLE t; + +-- ================================================================ +-- GIN Index on JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}'); + +-- Change tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + +-- Change tags again - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + +-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1; +SELECT * FROM 
check_hot_updates(0); + +DROP TABLE t; + +-- ================================================================ +-- GIN Index with Unchanged Keys +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create GIN index on specific path +CREATE INDEX t_gin_idx ON t USING gin((data->'tags')); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}'); + +-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Change indexed tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- GIN with jsonb_path_ops +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops); +INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}'); + +-- Change value at different path - keys changed, NOT HOT +UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + +DROP TABLE t; + +-- ================================================================ +-- Multi-Column Expression Index +-- ================================================================ +CREATE TABLE t(id INT, a INT, b INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t(id, abs(a), abs(b)); +INSERT INTO t VALUES (1, -5, -10); + +-- Change sign but not abs value - should be HOT +UPDATE t SET a = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Change abs value - should NOT be HOT +UPDATE t SET b = -15 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Change id - should NOT be HOT +UPDATE t SET id = 2 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- Mixed Index Types (BRIN + Expression) +-- ================================================================ +CREATE TABLE t(id INT, value INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_idx ON t USING brin(value); +CREATE INDEX t_expr_idx ON t((data->'status')); +INSERT INTO t VALUES (1, 100, '{"status": "active"}'); + +-- Update only BRIN column - should be HOT +UPDATE t SET value = 200 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update only expression column - should NOT be HOT +UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update both - should NOT be HOT +UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- Expression with COLLATION and BTREE (nbtree) index +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + name TEXT COLLATE case_insensitive +) WITH (autovacuum_enabled = off, fillfactor = 70); + +CREATE INDEX t_lower_idx ON t USING BTREE (name COLLATE case_insensitive); + +INSERT INTO t VALUES (1, 'ALICE'); + +-- Change case but not value - should NOT be HOT in 
BTREE +UPDATE t SET name = 'Alice' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + +-- Change to new value - should NOT be HOT +UPDATE t SET name = 'BOB' WHERE id = 1; +SELECT * FROM check_hot_updates(0); + +DROP TABLE t; + +-- ================================================================ +-- Array Expression Index +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_array_len_idx ON t(array_length(tags, 1)); +INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']); + +-- Same length, different elements - should be HOT +UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Different length - should NOT be HOT +UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- Nested JSONB Expression and JSONB equality '->' (not '->>') +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_nested_idx ON t((data->'user'->'name')); +INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}'); + +-- Change nested non-indexed field - should be HOT +UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}'; +SELECT * FROM check_hot_updates(1); + +-- Change nested indexed field - should NOT be HOT +UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}'; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- Complex Predicate on Multiple JSONB Fields +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((data->'status')) + WHERE (data->'priority')::int > 5 + AND (data->'active')::boolean = true; + +INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}'); + +-- Outside predicate (priority too low) - should be HOT +UPDATE t SET data = '{"status": "done", "priority": 3, "active": true}'; +SELECT * FROM check_hot_updates(1); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}'; +SELECT * FROM check_hot_updates(1); + +-- Inside predicate, change to outside (active = false) - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}'; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- GIN Array Index - Order Insensitive Extraction +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + data JSONB +) WITH (autovacuum_enabled = off, fillfactor = 70); + +-- GIN index on JSONB array (extracts all elements) +CREATE INDEX t_items_gin ON t USING GIN ((data->'items')); + +INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}'); + +-- Update: Reorder array elements +-- JSONB equality: NOT equal (different arrays) +-- GIN extraction: Same elements extracted (might allow HOT if not careful) +UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update: Add/remove element +UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE 
t; + +-- ================================================================ +-- TOASTed Values in Expression Index +-- ================================================================ +CREATE TABLE t(id INT, large_text TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10)); + +INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier'); + +-- Change end of string, prefix unchanged - should be HOT +UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Change prefix - should NOT be HOT +UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t; + +-- ================================================================ +-- TEST: GIN with TOASTed TEXT (tsvector) +-- ================================================================ +CREATE TABLE t(id INT, content TEXT, search_vec tsvector) + WITH (autovacuum_enabled = off, fillfactor = 70); + +-- Create trigger to maintain tsvector +CREATE TRIGGER tsvectorupdate_toast + BEFORE INSERT OR UPDATE ON t + FOR EACH ROW EXECUTE FUNCTION + tsvector_update_trigger(search_vec, 'pg_catalog.english', content); + +CREATE INDEX t_gin ON t USING gin(search_vec); + +-- Insert with large content (will be TOASTed) +INSERT INTO t (id, content) VALUES + (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000)); + +-- Verify initial state +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important'); +-- Expected: 1 row + +-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time +-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed. +-- This means the comparison sees old tsvector vs. trigger-modified tsvector, +-- not the natural progression. HOT won't happen because the trigger changed +-- the indexed column. 
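+-- (Aside, not part of the original test: on PostgreSQL 12 and later the trigger
+-- could instead be a stored generated column, sketched below on a hypothetical
+-- table t2. The HOT check then compares the old tsvector with the freshly
+-- derived one, so a HOT update remains possible whenever the derived value is
+-- unchanged.)
+-- CREATE TABLE t2(id INT, content TEXT,
+--     search_vec tsvector GENERATED ALWAYS AS
+--         (to_tsvector('pg_catalog.english', content)) STORED);
+-- CREATE INDEX t2_gin ON t2 USING gin(search_vec);
+-- DROP TABLE t2;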
+ +-- Update: Even though content keywords unchanged, trigger still fires +UPDATE t +SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0); +-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT) +-- This is actually correct behavior - the trigger updated an indexed column + +-- Update: Change indexed keywords +UPDATE t +SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0); +-- Expected: 0 HOT (index keys changed) + +-- Verify query correctness +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical'); +-- Expected: 1 row + +DROP TABLE t CASCADE; + +-- ================================================================ +-- TEST: GIN with TOASTed JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin((data->'tags')); + +-- Insert with TOASTed JSONB +INSERT INTO t (id, data) VALUES + (1, jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('x', 10000) + )); + +-- Update: Change large_field, tags unchanged - should be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1); +-- Expected: 1 HOT update + +-- Update: Change tags - should NOT be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "sql"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1); +-- Expected: Still 1 HOT + +-- Verify correctness +SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb; +-- Expected: 0 rows +SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb; +-- Expected: 1 row + +DROP TABLE t CASCADE; + +-- ================================================================ +-- TEST: GIN with Array of Large Strings +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin(tags); + +-- Insert with large array elements (might be TOASTed) +INSERT INTO t (id, tags) VALUES + (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]); + +-- Update: Change to different large values - NOT HOT +UPDATE t +SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(0); +-- Expected: 0 HOT (keys actually changed) + +-- Update: Keep same tag values, just reorder - SHOULD BE HOT +-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3] +-- extract to the same sorted key set ['tag3','tag4']) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1); +-- Expected: 1 HOT (GIN keys semantically identical) + +-- Update: Remove an element - NOT HOT (keys changed) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1); +-- Expected: Still 1 HOT (not this one) + +DROP TABLE t CASCADE; + +-- ================================================================ +-- BRIN Index with Partial Predicate +-- ================================================================ +CREATE TABLE t( + id INT PRIMARY KEY, + value INT, + description TEXT +) WITH (autovacuum_enabled = off, fillfactor = 70); + +CREATE 
INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100; + +INSERT INTO t VALUES (1, 50, 'below range'); + +-- Test 1: Outside predicate +UPDATE t SET description = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Validate: Predicate query returns 0 rows +SELECT COUNT(*) as cnt FROM t WHERE value > 100; + +-- Test 2: Transition into predicate +UPDATE t SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +-- Validate: Predicate query returns 1 row with correct value +SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100; + +-- Test 3: Inside predicate, value changes +UPDATE t SET value = 160, description = 'updated again' WHERE id = 1; +SELECT * FROM check_hot_updates(3); + +-- Validate: Updated value (160) is returned +SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100; + +-- Test 4: Transition out of predicate +UPDATE t SET value = 50 WHERE id = 1; +SELECT * FROM check_hot_updates(4); + +SELECT COUNT(*) as cnt FROM t WHERE value > 100; + +SELECT id, value, description FROM t; + +DROP TABLE t CASCADE; + +-- ================================================================ +-- HASH Index (Simple Column) +-- ================================================================ +CREATE TABLE t(id INT, code VARCHAR(20), description TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_idx ON t USING hash(code); +INSERT INTO t VALUES (1, 'CODE001', 'initial'); + +-- Update non-indexed column - should be HOT +UPDATE t SET description = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update indexed column - HASH index requires update, NOT HOT +UPDATE t SET code = 'CODE002' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update both - NOT HOT +UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Back to original code - NOT HOT (different hash bucket location) +UPDATE t SET code = 'CODE001' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t CASCADE; + +-- ================================================================ +-- HASH Index on Expression +-- ================================================================ +CREATE TABLE t(id INT, email TEXT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email)); +INSERT INTO t VALUES (1, 'Alice@Example.com', '{"status": "new"}'); + +-- Update non-indexed field - should be HOT +UPDATE t SET data = '{"status": "active"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update email with case change only (same lowercase) - should be HOT +UPDATE t SET email = 'alice@example.com' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +-- Update email to different lowercase - NOT HOT +UPDATE t SET email = 'bob@example.com' WHERE id = 1; +SELECT * FROM check_hot_updates(2); + +DROP TABLE t CASCADE; + +-- ================================================================ +-- HASH Index on JSONB Field +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash((data->'category')); +INSERT INTO t VALUES (1, '{"category": "books", "title": "PostgreSQL Guide"}'); + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET data = '{"category": "books", "title": "PostgreSQL Handbook"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + 
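+-- Illustrative aside, assuming check_hot_updates() (created earlier in this
+-- file) is a thin wrapper over the cumulative statistics views; one plausible
+-- way to inspect the same information by hand, subject to the usual
+-- statistics-flush lag, is shown below.  Kept commented out so the expected
+-- output of this test is unchanged:
+--
+--   SELECT n_tup_upd, n_tup_hot_upd
+--     FROM pg_stat_user_tables
+--    WHERE relname = 't';
+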
+-- Update indexed JSONB field - NOT HOT +UPDATE t SET data = '{"category": "videos", "title": "PostgreSQL Handbook"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update both - NOT HOT +UPDATE t SET data = '{"category": "courses", "title": "PostgreSQL Basics"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t CASCADE; + +-- ================================================================ +-- Multiple HASH Indexes +-- ================================================================ +CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash(category); +CREATE INDEX t_hash_status_idx ON t USING hash(status); +INSERT INTO t VALUES (1, 'electronics', 'active', 100); + +-- Update non-indexed column - should be HOT +UPDATE t SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update one indexed column - NOT HOT +UPDATE t SET category = 'books' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update other indexed column - NOT HOT +UPDATE t SET status = 'inactive' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +-- Update both indexed columns - NOT HOT +UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1; +SELECT * FROM check_hot_updates(1); + +DROP TABLE t CASCADE; + +-- ================================================================ +-- BRIN vs HASH Comparison +-- ================================================================ +CREATE TABLE t_brin(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE TABLE t_hash(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); + +CREATE INDEX t_brin_value_idx ON t_brin USING brin(value); +CREATE INDEX t_hash_value_idx ON t_hash USING hash(value); + +INSERT INTO t_brin VALUES (1, 100, 'initial'); +INSERT INTO t_hash VALUES (1, 100, 'initial'); + +-- Same update on both - different HOT behavior expected +-- BRIN: might allow HOT (range summary unchanged) +-- HASH: blocks HOT (hash bucket changed) +UPDATE t_brin SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't_brin'); +-- Expected: 1 HOT (BRIN allows it for single row) + +UPDATE t_hash SET value = 150 WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't_hash'); +-- Expected: 0 HOT (HASH blocks it) + +DROP TABLE t_brin CASCADE; +DROP TABLE t_hash CASCADE; + +-- ================================================================ +-- HASH Index with NULL Values +-- ================================================================ +CREATE TABLE t(id INT, category VARCHAR, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_hash_category_idx ON t USING hash(category); +INSERT INTO t VALUES (1, 'electronics', 'initial'); + +-- Update indexed column to NULL - NOT HOT (hash value changed) +UPDATE t SET category = NULL WHERE id = 1; +SELECT * FROM check_hot_updates(0); +-- Expected: 0 HOT + +-- Update indexed column from NULL to value - NOT HOT +UPDATE t SET category = 'books' WHERE id = 1; +SELECT * FROM check_hot_updates(0); +-- Expected: 0 HOT + +-- Update non-indexed column - should be HOT +UPDATE t SET data = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1); +-- Expected: 1 HOT + +DROP TABLE t CASCADE; + +-- ================================================================ +-- BRIN on JSONB Field +-- ================================================================ +CREATE TABLE t(id INT, metrics JSONB) + WITH 
(autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test it on an expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+    CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (BRIN permits single-row updates)
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+    WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT
+
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (HASH blocks it)
+
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+-- Expected: 3 HOT
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Index both a field within a JSONB document and the whole document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+
+-- The update impacts the index on the whole document attribute, so it can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Two indexes on a JSONB document, one partial
+-- ================================================================
+CREATE TABLE t (docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+INSERT INTO t (docs) VALUES ('{"a": 0, "b": 0}');
+INSERT INTO t (docs) SELECT jsonb_build_object('b', n) FROM generate_series(100, 10000) as n;
+CREATE INDEX t_idx_a ON t ((docs->'a'));
+CREATE INDEX t_idx_b ON t ((docs->'b')) WHERE (docs->'b')::numeric > 9;
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
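+
+-- Illustrative aside, kept commented out so it does not change the expected
+-- output of this test: at this point only the generate_series() rows satisfy
+-- the predicate of t_idx_b; the '{"a": 0, "b": 0}' row has no entry in the
+-- partial index.  A quick way to eyeball that split is to compare row counts:
+--
+--   SELECT count(*) FROM t;                                 -- all rows
+--   SELECT count(*) FROM t WHERE (docs->'b')::numeric > 9;  -- rows t_idx_b covers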
+
+-- Leave 'a' unchanged but modify 'b' to a value outside of the index predicate.
+-- This should be a HOT update because neither index is changed.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 1) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index does not contain a value for 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- Leave 'a' unchanged but modify 'b' to a value within the index predicate.
+-- This represents a change for field 'b' from unindexed to indexed, and so
+-- it should not take the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 10) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- This update modifies the value of 'a', an indexed field, so it also cannot
+-- be a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 1, 'b', 10) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+
+-- This update changes both 'a' and 'b' to new values, so it cannot use the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 12) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- This update changes 'b' to a value outside its predicate, requiring that
+-- we remove it from the index.  That's a transition that can't be done
+-- during a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 1) WHERE (docs->'b')::numeric = 12;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index no longer contains the value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+-- ================================================================
+-- Tests to check expression indexes
+-- ================================================================
+CREATE TABLE t(a INT, b INT) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx_a ON t(abs(a)) WHERE abs(a) > 10;
+CREATE INDEX t_idx_b ON t(abs(b));
+INSERT INTO t VALUES (-1, -1), (-2, -2), (-3, -3), (-4, -4), (-5, -5);
+INSERT INTO t SELECT m, n FROM generate_series(-10000, -10) AS m, abs(m) AS n;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+-- The indexed value abs(b) hasn't changed, so this should be a HOT update.
+-- (-5, -5) -> (-5, 5)
+UPDATE t SET b = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(1);
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+
+-- Now that we're not checking the predicate of the partial index, this
+-- update of a from -5 to 5 should be HOT: we ignore the predicate, check
+-- the expression abs(a), and find it unchanged.
+-- (-5, 5) -> (5, 5)
+UPDATE t SET a = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(2);
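+
+-- Illustrative aside (commented out so the expected output above stays the
+-- same): both decisions above hinge on the indexed expressions, not the raw
+-- column values, being unchanged.  The before/after expression values are
+-- easy to spot-check by hand:
+--
+--   SELECT abs(-5) = abs(5) AS indexed_expression_unchanged;  -- true, so HOT is allowed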
+
+-- This update moves a into the partial index and should not be HOT.
+-- Let's make sure of that and check the index as well.
+-- (-4, -4) -> (-11, -4)
+UPDATE t SET a = -11 WHERE a = -4;
+SELECT * FROM check_hot_updates(2);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- (-11, -4) -> (11, -4)
+UPDATE t SET a = 11 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- (11, -4) -> (-4, -4)
+UPDATE t SET a = -4 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- This update of a from 5 to -1 is HOT despite that attribute being indexed,
+-- because both the before and after values fall outside the partial index
+-- predicate.
+-- (5, 5) -> (-1, 5)
+UPDATE t SET a = -1 WHERE a = 5;
+SELECT * FROM check_hot_updates(4);
+
+-- This update rewrites the row whose b = -2, leaving a at -2; the before/after
+-- values of a are both outside the predicate of the partial index, so it is HOT.
+-- (-2, -2) -> (-2, -2)
+UPDATE t SET a = -2 WHERE b = -2;
+SELECT * FROM check_hot_updates(5);
+
+-- The indexed value for b isn't changing, this should be HOT.
+-- (-2, -2) -> (-2, 2)
+UPDATE t SET b = 2 WHERE b = -2;
+SELECT * FROM check_hot_updates(6);
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+
+SELECT * FROM t WHERE a > -10 AND a < 10;
+
+-- Both rows with a = -1 are updated.  The before and after values of a are
+-- outside the predicate of the partial index, and because we check the
+-- expression values this should be HOT for both rows.
+-- (-1, -1) -> (5, -1)
+-- (-1, 5) -> (5, 5)
+UPDATE t SET a = 5 WHERE a = -1;
+SELECT * FROM check_hot_updates(8);
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+
+-- ================================================================
+-- JSONB with a single partial index: key on one field, predicate on another
+-- ================================================================
+CREATE TABLE t(docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'a')) WHERE (docs->'b')::integer = 1;
+INSERT INTO t VALUES ('{"a": 1, "b": 1}');
+
+EXPLAIN (COSTS OFF) SELECT * FROM t;
+SELECT * FROM t;
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::integer = 1;
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+
+SELECT * FROM check_hot_updates(0);
+
+-- Changing 'b' from 1 to 0 leaves the indexed expression on 'a' alone but
+-- moves the row out of the partial index predicate, so this cannot be HOT.
+UPDATE t SET docs='{"a": 1, "b": 0}';
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Tests for multi-column indexes
+-- ================================================================
+CREATE TABLE t(id INT, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t(id, (docs->'a'));
+INSERT INTO t VALUES (1, '{"a": 1, "b": 1}');
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+SELECT * FROM check_hot_updates(0);
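+
+-- Illustrative aside (commented out; running it would add a row to the
+-- expected output): the composite key of t_docs_idx is (id, (docs->'a')),
+-- which is why touching either member below is expected to block HOT.
+-- The definition can be confirmed from the catalog:
+--
+--   SELECT pg_get_indexdef('t_docs_idx'::regclass);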
+
+-- Changing the id attribute, which is an indexed attribute, should
+-- prevent HOT updates.
+UPDATE t SET id = 2;
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+-- Changing the docs->'a' field in the indexed attribute 'docs'
+-- should prevent HOT updates.
+UPDATE t SET docs='{"a": -2, "b": 1}';
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+
+-- Leaving the docs->'a' attribute unchanged means that the expression is
+-- unchanged, and because the 'id' attribute isn't in the modified set the
+-- indexed tuple is unchanged, so this can go HOT.
+UPDATE t SET docs='{"a": -2, "b": 2}';
+SELECT * FROM check_hot_updates(1);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+
+-- Here we change the 'id' attribute and the 'docs' attribute, setting the
+-- expression docs->'a' to a new value, so this cannot be a HOT update.
+UPDATE t SET id = 3, docs='{"a": 3, "b": 3}';
+SELECT * FROM check_hot_updates(1);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Relation with a uniqueness-enforcing exclusion constraint, partial index
+-- ================================================================
+CREATE TABLE users (
+    user_id serial primary key,
+    name VARCHAR(255) NOT NULL,
+    email VARCHAR(255) NOT NULL,
+    EXCLUDE USING btree (lower(email) WITH =)
+);
+
+-- Add some data to the table and then update it in ways that should and should
+-- not be HOT updates.
+INSERT INTO users (name, email) VALUES
+('user1', 'user1@example.com'),
+('user2', 'user2@example.com'),
+('taken', 'taken@EXAMPLE.com'),
+('you', 'you@domain.com'),
+('taken', 'taken@domain.com');
+
+-- Should fail because of the exclusion constraint enforcing uniqueness of lower(email).
+UPDATE users SET email = 'user1@example.com' WHERE email = 'user2@example.com';
+SELECT * FROM check_hot_updates(0, 'users');
+
+-- Should succeed because the email column is not being updated, and should go HOT.
+UPDATE users SET name = 'foo' WHERE email = 'user1@example.com';
+SELECT * FROM check_hot_updates(1, 'users');
+
+-- Create a partial index on the email column; the updates below exercise it.
+CREATE INDEX idx_users_email_no_example ON users (lower(email)) WHERE lower(email) LIKE '%@example.com%';
+
+-- An update that changes the email column to a value that stays outside the
+-- partial index predicate.  The exclusion constraint still indexes lower(email),
+-- which changes, so this shouldn't be a HOT update.
+UPDATE users SET email = 'you+2@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+
+-- An update that changes the email column to a value whose lower(email) collides
+-- with an existing row.  It should fail the exclusion constraint and therefore
+-- cannot be a HOT update either.
+UPDATE users SET email = 'taken@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+
+DROP TABLE users CASCADE;
+
+-- ================================================================
+-- Constraints spoiling HOT updates, this time with a range.
+-- ================================================================
+CREATE TABLE events (
+    id serial primary key,
+    name VARCHAR(255) NOT NULL,
+    event_time tstzrange,
+    constraint no_screening_time_overlap exclude using gist (
+        event_time WITH &&
+    )
+);
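+
+-- Illustrative aside (left commented out so the test's expected output is not
+-- affected): the exclusion constraint above is backed by a GiST index on
+-- event_time, and it is that index which blocks HOT whenever event_time
+-- changes.  The constraint and its definition can be listed from the catalogs:
+--
+--   SELECT conname, pg_get_constraintdef(oid)
+--     FROM pg_constraint
+--    WHERE conrelid = 'events'::regclass AND contype = 'x';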
+
+-- Add two non-overlapping events.
+INSERT INTO events (id, event_time, name)
+VALUES
+  (1, '["2023-01-01 19:00:00", "2023-01-01 20:45:00"]', 'event1'),
+  (2, '["2023-01-01 21:00:00", "2023-01-01 21:45:00"]', 'event2');
+
+-- Update the first event to overlap with the second; this should fail the constraint and not be HOT.
+UPDATE events SET event_time = '["2023-01-01 20:00:00", "2023-01-01 21:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+
+-- Update the first event so it no longer overlaps with the second; still not HOT because the indexed event_time changed.
+UPDATE events SET event_time = '["2023-01-01 22:00:00", "2023-01-01 22:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+
+-- Update only the event's name; event_time, the column behind the exclusion constraint's index, is unchanged, so this time we're HOT.
+UPDATE events SET name = 'new name here' WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'events');
+
+DROP TABLE events CASCADE;
+
+-- ================================================================
+-- Ensure that only the modified summarizing indexes are updated.
+-- ================================================================
+CREATE TABLE ex (id SERIAL primary key, att1 JSONB, att2 text, att3 text, att4 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+CREATE INDEX ex_expr2_idx ON ex USING btree((att1->'a'));
+CREATE INDEX ex_expr3_idx ON ex USING btree((att1->'b'));
+CREATE INDEX ex_expr4_idx ON ex USING btree((att1->'c'));
+CREATE INDEX ex_sumr2_idx ON ex USING BRIN(att3);
+CREATE INDEX ex_sumr3_idx ON ex USING BRIN(att4);
+CREATE INDEX ex_expr5_idx ON ex USING btree((att1->'d'));
+INSERT INTO ex (att1, att2) VALUES ('{"data": []}'::json, 'nothing special');
+
+SELECT * FROM ex;
+
+-- Update att2 and att4; both are covered only by BRIN/summarizing indexes, so this should be a HOT
+-- update that updates only two of the three summarizing indexes.
+UPDATE ex SET att2 = 'special indeed', att4 = 'whatever';
+SELECT * FROM check_hot_updates(1, 'ex');
+SELECT * FROM ex;
+
+-- Update att1 and att2; only one of the modified columns is covered by a summarizing index
+-- (att1->'data' is btree-indexed), so this should NOT be a HOT update.
+UPDATE ex SET att1 = att1 || '{"data": "howdy"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(1, 'ex');
+SELECT * FROM ex;
+
+-- Update att2, att3, and att4; all are covered by BRIN/summarizing indexes, so this should be a
+-- HOT update that nevertheless updates all three summarizing indexes.
+UPDATE ex SET att2 = 'a', att3 = 'b', att4 = 'c';
+SELECT * FROM check_hot_updates(2, 'ex');
+SELECT * FROM ex;
+
+-- Update att1, att2, and att3; att1->'data' keeps its previous value, so no btree expression index
+-- changes and only summarizing indexes are modified.  This should be a HOT update that still
+-- updates the two modified summarizing indexes.
+UPDATE ex SET att1 = '{"data": "howdy"}', att2 = 'd', att3 = 'e';
+SELECT * FROM check_hot_updates(3, 'ex');
+SELECT * FROM ex;
+
+DROP TABLE ex CASCADE;
+
+-- ================================================================
+-- Don't update unmodified summarizing indexes but do allow HOT
+-- ================================================================
+CREATE TABLE ex (att1 JSONB, att2 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+INSERT INTO ex VALUES ('{"data": []}', 'nothing special');
+
+-- Update the unindexed portion of att1; this should be a HOT update and should
+-- not require an update to the (unmodified) summarizing index.
+UPDATE ex SET att1 = att1 || '{"status": "stalemate"}'; +SELECT * FROM check_hot_updates(1, 'ex'); + +-- Update the indexed value of att2, a summarized value, this is a summarized +-- only update and should use the HOT path while still triggering an update to +-- the summarizing BRIN index. +UPDATE ex SET att2 = 'special indeed'; +SELECT * FROM check_hot_updates(2, 'ex'); + +-- Update to att1 doesn't change the indexed value while the update to att2 does, +-- this again is a summarized only update and should use the HOT path as well as +-- trigger an update to the BRIN index. +UPDATE ex SET att1 = att1 || '{"status": "checkmate"}', att2 = 'special, so special'; +SELECT * FROM check_hot_updates(3, 'ex'); + +-- This updates both indexes, the expression index on att1 and the summarizing +-- index on att2. This should not be a HOT update because there are modified +-- indexes and only some are summarized, not all. This should force all +-- indexes to be updated. +UPDATE ex SET att1 = att1 || '{"data": [1,2,3]}', att2 = 'do you want to play a game?'; +SELECT * FROM check_hot_updates(3, 'ex'); + +DROP TABLE ex CASCADE; + +-- ================================================================ +-- Ensure custom type equality operators are used +-- ================================================================ + +CREATE TYPE my_custom_type AS (val int); + +-- Comparison functions (returns boolean) +CREATE FUNCTION my_custom_lt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val < b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +CREATE FUNCTION my_custom_le(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val <= b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +CREATE FUNCTION my_custom_eq(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val = b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +CREATE FUNCTION my_custom_ge(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val >= b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +CREATE FUNCTION my_custom_gt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val > b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +CREATE FUNCTION my_custom_ne(a my_custom_type, b my_custom_type) RETURNS boolean AS $$ +BEGIN + RETURN a.val != b.val; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +-- Comparison function (returns -1, 0, 1) +CREATE FUNCTION my_custom_cmp(a my_custom_type, b my_custom_type) RETURNS int AS $$ +BEGIN + IF a.val < b.val THEN + RETURN -1; + ELSIF a.val > b.val THEN + RETURN 1; + ELSE + RETURN 0; + END IF; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +-- Create the operators +CREATE OPERATOR < ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_lt, + COMMUTATOR = >, + NEGATOR = >= +); + +CREATE OPERATOR <= ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_le, + COMMUTATOR = >=, + NEGATOR = > +); + +CREATE OPERATOR = ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_eq, + COMMUTATOR = =, + NEGATOR = <> +); + +CREATE OPERATOR >= ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_ge, + COMMUTATOR = <=, + NEGATOR = < +); + +CREATE OPERATOR > ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = my_custom_gt, + COMMUTATOR = <, + NEGATOR = <= +); + +CREATE OPERATOR <> ( + LEFTARG = my_custom_type, + RIGHTARG = my_custom_type, + PROCEDURE = 
my_custom_ne, + COMMUTATOR = <>, + NEGATOR = = +); + +-- Create the operator class (including the support function) +CREATE OPERATOR CLASS my_custom_ops + DEFAULT FOR TYPE my_custom_type USING btree AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + FUNCTION 1 my_custom_cmp(my_custom_type, my_custom_type); + +-- Create the table +CREATE TABLE my_table ( + id int, + custom_val my_custom_type +); + +-- Insert some data +INSERT INTO my_table (id, custom_val) VALUES +(1, ROW(3)::my_custom_type), +(2, ROW(1)::my_custom_type), +(3, ROW(4)::my_custom_type), +(4, ROW(2)::my_custom_type); + +-- Create a function to use when indexing +CREATE OR REPLACE FUNCTION abs_val(val my_custom_type) RETURNS int AS $$ +BEGIN + RETURN abs(val.val); +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; + +-- Create the index +CREATE INDEX idx_custom_val_abs ON my_table (abs_val(custom_val)); + +-- Update 1 +UPDATE my_table SET custom_val = ROW(5)::my_custom_type WHERE id = 1; +SELECT * FROM check_hot_updates(0, 'my_table'); + +-- Update 2 +UPDATE my_table SET custom_val = ROW(0)::my_custom_type WHERE custom_val < ROW(3)::my_custom_type; +SELECT * FROM check_hot_updates(0, 'my_table'); + +-- Update 3 +UPDATE my_table SET custom_val = ROW(6)::my_custom_type WHERE id = 3; +SELECT * FROM check_hot_updates(0, 'my_table'); + +-- Update 4 +UPDATE my_table SET id = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 'my_table'); + +-- Query using the index +SELECT * FROM my_table WHERE abs_val(custom_val) = 6; + +-- Clean up test case +DROP TABLE my_table CASCADE; +DROP OPERATOR CLASS my_custom_ops USING btree CASCADE; +DROP OPERATOR < (my_custom_type, my_custom_type); +DROP OPERATOR <= (my_custom_type, my_custom_type); +DROP OPERATOR = (my_custom_type, my_custom_type); +DROP OPERATOR >= (my_custom_type, my_custom_type); +DROP OPERATOR > (my_custom_type, my_custom_type); +DROP OPERATOR <> (my_custom_type, my_custom_type); +DROP FUNCTION my_custom_lt(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_le(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_eq(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_ge(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_gt(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_ne(my_custom_type, my_custom_type); +DROP FUNCTION my_custom_cmp(my_custom_type, my_custom_type); +DROP FUNCTION abs_val(my_custom_type); +DROP TYPE my_custom_type CASCADE; + +-- Cleanup +DROP FUNCTION check_hot_updates(int, text, text); +DROP COLLATION case_insensitive; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index cf3f6a7dafd0d..4cc7a9d4c7d2b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -390,6 +390,7 @@ CachedFunctionCompileCallback CachedFunctionDeleteCallback CachedFunctionHashEntry CachedFunctionHashKey +CachedIndexDatum CachedPlan CachedPlanSource CallContext