+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000..35eb1ddfbbc02
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000000000..f5d97424c5047
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,22 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "(gdb) Attach Postgres",
+ "type": "cppdbg",
+ "request": "attach",
+ "program": "${workspaceRoot}/install/bin/postgres",
+ "MIMode": "gdb",
+ "setupCommands": [
+ {
+ "description": "Enable pretty-printing for gdb",
+ "text": "-enable-pretty-printing",
+ "ignoreFailures": true
+ }
+      ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000000000..cc8a64fa9fa85
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+ "files.associations": {
+ "syscache.h": "c"
+ }
+}
\ No newline at end of file
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000000000..a609589066525
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,78 @@
+{
+ "nodes": {
+ "flake-utils": {
+ "inputs": {
+ "systems": "systems"
+ },
+ "locked": {
+ "lastModified": 1731533236,
+ "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1751211869,
+ "narHash": "sha256-1Cu92i1KSPbhPCKxoiVG5qnoRiKTgR5CcGSRyLpOd7Y=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "b43c397f6c213918d6cfe6e3550abfe79b5d1c51",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-25.05",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "nixpkgs-unstable": {
+ "locked": {
+ "lastModified": 1757651841,
+ "narHash": "sha256-Lh9QoMzTjY/O4LqNwcm6s/WSYStDmCH6f3V/izwlkHc=",
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "rev": "ad4e6dd68c30bc8bd1860a27bc6f0c485bd7f3b6",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nixos",
+ "ref": "nixpkgs-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs",
+ "nixpkgs-unstable": "nixpkgs-unstable"
+ }
+ },
+ "systems": {
+ "locked": {
+ "lastModified": 1681028828,
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+ "owner": "nix-systems",
+ "repo": "default",
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-systems",
+ "repo": "default",
+ "type": "github"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000000000..709d13737ee5a
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,45 @@
+{
+ description = "PostgreSQL development environment";
+
+ inputs = {
+ nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
+ nixpkgs-unstable.url = "github:nixos/nixpkgs/nixpkgs-unstable";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = {
+ self,
+ nixpkgs,
+ nixpkgs-unstable,
+ flake-utils,
+ }:
+ flake-utils.lib.eachDefaultSystem (
+ system: let
+ pkgs = import nixpkgs {
+ inherit system;
+ config.allowUnfree = true;
+ };
+ pkgs-unstable = import nixpkgs-unstable {
+ inherit system;
+ config.allowUnfree = true;
+ };
+
+ shellConfig = import ./shell.nix {inherit pkgs pkgs-unstable system;};
+ in {
+ formatter = pkgs.alejandra;
+ devShells = {
+ default = shellConfig.devShell;
+ gcc = shellConfig.devShell;
+ clang = shellConfig.clangDevShell;
+ gcc-musl = shellConfig.muslDevShell;
+ clang-musl = shellConfig.clangMuslDevShell;
+ };
+
+ packages = {
+ inherit (shellConfig) gdbConfig flameGraphScript pgbenchScript;
+ };
+
+        # environment.localBinInPath = true; # NOTE(review): NixOS module option, not a flake output — has no effect here; confirm and remove
+ }
+ );
+}
diff --git a/glibc-no-fortify-warning.patch b/glibc-no-fortify-warning.patch
new file mode 100644
index 0000000000000..681e678e67ee3
--- /dev/null
+++ b/glibc-no-fortify-warning.patch
@@ -0,0 +1,24 @@
+From 130c231020f97e5eb878cc9fdb2bd9b186a5aa04 Mon Sep 17 00:00:00 2001
+From: Greg Burd
+Date: Fri, 24 Oct 2025 11:58:24 -0400
+Subject: [PATCH] no warnings with -O0 and fortify source please
+
+---
+ include/features.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/include/features.h b/include/features.h
+index 673c4036..a02c8a3f 100644
+--- a/include/features.h
++++ b/include/features.h
+@@ -432,7 +432,6 @@
+
+ #if defined _FORTIFY_SOURCE && _FORTIFY_SOURCE > 0
+ # if !defined __OPTIMIZE__ || __OPTIMIZE__ <= 0
+-# warning _FORTIFY_SOURCE requires compiling with optimization (-O)
+ # elif !__GNUC_PREREQ (4, 1)
+ # warning _FORTIFY_SOURCE requires GCC 4.1 or later
+ # elif _FORTIFY_SOURCE > 2 && (__glibc_clang_prereq (9, 0) \
+--
+2.50.1
+
diff --git a/pg-aliases.sh b/pg-aliases.sh
new file mode 100644
index 0000000000000..d0b5ed16eb0e0
--- /dev/null
+++ b/pg-aliases.sh
@@ -0,0 +1,304 @@
+# PostgreSQL Development Aliases
+
+# Build system management
+pg_clean_for_compiler() {
+  local current_compiler="$(basename "$CC")"
+ local build_dir="$PG_BUILD_DIR"
+
+ if [ -f "$build_dir/compile_commands.json" ]; then
+    local last_compiler=$(grep -o '/[^/]*/bin/gcc\|/[^/]*/bin/clang' "$build_dir/compile_commands.json" | head -1 | xargs basename 2>/dev/null || echo "unknown")
+
+ if [ "$last_compiler" != "$current_compiler" ] && [ "$last_compiler" != "unknown" ]; then
+ echo "Detected compiler change from $last_compiler to $current_compiler"
+ echo "Cleaning build directory..."
+ rm -rf "$build_dir"
+ mkdir -p "$build_dir"
+ fi
+ fi
+
+ mkdir -p "$build_dir"
+ echo "$current_compiler" >"$build_dir/.compiler_used"
+}
+
+# Core PostgreSQL commands
+alias pg-setup='
+ if [ -z "$PERL_CORE_DIR" ]; then
+ echo "Error: Could not find perl CORE directory" >&2
+ return 1
+ fi
+
+ pg_clean_for_compiler
+
+ echo "=== PostgreSQL Build Configuration ==="
+ echo "Compiler: $CC"
+ echo "LLVM: $(llvm-config --version 2>/dev/null || echo 'disabled')"
+ echo "Source: $PG_SOURCE_DIR"
+ echo "Build: $PG_BUILD_DIR"
+ echo "Install: $PG_INSTALL_DIR"
+ echo "======================================"
+ # --fatal-meson-warnings
+
+ env CFLAGS="-I$PERL_CORE_DIR $CFLAGS" \
+ LDFLAGS="-L$PERL_CORE_DIR -lperl $LDFLAGS" \
+ meson setup $MESON_EXTRA_SETUP \
+ --reconfigure \
+ -Db_coverage=false \
+ -Db_lundef=false \
+ -Dcassert=true \
+ -Ddebug=true \
+ -Ddocs_html_style=website \
+ -Ddocs_pdf=enabled \
+ -Dicu=enabled \
+ -Dinjection_points=true \
+ -Dldap=enabled \
+ -Dlibcurl=enabled \
+ -Dlibxml=enabled \
+ -Dlibxslt=enabled \
+ -Dllvm=auto \
+ -Dlz4=enabled \
+ -Dnls=enabled \
+ -Doptimization=0 \
+ -Dplperl=enabled \
+ -Dplpython=enabled \
+ -Dpltcl=enabled \
+ -Dreadline=enabled \
+ -Dssl=openssl \
+ -Dtap_tests=enabled \
+ -Duuid=e2fs \
+ -Dzstd=enabled \
+ --prefix="$PG_INSTALL_DIR" \
+ "$PG_BUILD_DIR" \
+ "$PG_SOURCE_DIR"'
+
+alias pg-compdb='compdb -p "$PG_BUILD_DIR" list > compile_commands.json'
+alias pg-build='meson compile -C "$PG_BUILD_DIR"'
+alias pg-install='meson install -C "$PG_BUILD_DIR"'
+alias pg-test='meson test -q --print-errorlogs -C "$PG_BUILD_DIR"'
+
+# Clean commands
+alias pg-clean='ninja -C "$PG_BUILD_DIR" clean'
+alias pg-full-clean='rm -rf "$PG_BUILD_DIR" "$PG_INSTALL_DIR" && echo "Build and install directories cleaned"'
+
+# Database management
+alias pg-init='rm -rf "$PG_DATA_DIR" && "$PG_INSTALL_DIR/bin/initdb" --debug --no-clean "$PG_DATA_DIR"'
+alias pg-start='"$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR" -k "$PG_DATA_DIR"'
+alias pg-stop='pkill -f "postgres.*-D.*$PG_DATA_DIR" || true'
+alias pg-restart='pg-stop && sleep 2 && pg-start'
+alias pg-status='pgrep -f "postgres.*-D.*$PG_DATA_DIR" && echo "PostgreSQL is running" || echo "PostgreSQL is not running"'
+
+# Client connections
+alias pg-psql='"$PG_INSTALL_DIR/bin/psql" -h "$PG_DATA_DIR" postgres'
+alias pg-createdb='"$PG_INSTALL_DIR/bin/createdb" -h "$PG_DATA_DIR"'
+alias pg-dropdb='"$PG_INSTALL_DIR/bin/dropdb" -h "$PG_DATA_DIR"'
+
+# Debugging
+alias pg-debug-gdb='gdb -x "$GDBINIT" "$PG_INSTALL_DIR/bin/postgres"'
+alias pg-debug-lldb='lldb "$PG_INSTALL_DIR/bin/postgres"'
+alias pg-debug='
+ if command -v gdb >/dev/null 2>&1; then
+ pg-debug-gdb
+ elif command -v lldb >/dev/null 2>&1; then
+ pg-debug-lldb
+ else
+ echo "No debugger available (gdb or lldb required)"
+ fi'
+
+# Attach to running process
+alias pg-attach-gdb='
+ PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)
+ if [ -n "$PG_PID" ]; then
+ echo "Attaching GDB to PostgreSQL process $PG_PID"
+ gdb -x "$GDBINIT" -p "$PG_PID"
+ else
+ echo "No PostgreSQL process found"
+ fi'
+
+alias pg-attach-lldb='
+ PG_PID=$(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)
+ if [ -n "$PG_PID" ]; then
+ echo "Attaching LLDB to PostgreSQL process $PG_PID"
+ lldb -p "$PG_PID"
+ else
+ echo "No PostgreSQL process found"
+ fi'
+
+alias pg-attach='
+ if command -v gdb >/dev/null 2>&1; then
+ pg-attach-gdb
+ elif command -v lldb >/dev/null 2>&1; then
+ pg-attach-lldb
+ else
+ echo "No debugger available (gdb or lldb required)"
+ fi'
+
+# Performance profiling and analysis
+alias pg-valgrind='valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"'
+alias pg-strace='strace -f -o /tmp/postgres.strace "$PG_INSTALL_DIR/bin/postgres" -D "$PG_DATA_DIR"'
+
+# Flame graph generation
+alias pg-flame='pg-flame-generate'
+alias pg-flame-30='pg-flame-generate 30'
+alias pg-flame-60='pg-flame-generate 60'
+alias pg-flame-120='pg-flame-generate 120'
+
+# Custom flame graph with specific duration and output
+pg-flame-custom() {
+ local duration=${1:-30}
+ local output_dir=${2:-$PG_FLAME_DIR}
+ echo "Generating flame graph for ${duration}s, output to: $output_dir"
+ pg-flame-generate "$duration" "$output_dir"
+}
+
+# Benchmarking with pgbench
+alias pg-bench='pg-bench-run'
+alias pg-bench-quick='pg-bench-run 5 1 100 1 30 select-only'
+alias pg-bench-standard='pg-bench-run 10 2 1000 10 60 tpcb-like'
+alias pg-bench-heavy='pg-bench-run 50 4 5000 100 300 tpcb-like'
+alias pg-bench-readonly='pg-bench-run 20 4 2000 50 120 select-only'
+
+# Custom benchmark function
+pg-bench-custom() {
+ local clients=${1:-10}
+ local threads=${2:-2}
+ local transactions=${3:-1000}
+ local scale=${4:-10}
+ local duration=${5:-60}
+ local test_type=${6:-tpcb-like}
+
+ echo "Running custom benchmark:"
+ echo " Clients: $clients, Threads: $threads"
+ echo " Transactions: $transactions, Scale: $scale"
+ echo " Duration: ${duration}s, Type: $test_type"
+
+ pg-bench-run "$clients" "$threads" "$transactions" "$scale" "$duration" "$test_type"
+}
+
+# Benchmark with flame graph
+pg-bench-flame() {
+ local duration=${1:-60}
+ local clients=${2:-10}
+ local scale=${3:-10}
+
+ echo "Running benchmark with flame graph generation"
+ echo "Duration: ${duration}s, Clients: $clients, Scale: $scale"
+
+ # Start benchmark in background
+ pg-bench-run "$clients" 2 1000 "$scale" "$duration" tpcb-like &
+ local bench_pid=$!
+
+ # Wait a bit for benchmark to start
+ sleep 5
+
+ # Generate flame graph for most of the benchmark duration
+ local flame_duration=$((duration - 10))
+ if [ $flame_duration -gt 10 ]; then
+ pg-flame-generate "$flame_duration" &
+ local flame_pid=$!
+ fi
+
+ # Wait for benchmark to complete
+ wait $bench_pid
+
+ # Wait for flame graph if it was started
+ if [ -n "${flame_pid:-}" ]; then
+ wait $flame_pid
+ fi
+
+ echo "Benchmark and flame graph generation completed"
+}
+
+# Performance monitoring
+alias pg-perf='perf top -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | head -1)'
+alias pg-htop='htop -p $(pgrep -f "postgres.*-D.*$PG_DATA_DIR" | tr "\n" "," | sed "s/,$//")'
+
+# System performance stats during PostgreSQL operation
+pg-stats() {
+ local duration=${1:-30}
+ echo "Collecting system stats for ${duration}s..."
+
+ iostat -x 1 "$duration" >"$PG_BENCH_DIR/iostat_$(date +%Y%m%d_%H%M%S).log" &
+ vmstat 1 "$duration" >"$PG_BENCH_DIR/vmstat_$(date +%Y%m%d_%H%M%S).log" &
+
+ wait
+ echo "System stats saved to $PG_BENCH_DIR"
+}
+
+# Development helpers
+pg-format() {
+ local since=${1:-HEAD}
+
+ if [ ! -f "$PG_SOURCE_DIR/src/tools/pgindent/pgindent" ]; then
+ echo "Error: pgindent not found at $PG_SOURCE_DIR/src/tools/pgindent/pgindent"
+ else
+
+ modified_files=$(git diff --name-only "${since}" | grep -E "\.c$|\.h$")
+
+ if [ -z "$modified_files" ]; then
+ echo "No modified .c or .h files found"
+ else
+
+ echo "Formatting modified files with pgindent:"
+ for file in $modified_files; do
+ if [ -f "$file" ]; then
+ echo " Formatting: $file"
+ "$PG_SOURCE_DIR/src/tools/pgindent/pgindent" "$file"
+ else
+ echo " Warning: File not found: $file"
+ fi
+ done
+
+ echo "Checking files for whitespace:"
+ git diff --check "${since}"
+ fi
+ fi
+}
+
+alias pg-tidy='find "$PG_SOURCE_DIR" -name "*.c" | head -10 | xargs clang-tidy'
+
+# Log management
+alias pg-log='tail -f "$PG_DATA_DIR/log/postgresql-$(date +%Y-%m-%d).log" 2>/dev/null || echo "No log file found"'
+alias pg-log-errors='grep -i error "$PG_DATA_DIR/log/"*.log 2>/dev/null || echo "No error logs found"'
+
+# Build logs
+alias pg-build-log='cat "$PG_BUILD_DIR/meson-logs/meson-log.txt"'
+alias pg-build-errors='grep -i error "$PG_BUILD_DIR/meson-logs/meson-log.txt" 2>/dev/null || echo "No build errors found"'
+
+# Results viewing
+alias pg-bench-results='ls -la "$PG_BENCH_DIR" && echo "Latest results:" && tail -20 "$PG_BENCH_DIR"/results_*.txt 2>/dev/null | tail -20'
+alias pg-flame-results='ls -la "$PG_FLAME_DIR" && echo "Open flame graphs with: firefox $PG_FLAME_DIR/*.svg"'
+
+# Clean up old results
+pg-clean-results() {
+ local days=${1:-7}
+ echo "Cleaning benchmark and flame graph results older than $days days..."
+ find "$PG_BENCH_DIR" -type f -mtime +$days -delete 2>/dev/null || true
+ find "$PG_FLAME_DIR" -type f -mtime +$days -delete 2>/dev/null || true
+ echo "Cleanup completed"
+}
+
+# Information
+alias pg-info='
+ echo "=== PostgreSQL Development Environment ==="
+ echo "Source: $PG_SOURCE_DIR"
+ echo "Build: $PG_BUILD_DIR"
+ echo "Install: $PG_INSTALL_DIR"
+ echo "Data: $PG_DATA_DIR"
+ echo "Benchmarks: $PG_BENCH_DIR"
+ echo "Flame graphs: $PG_FLAME_DIR"
+ echo "Compiler: $CC"
+ echo ""
+ echo "Available commands:"
+ echo " Setup: pg-setup, pg-build, pg-install"
+ echo " Database: pg-init, pg-start, pg-stop, pg-psql"
+ echo " Debug: pg-debug, pg-attach, pg-valgrind"
+ echo " Performance: pg-flame, pg-bench, pg-perf"
+ echo " Benchmarks: pg-bench-quick, pg-bench-standard, pg-bench-heavy"
+ echo " Flame graphs: pg-flame-30, pg-flame-60, pg-flame-custom"
+ echo " Combined: pg-bench-flame"
+ echo " Results: pg-bench-results, pg-flame-results"
+ echo " Logs: pg-log, pg-build-log"
+ echo " Clean: pg-clean, pg-full-clean, pg-clean-results"
+ echo " Code quality: pg-format, pg-tidy"
+ echo "=========================================="'
+
+echo "PostgreSQL aliases loaded. Run 'pg-info' for available commands."
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000000000..130d5b21986b2
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,820 @@
+{
+ pkgs,
+ pkgs-unstable,
+ system,
+}: let
+ # Create a patched glibc only for the dev shell
+ patchedGlibc = pkgs.glibc.overrideAttrs (oldAttrs: {
+ patches = (oldAttrs.patches or []) ++ [
+ ./glibc-no-fortify-warning.patch
+ ];
+ });
+
+ llvmPkgs = pkgs-unstable.llvmPackages_21;
+
+ # Configuration constants
+ config = {
+ pgSourceDir = "$PWD";
+ pgBuildDir = "$PWD/build";
+ pgInstallDir = "$PWD/install";
+ pgDataDir = "/tmp/test-db-$(basename $PWD)";
+ pgBenchDir = "/tmp/pgbench-results-$(basename $PWD)";
+ pgFlameDir = "/tmp/flame-graphs-$(basename $PWD)";
+ };
+
+ # Single dependency function that can be used for all environments
+ getPostgreSQLDeps = muslLibs:
+ with pkgs;
+ [
+ # Build system (always use host tools)
+ pkgs-unstable.meson
+ pkgs-unstable.ninja
+ pkg-config
+ autoconf
+ libtool
+ git
+ which
+ binutils
+ gnumake
+
+ # Parser/lexer tools
+ bison
+ flex
+
+ # Documentation
+ docbook_xml_dtd_45
+ docbook-xsl-nons
+ fop
+ gettext
+ libxslt
+ libxml2
+
+ # Development tools (always use host tools)
+ coreutils
+ shellcheck
+ ripgrep
+ valgrind
+ curl
+ uv
+ pylint
+ black
+ lcov
+ strace
+ ltrace
+ perf-tools
+ linuxPackages.perf
+ flamegraph
+ htop
+ iotop
+ sysstat
+ ccache
+ cppcheck
+ compdb
+
+ # GCC/GDB
+# pkgs-unstable.gcc15
+ gcc
+ gdb
+
+ # LLVM toolchain
+ llvmPkgs.llvm
+ llvmPkgs.llvm.dev
+ llvmPkgs.clang-tools
+ llvmPkgs.lldb
+
+ # Language support
+ (perl.withPackages (ps: with ps; [IPCRun]))
+ (python3.withPackages (ps: with ps; [requests browser-cookie3]))
+ tcl
+ ]
+ ++ (
+ if muslLibs
+ then [
+ # Musl target libraries for cross-compilation
+ pkgs.pkgsMusl.readline
+ pkgs.pkgsMusl.zlib
+ pkgs.pkgsMusl.openssl
+ pkgs.pkgsMusl.icu
+ pkgs.pkgsMusl.lz4
+ pkgs.pkgsMusl.zstd
+ pkgs.pkgsMusl.libuuid
+ pkgs.pkgsMusl.libkrb5
+ pkgs.pkgsMusl.linux-pam
+ pkgs.pkgsMusl.libxcrypt
+ ]
+ else [
+ # Glibc target libraries
+ readline
+ zlib
+ openssl
+ icu
+ lz4
+ zstd
+ libuuid
+ libkrb5
+ linux-pam
+ libxcrypt
+ numactl
+ openldap
+ liburing
+ libselinux
+ patchedGlibc
+ glibcInfo
+ glibc.dev
+ ]
+ );
+
+ # GDB configuration for PostgreSQL debugging
+ gdbConfig = pkgs.writeText "gdbinit-postgres" ''
+ # PostgreSQL-specific GDB configuration
+
+ # Pretty-print PostgreSQL data structures
+ define print_node
+ if $arg0
+ printf "Node type: %s\n", nodeTagNames[$arg0->type]
+ print *$arg0
+ else
+ printf "NULL node\n"
+ end
+ end
+ document print_node
+ Print a PostgreSQL Node with type information
+ Usage: print_node
+ end
+
+ define print_list
+ set $list = (List*)$arg0
+ if $list
+ printf "List length: %d\n", $list->length
+ set $cell = $list->head
+ set $i = 0
+ while $cell && $i < $list->length
+ printf " [%d]: ", $i
+ print_node $cell->data.ptr_value
+ set $cell = $cell->next
+ set $i = $i + 1
+ end
+ else
+ printf "NULL list\n"
+ end
+ end
+ document print_list
+ Print a PostgreSQL List structure
+ Usage: print_list
+ end
+
+ define print_query
+ set $query = (Query*)$arg0
+ if $query
+ printf "Query type: %d, command type: %d\n", $query->querySource, $query->commandType
+ print *$query
+ else
+ printf "NULL query\n"
+ end
+ end
+ document print_query
+ Print a PostgreSQL Query structure
+ Usage: print_query
+ end
+
+ define print_relcache
+ set $rel = (Relation)$arg0
+ if $rel
+ printf "Relation: %s.%s (OID: %u)\n", $rel->rd_rel->relnamespace, $rel->rd_rel->relname.data, $rel->rd_id
+ printf " natts: %d, relkind: %c\n", $rel->rd_rel->relnatts, $rel->rd_rel->relkind
+ else
+ printf "NULL relation\n"
+ end
+ end
+ document print_relcache
+ Print relation cache entry information
+ Usage: print_relcache
+ end
+
+ define print_tupdesc
+ set $desc = (TupleDesc)$arg0
+ if $desc
+ printf "TupleDesc: %d attributes\n", $desc->natts
+ set $i = 0
+ while $i < $desc->natts
+ set $attr = $desc->attrs[$i]
+ printf " [%d]: %s (type: %u, len: %d)\n", $i, $attr->attname.data, $attr->atttypid, $attr->attlen
+ set $i = $i + 1
+ end
+ else
+ printf "NULL tuple descriptor\n"
+ end
+ end
+ document print_tupdesc
+ Print tuple descriptor information
+ Usage: print_tupdesc
+ end
+
+ define print_slot
+ set $slot = (TupleTableSlot*)$arg0
+ if $slot
+ printf "TupleTableSlot: %s\n", $slot->tts_ops->name
+ printf " empty: %d, shouldFree: %d\n", $slot->tts_empty, $slot->tts_shouldFree
+ if $slot->tts_tupleDescriptor
+ print_tupdesc $slot->tts_tupleDescriptor
+ end
+ else
+ printf "NULL slot\n"
+ end
+ end
+ document print_slot
+ Print tuple table slot information
+ Usage: print_slot
+ end
+
+ # Memory context debugging
+ define print_mcxt
+ set $context = (MemoryContext)$arg0
+ if $context
+ printf "MemoryContext: %s\n", $context->name
+ printf " type: %s, parent: %p\n", $context->methods->name, $context->parent
+ printf " total: %zu, free: %zu\n", $context->mem_allocated, $context->freep - $context->freeptr
+ else
+ printf "NULL memory context\n"
+ end
+ end
+ document print_mcxt
+ Print memory context information
+ Usage: print_mcxt
+ end
+
+ # Process debugging
+ define print_proc
+ set $proc = (PGPROC*)$arg0
+ if $proc
+ printf "PGPROC: pid=%d, database=%u\n", $proc->pid, $proc->databaseId
+ printf " waiting: %d, waitStatus: %d\n", $proc->waiting, $proc->waitStatus
+ else
+ printf "NULL process\n"
+ end
+ end
+ document print_proc
+ Print process information
+ Usage: print_proc
+ end
+
+ # Set useful defaults
+ set print pretty on
+ set print object on
+ set print static-members off
+ set print vtbl on
+ set print demangle on
+ set demangle-style gnu-v3
+ set print sevenbit-strings off
+ set history save on
+ set history size 1000
+ set history filename ~/.gdb_history_postgres
+
+ # Common breakpoints for PostgreSQL debugging
+ define pg_break_common
+ break elog
+ break errfinish
+ break ExceptionalCondition
+ break ProcessInterrupts
+ end
+ document pg_break_common
+ Set common PostgreSQL debugging breakpoints
+ end
+
+ printf "PostgreSQL GDB configuration loaded.\n"
+ printf "Available commands: print_node, print_list, print_query, print_relcache,\n"
+ printf " print_tupdesc, print_slot, print_mcxt, print_proc, pg_break_common\n"
+ '';
+
+ # Flame graph generation script
+ flameGraphScript = pkgs.writeScriptBin "pg-flame-generate" ''
+ #!${pkgs.bash}/bin/bash
+ set -euo pipefail
+
+ DURATION=''${1:-30}
+ OUTPUT_DIR=''${2:-${config.pgFlameDir}}
+ TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+
+ mkdir -p "$OUTPUT_DIR"
+
+ echo "Generating flame graph for PostgreSQL (duration: ''${DURATION}s)"
+
+ # Find PostgreSQL processes
+ PG_PIDS=$(pgrep -f "postgres.*-D.*${config.pgDataDir}" || true)
+
+ if [ -z "$PG_PIDS" ]; then
+ echo "Error: No PostgreSQL processes found"
+ exit 1
+ fi
+
+ echo "Found PostgreSQL processes: $PG_PIDS"
+
+ # Record perf data
+ PERF_DATA="$OUTPUT_DIR/perf_$TIMESTAMP.data"
+ echo "Recording perf data to $PERF_DATA"
+
+ ${pkgs.linuxPackages.perf}/bin/perf record \
+ -F 997 \
+ -g \
+ --call-graph dwarf \
+ -p "$(echo $PG_PIDS | tr ' ' ',')" \
+ -o "$PERF_DATA" \
+ sleep "$DURATION"
+
+ # Generate flame graph
+ FLAME_SVG="$OUTPUT_DIR/postgres_flame_$TIMESTAMP.svg"
+ echo "Generating flame graph: $FLAME_SVG"
+
+ ${pkgs.linuxPackages.perf}/bin/perf script -i "$PERF_DATA" | \
+ ${pkgs.flamegraph}/bin/stackcollapse-perf.pl | \
+ ${pkgs.flamegraph}/bin/flamegraph.pl \
+ --title "PostgreSQL Flame Graph ($TIMESTAMP)" \
+ --width 1200 \
+ --height 800 \
+ > "$FLAME_SVG"
+
+ echo "Flame graph generated: $FLAME_SVG"
+ echo "Perf data saved: $PERF_DATA"
+
+ # Generate summary report
+ REPORT="$OUTPUT_DIR/report_$TIMESTAMP.txt"
+ echo "Generating performance report: $REPORT"
+
+ {
+ echo "PostgreSQL Performance Analysis Report"
+ echo "Generated: $(date)"
+ echo "Duration: ''${DURATION}s"
+ echo "Processes: $PG_PIDS"
+ echo ""
+ echo "=== Top Functions ==="
+ ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio --sort comm,dso,symbol | head -50
+ echo ""
+ echo "=== Call Graph ==="
+ ${pkgs.linuxPackages.perf}/bin/perf report -i "$PERF_DATA" --stdio -g --sort comm,dso,symbol | head -100
+ } > "$REPORT"
+
+ echo "Report generated: $REPORT"
+ echo ""
+ echo "Files created:"
+ echo " Flame graph: $FLAME_SVG"
+ echo " Perf data: $PERF_DATA"
+ echo " Report: $REPORT"
+ '';
+
+ # pgbench wrapper script
+ pgbenchScript = pkgs.writeScriptBin "pg-bench-run" ''
+ #!${pkgs.bash}/bin/bash
+ set -euo pipefail
+
+ # Default parameters
+ CLIENTS=''${1:-10}
+ THREADS=''${2:-2}
+ TRANSACTIONS=''${3:-1000}
+ SCALE=''${4:-10}
+ DURATION=''${5:-60}
+ TEST_TYPE=''${6:-tpcb-like}
+
+ OUTPUT_DIR="${config.pgBenchDir}"
+ TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+
+ mkdir -p "$OUTPUT_DIR"
+
+ echo "=== PostgreSQL Benchmark Configuration ==="
+ echo "Clients: $CLIENTS"
+ echo "Threads: $THREADS"
+ echo "Transactions: $TRANSACTIONS"
+ echo "Scale factor: $SCALE"
+ echo "Duration: ''${DURATION}s"
+ echo "Test type: $TEST_TYPE"
+ echo "Output directory: $OUTPUT_DIR"
+ echo "============================================"
+
+ # Check if PostgreSQL is running
+ if ! pgrep -f "postgres.*-D.*${config.pgDataDir}" >/dev/null; then
+ echo "Error: PostgreSQL is not running. Start it with 'pg-start'"
+ exit 1
+ fi
+
+ PGBENCH="${config.pgInstallDir}/bin/pgbench"
+ PSQL="${config.pgInstallDir}/bin/psql"
+ CREATEDB="${config.pgInstallDir}/bin/createdb"
+ DROPDB="${config.pgInstallDir}/bin/dropdb"
+
+ DB_NAME="pgbench_test_$TIMESTAMP"
+ RESULTS_FILE="$OUTPUT_DIR/results_$TIMESTAMP.txt"
+ LOG_FILE="$OUTPUT_DIR/pgbench_$TIMESTAMP.log"
+
+ echo "Creating test database: $DB_NAME"
+ "$CREATEDB" -h "${config.pgDataDir}" "$DB_NAME" || {
+ echo "Failed to create database"
+ exit 1
+ }
+
+ # Initialize pgbench tables
+ echo "Initializing pgbench tables (scale factor: $SCALE)"
+ "$PGBENCH" -h "${config.pgDataDir}" -i -s "$SCALE" "$DB_NAME" || {
+ echo "Failed to initialize pgbench tables"
+ "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true
+ exit 1
+ }
+
+ # Run benchmark based on test type
+ echo "Running benchmark..."
+
+ case "$TEST_TYPE" in
+ "tpcb-like"|"default")
+ BENCH_ARGS=""
+ ;;
+ "select-only")
+ BENCH_ARGS="-S"
+ ;;
+ "simple-update")
+ BENCH_ARGS="-N"
+ ;;
+ "read-write")
+ BENCH_ARGS="-b select-only@70 -b tpcb-like@30"
+ ;;
+ *)
+ echo "Unknown test type: $TEST_TYPE"
+ echo "Available types: tpcb-like, select-only, simple-update, read-write"
+ "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true
+ exit 1
+ ;;
+ esac
+
+ {
+ echo "PostgreSQL Benchmark Results"
+ echo "Generated: $(date)"
+ echo "Test type: $TEST_TYPE"
+ echo "Clients: $CLIENTS, Threads: $THREADS"
+ echo "Transactions: $TRANSACTIONS, Duration: ''${DURATION}s"
+ echo "Scale factor: $SCALE"
+ echo "Database: $DB_NAME"
+ echo ""
+ echo "=== System Information ==="
+ echo "CPU: $(nproc) cores"
+ echo "Memory: $(free -h | grep '^Mem:' | awk '{print $2}')"
+ echo "Compiler: $CC"
+ echo "PostgreSQL version: $("$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -t -c "SELECT version();" | head -1)"
+ echo ""
+ echo "=== Benchmark Results ==="
+ } > "$RESULTS_FILE"
+
+ # Run the actual benchmark
+ "$PGBENCH" \
+ -h "${config.pgDataDir}" \
+ -c "$CLIENTS" \
+ -j "$THREADS" \
+ -T "$DURATION" \
+ -P 5 \
+ --log \
+ --log-prefix="$OUTPUT_DIR/pgbench_$TIMESTAMP" \
+ $BENCH_ARGS \
+ "$DB_NAME" 2>&1 | tee -a "$RESULTS_FILE"
+
+ # Collect additional statistics
+ {
+ echo ""
+ echo "=== Database Statistics ==="
+ "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c "
+ SELECT
+ schemaname,
+ relname,
+ n_tup_ins as inserts,
+ n_tup_upd as updates,
+ n_tup_del as deletes,
+ n_live_tup as live_tuples,
+ n_dead_tup as dead_tuples
+ FROM pg_stat_user_tables;
+ "
+
+ echo ""
+ echo "=== Index Statistics ==="
+ "$PSQL" --no-psqlrc -h "${config.pgDataDir}" -d "$DB_NAME" -c "
+ SELECT
+ schemaname,
+ relname,
+ indexrelname,
+ idx_scan,
+ idx_tup_read,
+ idx_tup_fetch
+ FROM pg_stat_user_indexes;
+ "
+ } >> "$RESULTS_FILE"
+
+ # Clean up
+ echo "Cleaning up test database: $DB_NAME"
+ "$DROPDB" -h "${config.pgDataDir}" "$DB_NAME" 2>/dev/null || true
+
+ echo ""
+ echo "Benchmark completed!"
+ echo "Results saved to: $RESULTS_FILE"
+ echo "Transaction logs: $OUTPUT_DIR/pgbench_$TIMESTAMP*"
+
+ # Show summary
+ echo ""
+ echo "=== Quick Summary ==="
+ grep -E "(tps|latency)" "$RESULTS_FILE" | tail -5
+ '';
+
+ # Development shell (GCC + glibc)
+ devShell = pkgs.mkShell {
+ name = "postgresql-dev";
+ buildInputs =
+ (getPostgreSQLDeps false)
+ ++ [
+ flameGraphScript
+ pgbenchScript
+ ];
+
+ shellHook = let
+ icon = "f121";
+ in ''
+ # History configuration
+ export HISTFILE=.history
+ export HISTSIZE=1000000
+ export HISTFILESIZE=1000000
+
+ # Clean environment
+ unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH
+
+ # Essential tools in PATH
+ export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH"
+ export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]"
+
+ # Ccache configuration
+ export PATH=${pkgs.ccache}/bin:$PATH
+ export CCACHE_COMPILERCHECK=content
+ export CCACHE_DIR=$HOME/.ccache/pg/$(basename $PWD)
+ mkdir -p "$CCACHE_DIR"
+
+ # LLVM configuration
+ export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config"
+ export PATH="${llvmPkgs.llvm}/bin:$PATH"
+ export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH"
+ export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm"
+ export LLVM_ROOT="${llvmPkgs.llvm}"
+
+ # Development tools in PATH
+ export PATH=${pkgs.clang-tools}/bin:$PATH
+ export PATH=${pkgs.cppcheck}/bin:$PATH
+
+      # PostgreSQL Development CFLAGS
+ # -DRELCACHE_FORCE_RELEASE -DCATCACHE_FORCE_RELEASE -fno-omit-frame-pointer -fno-stack-protector -DUSE_VALGRIND
+ export CFLAGS=""
+ export CXXFLAGS=""
+
+ # Python UV
+ UV_PYTHON_DOWNLOADS=never
+
+ # GCC configuration (default compiler)
+ export CC="${pkgs.gcc}/bin/gcc"
+ export CXX="${pkgs.gcc}/bin/g++"
+
+ # PostgreSQL environment
+ export PG_SOURCE_DIR="${config.pgSourceDir}"
+ export PG_BUILD_DIR="${config.pgBuildDir}"
+ export PG_INSTALL_DIR="${config.pgInstallDir}"
+ export PG_DATA_DIR="${config.pgDataDir}"
+ export PG_BENCH_DIR="${config.pgBenchDir}"
+ export PG_FLAME_DIR="${config.pgFlameDir}"
+ export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d)
+
+ # GDB configuration
+ export GDBINIT="${gdbConfig}"
+
+ # Performance tools in PATH
+ export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH"
+
+ # Create output directories
+ mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR"
+
+ # Compiler verification
+ echo "Environment configured:"
+ echo " Compiler: $CC"
+ echo " libc: glibc"
+ echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')"
+
+ # Load PostgreSQL development aliases
+ if [ -f ./pg-aliases.sh ]; then
+ source ./pg-aliases.sh
+ else
+ echo "Warning: pg-aliases.sh not found in current directory"
+ fi
+
+ echo ""
+ echo "PostgreSQL Development Environment Ready (GCC + glibc)"
+ echo "Run 'pg-info' for available commands"
+ '';
+ };
+
+ # Clang + glibc variant
+ clangDevShell = pkgs.mkShell {
+ name = "postgresql-clang-glibc";
+ buildInputs =
+ (getPostgreSQLDeps false)
+ ++ [
+ llvmPkgs.clang
+ llvmPkgs.lld
+ llvmPkgs.compiler-rt
+ flameGraphScript
+ pgbenchScript
+ ];
+
+ shellHook = let
+ icon = "f121";
+ in ''
+ # History configuration
+ export HISTFILE=.history
+ export HISTSIZE=1000000
+ export HISTFILESIZE=1000000
+
+ # Clean environment
+ unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH
+
+ # Essential tools in PATH
+ export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH"
+ export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]"
+
+ # Ccache configuration
+ export PATH=${pkgs.ccache}/bin:$PATH
+ export CCACHE_COMPILERCHECK=content
+ export CCACHE_DIR=$HOME/.ccache_pg_dev_clang
+ mkdir -p "$CCACHE_DIR"
+
+ # LLVM configuration
+ export LLVM_CONFIG="${llvmPkgs.llvm}/bin/llvm-config"
+ export PATH="${llvmPkgs.llvm}/bin:$PATH"
+ export PKG_CONFIG_PATH="${llvmPkgs.llvm.dev}/lib/pkgconfig:$PKG_CONFIG_PATH"
+ export LLVM_DIR="${llvmPkgs.llvm.dev}/lib/cmake/llvm"
+ export LLVM_ROOT="${llvmPkgs.llvm}"
+
+ # Development tools in PATH
+ export PATH=${pkgs.clang-tools}/bin:$PATH
+ export PATH=${pkgs.cppcheck}/bin:$PATH
+
+ # Clang + glibc configuration - use system linker instead of LLD for compatibility
+ export CC="${llvmPkgs.clang}/bin/clang"
+ export CXX="${llvmPkgs.clang}/bin/clang++"
+
+ # Use system linker and standard runtime
+ #export CFLAGS=""
+ #export CXXFLAGS=""
+ #export LDFLAGS=""
+
+ # PostgreSQL environment
+ export PG_SOURCE_DIR="${config.pgSourceDir}"
+ export PG_BUILD_DIR="${config.pgBuildDir}"
+ export PG_INSTALL_DIR="${config.pgInstallDir}"
+ export PG_DATA_DIR="${config.pgDataDir}"
+ export PG_BENCH_DIR="${config.pgBenchDir}"
+ export PG_FLAME_DIR="${config.pgFlameDir}"
+ export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d)
+
+ # GDB configuration
+ export GDBINIT="${gdbConfig}"
+
+ # Performance tools in PATH
+ export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH"
+
+ # Create output directories
+ mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR"
+
+ # Compiler verification
+ echo "Environment configured:"
+ echo " Compiler: $CC"
+ echo " libc: glibc"
+ echo " LLVM: $(llvm-config --version 2>/dev/null || echo 'not available')"
+
+ # Load PostgreSQL development aliases
+ if [ -f ./pg-aliases.sh ]; then
+ source ./pg-aliases.sh
+ else
+ echo "Warning: pg-aliases.sh not found in current directory"
+ fi
+
+ echo ""
+ echo "PostgreSQL Development Environment Ready (Clang + glibc)"
+ echo "Run 'pg-info' for available commands"
+ '';
+ };
+
+ # GCC + musl variant (cross-compilation)
+ muslDevShell = pkgs.mkShell {
+ name = "postgresql-gcc-musl";
+ buildInputs =
+ (getPostgreSQLDeps true)
+ ++ [
+ pkgs.gcc
+ flameGraphScript
+ pgbenchScript
+ ];
+
+ shellHook = ''
+ # Same base configuration as main shell
+ export HISTFILE=.history
+ export HISTSIZE=1000000
+ export HISTFILESIZE=1000000
+
+ unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH
+
+ export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH"
+
+ # Cross-compilation to musl
+ export CC="${pkgs.gcc}/bin/gcc"
+ export CXX="${pkgs.gcc}/bin/g++"
+
+ # Point to musl libraries for linking
+ export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig"
+ export CFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include"
+ export CXXFLAGS="-ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include"
+ export LDFLAGS="-L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -static-libgcc"
+
+ # PostgreSQL environment
+ export PG_SOURCE_DIR="${config.pgSourceDir}"
+ export PG_BUILD_DIR="${config.pgBuildDir}"
+ export PG_INSTALL_DIR="${config.pgInstallDir}"
+ export PG_DATA_DIR="${config.pgDataDir}"
+ export PG_BENCH_DIR="${config.pgBenchDir}"
+ export PG_FLAME_DIR="${config.pgFlameDir}"
+ export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d)
+
+ export GDBINIT="${gdbConfig}"
+ export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH"
+
+ mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR"
+
+ echo "GCC + musl environment configured"
+ echo " Compiler: $CC"
+ echo " LibC: musl (cross-compilation)"
+
+ if [ -f ./pg-aliases.sh ]; then
+ source ./pg-aliases.sh
+ fi
+
+ echo "PostgreSQL Development Environment Ready (GCC + musl)"
+ '';
+ };
+
+ # Clang + musl variant (cross-compilation)
+ clangMuslDevShell = pkgs.mkShell {
+ name = "postgresql-clang-musl";
+ buildInputs =
+ (getPostgreSQLDeps true)
+ ++ [
+ llvmPkgs.clang
+ llvmPkgs.lld
+ flameGraphScript
+ pgbenchScript
+ ];
+
+ shellHook = let
+ icon = "f121";
+ in ''
+ export HISTFILE=.history
+ export HISTSIZE=1000000
+ export HISTFILESIZE=1000000
+
+ unset LD_LIBRARY_PATH LD_PRELOAD LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH
+
+ export PATH="${pkgs.which}/bin:${pkgs.coreutils}/bin:$PATH"
+ export PS1="$(echo -e '\u${icon}') {\[$(tput sgr0)\]\[\033[38;5;228m\]\w\[$(tput sgr0)\]\[\033[38;5;15m\]} ($(git rev-parse --abbrev-ref HEAD)) \\$ \[$(tput sgr0)\]"
+
+ # Cross-compilation to musl with clang
+ export CC="${llvmPkgs.clang}/bin/clang"
+ export CXX="${llvmPkgs.clang}/bin/clang++"
+
+ # Point to musl libraries for linking
+ export PKG_CONFIG_PATH="${pkgs.pkgsMusl.openssl.dev}/lib/pkgconfig:${pkgs.pkgsMusl.zlib.dev}/lib/pkgconfig:${pkgs.pkgsMusl.icu.dev}/lib/pkgconfig"
+ export CFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include"
+ export CXXFLAGS="--target=x86_64-linux-musl -ggdb -Og -fno-omit-frame-pointer -DUSE_VALGRIND -D_FORTIFY_SOURCE=1 -I${pkgs.pkgsMusl.stdenv.cc.libc}/include"
+ export LDFLAGS="--target=x86_64-linux-musl -L${pkgs.pkgsMusl.stdenv.cc.libc}/lib -fuse-ld=lld"
+
+ # PostgreSQL environment
+ export PG_SOURCE_DIR="${config.pgSourceDir}"
+ export PG_BUILD_DIR="${config.pgBuildDir}"
+ export PG_INSTALL_DIR="${config.pgInstallDir}"
+ export PG_DATA_DIR="${config.pgDataDir}"
+ export PG_BENCH_DIR="${config.pgBenchDir}"
+ export PG_FLAME_DIR="${config.pgFlameDir}"
+ export PERL_CORE_DIR=$(find ${pkgs.perl} -maxdepth 5 -path "*/CORE" -type d)
+
+ export GDBINIT="${gdbConfig}"
+ export PATH="${flameGraphScript}/bin:${pgbenchScript}/bin:$PATH"
+
+ mkdir -p "$PG_BENCH_DIR" "$PG_FLAME_DIR"
+
+ echo "Clang + musl environment configured"
+ echo " Compiler: $CC"
+ echo " LibC: musl (cross-compilation)"
+
+ if [ -f ./pg-aliases.sh ]; then
+ source ./pg-aliases.sh
+ fi
+
+ echo "PostgreSQL Development Environment Ready (Clang + musl)"
+ '';
+ };
+in {
+ inherit devShell clangDevShell muslDevShell clangMuslDevShell gdbConfig flameGraphScript pgbenchScript;
+}
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index cb3331921cbfd..36e639552e62b 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -290,6 +290,7 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->amproperty = NULL;
amroutine->ambuildphasename = NULL;
amroutine->amvalidate = brinvalidate;
+ amroutine->amcomparedatums = NULL;
amroutine->amadjustmembers = NULL;
amroutine->ambeginscan = brinbeginscan;
amroutine->amrescan = brinrescan;
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 78f7b7a2495cf..8e31ec21c1c94 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -26,6 +26,7 @@
#include "storage/indexfsm.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
+#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/typcache.h"
@@ -78,6 +79,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amproperty = NULL;
amroutine->ambuildphasename = ginbuildphasename;
amroutine->amvalidate = ginvalidate;
+ amroutine->amcomparedatums = gincomparedatums;
amroutine->amadjustmembers = ginadjustmembers;
amroutine->ambeginscan = ginbeginscan;
amroutine->amrescan = ginrescan;
@@ -477,13 +479,6 @@ cmpEntries(const void *a, const void *b, void *arg)
return res;
}
-
-/*
- * Extract the index key values from an indexable item
- *
- * The resulting key values are sorted, and any duplicates are removed.
- * This avoids generating redundant index entries.
- */
Datum *
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
@@ -729,3 +724,86 @@ ginbuildphasename(int64 phasenum)
return NULL;
}
}
+
+/*
+ * gincomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * This function extracts keys from both old_datum and new_datum using the
+ * opclass's extractValue function, then compares the extracted key arrays.
+ * Returns true if the key sets are identical (same keys, same counts).
+ *
+ * This enables HOT updates for GIN indexes when the indexed portions of a
+ * value haven't changed, even if the value itself has changed.
+ *
+ * Example: JSONB column with GIN index. If an update changes a non-indexed
+ * key in the JSONB document, the extracted keys are identical and we can
+ * do a HOT update.
+ */
+bool
+gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ GinState ginstate;
+ Datum *old_keys;
+ Datum *new_keys;
+ GinNullCategory *old_categories;
+ GinNullCategory *new_categories;
+ int32 old_nkeys;
+ int32 new_nkeys;
+ MemoryContext tmpcontext;
+ MemoryContext oldcontext;
+ bool result = true;
+
+ /* Handle NULL cases */
+ if (old_isnull != new_isnull)
+ return false;
+ if (old_isnull)
+ return true;
+
+ /* Create temporary context for extraction work */
+ tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+ "GIN datum comparison",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+ initGinState(&ginstate, index);
+
+ /* Extract keys from both datums using existing GIN infrastructure */
+ old_keys = ginExtractEntries(&ginstate, attnum, old_datum, old_isnull,
+ &old_nkeys, &old_categories);
+ new_keys = ginExtractEntries(&ginstate, attnum, new_datum, new_isnull,
+ &new_nkeys, &new_categories);
+
+ /* Different number of keys, definitely different */
+ if (old_nkeys != new_nkeys)
+ {
+ result = false;
+ goto cleanup;
+ }
+
+ /*
+ * Compare the sorted key arrays element-by-element. Since both arrays are
+ * already sorted by ginExtractEntries, we can do a simple O(n)
+ * comparison.
+ */
+ for (int i = 0; i < old_nkeys; i++)
+ {
+ int cmp = ginCompareEntries(&ginstate, attnum,
+ old_keys[i], old_categories[i],
+ new_keys[i], new_categories[i]);
+
+ if (cmp != 0)
+ {
+ result = false;
+ break;
+ }
+ }
+
+cleanup:
+ /* Clean up */
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(tmpcontext);
+
+ return result;
+}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 53061c819fbf0..91371dfdacdb0 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -50,6 +50,10 @@ static void hashbuildCallback(Relation index,
void *state);
+static bool hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Hash handler function: return IndexAmRoutine with access method parameters
* and callbacks.
@@ -98,6 +102,7 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amproperty = NULL;
amroutine->ambuildphasename = NULL;
amroutine->amvalidate = hashvalidate;
+ amroutine->amcomparedatums = hashcomparedatums;
amroutine->amadjustmembers = hashadjustmembers;
amroutine->ambeginscan = hashbeginscan;
amroutine->amrescan = hashrescan;
@@ -944,3 +949,42 @@ hashtranslatecmptype(CompareType cmptype, Oid opfamily)
return HTEqualStrategyNumber;
return InvalidStrategy;
}
+
+/*
+ * hashcomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * Returns true if the hash values are identical (index doesn't need update).
+ */
+static bool
+hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ uint32 old_hashkey;
+ uint32 new_hashkey;
+
+ /* If both are NULL, they're equal */
+ if (old_isnull && new_isnull)
+ return true;
+
+ /* If NULL status differs, they're not equal */
+ if (old_isnull != new_isnull)
+ return false;
+
+ /*
+ * _hash_datum2hashkey() is used because we know this can't be a cross
+ * type comparison.
+ */
+ old_hashkey = _hash_datum2hashkey(index, old_datum);
+ new_hashkey = _hash_datum2hashkey(index, new_datum);
+
+ /*
+ * If hash keys are identical, the index entry would be the same. Return
+ * true to indicate no index update needed.
+ *
+ * Note: Hash collisions are rare but possible. If hash(x) == hash(y) but
+ * x != y, the hash index still treats them identically, so we correctly
+ * return true.
+ */
+ return (old_hashkey == new_hashkey);
+}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4d382a04338e6..3e88bdbbda86a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -39,18 +39,24 @@
#include "access/syncscan.h"
#include "access/valid.h"
#include "access/visibilitymap.h"
+#include "access/xact.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_database.h"
#include "catalog/pg_database_d.h"
#include "commands/vacuum.h"
+#include "nodes/bitmapset.h"
#include "pgstat.h"
#include "port/pg_bitutils.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/inval.h"
+#include "utils/relcache.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
@@ -62,16 +68,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
HeapTuple newtup, HeapTuple old_key_tuple,
bool all_visible_cleared, bool new_all_visible_cleared);
#ifdef USE_ASSERT_CHECKING
-static void check_lock_if_inplace_updateable_rel(Relation relation,
- const ItemPointerData *otid,
- HeapTuple newtup);
static void check_inplace_rel_lock(HeapTuple oldtup);
#endif
-static Bitmapset *HeapDetermineColumnsInfo(Relation relation,
- Bitmapset *interesting_cols,
- Bitmapset *external_cols,
- HeapTuple oldtup, HeapTuple newtup,
- bool *has_external);
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid,
LockTupleMode mode, LockWaitPolicy wait_policy,
bool *have_tuple_lock);
@@ -103,10 +101,10 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
static void index_delete_sort(TM_IndexDeleteOp *delstate);
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
-static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
+static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp,
+ Bitmapset *rid_attrs, bool key_required,
bool *copy);
-
/*
* Each tuple lock mode has a corresponding heavyweight lock, and one or two
* corresponding MultiXactStatuses (one to merely lock tuples, another one to
@@ -2814,6 +2812,7 @@ heap_delete(Relation relation, const ItemPointerData *tid,
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
TransactionId new_xmax;
+ Bitmapset *rid_attrs;
uint16 new_infomask,
new_infomask2;
bool have_tuple_lock = false;
@@ -2826,6 +2825,8 @@ heap_delete(Relation relation, const ItemPointerData *tid,
AssertHasSnapshotForToast(relation);
+ rid_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -3029,6 +3030,7 @@ heap_delete(Relation relation, const ItemPointerData *tid,
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
+ bms_free(rid_attrs);
return result;
}
@@ -3050,7 +3052,10 @@ heap_delete(Relation relation, const ItemPointerData *tid,
* Compute replica identity tuple before entering the critical section so
* we don't PANIC upon a memory allocation failure.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, &tp, rid_attrs,
+ true, &old_key_copied);
+ bms_free(rid_attrs);
+ rid_attrs = NULL;
/*
* If this is the first possibly-multixact-able operation in the current
@@ -3262,7 +3267,10 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* heap_update - replace a tuple
*
* See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a heap tuple rather than a slot.
+ *
+ * It's required that the caller has acquired the pin and lock on the buffer.
+ * That lock and pin will be managed here, not in the caller.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -3270,30 +3278,21 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* generated by another transaction).
*/
TM_Result
-heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
+heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ Buffer buffer, Page page, BlockNumber block, ItemId lp,
+ Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs,
+ Bitmapset *rid_attrs, const Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
- Bitmapset *hot_attrs;
- Bitmapset *sum_attrs;
- Bitmapset *key_attrs;
- Bitmapset *id_attrs;
- Bitmapset *interesting_attrs;
- Bitmapset *modified_attrs;
- ItemId lp;
- HeapTupleData oldtup;
HeapTuple heaptup;
HeapTuple old_key_tuple = NULL;
bool old_key_copied = false;
- Page page;
- BlockNumber block;
MultiXactStatus mxact_status;
- Buffer buffer,
- newbuf,
- vmbuffer = InvalidBuffer,
+ Buffer newbuf,
vmbuffer_new = InvalidBuffer;
bool need_toast;
Size newtupsize,
@@ -3307,7 +3306,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
bool all_visible_cleared_new = false;
bool checked_lockers;
bool locker_remains;
- bool id_has_external = false;
TransactionId xmax_new_tuple,
xmax_old_tuple;
uint16 infomask_old_tuple,
@@ -3315,144 +3313,13 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
infomask_new_tuple,
infomask2_new_tuple;
- Assert(ItemPointerIsValid(otid));
-
- /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
- Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
- RelationGetNumberOfAttributes(relation));
-
+ Assert(BufferIsLockedByMe(buffer));
+ Assert(ItemIdIsNormal(lp));
AssertHasSnapshotForToast(relation);
- /*
- * Forbid this during a parallel operation, lest it allocate a combo CID.
- * Other workers might need that combo CID for visibility checks, and we
- * have no provision for broadcasting it to them.
- */
- if (IsInParallelMode())
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("cannot update tuples during a parallel operation")));
-
-#ifdef USE_ASSERT_CHECKING
- check_lock_if_inplace_updateable_rel(relation, otid, newtup);
-#endif
-
- /*
- * Fetch the list of attributes to be checked for various operations.
- *
- * For HOT considerations, this is wasted effort if we fail to update or
- * have to put the new tuple on a different page. But we must compute the
- * list before obtaining buffer lock --- in the worst case, if we are
- * doing an update on one of the relevant system catalogs, we could
- * deadlock if we try to fetch the list later. In any case, the relcache
- * caches the data so this is usually pretty cheap.
- *
- * We also need columns used by the replica identity and columns that are
- * considered the "key" of rows in the table.
- *
- * Note that we get copies of each bitmap, so we need not worry about
- * relcache flush happening midway through.
- */
- hot_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_HOT_BLOCKING);
- sum_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_SUMMARIZED);
- key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
- id_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
- interesting_attrs = NULL;
- interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
-
- block = ItemPointerGetBlockNumber(otid);
- INJECTION_POINT("heap_update-before-pin", NULL);
- buffer = ReadBuffer(relation, block);
- page = BufferGetPage(buffer);
-
- /*
- * Before locking the buffer, pin the visibility map page if it appears to
- * be necessary. Since we haven't got the lock yet, someone else might be
- * in the middle of changing this, so we'll need to recheck after we have
- * the lock.
- */
- if (PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
-
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
- lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
-
- /*
- * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
- * we see LP_NORMAL here. When the otid origin is a syscache, we may have
- * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
- * of which indicates concurrent pruning.
- *
- * Failing with TM_Updated would be most accurate. However, unlike other
- * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
- * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
- * does matter to SQL statements UPDATE and MERGE, those SQL statements
- * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
- * TM_Updated and TM_Deleted affects only the wording of error messages.
- * Settle on TM_Deleted, for two reasons. First, it avoids complicating
- * the specification of when tmfd->ctid is valid. Second, it creates
- * error log evidence that we took this branch.
- *
- * Since it's possible to see LP_UNUSED at otid, it's also possible to see
- * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
- * unrelated row, we'll fail with "duplicate key value violates unique".
- * XXX if otid is the live, newer version of the newtup row, we'll discard
- * changes originating in versions of this catalog row after the version
- * the caller got from syscache. See syscache-update-pruned.spec.
- */
- if (!ItemIdIsNormal(lp))
- {
- Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
-
- UnlockReleaseBuffer(buffer);
- Assert(!have_tuple_lock);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
- tmfd->ctid = *otid;
- tmfd->xmax = InvalidTransactionId;
- tmfd->cmax = InvalidCommandId;
- *update_indexes = TU_None;
-
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- /* modified_attrs not yet initialized */
- bms_free(interesting_attrs);
- return TM_Deleted;
- }
-
- /*
- * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
- * properly.
- */
- oldtup.t_tableOid = RelationGetRelid(relation);
- oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
- oldtup.t_len = ItemIdGetLength(lp);
- oldtup.t_self = *otid;
-
- /* the new tuple is ready, except for this: */
+ /* The new tuple is ready, except for this */
newtup->t_tableOid = RelationGetRelid(relation);
- /*
- * Determine columns modified by the update. Additionally, identify
- * whether any of the unmodified replica identity key attributes in the
- * old tuple is externally stored or not. This is required because for
- * such attributes the flattened value won't be WAL logged as part of the
- * new tuple so we must include it as part of the old_key_tuple. See
- * ExtractReplicaIdentity.
- */
- modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
- id_attrs, &oldtup,
- newtup, &id_has_external);
-
/*
* If we're not updating any "key" column, we can grab a weaker lock type.
* This allows for more concurrency when we are running simultaneously
@@ -3464,7 +3331,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* is updates that don't manipulate key columns, not those that
* serendipitously arrive at the same key values.
*/
- if (!bms_overlap(modified_attrs, key_attrs))
+ if (!bms_overlap(mix_attrs, pk_attrs))
{
*lockmode = LockTupleNoKeyExclusive;
mxact_status = MultiXactStatusNoKeyUpdate;
@@ -3488,17 +3355,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
key_intact = false;
}
- /*
- * Note: beyond this point, use oldtup not otid to refer to old tuple.
- * otid may very well point at newtup->t_self, which we will overwrite
- * with the new tuple's location, so there's great risk of confusion if we
- * use otid anymore.
- */
-
l2:
checked_lockers = false;
locker_remains = false;
- result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
+ result = HeapTupleSatisfiesUpdate(oldtup, cid, buffer);
/* see below about the "no wait" case */
Assert(result != TM_BeingModified || wait);
@@ -3530,8 +3390,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
*/
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
- infomask = oldtup.t_data->t_infomask;
+ xwait = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ infomask = oldtup->t_data->t_infomask;
/*
* Now we have to do something about the existing locker. If it's a
@@ -3571,13 +3431,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* requesting a lock and already have one; avoids deadlock).
*/
if (!current_is_member)
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
/* wait for multixact */
MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
- relation, &oldtup.t_self, XLTW_Update,
- &remain);
+ relation, &oldtup->t_self, XLTW_Update, &remain);
checked_lockers = true;
locker_remains = remain != 0;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3587,9 +3446,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* could update this tuple before we get to this point. Check
* for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask,
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask,
infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup->t_data),
xwait))
goto l2;
}
@@ -3614,8 +3473,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* before this one, which are important to keep in case this
* subxact aborts.
*/
- if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
- update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup->t_data->t_infomask))
+ update_xact = HeapTupleGetUpdateXid(oldtup->t_data);
else
update_xact = InvalidTransactionId;
@@ -3656,9 +3515,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* lock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
- XactLockTableWait(xwait, relation, &oldtup.t_self,
+ XactLockTableWait(xwait, relation, &oldtup->t_self,
XLTW_Update);
checked_lockers = true;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3668,20 +3527,20 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* other xact could update this tuple before we get to this point.
* Check for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask, infomask) ||
!TransactionIdEquals(xwait,
- HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+ HeapTupleHeaderGetRawXmax(oldtup->t_data)))
goto l2;
/* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
- if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
+ UpdateXmaxHintBits(oldtup->t_data, buffer, xwait);
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_INVALID)
can_continue = true;
}
if (can_continue)
result = TM_Ok;
- else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
+ else if (!ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid))
result = TM_Updated;
else
result = TM_Deleted;
@@ -3694,39 +3553,33 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
result == TM_Updated ||
result == TM_Deleted ||
result == TM_BeingModified);
- Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
+ Assert(!(oldtup->t_data->t_infomask & HEAP_XMAX_INVALID));
Assert(result != TM_Updated ||
- !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
+ !ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid));
}
if (crosscheck != InvalidSnapshot && result == TM_Ok)
{
/* Perform additional check for transaction-snapshot mode RI updates */
- if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
+ if (!HeapTupleSatisfiesVisibility(oldtup, crosscheck, buffer))
result = TM_Updated;
}
if (result != TM_Ok)
{
- tmfd->ctid = oldtup.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
+ tmfd->ctid = oldtup->t_data->t_ctid;
+ tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup->t_data);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
+ tmfd->cmax = HeapTupleHeaderGetCmax(oldtup->t_data);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
+ if (*vmbuffer != InvalidBuffer)
+ ReleaseBuffer(*vmbuffer);
*update_indexes = TU_None;
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
return result;
}
@@ -3739,10 +3592,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* tuple has been locked or updated under us, but hopefully it won't
* happen very often.
*/
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- visibilitymap_pin(relation, block, &vmbuffer);
+ visibilitymap_pin(relation, block, vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l2;
}
@@ -3753,9 +3606,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* If the tuple we're updating is locked, we need to preserve the locking
* info in the old tuple's Xmax. Prepare a new Xmax value for this.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, true,
&xmax_old_tuple, &infomask_old_tuple,
&infomask2_old_tuple);
@@ -3767,12 +3620,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* tuple. (In rare cases that might also be InvalidTransactionId and yet
* not have the HEAP_XMAX_INVALID bit set; that's fine.)
*/
- if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
+ if ((oldtup->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_LOCKED_UPGRADED(oldtup->t_data->t_infomask) ||
(checked_lockers && !locker_remains))
xmax_new_tuple = InvalidTransactionId;
else
- xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup->t_data);
if (!TransactionIdIsValid(xmax_new_tuple))
{
@@ -3787,7 +3640,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* Note that since we're doing an update, the only possibility is that
* the lockers had FOR KEY SHARE lock.
*/
- if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
{
GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
&infomask2_new_tuple);
@@ -3815,7 +3668,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* Replace cid with a combo CID if necessary. Note that we already put
* the plain cid into the new tuple.
*/
- HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
+ HeapTupleHeaderAdjustCmax(oldtup->t_data, &cid, &iscombo);
/*
* If the toaster needs to be activated, OR if the new tuple will not fit
@@ -3832,12 +3685,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
relation->rd_rel->relkind != RELKIND_MATVIEW)
{
/* toast table entries should never be recursively toasted */
- Assert(!HeapTupleHasExternal(&oldtup));
+ Assert(!HeapTupleHasExternal(oldtup));
Assert(!HeapTupleHasExternal(newtup));
need_toast = false;
}
else
- need_toast = (HeapTupleHasExternal(&oldtup) ||
+ need_toast = (HeapTupleHasExternal(oldtup) ||
HeapTupleHasExternal(newtup) ||
newtup->t_len > TOAST_TUPLE_THRESHOLD);
@@ -3870,9 +3723,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* updating, because the potentially created multixact would otherwise
* be wrong.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, false,
&xmax_lock_old_tuple, &infomask_lock_old_tuple,
&infomask2_lock_old_tuple);
@@ -3882,18 +3735,18 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
- HeapTupleClearHotUpdated(&oldtup);
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ HeapTupleClearHotUpdated(oldtup);
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_lock_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
- oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_lock_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_lock_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_lock_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* temporarily make it look not-updated, but locked */
- oldtup.t_data->t_ctid = oldtup.t_self;
+ oldtup->t_data->t_ctid = oldtup->t_self;
/*
* Clear all-frozen bit on visibility map if needed. We could
@@ -3902,7 +3755,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* worthwhile.
*/
if (PageIsAllVisible(page) &&
- visibilitymap_clear(relation, block, vmbuffer,
+ visibilitymap_clear(relation, block, *vmbuffer,
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
@@ -3916,10 +3769,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
XLogBeginInsert();
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
- xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
+ xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
xlrec.xmax = xmax_lock_old_tuple;
- xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2);
+ xlrec.infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
xlrec.flags =
cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
XLogRegisterData(&xlrec, SizeOfHeapLock);
@@ -3941,7 +3794,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
if (need_toast)
{
/* Note we always use WAL and FSM during updates */
- heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
+ heaptup = heap_toast_insert_or_update(relation, newtup, oldtup, 0);
newtupsize = MAXALIGN(heaptup->t_len);
}
else
@@ -3977,20 +3830,20 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
/* It doesn't fit, must use RelationGetBufferForTuple. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
buffer, 0, NULL,
- &vmbuffer_new, &vmbuffer,
+ &vmbuffer_new, vmbuffer,
0);
/* We're all done. */
break;
}
/* Acquire VM page pin if needed and we don't have it. */
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, vmbuffer);
/* Re-acquire the lock on the old tuple's page. */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* Re-check using the up-to-date free space */
pagefree = PageGetHeapFreeSpace(page);
if (newtupsize > pagefree ||
- (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
+ (*vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
{
/*
* Rats, it doesn't fit anymore, or somebody just now set the
@@ -4028,7 +3881,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* will include checking the relation level, there is no benefit to a
* separate check for the new tuple.
*/
- CheckForSerializableConflictIn(relation, &oldtup.t_self,
+ CheckForSerializableConflictIn(relation, &oldtup->t_self,
BufferGetBlockNumber(buffer));
/*
@@ -4036,7 +3889,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* has enough space for the new tuple. If they are the same buffer, only
* one pin is held.
*/
-
if (newbuf == buffer)
{
/*
@@ -4044,7 +3896,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* to do a HOT update. Check if any of the index columns have been
* changed.
*/
- if (!bms_overlap(modified_attrs, hot_attrs))
+ if (!bms_overlap(mix_attrs, hot_attrs))
{
use_hot_update = true;
@@ -4055,7 +3907,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* indexes if the columns were updated, or we may fail to detect
* e.g. value bound changes in BRIN minmax indexes.
*/
- if (bms_overlap(modified_attrs, sum_attrs))
+ if (bms_overlap(mix_attrs, sum_attrs))
summarized_update = true;
}
}
@@ -4072,10 +3924,8 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* logged. Pass old key required as true only if the replica identity key
* columns are modified or it has external data.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
- bms_overlap(modified_attrs, id_attrs) ||
- id_has_external,
- &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, oldtup, rid_attrs,
+ rep_id_key_required, &old_key_copied);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -4097,7 +3947,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
if (use_hot_update)
{
/* Mark the old tuple as HOT-updated */
- HeapTupleSetHotUpdated(&oldtup);
+ HeapTupleSetHotUpdated(oldtup);
/* And mark the new tuple as heap-only */
HeapTupleSetHeapOnly(heaptup);
/* Mark the caller's copy too, in case different from heaptup */
@@ -4106,7 +3956,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
else
{
/* Make sure tuples are correctly marked as not-HOT */
- HeapTupleClearHotUpdated(&oldtup);
+ HeapTupleClearHotUpdated(oldtup);
HeapTupleClearHeapOnly(heaptup);
HeapTupleClearHeapOnly(newtup);
}
@@ -4115,17 +3965,17 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
/* Clear obsolete visibility flags, possibly set by ourselves above... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
- oldtup.t_data->t_infomask |= infomask_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* record address of new tuple in t_ctid of old one */
- oldtup.t_data->t_ctid = heaptup->t_self;
+ oldtup->t_data->t_ctid = heaptup->t_self;
/* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
if (PageIsAllVisible(BufferGetPage(buffer)))
@@ -4133,7 +3983,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
all_visible_cleared = true;
PageClearAllVisible(BufferGetPage(buffer));
visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
- vmbuffer, VISIBILITYMAP_VALID_BITS);
+ *vmbuffer, VISIBILITYMAP_VALID_BITS);
}
if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
{
@@ -4158,12 +4008,12 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
*/
if (RelationIsAccessibleInLogicalDecoding(relation))
{
- log_heap_new_cid(relation, &oldtup);
+ log_heap_new_cid(relation, oldtup);
log_heap_new_cid(relation, heaptup);
}
recptr = log_heap_update(relation, buffer,
- newbuf, &oldtup, heaptup,
+ newbuf, oldtup, heaptup,
old_key_tuple,
all_visible_cleared,
all_visible_cleared_new);
@@ -4188,7 +4038,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* both tuple versions in one call to inval.c so we can avoid redundant
* sinval messages.)
*/
- CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
+ CacheInvalidateHeapTuple(relation, oldtup, heaptup);
/* Now we can release the buffer(s) */
if (newbuf != buffer)
@@ -4196,14 +4046,14 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
ReleaseBuffer(buffer);
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
- if (BufferIsValid(vmbuffer))
- ReleaseBuffer(vmbuffer);
+ if (BufferIsValid(*vmbuffer))
+ ReleaseBuffer(*vmbuffer);
/*
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
@@ -4236,13 +4086,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
if (old_key_tuple != NULL && old_key_copied)
heap_freetuple(old_key_tuple);
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
-
return TM_Ok;
}
@@ -4251,7 +4094,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* Confirm adequate lock held during heap_update(), per rules from
* README.tuplock section "Locking to write inplace-updated tables".
*/
-static void
+void
check_lock_if_inplace_updateable_rel(Relation relation,
const ItemPointerData *otid,
HeapTuple newtup)
@@ -4423,7 +4266,7 @@ heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
* listed as interesting) of the old tuple is a member of external_cols and is
* stored externally.
*/
-static Bitmapset *
+Bitmapset *
HeapDetermineColumnsInfo(Relation relation,
Bitmapset *interesting_cols,
Bitmapset *external_cols,
@@ -4506,25 +4349,175 @@ HeapDetermineColumnsInfo(Relation relation,
}
/*
- * simple_heap_update - replace a tuple
- *
- * This routine may be used to update a tuple when concurrent updates of
- * the target tuple are not expected (for example, because we have a lock
- * on the relation associated with the tuple). Any failure is reported
- * via ereport().
+ * This routine may be used to update a tuple when concurrent updates of the
+ * target tuple are not expected (for example, because we have a lock on the
+ * relation associated with the tuple). Any failure is reported via ereport().
+ * Returns the set of modified indexed attributes.
*/
-void
-simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup,
+Bitmapset *
+simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *mix_attrs,
+ *idx_attrs;
+ ItemId lp;
+ HeapTupleData oldtup;
+ bool rep_id_key_required = false;
+
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ /*
+ * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
+ * we see LP_NORMAL here. When the otid origin is a syscache, we may have
+ * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
+ * of which indicates concurrent pruning.
+ *
+ * Failing with TM_Updated would be most accurate. However, unlike other
+ * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
+ * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
+ * does matter to SQL statements UPDATE and MERGE, those SQL statements
+ * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
+ * TM_Updated and TM_Deleted affects only the wording of error messages.
+ * Settle on TM_Deleted, for two reasons. First, it avoids complicating
+ * the specification of when tmfd->ctid is valid. Second, it creates
+ * error log evidence that we took this branch.
+ *
+ * Since it's possible to see LP_UNUSED at otid, it's also possible to see
+ * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
+ * unrelated row, we'll fail with "duplicate key value violates unique".
+ * XXX if otid is the live, newer version of the newtup row, we'll discard
+ * changes originating in versions of this catalog row after the version
+ * the caller got from syscache. See syscache-update-pruned.spec.
+ */
+ if (!ItemIdIsNormal(lp))
+ {
+ Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
+
+ UnlockReleaseBuffer(buffer);
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ *update_indexes = TU_None;
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(idx_attrs);
+ /* mix_attrs not yet initialized */
+
+ elog(ERROR, "tuple concurrently deleted");
+
+ return NULL;
+ }
+
+ /*
+ * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
+ * then pass that on to heap_update.
+ */
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
+ &oldtup, tuple, &rep_id_key_required);
+
+ /*
+ * We'll need to WAL log the replica identity attributes if either they
+	 * overlap with the modified indexed attributes or, as we've just
+	 * checked in HeapDetermineColumnsInfo, they were unmodified external
+ * indexed attributes.
+ */
+ rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+
+ result = heap_update(relation, &oldtup, tuple, GetCurrentCommandId(true),
+ InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required,
+ update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(idx_attrs);
- result = heap_update(relation, otid, tup,
- GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
switch (result)
{
case TM_SelfModified:
@@ -4548,6 +4541,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
elog(ERROR, "unrecognized heap_update status: %u", result);
break;
}
+
+ return mix_attrs;
}
@@ -9164,12 +9159,11 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
* the same tuple that was passed in.
*/
static HeapTuple
-ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
- bool *copy)
+ExtractReplicaIdentity(Relation relation, HeapTuple tp, Bitmapset *rid_attrs,
+ bool key_required, bool *copy)
{
TupleDesc desc = RelationGetDescr(relation);
char replident = relation->rd_rel->relreplident;
- Bitmapset *idattrs;
HeapTuple key_tuple;
bool nulls[MaxHeapAttributeNumber];
Datum values[MaxHeapAttributeNumber];
@@ -9200,17 +9194,13 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
if (!key_required)
return NULL;
- /* find out the replica identity columns */
- idattrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
-
/*
* If there's no defined replica identity columns, treat as !key_required.
* (This case should not be reachable from heap_update, since that should
* calculate key_required accurately. But heap_delete just passes
* constant true for key_required, so we can hit this case in deletes.)
*/
- if (bms_is_empty(idattrs))
+ if (bms_is_empty(rid_attrs))
return NULL;
/*
@@ -9223,7 +9213,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
for (int i = 0; i < desc->natts; i++)
{
if (bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
- idattrs))
+ rid_attrs))
Assert(!nulls[i]);
else
nulls[i] = true;
@@ -9232,8 +9222,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
key_tuple = heap_form_tuple(desc, values, nulls);
*copy = true;
- bms_free(idattrs);
-
/*
* If the tuple, which by here only contains indexed columns, still has
* toasted columns, force them to be inlined. This is somewhat unlikely
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index bcbac844bb669..7527809ec0802 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -44,6 +44,7 @@
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/rel.h"
static void reform_and_rewrite_tuple(HeapTuple tuple,
@@ -312,23 +313,175 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}
-
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
- CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ CommandId cid, Snapshot snapshot,
+ Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd,
+ LockTupleMode *lockmode,
+ const Bitmapset *mix_attrs,
+ TU_UpdateIndexes *update_indexes)
{
+ bool rep_id_key_required = false;
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+ HeapTupleData oldtup;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ ItemId lp;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *idx_attrs;
TM_Result result;
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ Assert(ItemIdIsNormal(lp));
+
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ /*
+ * We'll need to include the replica identity key when either the identity
+ * key attributes overlap with the modified index attributes or when the
+ * replica identity attributes are stored externally. This is required
+ * because for such attributes the flattened value won't be WAL logged as
+ * part of the new tuple so we must determine if we need to extract and
+ * include them as part of the old_key_tuple (see ExtractReplicaIdentity).
+ */
+ rep_id_key_required = bms_overlap(mix_attrs, rid_attrs);
+ if (!rep_id_key_required)
+ {
+ Bitmapset *attrs;
+ TupleDesc tupdesc = RelationGetDescr(relation);
+ int attidx = -1;
+
+ /*
+ * We don't own idx_attrs so we'll copy it and remove the modified set
+ * to reduce the attributes we need to test in the while loop and
+		 * avoid two branches in the loop.
+ */
+ attrs = bms_difference(idx_attrs, mix_attrs);
+ attrs = bms_int_members(attrs, rid_attrs);
+
+ while ((attidx = bms_next_member(attrs, attidx)) >= 0)
+ {
+ /*
+ * attidx is zero-based, attrnum is the normal attribute number
+ */
+ AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber;
+ Datum value;
+ bool isnull;
+
+ /*
+ * System attributes are not added into interesting_attrs in
+ * relcache
+ */
+ Assert(attrnum > 0);
+
+ value = heap_getattr(&oldtup, attrnum, tupdesc, &isnull);
+
+ /* No need to check attributes that can't be stored externally */
+ if (isnull ||
+ TupleDescCompactAttr(tupdesc, attrnum - 1)->attlen != -1)
+ continue;
+
+ /* Check if the old tuple's attribute is stored externally */
+ if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value)))
+ {
+ rep_id_key_required = true;
+ break;
+ }
+ }
+
+ bms_free(attrs);
+ }
+
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
- tmfd, lockmode, update_indexes);
+ result = heap_update(relation, &oldtup, tuple, cid, crosscheck, wait, tmfd, lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required, update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(idx_attrs);
+
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index fdff960c13022..e435f0d5db49e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -155,6 +155,7 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amproperty = btproperty;
amroutine->ambuildphasename = btbuildphasename;
amroutine->amvalidate = btvalidate;
+ amroutine->amcomparedatums = NULL;
amroutine->amadjustmembers = btadjustmembers;
amroutine->ambeginscan = btbeginscan;
amroutine->amrescan = btrescan;
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 1e099febdc8ca..15f0dd7aa2857 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -367,6 +367,7 @@ void
simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot,
Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
@@ -377,7 +378,9 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ &tmfd, &lockmode,
+ mix_attrs,
+ update_indexes);
switch (result)
{
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index fc8638c1b61b6..329c110d0bfd3 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -961,10 +961,18 @@ index_register(Oid heap,
newind->il_info->ii_Expressions =
copyObject(indexInfo->ii_Expressions);
newind->il_info->ii_ExpressionsState = NIL;
+ /* expression attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_ExpressionsAttrs =
+ copyObject(indexInfo->ii_ExpressionsAttrs);
/* predicate will likely be null, but may as well copy it */
newind->il_info->ii_Predicate =
copyObject(indexInfo->ii_Predicate);
newind->il_info->ii_PredicateState = NULL;
+ /* predicate attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_PredicateAttrs =
+ copyObject(indexInfo->ii_PredicateAttrs);
+ newind->il_info->ii_CheckedPredicate = false;
+ newind->il_info->ii_PredicateSatisfied = false;
/* no exclusion constraints at bootstrap time, so no need to copy */
Assert(indexInfo->ii_ExclusionOps == NULL);
Assert(indexInfo->ii_ExclusionProcs == NULL);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 5d9db167e5950..e88db7e919b8b 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -27,6 +27,7 @@
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relscan.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
#include "access/toast_compression.h"
#include "access/transam.h"
@@ -58,6 +59,7 @@
#include "commands/trigger.h"
#include "executor/executor.h"
#include "miscadmin.h"
+#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
@@ -2414,6 +2416,58 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
* ----------------------------------------------------------------
*/
+/* ----------------
+ * BuildUpdateIndexInfo
+ *
+ * For expression indexes, updates may not change the indexed value, allowing
+ * for a HOT update. Add information to the IndexInfo to allow for checking
+ * if the indexed value has changed.
+ *
+ * Do this processing here rather than in BuildIndexInfo() to not incur the
+ * overhead in the common non-expression cases.
+ * ----------------
+ */
+void
+BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo)
+{
+ for (int j = 0; j < resultRelInfo->ri_NumIndices; j++)
+ {
+ int i;
+ int indnatts;
+ Bitmapset *attrs = NULL;
+ IndexInfo *ii = resultRelInfo->ri_IndexRelationInfo[j];
+
+ indnatts = ii->ii_NumIndexAttrs;
+
+		/* Collect attributes used by the index (key and INCLUDE columns) */
+ for (i = 0; i < indnatts; i++)
+ {
+ AttrNumber attnum = ii->ii_IndexAttrNumbers[i];
+
+ if (attnum != 0)
+ attrs = bms_add_member(attrs, attnum - FirstLowInvalidHeapAttributeNumber);
+ }
+
+ /* Collect attributes used in the expression */
+ if (ii->ii_Expressions)
+ pull_varattnos((Node *) ii->ii_Expressions,
+ resultRelInfo->ri_RangeTableIndex,
+ &ii->ii_ExpressionsAttrs);
+
+ /* Collect attributes used in the predicate */
+ if (ii->ii_Predicate)
+ pull_varattnos((Node *) ii->ii_Predicate,
+ resultRelInfo->ri_RangeTableIndex,
+ &ii->ii_PredicateAttrs);
+
+ /* Combine key, including, and expression attributes, but not predicate */
+ ii->ii_IndexedAttrs = bms_union(attrs, ii->ii_ExpressionsAttrs);
+
+ /* All indexes should index *something*! */
+ Assert(!bms_is_empty(ii->ii_IndexedAttrs));
+ }
+}
+
/* ----------------
* BuildIndexInfo
* Construct an IndexInfo record for an open index
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index 004c5121000fe..a361c21549012 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -102,7 +102,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
* Get information from the state structure. Fall out if nothing to do.
*/
numIndexes = indstate->ri_NumIndices;
- if (numIndexes == 0)
+ if (numIndexes == 0 || updateIndexes == TU_None)
return;
relationDescs = indstate->ri_IndexRelationDescs;
indexInfoArray = indstate->ri_IndexRelationInfo;
@@ -314,15 +314,18 @@ CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
{
CatalogIndexState indstate;
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+
CatalogCloseIndexes(indstate);
+ bms_free(updatedAttrs);
}
/*
@@ -338,12 +341,15 @@ CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTu
CatalogIndexState indstate)
{
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = NULL;
+ bms_free(updatedAttrs);
}
/*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index 874a8fc89adb3..c665aa744b3bf 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -292,16 +292,18 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
indexInfo->ii_IndexAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_ExpressionsAttrs = NULL;
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NULL;
+ indexInfo->ii_PredicateAttrs = NULL;
+ indexInfo->ii_CheckedPredicate = false;
+ indexInfo->ii_PredicateSatisfied = false;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
indexInfo->ii_Unique = true;
indexInfo->ii_NullsNotDistinct = false;
indexInfo->ii_ReadyForInserts = true;
- indexInfo->ii_CheckedUnchanged = false;
- indexInfo->ii_IndexUnchanged = false;
indexInfo->ii_Concurrent = false;
indexInfo->ii_BrokenHotChain = false;
indexInfo->ii_ParallelWorkers = 0;
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index dd323c9b9fd42..c051babf91d15 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -109,11 +109,15 @@
#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
+#include "nodes/bitmapset.h"
+#include "nodes/execnodes.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
+#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/multirangetypes.h"
#include "utils/rangetypes.h"
@@ -139,11 +143,6 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
static bool index_recheck_constraint(Relation index, const Oid *constr_procs,
const Datum *existing_values, const bool *existing_isnull,
const Datum *new_values);
-static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
- EState *estate, IndexInfo *indexInfo,
- Relation indexRelation);
-static bool index_expression_changed_walker(Node *node,
- Bitmapset *allUpdatedCols);
static void ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval,
char typtype, Oid atttypid);
@@ -324,8 +323,8 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
- Datum values[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
+ Datum loc_values[INDEX_MAX_KEYS];
+ bool loc_isnull[INDEX_MAX_KEYS];
Assert(ItemPointerIsValid(tupleid));
@@ -349,13 +348,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
- /*
- * for each index, form and insert the index tuple
- */
+ /* Insert into each index that needs updating */
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
+ Datum *values;
+ bool *isnull;
bool applyNoDupErr;
IndexUniqueCheck checkUnique;
bool indexUnchanged;
@@ -372,7 +371,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/*
* Skip processing of non-summarizing indexes if we only update
- * summarizing indexes
+ * summarizing indexes or if this index is unchanged.
*/
if (onlySummarizing && !indexInfo->ii_Summarizing)
continue;
@@ -393,8 +392,15 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
indexInfo->ii_PredicateState = predicate;
}
+ /* Check the index predicate if we haven't done so earlier on */
+ if (!indexInfo->ii_CheckedPredicate)
+ {
+ indexInfo->ii_PredicateSatisfied = ExecQual(predicate, econtext);
+ indexInfo->ii_CheckedPredicate = true;
+ }
+
/* Skip this index-update if the predicate isn't satisfied */
- if (!ExecQual(predicate, econtext))
+ if (!indexInfo->ii_PredicateSatisfied)
continue;
}
@@ -402,11 +408,10 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
- FormIndexDatum(indexInfo,
- slot,
- estate,
- values,
- isnull);
+ FormIndexDatum(indexInfo, slot, estate, loc_values, loc_isnull);
+
+ values = loc_values;
+ isnull = loc_isnull;
/* Check whether to apply noDupErr to this index */
applyNoDupErr = noDupErr &&
@@ -441,10 +446,9 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* index. If we're being called as part of an UPDATE statement,
* consider if the 'indexUnchanged' = true hint should be passed.
*/
- indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
- estate,
- indexInfo,
- indexRelation);
+ indexUnchanged = update &&
+ !bms_overlap(indexInfo->ii_IndexedAttrs,
+ resultRelInfo->ri_ChangedIndexedCols);
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
@@ -613,7 +617,12 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
checkedIndex = true;
/* Check for partial index */
- if (indexInfo->ii_Predicate != NIL)
+ if (indexInfo->ii_CheckedPredicate && !indexInfo->ii_PredicateSatisfied)
+ {
+ /* We've already checked and the predicate wasn't satisfied. */
+ continue;
+ }
+ else if (indexInfo->ii_Predicate != NIL)
{
ExprState *predicate;
@@ -999,152 +1008,6 @@ index_recheck_constraint(Relation index, const Oid *constr_procs,
return true;
}
-/*
- * Check if ExecInsertIndexTuples() should pass indexUnchanged hint.
- *
- * When the executor performs an UPDATE that requires a new round of index
- * tuples, determine if we should pass 'indexUnchanged' = true hint for one
- * single index.
- */
-static bool
-index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
- IndexInfo *indexInfo, Relation indexRelation)
-{
- Bitmapset *updatedCols;
- Bitmapset *extraUpdatedCols;
- Bitmapset *allUpdatedCols;
- bool hasexpression = false;
- List *idxExprs;
-
- /*
- * Check cache first
- */
- if (indexInfo->ii_CheckedUnchanged)
- return indexInfo->ii_IndexUnchanged;
- indexInfo->ii_CheckedUnchanged = true;
-
- /*
- * Check for indexed attribute overlap with updated columns.
- *
- * Only do this for key columns. A change to a non-key column within an
- * INCLUDE index should not be counted here. Non-key column values are
- * opaque payload state to the index AM, a little like an extra table TID.
- *
- * Note that row-level BEFORE triggers won't affect our behavior, since
- * they don't affect the updatedCols bitmaps generally. It doesn't seem
- * worth the trouble of checking which attributes were changed directly.
- */
- updatedCols = ExecGetUpdatedCols(resultRelInfo, estate);
- extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate);
- for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++)
- {
- int keycol = indexInfo->ii_IndexAttrNumbers[attr];
-
- if (keycol <= 0)
- {
- /*
- * Skip expressions for now, but remember to deal with them later
- * on
- */
- hasexpression = true;
- continue;
- }
-
- if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- updatedCols) ||
- bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- extraUpdatedCols))
- {
- /* Changed key column -- don't hint for this index */
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
- }
-
- /*
- * When we get this far and index has no expressions, return true so that
- * index_insert() call will go on to pass 'indexUnchanged' = true hint.
- *
- * The _absence_ of an indexed key attribute that overlaps with updated
- * attributes (in addition to the total absence of indexed expressions)
- * shows that the index as a whole is logically unchanged by UPDATE.
- */
- if (!hasexpression)
- {
- indexInfo->ii_IndexUnchanged = true;
- return true;
- }
-
- /*
- * Need to pass only one bms to expression_tree_walker helper function.
- * Avoid allocating memory in common case where there are no extra cols.
- */
- if (!extraUpdatedCols)
- allUpdatedCols = updatedCols;
- else
- allUpdatedCols = bms_union(updatedCols, extraUpdatedCols);
-
- /*
- * We have to work slightly harder in the event of indexed expressions,
- * but the principle is the same as before: try to find columns (Vars,
- * actually) that overlap with known-updated columns.
- *
- * If we find any matching Vars, don't pass hint for index. Otherwise
- * pass hint.
- */
- idxExprs = RelationGetIndexExpressions(indexRelation);
- hasexpression = index_expression_changed_walker((Node *) idxExprs,
- allUpdatedCols);
- list_free(idxExprs);
- if (extraUpdatedCols)
- bms_free(allUpdatedCols);
-
- if (hasexpression)
- {
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
-
- /*
- * Deliberately don't consider index predicates. We should even give the
- * hint when result rel's "updated tuple" has no corresponding index
- * tuple, which is possible with a partial index (provided the usual
- * conditions are met).
- */
- indexInfo->ii_IndexUnchanged = true;
- return true;
-}
-
-/*
- * Indexed expression helper for index_unchanged_by_update().
- *
- * Returns true when Var that appears within allUpdatedCols located.
- */
-static bool
-index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
-{
- if (node == NULL)
- return false;
-
- if (IsA(node, Var))
- {
- Var *var = (Var *) node;
-
- if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
- allUpdatedCols))
- {
- /* Var was updated -- indicates that we should not hint */
- return true;
- }
-
- /* Still haven't found a reason to not pass the hint */
- return false;
- }
-
- return expression_tree_walker(node, index_expression_changed_walker,
- allUpdatedCols);
-}
-
/*
* ExecWithoutOverlapsNotEmpty - raise an error if the tuple has an empty
* range or multirange in the given attribute.
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 27c9eec697b18..6b7b6bc801952 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1282,6 +1282,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
/* The following fields are set later if needed */
resultRelInfo->ri_RowIdAttNo = 0;
resultRelInfo->ri_extraUpdatedCols = NULL;
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
resultRelInfo->ri_projectNew = NULL;
resultRelInfo->ri_newTupleSlot = NULL;
resultRelInfo->ri_oldTupleSlot = NULL;
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index def32774c90db..2709e2db0f2b4 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -32,6 +32,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -936,7 +937,13 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
+ /*
+ * We're not going to call ExecCheckIndexedAttrsForChanges here
+ * because we've already identified the changes earlier on thanks to
+ * slot_modify_data.
+ */
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
+ resultRelInfo->ri_ChangedIndexedCols,
&update_indexes);
conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index e44f12238864f..fb1ef526a6c9f 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -17,6 +17,7 @@
* ExecModifyTable - retrieve the next tuple from the node
* ExecEndModifyTable - shut down the ModifyTable node
* ExecReScanModifyTable - rescan the ModifyTable node
+ * ExecCheckIndexedAttrsForChanges - find set of updated indexed columns
*
* NOTES
* The ModifyTable node receives input from its outerPlan, which is
@@ -53,12 +54,18 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/attnum.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
+#include "access/tupconvert.h"
+#include "access/tupdesc.h"
#include "access/xact.h"
+#include "catalog/index.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
+#include "executor/tuptable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
@@ -68,8 +75,11 @@
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/float.h"
#include "utils/injection_point.h"
+#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
@@ -177,6 +187,530 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context,
bool canSetTag);
+/*
+ * Compare two datums using the type's default equality operator.
+ *
+ * Returns true if the values are equal according to the type's equality
+ * operator, false otherwise. Falls back to binary comparison if no
+ * type-specific operator is available.
+ *
+ * This function uses the TypeCache infrastructure which caches operator
+ * lookups for efficiency.
+ */
+bool
+tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen,
+ Datum value1, Datum value2)
+{
+ TypeCacheEntry *typentry;
+
+ LOCAL_FCINFO(fcinfo, 2);
+ Datum result;
+
+ /*
+ * Fast path for common types to avoid even the type cache lookup. These
+ * types have simple equality semantics.
+ */
+ switch (typid)
+ {
+ case INT2OID:
+ return DatumGetInt16(value1) == DatumGetInt16(value2);
+ case INT4OID:
+ return DatumGetInt32(value1) == DatumGetInt32(value2);
+ case INT8OID:
+ return DatumGetInt64(value1) == DatumGetInt64(value2);
+ case FLOAT4OID:
+ return !float4_cmp_internal(DatumGetFloat4(value1), DatumGetFloat4(value2));
+ case FLOAT8OID:
+ return !float8_cmp_internal(DatumGetFloat8(value1), DatumGetFloat8(value2));
+ case BOOLOID:
+ return DatumGetBool(value1) == DatumGetBool(value2);
+ case OIDOID:
+ case REGPROCOID:
+ case REGPROCEDUREOID:
+ case REGOPEROID:
+ case REGOPERATOROID:
+ case REGCLASSOID:
+ case REGTYPEOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
+ case REGCONFIGOID:
+ case REGDICTIONARYOID:
+ return DatumGetObjectId(value1) == DatumGetObjectId(value2);
+ case CHAROID:
+ return DatumGetChar(value1) == DatumGetChar(value2);
+ default:
+ /* Continue to type cache lookup */
+ break;
+ }
+
+ /*
+ * Look up the type's equality operator using the type cache. Request both
+ * the operator OID and the function info for efficiency.
+ */
+ typentry = lookup_type_cache(typid,
+ TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO);
+
+ /* Use the type's collation if none provided */
+ if (collation == -1)
+ collation = typentry->typcollation;
+
+ /*
+ * If no equality operator is available, fall back to binary comparison.
+ * This handles types that don't have proper equality operators defined.
+ */
+ if (!OidIsValid(typentry->eq_opr))
+ return datumIsEqual(value1, value2, typbyval, typlen);
+
+ /*
+ * Use the cached function info if available, otherwise look it up. The
+ * type cache keeps this around so subsequent calls are fast.
+ */
+ if (typentry->eq_opr_finfo.fn_addr == NULL)
+ {
+ Oid eq_proc = get_opcode(typentry->eq_opr);
+
+ if (!OidIsValid(eq_proc))
+ /* Shouldn't happen, but fall back to binary comparison */
+ return datumIsEqual(value1, value2, typbyval, typlen);
+
+ fmgr_info_cxt(eq_proc, &typentry->eq_opr_finfo,
+ CacheMemoryContext);
+ }
+
+ /* Set up function call */
+ InitFunctionCallInfoData(*fcinfo, &typentry->eq_opr_finfo, 2,
+ collation, NULL, NULL);
+
+ fcinfo->args[0].value = value1;
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].value = value2;
+ fcinfo->args[1].isnull = false;
+
+ /* Invoke the equality operator */
+ result = FunctionCallInvoke(fcinfo);
+
+ /*
+ * If the function returned NULL (shouldn't happen for equality ops),
+ * treat as not equal for safety.
+ */
+ if (fcinfo->isnull)
+ return false;
+
+ return DatumGetBool(result);
+}
+
+/*
+ * ExecCheckIndexedAttrsForChanges
+ *
+ * Determine which indexes need updating by finding the set of modified
+ * indexed attributes.
+ *
+ * For expression indexes and indexes which implement the amcomparedatums()
+ * index AM API we'll need to form index datum and compare each attribute to
+ * see if any actually changed.
+ *
+ * For expression indexes the result of the expression might not change at all,
+ * this is common with JSONB columns, which require expression indexes. It is
+ * is commonplace to index one or more fields within a document and perform
+ * updates to the document while leaving the indexed fields unchanged. These
+ * updates don't necessitate index updates.
+ *
+ * Partial indexes won't trigger index updates when the old/new tuples are both
+ * outside of the predicate range. A transition into or out of the predicate
+ * does require an index update.
+ *
+ * Indexes that support index-only scans (IOS) should return the value that
+ * is the binary equivalent of what is in the table. For that reason we must
+ * use datumIsEqual() when deciding if an index update is required or not.
+ *
+ * All other indexes require testing old/new datum for equality, we now test
+ * with a type-specific equality operator and fall back to datumIsEqual()
+ * when that isn't possible.
+ *
+ * For a BTREE index (nbtree) there is an additional reason to use binary
+ * comparison for equality. TID deduplication on page split in nbtree uses
+ * binary comparison.
+ *
+ * The goal is for the executor to know, ahead of calling into the table AM to
+ * process the update and before calling into the index AM for inserting new
+ * index tuples, which attributes in the new TupleTableSlot, if any, truly
+ * necessitate a new index tuple.
+ *
+ * Returns a Bitmapset of attributes that intersects with indexes which require
+ * a new index tuple.
+ */
+Bitmapset *
+ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts)
+{
+ Relation relation = relinfo->ri_RelationDesc;
+ TupleDesc tupdesc = RelationGetDescr(relation);
+ Bitmapset *mix_attrs = NULL;
+
+ /* If no indexes, we're done */
+ if (relinfo->ri_NumIndices == 0)
+ return NULL;
+
+ /*
+ * NOTE: Expression and predicates that are observed to change will have
+ * all their attributes added into the m_attrs set knowing that some of
+ * those might not have changed. Take for instance an index on (a + b)
+ * followed by an index on (b) with an update that changes only the value
+ * of 'a'. We'll add both 'a' and 'b' to the m_attrs set then later when
+ * reviewing the second index add 'b' to the u_attrs (unchanged) set. In
+ * the end, we'll remove all the unchanged from the m_attrs and get our
+ * desired result.
+ */
+
+ /* Find the indexes that reference this attribute */
+ for (int i = 0; i < relinfo->ri_NumIndices; i++)
+ {
+ Relation index = relinfo->ri_IndexRelationDescs[i];
+ IndexAmRoutine *amroutine = index->rd_indam;
+ IndexInfo *indexInfo = relinfo->ri_IndexRelationInfo[i];
+ Bitmapset *m_attrs = NULL; /* (possibly) modified key attributes */
+ Bitmapset *p_attrs = NULL; /* (possibly) modified predicate
+ * attributes */
+ Bitmapset *u_attrs = NULL; /* unmodified attributes */
+ Bitmapset *pre_attrs = indexInfo->ii_PredicateAttrs;
+ bool has_expressions = (indexInfo->ii_Expressions != NIL);
+ bool has_am_compare = (amroutine->amcomparedatums != NULL);
+ bool supports_ios = (amroutine->amcanreturn != NULL);
+ bool is_partial = (indexInfo->ii_Predicate != NIL);
+ TupleTableSlot *save_scantuple;
+ ExprContext *econtext = GetPerTupleExprContext(estate);
+ Datum old_values[INDEX_MAX_KEYS];
+ bool old_isnull[INDEX_MAX_KEYS];
+ Datum new_values[INDEX_MAX_KEYS];
+ bool new_isnull[INDEX_MAX_KEYS];
+
+ /* If we've reviewed all the attributes on this index, move on */
+ if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs))
+ continue;
+
+ /* Checking partial at this point isn't viable when we're serializable */
+ if (is_partial && IsolationIsSerializable())
+ {
+ p_attrs = bms_add_members(p_attrs, pre_attrs);
+ }
+ /* Check partial index predicate */
+ else if (is_partial)
+ {
+ ExprState *pstate;
+ bool old_qualifies,
+ new_qualifies;
+
+ if (!indexInfo->ii_CheckedPredicate)
+ pstate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ else
+ pstate = indexInfo->ii_PredicateState;
+
+ save_scantuple = econtext->ecxt_scantuple;
+
+ econtext->ecxt_scantuple = old_tts;
+ old_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = new_tts;
+ new_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = save_scantuple;
+
+ indexInfo->ii_CheckedPredicate = true;
+ indexInfo->ii_PredicateState = pstate;
+ indexInfo->ii_PredicateSatisfied = new_qualifies;
+
+ /* Both outside predicate, index doesn't need update */
+ if (!old_qualifies && !new_qualifies)
+ continue;
+
+ /* A transition means we need to update the index */
+ if (old_qualifies != new_qualifies)
+ p_attrs = bms_copy(pre_attrs);
+
+ /*
+ * When both are within the predicate we must update this index,
+ * but only if one of the index key attributes changed.
+ */
+ }
+
+ /*
+ * Expression indexes, or an index that has a comparison function,
+ * requires us to form index datums and compare. We've done all we
+ * can to avoid this overhead, now it's time to bite the bullet and
+ * get it done.
+ *
+ * XXX: Caching the values/isnull might be a win and avoid one of the
+ * added calls to FormIndexDatum().
+ */
+ if (has_expressions || has_am_compare)
+ {
+ save_scantuple = econtext->ecxt_scantuple;
+
+ /* Evaluate expressions (if any) to get base datums */
+ econtext->ecxt_scantuple = old_tts;
+ FormIndexDatum(indexInfo, old_tts, estate, old_values, old_isnull);
+
+ econtext->ecxt_scantuple = new_tts;
+ FormIndexDatum(indexInfo, new_tts, estate, new_values, new_isnull);
+
+ econtext->ecxt_scantuple = save_scantuple;
+
+ /* Compare the index key datums for equality */
+ for (int j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
+ {
+ AttrNumber rel_attrnum = indexInfo->ii_IndexAttrNumbers[j];
+ int rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber;
+ int nth_expr = 0;
+ bool values_equal = false;
+
+ /*
+ * We can't skip attributes that we've already identified as
+ * triggering an index update because we may have added an
+ * attribute from an expression index that didn't change but
+ * the expression did and that unchanged attribute is
+ * referenced in a subsequent index where we will discover
+ * that fact.
+ */
+
+ /* A change to/from NULL, record this attribute */
+ if (old_isnull[j] != new_isnull[j])
+ {
+ /* Expressions will have rel_attrnum == 0 */
+ if (rel_attrnum == 0)
+ m_attrs = bms_add_members(m_attrs, indexInfo->ii_ExpressionsAttrs);
+ else
+ m_attrs = bms_add_member(m_attrs, rel_attridx);
+ continue;
+ }
+
+ /* Both NULL, no change */
+ if (old_isnull[j])
+ {
+ if (rel_attrnum != 0)
+ u_attrs = bms_add_member(u_attrs, rel_attridx);
+
+ continue;
+ }
+
+ /*
+ * Use index AM's comparison function if present when
+ * comparing the index datum formed when creating an index
+ * key.
+ */
+ if (has_am_compare)
+ {
+ /*
+ * NOTE: For AM comparison, pass the 1-based index
+ * attribute number. The AM's compare function expects the
+ * same numbering as used internally by the AM.
+ */
+ values_equal = amroutine->amcomparedatums(index, j + 1,
+ old_values[j], old_isnull[j],
+ new_values[j], new_isnull[j]);
+ }
+ else
+ {
+ /* Non-zero attribute means not an expression */
+ if (rel_attrnum != 0)
+ {
+ if (supports_ios)
+ {
+ CompactAttribute *attr = TupleDescCompactAttr(tupdesc, rel_attrnum - 1);
+
+ values_equal = datumIsEqual(old_values[j],
+ new_values[j],
+ attr->attbyval,
+ attr->attlen);
+ }
+ else
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, rel_attrnum - 1);
+
+ /*
+ * Compare using type-specific equality which at
+ * this point is the relation's type because
+ * FormIndexDatum() will populate the values/nulls
+ * but won't transform them into the final values
+ * destined for the index tuple, that's left to
+ * index_form_tuple() which we don't call (on
+ * purpose).
+ */
+ values_equal = tts_attr_equal(attr->atttypid,
+ attr->attcollation,
+ attr->attbyval,
+ attr->attlen,
+ old_values[j],
+ new_values[j]);
+ }
+ }
+ else
+ {
+ /*
+ * An expression on an indexed attribute without
+ * custom AM comparison function. In this case, because
+ * indexes will store the result of the expression's
+ * evaluation, we can test for equality using the
+ * expression's result type. This allows for JSONB
+ * and custom type equality tests, which may not be
+ * the same as binary equality, to be in effect. The
+ * result stored in the index and used in index-only
+ * scans will be valid as it is the expressions
+ * result, which shouldn't change given the same
+ * input.
+ *
+ * At this point the expression's type is what is
+ * required when testing for equality, not the index's
+ * type, because the value created by FormIndexDatum()
+ * is the expression's result. Later on in
+ * index_form_tuple() an index may transform the value
+ * when forming its key (as is the case with HASH),
+ * but at this point the Datum is the expression's
+ * result type.
+ */
+ Oid expr_type_oid;
+ int16 typlen;
+ bool typbyval;
+ Expr *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
+
+ Assert(expr != NULL);
+
+ /* Get type OID from the expression */
+ expr_type_oid = exprType((Node *) expr);
+
+ /* Get type information from the OID */
+ get_typlenbyval(expr_type_oid, &typlen, &typbyval);
+
+ values_equal = tts_attr_equal(expr_type_oid,
+ -1, /* use TBD expr type */
+ typbyval,
+ typlen,
+ old_values[j],
+ new_values[j]);
+ }
+ }
+
+ if (!values_equal)
+ {
+ /* Expressions will have rel_attrnum == 0 */
+ if (rel_attrnum == 0)
+ m_attrs = bms_add_members(m_attrs, indexInfo->ii_ExpressionsAttrs);
+ else
+ m_attrs = bms_add_member(m_attrs, rel_attridx);
+ }
+ else
+ {
+ if (rel_attrnum != 0)
+ u_attrs = bms_add_member(u_attrs, rel_attridx);
+ }
+
+ if (rel_attrnum == 0)
+ nth_expr++;
+ }
+ }
+ else
+ {
+ /*
+ * Here we know that we're reviewing an index that doesn't have a
+ * partial predicate, doesn't use expressions, and doesn't have a
+ * amcomparedatums() implementation. If this index supports IOS
+ * we need to use binary comparison, if not then type-specific
+ * will provide a more accurate result.
+ */
+
+ /* Compare the index key datums for equality */
+ for (int j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
+ {
+ AttrNumber rel_attrnum;
+ int rel_attridx;
+ bool values_equal = false;
+ bool old_null,
+ new_null;
+ Datum old_val,
+ new_val;
+
+ rel_attrnum = indexInfo->ii_IndexAttrNumbers[j];
+ rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber;
+
+ /* Zero would mean expression, something we don't expect here */
+ Assert(rel_attrnum > 0 && rel_attrnum <= tupdesc->natts);
+
+ /* Extract values from both slots for this attribute */
+ old_val = slot_getattr(old_tts, rel_attrnum, &old_null);
+ new_val = slot_getattr(new_tts, rel_attrnum, &new_null);
+
+ /*
+ * If one value is NULL and the other is not, they are not
+ * equal
+ */
+ if (old_null != new_null)
+ {
+ m_attrs = bms_add_member(m_attrs, rel_attridx);
+ continue;
+ }
+
+ /* If both are NULL, consider them equal */
+ if (old_null)
+ {
+ u_attrs = bms_add_member(u_attrs, rel_attridx);
+ continue;
+ }
+
+ if (supports_ios)
+ {
+ CompactAttribute *attr = TupleDescCompactAttr(tupdesc, rel_attrnum - 1);
+
+ values_equal = datumIsEqual(old_val,
+ new_val,
+ attr->attbyval,
+ attr->attlen);
+ }
+ else
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, rel_attrnum - 1);
+
+ /*
+ * Compare using type-specific equality which at this
+ * point is the relation's type because FormIndexDatum()
+ * will populate the values/nulls but won't transform them
+ * into the final values destined for the index tuple,
+ * that's left to index_form_tuple() which we don't call
+ * (on purpose).
+ */
+ values_equal = tts_attr_equal(attr->atttypid,
+ attr->attcollation,
+ attr->attbyval,
+ attr->attlen,
+ old_val,
+ new_val);
+ }
+
+ if (!values_equal)
+ m_attrs = bms_add_member(m_attrs, rel_attridx);
+ else
+ u_attrs = bms_add_member(u_attrs, rel_attridx);
+ }
+ }
+
+ /*
+ * Here we know all the attributes we thought might be modified and
+ * all those we know haven't been. Take the difference and add it to
+ * the modified indexed attributes set.
+ */
+ m_attrs = bms_del_members(m_attrs, u_attrs);
+ p_attrs = bms_del_members(p_attrs, u_attrs);
+ mix_attrs = bms_add_members(mix_attrs, m_attrs);
+ mix_attrs = bms_add_members(mix_attrs, p_attrs);
+
+ bms_free(m_attrs);
+ bms_free(u_attrs);
+ bms_free(p_attrs);
+ }
+
+ return mix_attrs;
+}
+
/*
* Verify that the tuples to be produced by INSERT match the
* target relation's rowtype
@@ -2170,14 +2704,17 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot,
+ TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
+ /* The set of modified indexed attributes that trigger new index entries */
+ Bitmapset *mix_attrs = NULL;
+
updateCxt->crossPartUpdate = false;
/*
@@ -2294,9 +2831,38 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ExecConstraints(resultRelInfo, slot, estate);
/*
- * replace the heap tuple
+ * Identify which, if any, indexed attributes were modified here so that
+ * we might reuse it in a few places.
+ */
+ bms_free(resultRelInfo->ri_ChangedIndexedCols);
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
+
+ /*
+ * During updates we'll need a bit more information in IndexInfo but we've
+ * delayed adding it until here. We check to ensure that there are
+ * indexes, that something has changed that is indexed, and that the first
+ * index doesn't yet have ii_IndexedAttrs set as a way to ensure we only
+ * build this when needed and only once. We don't build this in
+ * ExecOpenIndices() as it is unnecessary overhead when not performing an
+ * update.
+ */
+ if (resultRelInfo->ri_NumIndices > 0 &&
+ bms_is_empty(resultRelInfo->ri_IndexRelationInfo[0]->ii_IndexedAttrs))
+ BuildUpdateIndexInfo(resultRelInfo);
+
+ /*
+ * Next up we need to find out the set of indexed attributes that have
+ * changed in value and should trigger a new index tuple. We could start
+ * with the set of updated columns via ExecGetUpdatedCols(), but if we do
+ * we will overlook attributes directly modified by heap_modify_tuple()
+ * which are not known to ExecGetUpdatedCols().
+ */
+ mix_attrs = ExecCheckIndexedAttrsForChanges(resultRelInfo, estate, oldSlot, slot);
+
+ /*
+ * Call into the table AM to update the heap tuple.
*
- * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
+ * NOTE: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
* the row to be updated is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior needed
* for referential integrity updates in transaction-snapshot mode
@@ -2308,8 +2874,12 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
+ mix_attrs,
&updateCxt->updateIndexes);
+ Assert(bms_is_empty(resultRelInfo->ri_ChangedIndexedCols));
+ resultRelInfo->ri_ChangedIndexedCols = mix_attrs;
+
return result;
}
@@ -2327,7 +2897,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
- /* insert index entries for tuple if necessary */
+ /* Insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, context->estate,
@@ -2526,8 +3096,9 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
lockedtid = *tupleid;
- result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+
+ result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, oldSlot,
+ slot, canSetTag, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -3224,8 +3795,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
Assert(oldtuple == NULL);
result = ExecUpdateAct(context, resultRelInfo, tupleid,
- NULL, newslot, canSetTag,
- &updateCxt);
+ NULL, resultRelInfo->ri_oldTupleSlot,
+ newslot, canSetTag, &updateCxt);
/*
* As in ExecUpdate(), if ExecUpdateAct() reports that a
@@ -3250,6 +3821,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
tupleid, NULL, newslot);
mtstate->mt_merge_updated += 1;
}
+
break;
case CMD_DELETE:
@@ -4356,7 +4928,7 @@ ExecModifyTable(PlanState *pstate)
* For UPDATE/DELETE/MERGE, fetch the row identity info for the tuple
* to be updated/deleted/merged. For a heap relation, that's a TID;
* otherwise we may have a wholerow junk attr that carries the old
- * tuple in toto. Keep this in step with the part of
+ * tuple in toto. Keep this in step with the part of
* ExecInitModifyTable that sets up ri_RowIdAttNo.
*/
if (operation == CMD_UPDATE || operation == CMD_DELETE ||
@@ -4532,6 +5104,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
oldSlot, slot, node->canSetTag);
+
if (tuplock)
UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid,
InplaceUpdateTupleLock);
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 7b1e9d94103f8..c522971a37c22 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -238,6 +238,10 @@ bms_make_singleton(int x)
void
bms_free(Bitmapset *a)
{
+#ifdef USE_ASSERT_CHECKING
+ Assert(bms_is_valid_set(a));
+#endif
+
if (a)
pfree(a);
}
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index e2d9e9be41a65..e9a53b95caf1e 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -845,8 +845,6 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
n->ii_Unique = unique;
n->ii_NullsNotDistinct = nulls_not_distinct;
n->ii_ReadyForInserts = isready;
- n->ii_CheckedUnchanged = false;
- n->ii_IndexUnchanged = false;
n->ii_Concurrent = concurrent;
n->ii_Summarizing = summarizing;
n->ii_WithoutOverlaps = withoutoverlaps;
@@ -857,10 +855,14 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
/* expressions */
n->ii_Expressions = expressions;
n->ii_ExpressionsState = NIL;
+ n->ii_ExpressionsAttrs = NULL;
/* predicates */
n->ii_Predicate = predicates;
n->ii_PredicateState = NULL;
+ n->ii_PredicateAttrs = NULL;
+ n->ii_CheckedPredicate = false;
+ n->ii_PredicateSatisfied = false;
/* exclusion constraints */
n->ii_ExclusionOps = NULL;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 93970c6af2948..b363eaa49ccec 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -243,6 +243,8 @@
*/
#include "postgres.h"
+#include "access/sysattr.h"
+#include "nodes/bitmapset.h"
#include
#include
@@ -275,7 +277,6 @@
#include "replication/logicalrelation.h"
#include "replication/logicalworker.h"
#include "replication/origin.h"
-#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "rewrite/rewriteHandler.h"
@@ -291,6 +292,7 @@
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -1110,15 +1112,18 @@ slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
* "slot" is filled with a copy of the tuple in "srcslot", replacing
* columns provided in "tupleData" and leaving others as-is.
*
+ * Returns a bitmap of the modified columns.
+ *
* Caution: unreplaced pass-by-ref columns in "slot" will point into the
* storage for "srcslot". This is OK for current usage, but someday we may
* need to materialize "slot" at the end to make it independent of "srcslot".
*/
-static void
+static Bitmapset *
slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
LogicalRepRelMapEntry *rel,
LogicalRepTupleData *tupleData)
{
+ Bitmapset *modified = NULL;
int natts = slot->tts_tupleDescriptor->natts;
int i;
@@ -1195,6 +1200,28 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
slot->tts_isnull[i] = true;
}
+ /*
+ * Determine if the replicated value changed the local value by
+ * comparing slots. This is a subset of
+ * ExecCheckIndexedAttrsForChanges.
+ */
+ if (srcslot->tts_isnull[i] != slot->tts_isnull[i])
+ {
+ /* One is NULL, the other is not so the value changed */
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+ else if (!srcslot->tts_isnull[i])
+ {
+ /* Both are not NULL, compare their values */
+ if (!tts_attr_equal(att->atttypid,
+ att->attcollation,
+ att->attbyval,
+ att->attlen,
+ srcslot->tts_values[i],
+ slot->tts_values[i]))
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+
/* Reset attnum for error callback */
apply_error_callback_arg.remote_attnum = -1;
}
@@ -1202,6 +1229,8 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
/* And finally, declare that "slot" contains a valid virtual tuple */
ExecStoreVirtualTuple(slot);
+
+ return modified;
}
/*
@@ -2918,6 +2947,7 @@ apply_handle_update_internal(ApplyExecutionData *edata,
ConflictTupleInfo conflicttuple = {0};
bool found;
MemoryContext oldctx;
+ Bitmapset *indexed = NULL;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(relinfo, false);
@@ -2934,6 +2964,8 @@ apply_handle_update_internal(ApplyExecutionData *edata,
*/
if (found)
{
+ Bitmapset *modified = NULL;
+
/*
* Report the conflict if the tuple was modified by a different
* origin.
@@ -2957,15 +2989,29 @@ apply_handle_update_internal(ApplyExecutionData *edata,
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot, localslot, relmapentry, newtup);
+ modified = slot_modify_data(remoteslot, localslot, relmapentry, newtup);
MemoryContextSwitchTo(oldctx);
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but here we have
+ * the record of changed columns in the replication state, so let's
+ * use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+
+ bms_free(relinfo->ri_ChangedIndexedCols);
+ relinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
EvalPlanQualSetSlot(&epqstate, remoteslot);
InitConflictIndexes(relinfo);
- /* Do the actual update. */
+ /* First check privileges */
TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_UPDATE);
+
+ /* Then do the actual update. */
ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
remoteslot);
}
@@ -3455,6 +3501,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
bool found;
EPQState epqstate;
ConflictTupleInfo conflicttuple = {0};
+ Bitmapset *modified = NULL;
+ Bitmapset *indexed;
/* Get the matching local tuple from the partition. */
found = FindReplTupleInLocalRel(edata, partrel,
@@ -3523,8 +3571,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
* remoteslot_part.
*/
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot_part, localslot, part_entry,
- newtup);
+ modified = slot_modify_data(remoteslot_part, localslot, part_entry,
+ newtup);
MemoryContextSwitchTo(oldctx);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
@@ -3549,6 +3597,18 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
EvalPlanQualSetSlot(&epqstate, remoteslot_part);
TargetPrivilegesCheck(partrelinfo->ri_RelationDesc,
ACL_UPDATE);
+
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but
+ * here we have the record of changed columns in the
+ * replication state, so let's use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(partrelinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+ bms_free(partrelinfo->ri_ChangedIndexedCols);
+ partrelinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
localslot, remoteslot_part);
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 915d0bc908423..32825596be1d8 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2482,6 +2482,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
bms_free(relation->rd_idattr);
bms_free(relation->rd_hotblockingattr);
bms_free(relation->rd_summarizedattr);
+ bms_free(relation->rd_indexedattr);
if (relation->rd_pubdesc)
pfree(relation->rd_pubdesc);
if (relation->rd_options)
@@ -5283,6 +5284,7 @@ RelationGetIndexPredicate(Relation relation)
* index (empty if FULL)
* INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT
* INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes
+ * INDEX_ATTR_BITMAP_INDEXED Columns referenced by indexes
*
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
* we can include system attributes (e.g., OID) in the bitmap representation.
@@ -5307,6 +5309,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Bitmapset *idindexattrs; /* columns in the replica identity */
Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */
Bitmapset *summarizedattrs; /* columns with summarizing indexes */
+ Bitmapset *indexedattrs; /* columns referenced by indexes */
List *indexoidlist;
List *newindexoidlist;
Oid relpkindex;
@@ -5329,6 +5332,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return bms_copy(relation->rd_hotblockingattr);
case INDEX_ATTR_BITMAP_SUMMARIZED:
return bms_copy(relation->rd_summarizedattr);
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return bms_copy(relation->rd_indexedattr);
default:
elog(ERROR, "unknown attrKind %u", attrKind);
}
@@ -5373,6 +5378,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
idindexattrs = NULL;
hotblockingattrs = NULL;
summarizedattrs = NULL;
+ indexedattrs = NULL;
foreach(l, indexoidlist)
{
Oid indexOid = lfirst_oid(l);
@@ -5505,10 +5511,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
bms_free(idindexattrs);
bms_free(hotblockingattrs);
bms_free(summarizedattrs);
+ bms_free(indexedattrs);
goto restart;
}
+ /* Combine all index attributes */
+ indexedattrs = bms_union(hotblockingattrs, summarizedattrs);
+
/* Don't leak the old values of these bitmaps, if any */
relation->rd_attrsvalid = false;
bms_free(relation->rd_keyattr);
@@ -5521,6 +5531,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
relation->rd_hotblockingattr = NULL;
bms_free(relation->rd_summarizedattr);
relation->rd_summarizedattr = NULL;
+ bms_free(relation->rd_indexedattr);
+ relation->rd_indexedattr = NULL;
/*
* Now save copies of the bitmaps in the relcache entry. We intentionally
@@ -5535,6 +5547,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
relation->rd_idattr = bms_copy(idindexattrs);
relation->rd_hotblockingattr = bms_copy(hotblockingattrs);
relation->rd_summarizedattr = bms_copy(summarizedattrs);
+ relation->rd_indexedattr = bms_copy(indexedattrs);
relation->rd_attrsvalid = true;
MemoryContextSwitchTo(oldcxt);
@@ -5551,6 +5564,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return hotblockingattrs;
case INDEX_ATTR_BITMAP_SUMMARIZED:
return summarizedattrs;
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return indexedattrs;
default:
elog(ERROR, "unknown attrKind %u", attrKind);
return NULL;
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 63dd41c1f21bf..9bdf73eda59f4 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -211,6 +211,33 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan);
/* restore marked scan position */
typedef void (*amrestrpos_function) (IndexScanDesc scan);
+/*
+ * amcomparedatums - Compare datums to determine if index update is needed
+ *
+ * This function compares old_datum and new_datum to determine if they would
+ * produce different index entries. For extraction-based indexes (GIN, RUM),
+ * this should:
+ * 1. Extract keys from old_datum using the opclass's extractValue function
+ * 2. Extract keys from new_datum using the opclass's extractValue function
+ * 3. Compare the two sets of keys using appropriate equality operators
+ * 4. Return true if the sets are equal (no index update needed)
+ *
+ * The comparison should account for:
+ * - Different numbers of extracted keys
+ * - NULL values
+ * - Type-specific equality (not just binary equality)
+ * - Opclass parameters (e.g., path in bson_rum_single_path_ops)
+ *
+ * For the DocumentDB example with path='a', this would extract values at
+ * path 'a' from both old and new BSON documents and compare them using
+ * BSON's equality operator.
+ */
+/* identify if updated datums would produce one or more index entries */
+typedef bool (*amcomparedatums_function) (Relation indexRelation,
+ int attno,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Callback function signatures - for parallel index scans.
*/
@@ -313,6 +340,7 @@ typedef struct IndexAmRoutine
amendscan_function amendscan;
ammarkpos_function ammarkpos; /* can be NULL */
amrestrpos_function amrestrpos; /* can be NULL */
+ amcomparedatums_function amcomparedatums; /* can be NULL */
/* interface functions to support parallel index scans */
amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 13ea91922efc5..2f265f4816c32 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -100,6 +100,9 @@ extern PGDLLIMPORT int gin_pending_list_limit;
extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
bool is_build);
+extern bool gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 632c4332a8c34..5783dbebff04c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -364,11 +364,13 @@ extern TM_Result heap_delete(Relation relation, const ItemPointerData *tid,
TM_FailureData *tmfd, bool changingPart);
extern void heap_finish_speculative(Relation relation, const ItemPointerData *tid);
extern void heap_abort_speculative(Relation relation, const ItemPointerData *tid);
-extern TM_Result heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
+extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup,
+ HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
+ Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
+ Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
+ const Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
@@ -402,8 +404,8 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, const ItemPointerData *tid);
-extern void simple_heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple tup, TU_UpdateIndexes *update_indexes);
+extern Bitmapset *simple_heap_update(Relation relation, const ItemPointerData *otid,
+ HeapTuple tup, TU_UpdateIndexes *update_indexes);
extern TransactionId heap_index_delete_tuples(Relation rel,
TM_IndexDeleteOp *delstate);
@@ -430,6 +432,18 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
OffsetNumber *dead, int ndead,
OffsetNumber *unused, int nunused);
+/* in heap/heapam.c */
+extern Bitmapset *HeapDetermineColumnsInfo(Relation relation,
+ Bitmapset *interesting_cols,
+ Bitmapset *external_cols,
+ HeapTuple oldtup, HeapTuple newtup,
+ bool *has_external);
+#ifdef USE_ASSERT_CHECKING
+extern void check_lock_if_inplace_updateable_rel(Relation relation,
+ const ItemPointerData *otid,
+ HeapTuple newtup);
+#endif
+
/* in heap/vacuumlazy.c */
extern void heap_vacuum_rel(Relation rel,
const VacuumParams params, BufferAccessStrategy bstrategy);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 16be5c7a9c158..42bd329eaad32 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1210,6 +1210,10 @@ extern int btgettreeheight(Relation rel);
extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily);
extern StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily);
+extern bool btcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* prototypes for internal functions in nbtree.c
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 2fa790b6bf549..d94dfc9b41d23 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -549,6 +549,7 @@ typedef struct TableAmRoutine
bool wait,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
+ const Bitmapset *updated_cols,
TU_UpdateIndexes *update_indexes);
/* see table_tuple_lock() for reference about parameters */
@@ -1512,12 +1513,12 @@ static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ const Bitmapset *mix_cols, TU_UpdateIndexes *update_indexes)
{
return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck,
- wait, tmfd,
- lockmode, update_indexes);
+ wait, tmfd, lockmode,
+ mix_cols, update_indexes);
}
/*
@@ -2020,6 +2021,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes);
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index dda95e54903eb..8d364f8b30f4e 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -132,6 +132,7 @@ extern bool CompareIndexInfo(const IndexInfo *info1, const IndexInfo *info2,
const AttrMap *attmap);
extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii);
+extern void BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo);
extern void FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index fa2b657fb2ffb..a19585ba06561 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -739,6 +739,11 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate);
*/
extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
+extern Bitmapset *ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo,
+ Bitmapset *mix_attrs,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate,
bool update,
@@ -800,5 +805,11 @@ extern ResultRelInfo *ExecLookupResultRelByOid(ModifyTableState *node,
Oid resultoid,
bool missing_ok,
bool update_cache);
+extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
+extern bool tts_attr_equal(Oid typid, Oid collation, bool typbyval, int16 typlen,
+ Datum value1, Datum value2);
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 64ff6996431eb..1259897282ede 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -174,15 +174,29 @@ typedef struct IndexInfo
*/
AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS];
+ /*
+ * All key, expression, summarizing, and partition attributes referenced by
+ * this index
+ */
+ Bitmapset *ii_IndexedAttrs;
+
/* expr trees for expression entries, or NIL if none */
List *ii_Expressions; /* list of Expr */
/* exec state for expressions, or NIL if none */
List *ii_ExpressionsState; /* list of ExprState */
+ /* attributes exclusively referenced by expression indexes */
+ Bitmapset *ii_ExpressionsAttrs;
/* partial-index predicate, or NIL if none */
List *ii_Predicate; /* list of Expr */
/* exec state for expressions, or NIL if none */
ExprState *ii_PredicateState;
+ /* attributes referenced by the predicate */
+ Bitmapset *ii_PredicateAttrs;
+ /* partial index predicate determined yet? */
+ bool ii_CheckedPredicate;
+ /* amupdate hint used to avoid rechecking predicate */
+ bool ii_PredicateSatisfied;
/* Per-column exclusion operators, or NULL if none */
Oid *ii_ExclusionOps; /* array with one entry per column */
@@ -202,10 +216,6 @@ typedef struct IndexInfo
bool ii_NullsNotDistinct;
/* is it valid for inserts? */
bool ii_ReadyForInserts;
- /* IndexUnchanged status determined yet? */
- bool ii_CheckedUnchanged;
- /* aminsert hint, cached for retail inserts */
- bool ii_IndexUnchanged;
/* are we doing a concurrent index build? */
bool ii_Concurrent;
/* did we detect any broken HOT chains? */
@@ -499,6 +509,12 @@ typedef struct ResultRelInfo
/* true if the above has been computed */
bool ri_extraUpdatedCols_valid;
+ /*
+ * For UPDATE a Bitmapset of the attributes that are both indexed and have
+ * changed in value.
+ */
+ Bitmapset *ri_ChangedIndexedCols;
+
/* Projection to generate new tuple in an INSERT/UPDATE */
ProjectionInfo *ri_projectNew;
/* Slot to hold that tuple */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 80286076a111a..b23a7306e69c2 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -164,6 +164,7 @@ typedef struct RelationData
Bitmapset *rd_idattr; /* included in replica identity index */
Bitmapset *rd_hotblockingattr; /* cols blocking HOT update */
Bitmapset *rd_summarizedattr; /* cols indexed by summarizing indexes */
+ Bitmapset *rd_indexedattr; /* all cols referenced by indexes */
PublicationDesc *rd_pubdesc; /* publication descriptor, or NULL */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 3561c6bef0bfc..d3fbb8b093a50 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -71,6 +71,7 @@ typedef enum IndexAttrBitmapKind
INDEX_ATTR_BITMAP_IDENTITY_KEY,
INDEX_ATTR_BITMAP_HOT_BLOCKING,
INDEX_ATTR_BITMAP_SUMMARIZED,
+ INDEX_ATTR_BITMAP_INDEXED,
} IndexAttrBitmapKind;
extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,
diff --git a/src/test/isolation/expected/insert-conflict-specconflict.out b/src/test/isolation/expected/insert-conflict-specconflict.out
index e34a821c403c7..54b3981918c7d 100644
--- a/src/test/isolation/expected/insert-conflict-specconflict.out
+++ b/src/test/isolation/expected/insert-conflict-specconflict.out
@@ -80,6 +80,10 @@ pg_advisory_unlock
t
(1 row)
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
@@ -172,6 +176,10 @@ pg_advisory_unlock
t
(1 row)
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
@@ -369,6 +377,10 @@ key|data
step s1_commit: COMMIT;
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
step s2_upsert: <... completed>
step controller_show: SELECT * FROM upserttest;
key|data
@@ -530,6 +542,14 @@ isolation/insert-conflict-specconflict/s2|transactionid|ExclusiveLock|t
step s2_commit: COMMIT;
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
step s1_upsert: <... completed>
step s1_noop:
step controller_show: SELECT * FROM upserttest;
diff --git a/src/test/regress/expected/heap_hot_updates.out b/src/test/regress/expected/heap_hot_updates.out
new file mode 100644
index 0000000000000..f6bd8b18af8ce
--- /dev/null
+++ b/src/test/regress/expected/heap_hot_updates.out
@@ -0,0 +1,1922 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+-- Setup: Create function to measure HOT updates
+CREATE OR REPLACE FUNCTION check_hot_updates(
+ expected INT,
+ p_table_name TEXT DEFAULT 't',
+ p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+ table_name TEXT,
+ total_updates BIGINT,
+ hot_updates BIGINT,
+ hot_update_percentage NUMERIC,
+ matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ v_relid oid;
+ v_qualified_name TEXT;
+ v_hot_updates BIGINT;
+ v_updates BIGINT;
+ v_xact_hot_updates BIGINT;
+ v_xact_updates BIGINT;
+BEGIN
+ -- Force statistics update
+ PERFORM pg_stat_force_next_flush();
+
+ -- Get table OID
+ v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+ v_relid := v_qualified_name::regclass;
+
+ IF v_relid IS NULL THEN
+ RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+ END IF;
+
+ -- Get cumulative + transaction stats
+ v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+ v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+ v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+ v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+ v_hot_updates := v_hot_updates + v_xact_hot_updates;
+ v_updates := v_updates + v_xact_updates;
+
+ RETURN QUERY
+ SELECT
+ p_table_name::TEXT,
+ v_updates::BIGINT,
+ v_hot_updates::BIGINT,
+ CASE WHEN v_updates > 0
+ THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+ ELSE 0
+ END,
+ (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+CREATE COLLATION case_insensitive (
+ provider = libc,
+ locale = 'C'
+);
+-- ================================================================
+-- Basic JSONB Expression Index
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_name_idx ON t((docs->'name'));
+INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update indexed JSONB field - should NOT be HOT
+UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update non-indexed field again - should be HOT
+UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- JSONB Expression Index an some including columns
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB, status TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_name_idx ON t((docs->'name'));
+INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}', 'ok');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET status = 'not ok' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Partial Index with Predicate Transitions
+-- ================================================================
+CREATE TABLE t(id INT, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_value_idx ON t(value) WHERE value > 10;
+INSERT INTO t VALUES (1, 5);
+-- Both outside predicate - should be HOT
+UPDATE t SET value = 8 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET value = 15 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Both inside predicate, value changes - should NOT be HOT
+UPDATE t SET value = 20 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Transition out of predicate - should NOT be HOT
+UPDATE t SET value = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+-- Both outside predicate again - should be HOT
+UPDATE t SET value = 3 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 5 | 2 | 40.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Expression Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(docs JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t((docs->'status'))
+ WHERE (docs->'priority')::int > 5;
+INSERT INTO t VALUES ('{"status": "pending", "priority": 3}');
+-- Both outside predicate, status unchanged - should be HOT
+UPDATE t SET docs = '{"status": "pending", "priority": 4}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET docs = '{"status": "pending", "priority": 10}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Inside predicate, status changes - should NOT be HOT
+UPDATE t SET docs = '{"status": "active", "priority": 10}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Inside predicate, status unchanged - should be HOT
+UPDATE t SET docs = '{"status": "active", "priority": 8}';
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 2 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Multi-Column Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, a INT, b INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t(id, abs(a), abs(b));
+INSERT INTO t VALUES (1, -5, -10);
+-- Change sign but not abs value - should be HOT
+UPDATE t SET a = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change abs value - should NOT be HOT
+UPDATE t SET b = -15 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Change id - should NOT be HOT
+UPDATE t SET id = 2 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Expression with COLLATION and BTREE (nbtree) index
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ name TEXT COLLATE case_insensitive
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_lower_idx ON t USING BTREE (name COLLATE case_insensitive);
+INSERT INTO t VALUES (1, 'ALICE');
+-- Change case but not value - should NOT be HOT in BTREE
+UPDATE t SET name = 'Alice' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Change to new value - should NOT be HOT
+UPDATE t SET name = 'BOB' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Array Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_array_len_idx ON t(array_length(tags, 1));
+INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']);
+-- Same length, different elements - should be HOT
+UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Different length - should NOT be HOT
+UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Nested JSONB Expression and JSONB equality '->' (not '->>')
+-- ================================================================
+CREATE TABLE t(data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_nested_idx ON t((data->'user'->'name'));
+INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}');
+-- Change nested non-indexed field - should be HOT
+UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change nested indexed field - should NOT be HOT
+UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Complex Predicate on Multiple JSONB Fields
+-- ================================================================
+CREATE TABLE t(data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t((data->'status'))
+ WHERE (data->'priority')::int > 5
+ AND (data->'active')::boolean = true;
+INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}');
+-- Outside predicate (priority too low) - should be HOT
+UPDATE t SET data = '{"status": "done", "priority": 3, "active": true}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Inside predicate, change to outside (active = false) - should NOT be HOT
+UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: Same elements extracted (might allow HOT if not careful)
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update: Add/remove element
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- TOASTed Values in Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, large_text TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10));
+INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier');
+-- Change end of string, prefix unchanged - should be HOT
+UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change prefix - should NOT be HOT
+UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+CREATE INDEX t_gin ON t USING gin(search_vec);
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (index keys changed)
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+DROP TABLE t CASCADE;
+-- ================================================================
+-- TEST: GIN with TOASTed JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin((data->'tags'));
+-- Insert with TOASTed JSONB
+INSERT INTO t (id, data) VALUES
+ (1, jsonb_build_object(
+ 'tags', '["postgres", "database"]'::jsonb,
+ 'large_field', repeat('x', 10000)
+ ));
+-- Update: Change large_field, tags unchanged - should be HOT
+UPDATE t
+SET data = jsonb_build_object(
+ 'tags', '["postgres", "database"]'::jsonb,
+ 'large_field', repeat('y', 10000)
+)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT update
+-- Update: Change tags - should NOT be HOT
+UPDATE t
+SET data = jsonb_build_object(
+ 'tags', '["postgres", "sql"]'::jsonb,
+ 'large_field', repeat('y', 10000)
+)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Expected: Still 1 HOT
+-- Verify correctness
+SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb;
+ count
+-------
+ 0
+(1 row)
+
+-- Expected: 0 rows
+SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb;
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+DROP TABLE t CASCADE;
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (keys actually changed)
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Expected: 1 HOT (GIN keys semantically identical)
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: Still 1 HOT (not this one)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+INSERT INTO t VALUES (1, 50, 'below range');
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 150
+(1 row)
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 3 | 100.00 | t
+(1 row)
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 160
+(1 row)
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 4 | 100.00 | t
+(1 row)
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+SELECT id, value, description FROM t;
+ id | value | description
+----+-------+---------------
+ 1 | 50 | updated again
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, 'Alice@Example.com', '{"status": "new"}');
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = 'alice@example.com' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = 'bob@example.com' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash((data->'category'));
+INSERT INTO t VALUES (1, '{"category": "books", "title": "PostgreSQL Guide"}');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET data = '{"category": "books", "title": "PostgreSQL Handbook"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update indexed JSONB field - NOT HOT
+UPDATE t SET data = '{"category": "videos", "title": "PostgreSQL Handbook"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - NOT HOT
+UPDATE t SET data = '{"category": "courses", "title": "PostgreSQL Basics"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_brin | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT (BRIN allows it for single row)
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_hash | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (HASH blocks it)
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: 1 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT (BRIN permits single-row updates)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+-- Expected: 2 HOT (HASH blocks it)
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+-- Expected: 3 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+-- Update impacts index on whole document attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Two indexes on a JSONB document, one partial
+-- ================================================================
+CREATE TABLE t (docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+INSERT INTO t (docs) VALUES ('{"a": 0, "b": 0}');
+INSERT INTO t (docs) SELECT jsonb_build_object('b', n) FROM generate_series(100, 10000) as n;
+CREATE INDEX t_idx_a ON t ((docs->'a'));
+CREATE INDEX t_idx_b ON t ((docs->'b')) WHERE (docs->'b')::numeric > 9;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+-- Leave 'a' unchanged but modify 'b' to a value outside of the index predicate.
+-- This should be a HOT update because neither index is changed.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 1) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Check to make sure that the index does not contain a value for 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+------
+(0 rows)
+
+-- Leave 'a' unchanged but modify 'b' to a value within the index predicate.
+-- This represents a change for field 'b' from unindexed to indexed and so
+-- this should not take the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 10) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+-------------------
+ {"a": 0, "b": 10}
+(1 row)
+
+-- This update modifies the value of 'a', an indexed field, so it also cannot
+-- be a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 1, 'b', 10) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- This update changes both 'a' and 'b' to new values, so this cannot use the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 12) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+-------------------
+ {"a": 2, "b": 12}
+(1 row)
+
+-- This update changes 'b' to a value outside its predicate requiring that
+-- we remove it from the index. That's a transition that can't be done
+-- during a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 1) WHERE (docs->'b')::numeric = 12;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 5 | 1 | 20.00 | t
+(1 row)
+
+-- Check to make sure that the index no longer contains the value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using t_idx_b on t
+ Filter: (((docs -> 'b'::text))::numeric < '100'::numeric)
+(2 rows)
+
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+ docs
+------
+(0 rows)
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+-- ================================================================
+-- Tests to check expression indexes
+-- ================================================================
+CREATE TABLE t(a INT, b INT) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx_a ON t(abs(a)) WHERE abs(a) > 10;
+CREATE INDEX t_idx_b ON t(abs(b));
+INSERT INTO t VALUES (-1, -1), (-2, -2), (-3, -3), (-4, -4), (-5, -5);
+INSERT INTO t SELECT m, n FROM generate_series(-10000, -10) AS m, abs(m) AS n;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+-- The indexed value of b hasn't changed, this should be a HOT update.
+-- (-5, -5) -> (-5, 1)
+UPDATE t SET b = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ QUERY PLAN
+------------------------------------------------
+ Index Scan using t_idx_b on t
+ Index Cond: ((abs(b) < 10) AND (abs(b) > 0))
+(2 rows)
+
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ b
+----
+ -1
+ -2
+ -3
+ -4
+ 5
+(5 rows)
+
+-- Now that we're not checking the predicate of the partial index, this
+-- update of a from -5 to 5 should be HOT because we should ignore the
+-- predicate and check the expression and find it unchanged.
+-- (-5, 1) -> (5, 1)
+UPDATE t SET a = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- This update moves a into the partial index and should not
+-- be HOT. Let's make sure of that and check the index as well.
+-- (-4, -4) -> (-11, -4)
+UPDATE t SET a = -11 WHERE a = -4;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | -4
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(6 rows)
+
+-- (-11, -4) -> (11, -4)
+UPDATE t SET a = 11 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ 11 | -4
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(6 rows)
+
+-- (11, -4) -> (-4, -4)
+UPDATE t SET a = -4 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 5 | 3 | 60.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(5 rows)
+
+-- This update of a from 5 to -1 is HOT despite that attribute
+-- being indexed because the before and after values for the
+-- partial index predicate are outside the index definition.
+-- (5, 1) -> (-1, 1)
+UPDATE t SET a = -1 WHERE a = 5;
+SELECT * FROM check_hot_updates(4);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 6 | 4 | 66.67 | t
+(1 row)
+
+-- This update of a from -2 to -1 will be HOT because the before/after values
+-- of a are both outside the predicate of the partial index.
+-- (-1, 1) -> (-2, 1)
+UPDATE t SET a = -2 WHERE b = -2;
+SELECT * FROM check_hot_updates(5);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 7 | 5 | 71.43 | t
+(1 row)
+
+-- The indexed value for b isn't changing, this should be HOT.
+-- (-2, -2) -> (-2, 2)
+UPDATE t SET b = 2 WHERE b = -2;
+SELECT * FROM check_hot_updates(6);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 8 | 6 | 75.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ QUERY PLAN
+------------------------------------------------
+ Index Scan using t_idx_b on t
+ Index Cond: ((abs(b) < 10) AND (abs(b) > 0))
+(2 rows)
+
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+ b
+----
+ -1
+ 2
+ -3
+ -4
+ 5
+(5 rows)
+
+SELECT * FROM t where a > -10 AND a < 10;
+ a | b
+----+----
+ -1 | -1
+ -3 | -3
+ -1 | 5
+ -4 | -4
+ -2 | 2
+(5 rows)
+
+-- Before and after values for a are outside the predicate of the index,
+-- and because we check both values against the predicate, this should be HOT.
+-- (-2, 1) -> (5, 1)
+-- (-2, -2) -> (5, -2)
+UPDATE t SET a = 5 WHERE a = -1;
+SELECT * FROM check_hot_updates(8);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 10 | 8 | 80.00 | t
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ QUERY PLAN
+-------------------------------
+ Index Scan using t_idx_a on t
+ Index Cond: (abs(a) < 15)
+(2 rows)
+
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+ a | b
+-----+----
+ -10 | 10
+ -11 | 11
+ -12 | 12
+ -13 | 13
+ -14 | 14
+(5 rows)
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+-- ================================================================
+-- JSONB with two indexes each on separate fields, one partial
+-- ================================================================
+CREATE TABLE t(docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'a')) WHERE (docs->'b')::integer = 1;
+INSERT INTO t VALUES ('{"a": 1, "b": 1}');
+EXPLAIN (COSTS OFF) SELECT * FROM t;
+ QUERY PLAN
+---------------
+ Seq Scan on t
+(1 row)
+
+SELECT * FROM t;
+ docs
+------------------
+ {"a": 1, "b": 1}
+(1 row)
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ QUERY PLAN
+----------------------------------
+ Index Scan using t_docs_idx on t
+(1 row)
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ docs
+------------------
+ {"a": 1, "b": 1}
+(1 row)
+
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 0 | 0 | 0 | t
+(1 row)
+
+UPDATE t SET docs='{"a": 1, "b": 0}';
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+ docs
+------
+(0 rows)
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Tests for multi-column indexes
+-- ================================================================
+CREATE TABLE t(id INT, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t(id, (docs->'a'));
+INSERT INTO t VALUES (1, '{"a": 1, "b": 1}');
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+ QUERY PLAN
+------------------------------------------------
+ Index Scan using t_docs_idx on t
+ Index Cond: (id > 0)
+ Filter: (((docs -> 'a'::text))::integer > 0)
+(3 rows)
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+ id | docs
+----+------------------
+ 1 | {"a": 1, "b": 1}
+(1 row)
+
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 0 | 0 | 0 | t
+(1 row)
+
+-- Changing the id attribute which is an indexed attribute should
+-- prevent HOT updates.
+UPDATE t SET id = 2;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+ id | docs
+----+------------------
+ 2 | {"a": 1, "b": 1}
+(1 row)
+
+-- Changing the docs->'a' field in the indexed attribute 'docs'
+-- should prevent HOT updates.
+UPDATE t SET docs='{"a": -2, "b": 1}';
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+ id | docs
+----+-------------------
+ 2 | {"a": -2, "b": 1}
+(1 row)
+
+-- Leaving the docs->'a' attribute unchanged means that the expression
+-- is unchanged and because the 'id' attribute isn't in the modified
+-- set the indexed tuple is unchanged, this can go HOT.
+UPDATE t SET docs='{"a": -2, "b": 2}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+ id | docs
+----+-------------------
+ 2 | {"a": -2, "b": 2}
+(1 row)
+
+-- Here we change the 'id' attribute and the 'docs' attribute setting
+-- the expression docs->'a' to a new value, this cannot be a HOT update.
+UPDATE t SET id = 3, docs='{"a": 3, "b": 3}';
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+ id | docs
+----+------------------
+ 3 | {"a": 3, "b": 3}
+(1 row)
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Relation with unique constraint, partial index
+-- ================================================================
+CREATE TABLE users (
+ user_id serial primary key,
+ name VARCHAR(255) NOT NULL,
+ email VARCHAR(255) NOT NULL,
+ EXCLUDE USING btree (lower(email) WITH =)
+);
+-- Add some data to the table and then update it in ways that should and should
+-- not be HOT updates.
+INSERT INTO users (name, email) VALUES
+('user1', 'user1@example.com'),
+('user2', 'user2@example.com'),
+('taken', 'taken@EXAMPLE.com'),
+('you', 'you@domain.com'),
+('taken', 'taken@domain.com');
+-- Should fail because of the unique constraint on the email column.
+UPDATE users SET email = 'user1@example.com' WHERE email = 'user2@example.com';
+ERROR: conflicting key value violates exclusion constraint "users_lower_excl"
+DETAIL: Key (lower(email::text))=(user1@example.com) conflicts with existing key (lower(email::text))=(user1@example.com).
+SELECT * FROM check_hot_updates(0, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Should succeed because the email column is not being updated and should go HOT.
+UPDATE users SET name = 'foo' WHERE email = 'user1@example.com';
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Create a partial index on the email column to exercise the updates below.
+CREATE INDEX idx_users_email_no_example ON users (lower(email)) WHERE lower(email) LIKE '%@example.com%';
+-- An update that changes the email column but not the indexed portion of it and falls outside the constraint.
+-- Shouldn't be a HOT update because of the exclusion constraint.
+UPDATE users SET email = 'you+2@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 3 | 1 | 33.33 | t
+(1 row)
+
+-- An update that changes the email column but not the indexed portion of it and falls within the constraint.
+-- Again, should fail constraint and fail to be a HOT update.
+UPDATE users SET email = 'taken@domain.com' WHERE name = 'you';
+ERROR: conflicting key value violates exclusion constraint "users_lower_excl"
+DETAIL: Key (lower(email::text))=(taken@domain.com) conflicts with existing key (lower(email::text))=(taken@domain.com).
+SELECT * FROM check_hot_updates(1, 'users');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ users | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE users CASCADE;
+-- ================================================================
+-- Constraints spoiling HOT updates, this time with a range.
+-- ================================================================
+CREATE TABLE events (
+ id serial primary key,
+ name VARCHAR(255) NOT NULL,
+ event_time tstzrange,
+ constraint no_screening_time_overlap exclude using gist (
+ event_time WITH &&
+ )
+);
+-- Add two non-overlapping events.
+INSERT INTO events (id, event_time, name)
+VALUES
+ (1, '["2023-01-01 19:00:00", "2023-01-01 20:45:00"]', 'event1'),
+ (2, '["2023-01-01 21:00:00", "2023-01-01 21:45:00"]', 'event2');
+-- Update the first event to overlap with the second, should fail the constraint and not be HOT.
+UPDATE events SET event_time = '["2023-01-01 20:00:00", "2023-01-01 21:45:00"]' WHERE id = 1;
+ERROR: conflicting key value violates exclusion constraint "no_screening_time_overlap"
+DETAIL: Key (event_time)=(["Sun Jan 01 20:00:00 2023 PST","Sun Jan 01 21:45:00 2023 PST"]) conflicts with existing key (event_time)=(["Sun Jan 01 21:00:00 2023 PST","Sun Jan 01 21:45:00 2023 PST"]).
+SELECT * FROM check_hot_updates(0, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Update the first event to not overlap with the second, again not HOT due to the constraint.
+UPDATE events SET event_time = '["2023-01-01 22:00:00", "2023-01-01 22:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Update only the first event's name; this time we're HOT because we don't modify the constrained column.
+UPDATE events SET name = 'new name here' WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'events');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ events | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE events CASCADE;
+-- ================================================================
+-- Ensure that only the modified summarizing indexes are updated.
+-- ================================================================
+CREATE TABLE ex (id SERIAL primary key, att1 JSONB, att2 text, att3 text, att4 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+CREATE INDEX ex_expr2_idx ON ex USING btree((att1->'a'));
+CREATE INDEX ex_expr3_idx ON ex USING btree((att1->'b'));
+CREATE INDEX ex_expr4_idx ON ex USING btree((att1->'c'));
+CREATE INDEX ex_sumr2_idx ON ex USING BRIN(att3);
+CREATE INDEX ex_sumr3_idx ON ex USING BRIN(att4);
+CREATE INDEX ex_expr5_idx ON ex USING btree((att1->'d'));
+INSERT INTO ex (att1, att2) VALUES ('{"data": []}'::json, 'nothing special');
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+--------------+-----------------+------+------
+ 1 | {"data": []} | nothing special | |
+(1 row)
+
+-- Update att2 and att4; both are covered by BRIN/summarizing indexes, so this should be a HOT update and
+-- only update two of the three summarizing indexes.
+UPDATE ex SET att2 = 'special indeed', att4 = 'whatever';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 1 | 1 | 100.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+--------------+----------------+------+----------
+ 1 | {"data": []} | special indeed | | whatever
+(1 row)
+
+-- Update att1 and att2, only one is BRIN/summarizing, this should NOT be a HOT update.
+UPDATE ex SET att1 = att1 || '{"data": "howdy"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 2 | 1 | 50.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+---------------------+------+----------
+ 1 | {"data": "howdy"} | special, so special | | whatever
+(1 row)
+
+-- Update att2, att3, and att4 all are BRIN/summarizing indexes, this should be a HOT update
+-- and yet still update all three summarizing indexes.
+UPDATE ex SET att2 = 'a', att3 = 'b', att4 = 'c';
+SELECT * FROM check_hot_updates(2, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 3 | 2 | 66.67 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+------+------+------
+ 1 | {"data": "howdy"} | a | b | c
+(1 row)
+
+-- Update att1, att2, and att3 all modified values are BRIN/summarizing indexes, this should be a HOT update
+-- and yet still update all three summarizing indexes.
+UPDATE ex SET att1 = '{"data": "howdy"}', att2 = 'd', att3 = 'e';
+SELECT * FROM check_hot_updates(3, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 4 | 3 | 75.00 | t
+(1 row)
+
+SELECT * FROM ex;
+ id | att1 | att2 | att3 | att4
+----+-------------------+------+------+------
+ 1 | {"data": "howdy"} | d | e | c
+(1 row)
+
+DROP TABLE ex CASCADE;
+-- ================================================================
+-- Don't update unmodified summarizing indexes but do allow HOT
+-- ================================================================
+CREATE TABLE ex (att1 JSONB, att2 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+INSERT INTO ex VALUES ('{"data": []}', 'nothing special');
+-- Update the unindexed value of att1, this should be a HOT update and should
+-- update the summarizing index.
+UPDATE ex SET att1 = att1 || '{"status": "stalemate"}';
+SELECT * FROM check_hot_updates(1, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update the indexed value of att2, a summarized value, this is a summarized
+-- only update and should use the HOT path while still triggering an update to
+-- the summarizing BRIN index.
+UPDATE ex SET att2 = 'special indeed';
+SELECT * FROM check_hot_updates(2, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Update to att1 doesn't change the indexed value while the update to att2 does,
+-- this again is a summarized only update and should use the HOT path as well as
+-- trigger an update to the BRIN index.
+UPDATE ex SET att1 = att1 || '{"status": "checkmate"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(3, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 3 | 3 | 100.00 | t
+(1 row)
+
+-- This updates both indexes, the expression index on att1 and the summarizing
+-- index on att2. This should not be a HOT update because there are modified
+-- indexes and only some are summarized, not all. This should force all
+-- indexes to be updated.
+UPDATE ex SET att1 = att1 || '{"data": [1,2,3]}', att2 = 'do you want to play a game?';
+SELECT * FROM check_hot_updates(3, 'ex');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ ex | 4 | 3 | 75.00 | t
+(1 row)
+
+DROP TABLE ex CASCADE;
+-- ================================================================
+-- Ensure custom type equality operators are used
+-- ================================================================
+CREATE TYPE my_custom_type AS (val int);
+-- Comparison functions (returns boolean)
+CREATE FUNCTION my_custom_lt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val < b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+CREATE FUNCTION my_custom_le(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val <= b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+CREATE FUNCTION my_custom_eq(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val = b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+CREATE FUNCTION my_custom_ge(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val >= b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+CREATE FUNCTION my_custom_gt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val > b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+CREATE FUNCTION my_custom_ne(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val != b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+-- Comparison function (returns -1, 0, 1)
+CREATE FUNCTION my_custom_cmp(a my_custom_type, b my_custom_type) RETURNS int AS $$
+BEGIN
+ IF a.val < b.val THEN
+ RETURN -1;
+ ELSIF a.val > b.val THEN
+ RETURN 1;
+ ELSE
+ RETURN 0;
+ END IF;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+-- Create the operators
+CREATE OPERATOR < (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_lt,
+ COMMUTATOR = >,
+ NEGATOR = >=
+);
+CREATE OPERATOR <= (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_le,
+ COMMUTATOR = >=,
+ NEGATOR = >
+);
+CREATE OPERATOR = (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_eq,
+ COMMUTATOR = =,
+ NEGATOR = <>
+);
+CREATE OPERATOR >= (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_ge,
+ COMMUTATOR = <=,
+ NEGATOR = <
+);
+CREATE OPERATOR > (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_gt,
+ COMMUTATOR = <,
+ NEGATOR = <=
+);
+CREATE OPERATOR <> (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_ne,
+ COMMUTATOR = <>,
+ NEGATOR = =
+);
+-- Create the operator class (including the support function)
+CREATE OPERATOR CLASS my_custom_ops
+ DEFAULT FOR TYPE my_custom_type USING btree AS
+ OPERATOR 1 <,
+ OPERATOR 2 <=,
+ OPERATOR 3 =,
+ OPERATOR 4 >=,
+ OPERATOR 5 >,
+ FUNCTION 1 my_custom_cmp(my_custom_type, my_custom_type);
+-- Create the table
+CREATE TABLE my_table (
+ id int,
+ custom_val my_custom_type
+);
+-- Insert some data
+INSERT INTO my_table (id, custom_val) VALUES
+(1, ROW(3)::my_custom_type),
+(2, ROW(1)::my_custom_type),
+(3, ROW(4)::my_custom_type),
+(4, ROW(2)::my_custom_type);
+-- Create a function to use when indexing
+CREATE OR REPLACE FUNCTION abs_val(val my_custom_type) RETURNS int AS $$
+BEGIN
+ RETURN abs(val.val);
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+-- Create the index
+CREATE INDEX idx_custom_val_abs ON my_table (abs_val(custom_val));
+-- Update 1
+UPDATE my_table SET custom_val = ROW(5)::my_custom_type WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'my_table');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ my_table | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Update 2
+UPDATE my_table SET custom_val = ROW(0)::my_custom_type WHERE custom_val < ROW(3)::my_custom_type;
+SELECT * FROM check_hot_updates(0, 'my_table');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ my_table | 3 | 0 | 0.00 | t
+(1 row)
+
+-- Update 3
+UPDATE my_table SET custom_val = ROW(6)::my_custom_type WHERE id = 3;
+SELECT * FROM check_hot_updates(0, 'my_table');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ my_table | 4 | 0 | 0.00 | t
+(1 row)
+
+-- Update 4
+UPDATE my_table SET id = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'my_table');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ my_table | 5 | 1 | 20.00 | t
+(1 row)
+
+-- Query using the index
+SELECT * FROM my_table WHERE abs_val(custom_val) = 6;
+ id | custom_val
+----+------------
+ 3 | (6)
+(1 row)
+
+-- Clean up test case
+DROP TABLE my_table CASCADE;
+DROP OPERATOR CLASS my_custom_ops USING btree CASCADE;
+DROP OPERATOR < (my_custom_type, my_custom_type);
+DROP OPERATOR <= (my_custom_type, my_custom_type);
+DROP OPERATOR = (my_custom_type, my_custom_type);
+DROP OPERATOR >= (my_custom_type, my_custom_type);
+DROP OPERATOR > (my_custom_type, my_custom_type);
+DROP OPERATOR <> (my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_lt(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_le(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_eq(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_ge(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_gt(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_ne(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_cmp(my_custom_type, my_custom_type);
+DROP FUNCTION abs_val(my_custom_type);
+DROP TYPE my_custom_type CASCADE;
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index cc6d799bceaf0..f3db9270fe6b8 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -125,6 +125,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate
+
+# ----------
+# Another group of parallel tests, these focused on heap HOT updates
+# ----------
+test: heap_hot_updates
+
# event_trigger depends on create_am and cannot run concurrently with
# any test that runs DDL
# oidjoins is read-only, though, and should run late for best coverage
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index efc41fca2ba79..0a8fc7f181c25 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -1232,7 +1232,7 @@ spawn_process(const char *cmdline)
char *cmdline2;
cmdline2 = psprintf("exec %s", cmdline);
- execl(shellprog, shellprog, "-c", cmdline2, (char *) NULL);
+ execlp(shellprog, shellprog, "-c", cmdline2, (char *) NULL);
/* Not using the normal bail() here as we want _exit */
bail_noatexit("could not exec \"%s\": %m", shellprog);
}
diff --git a/src/test/regress/sql/heap_hot_updates.sql b/src/test/regress/sql/heap_hot_updates.sql
new file mode 100644
index 0000000000000..8d5510989df0c
--- /dev/null
+++ b/src/test/regress/sql/heap_hot_updates.sql
@@ -0,0 +1,1325 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+
+-- Setup: Create function to measure HOT updates
+CREATE OR REPLACE FUNCTION check_hot_updates(
+ expected INT,
+ p_table_name TEXT DEFAULT 't',
+ p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+ table_name TEXT,
+ total_updates BIGINT,
+ hot_updates BIGINT,
+ hot_update_percentage NUMERIC,
+ matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ v_relid oid;
+ v_qualified_name TEXT;
+ v_hot_updates BIGINT;
+ v_updates BIGINT;
+ v_xact_hot_updates BIGINT;
+ v_xact_updates BIGINT;
+BEGIN
+ -- Force statistics update
+ PERFORM pg_stat_force_next_flush();
+
+ -- Get table OID
+ v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+ v_relid := v_qualified_name::regclass;
+
+ IF v_relid IS NULL THEN
+ RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+ END IF;
+
+ -- Get cumulative + transaction stats
+ v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+ v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+ v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+ v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+ v_hot_updates := v_hot_updates + v_xact_hot_updates;
+ v_updates := v_updates + v_xact_updates;
+
+ RETURN QUERY
+ SELECT
+ p_table_name::TEXT,
+ v_updates::BIGINT,
+ v_hot_updates::BIGINT,
+ CASE WHEN v_updates > 0
+ THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+ ELSE 0
+ END,
+ (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+
+CREATE COLLATION case_insensitive (
+ provider = libc,
+ locale = 'C'
+);
+
+-- ================================================================
+-- Basic JSONB Expression Index
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_name_idx ON t((docs->'name'));
+INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update indexed JSONB field - should NOT be HOT
+UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update non-indexed field again - should be HOT
+UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t;
+
+-- ================================================================
+-- JSONB Expression Index and some additional non-indexed columns
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB, status TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_name_idx ON t((docs->'name'));
+INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}', 'ok');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET status = 'not ok' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Partial Index with Predicate Transitions
+-- ================================================================
+CREATE TABLE t(id INT, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_value_idx ON t(value) WHERE value > 10;
+INSERT INTO t VALUES (1, 5);
+
+-- Both outside predicate - should be HOT
+UPDATE t SET value = 8 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET value = 15 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Both inside predicate, value changes - should NOT be HOT
+UPDATE t SET value = 20 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Transition out of predicate - should NOT be HOT
+UPDATE t SET value = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Both outside predicate again - should be HOT
+UPDATE t SET value = 3 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Expression Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(docs JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t((docs->'status'))
+ WHERE (docs->'priority')::int > 5;
+INSERT INTO t VALUES ('{"status": "pending", "priority": 3}');
+
+-- Both outside predicate, status unchanged - should be HOT
+UPDATE t SET docs = '{"status": "pending", "priority": 4}';
+SELECT * FROM check_hot_updates(1);
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET docs = '{"status": "pending", "priority": 10}';
+SELECT * FROM check_hot_updates(1);
+
+-- Inside predicate, status changes - should NOT be HOT
+UPDATE t SET docs = '{"status": "active", "priority": 10}';
+SELECT * FROM check_hot_updates(1);
+
+-- Inside predicate, status unchanged - should be HOT
+UPDATE t SET docs = '{"status": "active", "priority": 8}';
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t;
+
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Multi-Column Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, a INT, b INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t(id, abs(a), abs(b));
+INSERT INTO t VALUES (1, -5, -10);
+
+-- Change sign but not abs value - should be HOT
+UPDATE t SET a = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change abs value - should NOT be HOT
+UPDATE t SET b = -15 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change id - should NOT be HOT
+UPDATE t SET id = 2 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Expression with COLLATION and BTREE (nbtree) index
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ name TEXT COLLATE case_insensitive
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_lower_idx ON t USING BTREE (name COLLATE case_insensitive);
+
+INSERT INTO t VALUES (1, 'ALICE');
+
+-- Change case but not value - should NOT be HOT in BTREE
+UPDATE t SET name = 'Alice' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Change to new value - should NOT be HOT
+UPDATE t SET name = 'BOB' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Array Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_array_len_idx ON t(array_length(tags, 1));
+INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']);
+
+-- Same length, different elements - should be HOT
+UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Different length - should NOT be HOT
+UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Nested JSONB Expression and JSONB equality '->' (not '->>')
+-- ================================================================
+CREATE TABLE t(data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_nested_idx ON t((data->'user'->'name'));
+INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}');
+
+-- Change nested non-indexed field - should be HOT
+UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}';
+SELECT * FROM check_hot_updates(1);
+
+-- Change nested indexed field - should NOT be HOT
+UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}';
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- Complex Predicate on Multiple JSONB Fields
+-- ================================================================
+CREATE TABLE t(data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx ON t((data->'status'))
+ WHERE (data->'priority')::int > 5
+ AND (data->'active')::boolean = true;
+
+INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}');
+
+-- Outside predicate (priority too low) - should be HOT
+UPDATE t SET data = '{"status": "done", "priority": 3, "active": true}';
+SELECT * FROM check_hot_updates(1);
+
+-- Transition into predicate - should NOT be HOT
+UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}';
+SELECT * FROM check_hot_updates(1);
+
+-- Inside predicate, change to outside (active = false) - should NOT be HOT
+UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}';
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: Same elements extracted (might allow HOT if not careful)
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update: Add/remove element
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- TOASTed Values in Expression Index
+-- ================================================================
+CREATE TABLE t(id INT, large_text TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10));
+
+INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier');
+
+-- Change end of string, prefix unchanged - should be HOT
+UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change prefix - should NOT be HOT
+UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+
+CREATE INDEX t_gin ON t USING gin(search_vec);
+
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+-- Expected: 1 row
+
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (index keys changed)
+
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+-- Expected: 1 row
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- TEST: GIN with TOASTed JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin((data->'tags'));
+
+-- Insert with TOASTed JSONB
+INSERT INTO t (id, data) VALUES
+ (1, jsonb_build_object(
+ 'tags', '["postgres", "database"]'::jsonb,
+ 'large_field', repeat('x', 10000)
+ ));
+
+-- Update: Change large_field, tags unchanged - should be HOT
+UPDATE t
+SET data = jsonb_build_object(
+ 'tags', '["postgres", "database"]'::jsonb,
+ 'large_field', repeat('y', 10000)
+)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT update
+
+-- Update: Change tags - should NOT be HOT
+UPDATE t
+SET data = jsonb_build_object(
+ 'tags', '["postgres", "sql"]'::jsonb,
+ 'large_field', repeat('y', 10000)
+)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: Still 1 HOT
+
+-- Verify correctness
+SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb;
+-- Expected: 0 rows
+SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb;
+-- Expected: 1 row
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (keys actually changed)
+
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT (GIN keys semantically identical)
+
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: Still 1 HOT (not this one)
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+
+INSERT INTO t VALUES (1, 50, 'below range');
+
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+SELECT id, value, description FROM t;
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, 'Alice@Example.com', '{"status": "new"}');
+
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = 'alice@example.com' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = 'bob@example.com' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- HASH Index on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash((data->'category'));
+INSERT INTO t VALUES (1, '{"category": "books", "title": "PostgreSQL Guide"}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET data = '{"category": "books", "title": "PostgreSQL Handbook"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update indexed JSONB field - NOT HOT
+UPDATE t SET data = '{"category": "videos", "title": "PostgreSQL Handbook"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - NOT HOT
+UPDATE t SET data = '{"category": "courses", "title": "PostgreSQL Basics"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+-- Expected: 1 HOT (BRIN allows it for single row)
+
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+-- Expected: 0 HOT (HASH blocks it)
+
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (BRIN permits single-row updates)
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT
+
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (HASH blocks it)
+
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+-- Expected: 3 HOT
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+
+-- Update impacts index on whole document attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Two indexes on a JSONB document, one partial
+-- ================================================================
+CREATE TABLE t (docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+INSERT INTO t (docs) VALUES ('{"a": 0, "b": 0}');
+INSERT INTO t (docs) SELECT jsonb_build_object('b', n) FROM generate_series(100, 10000) as n;
+CREATE INDEX t_idx_a ON t ((docs->'a'));
+CREATE INDEX t_idx_b ON t ((docs->'b')) WHERE (docs->'b')::numeric > 9;
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+-- Leave 'a' unchanged but modify 'b' to a value outside of the index predicate.
+-- This should be a HOT update because neither index is changed.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 1) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index does not contain a value for 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- Leave 'a' unchanged but modify 'b' to a value within the index predicate.
+-- This represents a change for field 'b' from unindexed to indexed and so
+-- this should not take the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 0, 'b', 10) WHERE (docs->'a')::numeric = 0;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- This update modifies the value of 'a', an indexed field, so it also cannot
+-- be a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 1, 'b', 10) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+
+-- This update changes both 'a' and 'b' to new values, so it cannot use the HOT path.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 12) WHERE (docs->'b')::numeric = 10;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index contains the new value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+-- This update changes 'b' to a value outside its predicate requiring that
+-- we remove it from the index. That's a transition that can't be done
+-- during a HOT update.
+UPDATE t SET docs = jsonb_build_object('a', 2, 'b', 1) WHERE (docs->'b')::numeric = 12;
+SELECT * FROM check_hot_updates(1);
+
+-- Check to make sure that the index no longer contains the value of 'b'
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+SELECT * FROM t WHERE (docs->'b')::numeric > 9 AND (docs->'b')::numeric < 100;
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+-- ================================================================
+-- Tests to check expression indexes
+-- ================================================================
+CREATE TABLE t(a INT, b INT) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_idx_a ON t(abs(a)) WHERE abs(a) > 10;
+CREATE INDEX t_idx_b ON t(abs(b));
+INSERT INTO t VALUES (-1, -1), (-2, -2), (-3, -3), (-4, -4), (-5, -5);
+INSERT INTO t SELECT m, n FROM generate_series(-10000, -10) AS m, abs(m) AS n;
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+-- The indexed value abs(b) hasn't changed, this should be a HOT update.
+-- (-5, -5) -> (-5, 5)
+UPDATE t SET b = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(1);
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+
+-- Now that we're not checking the predicate of the partial index, this
+-- update of a from -5 to 5 should be HOT because we should ignore the
+-- predicate and check the expression and find it unchanged.
+-- (-5, 5) -> (5, 5)
+UPDATE t SET a = 5 WHERE a = -5;
+SELECT * FROM check_hot_updates(2);
+
+-- This update moves a into the partial index and should not
+-- be HOT. Let's make sure of that and check the index as well.
+-- (-4, -4) -> (-11, -4)
+UPDATE t SET a = -11 WHERE a = -4;
+SELECT * FROM check_hot_updates(2);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- (-11, -4) -> (11, -4)
+UPDATE t SET a = 11 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- (11, -4) -> (-4, -4)
+UPDATE t SET a = -4 WHERE b = -4;
+SELECT * FROM check_hot_updates(3);
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+-- This update of a from 5 to -1 is HOT despite that attribute
+-- being indexed because the before and after values for the
+-- partial index predicate are outside the index definition.
+-- (5, 5) -> (-1, 5)
+UPDATE t SET a = -1 WHERE a = 5;
+SELECT * FROM check_hot_updates(4);
+
+-- This update sets a = -2 on the row matched by b = -2 (the (-2, -2) row);
+-- the before/after values of a are both outside the partial index predicate.
+-- (-2, -2) -> (-2, -2)  -- NOTE(review): a is unchanged by this UPDATE; confirm intent
+UPDATE t SET a = -2 WHERE b = -2;
+SELECT * FROM check_hot_updates(5);
+
+-- The indexed value for b isn't changing, this should be HOT.
+-- (-2, -2) -> (-2, 2)
+UPDATE t SET b = 2 WHERE b = -2;
+SELECT * FROM check_hot_updates(6);
+EXPLAIN (COSTS OFF) SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+SELECT b FROM t WHERE abs(b) < 10 AND abs(b) > 0;
+
+SELECT * FROM t where a > -10 AND a < 10;
+
+-- Before and after values for a are outside the predicate of the index,
+-- and because we're checking this should be HOT (two rows match a = -1).
+-- (-1, 5) -> (5, 5)
+-- (-1, -1) -> (5, -1)
+UPDATE t SET a = 5 WHERE a = -1;
+SELECT * FROM check_hot_updates(8);
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+SELECT * FROM t WHERE abs(a) > 10 AND abs(a) < 15;
+
+DROP TABLE t CASCADE;
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+
+-- ================================================================
+-- JSONB with two indexes each on separate fields, one partial
+-- ================================================================
+CREATE TABLE t(docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'a')) WHERE (docs->'b')::integer = 1;
+INSERT INTO t VALUES ('{"a": 1, "b": 1}');
+
+EXPLAIN (COSTS OFF) SELECT * FROM t;
+SELECT * FROM t;
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE (docs->'b')::integer = 1;
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+
+SELECT * FROM check_hot_updates(0);
+
+UPDATE t SET docs='{"a": 1, "b": 0}';
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE (docs->'b')::integer = 1;
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Tests for multi-column indexes
+-- ================================================================
+CREATE TABLE t(id INT, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t(id, (docs->'a'));
+INSERT INTO t VALUES (1, '{"a": 1, "b": 1}');
+
+SET SESSION enable_seqscan = OFF;
+SET SESSION enable_bitmapscan = OFF;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+SELECT * FROM check_hot_updates(0);
+
+-- Changing the id attribute which is an indexed attribute should
+-- prevent HOT updates.
+UPDATE t SET id = 2;
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+-- Changing the docs->'a' field in the indexed attribute 'docs'
+-- should prevent HOT updates.
+UPDATE t SET docs='{"a": -2, "b": 1}';
+SELECT * FROM check_hot_updates(0);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+
+-- Leaving the docs->'a' attribute unchanged means that the expression
+-- is unchanged and because the 'id' attribute isn't in the modified
+-- set the indexed tuple is unchanged, this can go HOT.
+UPDATE t SET docs='{"a": -2, "b": 2}';
+SELECT * FROM check_hot_updates(1);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer < 0;
+
+-- Here we change the 'id' attribute and the 'docs' attribute setting
+-- the expression docs->'a' to a new value, this cannot be a HOT update.
+UPDATE t SET id = 3, docs='{"a": 3, "b": 3}';
+SELECT * FROM check_hot_updates(1);
+
+SELECT * FROM t WHERE id > 0 AND (docs->'a')::integer > 0;
+
+SET SESSION enable_seqscan = ON;
+SET SESSION enable_bitmapscan = ON;
+
+DROP TABLE t CASCADE;
+
+-- ================================================================
+-- Relation with unique constraint, partial index
+-- ================================================================
+CREATE TABLE users (
+ user_id serial primary key,
+ name VARCHAR(255) NOT NULL,
+ email VARCHAR(255) NOT NULL,
+ EXCLUDE USING btree (lower(email) WITH =)
+);
+
+-- Add some data to the table and then update it in ways that should and should
+-- not be HOT updates.
+INSERT INTO users (name, email) VALUES
+('user1', 'user1@example.com'),
+('user2', 'user2@example.com'),
+('taken', 'taken@EXAMPLE.com'),
+('you', 'you@domain.com'),
+('taken', 'taken@domain.com');
+
+-- Should fail because of the unique constraint on the email column.
+UPDATE users SET email = 'user1@example.com' WHERE email = 'user2@example.com';
+SELECT * FROM check_hot_updates(0, 'users');
+
+-- Should succeed because the email column is not being updated and should go HOT.
+UPDATE users SET name = 'foo' WHERE email = 'user1@example.com';
+SELECT * FROM check_hot_updates(1, 'users');
+
+-- Create a partial index on the email column, updates
+CREATE INDEX idx_users_email_no_example ON users (lower(email)) WHERE lower(email) LIKE '%@example.com%';
+
+-- An update that changes email; it stays outside the partial index predicate,
+-- but lower(email) changes, so the exclusion constraint index blocks a HOT update.
+UPDATE users SET email = 'you+2@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+
+-- An update whose new lower(email) collides with an existing row's value.
+-- Should fail the exclusion constraint, so no update (HOT or otherwise) happens.
+UPDATE users SET email = 'taken@domain.com' WHERE name = 'you';
+SELECT * FROM check_hot_updates(1, 'users');
+
+DROP TABLE users CASCADE;
+
+-- ================================================================
+-- Constraints spoiling HOT updates, this time with a range.
+-- ================================================================
+CREATE TABLE events (
+ id serial primary key,
+ name VARCHAR(255) NOT NULL,
+ event_time tstzrange,
+ constraint no_screening_time_overlap exclude using gist (
+ event_time WITH &&
+ )
+);
+
+-- Add two non-overlapping events.
+INSERT INTO events (id, event_time, name)
+VALUES
+ (1, '["2023-01-01 19:00:00", "2023-01-01 20:45:00"]', 'event1'),
+ (2, '["2023-01-01 21:00:00", "2023-01-01 21:45:00"]', 'event2');
+
+-- Update the first event to overlap with the second, should fail the constraint and not be HOT.
+UPDATE events SET event_time = '["2023-01-01 20:00:00", "2023-01-01 21:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+
+-- Update the first event to not overlap with the second, again not HOT due to the constraint.
+UPDATE events SET event_time = '["2023-01-01 22:00:00", "2023-01-01 22:45:00"]' WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'events');
+
+-- Update the first event to not overlap with the second, this time we're HOT because we don't overlap with the constraint.
+UPDATE events SET name = 'new name here' WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'events');
+
+DROP TABLE events CASCADE;
+
+-- ================================================================
+-- Ensure that only the modified summarizing indexes are updated.
+-- ================================================================
+CREATE TABLE ex (id SERIAL primary key, att1 JSONB, att2 text, att3 text, att4 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+CREATE INDEX ex_expr2_idx ON ex USING btree((att1->'a'));
+CREATE INDEX ex_expr3_idx ON ex USING btree((att1->'b'));
+CREATE INDEX ex_expr4_idx ON ex USING btree((att1->'c'));
+CREATE INDEX ex_sumr2_idx ON ex USING BRIN(att3);
+CREATE INDEX ex_sumr3_idx ON ex USING BRIN(att4);
+CREATE INDEX ex_expr5_idx ON ex USING btree((att1->'d'));
+INSERT INTO ex (att1, att2) VALUES ('{"data": []}'::json, 'nothing special');
+
+SELECT * FROM ex;
+
+-- Update att2 and att4 both are BRIN/summarizing indexes, this should be a HOT update and
+-- only update two of the three summarizing indexes.
+UPDATE ex SET att2 = 'special indeed', att4 = 'whatever';
+SELECT * FROM check_hot_updates(1, 'ex');
+SELECT * FROM ex;
+
+-- Update att1 and att2, only one is BRIN/summarizing, this should NOT be a HOT update.
+UPDATE ex SET att1 = att1 || '{"data": "howdy"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(1, 'ex');
+SELECT * FROM ex;
+
+-- Update att2, att3, and att4 all are BRIN/summarizing indexes, this should be a HOT update
+-- and yet still update all three summarizing indexes.
+UPDATE ex SET att2 = 'a', att3 = 'b', att4 = 'c';
+SELECT * FROM check_hot_updates(2, 'ex');
+SELECT * FROM ex;
+
+-- Update att1, att2, and att3 all modified values are BRIN/summarizing indexes, this should be a HOT update
+-- and yet still update all three summarizing indexes.
+UPDATE ex SET att1 = '{"data": "howdy"}', att2 = 'd', att3 = 'e';
+SELECT * FROM check_hot_updates(3, 'ex');
+SELECT * FROM ex;
+
+DROP TABLE ex CASCADE;
+
+-- ================================================================
+-- Don't update unmodified summarizing indexes but do allow HOT
+-- ================================================================
+CREATE TABLE ex (att1 JSONB, att2 text) WITH (fillfactor = 60);
+CREATE INDEX ex_expr1_idx ON ex USING btree((att1->'data'));
+CREATE INDEX ex_sumr1_idx ON ex USING BRIN(att2);
+INSERT INTO ex VALUES ('{"data": []}', 'nothing special');
+
+-- Update the unindexed value of att1; this should be a HOT update and should
+-- not need to touch the summarizing index (att2 is unchanged).
+UPDATE ex SET att1 = att1 || '{"status": "stalemate"}';
+SELECT * FROM check_hot_updates(1, 'ex');
+
+-- Update the indexed value of att2, a summarized value, this is a summarized
+-- only update and should use the HOT path while still triggering an update to
+-- the summarizing BRIN index.
+UPDATE ex SET att2 = 'special indeed';
+SELECT * FROM check_hot_updates(2, 'ex');
+
+-- Update to att1 doesn't change the indexed value while the update to att2 does,
+-- this again is a summarized only update and should use the HOT path as well as
+-- trigger an update to the BRIN index.
+UPDATE ex SET att1 = att1 || '{"status": "checkmate"}', att2 = 'special, so special';
+SELECT * FROM check_hot_updates(3, 'ex');
+
+-- This updates both indexes, the expression index on att1 and the summarizing
+-- index on att2. This should not be a HOT update because there are modified
+-- indexes and only some are summarized, not all. This should force all
+-- indexes to be updated.
+UPDATE ex SET att1 = att1 || '{"data": [1,2,3]}', att2 = 'do you want to play a game?';
+SELECT * FROM check_hot_updates(3, 'ex');
+
+DROP TABLE ex CASCADE;
+
+-- ================================================================
+-- Ensure custom type equality operators are used
+-- ================================================================
+
+CREATE TYPE my_custom_type AS (val int);
+
+-- Comparison functions (returns boolean)
+CREATE FUNCTION my_custom_lt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val < b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+CREATE FUNCTION my_custom_le(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val <= b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+CREATE FUNCTION my_custom_eq(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val = b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+CREATE FUNCTION my_custom_ge(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val >= b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+CREATE FUNCTION my_custom_gt(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val > b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+CREATE FUNCTION my_custom_ne(a my_custom_type, b my_custom_type) RETURNS boolean AS $$
+BEGIN
+ RETURN a.val != b.val;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+-- Comparison function (returns -1, 0, 1)
+CREATE FUNCTION my_custom_cmp(a my_custom_type, b my_custom_type) RETURNS int AS $$
+BEGIN
+ IF a.val < b.val THEN
+ RETURN -1;
+ ELSIF a.val > b.val THEN
+ RETURN 1;
+ ELSE
+ RETURN 0;
+ END IF;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+-- Create the operators
+CREATE OPERATOR < (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_lt,
+ COMMUTATOR = >,
+ NEGATOR = >=
+);
+
+CREATE OPERATOR <= (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_le,
+ COMMUTATOR = >=,
+ NEGATOR = >
+);
+
+CREATE OPERATOR = (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_eq,
+ COMMUTATOR = =,
+ NEGATOR = <>
+);
+
+CREATE OPERATOR >= (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_ge,
+ COMMUTATOR = <=,
+ NEGATOR = <
+);
+
+CREATE OPERATOR > (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_gt,
+ COMMUTATOR = <,
+ NEGATOR = <=
+);
+
+CREATE OPERATOR <> (
+ LEFTARG = my_custom_type,
+ RIGHTARG = my_custom_type,
+ PROCEDURE = my_custom_ne,
+ COMMUTATOR = <>,
+ NEGATOR = =
+);
+
+-- Create the operator class (including the support function)
+CREATE OPERATOR CLASS my_custom_ops
+ DEFAULT FOR TYPE my_custom_type USING btree AS
+ OPERATOR 1 <,
+ OPERATOR 2 <=,
+ OPERATOR 3 =,
+ OPERATOR 4 >=,
+ OPERATOR 5 >,
+ FUNCTION 1 my_custom_cmp(my_custom_type, my_custom_type);
+
+-- Create the table
+CREATE TABLE my_table (
+ id int,
+ custom_val my_custom_type
+);
+
+-- Insert some data
+INSERT INTO my_table (id, custom_val) VALUES
+(1, ROW(3)::my_custom_type),
+(2, ROW(1)::my_custom_type),
+(3, ROW(4)::my_custom_type),
+(4, ROW(2)::my_custom_type);
+
+-- Create a function to use when indexing
+CREATE OR REPLACE FUNCTION abs_val(val my_custom_type) RETURNS int AS $$
+BEGIN
+ RETURN abs(val.val);
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT;
+
+-- Create the index
+CREATE INDEX idx_custom_val_abs ON my_table (abs_val(custom_val));
+
+-- Update 1
+UPDATE my_table SET custom_val = ROW(5)::my_custom_type WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 'my_table');
+
+-- Update 2
+UPDATE my_table SET custom_val = ROW(0)::my_custom_type WHERE custom_val < ROW(3)::my_custom_type;
+SELECT * FROM check_hot_updates(0, 'my_table');
+
+-- Update 3
+UPDATE my_table SET custom_val = ROW(6)::my_custom_type WHERE id = 3;
+SELECT * FROM check_hot_updates(0, 'my_table');
+
+-- Update 4
+UPDATE my_table SET id = 5 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 'my_table');
+
+-- Query using the index
+SELECT * FROM my_table WHERE abs_val(custom_val) = 6;
+
+-- Clean up test case
+DROP TABLE my_table CASCADE;
+DROP OPERATOR CLASS my_custom_ops USING btree CASCADE;
+DROP OPERATOR < (my_custom_type, my_custom_type);
+DROP OPERATOR <= (my_custom_type, my_custom_type);
+DROP OPERATOR = (my_custom_type, my_custom_type);
+DROP OPERATOR >= (my_custom_type, my_custom_type);
+DROP OPERATOR > (my_custom_type, my_custom_type);
+DROP OPERATOR <> (my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_lt(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_le(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_eq(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_ge(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_gt(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_ne(my_custom_type, my_custom_type);
+DROP FUNCTION my_custom_cmp(my_custom_type, my_custom_type);
+DROP FUNCTION abs_val(my_custom_type);
+DROP TYPE my_custom_type CASCADE;
+
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/tools/pgindent/pgindent b/src/tools/pgindent/pgindent
index d14da3f01a995..18ef572a8be6c 100755
--- a/src/tools/pgindent/pgindent
+++ b/src/tools/pgindent/pgindent
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
# Copyright (c) 2021-2025, PostgreSQL Global Development Group
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index cf3f6a7dafd0d..4cc7a9d4c7d2b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -390,6 +390,7 @@ CachedFunctionCompileCallback
CachedFunctionDeleteCallback
CachedFunctionHashEntry
CachedFunctionHashKey
+CachedIndexDatum
CachedPlan
CachedPlanSource
CallContext