From f05f3ff3669fdcc6a4bb9e709dd092c1a50df94e Mon Sep 17 00:00:00 2001 From: chaadow Date: Sun, 30 Nov 2025 01:31:56 +0100 Subject: [PATCH 1/3] Add 3 new queries - ANALYZE progress Useful to check to know if PG is acquiring sample rows on a certain table - VACUUM progress Useful to know when vacuum workers are working or if you suspect the system beingn slow for some reason. This could also help check if there are multiple vacuums happening at the same time - Cumulative I/O statistics with pg_stat_io for vacuum purposes this can help detect if we're vacuuming too often. This might get added to the diagnose page in a later PR. More info: https://pganalyze.com/blog/pg-stat-io#tracking-cumulative-io-activity-by-autovacuum-and-manual-vacuums --- lib/ruby-pg-extras.rb | 28 ++++++++++++++++- .../queries/analyze_progress.sql | 27 +++++++++++++++++ .../queries/vacuum_io_stats.sql | 30 +++++++++++++++++++ .../queries/vacuum_io_stats_legacy.sql | 5 ++++ .../queries/vacuum_progress.sql | 26 ++++++++++++++++ .../queries/vacuum_progress_17.sql | 29 ++++++++++++++++++ 6 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 lib/ruby_pg_extras/queries/analyze_progress.sql create mode 100644 lib/ruby_pg_extras/queries/vacuum_io_stats.sql create mode 100644 lib/ruby_pg_extras/queries/vacuum_io_stats_legacy.sql create mode 100644 lib/ruby_pg_extras/queries/vacuum_progress.sql create mode 100644 lib/ruby_pg_extras/queries/vacuum_progress_17.sql diff --git a/lib/ruby-pg-extras.rb b/lib/ruby-pg-extras.rb index 288c3e3..c450351 100644 --- a/lib/ruby-pg-extras.rb +++ b/lib/ruby-pg-extras.rb @@ -26,7 +26,9 @@ module RubyPgExtras long_running_queries mandelbrot outliers records_rank seq_scans table_index_scans table_indexes_size table_size total_index_size total_table_size - unused_indexes duplicate_indexes vacuum_stats kill_all kill_pid + unused_indexes duplicate_indexes vacuum_stats vacuum_progress vacuum_io_stats + analyze_progress + kill_all kill_pid pg_stat_statements_reset buffercache_stats buffercache_usage ssl_used connections table_schema table_schemas @@ -50,6 +52,11 @@ module RubyPgExtras outliers: { limit: 10 }, outliers_legacy: { limit: 10 }, outliers_17: { limit: 10 }, + vacuum_progress: {}, + vacuum_progress_17: {}, + vacuum_io_stats: {}, + vacuum_io_stats_legacy: {}, + analyze_progress: {}, buffercache_stats: { limit: 10 }, buffercache_usage: { limit: 20 }, unused_indexes: { max_scans: 50, schema: DEFAULT_SCHEMA }, @@ -92,6 +99,25 @@ def self.run_query_base(query_name:, conn:, exec_method:, in_format:, args: {}) end end + # vacuum_progress uses pg_stat_progress_vacuum only and does not depend on pg_stat_statements, + # so we switch it based on the server_version_num instead of the pg_stat_statements version. + if query_name == :vacuum_progress + server_version_num = conn.send(exec_method, "SHOW server_version_num").to_a[0].values[0].to_i + if server_version_num >= 170000 + query_name = :vacuum_progress_17 + end + end + + # vacuum_io_stats relies on pg_stat_io which is available starting from PostgreSQL 16. + # For older versions we fall back to vacuum_io_stats_legacy which just indicates + # that this feature is not available on the current server. + if query_name == :vacuum_io_stats + server_version_num = conn.send(exec_method, "SHOW server_version_num").to_a[0].values[0].to_i + if server_version_num < 160000 + query_name = :vacuum_io_stats_legacy + end + end + REQUIRED_ARGS.fetch(query_name) { [] }.each do |arg_name| if args[arg_name].nil? raise ArgumentError, "'#{arg_name}' is required" diff --git a/lib/ruby_pg_extras/queries/analyze_progress.sql b/lib/ruby_pg_extras/queries/analyze_progress.sql new file mode 100644 index 0000000..faf1350 --- /dev/null +++ b/lib/ruby_pg_extras/queries/analyze_progress.sql @@ -0,0 +1,27 @@ +/* Current ANALYZE progress as reported by pg_stat_progress_analyze */ + +SELECT + a.datname AS database, + n.nspname AS schema, + c.relname AS table, + p.pid, + p.phase, + p.sample_blks_total, + p.sample_blks_scanned, + p.ext_stats_total, + p.ext_stats_computed, + p.child_tables_total, + p.child_tables_done, + p.current_child_table_relid +FROM + pg_stat_progress_analyze p + LEFT JOIN pg_class c ON p.relid = c.oid + LEFT JOIN pg_namespace n ON c.relnamespace = n.oid + LEFT JOIN pg_stat_activity a ON p.pid = a.pid +ORDER BY + a.datname, + n.nspname, + c.relname, + p.pid; + + diff --git a/lib/ruby_pg_extras/queries/vacuum_io_stats.sql b/lib/ruby_pg_extras/queries/vacuum_io_stats.sql new file mode 100644 index 0000000..8f8ed59 --- /dev/null +++ b/lib/ruby_pg_extras/queries/vacuum_io_stats.sql @@ -0,0 +1,30 @@ +/* I/O statistics for autovacuum backends from pg_stat_io (PostgreSQL 16+) */ + +SELECT + backend_type, + object, + context, + reads, + read_time, + writes, + write_time, + writebacks, + writeback_time, + extends, + extend_time, + fsyncs, + fsync_time, + reuses, + evictions +FROM + pg_stat_io +WHERE + backend_type IN ('autovacuum worker', 'autovacuum launcher') + AND object = 'relation' + AND context IN ('vacuum', 'autovacuum') +ORDER BY + backend_type, + context, + object; + + diff --git a/lib/ruby_pg_extras/queries/vacuum_io_stats_legacy.sql b/lib/ruby_pg_extras/queries/vacuum_io_stats_legacy.sql new file mode 100644 index 0000000..047a1f2 --- /dev/null +++ b/lib/ruby_pg_extras/queries/vacuum_io_stats_legacy.sql @@ -0,0 +1,5 @@ +/* I/O statistics for autovacuum backends are only available via pg_stat_io (PostgreSQL 16+). */ + +SELECT 'Upgrade to PostgreSQL 16 or newer to use this feature' AS "feature not available"; + + diff --git a/lib/ruby_pg_extras/queries/vacuum_progress.sql b/lib/ruby_pg_extras/queries/vacuum_progress.sql new file mode 100644 index 0000000..f7bb6e3 --- /dev/null +++ b/lib/ruby_pg_extras/queries/vacuum_progress.sql @@ -0,0 +1,26 @@ +/* Current VACUUM progress as reported by pg_stat_progress_vacuum */ + +SELECT + a.datname AS database, + n.nspname AS schema, + c.relname AS table, + p.pid, + p.phase, + p.heap_blks_total, + p.heap_blks_scanned, + p.heap_blks_vacuumed, + p.index_vacuum_count, + p.max_dead_tuples, + p.num_dead_tuples +FROM + pg_stat_progress_vacuum p + LEFT JOIN pg_class c ON p.relid = c.oid + LEFT JOIN pg_namespace n ON c.relnamespace = n.oid + LEFT JOIN pg_stat_activity a ON p.pid = a.pid +ORDER BY + a.datname, + n.nspname, + c.relname, + p.pid; + + diff --git a/lib/ruby_pg_extras/queries/vacuum_progress_17.sql b/lib/ruby_pg_extras/queries/vacuum_progress_17.sql new file mode 100644 index 0000000..d5e6acf --- /dev/null +++ b/lib/ruby_pg_extras/queries/vacuum_progress_17.sql @@ -0,0 +1,29 @@ +/* Current VACUUM progress as reported by pg_stat_progress_vacuum */ + +SELECT + a.datname AS database, + n.nspname AS schema, + c.relname AS table, + p.pid, + p.phase, + p.heap_blks_total, + p.heap_blks_scanned, + p.heap_blks_vacuumed, + p.index_vacuum_count, + p.indexes_total, + p.indexes_processed, + p.num_dead_item_ids, + p.dead_tuple_bytes, + p.max_dead_tuple_bytes +FROM + pg_stat_progress_vacuum p + LEFT JOIN pg_class c ON p.relid = c.oid + LEFT JOIN pg_namespace n ON c.relnamespace = n.oid + LEFT JOIN pg_stat_activity a ON p.pid = a.pid +ORDER BY + a.datname, + n.nspname, + c.relname, + p.pid; + + From a96b38b8791b4d9af20f6d9ca8b6034dee9777c8 Mon Sep 17 00:00:00 2001 From: chaadow Date: Sun, 30 Nov 2025 20:04:47 +0100 Subject: [PATCH 2/3] Add missing useful columns in vacuum_stats.sql --- lib/ruby_pg_extras/queries/vacuum_io_stats.sql | 3 ++- lib/ruby_pg_extras/queries/vacuum_stats.sql | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/ruby_pg_extras/queries/vacuum_io_stats.sql b/lib/ruby_pg_extras/queries/vacuum_io_stats.sql index 8f8ed59..b402279 100644 --- a/lib/ruby_pg_extras/queries/vacuum_io_stats.sql +++ b/lib/ruby_pg_extras/queries/vacuum_io_stats.sql @@ -15,7 +15,8 @@ SELECT fsyncs, fsync_time, reuses, - evictions + evictions, + stats_reset FROM pg_stat_io WHERE diff --git a/lib/ruby_pg_extras/queries/vacuum_stats.sql b/lib/ruby_pg_extras/queries/vacuum_stats.sql index 7fd3c5f..4a7267b 100644 --- a/lib/ruby_pg_extras/queries/vacuum_stats.sql +++ b/lib/ruby_pg_extras/queries/vacuum_stats.sql @@ -1,4 +1,4 @@ -/* Dead rows and whether an automatic vacuum is expected to be triggered */ +/* Dead rows, new inserts since last VACUUM and whether an automatic vacuum is expected to be triggered */ WITH table_opts AS ( SELECT @@ -34,8 +34,10 @@ WITH table_opts AS ( SELECT vacuum_settings.nspname AS schema, vacuum_settings.relname AS table, - to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_vacuum, + to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_manual_vacuum, + to_char(psut.vacuum_count, '9G999G999G999') AS manual_vacuum_count, to_char(psut.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum, + to_char(psut.autovacuum_count, '9G999G999G999') AS autovacuum_count, to_char(pg_class.reltuples, '9G999G999G999') AS rowcount, to_char(psut.n_dead_tup, '9G999G999G999') AS dead_rowcount, to_char( From ed5dc5f6e2457f40b8bcf3339c2f3e0826a15eb4 Mon Sep 17 00:00:00 2001 From: chaadow Date: Mon, 1 Dec 2025 22:17:41 +0100 Subject: [PATCH 3/3] Update README with analyze_progress / vacuum_progress / vacuum_io_stats queries + updated vacuum_stats command --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c0f7556..cb12283 100644 --- a/README.md +++ b/README.md @@ -647,17 +647,60 @@ This command displays an estimation of table "bloat" – space allocated to a re RubyPgExtras.vacuum_stats - schema | table | last_vacuum | last_autovacuum | rowcount | dead_rowcount | autovacuum_threshold | expect_autovacuum ---------+-----------------------+-------------+------------------+----------------+----------------+----------------------+------------------- - public | log_table | | 2013-04-26 17:37 | 18,030 | 0 | 3,656 | - public | data_table | | 2013-04-26 13:09 | 79 | 28 | 66 | - public | other_table | | 2013-04-26 11:41 | 41 | 47 | 58 | - public | queue_table | | 2013-04-26 17:39 | 12 | 8,228 | 52 | yes - public | picnic_table | | | 13 | 0 | 53 | + schema | table | last_manual_vacuum | manual_vacuum_count | last_autovacuum | autovacuum_count | rowcount | dead_rowcount | dead_tup_autovacuum_threshold | n_ins_since_vacuum | insert_autovacuum_threshold | expect_autovacuum +--------+-----------------------+--------------------+---------------------+------------------+------------------+----------------+----------------+-------------------------------+--------------------+-----------------------------+------------------- + public | log_table | | 0 | 2013-04-26 17:37 | 5 | 18,030 | 0 | 3,656 | 0 | 3,606 | + public | data_table | | 0 | 2013-04-26 13:09 | 3 | 79 | 28 | 66 | 10 | 16 | yes (dead_tuples) + public | other_table | | 0 | 2013-04-26 11:41 | 4 | 41 | 47 | 58 | 2,000 | 1,008 | yes (dead_tuples & inserts) (truncated results for brevity) ``` -This command displays statistics related to vacuum operations for each table, including an estimation of dead rows, last autovacuum and the current autovacuum threshold. This command can be useful when determining if current vacuum thresholds require adjustments, and to determine when the table was last vacuumed. +This command displays statistics related to vacuum operations for each table, including last manual vacuum and autovacuum timestamps and counters, an estimation of dead rows, dead-tuple-based autovacuum threshold, number of rows inserted since the last VACUUM (`n_ins_since_vacuum`) and the insert-based autovacuum threshold introduced in PostgreSQL 13 ([PostgreSQL autovacuum configuration](https://www.postgresql.org/docs/current/runtime-config-vacuum.html#RUNTIME-CONFIG-AUTOVACUUM)). It helps determine if current autovacuum thresholds (both dead-tuple and insert-based) are appropriate, and whether an automatic vacuum is expected to be triggered soon. + +### `vacuum_progress` + +```ruby + +RubyPgExtras.vacuum_progress + + database | schema | table | pid | phase | heap_blks_total | heap_blks_scanned | heap_blks_vacuumed | index_vacuum_count +----------+--------+----------+-------+---------------------+-----------------+-------------------+--------------------+-------------------- + app_db | public | users | 12345 | scanning heap | 125000 | 32000 | 0 | 0 + app_db | public | orders | 12346 | vacuuming indexes | 80000 | 80000 | 75000 | 3 + (truncated results for brevity) +``` + +This command shows the current progress of `VACUUM` / autovacuum operations by reading `pg_stat_progress_vacuum` ([VACUUM progress reporting docs](https://www.postgresql.org/docs/current/progress-reporting.html#VACUUM-PROGRESS-REPORTING)). It can be used to see which tables are being vacuumed right now, how far each operation has progressed, and how many index vacuum cycles have been performed. + +### `analyze_progress` + +```ruby + +RubyPgExtras.analyze_progress + + database | schema | table | pid | phase | sample_blks_total | sample_blks_scanned | ext_stats_total | ext_stats_computed +----------+--------+----------+-------+----------------------+-------------------+---------------------+-----------------+-------------------- + app_db | public | users | 22345 | acquiring sample rows| 5000 | 1200 | 2 | 0 + app_db | public | orders | 22346 | computing statistics | 8000 | 8000 | 1 | 1 + (truncated results for brevity) +``` + +This command displays the current progress of `ANALYZE` and auto-analyze operations using `pg_stat_progress_analyze` ([ANALYZE progress reporting docs](https://www.postgresql.org/docs/current/progress-reporting.html#ANALYZE-PROGRESS-REPORTING)). It helps understand how far statistics collection has progressed for each active analyze and whether extended statistics are being computed. + +### `vacuum_io_stats` + +```ruby + +RubyPgExtras.vacuum_io_stats + + backend_type | object | context | reads | writes | writebacks | extends | evictions | reuses | fsyncs | stats_reset +--------------------+----------+----------+---------+---------+-----------+---------+-----------+---------+--------+------------------------------- + autovacuum worker | relation | vacuum | 5824251 | 3028684 | 0 | 0 | 2588 | 5821460 | 0 | 2025-01-10 11:50:27.583875+00 + autovacuum launcher| relation | autovacuum| 16306 | 2494 | 0 | 2915 | 17785 | 0 | 0 | 2025-01-10 11:50:27.583875+00 + (truncated results for brevity) +``` + +This command surfaces cumulative I/O statistics for autovacuum-related VACUUM activity, based on the `pg_stat_io` view introduced in PostgreSQL 16 ([pg_stat_io documentation](https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-IO-VIEW)). It shows how many blocks autovacuum workers have read and written, how many buffer evictions and ring-buffer reuses occurred, and when the statistics were last reset; this is useful for determining whether autovacuum is responsible for I/O spikes, as described in the pganalyze article on `pg_stat_io` ([Tracking cumulative I/O activity by autovacuum and manual VACUUMs](https://pganalyze.com/blog/pg-stat-io#tracking-cumulative-io-activity-by-autovacuum-and-manual-vacuums)). On PostgreSQL versions below 16 this method returns a single informational row indicating that the feature is unavailable. ### `kill_all`