From 175da6ea34f10fd89a6e7afe7dd1fa6a2ba91d07 Mon Sep 17 00:00:00 2001
From: nothankyou
Date: Wed, 4 Feb 2026 18:56:29 +0800
Subject: [PATCH 1/2] fix(metrics): stabilize db pool available gauge

Track pool availability by connection id and set gauge from absolute
state. Update the state and write the gauge under a single lock so that
concurrent observations cannot publish a stale value.
Add unit coverage for lifecycle transitions and bounds.
---
Review notes (not part of the commit message): the blob ids on the
`index` lines are carried over from the original series and whitespace of
context lines was reconstructed; regenerate with `git format-patch`.

 postgrest.cabal                               |  1 +
 src/PostgREST/Metrics.hs                      | 89 ++++++++++++++++---
 test/spec/Feature/MetricsPoolAvailableSpec.hs | 84 +++++++++++++++++
 test/spec/Main.hs                             |  3 +
 4 files changed, 167 insertions(+), 10 deletions(-)
 create mode 100644 test/spec/Feature/MetricsPoolAvailableSpec.hs

diff --git a/postgrest.cabal b/postgrest.cabal
index fd5f3ec28b..262f40cce8 100644
--- a/postgrest.cabal
+++ b/postgrest.cabal
@@ -218,6 +218,7 @@ test-suite spec
                       Feature.ConcurrentSpec
                       Feature.CorsSpec
                       Feature.ExtraSearchPathSpec
+                      Feature.MetricsPoolAvailableSpec
                       Feature.NoSuperuserSpec
                       Feature.ObservabilitySpec
                       Feature.OpenApi.DisabledOpenApiSpec
diff --git a/src/PostgREST/Metrics.hs b/src/PostgREST/Metrics.hs
index 7a39557751..1048e5cc00 100644
--- a/src/PostgREST/Metrics.hs
+++ b/src/PostgREST/Metrics.hs
@@ -6,11 +6,17 @@ Description : Metrics based on the Observation module. See Observation.hs.
 module PostgREST.Metrics
   ( init
   , MetricsState (..)
+  , PoolAvailableState(..)
+  , emptyPoolAvailableState
+  , stepPoolAvailable
+  , poolAvailableGaugeValue
   , observationMetrics
   , metricsToText
   ) where
 
+import           Control.Concurrent.MVar (MVar, modifyMVar_, newMVar)
 import qualified Data.ByteString.Lazy   as LBS
+import qualified Data.Map.Strict        as M
 import qualified Hasql.Pool.Observation as SQL
 
 import Prometheus
@@ -19,6 +25,59 @@ import PostgREST.Observation
 
 import Protolude
 
+-- | Availability of pool connections, tracked per connection id.
+--
+-- The gauge is derived from this absolute state instead of being
+-- incremented/decremented per observation, so repeated or unexpected
+-- status transitions cannot make it drift.
+data PoolAvailableState k =
+  PoolAvailableState {
+    -- | Last observed readiness per tracked connection ('True' when ready for use).
+    poolAvailableById  :: !(M.Map k Bool),
+    -- | Number of 'True' entries in 'poolAvailableById', kept incrementally.
+    poolAvailableCount :: !Int,
+    -- | Optional upper bound applied by 'poolAvailableGaugeValue'.
+    poolAvailableMax   :: !(Maybe Int)
+  }
+
+-- | State with no tracked connections and the given optional upper bound.
+emptyPoolAvailableState :: Maybe Int -> PoolAvailableState k
+emptyPoolAvailableState maxSize =
+  PoolAvailableState {
+    poolAvailableById  = M.empty,
+    poolAvailableCount = 0,
+    poolAvailableMax   = maxSize
+  }
+
+-- | Apply one connection observation to the state.
+--
+-- The connection's previous contribution to the count is removed first and
+-- the contribution of the new status is then added, so duplicate or
+-- out-of-order observations are last-write-wins and never double count.
+stepPoolAvailable :: Ord k => PoolAvailableState k -> (k, SQL.ConnectionStatus) -> PoolAvailableState k
+stepPoolAvailable st (connId, status) =
+  let
+    PoolAvailableState{..} = st
+    wasReady = M.lookup connId poolAvailableById == Just True
+    countWithoutOld = poolAvailableCount - if wasReady then 1 else 0
+    (nextById, nextCount) = case status of
+      SQL.ReadyForUseConnectionStatus ->
+        (M.insert connId True poolAvailableById, countWithoutOld + 1)
+      -- terminated connections are dropped from the map
+      SQL.TerminatedConnectionStatus _ ->
+        (M.delete connId poolAvailableById, countWithoutOld)
+      -- any other status: tracked, but not counted as available
+      _ ->
+        (M.insert connId False poolAvailableById, countWithoutOld)
+  in st { poolAvailableById = nextById, poolAvailableCount = nextCount }
+
+-- | Value to publish on the gauge: the ready count clamped to be
+-- non-negative and, when a maximum is set, to at most that maximum.
+poolAvailableGaugeValue :: PoolAvailableState k -> Int
+poolAvailableGaugeValue PoolAvailableState{..} =
+  let lowerBound = max 0 poolAvailableCount
+  in maybe lowerBound (`min` lowerBound) poolAvailableMax
+
 data MetricsState =
   MetricsState {
     poolTimeouts         :: Counter,
@@ -29,11 +88,14 @@ data MetricsState =
     schemaCacheQueryTime :: Gauge,
     jwtCacheRequests     :: Counter,
     jwtCacheHits         :: Counter,
-    jwtCacheEvictions    :: Counter
+    jwtCacheEvictions    :: Counter,
+    -- | State behind 'poolAvailable'; the 'MVar' also serialises gauge writes.
+    poolAvailableState   :: MVar (PoolAvailableState Text)
   }
 
 init :: Int -> IO MetricsState
 init configDbPoolSize = do
+  poolAvailableStateVar <- newMVar (emptyPoolAvailableState (Just configDbPoolSize))
   metricState <- MetricsState <$>
     register (counter (Info "pgrst_db_pool_timeouts_total" "The total number of pool connection timeouts")) <*>
     register (gauge (Info "pgrst_db_pool_available" "Available connections in the pool")) <*>
@@ -43,7 +105,8 @@ init configDbPoolSize = do
     register (gauge (Info "pgrst_schema_cache_query_time_seconds" "The query time in seconds of the last schema cache load")) <*>
     register (counter (Info "pgrst_jwt_cache_requests_total" "The total number of JWT cache lookups")) <*>
     register (counter (Info "pgrst_jwt_cache_hits_total" "The total number of JWT cache hits")) <*>
-    register (counter (Info "pgrst_jwt_cache_evictions_total" "The total number of JWT cache evictions"))
+    register (counter (Info "pgrst_jwt_cache_evictions_total" "The total number of JWT cache evictions")) <*>
+    pure poolAvailableStateVar
   setGauge (poolMaxSize metricState) (fromIntegral configDbPoolSize)
   pure metricState
 
@@ -52,14 +115,8 @@ observationMetrics :: MetricsState -> ObservationHandler
 observationMetrics MetricsState{..} obs = case obs of
   (PoolAcqTimeoutObs _) -> do
     incCounter poolTimeouts
-  (HasqlPoolObs (SQL.ConnectionObservation _ status)) -> case status of
-    SQL.ReadyForUseConnectionStatus -> do
-      incGauge poolAvailable
-    SQL.InUseConnectionStatus -> do
-      decGauge poolAvailable
-    SQL.TerminatedConnectionStatus _ -> do
-      decGauge poolAvailable
-    SQL.ConnectingConnectionStatus -> pure ()
+  (HasqlPoolObs (SQL.ConnectionObservation uuid status)) ->
+    updatePoolAvailable poolAvailableState poolAvailable (show uuid) status
   PoolRequest ->
     incGauge poolWaiting
   PoolRequestFullfilled ->
@@ -75,5 +132,17 @@ observationMetrics MetricsState{..} obs = case obs of
   _ ->
     pure ()
 
+-- | Apply a connection observation and publish the resulting gauge value.
+--
+-- The state update and the gauge write happen under the same 'MVar' lock:
+-- observations may arrive from several threads, and writing the gauge after
+-- releasing the state would let an older value overwrite a newer one.
+updatePoolAvailable :: MVar (PoolAvailableState Text) -> Gauge -> Text -> SQL.ConnectionStatus -> IO ()
+updatePoolAvailable stateVar poolGauge connId status =
+  modifyMVar_ stateVar $ \st -> do
+    let nextState = stepPoolAvailable st (connId, status)
+    setGauge poolGauge (fromIntegral (poolAvailableGaugeValue nextState))
+    pure $! nextState
+
 metricsToText :: IO LBS.ByteString
 metricsToText = exportMetricsAsText
diff --git a/test/spec/Feature/MetricsPoolAvailableSpec.hs b/test/spec/Feature/MetricsPoolAvailableSpec.hs
new file mode 100644
index 0000000000..a3a31dd8c2
--- /dev/null
+++ b/test/spec/Feature/MetricsPoolAvailableSpec.hs
@@ -0,0 +1,84 @@
+module Feature.MetricsPoolAvailableSpec where
+
+import qualified Hasql.Pool.Observation as SQL
+import           PostgREST.Metrics      (PoolAvailableState (..),
+                                         emptyPoolAvailableState,
+                                         poolAvailableGaugeValue,
+                                         stepPoolAvailable)
+import           Protolude
+import           Test.Hspec             (Spec, describe, it, shouldBe)
+
+spec :: Spec
+spec =
+  describe "pgrst_db_pool_available" $ do
+    it "does not decrement on Connecting -> Terminated" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          state1 =
+            applyEvents state0
+              [ ("a", SQL.ConnectingConnectionStatus),
+                ("a", terminated)
+              ]
+      poolAvailableCount state1 `shouldBe` 0
+
+    it "does not double-decrement on Ready -> InUse -> Terminated" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          counts =
+            countsAfter
+              state0
+              [ ("a", SQL.ReadyForUseConnectionStatus),
+                ("a", SQL.InUseConnectionStatus),
+                ("a", terminated)
+              ]
+      counts `shouldBe` [1, 0, 0]
+
+    it "ignores duplicate ReadyForUse observations" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          counts =
+            countsAfter
+              state0
+              [ ("a", SQL.ReadyForUseConnectionStatus),
+                ("a", SQL.ReadyForUseConnectionStatus)
+              ]
+      counts `shouldBe` [1, 1]
+
+    it "ignores duplicate Terminated observations for unknown connections" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          state1 = applyEvents state0 [("a", terminated), ("a", terminated)]
+      poolAvailableCount state1 `shouldBe` 0
+
+    it "tracks multiple connections independently" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          counts =
+            countsAfter
+              state0
+              [ ("a", SQL.ReadyForUseConnectionStatus),
+                ("b", SQL.ReadyForUseConnectionStatus),
+                ("a", SQL.InUseConnectionStatus)
+              ]
+      counts `shouldBe` [1, 2, 1]
+
+    it "treats out-of-order observations as last-write-wins" $ do
+      let state0 = emptyPoolAvailableState (Just 10) :: PoolAvailableState Text
+          counts =
+            countsAfter
+              state0
+              [ ("a", terminated),
+                ("a", SQL.ReadyForUseConnectionStatus),
+                ("a", SQL.InUseConnectionStatus)
+              ]
+      counts `shouldBe` [0, 1, 0]
+
+    it "clamps gauge values to pool_max" $ do
+      let state0 = emptyPoolAvailableState (Just 1) :: PoolAvailableState Text
+          state1 =
+            applyEvents state0
+              [ ("a", SQL.ReadyForUseConnectionStatus),
+                ("b", SQL.ReadyForUseConnectionStatus)
+              ]
+      poolAvailableCount state1 `shouldBe` 2
+      poolAvailableGaugeValue state1 `shouldBe` 1
+
+  where
+    terminated = SQL.TerminatedConnectionStatus SQL.ReleaseConnectionTerminationReason
+    applyEvents = foldl' stepPoolAvailable
+    countsAfter st events = poolAvailableCount <$> drop 1 (scanl stepPoolAvailable st events)
diff --git a/test/spec/Main.hs b/test/spec/Main.hs
index e847926b6f..d756b466a2 100644
--- a/test/spec/Main.hs
+++ b/test/spec/Main.hs
@@ -29,6 +29,7 @@ import qualified Feature.Auth.NoJwtSecretSpec
 import qualified Feature.ConcurrentSpec
 import qualified Feature.CorsSpec
 import qualified Feature.ExtraSearchPathSpec
+import qualified Feature.MetricsPoolAvailableSpec
 import qualified Feature.NoSuperuserSpec
 import qualified Feature.ObservabilitySpec
 import qualified Feature.OpenApi.DisabledOpenApiSpec
@@ -167,6 +168,8 @@ main = do
         ]
 
   hspec $ do
+    describe "Feature.MetricsPoolAvailableSpec" Feature.MetricsPoolAvailableSpec.spec
+
     mapM_ (parallel . before withApp) specs
 
     -- we analyze to get accurate results from EXPLAIN

From 71525aaa316564cada73cc43004ed72dde737f93 Mon Sep 17 00:00:00 2001
From: nothankyou
Date: Wed, 4 Feb 2026 19:21:50 +0800
Subject: [PATCH 2/2] docs(changelog): note pool availability fix (#4622)

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09aa9af85e..a291ea6ff1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,8 @@ All notable changes to this project will be documented in this file. From versio
 
 ### Fixed
 
+- Fix `pgrst_db_pool_available` drifting below zero under connection churn by tracking availability per connection id and setting the gauge from absolute state by @nothankyouzzz in #4622.
+
 ### Changed
 
 - Log error when `db-schemas` config contains schema `pg_catalog` or `information_schema` by @taimoorzaeem in #4359