From 026b39a2a8a766d65d3496f69562a9c2aabbcee2 Mon Sep 17 00:00:00 2001
From: spinkney <sean.pinkney@gmail.com>
Date: Tue, 23 Dec 2025 09:05:14 -0500
Subject: [PATCH 1/5] super stable gamma_lccdf

---
 stan/math/prim/prob/gamma_lccdf.hpp           | 187 +++++++++++++-----
 test/unit/math/prim/prob/gamma_lccdf_test.cpp |  48 +++++
 test/unit/math/rev/prob/gamma_lccdf_test.cpp  |  72 +++++++
 3 files changed, 263 insertions(+), 44 deletions(-)
diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp
index a670cefcecf..e28ce1cba65 100644
--- a/stan/math/prim/prob/gamma_lccdf.hpp
+++ b/stan/math/prim/prob/gamma_lccdf.hpp
@@ -4,30 +4,84 @@
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/constants.hpp>
-#include <stan/math/prim/fun/digamma.hpp>
 #include <stan/math/prim/fun/exp.hpp>
-#include <stan/math/prim/fun/gamma_q.hpp>
-#include <stan/math/prim/fun/grad_reg_inc_gamma.hpp>
+#include <stan/math/prim/fun/fma.hpp>
+#include <stan/math/prim/fun/gamma_p.hpp>
+#include <stan/math/prim/fun/grad_reg_lower_inc_gamma.hpp>
 #include <stan/math/prim/fun/log.hpp>
+#include <stan/math/prim/fun/log1m.hpp>
 #include <stan/math/prim/fun/max_size.hpp>
 #include <stan/math/prim/fun/scalar_seq_view.hpp>
 #include <stan/math/prim/fun/size.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
-#include <stan/math/prim/fun/tgamma.hpp>
 #include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
+#include <stan/math/fwd/fun/lgamma.hpp>
+#include <stan/math/fwd/fun/log.hpp>
+#include <stan/math/fwd/fun/value_of.hpp>
 #include <cmath>
 
 namespace stan {
 namespace math {
 
+namespace internal {
+
+template <typename T_a>
+inline auto log_q_gamma_cf(const T_a& a, const double x,
+                           int max_steps = 250, double precision = 1e-16) {
+  using std::fabs;
+  using stan::math::lgamma;
+  using stan::math::log;
+  using stan::math::value_of;
+
+  const auto log_prefactor = a * log(x) - x - lgamma(a);
+
+  auto b = x + 1.0 - a;
+  auto C = (fabs(value_of(b)) >= EPSILON) ? b : T_a(EPSILON);
+  auto D = T_a(0.0);
+  auto f = C;
+
+  for (int i = 1; i <= max_steps; ++i) {
+    auto an = -i * (i - a);
+    b += 2.0;
+
+    D = b + an * D;
+    if (fabs(value_of(D)) < EPSILON) {
+      D = T_a(EPSILON);
+    }
+    C = b + an / C;
+    if (fabs(value_of(C)) < EPSILON) {
+      C = T_a(EPSILON);
+    }
+
+    D = 1.0 / D;
+    auto delta = C * D;
+    f *= delta;
+
+    const double delta_m1 = fabs(value_of(delta) - 1.0);
+    if (delta_m1 < precision) {
+      if constexpr (stan::is_fvar<std::decay_t<T_a>>::value) {
+        if (fabs(value_of(delta.d_)) < precision) {
+          break;
+        }
+      } else {
+        break;
+      }
+    }
+  }
+
+  return log_prefactor - log(f);
+}
+
+}  // namespace internal
+
 template <typename T_y, typename T_shape, typename T_inv_scale>
-inline return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(
-    const T_y& y, const T_shape& alpha, const T_inv_scale& beta) {
+return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
+                                                     const T_shape& alpha,
+                                                     const T_inv_scale& beta) {
   using T_partials_return = partials_return_t<T_y, T_shape, T_inv_scale>;
   using std::exp;
   using std::log;
-  using std::pow;
   using T_y_ref = ref_type_t<T_y>;
   using T_alpha_ref = ref_type_t<T_shape>;
   using T_beta_ref = ref_type_t<T_inv_scale>;
@@ -51,61 +105,106 @@ inline return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(
   scalar_seq_view<T_y_ref> y_vec(y_ref);
   scalar_seq_view<T_alpha_ref> alpha_vec(alpha_ref);
   scalar_seq_view<T_beta_ref> beta_vec(beta_ref);
-  size_t N = max_size(y, alpha, beta);
-
-  // Explicit return for extreme values
-  // The gradients are technically ill-defined, but treated as zero
-  for (size_t i = 0; i < stan::math::size(y); i++) {
-    if (y_vec.val(i) == 0) {
-      // LCCDF(0) = log(P(Y > 0)) = log(1) = 0
-      return ops_partials.build(0.0);
-    }
-  }
+  const size_t N = max_size(y, alpha, beta);
+
+  constexpr bool need_y_beta_deriv = !is_constant_all<T_y, T_inv_scale>::value;
 
   for (size_t n = 0; n < N; n++) {
     // Explicit results for extreme values
     // The gradients are technically ill-defined, but treated as zero
-    if (y_vec.val(n) == INFTY) {
-      // LCCDF(∞) = log(P(Y > ∞)) = log(0) = -∞
+    const T_partials_return y_dbl = value_of(y_vec.val(n));
+    if (y_dbl == 0.0) {
+      continue;
+    }
+    if (y_dbl == INFTY) {
       return ops_partials.build(negative_infinity());
     }
 
-    const T_partials_return y_dbl = y_vec.val(n);
-    const T_partials_return alpha_dbl = alpha_vec.val(n);
-    const T_partials_return beta_dbl = beta_vec.val(n);
-    const T_partials_return beta_y_dbl = beta_dbl * y_dbl;
+    const T_partials_return alpha_dbl = value_of(alpha_vec.val(n));
+    const T_partials_return beta_dbl = value_of(beta_vec.val(n));
+
+    const T_partials_return beta_y = beta_dbl * y_dbl;
+    if (beta_y == INFTY) {
+      return ops_partials.build(negative_infinity());
+    }
 
-    // Qn = 1 - Pn
-    const T_partials_return Qn = gamma_q(alpha_dbl, beta_y_dbl);
-    const T_partials_return log_Qn = log(Qn);
+    bool use_cf = beta_y > alpha_dbl + 1.0;
+    T_partials_return log_Qn;
+    [[maybe_unused]] T_partials_return dlogQ_dalpha = 0.0;
+    // Extract double values for continued fraction - we handle y/beta derivatives via hazard
+    const double beta_y_dbl = value_of(value_of(beta_y));
+    const double alpha_dbl_val = value_of(value_of(alpha_dbl));
+
+    if (use_cf) {
+      if constexpr (is_autodiff_v<T_shape>) {
+        stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
+        const stan::math::fvar<double> logq_f
+            = internal::log_q_gamma_cf(a_f, beta_y_dbl);
+        log_Qn = logq_f.val_;
+        dlogQ_dalpha = logq_f.d_;
+      } else {
+        log_Qn = internal::log_q_gamma_cf(alpha_dbl_val, beta_y_dbl);
+      }
+    } else {
+      const T_partials_return Pn = gamma_p(alpha_dbl, beta_y);
+      log_Qn = log1m(Pn);
+
+      if (!std::isfinite(value_of(value_of(log_Qn)))) {
+        use_cf = beta_y > 0.0;
+        if (use_cf) {
+          if constexpr (is_autodiff_v<T_shape>) {
+            stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
+            const stan::math::fvar<double> logq_f
+                = internal::log_q_gamma_cf(a_f, beta_y_dbl);
+            log_Qn = logq_f.val_;
+            dlogQ_dalpha = logq_f.d_;
+          } else {
+            log_Qn = internal::log_q_gamma_cf(alpha_dbl_val, beta_y_dbl);
+          }
+        }
+      }
 
+      if constexpr (is_autodiff_v<T_shape>) {
+        if (!use_cf) {
+          const T_partials_return Qn = exp(log_Qn);
+          if (Qn > 0.0) {
+            dlogQ_dalpha = -grad_reg_lower_inc_gamma(alpha_dbl, beta_y) / Qn;
+          } else {
+            stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
+            const stan::math::fvar<double> logq_f
+                = internal::log_q_gamma_cf(a_f, beta_y_dbl);
+            log_Qn = logq_f.val_;
+            dlogQ_dalpha = logq_f.d_;
+            use_cf = true;
+          }
+        }
+      }
+    }
+    if (!std::isfinite(value_of(value_of(log_Qn)))) {
+      return ops_partials.build(negative_infinity());
+    }
     P += log_Qn;
 
-    if constexpr (is_any_autodiff_v<T_y, T_inv_scale>) {
-      const T_partials_return log_y_dbl = log(y_dbl);
-      const T_partials_return log_beta_dbl = log(beta_dbl);
-      const T_partials_return log_pdf
-          = alpha_dbl * log_beta_dbl - lgamma(alpha_dbl)
-            + (alpha_dbl - 1.0) * log_y_dbl - beta_y_dbl;
-      const T_partials_return common_term = exp(log_pdf - log_Qn);
+    if constexpr (need_y_beta_deriv) {
+      const T_partials_return log_y = log(y_dbl);
+      const T_partials_return log_beta = log(beta_dbl);
+      const T_partials_return lgamma_alpha = lgamma(alpha_dbl);
+      const T_partials_return alpha_minus_one = fma(alpha_dbl, log_y, -log_y);
+
+      const T_partials_return log_pdf = alpha_dbl * log_beta - lgamma_alpha
+                                        + alpha_minus_one - beta_y;
+
+      const T_partials_return hazard = exp(log_pdf - log_Qn);  // f/Q
 
       if constexpr (is_autodiff_v<T_y>) {
-        // d/dy log(1-F(y)) = -f(y)/(1-F(y))
-        partials<0>(ops_partials)[n] -= common_term;
+        partials<0>(ops_partials)[n] -= hazard;
       }
       if constexpr (is_autodiff_v<T_inv_scale>) {
-        // d/dbeta log(1-F(y)) = -y*f(y)/(beta*(1-F(y)))
-        partials<2>(ops_partials)[n] -= y_dbl / beta_dbl * common_term;
+        partials<2>(ops_partials)[n] -= (y_dbl / beta_dbl) * hazard;
       }
     }
-
     if constexpr (is_autodiff_v<T_shape>) {
-      const T_partials_return digamma_val = digamma(alpha_dbl);
-      const T_partials_return gamma_val = tgamma(alpha_dbl);
-      // d/dalpha log(1-F(y)) = grad_upper_inc_gamma / (1-F(y))
-      partials<1>(ops_partials)[n]
-          += grad_reg_inc_gamma(alpha_dbl, beta_y_dbl, gamma_val, digamma_val)
-             / Qn;
+      partials<1>(ops_partials)[n] += dlogQ_dalpha;
     }
   }
   return ops_partials.build(P);
diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp
index 2893f2f0166..faddf19914e 100644
--- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp
+++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp
@@ -66,6 +66,31 @@ TEST(ProbGamma, lccdf_small_alpha_small_y) {
   EXPECT_LT(result, 0.0);
 }
 
+TEST(ProbGamma, lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) {
+  using stan::math::gamma_lccdf;
+  using stan::math::gamma_p;
+  using stan::math::gamma_q;
+  using stan::math::log1m;
+
+  // For large alpha and very small y, the CCDF is extremely close to 1.
+  // The old implementation computed `log(gamma_q(alpha, beta * y))`, which can
+  // round to `log(1) == 0`. The updated implementation uses `log1m(gamma_p)`,
+  // which preserves the tiny negative value.
+  double y = 1e-8;
+  double alpha = 31.25;
+  double beta = 1.0;
+
+  double new_val = gamma_lccdf(y, alpha, beta);
+  double expected = log1m(gamma_p(alpha, beta * y));
+
+  // Old code: log(gamma_q(alpha, beta * y))
+  double old_val = std::log(gamma_q(alpha, beta * y));
+
+  EXPECT_EQ(old_val, 0.0);
+  EXPECT_LT(new_val, 0.0);
+  EXPECT_DOUBLE_EQ(new_val, expected);
+}
+
 TEST(ProbGamma, lccdf_large_alpha_large_y) {
   using stan::math::gamma_lccdf;
 
@@ -154,6 +179,29 @@ TEST(ProbGamma, lccdf_extreme_large_alpha) {
   EXPECT_TRUE(std::isfinite(result));
 }
 
+TEST(ProbGamma, lccdf_large_alpha_1000_beta_3) {
+  using stan::math::gamma_lccdf;
+
+  // Large alpha = 1000, beta = 3
+  double alpha = 1000.0;
+  double beta = 3.0;
+
+  // Test various y values
+  std::vector<double> y_values = {100.0, 300.0, 333.333, 400.0, 500.0};
+
+  for (double y : y_values) {
+    double result = gamma_lccdf(y, alpha, beta);
+
+    // Result should be finite
+    EXPECT_TRUE(std::isfinite(result))
+        << "Failed for y=" << y << ", alpha=" << alpha << ", beta=" << beta;
+
+    // Result should be <= 0 (log of probability)
+    EXPECT_LE(result, 0.0) << "Positive value for y=" << y << ", alpha="
+                           << alpha << ", beta=" << beta;
+  }
+}
+
 TEST(ProbGamma, lccdf_monotonic_in_y) {
   using stan::math::gamma_lccdf;
 
diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp
index 1066773e060..2af454cd219 100644
--- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp
+++ b/test/unit/math/rev/prob/gamma_lccdf_test.cpp
@@ -230,6 +230,48 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_small) {
   }
 }
 
+TEST(ProbDistributionsGamma,
+     lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) {
+  using stan::math::gamma_lccdf;
+  using stan::math::gamma_p;
+  using stan::math::gamma_q;
+  using stan::math::log1m;
+  using stan::math::var;
+
+  // Same comparison as the prim test, but also exercises autodiff for
+  // alpha > 30.
+  double y_d = 1e-8;
+  double alpha_d = 31.25;
+  double beta_d = 1.0;
+
+  var y_v = y_d;
+  var alpha_v = alpha_d;
+  var beta_v = beta_d;
+
+  var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v);
+
+  // Old code: log(gamma_q(alpha, beta * y))
+  double old_val = std::log(gamma_q(alpha_d, beta_d * y_d));
+  double expected = log1m(gamma_p(alpha_d, beta_d * y_d));
+
+  EXPECT_EQ(old_val, 0.0);
+  EXPECT_LT(lccdf_var.val(), 0.0);
+  EXPECT_DOUBLE_EQ(lccdf_var.val(), expected);
+
+  std::vector<var> vars = {y_v, alpha_v, beta_v};
+  std::vector<double> grads;
+  lccdf_var.grad(vars, grads);
+
+  for (size_t i = 0; i < grads.size(); ++i) {
+    EXPECT_FALSE(std::isnan(grads[i])) << "Gradient " << i << " is NaN";
+    EXPECT_TRUE(std::isfinite(grads[i]))
+        << "Gradient " << i << " is not finite";
+  }
+
+  // d/dy log(CCDF) should be <= 0 (can underflow to -0)
+  EXPECT_LE(grads[0], 0.0);
+}
+
 TEST(ProbDistributionsGamma, lccdf_extreme_values_large) {
   using stan::math::gamma_lccdf;
   using stan::math::var;
@@ -258,6 +300,36 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_large) {
   }
 }
 
+TEST(ProbDistributionsGamma, lccdf_large_alpha_1000_beta_3) {
+  using stan::math::gamma_lccdf;
+  using stan::math::var;
+
+  // Large alpha = 1000, beta = 3
+  // Note: This test only checks values, not gradients, as large alpha values
+  // can cause numerical issues with gradient computation
+  double alpha_d = 1000.0;
+  double beta_d = 3.0;
+
+  // Test various y values
+  std::vector<double> y_values = {100.0, 300.0, 333.333, 400.0, 500.0};
+
+  for (double y_d : y_values) {
+    var y_v = y_d;
+    var alpha_v = alpha_d;
+    var beta_v = beta_d;
+
+    var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v);
+
+    // Value should be finite and <= 0
+    EXPECT_TRUE(std::isfinite(lccdf_var.val()))
+        << "Failed for y=" << y_d << ", alpha=" << alpha_d << ", beta="
+        << beta_d;
+    EXPECT_LE(lccdf_var.val(), 0.0)
+        << "Positive value for y=" << y_d << ", alpha=" << alpha_d
+        << ", beta=" << beta_d;
+  }
+}
+
 TEST(ProbDistributionsGamma, lccdf_alpha_one_derivatives) {
   using stan::math::gamma_lccdf;
   using stan::math::var;

From f0cfa1c4f379e4f2c6ec177ca7de4816673bdcde Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Tue, 23 Dec 2025 14:44:14 -0500
Subject: [PATCH 2/5] [Jenkins] auto-formatting by clang-format version
 10.0.0-4ubuntu1

---
 stan/math/prim/prob/gamma_lccdf.hpp           | 13 +++++++------
 test/unit/math/prim/prob/gamma_lccdf_test.cpp |  4 ++--
 test/unit/math/rev/prob/gamma_lccdf_test.cpp  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp
index e28ce1cba65..3b2ec46d217 100644
--- a/stan/math/prim/prob/gamma_lccdf.hpp
+++ b/stan/math/prim/prob/gamma_lccdf.hpp
@@ -27,12 +27,12 @@ namespace math {
 namespace internal {
 
 template <typename T_a>
-inline auto log_q_gamma_cf(const T_a& a, const double x,
-                           int max_steps = 250, double precision = 1e-16) {
-  using std::fabs;
+inline auto log_q_gamma_cf(const T_a& a, const double x, int max_steps = 250,
+                           double precision = 1e-16) {
   using stan::math::lgamma;
   using stan::math::log;
   using stan::math::value_of;
+  using std::fabs;
 
   const auto log_prefactor = a * log(x) - x - lgamma(a);
 
@@ -131,7 +131,8 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
     bool use_cf = beta_y > alpha_dbl + 1.0;
     T_partials_return log_Qn;
     [[maybe_unused]] T_partials_return dlogQ_dalpha = 0.0;
-    // Extract double values for continued fraction - we handle y/beta derivatives via hazard
+    // Extract double values for continued fraction - we handle y/beta
+    // derivatives via hazard
     const double beta_y_dbl = value_of(value_of(beta_y));
     const double alpha_dbl_val = value_of(value_of(alpha_dbl));
 
@@ -191,8 +192,8 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
       const T_partials_return lgamma_alpha = lgamma(alpha_dbl);
       const T_partials_return alpha_minus_one = fma(alpha_dbl, log_y, -log_y);
 
-      const T_partials_return log_pdf = alpha_dbl * log_beta - lgamma_alpha
-                                        + alpha_minus_one - beta_y;
+      const T_partials_return log_pdf
+          = alpha_dbl * log_beta - lgamma_alpha + alpha_minus_one - beta_y;
 
       const T_partials_return hazard = exp(log_pdf - log_Qn);  // f/Q
 
diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp
index faddf19914e..84fa35fa1b2 100644
--- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp
+++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp
@@ -197,8 +197,8 @@ TEST(ProbGamma, lccdf_large_alpha_1000_beta_3) {
         << "Failed for y=" << y << ", alpha=" << alpha << ", beta=" << beta;
 
     // Result should be <= 0 (log of probability)
-    EXPECT_LE(result, 0.0) << "Positive value for y=" << y << ", alpha="
-                           << alpha << ", beta=" << beta;
+    EXPECT_LE(result, 0.0) << "Positive value for y=" << y
+                           << ", alpha=" << alpha << ", beta=" << beta;
   }
 }
 
diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp
index 2af454cd219..4ddd846a8fe 100644
--- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp
+++ b/test/unit/math/rev/prob/gamma_lccdf_test.cpp
@@ -322,8 +322,8 @@ TEST(ProbDistributionsGamma, lccdf_large_alpha_1000_beta_3) {
 
     // Value should be finite and <= 0
     EXPECT_TRUE(std::isfinite(lccdf_var.val()))
-        << "Failed for y=" << y_d << ", alpha=" << alpha_d << ", beta="
-        << beta_d;
+        << "Failed for y=" << y_d << ", alpha=" << alpha_d
+        << ", beta=" << beta_d;
     EXPECT_LE(lccdf_var.val(), 0.0)
         << "Positive value for y=" << y_d << ", alpha=" << alpha_d
         << ", beta=" << beta_d;

From 2125bb67e916da2c01eedcfc1ce91c74f8816607 Mon Sep 17 00:00:00 2001
From: spinkney <sean.pinkney@gmail.com>
Date: Wed, 24 Dec 2025 06:57:50 -0500
Subject: [PATCH 3/5] explicit type

---
 stan/math/prim/prob/gamma_lccdf.hpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp
index 3b2ec46d217..3deccc50fce 100644
--- a/stan/math/prim/prob/gamma_lccdf.hpp
+++ b/stan/math/prim/prob/gamma_lccdf.hpp
@@ -8,7 +8,8 @@
 #include <stan/math/prim/fun/fma.hpp>
 #include <stan/math/prim/fun/gamma_p.hpp>
 #include <stan/math/prim/fun/grad_reg_lower_inc_gamma.hpp>
-#include <stan/math/prim/fun/log.hpp>
+// #include <stan/math/prim/fun/lgamma.hpp>
+// #include <stan/math/prim/fun/log.hpp>
 #include <stan/math/prim/fun/log1m.hpp>
 #include <stan/math/prim/fun/max_size.hpp>
 #include <stan/math/prim/fun/scalar_seq_view.hpp>
@@ -16,6 +17,7 @@
 #include <stan/math/prim/fun/size_zero.hpp>
 #include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
+
 #include <stan/math/fwd/fun/lgamma.hpp>
 #include <stan/math/fwd/fun/log.hpp>
 #include <stan/math/fwd/fun/value_of.hpp>
@@ -26,6 +28,19 @@ namespace math {
 
 namespace internal {
 
+/**
+ * Compute log(Q(a,x)) using continued fraction expansion for upper incomplete
+ * gamma function, where Q(a,x) = Gamma(a,x) / Gamma(a) is the regularized
+ * upper incomplete gamma function.
+ *
+ * @tparam T_a Type of shape parameter a; can be either double or fvar<double>
+ *             for forward-mode automatic differentiation
+ * @param a Shape parameter
+ * @param x Value at which to evaluate
+ * @param max_steps Maximum number of continued fraction iterations
+ * @param precision Convergence threshold
+ * @return log(Q(a,x)) with same type as T_a
+ */
 template <typename T_a>
 inline auto log_q_gamma_cf(const T_a& a, const double x, int max_steps = 250,
                            double precision = 1e-16) {

From 1f112d312ae73be6617ba3f0ca95f543c4636ecd Mon Sep 17 00:00:00 2001
From: spinkney <sean.pinkney@gmail.com>
Date: Wed, 24 Dec 2025 08:42:53 -0500
Subject: [PATCH 4/5] remove fwd and fix templating

---
 stan/math/prim/fun/log_gamma_q_dgamma.hpp | 128 +++++++++++++++++
 stan/math/prim/prob/gamma_lccdf.hpp       | 159 ++++++++++++++--------
 2 files changed, 232 insertions(+), 55 deletions(-)
 create mode 100644 stan/math/prim/fun/log_gamma_q_dgamma.hpp

diff --git a/stan/math/prim/fun/log_gamma_q_dgamma.hpp b/stan/math/prim/fun/log_gamma_q_dgamma.hpp
new file mode 100644
index 00000000000..9fa611eed9d
--- /dev/null
+++ b/stan/math/prim/fun/log_gamma_q_dgamma.hpp
@@ -0,0 +1,128 @@
+#ifndef STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP
+#define STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/fun/constants.hpp>
+#include <stan/math/prim/fun/digamma.hpp>
+#include <stan/math/prim/fun/exp.hpp>
+#include <stan/math/prim/fun/gamma_p.hpp>
+#include <stan/math/prim/fun/gamma_q.hpp>
+#include <stan/math/prim/fun/grad_reg_inc_gamma.hpp>
+#include <stan/math/prim/fun/lgamma.hpp>
+#include <stan/math/prim/fun/log.hpp>
+#include <stan/math/prim/fun/log1m.hpp>
+#include <stan/math/prim/fun/tgamma.hpp>
+#include <stan/math/prim/fun/value_of.hpp>
+#include <cmath>
+
+namespace stan {
+namespace math {
+
+/**
+ * Result structure containing log(Q(a,z)) and its gradient with respect to a.
+ *
+ * @tparam T return type
+ */
+template <typename T>
+struct log_gamma_q_result {
+  T log_q;      ///< log(Q(a,z)) where Q is upper regularized incomplete gamma
+  T dlog_q_da;  ///< d/da log(Q(a,z))
+};
+
+/**
+ * Compute log(Q(a,z)) and its gradient with respect to a using continued
+ * fraction expansion, where Q(a,z) = Gamma(a,z) / Gamma(a) is the regularized
+ * upper incomplete gamma function.
+ *
+ * This uses a continued fraction representation for numerical stability when
+ * computing the upper incomplete gamma function in log space, along with
+ * analytical gradient computation.
+ *
+ * @tparam T_a type of the shape parameter
+ * @tparam T_z type of the value parameter
+ * @param a shape parameter (must be positive)
+ * @param z value parameter (must be non-negative)
+ * @param max_steps maximum iterations for continued fraction
+ * @param precision convergence threshold
+ * @return structure containing log(Q(a,z)) and d/da log(Q(a,z))
+ */
+template <typename T_a, typename T_z>
+inline log_gamma_q_result<return_type_t<T_a, T_z>> log_gamma_q_dgamma(
+    const T_a& a, const T_z& z, int max_steps = 250,
+    double precision = 1e-16) {
+  using std::exp;
+  using std::fabs;
+  using std::log;
+  using T_return = return_type_t<T_a, T_z>;
+
+  const double a_dbl = value_of(a);
+  const double z_dbl = value_of(z);
+
+  log_gamma_q_result<T_return> result;
+
+  // For z > a + 1, use continued fraction for better numerical stability
+  if (z_dbl > a_dbl + 1.0) {
+    // Continued fraction for Q(a,z) in log space
+    // log(Q(a,z)) = log_prefactor - log(continued_fraction)
+    const double log_prefactor = a_dbl * log(z_dbl) - z_dbl - lgamma(a_dbl);
+
+    double b = z_dbl + 1.0 - a_dbl;
+    double C = (fabs(b) >= EPSILON) ? b : EPSILON;
+    double D = 0.0;
+    double f = C;
+
+    for (int i = 1; i <= max_steps; ++i) {
+      const double an = -i * (i - a_dbl);
+      b += 2.0;
+
+      D = b + an * D;
+      if (fabs(D) < EPSILON) {
+        D = EPSILON;
+      }
+      C = b + an / C;
+      if (fabs(C) < EPSILON) {
+        C = EPSILON;
+      }
+
+      D = 1.0 / D;
+      const double delta = C * D;
+      f *= delta;
+
+      const double delta_m1 = fabs(delta - 1.0);
+      if (delta_m1 < precision) {
+        break;
+      }
+    }
+
+    result.log_q = log_prefactor - log(f);
+
+    // For gradient, use: d/da log(Q) = (1/Q) * dQ/da
+    // grad_reg_inc_gamma computes dQ/da
+    const double Q_val = exp(result.log_q);
+    const double dQ_da = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
+    result.dlog_q_da = dQ_da / Q_val;
+
+  } else {
+    // For z <= a + 1, use log1m(P(a,z)) for better numerical accuracy
+    const double P_val = gamma_p(a_dbl, z_dbl);
+    result.log_q = log1m(P_val);
+
+    // Gradient: d/da log(Q) = (1/Q) * dQ/da
+    // grad_reg_inc_gamma computes dQ/da
+    const double Q_val = exp(result.log_q);
+    if (Q_val > 0) {
+      const double dQ_da = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
+      result.dlog_q_da = dQ_da / Q_val;
+    } else {
+      // Fallback if Q rounds to zero - use asymptotic approximation
+      result.dlog_q_da = log(z_dbl) - digamma(a_dbl);
+    }
+  }
+
+  return result;
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp
index 3deccc50fce..0c2cc21ac8a 100644
--- a/stan/math/prim/prob/gamma_lccdf.hpp
+++ b/stan/math/prim/prob/gamma_lccdf.hpp
@@ -4,23 +4,23 @@
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/constants.hpp>
+#include <stan/math/prim/fun/digamma.hpp>
 #include <stan/math/prim/fun/exp.hpp>
 #include <stan/math/prim/fun/fma.hpp>
 #include <stan/math/prim/fun/gamma_p.hpp>
+#include <stan/math/prim/fun/grad_reg_inc_gamma.hpp>
 #include <stan/math/prim/fun/grad_reg_lower_inc_gamma.hpp>
-// #include <stan/math/prim/fun/lgamma.hpp>
-// #include <stan/math/prim/fun/log.hpp>
+#include <stan/math/prim/fun/lgamma.hpp>
+#include <stan/math/prim/fun/log.hpp>
 #include <stan/math/prim/fun/log1m.hpp>
 #include <stan/math/prim/fun/max_size.hpp>
 #include <stan/math/prim/fun/scalar_seq_view.hpp>
 #include <stan/math/prim/fun/size.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
+#include <stan/math/prim/fun/tgamma.hpp>
 #include <stan/math/prim/fun/value_of.hpp>
+#include <stan/math/prim/fun/log_gamma_q_dgamma.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
-
-#include <stan/math/fwd/fun/lgamma.hpp>
-#include <stan/math/fwd/fun/log.hpp>
-#include <stan/math/fwd/fun/value_of.hpp>
 #include <cmath>
 
 namespace stan {
@@ -30,34 +30,35 @@ namespace internal {
 
 /**
  * Compute log(Q(a,x)) using continued fraction expansion for upper incomplete
- * gamma function, where Q(a,x) = Gamma(a,x) / Gamma(a) is the regularized
- * upper incomplete gamma function.
+ * gamma function. When used with fvar types, automatically computes derivatives.
  *
- * @tparam T_a Type of shape parameter a; can be either double or fvar<double>
- *             for forward-mode automatic differentiation
+ * @tparam T_a Type of shape parameter a (double or fvar types)
  * @param a Shape parameter
  * @param x Value at which to evaluate
  * @param max_steps Maximum number of continued fraction iterations
  * @param precision Convergence threshold
  * @return log(Q(a,x)) with same type as T_a
  */
-template <typename T_a>
-inline auto log_q_gamma_cf(const T_a& a, const double x, int max_steps = 250,
+template <typename T_a, typename T_x>
+inline auto log_q_gamma_cf(const T_a& a, const T_x& x, int max_steps = 250,
                            double precision = 1e-16) {
   using stan::math::lgamma;
   using stan::math::log;
   using stan::math::value_of;
   using std::fabs;
+  using T_return = return_type_t<T_a, T_x>;
 
-  const auto log_prefactor = a * log(x) - x - lgamma(a);
+  const T_return a_ret = a;
+  const T_return x_ret = x;
+  const auto log_prefactor = a_ret * log(x_ret) - x_ret - lgamma(a_ret);
 
-  auto b = x + 1.0 - a;
-  auto C = (fabs(value_of(b)) >= EPSILON) ? b : T_a(EPSILON);
-  auto D = T_a(0.0);
+  auto b = x_ret + 1.0 - a_ret;
+  auto C = (fabs(value_of(b)) >= EPSILON) ? b : T_return(EPSILON);
+  auto D = T_return(0.0);
   auto f = C;
 
   for (int i = 1; i <= max_steps; ++i) {
-    auto an = -i * (i - a);
+    auto an = -i * (i - a_ret);
     b += 2.0;
 
     D = b + an * D;
@@ -73,15 +74,9 @@ inline auto log_q_gamma_cf(const T_a& a, const double x, int max_steps = 250,
     auto delta = C * D;
     f *= delta;
 
-    const double delta_m1 = fabs(value_of(delta) - 1.0);
+    const double delta_m1 = value_of(fabs(value_of(delta) - 1.0));
     if (delta_m1 < precision) {
-      if constexpr (stan::is_fvar<std::decay_t<T_a>>::value) {
-        if (fabs(value_of(delta.d_)) < precision) {
-          break;
-        }
-      } else {
-        break;
-      }
+      break;
     }
   }
 
@@ -123,11 +118,16 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
   const size_t N = max_size(y, alpha, beta);
 
   constexpr bool need_y_beta_deriv = !is_constant_all<T_y, T_inv_scale>::value;
+  constexpr bool any_fvar
+      = is_fvar<scalar_type_t<T_y>>::value
+        || is_fvar<scalar_type_t<T_shape>>::value
+        || is_fvar<scalar_type_t<T_inv_scale>>::value;
+  constexpr bool partials_fvar = is_fvar<T_partials_return>::value;
 
   for (size_t n = 0; n < N; n++) {
     // Explicit results for extreme values
     // The gradients are technically ill-defined, but treated as zero
-    const T_partials_return y_dbl = value_of(y_vec.val(n));
+    const T_partials_return y_dbl = y_vec.val(n);
     if (y_dbl == 0.0) {
       continue;
     }
@@ -135,8 +135,8 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
       return ops_partials.build(negative_infinity());
     }
 
-    const T_partials_return alpha_dbl = value_of(alpha_vec.val(n));
-    const T_partials_return beta_dbl = value_of(beta_vec.val(n));
+    const T_partials_return alpha_dbl = alpha_vec.val(n);
+    const T_partials_return beta_dbl = beta_vec.val(n);
 
     const T_partials_return beta_y = beta_dbl * y_dbl;
     if (beta_y == INFTY) {
@@ -146,20 +146,35 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
     bool use_cf = beta_y > alpha_dbl + 1.0;
     T_partials_return log_Qn;
     [[maybe_unused]] T_partials_return dlogQ_dalpha = 0.0;
-    // Extract double values for continued fraction - we handle y/beta
-    // derivatives via hazard
-    const double beta_y_dbl = value_of(value_of(beta_y));
-    const double alpha_dbl_val = value_of(value_of(alpha_dbl));
+    // Extract double values for the double-only continued fraction path.
+    [[maybe_unused]] const double beta_y_dbl = value_of(value_of(beta_y));
+    [[maybe_unused]] const double alpha_dbl_val = value_of(value_of(alpha_dbl));
 
     if (use_cf) {
-      if constexpr (is_autodiff_v<T_shape>) {
-        stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
-        const stan::math::fvar<double> logq_f
-            = internal::log_q_gamma_cf(a_f, beta_y_dbl);
-        log_Qn = logq_f.val_;
-        dlogQ_dalpha = logq_f.d_;
+      if constexpr (!any_fvar && is_autodiff_v<T_shape>) {
+        // var-only: use analytical gradient with double inputs
+        auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl);
+        log_Qn = log_q_result.log_q;
+        dlogQ_dalpha = log_q_result.dlog_q_da;
       } else {
-        log_Qn = internal::log_q_gamma_cf(alpha_dbl_val, beta_y_dbl);
+        log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y);
+        if constexpr (is_autodiff_v<T_shape>) {
+          if constexpr (partials_fvar) {
+            auto alpha_unit = alpha_dbl;
+            alpha_unit.d_ = 1;
+            auto beta_unit = beta_y;
+            beta_unit.d_ = 0;
+            auto log_Qn_fvar
+                = internal::log_q_gamma_cf(alpha_unit, beta_unit);
+            dlogQ_dalpha = log_Qn_fvar.d_;
+          } else {
+            const T_partials_return Qn = exp(log_Qn);
+            dlogQ_dalpha
+                = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl),
+                                     digamma(alpha_dbl))
+                  / Qn;
+          }
+        }
       }
     } else {
       const T_partials_return Pn = gamma_p(alpha_dbl, beta_y);
@@ -168,30 +183,64 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
       if (!std::isfinite(value_of(value_of(log_Qn)))) {
         use_cf = beta_y > 0.0;
         if (use_cf) {
-          if constexpr (is_autodiff_v<T_shape>) {
-            stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
-            const stan::math::fvar<double> logq_f
-                = internal::log_q_gamma_cf(a_f, beta_y_dbl);
-            log_Qn = logq_f.val_;
-            dlogQ_dalpha = logq_f.d_;
+          // Fallback to continued fraction if log1m fails
+          if constexpr (!any_fvar && is_autodiff_v<T_shape>) {
+            auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl);
+            log_Qn = log_q_result.log_q;
+            dlogQ_dalpha = log_q_result.dlog_q_da;
           } else {
-            log_Qn = internal::log_q_gamma_cf(alpha_dbl_val, beta_y_dbl);
+            log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y);
+            if constexpr (is_autodiff_v<T_shape>) {
+              if constexpr (partials_fvar) {
+                auto alpha_unit = alpha_dbl;
+                alpha_unit.d_ = 1;
+                auto beta_unit = beta_y;
+                beta_unit.d_ = 0;
+                auto log_Qn_fvar
+                    = internal::log_q_gamma_cf(alpha_unit, beta_unit);
+                dlogQ_dalpha = log_Qn_fvar.d_;
+              } else {
+                const T_partials_return Qn = exp(log_Qn);
+                dlogQ_dalpha
+                    = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl),
+                                         digamma(alpha_dbl))
+                      / Qn;
+              }
+            }
           }
         }
       }
 
       if constexpr (is_autodiff_v<T_shape>) {
         if (!use_cf) {
-          const T_partials_return Qn = exp(log_Qn);
-          if (Qn > 0.0) {
-            dlogQ_dalpha = -grad_reg_lower_inc_gamma(alpha_dbl, beta_y) / Qn;
+          if constexpr (partials_fvar) {
+            auto alpha_unit = alpha_dbl;
+            alpha_unit.d_ = 1;
+            auto beta_unit = beta_y;
+            beta_unit.d_ = 0;
+            auto log_Qn_fvar = log1m(gamma_p(alpha_unit, beta_unit));
+            dlogQ_dalpha = log_Qn_fvar.d_;
           } else {
-            stan::math::fvar<double> a_f(alpha_dbl_val, 1.0);
-            const stan::math::fvar<double> logq_f
-                = internal::log_q_gamma_cf(a_f, beta_y_dbl);
-            log_Qn = logq_f.val_;
-            dlogQ_dalpha = logq_f.d_;
-            use_cf = true;
+            const T_partials_return Qn = exp(log_Qn);
+            if (Qn > 0.0) {
+              dlogQ_dalpha = -grad_reg_lower_inc_gamma(alpha_dbl, beta_y) / Qn;
+            } else {
+              // Fallback to continued fraction if Q rounds to zero
+              if constexpr (!any_fvar) {
+                auto log_q_result
+                    = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl);
+                log_Qn = log_q_result.log_q;
+                dlogQ_dalpha = log_q_result.dlog_q_da;
+              } else {
+                log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y);
+                const T_partials_return Qn_cf = exp(log_Qn);
+                dlogQ_dalpha
+                    = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl),
+                                         digamma(alpha_dbl))
+                      / Qn_cf;
+              }
+              use_cf = true;
+            }
           }
         }
       }

From 9e5ca4b895758d15c72e4c09864dcfff50384311 Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Wed, 24 Dec 2025 08:44:03 -0500
Subject: [PATCH 5/5] [Jenkins] auto-formatting by clang-format version
 10.0.0-4ubuntu1

---
 stan/math/prim/fun/log_gamma_q_dgamma.hpp |  9 +++++----
 stan/math/prim/prob/gamma_lccdf.hpp       | 13 ++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/stan/math/prim/fun/log_gamma_q_dgamma.hpp b/stan/math/prim/fun/log_gamma_q_dgamma.hpp
index 9fa611eed9d..81c0605257b 100644
--- a/stan/math/prim/fun/log_gamma_q_dgamma.hpp
+++ b/stan/math/prim/fun/log_gamma_q_dgamma.hpp
@@ -48,8 +48,7 @@ struct log_gamma_q_result {
  */
 template <typename T_a, typename T_z>
 inline log_gamma_q_result<return_type_t<T_a, T_z>> log_gamma_q_dgamma(
-    const T_a& a, const T_z& z, int max_steps = 250,
-    double precision = 1e-16) {
+    const T_a& a, const T_z& z, int max_steps = 250, double precision = 1e-16) {
   using std::exp;
   using std::fabs;
   using std::log;
@@ -99,7 +98,8 @@ inline log_gamma_q_result<return_type_t<T_a, T_z>> log_gamma_q_dgamma(
     // For gradient, use: d/da log(Q) = (1/Q) * dQ/da
     // grad_reg_inc_gamma computes dQ/da
     const double Q_val = exp(result.log_q);
-    const double dQ_da = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
+    const double dQ_da
+        = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
     result.dlog_q_da = dQ_da / Q_val;
 
   } else {
@@ -111,7 +111,8 @@ inline log_gamma_q_result<return_type_t<T_a, T_z>> log_gamma_q_dgamma(
     // grad_reg_inc_gamma computes dQ/da
     const double Q_val = exp(result.log_q);
     if (Q_val > 0) {
-      const double dQ_da = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
+      const double dQ_da
+          = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl));
       result.dlog_q_da = dQ_da / Q_val;
     } else {
       // Fallback if Q rounds to zero - use asymptotic approximation
diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp
index 0c2cc21ac8a..f373ee80edf 100644
--- a/stan/math/prim/prob/gamma_lccdf.hpp
+++ b/stan/math/prim/prob/gamma_lccdf.hpp
@@ -30,7 +30,8 @@ namespace internal {
 
 /**
  * Compute log(Q(a,x)) using continued fraction expansion for upper incomplete
- * gamma function. When used with fvar types, automatically computes derivatives.
+ * gamma function. When used with fvar types, automatically computes
+ * derivatives.
  *
  * @tparam T_a Type of shape parameter a (double or fvar types)
  * @param a Shape parameter
@@ -118,10 +119,9 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
   const size_t N = max_size(y, alpha, beta);
 
   constexpr bool need_y_beta_deriv = !is_constant_all<T_y, T_inv_scale>::value;
-  constexpr bool any_fvar
-      = is_fvar<scalar_type_t<T_y>>::value
-        || is_fvar<scalar_type_t<T_shape>>::value
-        || is_fvar<scalar_type_t<T_inv_scale>>::value;
+  constexpr bool any_fvar = is_fvar<scalar_type_t<T_y>>::value
+                            || is_fvar<scalar_type_t<T_shape>>::value
+                            || is_fvar<scalar_type_t<T_inv_scale>>::value;
   constexpr bool partials_fvar = is_fvar<T_partials_return>::value;
 
   for (size_t n = 0; n < N; n++) {
@@ -164,8 +164,7 @@ return_type_t<T_y, T_shape, T_inv_scale> gamma_lccdf(const T_y& y,
             alpha_unit.d_ = 1;
             auto beta_unit = beta_y;
             beta_unit.d_ = 0;
-            auto log_Qn_fvar
-                = internal::log_q_gamma_cf(alpha_unit, beta_unit);
+            auto log_Qn_fvar = internal::log_q_gamma_cf(alpha_unit, beta_unit);
             dlogQ_dalpha = log_Qn_fvar.d_;
           } else {
             const T_partials_return Qn = exp(log_Qn);