diff --git a/stan/math/prim/fun/log_gamma_q_dgamma.hpp b/stan/math/prim/fun/log_gamma_q_dgamma.hpp new file mode 100644 index 00000000000..81c0605257b --- /dev/null +++ b/stan/math/prim/fun/log_gamma_q_dgamma.hpp @@ -0,0 +1,129 @@ +#ifndef STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP +#define STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** + * Result structure containing log(Q(a,z)) and its gradient with respect to a. + * + * @tparam T return type + */ +template +struct log_gamma_q_result { + T log_q; ///< log(Q(a,z)) where Q is upper regularized incomplete gamma + T dlog_q_da; ///< d/da log(Q(a,z)) +}; + +/** + * Compute log(Q(a,z)) and its gradient with respect to a using continued + * fraction expansion, where Q(a,z) = Gamma(a,z) / Gamma(a) is the regularized + * upper incomplete gamma function. + * + * This uses a continued fraction representation for numerical stability when + * computing the upper incomplete gamma function in log space, along with + * analytical gradient computation. + * + * @tparam T_a type of the shape parameter + * @tparam T_z type of the value parameter + * @param a shape parameter (must be positive) + * @param z value parameter (must be non-negative) + * @param max_steps maximum iterations for continued fraction + * @param precision convergence threshold + * @return structure containing log(Q(a,z)) and d/da log(Q(a,z)) + */ +template +inline log_gamma_q_result> log_gamma_q_dgamma( + const T_a& a, const T_z& z, int max_steps = 250, double precision = 1e-16) { + using std::exp; + using std::fabs; + using std::log; + using T_return = return_type_t; + + const double a_dbl = value_of(a); + const double z_dbl = value_of(z); + + log_gamma_q_result result; + + // For z > a + 1, use continued fraction for better numerical stability + if (z_dbl > a_dbl + 1.0) { + // Continued fraction for Q(a,z) in log space + // log(Q(a,z)) = log_prefactor - log(continued_fraction) + const double log_prefactor = a_dbl * log(z_dbl) - z_dbl - lgamma(a_dbl); + + double b = z_dbl + 1.0 - a_dbl; + double C = (fabs(b) >= EPSILON) ? b : EPSILON; + double D = 0.0; + double f = C; + + for (int i = 1; i <= max_steps; ++i) { + const double an = -i * (i - a_dbl); + b += 2.0; + + D = b + an * D; + if (fabs(D) < EPSILON) { + D = EPSILON; + } + C = b + an / C; + if (fabs(C) < EPSILON) { + C = EPSILON; + } + + D = 1.0 / D; + const double delta = C * D; + f *= delta; + + const double delta_m1 = fabs(delta - 1.0); + if (delta_m1 < precision) { + break; + } + } + + result.log_q = log_prefactor - log(f); + + // For gradient, use: d/da log(Q) = (1/Q) * dQ/da + // grad_reg_inc_gamma computes dQ/da + const double Q_val = exp(result.log_q); + const double dQ_da + = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl)); + result.dlog_q_da = dQ_da / Q_val; + + } else { + // For z <= a + 1, use log1m(P(a,z)) for better numerical accuracy + const double P_val = gamma_p(a_dbl, z_dbl); + result.log_q = log1m(P_val); + + // Gradient: d/da log(Q) = (1/Q) * dQ/da + // grad_reg_inc_gamma computes dQ/da + const double Q_val = exp(result.log_q); + if (Q_val > 0) { + const double dQ_da + = grad_reg_inc_gamma(a_dbl, z_dbl, tgamma(a_dbl), digamma(a_dbl)); + result.dlog_q_da = dQ_da / Q_val; + } else { + // Fallback if Q rounds to zero - use asymptotic approximation + result.dlog_q_da = log(z_dbl) - digamma(a_dbl); + } + } + + return result; +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp index a670cefcecf..f373ee80edf 100644 --- a/stan/math/prim/prob/gamma_lccdf.hpp +++ b/stan/math/prim/prob/gamma_lccdf.hpp @@ -6,28 +6,93 @@ #include #include #include -#include +#include +#include #include +#include +#include #include +#include #include #include #include #include #include #include +#include #include #include namespace stan { namespace math { +namespace internal { + +/** + * Compute log(Q(a,x)) using continued fraction expansion for upper incomplete + * gamma function. When used with fvar types, automatically computes + * derivatives. + * + * @tparam T_a Type of shape parameter a (double or fvar types) + * @param a Shape parameter + * @param x Value at which to evaluate + * @param max_steps Maximum number of continued fraction iterations + * @param precision Convergence threshold + * @return log(Q(a,x)) with same type as T_a + */ +template +inline auto log_q_gamma_cf(const T_a& a, const T_x& x, int max_steps = 250, + double precision = 1e-16) { + using stan::math::lgamma; + using stan::math::log; + using stan::math::value_of; + using std::fabs; + using T_return = return_type_t; + + const T_return a_ret = a; + const T_return x_ret = x; + const auto log_prefactor = a_ret * log(x_ret) - x_ret - lgamma(a_ret); + + auto b = x_ret + 1.0 - a_ret; + auto C = (fabs(value_of(b)) >= EPSILON) ? b : T_return(EPSILON); + auto D = T_return(0.0); + auto f = C; + + for (int i = 1; i <= max_steps; ++i) { + auto an = -i * (i - a_ret); + b += 2.0; + + D = b + an * D; + if (fabs(value_of(D)) < EPSILON) { + D = T_a(EPSILON); + } + C = b + an / C; + if (fabs(value_of(C)) < EPSILON) { + C = T_a(EPSILON); + } + + D = 1.0 / D; + auto delta = C * D; + f *= delta; + + const double delta_m1 = value_of(fabs(value_of(delta) - 1.0)); + if (delta_m1 < precision) { + break; + } + } + + return log_prefactor - log(f); +} + +} // namespace internal + template -inline return_type_t gamma_lccdf( - const T_y& y, const T_shape& alpha, const T_inv_scale& beta) { +return_type_t gamma_lccdf(const T_y& y, + const T_shape& alpha, + const T_inv_scale& beta) { using T_partials_return = partials_return_t; using std::exp; using std::log; - using std::pow; using T_y_ref = ref_type_t; using T_alpha_ref = ref_type_t; using T_beta_ref = ref_type_t; @@ -51,61 +116,159 @@ inline return_type_t gamma_lccdf( scalar_seq_view y_vec(y_ref); scalar_seq_view alpha_vec(alpha_ref); scalar_seq_view beta_vec(beta_ref); - size_t N = max_size(y, alpha, beta); - - // Explicit return for extreme values - // The gradients are technically ill-defined, but treated as zero - for (size_t i = 0; i < stan::math::size(y); i++) { - if (y_vec.val(i) == 0) { - // LCCDF(0) = log(P(Y > 0)) = log(1) = 0 - return ops_partials.build(0.0); - } - } + const size_t N = max_size(y, alpha, beta); + + constexpr bool need_y_beta_deriv = !is_constant_all::value; + constexpr bool any_fvar = is_fvar>::value + || is_fvar>::value + || is_fvar>::value; + constexpr bool partials_fvar = is_fvar::value; for (size_t n = 0; n < N; n++) { // Explicit results for extreme values // The gradients are technically ill-defined, but treated as zero - if (y_vec.val(n) == INFTY) { - // LCCDF(∞) = log(P(Y > ∞)) = log(0) = -∞ + const T_partials_return y_dbl = y_vec.val(n); + if (y_dbl == 0.0) { + continue; + } + if (y_dbl == INFTY) { return ops_partials.build(negative_infinity()); } - const T_partials_return y_dbl = y_vec.val(n); const T_partials_return alpha_dbl = alpha_vec.val(n); const T_partials_return beta_dbl = beta_vec.val(n); - const T_partials_return beta_y_dbl = beta_dbl * y_dbl; - // Qn = 1 - Pn - const T_partials_return Qn = gamma_q(alpha_dbl, beta_y_dbl); - const T_partials_return log_Qn = log(Qn); + const T_partials_return beta_y = beta_dbl * y_dbl; + if (beta_y == INFTY) { + return ops_partials.build(negative_infinity()); + } + bool use_cf = beta_y > alpha_dbl + 1.0; + T_partials_return log_Qn; + [[maybe_unused]] T_partials_return dlogQ_dalpha = 0.0; + // Extract double values for the double-only continued fraction path. + [[maybe_unused]] const double beta_y_dbl = value_of(value_of(beta_y)); + [[maybe_unused]] const double alpha_dbl_val = value_of(value_of(alpha_dbl)); + + if (use_cf) { + if constexpr (!any_fvar && is_autodiff_v) { + // var-only: use analytical gradient with double inputs + auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl); + log_Qn = log_q_result.log_q; + dlogQ_dalpha = log_q_result.dlog_q_da; + } else { + log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); + if constexpr (is_autodiff_v) { + if constexpr (partials_fvar) { + auto alpha_unit = alpha_dbl; + alpha_unit.d_ = 1; + auto beta_unit = beta_y; + beta_unit.d_ = 0; + auto log_Qn_fvar = internal::log_q_gamma_cf(alpha_unit, beta_unit); + dlogQ_dalpha = log_Qn_fvar.d_; + } else { + const T_partials_return Qn = exp(log_Qn); + dlogQ_dalpha + = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl), + digamma(alpha_dbl)) + / Qn; + } + } + } + } else { + const T_partials_return Pn = gamma_p(alpha_dbl, beta_y); + log_Qn = log1m(Pn); + + if (!std::isfinite(value_of(value_of(log_Qn)))) { + use_cf = beta_y > 0.0; + if (use_cf) { + // Fallback to continued fraction if log1m fails + if constexpr (!any_fvar && is_autodiff_v) { + auto log_q_result = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl); + log_Qn = log_q_result.log_q; + dlogQ_dalpha = log_q_result.dlog_q_da; + } else { + log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); + if constexpr (is_autodiff_v) { + if constexpr (partials_fvar) { + auto alpha_unit = alpha_dbl; + alpha_unit.d_ = 1; + auto beta_unit = beta_y; + beta_unit.d_ = 0; + auto log_Qn_fvar + = internal::log_q_gamma_cf(alpha_unit, beta_unit); + dlogQ_dalpha = log_Qn_fvar.d_; + } else { + const T_partials_return Qn = exp(log_Qn); + dlogQ_dalpha + = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl), + digamma(alpha_dbl)) + / Qn; + } + } + } + } + } + + if constexpr (is_autodiff_v) { + if (!use_cf) { + if constexpr (partials_fvar) { + auto alpha_unit = alpha_dbl; + alpha_unit.d_ = 1; + auto beta_unit = beta_y; + beta_unit.d_ = 0; + auto log_Qn_fvar = log1m(gamma_p(alpha_unit, beta_unit)); + dlogQ_dalpha = log_Qn_fvar.d_; + } else { + const T_partials_return Qn = exp(log_Qn); + if (Qn > 0.0) { + dlogQ_dalpha = -grad_reg_lower_inc_gamma(alpha_dbl, beta_y) / Qn; + } else { + // Fallback to continued fraction if Q rounds to zero + if constexpr (!any_fvar) { + auto log_q_result + = log_gamma_q_dgamma(alpha_dbl_val, beta_y_dbl); + log_Qn = log_q_result.log_q; + dlogQ_dalpha = log_q_result.dlog_q_da; + } else { + log_Qn = internal::log_q_gamma_cf(alpha_dbl, beta_y); + const T_partials_return Qn_cf = exp(log_Qn); + dlogQ_dalpha + = grad_reg_inc_gamma(alpha_dbl, beta_y, tgamma(alpha_dbl), + digamma(alpha_dbl)) + / Qn_cf; + } + use_cf = true; + } + } + } + } + } + if (!std::isfinite(value_of(value_of(log_Qn)))) { + return ops_partials.build(negative_infinity()); + } P += log_Qn; - if constexpr (is_any_autodiff_v) { - const T_partials_return log_y_dbl = log(y_dbl); - const T_partials_return log_beta_dbl = log(beta_dbl); + if constexpr (need_y_beta_deriv) { + const T_partials_return log_y = log(y_dbl); + const T_partials_return log_beta = log(beta_dbl); + const T_partials_return lgamma_alpha = lgamma(alpha_dbl); + const T_partials_return alpha_minus_one = fma(alpha_dbl, log_y, -log_y); + const T_partials_return log_pdf - = alpha_dbl * log_beta_dbl - lgamma(alpha_dbl) - + (alpha_dbl - 1.0) * log_y_dbl - beta_y_dbl; - const T_partials_return common_term = exp(log_pdf - log_Qn); + = alpha_dbl * log_beta - lgamma_alpha + alpha_minus_one - beta_y; + + const T_partials_return hazard = exp(log_pdf - log_Qn); // f/Q if constexpr (is_autodiff_v) { - // d/dy log(1-F(y)) = -f(y)/(1-F(y)) - partials<0>(ops_partials)[n] -= common_term; + partials<0>(ops_partials)[n] -= hazard; } if constexpr (is_autodiff_v) { - // d/dbeta log(1-F(y)) = -y*f(y)/(beta*(1-F(y))) - partials<2>(ops_partials)[n] -= y_dbl / beta_dbl * common_term; + partials<2>(ops_partials)[n] -= (y_dbl / beta_dbl) * hazard; } } - if constexpr (is_autodiff_v) { - const T_partials_return digamma_val = digamma(alpha_dbl); - const T_partials_return gamma_val = tgamma(alpha_dbl); - // d/dalpha log(1-F(y)) = grad_upper_inc_gamma / (1-F(y)) - partials<1>(ops_partials)[n] - += grad_reg_inc_gamma(alpha_dbl, beta_y_dbl, gamma_val, digamma_val) - / Qn; + partials<1>(ops_partials)[n] += dlogQ_dalpha; } } return ops_partials.build(P); diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp index 2893f2f0166..84fa35fa1b2 100644 --- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp @@ -66,6 +66,31 @@ TEST(ProbGamma, lccdf_small_alpha_small_y) { EXPECT_LT(result, 0.0); } +TEST(ProbGamma, lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + + // For large alpha and very small y, the CCDF is extremely close to 1. + // The old implementation computed `log(gamma_q(alpha, beta * y))`, which can + // round to `log(1) == 0`. The updated implementation uses `log1m(gamma_p)`, + // which preserves the tiny negative value. + double y = 1e-8; + double alpha = 31.25; + double beta = 1.0; + + double new_val = gamma_lccdf(y, alpha, beta); + double expected = log1m(gamma_p(alpha, beta * y)); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha, beta * y)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(new_val, 0.0); + EXPECT_DOUBLE_EQ(new_val, expected); +} + TEST(ProbGamma, lccdf_large_alpha_large_y) { using stan::math::gamma_lccdf; @@ -154,6 +179,29 @@ TEST(ProbGamma, lccdf_extreme_large_alpha) { EXPECT_TRUE(std::isfinite(result)); } +TEST(ProbGamma, lccdf_large_alpha_1000_beta_3) { + using stan::math::gamma_lccdf; + + // Large alpha = 1000, beta = 3 + double alpha = 1000.0; + double beta = 3.0; + + // Test various y values + std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; + + for (double y : y_values) { + double result = gamma_lccdf(y, alpha, beta); + + // Result should be finite + EXPECT_TRUE(std::isfinite(result)) + << "Failed for y=" << y << ", alpha=" << alpha << ", beta=" << beta; + + // Result should be <= 0 (log of probability) + EXPECT_LE(result, 0.0) << "Positive value for y=" << y + << ", alpha=" << alpha << ", beta=" << beta; + } +} + TEST(ProbGamma, lccdf_monotonic_in_y) { using stan::math::gamma_lccdf; diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp index 1066773e060..4ddd846a8fe 100644 --- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/rev/prob/gamma_lccdf_test.cpp @@ -230,6 +230,48 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_small) { } } +TEST(ProbDistributionsGamma, + lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + using stan::math::var; + + // Same comparison as the prim test, but also exercises autodiff for + // alpha > 30. + double y_d = 1e-8; + double alpha_d = 31.25; + double beta_d = 1.0; + + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha_d, beta_d * y_d)); + double expected = log1m(gamma_p(alpha_d, beta_d * y_d)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(lccdf_var.val(), 0.0); + EXPECT_DOUBLE_EQ(lccdf_var.val(), expected); + + std::vector vars = {y_v, alpha_v, beta_v}; + std::vector grads; + lccdf_var.grad(vars, grads); + + for (size_t i = 0; i < grads.size(); ++i) { + EXPECT_FALSE(std::isnan(grads[i])) << "Gradient " << i << " is NaN"; + EXPECT_TRUE(std::isfinite(grads[i])) + << "Gradient " << i << " is not finite"; + } + + // d/dy log(CCDF) should be <= 0 (can underflow to -0) + EXPECT_LE(grads[0], 0.0); +} + TEST(ProbDistributionsGamma, lccdf_extreme_values_large) { using stan::math::gamma_lccdf; using stan::math::var; @@ -258,6 +300,36 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_large) { } } +TEST(ProbDistributionsGamma, lccdf_large_alpha_1000_beta_3) { + using stan::math::gamma_lccdf; + using stan::math::var; + + // Large alpha = 1000, beta = 3 + // Note: This test only checks values, not gradients, as large alpha values + // can cause numerical issues with gradient computation + double alpha_d = 1000.0; + double beta_d = 3.0; + + // Test various y values + std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; + + for (double y_d : y_values) { + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + // Value should be finite and <= 0 + EXPECT_TRUE(std::isfinite(lccdf_var.val())) + << "Failed for y=" << y_d << ", alpha=" << alpha_d + << ", beta=" << beta_d; + EXPECT_LE(lccdf_var.val(), 0.0) + << "Positive value for y=" << y_d << ", alpha=" << alpha_d + << ", beta=" << beta_d; + } +} + TEST(ProbDistributionsGamma, lccdf_alpha_one_derivatives) { using stan::math::gamma_lccdf; using stan::math::var;