From b9ea47e0611b9dcb3e4287cce4db063d963ef9e1 Mon Sep 17 00:00:00 2001 From: Cameron Hart Date: Tue, 15 Nov 2016 21:34:28 +1100 Subject: [PATCH] Replace boost math functions. Tests added using common input values and comparing against the output from original boost::math implementations. The normal distribution cdf function is taken from the example code at http://en.cppreference.com/w/cpp/numeric/math/erfc. The erf_inv is based on code accompanying the article "Approximating the erfinv function" in GPU Computing Gems, Volume 2. --- include/nonius/detail/stats.h++ | 113 ++++++++++++++++++++++++++++++-- test/stats.c++ | 24 +++++++ 2 files changed, 130 insertions(+), 7 deletions(-) diff --git a/include/nonius/detail/stats.h++ b/include/nonius/detail/stats.h++ index e74a874..c84d017 100644 --- a/include/nonius/detail/stats.h++ +++ b/include/nonius/detail/stats.h++ @@ -18,9 +18,8 @@ #include #include -#include - #include +#include #include #include #include @@ -124,10 +123,109 @@ namespace nonius { return results; } + inline double normal_cdf(double x) { + return std::erfc(-x / std::sqrt(2.0)) / 2.0; + } + + inline double erf_inv(double x) { + // Code accompanying the article "Approximating the erfinv function" in GPU Computing Gems, Volume 2 + double w, p; + + w = - log((1.0-x)*(1.0+x)); + + if (w < 6.250000) { + w = w - 3.125000; + p = -3.6444120640178196996e-21; + p = -1.685059138182016589e-19 + p*w; + p = 1.2858480715256400167e-18 + p*w; + p = 1.115787767802518096e-17 + p*w; + p = -1.333171662854620906e-16 + p*w; + p = 2.0972767875968561637e-17 + p*w; + p = 6.6376381343583238325e-15 + p*w; + p = -4.0545662729752068639e-14 + p*w; + p = -8.1519341976054721522e-14 + p*w; + p = 2.6335093153082322977e-12 + p*w; + p = -1.2975133253453532498e-11 + p*w; + p = -5.4154120542946279317e-11 + p*w; + p = 1.051212273321532285e-09 + p*w; + p = -4.1126339803469836976e-09 + p*w; + p = -2.9070369957882005086e-08 + p*w; + p = 4.2347877827932403518e-07 + p*w; + p = -1.3654692000834678645e-06 + p*w; + p = -1.3882523362786468719e-05 + p*w; + p = 0.0001867342080340571352 + p*w; + p = -0.00074070253416626697512 + p*w; + p = -0.0060336708714301490533 + p*w; + p = 0.24015818242558961693 + p*w; + p = 1.6536545626831027356 + p*w; + } + else if (w < 16.000000) { + w = sqrt(w) - 3.250000; + p = 2.2137376921775787049e-09; + p = 9.0756561938885390979e-08 + p*w; + p = -2.7517406297064545428e-07 + p*w; + p = 1.8239629214389227755e-08 + p*w; + p = 1.5027403968909827627e-06 + p*w; + p = -4.013867526981545969e-06 + p*w; + p = 2.9234449089955446044e-06 + p*w; + p = 1.2475304481671778723e-05 + p*w; + p = -4.7318229009055733981e-05 + p*w; + p = 6.8284851459573175448e-05 + p*w; + p = 2.4031110387097893999e-05 + p*w; + p = -0.0003550375203628474796 + p*w; + p = 0.00095328937973738049703 + p*w; + p = -0.0016882755560235047313 + p*w; + p = 0.0024914420961078508066 + p*w; + p = -0.0037512085075692412107 + p*w; + p = 0.005370914553590063617 + p*w; + p = 1.0052589676941592334 + p*w; + p = 3.0838856104922207635 + p*w; + } + else { + w = sqrt(w) - 5.000000; + p = -2.7109920616438573243e-11; + p = -2.5556418169965252055e-10 + p*w; + p = 1.5076572693500548083e-09 + p*w; + p = -3.7894654401267369937e-09 + p*w; + p = 7.6157012080783393804e-09 + p*w; + p = -1.4960026627149240478e-08 + p*w; + p = 2.9147953450901080826e-08 + p*w; + p = -6.7711997758452339498e-08 + p*w; + p = 2.2900482228026654717e-07 + p*w; + p = -9.9298272942317002539e-07 + p*w; + p = 4.5260625972231537039e-06 + p*w; + p = -1.9681778105531670567e-05 + p*w; + p = 7.5995277030017761139e-05 + p*w; + p = -0.00021503011930044477347 + p*w; + p = -0.00013871931833623122026 + p*w; + p = 1.0103004648645343977 + p*w; + p = 4.8499064014085844221 + p*w; + } + return p*x; + } + + inline double erfc_inv(double x) { + return erf_inv(1.0 - x); + } + + inline double normal_quantile(double p) { + static const double ROOT_TWO = std::sqrt(2.0); + + double result = 0.0; + assert(p >= 0 && p <= 1); + if (p < 0 || p > 1) { + return result; + } + + result = -erfc_inv(2.0 * p); + // result *= normal distribution standard deviation (1.0) * sqrt(2) + result *= /*sd * */ ROOT_TWO; + // result += normal disttribution mean (0) + return result; + } + template estimate bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) { - namespace bm = boost::math; - auto n_samples = last - first; double point = estimator(first, last); @@ -150,10 +248,11 @@ namespace nonius { // degenerate case with uniform samples if(prob_n == 0) return { point, point, point, confidence_level }; - double bias = bm::quantile(bm::normal{}, prob_n); - double z1 = bm::quantile(bm::normal{}, (1. - confidence_level) / 2.); + double bias = normal_quantile(prob_n); + double z1 = normal_quantile((1. - confidence_level) / 2.); - auto cumn = [n](double x) -> int { return std::lround(bm::cdf(bm::normal{}, x) * n); }; + auto cumn = [n](double x) -> int { + return std::lround(normal_cdf(x) * n); }; auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); }; double b1 = bias + z1; double b2 = bias - z1; diff --git a/test/stats.c++ b/test/stats.c++ index 69020a7..fb0501a 100644 --- a/test/stats.c++ +++ b/test/stats.c++ @@ -17,6 +17,30 @@ #include +TEST_CASE("normal_cdf") { + using nonius::detail::normal_cdf; + CHECK(normal_cdf(0.000000) == Approx(0.50000000000000000)); + CHECK(normal_cdf(1.000000) == Approx(0.84134474606854293)); + CHECK(normal_cdf(-1.000000) == Approx(0.15865525393145705)); + CHECK(normal_cdf(2.809729) == Approx(0.99752083845315409)); + CHECK(normal_cdf(-1.352570) == Approx(0.08809652095066035)); +} + +TEST_CASE("erfc_inv") { + using nonius::detail::erfc_inv; + CHECK(erfc_inv(1.103560) == Approx(-0.09203687623843015)); + CHECK(erfc_inv(1.067400) == Approx(-0.05980291115763361)); + CHECK(erfc_inv(0.050000) == Approx(1.38590382434967796)); +} + +TEST_CASE("normal_quantile") { + using nonius::detail::normal_quantile; + CHECK(normal_quantile(0.551780) == Approx(0.13015979861484198)); + CHECK(normal_quantile(0.533700) == Approx(0.08457408802851875)); + CHECK(normal_quantile(0.025000) == Approx(-1.95996398454005449)); +} + + TEST_CASE("mean") { std::vector x { 10., 20., 14., 16., 30., 24. };