From 2305f931dbed269dff8d1a510ad7ca2352286ba6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Dec 2025 18:34:07 +0000 Subject: [PATCH] Use logarithmic binning for distribution sparklines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linear histogram binning produces misleading sparklines for highly skewed distributions typical of PR metrics (e.g., median 24h but P95 of 19 days). Most values cluster in the first bin, showing █▁▁▁▁▁▁▁▁▁. Logarithmic binning spreads the data across bins proportionally to orders of magnitude, producing meaningful visualizations that show the actual shape of long-tailed distributions. --- scripts/pr-statistics.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/pr-statistics.js b/scripts/pr-statistics.js index f461951..1049a75 100644 --- a/scripts/pr-statistics.js +++ b/scripts/pr-statistics.js @@ -36,7 +36,9 @@ function sparkline(values) { } /** - * Create a histogram from an array of values. + * Create a histogram from an array of values using logarithmic binning. + * This handles skewed distributions (common in time-based metrics) much better + * than linear binning by spreading out long-tailed data. * Returns an array of bin counts. 
*/ function histogram(values, numBins = 10) { @@ -52,11 +54,17 @@ function histogram(values, numBins = 10) { return bins; } - const binWidth = (max - min) / numBins; + // Use logarithmic binning for better visualization of skewed distributions + // Add 1 before taking the log so zero values map to log(1) = 0 rather than log(0) = -Infinity (assumes non-negative values) + const logMin = Math.log(min + 1); + const logMax = Math.log(max + 1); + const logBinWidth = (logMax - logMin) / numBins; + const bins = new Array(numBins).fill(0); values.forEach(value => { - let binIndex = Math.floor((value - min) / binWidth); + const logValue = Math.log(value + 1); + let binIndex = Math.floor((logValue - logMin) / logBinWidth); // Handle edge case where value equals max if (binIndex >= numBins) binIndex = numBins - 1; bins[binIndex]++;