From c99fd96b4467242051e974e746ce55916729989d Mon Sep 17 00:00:00 2001
From: "Vincent A. Arcila"
Date: Thu, 29 May 2025 16:04:54 +0200
Subject: [PATCH] feat: Add csv output functionality

---
Review notes (ignored by git am, not part of the commit message):
- snprintf_buf() sizes the grown buffer from the length of the text, so
  a long append can no longer leave the offset past the allocation.
- CSV command and argument fields are quoted when they contain commas,
  double quotes or line breaks.
- Machine output no longer ends with an extra empty line.
- The post-image blob id in the index line predates these changes.

 bigotes.c | 382 ++++++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 276 insertions(+), 106 deletions(-)

diff --git a/bigotes.c b/bigotes.c
index b0859dd..c9235e4 100644
--- a/bigotes.c
+++ b/bigotes.c
@@ -17,11 +17,15 @@
 #include "stats.h"
 #include "common.h"
 
+
+#define STR_SIZE 8192
+
 static int read_from_stdin = 0;
 static int use_wall_clock = 0;
 static double min_wall_time = 30.0;
 static int use_exec = 0;
 static int use_machine_output = 0;
+static int use_csv_output = 0;
 static int be_quiet = 0;
 static int trim_outliers = 0;
 static long min_samples = 30;
@@ -158,26 +162,6 @@ cmp_double(const void *pa, const void *pb)
 	return 0;
 }
 
-/* Test for normality */
-static void
-shapiro_wilk_test(struct sampling *s)
-{
-	double W, p;
-	if (swilk(s->samples, s->n, &W, &p) != 0) {
-		err("swilk failed");
-		return;
-	}
-
-	if (use_machine_output) {
-		printf("%-10s %e\n", "sw_pvalue", p);
-		printf("%-10s %d\n", "sw_normal", (p >= 0.05));
-	} else {
-		const char *msg = (p < 0.05) ? "NOT normal" : "may be normal";
-		printf(" Shapiro-Wilk: W=%.2e, p-value=%.2e (%s)\n", W, p, msg);
-	}
-
-}
-
 static void
 stats(struct sampling *s)
 {
@@ -398,71 +382,285 @@ plot_histogram(struct sampling *s, int w, int h, int cut_outliers)
 	}
 }
 
+/* All the statistics computed for one sampling */
+struct summary_stats {
+	double mean, stdev, skewness, kurtosis;
+	double xmin, q1, median, q3, xmax;
+	double mad, sem;
+	double rel_stdev, rel_mad, rel_sem, rel_far;
+	long n, far;
+	double wall;
+	double W, p; /* Shapiro-Wilk results, NAN if the test failed */
+};
+
+/* Growable, NUL-terminated text buffer filled with snprintf_buf() */
+struct string_buffer {
+	char *str;
+	int offset; /* Length of the text stored so far */
+	int maxsize; /* Bytes allocated for str */
+};
+
+/* Allocate an empty string buffer of STR_SIZE bytes; exits on failure */
+static struct string_buffer *
+create_string_buffer(void)
+{
+	struct string_buffer *buf = malloc(sizeof(struct string_buffer));
+	if (!buf) {
+		err("malloc failed");
+		exit(1);
+	}
+
+	buf->str = malloc(STR_SIZE);
+	if (!buf->str) {
+		err("malloc failed");
+		exit(1);
+	}
+
+	buf->str[0] = '\0'; /* Valid empty string before the first append */
+	buf->offset = 0;
+	buf->maxsize = STR_SIZE;
+	return buf;
+}
+
+/* Release a string buffer; NULL is accepted */
 static void
-print_summary(struct sampling *s)
+free_string_buffer(struct string_buffer *buf)
+{
+	if (buf) {
+		free(buf->str);
+		free(buf);
+	}
+}
+
+/* Append printf-style formatted text to a string_buffer, growing the
+ * buffer when the text does not fit. Exits on formatting or allocation
+ * failure. The arguments are expanded twice when the buffer grows, so
+ * they must be free of side effects. */
+#define snprintf_buf(buf, ...) \
+do { \
+	int avail = (buf)->maxsize - (buf)->offset; \
+	int ret = snprintf((buf)->str + (buf)->offset, avail, __VA_ARGS__); \
+	if (ret < 0) { \
+		err("snprintf failed"); \
+		exit(1); \
+	} \
+	if (ret >= avail) { \
+		/* Room for the text, its NUL and STR_SIZE bytes of slack */ \
+		int newsize = (buf)->offset + ret + 1 + STR_SIZE; \
+		char *newstr = realloc((buf)->str, newsize); \
+		if (newstr == NULL) { \
+			err("realloc failed"); \
+			exit(1); \
+		} \
+		(buf)->str = newstr; \
+		(buf)->maxsize = newsize; \
+		ret = snprintf((buf)->str + (buf)->offset, \
+				(buf)->maxsize - (buf)->offset, __VA_ARGS__); \
+	} \
+	(buf)->offset += ret; \
+} while (0)
+
+/* Sort the samples in place and fill stats with every summary value */
+static void
+compute_summary_stats(struct sampling *s, struct summary_stats *stats)
 {
 	qsort(s->samples, s->n, sizeof(double), cmp_double);
 
-	double mean = stats_mean(s->samples, s->n);
-	double stdev = stats_stdev(s->samples, s->n, mean);
-	double skewness = stats_skewness(s->samples, s->n, mean, stdev);
-	double kurtosis = stats_kurtosis(s->samples, s->n, mean, stdev);
+	stats->mean = stats_mean(s->samples, s->n);
+	stats->stdev = stats_stdev(s->samples, s->n, stats->mean);
+	stats->skewness = stats_skewness(s->samples, s->n, stats->mean, stats->stdev);
+	stats->kurtosis = stats_kurtosis(s->samples, s->n, stats->mean, stats->stdev);
+
+	stats->xmin = stats_percentile(s->samples, s->n, 0.0);
+	stats->q1 = stats_percentile(s->samples, s->n, 0.25);
+	stats->median = stats_median(s->samples, s->n);
+	stats->q3 = stats_percentile(s->samples, s->n, 0.75);
+	stats->xmax = stats_percentile(s->samples, s->n, 1.0);
+
+	stats->mad = stats_mad(s->samples, s->n, stats->median);
+	stats->n = s->n;
+	stats->far = stats_outliers(s->samples, s->n, stats->q1, stats->q3, 3.0);
+	stats->wall = s->wall;
+
+	stats->sem = stats_sem(stats->stdev, s->n);
+	stats->rel_stdev = stats_percent(stats->stdev, stats->mean);
+	stats->rel_mad = stats_percent(stats->mad, stats->median);
+	stats->rel_sem = stats_percent(stats->sem * 1.96, stats->mean);
+	stats->rel_far = stats_percent(stats->far, s->n);
+
+	if (swilk(s->samples, s->n, &stats->W, &stats->p) != 0) {
+		err("swilk failed");
+		stats->W = NAN;
+		stats->p = NAN;
+	}
+}
 
-	double xmin = stats_percentile(s->samples, s->n, 0.0);
-	double q1 = stats_percentile(s->samples, s->n, 0.25);
-	double median = stats_median(s->samples, s->n);
-	double q3 = stats_percentile(s->samples, s->n, 0.75);
-	double xmax = stats_percentile(s->samples, s->n, 1.0);
+/* Print the summary as "key value" lines meant for machine parsing */
+static void
+print_machine_output(struct summary_stats *stats, struct sampling *s)
+{
+	struct string_buffer *buf = create_string_buffer();
+
+	snprintf_buf(buf, "%-10s %e\n", "min", stats->xmin);
+	snprintf_buf(buf, "%-10s %e\n", "q1", stats->q1);
+	snprintf_buf(buf, "%-10s %e\n", "median", stats->median);
+	snprintf_buf(buf, "%-10s %e\n", "mean", stats->mean);
+	snprintf_buf(buf, "%-10s %e\n", "q3", stats->q3);
+	snprintf_buf(buf, "%-10s %e\n", "max", stats->xmax);
+	snprintf_buf(buf, "%-10s %ld\n", "samples", stats->n);
+	snprintf_buf(buf, "%-10s %e\n", "wall", stats->wall);
+	snprintf_buf(buf, "%-10s %e\n", "mad", stats->mad);
+	snprintf_buf(buf, "%-10s %e\n", "stdev", stats->stdev);
+	snprintf_buf(buf, "%-10s %e\n", "skewness", stats->skewness);
+	snprintf_buf(buf, "%-10s %e\n", "kurtosis", stats->kurtosis);
+	snprintf_buf(buf, "%-10s %ld\n", "far", stats->far);
+	snprintf_buf(buf, "%-10s %e\n", "rfar", stats->rel_far);
+	snprintf_buf(buf, "%-10s %e\n", "rmad", stats->rel_mad);
+	snprintf_buf(buf, "%-10s %e\n", "rstdev", stats->rel_stdev);
+	snprintf_buf(buf, "%-10s %e\n", "sem", stats->sem);
+	snprintf_buf(buf, "%-10s %e\n", "rsem", stats->rel_sem);
+	snprintf_buf(buf, "%-10s %e\n", "sw_pvalue", stats->p);
+	snprintf_buf(buf, "%-10s %d\n", "sw_normal", (stats->p >= 0.05));
+
+	const char *runmode = read_from_stdin ? "stdin" : "exec";
+	snprintf_buf(buf, "%-10s %s\n", "runmode", runmode);
 
-	double mad = stats_mad(s->samples, s->n, median);
-	long n = s->n;
-	long far = stats_outliers(s->samples, s->n, q1, q3, 3.0);
+	if (!read_from_stdin) {
+		snprintf_buf(buf, "%-10s ", "command");
+		for (char *const* p = s->cmd; *p; p++) {
+			snprintf_buf(buf, "%s ", *p);
+		}
+		snprintf_buf(buf, "\n");
+	}
 
-	double sem = stats_sem(stdev, s->n);
-	double rel_stdev = stats_percent(stdev, mean);
-	double rel_mad = stats_percent(mad, median);
-	double rel_sem = stats_percent(sem * 1.96, mean);
-	double rel_far = stats_percent(far, s->n);
+	printf("%s", buf->str);
+	free_string_buffer(buf);
+}
 
-	if (use_machine_output) {
-		printf("%-10s %e\n", "min", xmin);
-		printf("%-10s %e\n", "q1", q1);
-		printf("%-10s %e\n", "median", median);
-		printf("%-10s %e\n", "mean", mean);
-		printf("%-10s %e\n", "q3", q3);
-		printf("%-10s %e\n", "max", xmax);
-		printf("%-10s %ld\n", "samples", n);
-		printf("%-10s %e\n", "wall", s->wall);
-		printf("%-10s %e\n", "mad", mad);
-		printf("%-10s %e\n", "stdev", stdev);
-		printf("%-10s %e\n", "skewness", skewness);
-		printf("%-10s %e\n", "kurtosis", kurtosis);
-		printf("%-10s %ld\n", "far", far);
-		printf("%-10s %e\n", "rfar", rel_far);
-		printf("%-10s %e\n", "rmad", rel_mad);
-		printf("%-10s %e\n", "rstdev", rel_stdev);
-		printf("%-10s %e\n", "sem", sem);
-		printf("%-10s %e\n", "rsem", rel_sem);
+/* Append one CSV field, quoted as in RFC 4180 when it contains a comma,
+ * a double quote or a line break */
+static void
+append_csv_field(struct string_buffer *buf, const char *field)
+{
+	int needs_quotes = 0;
+	for (const char *c = field; *c; c++) {
+		if (*c == ',' || *c == '"' || *c == '\n' || *c == '\r') {
+			needs_quotes = 1;
+			break;
+		}
+	}
+
+	if (!needs_quotes) {
+		snprintf_buf(buf, "%s", field);
+		return;
+	}
+
+	snprintf_buf(buf, "\"");
+	for (const char *c = field; *c; c++) {
+		if (*c == '"') {
+			snprintf_buf(buf, "\"\""); /* Escape by doubling */
+		} else {
+			snprintf_buf(buf, "%c", *c);
+		}
+	}
+	snprintf_buf(buf, "\"");
+}
+
+/* Print the summary as CSV: a header row followed by one row of values.
+ * In exec mode the command and each argument get their own column */
+static void
+print_csv_output(struct summary_stats *stats, struct sampling *s)
+{
+	struct string_buffer *header = create_string_buffer();
+	struct string_buffer *values = create_string_buffer();
+
+	snprintf_buf(header, "median,mean,min,q1,q3,max,samples,wall,mad,stdev,skewness,kurtosis,far,rfar,rmad,rstdev,sem,rsem");
+	snprintf_buf(values, "%e,%e,%e,%e,%e,%e,%ld,%e,%e,%e,%e,%e,%ld,%e,%e,%e,%e,%e",
+		stats->median, stats->mean, stats->xmin, stats->q1, stats->q3, stats->xmax,
+		stats->n, stats->wall, stats->mad, stats->stdev, stats->skewness, stats->kurtosis,
+		stats->far, stats->rel_far, stats->rel_mad, stats->rel_stdev, stats->sem, stats->rel_sem);
+
+	snprintf_buf(header, ",sw_pvalue,sw_normal");
+	snprintf_buf(values, ",%e,%d", stats->p, (stats->p >= 0.05));
+
+	const char *runmode = read_from_stdin ? "stdin" : "exec";
+	snprintf_buf(header, ",runmode");
+	snprintf_buf(values, ",%s", runmode);
+
+	if (!read_from_stdin) {
+		for (char *const* p = s->cmd; *p; p++) {
+			if (p == s->cmd) {
+				snprintf_buf(header, ",command");
+			} else {
+				snprintf_buf(header, ",arg%ld", (long)(p - s->cmd));
+			}
+			snprintf_buf(values, ",");
+			append_csv_field(values, *p);
+		}
+	}
+
+	printf("%s\n%s\n", header->str, values->str);
+	free_string_buffer(header);
+	free_string_buffer(values);
+}
+
+/* Print the summary as aligned tables meant for people to read */
+static void
+print_human_output(struct summary_stats *stats, struct sampling *s)
+{
+	struct string_buffer *buf = create_string_buffer();
+
+	snprintf_buf(buf, "\n");
+	snprintf_buf(buf, "%10s %10s %10s %10s %10s %10s\n",
+		"MIN", "Q1", "MEDIAN", "MEAN", "Q3", "MAX");
+	snprintf_buf(buf, "% 10.3e % 10.3e % 10.3e % 10.3e % 10.3e % 10.3e \n",
+		stats->xmin, stats->q1, stats->median, stats->mean, stats->q3, stats->xmax);
+
+	snprintf_buf(buf, "\n");
+	snprintf_buf(buf, "%10s %10s %10s %10s %10s %10s\n",
+		"N", "WALL", "MAD", "STDEV", "SKEW", "KURTOSIS");
+	snprintf_buf(buf, "%10ld %10.1f % 10.3e % 10.3e % 10.3e % 10.3e\n",
+		stats->n, stats->wall, stats->mad, stats->stdev, stats->skewness, stats->kurtosis);
+
+	snprintf_buf(buf, "\n");
+	snprintf_buf(buf, "%10s %10s %10s %10s %10s %10s\n",
+		"FAR", "%FAR", "%MAD", "%STDEV", "SEM", "%SEM");
+	snprintf_buf(buf, "%10ld %10.2f % 10.2f % 10.2f % 10.3e % 10.2f\n",
+		stats->far, stats->rel_far, stats->rel_mad, stats->rel_stdev, stats->sem, stats->rel_sem);
+
+	snprintf_buf(buf, "\n");
+	const char *msg = (stats->p < 0.05) ? "NOT normal" : "may be normal";
+	snprintf_buf(buf, " Shapiro-Wilk: W=%.2e, p-value=%.2e (%s)\n", stats->W, stats->p, msg);
+
+	if (read_from_stdin) {
+		snprintf_buf(buf, " Read from stdin\n");
 	} else {
-		printf("\n");
-		printf("%10s %10s %10s %10s %10s %10s\n",
-				"MIN", "Q1", "MEDIAN", "MEAN", "Q3", "MAX");
-		printf("% 10.3e % 10.3e % 10.3e % 10.3e % 10.3e % 10.3e \n",
-				xmin, q1, median, mean, q3, xmax);
-		printf("\n");
-		printf("%10s %10s %10s %10s %10s %10s\n",
-				"N", "WALL", "MAD", "STDEV", "SKEW", "KURTOSIS");
-		printf("%10ld %10.1f % 10.3e % 10.3e % 10.3e % 10.3e\n",
-				n, s->wall, mad, stdev, skewness, kurtosis);
-		printf("\n");
-		printf("%10s %10s %10s %10s %10s %10s\n",
-				"FAR", "%FAR", "%MAD", "%STDEV", "SEM", "%SEM");
-		printf("%10ld %10.2f % 10.2f % 10.2f % 10.3e % 10.2f\n",
-				far, rel_far, rel_mad, rel_stdev, sem, rel_sem);
-		printf("\n");
+		snprintf_buf(buf, " Cmd: ");
+		for (char *const* p = s->cmd; *p; p++) {
+			snprintf_buf(buf, "%s ", *p);
+		}
+		snprintf_buf(buf, "\n");
 	}
+
+	printf("%s", buf->str);
+	free_string_buffer(buf);
 }
 
+/* Compute the statistics of s and print them in the selected format */
+static void
+print_summary(struct sampling *s)
+{
+	struct summary_stats stats;
+	compute_summary_stats(s, &stats);
+
+	if (use_machine_output) {
+		print_machine_output(&stats, s);
+	} else if (use_csv_output) {
+		print_csv_output(&stats, s);
+	} else {
+		print_human_output(&stats, s);
+	}
+}
 
 /* Return -1 on error, 0 on success */
 static int
@@ -491,34 +689,6 @@ do_read(FILE *f, double *metric, int *end)
 	return 0;
 }
 
-static void
-print_command(struct sampling *s)
-{
-	if (use_machine_output) {
-		const char *runmode = read_from_stdin ? "stdin" : "exec";
-		printf("%-10s %s\n", "runmode", runmode);
-
-		if (!read_from_stdin) {
-			printf("%-10s ", "command");
-			for (char *const* p = s->cmd; *p; p++) {
-				printf("%s ", *p);
-			}
-			printf("\n");
-		}
-		return;
-	}
-
-	if (read_from_stdin) {
-		printf(" Read from stdin\n");
-	} else {
-		printf(" Cmd: ");
-		for (char *const* p = s->cmd; *p; p++) {
-			printf("%s ", *p);
-		}
-		printf("\n");
-	}
-}
-
 static int
 do_sample(char * const cmd[], char * const argv[], int argc)
 {
@@ -576,11 +746,8 @@ do_sample(char * const cmd[], char * const argv[], int argc)
 	}
 
 	print_summary(&s);
-	print_command(&s);
 
-	shapiro_wilk_test(&s);
-
-	if (!use_machine_output) {
+	if (!use_machine_output && !use_csv_output) {
 		printf("\n"); /* Leave one empty before histogram */
 		plot_histogram(&s, 64, 4, trim_outliers);
 		printf("\n"); /* Leave one empty after histogram */
@@ -607,7 +774,7 @@ main(int argc, char *argv[])
 	progname_set("bigotes");
 
 	int opt;
-	while ((opt = getopt(argc, argv, "imn:wo:qhXt:")) != -1) {
+	while ((opt = getopt(argc, argv, "cimn:wo:qhXt:")) != -1) {
 		switch (opt) {
 		case 'i':
 			read_from_stdin = 1;
@@ -624,6 +791,9 @@ main(int argc, char *argv[])
 		case 'm':
 			use_machine_output = 1;
 			break;
+		case 'c':
+			use_csv_output = 1;
+			break;
 		case 'n':
 			min_samples = atol(optarg);
 			break;