From b2f5f7af0434805c1e17bbfbb853639c25dc4b17 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Mon, 14 Feb 2022 20:23:48 -0800 Subject: [PATCH 1/2] Add include for ruby/version.h This is necessary for RUBY_API_VERSION_MAJOR, at least in Ruby 3.1 --- ext/stackprof/stackprof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/stackprof/stackprof.c b/ext/stackprof/stackprof.c index 6471f162..dc1d65c9 100644 --- a/ext/stackprof/stackprof.c +++ b/ext/stackprof/stackprof.c @@ -7,6 +7,7 @@ **********************************************************************/ #include +#include #include #include #include From 7a8e4cad387970075ad4531ad014d5f99e164921 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Mon, 14 Feb 2022 23:52:44 -0800 Subject: [PATCH 2/2] Avoid unsafe mallocs inside signals In 6c3f4d7c5234cb3274c4a624ea6cb52c26be3833 we started recording signals immediately on receiving a signal, after observing that in Ruby 3.0+ the CFP structure was safe to read. This resulted in much more accurate measurements. Unfortunately though rb_profile_frames seems to be safe to call inside a signal handler, stackprof_record_sample_for_stack is not, since it calls malloc (itself or through st_*) which is not async-signal-safe. This could potentially result in deadlocks or memory corruption depending on the malloc implementation. This commit attempts to restore the safety of the postponed job approach while keeping the accuracy of measuring immediately. Inside the signal handler we record the frames to a buffer, then enqueue a postponed job to do the actual recording. --- ext/stackprof/stackprof.c | 68 ++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/ext/stackprof/stackprof.c b/ext/stackprof/stackprof.c index dc1d65c9..21e65b09 100644 --- a/ext/stackprof/stackprof.c +++ b/ext/stackprof/stackprof.c @@ -126,6 +126,9 @@ static struct { VALUE fake_frame_names[TOTAL_FAKE_FRAMES]; VALUE empty_string; + + int buffer_count; + sample_time_t buffer_time; VALUE frames_buffer[BUF_SIZE]; int lines_buffer[BUF_SIZE]; } _stackprof; @@ -594,9 +597,17 @@ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t ti } } +// buffer the current profile frames +// This must be async-signal-safe +// Returns immediately if another set of frames are already in the buffer void -stackprof_record_sample() +stackprof_buffer_sample(void) { + if (_stackprof.buffer_count > 0) { + // Another sample is already pending + return; + } + uint64_t start_timestamp = 0; int64_t timestamp_delta = 0; int num; @@ -606,12 +617,16 @@ stackprof_record_sample() start_timestamp = timestamp_usec(&t); timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at); } + num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer); - stackprof_record_sample_for_stack(num, start_timestamp, timestamp_delta); + + _stackprof.buffer_count = num; + _stackprof.buffer_time.timestamp_usec = start_timestamp; + _stackprof.buffer_time.delta_usec = timestamp_delta; } void -stackprof_record_gc_samples() +stackprof_record_gc_samples(void) { int64_t delta_to_first_unrecorded_gc_sample = 0; uint64_t start_timestamp = 0; @@ -661,20 +676,47 @@ stackprof_record_gc_samples() _stackprof.unrecorded_gc_sweeping_samples = 0; } +// record the sample previously buffered by stackprof_buffer_sample static void -stackprof_gc_job_handler(void *data) +stackprof_record_buffer(void) +{ + stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec); + + // reset the buffer + _stackprof.buffer_count = 0; +} + +static void +stackprof_sample_and_record(void) +{ + stackprof_buffer_sample(); + stackprof_record_buffer(); +} + +static void +stackprof_job_record_gc(void *data) { if (!_stackprof.running) return; stackprof_record_gc_samples(); } +#ifdef USE_POSTPONED_JOB +static void +stackprof_job_sample_and_record(void *data) +{ + if (!_stackprof.running) return; + + stackprof_sample_and_record(); +} +#endif + static void -stackprof_job_handler(void *data) +stackprof_job_record_buffer(void *data) { if (!_stackprof.running) return; - stackprof_record_sample(); + stackprof_record_buffer(); } static void @@ -696,12 +738,16 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext) _stackprof.unrecorded_gc_sweeping_samples++; } _stackprof.unrecorded_gc_samples++; - rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0); + rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0); } else { #ifdef USE_POSTPONED_JOB - rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0); + rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0); #else - stackprof_job_handler(0); + // Buffer a sample immediately, if an existing sample exists this will + // return immediately + stackprof_buffer_sample(); + // Enqueue a job to record the sample + rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0); #endif } pthread_mutex_unlock(&lock); @@ -713,7 +759,7 @@ stackprof_newobj_handler(VALUE tpval, void *data) _stackprof.overall_signals++; if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval)) return; - stackprof_job_handler(0); + stackprof_sample_and_record(); } static VALUE @@ -723,7 +769,7 @@ stackprof_sample(VALUE self) return Qfalse; _stackprof.overall_signals++; - stackprof_job_handler(0); + stackprof_sample_and_record(); return Qtrue; }