diff --git a/CMakeLists.txt b/CMakeLists.txt index 508619fb..7ca7f263 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -267,6 +267,10 @@ if(BUILD_UNIVERSAL_DDPROF) endif() endif() +if(USE_LOADER) + target_compile_definitions(dd_profiling-embedded PRIVATE "DDPROF_USE_LOADER") +endif() + # Fix for link error in sanitizeddebug build mode with gcc: # ~~~ # /usr/bin/ld: ./libdd_profiling.so: undefined reference to `__dynamic_cast' diff --git a/cmake/dd_profiling.version b/cmake/dd_profiling.version index d7e1730e..988721d2 100644 --- a/cmake/dd_profiling.version +++ b/cmake/dd_profiling.version @@ -1,4 +1,4 @@ { - global: ddprof_start_profiling; ddprof_stop_profiling; + global: ddprof_start_profiling; ddprof_stop_profiling; ddprof_lib_state; local: *; }; diff --git a/include/lib/allocation_tracker.hpp b/include/lib/allocation_tracker.hpp index ef7c9705..3fb80d01 100644 --- a/include/lib/allocation_tracker.hpp +++ b/include/lib/allocation_tracker.hpp @@ -120,8 +120,6 @@ class AllocationTracker { static void delete_tl_state(void *tl_state); - static void make_key(); - void track_allocation(uintptr_t addr, size_t size, TrackerThreadLocalState &tl_state, bool is_large_alloc); void track_deallocation(uintptr_t addr, TrackerThreadLocalState &tl_state, diff --git a/src/lib/allocation_tracker.cc b/src/lib/allocation_tracker.cc index e7a2981d..41ad0a9e 100644 --- a/src/lib/allocation_tracker.cc +++ b/src/lib/allocation_tracker.cc @@ -35,6 +35,14 @@ pthread_key_t AllocationTracker::_tl_state_key; AllocationTracker *AllocationTracker::_instance; namespace { + +#ifdef DDPROF_USE_LOADER +extern "C" + __attribute((tls_model("initial-exec"))) __thread void *ddprof_lib_state; +#else +__attribute((tls_model("initial-exec"))) __thread void *ddprof_lib_state; +#endif + DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer, size_t size, bool &timeout) { constexpr std::chrono::nanoseconds k_sleep_duration = @@ -53,13 +61,7 @@ DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer, } // namespace TrackerThreadLocalState *AllocationTracker::get_tl_state() { - // In shared libraries, TLS access requires a call to tls_get_addr, - // tls_get_addr can call into malloc, which can create a recursive loop - // instead we call pthread APIs to control the creation of TLS objects - pthread_once(&_key_once, make_key); - auto *tl_state = static_cast( - pthread_getspecific(_tl_state_key)); - return tl_state; + return static_cast(ddprof_lib_state); } TrackerThreadLocalState *AllocationTracker::init_tl_state() { @@ -69,13 +71,7 @@ TrackerThreadLocalState *AllocationTracker::init_tl_state() { auto tl_state = std::make_unique(); tl_state->tid = ddprof::gettid(); tl_state->stack_bounds = retrieve_stack_bounds(); - - if (int const res = pthread_setspecific(_tl_state_key, tl_state.get()); - res != 0) { - // should return 0 - LG_DBG("Unable to store tl_state. Error %d: %s\n", res, strerror(res)); - tl_state.reset(); - } + ddprof_lib_state = tl_state.get(); return tl_state.release(); } @@ -91,11 +87,6 @@ void AllocationTracker::delete_tl_state(void *tl_state) { delete static_cast(tl_state); } -void AllocationTracker::make_key() { - // delete is called on all key objects - pthread_key_create(&_tl_state_key, delete_tl_state); -} - DDRes AllocationTracker::allocation_tracking_init( uint64_t allocation_profiling_rate, uint32_t flags, uint32_t stack_sample_size, const RingBufferInfo &ring_buffer, diff --git a/src/lib/loader.c b/src/lib/loader.c index ffd6022a..83317f0b 100644 --- a/src/lib/loader.c +++ b/src/lib/loader.c @@ -18,6 +18,9 @@ #include #include +__attribute__((__visibility__("default"))) +__attribute__((tls_model("initial-exec"))) __thread void *ddprof_lib_state; + /* Role of loader is to ensure that all dependencies (libdl/lim/libpthread) of * libdd_profiling-embedded.so are satisfied before dlopen'ing it. * On musl, all libc features are in libc.so and hence are available once libc