diff --git a/docs/Build.md b/docs/Build.md index 6b0d4a9e8..e841ed3ab 100644 --- a/docs/Build.md +++ b/docs/Build.md @@ -47,7 +47,7 @@ Using the build image will guarantee this. ```bash CXX=clang++-17 CC=clang-17 source ./setup_env.sh MkBuildDir ClangDeb - DebCMake -DENABLE_CLANG_TIDY=ON ../ +DebCMake -DENABLE_CLANG_TIDY=ON ../ ``` ### Updating libdatadog diff --git a/include/ddprof_cli.hpp b/include/ddprof_cli.hpp index 0b20b4d15..e02895dea 100644 --- a/include/ddprof_cli.hpp +++ b/include/ddprof_cli.hpp @@ -41,6 +41,7 @@ struct DDProfCLI { // Profiling options int pid{0}; bool global{false}; + bool inlining{true}; std::chrono::seconds upload_period; unsigned worker_period; // worker_period std::vector events; diff --git a/include/ddprof_context.hpp b/include/ddprof_context.hpp index 4396cbe79..6b35928c4 100644 --- a/include/ddprof_context.hpp +++ b/include/ddprof_context.hpp @@ -21,6 +21,7 @@ namespace ddprof { struct DDProfContext { struct { bool enable{true}; + bool inlining{true}; std::chrono::seconds upload_period{}; bool fault_info{true}; int nice{-1}; diff --git a/include/ddres_list.hpp b/include/ddres_list.hpp index d2f512e73..59ee55621 100644 --- a/include/ddres_list.hpp +++ b/include/ddres_list.hpp @@ -21,6 +21,7 @@ enum { DD_COMMON_START_RANGE = 1000, DD_NATIVE_START_RANGE = 2000 }; #define NATIVE_ERROR_TABLE(X) \ X(DWFL_LIB_ERROR, "error withing dwfl library") \ + X(NO_DWARF, "No dwarf information available") \ X(UW_CACHE_ERROR, "error from unwinding cache") \ X(UW_ERROR, "error from unwinding code") \ X(UW_MAX_DEPTH, "max depth reached in unwinding") \ diff --git a/include/dwarf_helpers.hpp b/include/dwarf_helpers.hpp new file mode 100644 index 000000000..3118ee849 --- /dev/null +++ b/include/dwarf_helpers.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include "ddprof_defs.hpp" +#include "ddres_def.hpp" +#include "dwfl_internals.hpp" + +#include + +namespace ddprof { +struct DieInformation { + struct Function { + ElfAddress_t start_addr{}; + ElfAddress_t 
end_addr{}; + const char *func_name{}; + const char *file_name{}; + int decl_line_number{0}; + int call_line_number{0}; + int parent_pos{-1}; // position within the die vector + SymbolIdx_t symbol_idx = -1; + }; + std::vector die_mem_vec{}; +}; + +// debug attribute functions +const char *get_attribute_name(int attrCode); +int print_attribute(Dwarf_Attribute *attr, void *arg); + +DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, + DieInformation &die_information); +} // namespace ddprof diff --git a/include/dwfl_internals.hpp b/include/dwfl_internals.hpp index 06f06b227..969c203eb 100644 --- a/include/dwfl_internals.hpp +++ b/include/dwfl_internals.hpp @@ -5,5 +5,6 @@ #pragma once +#include #include #include diff --git a/include/dwfl_symbol.hpp b/include/dwfl_symbol.hpp index ca0a26217..b6accff4b 100644 --- a/include/dwfl_symbol.hpp +++ b/include/dwfl_symbol.hpp @@ -24,4 +24,5 @@ bool symbol_get_from_dwfl(Dwfl_Module *mod, ProcessAddress_t process_pc, bool compute_elf_range(ElfAddress_t file_pc, const GElf_Sym &elf_sym, ElfAddress_t &start_sym, ElfAddress_t &end_sym); + } // namespace ddprof diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index a17d5a450..349732c40 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -8,11 +8,13 @@ #include "ddprof_defs.hpp" #include "ddprof_file_info-i.hpp" #include "ddprof_module.hpp" +#include "ddres.hpp" #include "dso.hpp" #include "dso_symbol_lookup.hpp" #include "hash_helper.hpp" #include "symbol_map.hpp" #include "symbol_table.hpp" +#include "unwind_output.hpp" #include #include @@ -42,22 +44,33 @@ struct DwflSymbolLookupStats { class DwflSymbolLookup { public: + using SymbolRange = std::pair; // build and check env var to know check setting - DwflSymbolLookup(); + DwflSymbolLookup(bool inlining = true); // Get symbol from internal cache or fetch through dwarf - SymbolIdx_t get_or_insert(const DDProfMod &ddprof_mod, SymbolTable &table, - 
DsoSymbolLookup &dso_symbol_lookup, - FileInfoId_t file_info_id, - ProcessAddress_t process_pc, const Dso &dso); + void get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, + FileInfoId_t file_info_id, ProcessAddress_t process_pc, + const Dso &dso, std::vector &func_locs); - void erase(FileInfoId_t file_info_id) { _file_info_map.erase(file_info_id); } + void erase(FileInfoId_t file_info_id) { + _file_info_function_map.erase(file_info_id); + } unsigned size() const; const DwflSymbolLookupStats &stats() const { return _stats; } DwflSymbolLookupStats &stats() { return _stats; } + // todo: we can have a better type than symbol idx for the line + using InlineMap = NestedSymbolMap; + struct SymbolWrapper { + LineMap _line_map; + SymbolMap _symbol_map; + InlineMap _inline_map; + }; + private: /// Set through env var (DDPROF_CACHE_SETTING) in case of doubts on cache enum SymbolLookupSetting { @@ -67,24 +80,44 @@ class DwflSymbolLookup { SymbolLookupSetting _lookup_setting{K_CACHE_ON}; - SymbolIdx_t insert(const DDProfMod &ddprof_mod, SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, const Dso &dso, - SymbolMap &map); + SymbolMap::ValueType &insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper); + + void add_fun_loc(DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + const SymbolMap::ValueType &parent_sym, ElfAddress_t elf_pc, + ProcessAddress_t process_pc, std::vector &func_locs); + + static DDRes insert_inlining_info(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper, + SymbolMap::ValueType &parent_func); + + static NestedSymbolMap::FindRes + get_inlined(SymbolWrapper &symbol_wrapper, ElfAddress_t process_pc, + ElfAddress_t elf_pc, const 
SymbolMap::ValueType &parent_sym, + std::vector &func_locs); // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes // The unordered map stores symbols per file, // The map stores symbols per address range - using FileInfo2SymbolMap = std::unordered_map; - using FileInfo2SymbolVT = FileInfo2SymbolMap::value_type; + using FileInfo2SymbolWrapper = + std::unordered_map; + using FileInfo2LineMap = std::unordered_map; + using FileInfo2SymbolVT = FileInfo2SymbolWrapper::value_type; static bool symbol_lookup_check(Dwfl_Module *mod, ElfAddress_t process_pc, const Symbol &symbol); // unordered map of DSO elements - FileInfo2SymbolMap _file_info_map; + FileInfo2SymbolWrapper _file_info_function_map; + FileInfo2LineMap _file_info_inlining_map; DwflSymbolLookupStats _stats; + bool _inlining; }; } // namespace ddprof diff --git a/include/symbol.hpp b/include/symbol.hpp index 0e54e7a3c..040781490 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -15,13 +15,13 @@ namespace ddprof { class Symbol { public: - Symbol() : _lineno(0) {} + Symbol() : _func_start_lineno(0) {} // Warning : Generates some string copies (these are not rvalues) Symbol(std::string symname, std::string demangle_name, uint32_t lineno, - std::string srcpath) + std::string srcpath, int parent_idx = -1) : _symname(std::move(symname)), _demangle_name(std::move(demangle_name)), - _lineno(lineno), _srcpath(std::move(srcpath)) {} + _func_start_lineno(lineno), _srcpath(std::move(srcpath)) {} // OUTPUT OF ADDRINFO std::string _symname; @@ -30,7 +30,7 @@ class Symbol { std::string _demangle_name; // OUTPUT OF LINE INFO - uint32_t _lineno; + uint32_t _func_start_lineno; std::string _srcpath; }; } // namespace ddprof diff --git a/include/symbol_hdr.hpp b/include/symbol_hdr.hpp index 621a8b402..7b17f1bab 100644 --- a/include/symbol_hdr.hpp +++ b/include/symbol_hdr.hpp @@ -19,8 +19,8 @@ namespace ddprof { struct SymbolHdr { - explicit SymbolHdr(std::string_view 
path_to_proc = "") - : _runtime_symbol_lookup(path_to_proc) {} + explicit SymbolHdr(std::string_view path_to_proc = "", bool inlining = true) + : _dwfl_symbol_lookup(inlining), _runtime_symbol_lookup(path_to_proc) {} void display_stats() const { _dwfl_symbol_lookup.stats().display(_dwfl_symbol_lookup.size()); _dso_symbol_lookup.stats_display(); diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 00ffb7bf8..005783560 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -7,14 +7,14 @@ #include #include "ddprof_defs.hpp" +#include namespace ddprof { -class SymbolSpan { +template class TSpan { public: - SymbolSpan() : _end(0), _symbol_idx(-1) {} - SymbolSpan(Offset_t end, SymbolIdx_t symbol_idx) - : _end(end), _symbol_idx(symbol_idx) {} + TSpan() : _end(0), _value(DefaultValue) {} + TSpan(Offset_t end, T value) : _end(end), _value(value) {} // push end further void set_end(Offset_t end) { if (end > _end) { @@ -23,23 +23,27 @@ class SymbolSpan { } [[nodiscard]] Offset_t get_end() const { return _end; } - [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } + [[nodiscard]] T get_value() const { return _value; } private: // symbol end within the segment (considering file offset) Offset_t _end; // element inside internal symbol cache - SymbolIdx_t _symbol_idx; + T _value; }; -class SymbolMap : private std::map { +using SymbolSpan = TSpan; +using LineSpan = TSpan; + +template +class SpanMap : private std::map { public: - using Map = std::map; - using It = Map::iterator; - using ConstIt = Map::const_iterator; + using Map = std::map; + using It = typename Map::iterator; + using ConstIt = typename Map::const_iterator; using FindRes = std::pair; using ValueType = - Map::value_type; // key value pair ElfAddress_t, SymbolSpanMap + typename Map::value_type; // key value pair ElfAddress_t, SymbolSpanMap // Functions we forward from underlying map type using Map::begin; @@ -51,8 +55,95 @@ class SymbolMap : private std::map { using 
Map::erase; using Map::size; + bool is_within(const Offset_t &norm_pc, const SpanMap::ValueType &kv) { + if (norm_pc < kv.first) { + return false; + } + if (norm_pc > kv.second.get_end()) { + return false; + } + return true; + } + + FindRes find_closest(Offset_t norm_pc) { + // First element not less than (can match exactly a start addr) + auto it = Map::lower_bound(norm_pc); + if (it != end()) { // map is empty + if (SpanMap::is_within(norm_pc, *it)) { + return {it, true}; + } + } + + // previous element is more likely to contain our addr + if (it != begin()) { + --it; + } else { // map is empty + return {end(), false}; + } + // element can not be end (as we reversed or exit) + return {it, is_within(norm_pc, *it)}; + } +}; + +using SymbolMap = SpanMap; +using LineMap = SpanMap; + +class NestedSymbolValue { +public: + NestedSymbolValue() : _symbol_idx(-1), _call_line_number(0) {} + NestedSymbolValue(SymbolIdx_t symbol_idx, int call_line_number = 0) + : _symbol_idx(symbol_idx), _call_line_number(call_line_number) {} + [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } + [[nodiscard]] int get_call_line_number() const { return _call_line_number; } + +private: + SymbolIdx_t _symbol_idx; + int _call_line_number; +}; + +struct NestedSymbolKey { + ElfAddress_t start; + ElfAddress_t end; + NestedSymbolKey(ElfAddress_t s, ElfAddress_t e) : start(s), end(e) {} + bool operator<(const NestedSymbolKey &other) const { + if (start != other.start) { + return start < other.start; + } + // Sort by end address in descending order if start addresses are equal + return end > other.end; + } +}; + +class NestedSymbolMap : private std::map { +public: + using Map = std::map; + using It = Map::iterator; + using ConstIt = Map::const_iterator; + using FindRes = std::pair; + using ValueType = Map::value_type; + using Map::begin; + using Map::clear; + using Map::emplace; + using Map::emplace_hint; + using Map::empty; + using Map::end; + using Map::erase; + using Map::size; 
+ + // todo: possible improvement to return a table of all elements matching + + FindRes find_parent(ConstIt it, const NestedSymbolKey &parent_bound, + Offset_t norm_pc) const; + + // returns the element that is the most leaf + FindRes find_closest(Offset_t norm_pc, + const NestedSymbolKey &parent_bound) const; + + FindRes find_closest_hint(Offset_t norm_pc, + const NestedSymbolKey &parent_bound, + ConstIt hint) const; + static bool is_within(const Offset_t &norm_pc, const ValueType &kv); - FindRes find_closest(Offset_t norm_pc); }; } // namespace ddprof diff --git a/include/unwind_helpers.hpp b/include/unwind_helpers.hpp index 7788dd57c..9639caf7e 100644 --- a/include/unwind_helpers.hpp +++ b/include/unwind_helpers.hpp @@ -17,6 +17,11 @@ struct UnwindState; bool is_max_stack_depth_reached(const UnwindState &us); +DDRes add_frame(const std::vector &fun_locs, UnwindState *us); + +DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, + ElfAddress_t pc, UnwindState *us); + DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us); diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 267c7f0c3..8996435c9 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -17,10 +17,10 @@ namespace ddprof { struct FunLoc { - uint64_t ip; // Relative to file, not VMA - SymbolIdx_t _symbol_idx; - MapInfoIdx_t _map_info_idx; - + uint64_t _ip{}; + uint32_t _lineno{}; + SymbolIdx_t _symbol_idx{-1}; + MapInfoIdx_t _map_info_idx{-1}; friend auto operator<=>(const FunLoc &, const FunLoc &) = default; }; diff --git a/include/unwind_output_hash.hpp b/include/unwind_output_hash.hpp index c5fc1d4c6..f7452b8db 100644 --- a/include/unwind_output_hash.hpp +++ b/include/unwind_output_hash.hpp @@ -15,7 +15,7 @@ struct UnwindOutputHash { hash_combine(seed, uo.pid); hash_combine(seed, uo.tid); for (const auto &fl : uo.locs) { - hash_combine(seed, fl.ip); + hash_combine(seed, fl._ip); hash_combine(seed, 
fl._symbol_idx); hash_combine(seed, fl._map_info_idx); } diff --git a/include/unwind_state.hpp b/include/unwind_state.hpp index 02947c685..51c7c7497 100644 --- a/include/unwind_state.hpp +++ b/include/unwind_state.hpp @@ -39,8 +39,8 @@ struct UnwindRegisters { /// Single structure with everything necessary in unwinding. The structure is /// given through callbacks struct UnwindState { - explicit UnwindState(int dd_profiling_fd = -1) - : dso_hdr("", dd_profiling_fd) { + explicit UnwindState(int dd_profiling_fd = -1, bool inlining = true) + : dso_hdr("", dd_profiling_fd), symbol_hdr("", inlining) { output.clear(); output.locs.reserve(kMaxStackDepth); } diff --git a/setup_env.sh b/setup_env.sh index 069189d51..f616bd6a4 100755 --- a/setup_env.sh +++ b/setup_env.sh @@ -76,6 +76,12 @@ CmakeWithOptions() { eval ${cmake_cmd} } + +RelDebCMake() { + local BUILD_TYPE=RelWithDebInfo + CmakeWithOptions ${BUILD_TYPE} $@ +} + RelCMake() { local BUILD_TYPE=Release CmakeWithOptions ${BUILD_TYPE} $@ diff --git a/src/ddprof_cli.cc b/src/ddprof_cli.cc index 7d38caee3..019c31e1d 100644 --- a/src/ddprof_cli.cc +++ b/src/ddprof_cli.cc @@ -164,6 +164,10 @@ int DDProfCLI::parse(int argc, const char *argv[]) { ->excludes(pid_opt) ->excludes(exec_option); + app.add_option("--inlining", inlining, "Add inlining information.\n") + ->group("Profiling settings") + ->default_val(true); + app.add_flag("--timeline,-t", timeline, "Enables Timeline view in the Datadog UI.\n" "Works by adding timestmaps to certain events.") @@ -469,6 +473,8 @@ void DDProfCLI::print() const { if (!enable) { PRINT_NFO(" - enable: %s", enable ? "true" : "false"); } + PRINT_NFO(" - inlining: %s", inlining ? 
"true" : "false"); + if (!cpu_affinity.empty()) { PRINT_NFO(" - cpu_affinity: %s", cpu_affinity.c_str()); } diff --git a/src/ddprof_context_lib.cc b/src/ddprof_context_lib.cc index 68a06c423..91148a7ce 100644 --- a/src/ddprof_context_lib.cc +++ b/src/ddprof_context_lib.cc @@ -72,6 +72,8 @@ void copy_cli_values(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { ctx.params.pid = ddprof_cli.pid; } ctx.params.upload_period = ddprof_cli.upload_period; + ctx.params.inlining = ddprof_cli.inlining; + // todo : naming ? ctx.params.worker_period = ddprof_cli.worker_period; // Advanced diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 82994f7a3..1541dc085 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -339,7 +339,8 @@ DDRes worker_library_init(DDProfContext &ctx, // Make sure worker index is initialized correctly ctx.worker_ctx.i_current_pprof = 0; ctx.worker_ctx.exp_tid = {0}; - ctx.worker_ctx.us = new UnwindState(ctx.params.dd_profiling_fd); + ctx.worker_ctx.us = + new UnwindState(ctx.params.dd_profiling_fd, ctx.params.inlining); std::fill(ctx.worker_ctx.lost_events_per_watcher.begin(), ctx.worker_ctx.lost_events_per_watcher.end(), 0UL); diff --git a/src/dwarf_helpers.cc b/src/dwarf_helpers.cc new file mode 100644 index 000000000..f352a0374 --- /dev/null +++ b/src/dwarf_helpers.cc @@ -0,0 +1,377 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. 
+ +#include "dwarf_helpers.hpp" + +#include "ddres.hpp" +#include "logger.hpp" + +#include +#include +#include + +namespace ddprof { + +struct DieSearchParam { + Dwarf_Addr addr; + Dwarf_Die *die_mem; +}; + +/* dwarf_getfuncs callback for non-inlined function search: copies the first DW_TAG_subprogram containing ad->addr into ad->die_mem and aborts the walk. dwarf_haspc returns -1 on error, so only a strictly positive result is a match. */ +static int die_search_func_cb(Dwarf_Die *fn_die, void *data) { + DieSearchParam *ad = static_cast<DieSearchParam *>(data); + if (dwarf_tag(fn_die) == DW_TAG_subprogram && dwarf_haspc(fn_die, ad->addr) > 0) { + memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); + return DWARF_CB_ABORT; + } + return DWARF_CB_OK; +} +// Returns die_mem filled with the subprogram DIE of cu_die that contains addr, or nullptr when none is found or libdw reports an error. +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) { + DieSearchParam ad; + ad.addr = addr; + ad.die_mem = die_mem; + /* dwarf_getscopes can't find subprogram. dwarf_getfuncs returns 0 when the callback never aborted (no match) and -1 on error: in both cases die_mem was not filled. */ + if (dwarf_getfuncs(cu_die, die_search_func_cb, &ad, 0) <= 0) { + return nullptr; + } + return die_mem; +} + +// return index to added element, else returns -1 +static int store_die_information(Dwarf_Die *sc_die, int parent_index, + DieInformation &data, + Dwarf_Files *dwarf_files) { +#ifdef DEEP_DEBUG + // dwarf_dieoffset is good to figure out what element we are working on + dwarf_getattrs(sc_die, print_attribute, nullptr, 0); +#endif + + // function or inlined function + DieInformation::Function function{}; + // die_name is usually the raw function name (no mangling info) + // link name can have mangling info + function.func_name = dwarf_diename(sc_die); + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { + if (attr) { + Dwarf_Addr ret_value; + if (dwarf_formaddr(attr, &ret_value) == 0) { + function.start_addr = ret_value; + } + } + } + // end is stored as a unsigned (not as a pointer) + if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { + if (attr) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.end_addr = function.start_addr + return_uval; + } + } + } + // some of the functions don't have the start and end 
info + if (!function.start_addr || !function.end_addr) { + return -1; + } + + // declaration files come with an indirection + // dwarf_attr_integrate follows the indirections + // for inlined functions, we could cache this access (as we are making several + // of them) + if (dwarf_files && + ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_file, &attr_mem)))) { + Dwarf_Word fileIdx = 0; + if (dwarf_formudata(attr, &fileIdx) == 0) { + const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); + // Store or process the file name + function.file_name = file; + } + } + + if ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.decl_line_number = return_uval; + } + } + + if ((attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.call_line_number = return_uval; + } + } + // other fields of interest + // - DW_AT_call_file + // - DW_AT_call_line to define parent line + + // we often can find duplicates within the dwarf information + function.parent_pos = parent_index; + data.die_mem_vec.push_back(std::move(function)); + return (data.die_mem_vec.size() - 1); +} + +static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, + int parent_index, + DieInformation &die_info, + Dwarf_Die *die_mem, + Dwarf_Files *dwarf_files) { + Dwarf_Die child_die; + int ret; + ret = dwarf_child(current_die, die_mem); + if (ret != 0) + return nullptr; + do { + int tag_val = dwarf_tag(die_mem); + int next_parent_idx = parent_index; + if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { + int current_idx = + store_die_information(die_mem, parent_index, die_info, dwarf_files); + next_parent_idx = (current_idx != -1 ? 
current_idx : next_parent_idx); + } + // + // todo: optimize the exploration to avoid going through soo many elements + // Child dies can have functions, even without being a child of another func + find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die, + dwarf_files); + } while (dwarf_siblingof(die_mem, die_mem) == 0); + return nullptr; +} + +DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, + DieInformation &die_information) { + Dwarf_Files *files = nullptr; + size_t nfiles = 0; + assert(cudie); + // cached within the CU + if (dwarf_getsrcfiles(cudie, &files, &nfiles) != 0) { + files = nullptr; + } + Dwarf_Die die_mem; + Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); + if (sc_die == nullptr) { + LG_DBG("Unable to retrieve sc_die at %lx", elf_addr); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + // store parent function at index 0 + if (store_die_information(sc_die, -1, die_information, files) == -1) { + LG_DBG("Incomplete die information for parent function"); + // On some functions we are unable to find start / end info + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + find_functions_in_child_die(sc_die, 0, die_information, &die_mem, files); + + for (auto &el : die_information.die_mem_vec) { + LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / file=%s", + el.start_addr, el.end_addr, el.func_name, el.file_name); + } + return {}; +} + +const char *get_attribute_name(int attrCode) { + // Should not get init unless cablled + // Something like following awk can help generate this map: + // cat file_with_dwarf_attributes.txt | + // awk '!/\/\*/ { print "{ "$1", \""$1"\"},"}' + static const std::map attributeNameMap = { + {DW_AT_sibling, "DW_AT_sibling"}, + {DW_AT_location, "DW_AT_location"}, + {DW_AT_name, "DW_AT_name"}, + {DW_AT_ordering, "DW_AT_ordering"}, + {DW_AT_byte_size, "DW_AT_byte_size"}, + {DW_AT_bit_size, "DW_AT_bit_size"}, + {DW_AT_stmt_list, "DW_AT_stmt_list"}, + {DW_AT_low_pc, 
"DW_AT_low_pc"}, + {DW_AT_high_pc, "DW_AT_high_pc"}, + {DW_AT_language, "DW_AT_language"}, + {DW_AT_discr, "DW_AT_discr"}, + {DW_AT_discr_value, "DW_AT_discr_value"}, + {DW_AT_visibility, "DW_AT_visibility"}, + {DW_AT_import, "DW_AT_import"}, + {DW_AT_string_length, "DW_AT_string_length"}, + {DW_AT_common_reference, "DW_AT_common_reference"}, + {DW_AT_comp_dir, "DW_AT_comp_dir"}, + {DW_AT_const_value, "DW_AT_const_value"}, + {DW_AT_containing_type, "DW_AT_containing_type"}, + {DW_AT_default_value, "DW_AT_default_value"}, + {DW_AT_inline, "DW_AT_inline"}, + {DW_AT_is_optional, "DW_AT_is_optional"}, + {DW_AT_lower_bound, "DW_AT_lower_bound"}, + {DW_AT_producer, "DW_AT_producer"}, + {DW_AT_prototyped, "DW_AT_prototyped"}, + {DW_AT_return_addr, "DW_AT_return_addr"}, + {DW_AT_start_scope, "DW_AT_start_scope"}, + {DW_AT_bit_stride, "DW_AT_bit_stride"}, + {DW_AT_upper_bound, "DW_AT_upper_bound"}, + {DW_AT_abstract_origin, "DW_AT_abstract_origin"}, + {DW_AT_accessibility, "DW_AT_accessibility"}, + {DW_AT_address_class, "DW_AT_address_class"}, + {DW_AT_artificial, "DW_AT_artificial"}, + {DW_AT_base_types, "DW_AT_base_types"}, + {DW_AT_calling_convention, "DW_AT_calling_convention"}, + {DW_AT_count, "DW_AT_count"}, + {DW_AT_data_member_location, "DW_AT_data_member_location"}, + {DW_AT_decl_column, "DW_AT_decl_column"}, + {DW_AT_decl_file, "DW_AT_decl_file"}, + {DW_AT_decl_line, "DW_AT_decl_line"}, + {DW_AT_declaration, "DW_AT_declaration"}, + {DW_AT_discr_list, "DW_AT_discr_list"}, + {DW_AT_encoding, "DW_AT_encoding"}, + {DW_AT_external, "DW_AT_external"}, + {DW_AT_frame_base, "DW_AT_frame_base"}, + {DW_AT_friend, "DW_AT_friend"}, + {DW_AT_identifier_case, "DW_AT_identifier_case"}, + {DW_AT_namelist_item, "DW_AT_namelist_item"}, + {DW_AT_priority, "DW_AT_priority"}, + {DW_AT_segment, "DW_AT_segment"}, + {DW_AT_specification, "DW_AT_specification"}, + {DW_AT_static_link, "DW_AT_static_link"}, + {DW_AT_type, "DW_AT_type"}, + {DW_AT_use_location, "DW_AT_use_location"}, + 
{DW_AT_variable_parameter, "DW_AT_variable_parameter"}, + {DW_AT_virtuality, "DW_AT_virtuality"}, + {DW_AT_vtable_elem_location, "DW_AT_vtable_elem_location"}, + {DW_AT_allocated, "DW_AT_allocated"}, + {DW_AT_associated, "DW_AT_associated"}, + {DW_AT_data_location, "DW_AT_data_location"}, + {DW_AT_byte_stride, "DW_AT_byte_stride"}, + {DW_AT_entry_pc, "DW_AT_entry_pc"}, + {DW_AT_use_UTF8, "DW_AT_use_UTF8"}, + {DW_AT_extension, "DW_AT_extension"}, + {DW_AT_ranges, "DW_AT_ranges"}, + {DW_AT_trampoline, "DW_AT_trampoline"}, + {DW_AT_call_column, "DW_AT_call_column"}, + {DW_AT_call_file, "DW_AT_call_file"}, + {DW_AT_call_line, "DW_AT_call_line"}, + {DW_AT_description, "DW_AT_description"}, + {DW_AT_binary_scale, "DW_AT_binary_scale"}, + {DW_AT_decimal_scale, "DW_AT_decimal_scale"}, + {DW_AT_small, "DW_AT_small"}, + {DW_AT_decimal_sign, "DW_AT_decimal_sign"}, + {DW_AT_digit_count, "DW_AT_digit_count"}, + {DW_AT_picture_string, "DW_AT_picture_string"}, + {DW_AT_mutable, "DW_AT_mutable"}, + {DW_AT_threads_scaled, "DW_AT_threads_scaled"}, + {DW_AT_explicit, "DW_AT_explicit"}, + {DW_AT_object_pointer, "DW_AT_object_pointer"}, + {DW_AT_endianity, "DW_AT_endianity"}, + {DW_AT_elemental, "DW_AT_elemental"}, + {DW_AT_pure, "DW_AT_pure"}, + {DW_AT_recursive, "DW_AT_recursive"}, + {DW_AT_signature, "DW_AT_signature"}, + {DW_AT_main_subprogram, "DW_AT_main_subprogram"}, + {DW_AT_data_bit_offset, "DW_AT_data_bit_offset"}, + {DW_AT_const_expr, "DW_AT_const_expr"}, + {DW_AT_enum_class, "DW_AT_enum_class"}, + {DW_AT_linkage_name, "DW_AT_linkage_name"}, + {DW_AT_string_length_bit_size, "DW_AT_string_length_bit_size"}, + {DW_AT_string_length_byte_size, "DW_AT_string_length_byte_size"}, + {DW_AT_rank, "DW_AT_rank"}, + {DW_AT_str_offsets_base, "DW_AT_str_offsets_base"}, + {DW_AT_addr_base, "DW_AT_addr_base"}, + {DW_AT_rnglists_base, "DW_AT_rnglists_base"}, + {DW_AT_dwo_name, "DW_AT_dwo_name"}, + {DW_AT_reference, "DW_AT_reference"}, + {DW_AT_rvalue_reference, "DW_AT_rvalue_reference"}, + 
{DW_AT_macros, "DW_AT_macros"}, + {DW_AT_call_all_calls, "DW_AT_call_all_calls"}, + {DW_AT_call_all_source_calls, "DW_AT_call_all_source_calls"}, + {DW_AT_call_all_tail_calls, "DW_AT_call_all_tail_calls"}, + {DW_AT_call_return_pc, "DW_AT_call_return_pc"}, + {DW_AT_call_value, "DW_AT_call_value"}, + {DW_AT_call_origin, "DW_AT_call_origin"}, + {DW_AT_call_parameter, "DW_AT_call_parameter"}, + {DW_AT_call_pc, "DW_AT_call_pc"}, + {DW_AT_call_tail_call, "DW_AT_call_tail_call"}, + {DW_AT_call_target, "DW_AT_call_target"}, + {DW_AT_call_target_clobbered, "DW_AT_call_target_clobbered"}, + {DW_AT_call_data_location, "DW_AT_call_data_location"}, + {DW_AT_call_data_value, "DW_AT_call_data_value"}, + {DW_AT_noreturn, "DW_AT_noreturn"}, + {DW_AT_alignment, "DW_AT_alignment"}, + {DW_AT_export_symbols, "DW_AT_export_symbols"}, + {DW_AT_deleted, "DW_AT_deleted"}, + {DW_AT_defaulted, "DW_AT_defaulted"}, + {DW_AT_loclists_base, "DW_AT_loclists_base"}, + {DW_AT_lo_user, "DW_AT_lo_user"}, + {DW_AT_MIPS_fde, "DW_AT_MIPS_fde"}, + {DW_AT_MIPS_loop_begin, "DW_AT_MIPS_loop_begin"}, + {DW_AT_MIPS_tail_loop_begin, "DW_AT_MIPS_tail_loop_begin"}, + {DW_AT_MIPS_epilog_begin, "DW_AT_MIPS_epilog_begin"}, + {DW_AT_MIPS_loop_unroll_factor, "DW_AT_MIPS_loop_unroll_factor"}, + {DW_AT_MIPS_software_pipeline_depth, + "DW_AT_MIPS_software_pipeline_depth"}, + {DW_AT_MIPS_linkage_name, "DW_AT_MIPS_linkage_name"}, + {DW_AT_MIPS_stride, "DW_AT_MIPS_stride"}, + {DW_AT_MIPS_abstract_name, "DW_AT_MIPS_abstract_name"}, + {DW_AT_MIPS_clone_origin, "DW_AT_MIPS_clone_origin"}, + {DW_AT_MIPS_has_inlines, "DW_AT_MIPS_has_inlines"}, + {DW_AT_MIPS_stride_byte, "DW_AT_MIPS_stride_byte"}, + {DW_AT_MIPS_stride_elem, "DW_AT_MIPS_stride_elem"}, + {DW_AT_MIPS_ptr_dopetype, "DW_AT_MIPS_ptr_dopetype"}, + {DW_AT_MIPS_allocatable_dopetype, "DW_AT_MIPS_allocatable_dopetype"}, + {DW_AT_MIPS_assumed_shape_dopetype, "DW_AT_MIPS_assumed_shape_dopetype"}, + {DW_AT_MIPS_assumed_size, "DW_AT_MIPS_assumed_size"}, + {DW_AT_sf_names, 
"DW_AT_sf_names"}, + {DW_AT_src_info, "DW_AT_src_info"}, + {DW_AT_mac_info, "DW_AT_mac_info"}, + {DW_AT_src_coords, "DW_AT_src_coords"}, + {DW_AT_body_begin, "DW_AT_body_begin"}, + {DW_AT_body_end, "DW_AT_body_end"}, + {DW_AT_GNU_vector, "DW_AT_GNU_vector"}, + {DW_AT_GNU_guarded_by, "DW_AT_GNU_guarded_by"}, + {DW_AT_GNU_pt_guarded_by, "DW_AT_GNU_pt_guarded_by"}, + {DW_AT_GNU_guarded, "DW_AT_GNU_guarded"}, + {DW_AT_GNU_pt_guarded, "DW_AT_GNU_pt_guarded"}, + {DW_AT_GNU_locks_excluded, "DW_AT_GNU_locks_excluded"}, + {DW_AT_GNU_exclusive_locks_required, + "DW_AT_GNU_exclusive_locks_required"}, + {DW_AT_GNU_shared_locks_required, "DW_AT_GNU_shared_locks_required"}, + {DW_AT_GNU_odr_signature, "DW_AT_GNU_odr_signature"}, + {DW_AT_GNU_template_name, "DW_AT_GNU_template_name"}, + {DW_AT_GNU_call_site_value, "DW_AT_GNU_call_site_value"}, + {DW_AT_GNU_call_site_data_value, "DW_AT_GNU_call_site_data_value"}, + {DW_AT_GNU_call_site_target, "DW_AT_GNU_call_site_target"}, + {DW_AT_GNU_call_site_target_clobbered, + "DW_AT_GNU_call_site_target_clobbered"}, + {DW_AT_GNU_tail_call, "DW_AT_GNU_tail_call"}, + {DW_AT_GNU_all_tail_call_sites, "DW_AT_GNU_all_tail_call_sites"}, + {DW_AT_GNU_all_call_sites, "DW_AT_GNU_all_call_sites"}, + {DW_AT_GNU_all_source_call_sites, "DW_AT_GNU_all_source_call_sites"}, + {DW_AT_GNU_locviews, "DW_AT_GNU_locviews"}, + {DW_AT_GNU_entry_view, "DW_AT_GNU_entry_view"}, + {DW_AT_GNU_macros, "DW_AT_GNU_macros"}, + {DW_AT_GNU_deleted, "DW_AT_GNU_deleted"}, + {DW_AT_GNU_dwo_name, "DW_AT_GNU_dwo_name"}, + {DW_AT_GNU_dwo_id, "DW_AT_GNU_dwo_id"}, + {DW_AT_GNU_ranges_base, "DW_AT_GNU_ranges_base"}, + {DW_AT_GNU_addr_base, "DW_AT_GNU_addr_base"}, + {DW_AT_GNU_pubnames, "DW_AT_GNU_pubnames"}, + {DW_AT_GNU_pubtypes, "DW_AT_GNU_pubtypes"}, + {DW_AT_GNU_numerator, "DW_AT_GNU_numerator"}, + {DW_AT_GNU_denominator, "DW_AT_GNU_denominator"}, + {DW_AT_GNU_bias, "DW_AT_GNU_bias"}, + {DW_AT_hi_user, "DW_AT_hi_user"}, + }; + auto it = attributeNameMap.find(attrCode); + if (it 
!= attributeNameMap.end()) { + return it->second.c_str(); + } + return "Unknown Attribute"; +} + +int print_attribute(Dwarf_Attribute *attr, void *arg) { + // Debug helper: logs the attribute code, its symbolic name and its form. + // Has the (Dwarf_Attribute *, void *) callback shape that dwarf_getattrs + // expects; arg is not used. + LG_DBG("Attribute code %x(%s) - form %d", attr->code, + get_attribute_name(attr->code), attr->form); + // Return 0 (presumably DWARF_CB_OK) so the caller keeps iterating attributes + return 0; +} +} // namespace ddprof diff --git a/src/dwfl_symbol.cc b/src/dwfl_symbol.cc index 26142d248..b310cb82c 100644 --- a/src/dwfl_symbol.cc +++ b/src/dwfl_symbol.cc @@ -24,62 +24,14 @@ bool symbol_get_from_dwfl(Dwfl_Module *mod, ProcessAddress_t process_pc, bool symbol_success = false; const char *lsymname = dwfl_module_addrinfo( mod, process_pc, &loffset, &elf_sym, &lshndxp, &lelfp, &lbias); -#ifdef DEBUG - int dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] addrinfo error -- Error:%s -- %lx", - dwfl_errmsg(dwfl_error_value), process_pc); - } -#else - dwfl_errno(); -#endif - if (lsymname) { symbol._symname = std::string(lsymname); symbol._demangle_name = Demangler::demangle(symbol._symname); symbol_success = true; } else { - return false; - } - -// #define FLAG_SYMBOL -// A small mechanism to create a trace around the expected function -#ifdef FLAG_SYMBOL - static constexpr std::string_view look_for_symb = "$x"; - if (symbol._demangle_name.find(look_for_symb) != std::string::npos) { - LG_NFO("DGB:: GOING THROUGH EXPECTED FUNC: %s", look_for_symb.data()); - } -#endif - Dwfl_Line *line = dwfl_module_getsrc(mod, process_pc); -#ifdef DEBUG - dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] dwfl_src error pc=%lx : Error:%s (Sym=%s)", process_pc, - dwfl_errmsg(dwfl_error_value), symbol._demangle_name.c_str()); - } -#else - dwfl_errno(); -#endif - - if 
(line) { - int linep; - const char *localsrcpath = - dwfl_lineinfo(line, &process_pc, static_cast(&linep), nullptr, - nullptr, nullptr); - if (localsrcpath) { - symbol._srcpath = std::string(localsrcpath); - symbol._lineno = static_cast(linep); - } -#ifdef DEBUG - dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] dwfl_lineinfo error pc=%lx : Error:%s (Sym=%s)", - process_pc, dwfl_errmsg(dwfl_error_value), - symbol._demangle_name.c_str()); - } -#else + // reset error state in case of dwfl error dwfl_errno(); -#endif + symbol_success = false; } return symbol_success; } diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 9c0e51917..93f63609b 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -5,20 +5,52 @@ #include "dwfl_symbol_lookup.hpp" -#include "ddprof_module.hpp" -#include "dwfl_hdr.hpp" -#include "dwfl_internals.hpp" +#include "dwarf_helpers.hpp" #include "dwfl_symbol.hpp" -#include "logger.hpp" #include #include #include -#include +#include // For std::iota +#include +#include +#include + +#define DEBUG namespace ddprof { -DwflSymbolLookup::DwflSymbolLookup() { +namespace { + +size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, + ElfAddress_t start_sym) { + size_t low = 0; + size_t high = nlines - 1; + + while (low <= high) { + size_t mid = low + (high - low) / 2; + Dwarf_Line *mid_line = dwarf_onesrcline(lines, mid); + Dwarf_Addr mid_addr; + dwarf_lineaddr(mid_line, &mid_addr); + + if (mid_addr < start_sym) { + low = mid + 1; + } else if (mid_addr > start_sym) { + high = mid - 1; + } else { + return mid; + } + + if (low == high) { + return low; + } + } + + return nlines; // Return a default value if no suitable index is found +} +} // namespace + +DwflSymbolLookup::DwflSymbolLookup(bool inlining) : _inlining(inlining) { if (const char *env_p = std::getenv("DDPROF_CACHE_SETTING")) { if (strcmp(env_p, "VALIDATE") == 0) { // Allows to compare the accuracy of the 
cache @@ -33,66 +65,250 @@ DwflSymbolLookup::DwflSymbolLookup() { unsigned DwflSymbolLookup::size() const { unsigned total_nb_elts = 0; - std::for_each( - _file_info_map.begin(), _file_info_map.end(), - [&](FileInfo2SymbolVT const &el) { total_nb_elts += el.second.size(); }); + std::for_each(_file_info_function_map.begin(), _file_info_function_map.end(), + [&](FileInfo2SymbolVT const &el) { + total_nb_elts += el.second._symbol_map.size(); + }); return total_nb_elts; } -/****************/ -/* Range implem */ -/****************/ +void DwflSymbolLookup::add_fun_loc( + DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + const SymbolMap::ValueType &parent_sym, ElfAddress_t elf_pc, + ProcessAddress_t process_pc, std::vector &func_locs) { + const auto last_inlined = _inlining + ? get_inlined(symbol_wrapper, process_pc, elf_pc, parent_sym, func_locs) + : NestedSymbolMap::FindRes{symbol_wrapper._inline_map.end(), false}; + uint32_t line = 0; + if (last_inlined.second) { + line = last_inlined.first->second.get_call_line_number(); + } else { + // line can be associated to parent + const auto line_find = symbol_wrapper._line_map.find_closest(elf_pc); + if (line_find.second) { + line = line_find.first->second.get_value(); + } + } + func_locs.emplace_back(FunLoc{._ip = process_pc, + ._lineno = line, + ._symbol_idx = parent_sym.second.get_value(), + ._map_info_idx = -1}); +} // Retrieve existing symbol or attempt to read from dwarf -SymbolIdx_t DwflSymbolLookup::get_or_insert(const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - FileInfoId_t file_info_id, - ProcessAddress_t process_pc, - const Dso &dso) { +void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + FileInfoId_t file_info_id, + ProcessAddress_t process_pc, + const Dso &dso, + std::vector &func_locs) { ++_stats._calls; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; - #ifdef DEBUG 
LG_DBG("Looking for : %lx = (%lx - %lx) / dso:%s", elf_pc, process_pc, ddprof_mod._low_addr, dso._filename.c_str()); #endif - SymbolMap &map = _file_info_map[file_info_id]; + SymbolWrapper &symbol_wrapper = _file_info_function_map[file_info_id]; + SymbolMap &map = symbol_wrapper._symbol_map; SymbolMap::FindRes const find_res = map.find_closest(elf_pc); if (find_res.second) { // already found the correct symbol #ifdef DEBUG - LG_DBG("Match : %lx,%lx -> %s,%d", find_res.first->first, + LG_DBG("Match: %lx,%lx -> %s,%d", find_res.first->first, find_res.first->second.get_end(), - table[find_res.first->second.get_symbol_idx()]._symname.c_str(), - find_res.first->second.get_symbol_idx()); + table[find_res.first->second.get_value()]._symname.c_str(), + find_res.first->second.get_value()); #endif // cache validation mechanism: force dwfl lookup to compare with matched // symbols if (_lookup_setting == K_CACHE_VALIDATE) { if (symbol_lookup_check(ddprof_mod._mod, process_pc, - table[find_res.first->second.get_symbol_idx()])) { + table[find_res.first->second.get_value()])) { ++_stats._errors; } } ++_stats._hit; - return find_res.first->second.get_symbol_idx(); + add_fun_loc(symbol_wrapper, *find_res.first, elf_pc, process_pc, func_locs); + } else { + const size_t previous_table_size = table.size(); + // insert symbols using elf info + SymbolMap::ValueType &elf_sym = + insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, + symbol_wrapper); + if (_inlining) { + // parse associated dwarf info + insert_inlining_info(dwfl, ddprof_mod, table, process_pc, dso, + symbol_wrapper, elf_sym); + } + // For newly added symbols, insure we don't leave a blank file name + for (unsigned i = previous_table_size; i < table.size(); ++i) { + auto &sym = table[i]; + if (sym._srcpath.empty()) { + // override with info from dso (this slightly mixes mappings and + // sources) But it helps a lot at Datadog (as mappings are ignored for + // now in UI) + sym._srcpath = 
dso.format_filename(); + } + } + add_fun_loc(symbol_wrapper, elf_sym, elf_pc, process_pc, func_locs); } + return; +} + +static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, + DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + SymbolTable &table, DieInformation &die_information) { - return insert(ddprof_mod, table, dso_symbol_lookup, process_pc, dso, map); + LineMap &line_map = symbol_wrapper._line_map; + DwflSymbolLookup::InlineMap &inline_map = symbol_wrapper._inline_map; + Dwarf_Lines *lines; + size_t nlines; + const DieInformation::Function &parent_func = die_information.die_mem_vec[0]; + SymbolIdx_t symbol_idx = parent_func.symbol_idx; + const Symbol *ref_sym = &table[symbol_idx]; + + if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { + LG_DBG("Unable to find source lines for %s", ref_sym->_symname.c_str()); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + NestedSymbolKey parent_bound{parent_func.start_addr, parent_func.end_addr}; + size_t start_index = + binary_search_start_index(lines, nlines, parent_bound.start); + if (start_index >= nlines) { + LG_DBG("Unable to match lines for %s", ref_sym->_symname.c_str()); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + auto hint_line = line_map.end(); + NestedSymbolMap::ConstIt hint_inline = inline_map.begin(); + Dwarf_Addr previous_addr = 0; + NestedSymbolMap::FindRes current_func{inline_map.end(), false}; + // store closest line per file (to avoid missmatches) + std::unordered_map closest_lines; + for (size_t line_index = start_index; line_index < nlines; ++line_index) { + Dwarf_Line *line = dwarf_onesrcline(lines, line_index); + Dwarf_Addr line_addr; + dwarf_lineaddr(line, &line_addr); + if (line_addr > parent_bound.end) { + break; + } + int lineno; + if (dwarf_lineno(line, &lineno) == -1) { + lineno = 0; // Handle the case where line number is not available + } + // Update the source path if necessary + const char *current_file = dwarf_linesrc(line, nullptr, nullptr); + + if (previous_addr && 
line_addr != previous_addr) { + if (hint_line != line_map.end() && + hint_line->second.get_value() == closest_lines[ref_sym->_srcpath]) { + // extend previous element + hint_line->second.set_end(previous_addr); + } else { + // New line element + hint_line = line_map.emplace_hint( + hint_line, + std::make_pair( + previous_addr, + LineSpan{line_addr - 1, closest_lines[ref_sym->_srcpath]})); + } +#ifdef DEBUG + LG_DBG("Associate %d (%lx->%lx) / %s to %s (vs %s)", + closest_lines[ref_sym->_srcpath], previous_addr, line_addr - 1, + current_file ? current_file : "undef", + ref_sym->_demangle_name.c_str(), ref_sym->_srcpath.c_str()); +#endif + current_func = inline_map.find_closest_hint(line_addr, parent_bound, + current_func.first); + if (!current_func.second) { + symbol_idx = parent_func.symbol_idx; + } else { + symbol_idx = current_func.first->second.get_symbol_idx(); + hint_inline = current_func.first; + } + ref_sym = &table[symbol_idx]; + } + // keep line, if it matches the symbol + // todo can be optimized to avoid conversion to string + closest_lines[std::string(current_file)] = static_cast(lineno); + previous_addr = line_addr; + } + return {}; } -SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, - const Dso &dso, SymbolMap &map) { +DDRes DwflSymbolLookup::insert_inlining_info( + Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, + ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, + SymbolMap::ValueType &parent_func) { + SymbolIdx_t parent_sym_idx = parent_func.second.get_value(); + Dwarf_Addr bias; + Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + if (!cudie) { + Symbol &parent_sym = table[parent_sym_idx]; + LG_DBG("No debug information for %s (%s)", + parent_sym._demangle_name.c_str(), dso._filename.c_str()); + return ddres_warn(DD_WHAT_NO_DWARF); + } + ElfAddress_t elf_addr = process_pc - bias; + 
DieInformation die_information; + if (!IsDDResOK(parse_die_information(cudie, elf_addr, die_information)) || + die_information.die_mem_vec.size() == 0) { + Symbol &parent_sym = table[parent_sym_idx]; + LG_DBG("Unable to extract die information for %s (%s)", + parent_sym._demangle_name.c_str(), dso._filename.c_str()); + return ddres_warn(DD_WHAT_NO_DWARF); + } + + // Extend the span of the elf symbol + if ((parent_func.second.get_end() + 1) < + die_information.die_mem_vec[0].end_addr) { + LG_DBG("Extending end of parent func from %lx to %lx", + parent_func.second.get_end(), + die_information.die_mem_vec[0].end_addr); + parent_func.second.set_end(die_information.die_mem_vec[0].end_addr); + } + die_information.die_mem_vec[0].symbol_idx = parent_sym_idx; + + // update parent file name + if (die_information.die_mem_vec[0].file_name) { + auto &sym = table[parent_sym_idx]; + sym._srcpath = die_information.die_mem_vec[0].file_name; + } + + NestedSymbolMap &inline_map = symbol_wrapper._inline_map; + for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { + DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; + current_func.symbol_idx = table.size(); + table.emplace_back( + Symbol({}, current_func.func_name ? current_func.func_name : "undef", + current_func.decl_line_number, + current_func.file_name ? 
current_func.file_name : "")); + // add to the lookup + inline_map.emplace( + NestedSymbolKey{current_func.start_addr, current_func.end_addr}, + NestedSymbolValue(current_func.symbol_idx, + current_func.call_line_number)); + } + + // associate line information to die information (includes file info) + if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, symbol_wrapper, table, + die_information))) { + LG_DBG("Error when parsing line information (%s)", dso._filename.c_str()); + } + return {}; +} + +SymbolMap::ValueType & +DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper) { Symbol symbol; GElf_Sym elf_sym; Offset_t lbias; + SymbolMap &func_map = symbol_wrapper._symbol_map; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; - if (!symbol_get_from_dwfl(ddprof_mod._mod, process_pc, symbol, elf_sym, lbias)) { ++_stats._no_dwfl_symbols; @@ -110,13 +326,10 @@ SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, #else SymbolIdx_t const symbol_idx = dso_symbol_lookup.get_or_insert(dso, table); #endif -#ifdef DEBUG - LG_NTC("Insert (dwfl failure): %lx,%lx -> %s,%d,%s", start_sym, end_sym, - table[symbol_idx]._symname.c_str(), symbol_idx, - dso.to_string().c_str()); -#endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - return symbol_idx; + auto res_emplace = + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + assert(res_emplace.second); + return *(res_emplace.first); } if (lbias != ddprof_mod._sym_bias) { @@ -133,33 +346,58 @@ SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, table.push_back(std::move(symbol)); Symbol &sym_ref = table.back(); - if (sym_ref._srcpath.empty()) { - // override with info from dso (this slightly mixes mappings and sources) - // But it helps a lot at Datadog (as mappings are ignored for now in UI) - sym_ref._srcpath = 
dso.format_filename(); - } + if (sym_ref._srcpath.empty()) {} if (!compute_elf_range(elf_pc, elf_sym, start_sym, end_sym)) { // elf section does not add up to something that makes sense // insert this PC without considering elf section start_sym = elf_pc; end_sym = elf_pc; -#ifdef DEBUG - LG_DBG("elf_range failure --> Insert: %lx,%lx -> %s,%d / shndx=%d", + LG_DBG("elf_range failure --> Insert: %lx,%lx -> %s, %d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); -#endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - return symbol_idx; } - +#define DEBUG #ifdef DEBUG + LG_DBG("-------------------------------"); LG_DBG("Insert: %lx,%lx -> %s,%d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); #endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - return symbol_idx; + auto res_emplace = + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + assert(res_emplace.second); + return *(res_emplace.first); + } +} + +NestedSymbolMap::FindRes DwflSymbolLookup::get_inlined( + SymbolWrapper &symbol_wrapper, ElfAddress_t process_pc, ElfAddress_t elf_pc, + const SymbolMap::ValueType &parent_sym, std::vector &func_locs) { + const InlineMap &inline_map = symbol_wrapper._inline_map; + + NestedSymbolKey parent_key{parent_sym.first, parent_sym.second.get_end()}; + NestedSymbolMap::FindRes find_inline = + inline_map.find_closest(elf_pc, parent_key); + NestedSymbolMap::FindRes last_found = {inline_map.end(), false}; + while (find_inline.second) { + uint32_t line = 0; + if (last_found.second) { + line = last_found.first->second.get_call_line_number(); + } else { + auto find_line = symbol_wrapper._line_map.find_closest(elf_pc); + if (find_line.second) { + line = find_line.first->second.get_value(); + } + } + func_locs.emplace_back( + FunLoc{._ip = process_pc, + ._lineno = line, + ._symbol_idx = find_inline.first->second.get_symbol_idx(), + ._map_info_idx = -1}); + 
find_inline = inline_map.find_parent(find_inline.first, parent_key, elf_pc); + last_found = find_inline; } + return last_found; } bool DwflSymbolLookup::symbol_lookup_check(Dwfl_Module *mod, diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index fe2d5ac61..5516838f7 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -27,8 +27,7 @@ void write_function(const Symbol &symbol, ddog_prof_Function *ffi_func) { ffi_func->name = to_CharSlice(symbol._demangle_name); ffi_func->system_name = to_CharSlice(symbol._symname); ffi_func->filename = to_CharSlice(symbol._srcpath); - // Not filed (can be computed if needed using the start range from elf) - ffi_func->start_line = 0; + ffi_func->start_line = symbol._func_start_lineno; } void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping) { @@ -43,8 +42,8 @@ void write_location(const FunLoc *loc, const MapInfo &mapinfo, const Symbol &symbol, ddog_prof_Location *ffi_location) { write_mapping(mapinfo, &ffi_location->mapping); write_function(symbol, &ffi_location->function); - ffi_location->address = loc->ip; - ffi_location->line = symbol._lineno; + ffi_location->address = loc->_ip; + ffi_location->line = loc->_lineno; } constexpr int k_max_value_types = @@ -394,14 +393,14 @@ void ddprof_print_sample(const UnwindOutput &uw_output, buf += ";"; } if (sym._symname.empty()) { - if (loc_it->ip == 0) { + if (loc_it->_ip == 0) { std::string_view const path{sym._srcpath}; auto pos = path.rfind('/'); buf += "("; buf += path.substr(pos == std::string_view::npos ? 
0 : pos + 1); buf += ")"; } else { - absl::StrAppendFormat(&buf, "%#x", loc_it->ip); + absl::StrAppendFormat(&buf, "%#x", loc_it->_ip); } } else { std::string_view const func{sym._symname}; diff --git a/src/runtime_symbol_lookup.cc b/src/runtime_symbol_lookup.cc index 8c696ebd9..3279712b3 100644 --- a/src/runtime_symbol_lookup.cc +++ b/src/runtime_symbol_lookup.cc @@ -74,7 +74,7 @@ bool RuntimeSymbolLookup::insert_or_replace(std::string_view symbol, "jit"); } else { // todo managing range erase (we can overal with other syms) - SymbolIdx_t const existing = find_res.first->second.get_symbol_idx(); + SymbolIdx_t const existing = find_res.first->second.get_value(); #ifdef DEBUG LG_DBG("Existyng sym -- %s (%lx-%lx)", symbol_table[existing]._demangle_name.c_str(), find_res.first->first, @@ -194,7 +194,7 @@ RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, if (!find_res.second) { flag_lookup_failure(symbol_info, jitdump_path); } - return find_res.second ? find_res.first->second.get_symbol_idx() : -1; + return find_res.second ? find_res.first->second.get_value() : -1; } SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, @@ -211,7 +211,7 @@ SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, if (!find_res.second) { flag_lookup_failure(symbol_info, "perfmap"); } - return find_res.second ? find_res.first->second.get_symbol_idx() : -1; + return find_res.second ? find_res.first->second.get_value() : -1; } } // namespace ddprof diff --git a/src/symbol_map.cc b/src/symbol_map.cc index fb1871ace..cf06774ce 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -4,38 +4,82 @@ // Datadog, Inc. 
#include "symbol_map.hpp" +#include +#include namespace ddprof { -bool SymbolMap::is_within(const Offset_t &norm_pc, - const SymbolMap::ValueType &kv) { - if (norm_pc < kv.first) { +// parent span acts as a bound +NestedSymbolMap::FindRes +NestedSymbolMap::find_parent(NestedSymbolMap::ConstIt it, + const NestedSymbolKey &parent_bound, + Offset_t norm_pc) const { + while (it != begin()) { + --it; + if (it->first < parent_bound) { + return {end(), false}; + } + if (is_within(norm_pc, *it)) { + return {it, true}; + } + } + return {end(), false}; +} + +NestedSymbolMap::FindRes +NestedSymbolMap::find_closest(Offset_t norm_pc, + const NestedSymbolKey &parent_bound) const { + // Use the element with the lowest end possible, to ensure we find the + // deepest element + auto it = lower_bound(NestedSymbolKey{norm_pc, 0}); + if (it != end()) { // map not empty + if (is_within(norm_pc, *it)) { + return {it, true}; + } + } + return find_parent(it, parent_bound, norm_pc); +} + +bool NestedSymbolMap::is_within(const Offset_t &norm_pc, + const NestedSymbolMap::ValueType &kv) { + if (norm_pc < kv.first.start) { return false; } - if (norm_pc > kv.second.get_end()) { + if (norm_pc > kv.first.end) { return false; } return true; } -SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { - - // First element not less than (can match exactly a start addr) - auto it = lower_bound(norm_pc); - if (it != end()) { // map is empty - if (SymbolMap::is_within(norm_pc, *it)) { - return {it, true}; - } +NestedSymbolMap::FindRes NestedSymbolMap::find_closest_hint( + Offset_t norm_pc, const NestedSymbolKey &parent_bound, ConstIt hint) const { + if (hint == end() || hint == begin()) { + return find_closest(norm_pc, parent_bound); } + const NestedSymbolKey leaf_element{norm_pc, 0}; + const NestedSymbolKey high_bound{parent_bound.end, 0}; + NestedSymbolMap::FindRes res{end(), false}; - // previous element is more likely to contain our addr - if (it != begin()) { - --it; - } else { // map is 
empty - return {end(), false}; + auto it = hint; + if (hint->first < leaf_element) { + // If the current element is less than or equal to norm_pc, move forward + for (; it != end(); ++it) { + // we will test when looping back from highest to lowest element + if (leaf_element < it->first) { + // We reached an element that was higher + break; + } + // we could be looking for an out of bound element ? + if (high_bound < it->first) { + break; + } + } + return find_parent(it, parent_bound, norm_pc); + } else { + ++it; + // always move forward to make sure we can return current element } - // element can not be end (as we reversed or exit) - return {it, SymbolMap::is_within(norm_pc, *it)}; + return find_parent(it, parent_bound, norm_pc); } } // namespace ddprof diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index c533400ab..593ae4f1a 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -52,7 +52,7 @@ bool is_infinite_loop(UnwindState *us) { for (unsigned i = 1; i < nb_frames_to_check; ++i) { FunLoc const &n_minus_one_loc = output.locs[nb_locs - i]; FunLoc const &n_minus_two_loc = output.locs[nb_locs - i - 1]; - if (n_minus_one_loc.ip != n_minus_two_loc.ip) { + if (n_minus_one_loc._ip != n_minus_two_loc._ip) { return false; } } @@ -210,17 +210,18 @@ int frame_cb(Dwfl_Frame *dwfl_frame, void *arg) { DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id) { - SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; - // get or create the dwfl symbol - SymbolIdx_t const symbol_idx = - unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( - ddprof_mod, unwind_symbol_hdr._symbol_table, - unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso); + std::vector fun_locs{}; + unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( + us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, + unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, fun_locs); MapInfoIdx_t const map_idx = 
us->symbol_hdr._mapinfo_lookup.get_or_insert( us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); - return add_frame(symbol_idx, map_idx, pc, us); + for (auto &el : fun_locs) { + el._map_info_idx = map_idx; + } + return add_frame(fun_locs, us); } // check for runtime symbols provided in /tmp files diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index b475ec3ec..516162c9a 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -25,6 +25,27 @@ bool is_max_stack_depth_reached(const UnwindState &us) { return us.output.locs.size() + 2 >= kMaxStackDepth; } +DDRes add_frame(const std::vector &fun_locs, UnwindState *us) { + for (auto const &el : fun_locs) { + UnwindOutput *output = &us->output; + if (output->locs.size() >= kMaxStackDepth) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_MAX_DEPTH, + "Max stack depth reached"); // avoid overflow + } + output->locs.push_back(el); + } + return {}; +} + +DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, + ElfAddress_t pc, UnwindState *us) { + + for (auto const el : symbol_indices) { + DDRES_CHECK_FWD(add_frame(el, map_idx, pc, us)); + } + return {}; +} + DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us) { UnwindOutput *output = &us->output; @@ -34,7 +55,7 @@ DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, } FunLoc current; current._symbol_idx = symbol_idx; - current.ip = pc; + current._ip = pc; if (map_idx == -1) { // just add an empty element for mapping info current._map_info_idx = us->symbol_hdr._common_mapinfo_lookup.get_or_insert( @@ -48,7 +69,6 @@ DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, us->symbol_hdr._symbol_table[current._symbol_idx]._symname.c_str()); #endif output->locs.push_back(current); - return {}; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d3ee6c5c9..413d74b25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -201,9 +201,13 @@ 
add_unit_test( dwfl_module-ut dwfl_module-ut.cc ../src/build_id.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc + ../src/dwfl_symbol_lookup.cc + ../src/dso_symbol_lookup.cc + ../src/symbol_map.cc ../src/demangler/demangler.cc ../src/dso.cc ../src/dso_hdr.cc @@ -229,6 +233,7 @@ add_unit_test( ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc @@ -269,6 +274,7 @@ set(ALLOCATION_TRACKER_UT_SRCS ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 411c1cb15..4adba557f 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -20,7 +20,8 @@ namespace ddprof { // todo : cut this dependency -DwflSymbolLookup::DwflSymbolLookup() : _lookup_setting(K_CACHE_ON) {} +DwflSymbolLookup::DwflSymbolLookup(bool inlining) + : _lookup_setting(K_CACHE_ON) {} // Mock int get_nb_hw_thread() { return 2; } diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index b3a128887..e24ca0e7d 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -22,7 +22,8 @@ namespace ddprof { // todo : cut this dependency -DwflSymbolLookup::DwflSymbolLookup() : _lookup_setting(K_CACHE_ON) {} +DwflSymbolLookup::DwflSymbolLookup(bool inlining) + : _lookup_setting(K_CACHE_ON) {} TEST(DDProfPProf, init_profiles) { DDProfPProf pprof; diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index dee9682c7..ef8f61e4f 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -11,6 +11,9 @@ #include "dwfl_symbol.hpp" #include "loghandle.hpp" +#include "dwfl_symbol_lookup.hpp" +#include "symbol_table.hpp" + #include #include #include @@ -38,7 +41,7 @@ int count_fds(pid_t pid) { } return fd_count; } - +#ifdef TEMP_REMOVE TEST(DwflModule, 
inconsistency_test) { pid_t my_pid = getpid(); int nb_fds_start = count_fds(my_pid); @@ -182,5 +185,71 @@ TEST(DwflModule, short_lived) { } } } +#endif +__attribute__((always_inline)) inline ElfAddress_t deeper_function() { + // Without these instructions we can fall in the calling function + LG_DBG("Adding some logging instructions"); + ElfAddress_t ip = _THIS_IP_; + LG_DBG("I'm capturing the ip = %lx", ip); + return ip; +} + +__attribute__((always_inline)) inline ElfAddress_t inlined_function() { + LG_DBG("Before the call to deeper func!!"); + ElfAddress_t ip = deeper_function(); + LG_DBG("I'm going up!!"); + return ip; +} +ElfAddress_t my_custom_function() { + ElfAddress_t ip = inlined_function(); + LG_DBG("The actual ip = %lx", ip); + return ip; +} + +TEST(DwflModule, inlined_func) { + pid_t my_pid = getpid(); + LogHandle handle; + // Load DSOs from our unit test + ElfAddress_t ip = my_custom_function(); + DsoHdr dso_hdr; + ddprof::SymbolTable table; + DwflSymbolLookup symbol_lookup; + DsoSymbolLookup dso_lookup; + DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); + // Check that we found the DSO matching this IP + ASSERT_TRUE(find_res.second); + { + DwflWrapper dwfl_wrapper; + // retrieve the map associated to pid + DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; + for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { + Dso &dso = it->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + continue; // skip non exec / non standard (anon/vdso...) 
+ } + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + ASSERT_TRUE(file_info_id > k_file_info_error); + + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + DDProfMod *ddprof_mod = nullptr; + auto res = dwfl_wrapper.register_mod(dso._start, + dso_hdr.get_elf_range(dso_map, it), + file_info_value, &ddprof_mod); + + ASSERT_TRUE(IsDDResOK(res)); + ASSERT_TRUE(ddprof_mod->_mod); + if (find_res.first == it) { + std::vector fun_locs; + symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, + dso_lookup, file_info_id, ip, dso, + fun_locs); + const auto &sym = table[fun_locs[0]._symbol_idx]; + LG_DBG("Sym = %s", sym._demangle_name.c_str()); + EXPECT_EQ(sym._demangle_name, "deeper_function"); + } + } + } +} } // namespace ddprof diff --git a/test/savecontext-ut.cc b/test/savecontext-ut.cc index a4f0dee00..42232d957 100644 --- a/test/savecontext-ut.cc +++ b/test/savecontext-ut.cc @@ -111,7 +111,7 @@ TEST(getcontext, unwind_from_sighandler) { for (size_t iloc = 0; iloc < state.output.locs.size(); ++iloc) { auto &symbol = symbol_table[state.output.locs[iloc]._symbol_idx]; printf("%zu: %s %lx \n", iloc, symbol._demangle_name.c_str(), - state.output.locs[iloc].ip); + state.output.locs[iloc]._ip); } auto get_symbol = [&](int idx) { return symbol_table[state.output.locs[idx]._symbol_idx]; diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 0da3ce5d9..6d7facc34 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -5,6 +5,7 @@ #include +#include "loghandle.hpp" #include "symbol_map.hpp" namespace ddprof { @@ -12,16 +13,107 @@ namespace ddprof { TEST(SymbolMap, Span) { SymbolSpan span1; EXPECT_EQ(span1.get_end(), 0); - EXPECT_EQ(span1.get_symbol_idx(), -1); + EXPECT_EQ(span1.get_value(), -1); SymbolSpan span2(0x1000, 12); EXPECT_EQ(span2.get_end(), 0x1000); - EXPECT_EQ(span2.get_symbol_idx(), 12); + EXPECT_EQ(span2.get_value(), 12); } TEST(SymbolMap, Map) { + LogHandle handle; SymbolMap map; 
SymbolSpan span0_1000(0x1000, 12); map.emplace(0, span0_1000); + SymbolMap::FindRes res = map.find_closest(50); + EXPECT_TRUE(res.second); +} + +TEST(NestedSymbolMap, simple) { + NestedSymbolKey parent_key{0x50, 0x1000}; + LogHandle handle; + NestedSymbolMap map; + NestedSymbolValue span100_1000(0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span150_300(1); + map.emplace(NestedSymbolKey{0x150, 0x300}, span150_300); + { + NestedSymbolMap::FindRes res = map.find_closest(0x150, parent_key); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + } + { + NestedSymbolMap::FindRes res = map.find_closest(0x400, parent_key); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 0); + } +} + +TEST(NestedSymbolMap, same_addr) { + LogHandle handle; + NestedSymbolMap map; + NestedSymbolKey parent_key{0x50, 0x1000}; + NestedSymbolValue span100_1000(0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span100_300(1); + map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); + + { // always return the deeper element + NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + } +} + +// todo : fix bug on same start different end with multiple +TEST(NestedSymbolMap, InlinedFunctionLookup) { + LogHandle handle; + NestedSymbolMap map; + // Insert main function + map.emplace(NestedSymbolKey{0x1180, 0x128a}, NestedSymbolValue(34)); + // Insert inlined functions as per the log + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(1)); + map.emplace(NestedSymbolKey{0x11bd, 0x11c4}, NestedSymbolValue(2)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(3)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(4)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(5)); + map.emplace(NestedSymbolKey{0x11d0, 0x1203}, 
NestedSymbolValue(6)); + map.emplace(NestedSymbolKey{0x11fe, 0x11fe}, NestedSymbolValue(7)); + map.emplace(NestedSymbolKey{0x11d0, 0x11d0}, NestedSymbolValue(8)); + + NestedSymbolKey parent_key{0x1180, 0x1300}; + // Test for a specific address + NestedSymbolMap::FindRes res = map.find_closest(0x11e0, parent_key); + ASSERT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), + 6); // Expecting the most specific (deepest) symbol for this address +} + +TEST(NestedSymbolMap, closest_hint) { + LogHandle handle; + NestedSymbolMap map; + NestedSymbolKey parent_key{0x50, 0x1000}; + NestedSymbolValue span100_1000(0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span100_300(1); + map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); + NestedSymbolValue span300_400(2); + map.emplace(NestedSymbolKey{0x300, 0x400}, span300_400); + + { // always return the deeper element + NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + + NestedSymbolMap::FindRes res_2 = + map.find_closest_hint(0x350, parent_key, res.first); + EXPECT_TRUE(res_2.second); + EXPECT_EQ(res_2.first->second.get_symbol_idx(), 2); + + NestedSymbolMap::FindRes res_3 = + map.find_closest_hint(0x900, parent_key, res_2.first); + EXPECT_TRUE(res_3.second); + EXPECT_EQ(res_3.first->second.get_symbol_idx(), 0); + } } } // namespace ddprof diff --git a/test/unwind_output_mock.hpp b/test/unwind_output_mock.hpp index 69742f504..4b7cd56d4 100644 --- a/test/unwind_output_mock.hpp +++ b/test/unwind_output_mock.hpp @@ -48,7 +48,7 @@ static inline void fill_unwind_output_1(UnwindOutput &uw_output) { std::vector &locs = uw_output.locs; for (unsigned i = 0; i < uw_output.locs.size(); ++i) { - locs[i].ip = 42 + i; + locs[i]._ip = 42 + i; locs[i]._symbol_idx = i; locs[i]._map_info_idx = i; }