From 2eff7fd38c2ebe5fa58148167eaf063f3066d95e Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 1 Feb 2025 16:23:11 +0900 Subject: [PATCH 1/2] modules: Add __module_build_id() to find module by build_id Add __module_build_id() to find module by build_id. This also makes module::build_id available with CONFIG_MODULE_BUILD_ID kconfig. Signed-off-by: Masami Hiramatsu (Google) --- include/linux/module.h | 8 +++++++- kernel/module/Kconfig | 3 +++ kernel/module/kallsyms.c | 4 ++-- kernel/module/main.c | 29 +++++++++++++++++++++++++++++ lib/Kconfig.debug | 1 + 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 0516f5ea9153b3..69e5c9260095dd 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -419,7 +419,7 @@ struct module { /* Unique handle for this module */ char name[MODULE_NAME_LEN]; -#ifdef CONFIG_STACKTRACE_BUILD_ID +#ifdef CONFIG_MODULE_BUILD_ID /* Module build ID */ unsigned char build_id[BUILD_ID_SIZE_MAX]; #endif @@ -625,6 +625,7 @@ static inline bool module_is_coming(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); +struct module *__module_build_id(unsigned char *build_id, int size); bool is_module_address(unsigned long addr); bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); @@ -794,6 +795,11 @@ static inline struct module *__module_text_address(unsigned long addr) return NULL; } +static inline struct module *__module_build_id(unsigned char *build_id, int size) +{ + return NULL; +} + static inline bool is_module_address(unsigned long addr) { return false; diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index d7762ef5949a2c..ac11c5390fe6db 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -26,6 +26,9 @@ if MODULES config MODULE_DEBUGFS bool +config MODULE_BUILD_ID + bool + config MODULE_DEBUG bool "Module debugging" depends on DEBUG_FS diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 00a60796327c06..758cc0235d30ed 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -221,7 +221,7 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.num_symtab = ndst; } -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) +#if IS_ENABLED(CONFIG_MODULE_BUILD_ID) void init_build_id(struct module *mod, const struct load_info *info) { const Elf_Shdr *sechdr; @@ -335,7 +335,7 @@ int module_address_lookup(unsigned long addr, if (modname) *modname = mod->name; if (modbuildid) { -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) +#if IS_ENABLED(CONFIG_MODULE_BUILD_ID) *modbuildid = mod->build_id; #else *modbuildid = NULL; diff --git a/kernel/module/main.c b/kernel/module/main.c index 81ecb9d3a93530..2ecaaa2b2e8442 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3798,6 +3798,35 @@ struct module *__module_text_address(unsigned long addr) return mod; } +/** + * __module_build_id() - get the module whose build_id start with an array. + * @build_id: the first part of the build_id. + * @size: the size of @build_id. + * + * Must be called with preempt disabled or module mutex held so that + * module doesn't get freed during this. + */ +struct module *__module_build_id(unsigned char *build_id, int size) +{ +#ifdef CONFIG_MODULE_BUILD_ID + struct module *mod; + + if (size < 0) + return NULL; + + if (size > BUILD_ID_SIZE_MAX) + size = BUILD_ID_SIZE_MAX; + + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (!memcmp(mod->build_id, build_id, size)) + return mod; + } +#endif + return NULL; +} + /* Don't grab lock, we're oopsing. */ void print_modules(void) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06f6..9d1eda9e47445e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -38,6 +38,7 @@ config PRINTK_CALLER config STACKTRACE_BUILD_ID bool "Show build ID information in stacktraces" depends on PRINTK + select MODULE_BUILD_ID if MODULES help Selecting this option adds build ID information for symbols in stacktraces printed with the printk format '%p[SR]b'. From 03b93b35ff2be8e9641f4117099d59bcb61bb33c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 1 Feb 2025 16:23:20 +0900 Subject: [PATCH 2/2] tracing: Add relative-stacktrace option Add "relative-stacktrace" option to save the stacktrace entries as offset from the _stext or module's text section. For the module address, each entry also saves the first 4bytes of the build_id of the module so that it can find the appropriate symbol from the offset. By using this build_id, we can also easily find the corresponding modules off-line. Thus this stacktrace data is in the persistent ring buffer, the kernel can find appropriate modules after reboot without saving previous live information. This feature also shows the module name after the symbol as below. event-sample-1622 [005] ...1. 2112.978105: => event_triggers_post_call => trace_event_raw_event_foo_bar [trace_events_sample] => do_simple_thread_func [trace_events_sample] => simple_thread [trace_events_sample] => kthread => ret_from_fork => ret_from_fork_asm The downside is the cost of time and memory (on 32bit arch). This needs to find the module if the address in the module, and save both of 32bit offset and 32bit hash for each entry, this may consume twice memory on 32bit arch. On 64bit arch, the amount of consuming memory is the same. Signed-off-by: Masami Hiramatsu (Google) --- include/linux/trace.h | 5 +++ kernel/trace/Kconfig | 1 + kernel/trace/trace.c | 50 +++++++++++++++++++++++++---- kernel/trace/trace.h | 3 ++ kernel/trace/trace_entries.h | 18 +++++++++++ kernel/trace/trace_output.c | 62 ++++++++++++++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 6 deletions(-) diff --git a/include/linux/trace.h b/include/linux/trace.h index fdcd76b7be83d7..4b4f008a2fa90e 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -26,6 +26,11 @@ struct trace_export { int flags; }; +struct ftrace_rel_caller { + int offset; + unsigned int build_id32; +} __packed; + struct trace_array; #ifdef CONFIG_TRACING diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d570b8b9c0a9b4..b4bd186732fea4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -169,6 +169,7 @@ config TRACING bool select RING_BUFFER select STACKTRACE if STACKTRACE_SUPPORT + select MODULE_BUILD_ID if STACKTRACE_SUPPORT select TRACEPOINTS select NOP_TRACER select BINARY_PRINTF diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e6d517e74e0fd..800e309e02a1c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2916,16 +2916,41 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); +static void record_as_offset(struct ftrace_rel_caller *caller, unsigned long addr) +{ + if (likely(core_kernel_text(addr))) { + caller->build_id32 = 0; + caller->offset = addr - (unsigned long)_stext; + } else if (addr == FTRACE_TRAMPOLINE_MARKER) { + caller->build_id32 = 0; + caller->offset = (int)FTRACE_TRAMPOLINE_MARKER; + } else { + struct module *mod = __module_text_address(addr); + + if (mod) { + unsigned long base = (unsigned long)mod->mem[MOD_TEXT].base; + + caller->offset = addr - base; + caller->build_id32 = *(unsigned int *)mod->build_id; + } else { + caller->build_id32 = 0; + caller->offset = addr - (unsigned long)_stext; + } + } +} + static void __ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs) { + struct rel_stack_entry *rel_entry; struct ring_buffer_event *event; unsigned int size, nr_entries; struct ftrace_stack *fstack; struct stack_entry *entry; int stackidx; + int type; /* * Add one, for this function and the call to save_stack_trace() @@ -2937,6 +2962,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, #endif preempt_disable_notrace(); + type = (tr->trace_flags & TRACE_ITER_REL_STACK) ? TRACE_REL_STACK : TRACE_STACK; stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; @@ -2977,16 +3003,28 @@ static void __ftrace_trace_stack(struct trace_array *tr, } #endif - event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - struct_size(entry, caller, nr_entries), + if (type == TRACE_REL_STACK) + size = struct_size(rel_entry, caller, nr_entries); + else + size = struct_size(entry, caller, nr_entries); + + event = __trace_buffer_lock_reserve(buffer, type, size, trace_ctx); if (!event) goto out; - entry = ring_buffer_event_data(event); - entry->size = nr_entries; - memcpy(&entry->caller, fstack->calls, - flex_array_size(entry, caller, nr_entries)); + if (type == TRACE_REL_STACK) { + rel_entry = ring_buffer_event_data(event); + rel_entry->size = nr_entries; + for (int i = 0; i < nr_entries; i++) + record_as_offset((struct ftrace_rel_caller *)&rel_entry->caller[i], + fstack->calls[i]); + } else { + entry = ring_buffer_event_data(event); + entry->size = nr_entries; + memcpy(&entry->caller, fstack->calls, + flex_array_size(entry, caller, nr_entries)); + } __buffer_unlock_commit(buffer, event); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9c21ba45b7af6b..b2753345686574 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -55,6 +55,7 @@ enum trace_type { TRACE_TIMERLAT, TRACE_RAW_DATA, TRACE_FUNC_REPEATS, + TRACE_REL_STACK, __TRACE_LAST_TYPE, }; @@ -511,6 +512,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct rel_stack_entry, TRACE_REL_STACK);\ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ @@ -1350,6 +1352,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(TRACE_PRINTK, "trace_printk_dest"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ + C(REL_STACK, "relative-stacktrace"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index fbfb396905a6b6..46a770e572877d 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -229,6 +229,24 @@ FTRACE_ENTRY(kernel_stack, stack_entry, (void *)__entry->caller[6], (void *)__entry->caller[7]) ); +FTRACE_ENTRY(kernel_rel_stack, rel_stack_entry, + + TRACE_REL_STACK, + + F_STRUCT( + __field( int, size ) + __stack_array( u64, caller, FTRACE_STACK_ENTRIES, size) + ), + + F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n" + "\t=> %ps\n\t=> %ps\n\t=> %ps\n" + "\t=> %ps\n\t=> %ps\n", + (void *)__entry->caller[0], (void *)__entry->caller[1], + (void *)__entry->caller[2], (void *)__entry->caller[3], + (void *)__entry->caller[4], (void *)__entry->caller[5], + (void *)__entry->caller[6], (void *)__entry->caller[7]) +); + FTRACE_ENTRY(user_stack, userstack_entry, TRACE_USER_STACK, diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 03d56f711ad14e..b3748a5f584379 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1281,6 +1281,67 @@ static struct trace_event trace_stack_event = { .funcs = &trace_stack_funcs, }; +/* TRACE_REL_STACK */ + +static enum print_line_t trace_rel_stack_print(struct trace_iterator *iter, + int flags, struct trace_event *event) +{ + struct ftrace_rel_caller *p; + struct rel_stack_entry *field; + struct trace_seq *s = &iter->seq; + unsigned long base; + struct module *mod; + + trace_assign_type(field, iter->ent); + + trace_seq_puts(s, "\n"); + + for (int i = 0; i < field->size; i++) { + p = (struct ftrace_rel_caller *)&field->caller[i]; + + if (trace_seq_has_overflowed(s)) + break; + + trace_seq_puts(s, " => "); + if (p->offset == FTRACE_TRAMPOLINE_MARKER) { + trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n"); + continue; + } + if (p->build_id32) { + unsigned char build_id32[4]; + + guard(rcu)(); + *(unsigned int *)build_id32 = p->build_id32; + mod = __module_build_id(build_id32, 4); + if (!mod) { + trace_seq_printf(s, "%x [MODULE %02x%02x%02x%02x]\n", + p->offset, build_id32[0], build_id32[1], + build_id32[2], build_id32[3]); + continue; + } + base = (unsigned long)mod->mem[MOD_TEXT].base; + } else { + mod = NULL; + base = (unsigned long)_stext; + } + seq_print_ip_sym(s, base + p->offset, flags); + if (mod) + trace_seq_printf(s, " [%s]", mod->name); + trace_seq_putc(s, '\n'); + } + + return trace_handle_return(s); +} + +static struct trace_event_functions trace_rel_stack_funcs = { + .trace = trace_rel_stack_print, +}; + +static struct trace_event trace_rel_stack_event = { + .type = TRACE_REL_STACK, + .funcs = &trace_rel_stack_funcs, +}; + /* TRACE_USER_STACK */ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, int flags, struct trace_event *event) @@ -1724,6 +1785,7 @@ static struct trace_event *events[] __initdata = { &trace_ctx_event, &trace_wake_event, &trace_stack_event, + &trace_rel_stack_event, &trace_user_stack_event, &trace_bputs_event, &trace_bprint_event,