From 7ae75c73e5d8561c372e11e5ee482d6874d2f8b9 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Wed, 22 Jan 2025 11:33:49 +0100 Subject: [PATCH 01/39] tests/module: nix-ify Use "#!/usr/bin/env bash" instead of "#!/bin/bash". This is necessary for nix environments as they only provide /usr/bin/env at the standard location. Signed-off-by: Joel Granados Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20250122-jag-nix-ify-v1-1-addb3170f93c@kernel.org Signed-off-by: Petr Pavlu --- lib/tests/module/gen_test_kallsyms.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh index 561dcac0f359c1..31fe4ed63de83e 100755 --- a/lib/tests/module/gen_test_kallsyms.sh +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash TARGET=$(basename $1) DIR=lib/tests/module From 4ac044665d49e4afa06e2492c61c5ff95f02e9e0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:31 +0100 Subject: [PATCH 02/39] module: Begin to move from RCU-sched to RCU. The RCU usage in module was introduced in commit d72b37513cdfb ("Remove stop_machine during module load v2") and it claimed not to be RCU but similar. Then there was another improvement in commit e91defa26c527 ("module: don't use stop_machine on module load"). It become a mix of RCU and RCU-sched and was eventually fixed 0be964be0d450 ("module: Sanitize RCU usage and locking"). Later RCU & RCU-sched was merged in commit cb2f55369d3a9 ("modules: Replace synchronize_sched() and call_rcu_sched()") so that was aligned. Looking at it today, there is still leftovers. The preempt_disable() was used instead rcu_read_lock_sched(). The RCU & RCU-sched merge was not complete as there is still rcu_dereference_sched() for module::kallsyms. The RCU-list modules and unloaded_tainted_modules are always accessed under RCU protection or the module_mutex. The modules list iteration can always happen safely because the module will not disappear. Once the module is removed (free_module()) then after removing the module from the list, there is a synchronize_rcu() which waits until every RCU reader left the section. That means iterating over the list within a RCU-read section is enough, there is no need to disable preemption. module::kallsyms is first assigned in add_kallsyms() before the module is added to the list. At this point, it points to init data. This pointer is later updated and before the init code is removed there is also synchronize_rcu() in do_free_init(). That means A RCU read lock is enough for protection and rcu_dereference() can be safely used. Convert module code and its users step by step. Update comments and convert print_modules() to use RCU. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-3-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/main.c | 9 ++++----- kernel/module/tree_lookup.c | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 1fb9ad289a6f8f..5f661d5343ac7b 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -67,7 +67,7 @@ /* * Mutex protects: - * 1) List of modules (also safely readable with preempt_disable), + * 1) List of modules (also safely readable within RCU read section), * 2) module_use links, * 3) mod_tree.addr_min/mod_tree.addr_max. * (delete and add uses RCU list operations). @@ -1348,7 +1348,7 @@ static void free_module(struct module *mod) mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); - /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ + /* Wait for RCU synchronizing before releasing mod->list and buglist. */ synchronize_rcu(); if (try_add_tainted_module(mod)) pr_err("%s: adding tainted module to the unloaded tainted modules list failed.\n", @@ -3049,7 +3049,7 @@ static noinline int do_init_module(struct module *mod) #endif /* * We want to free module_init, but be aware that kallsyms may be - * walking this with preempt disabled. In all the failure paths, we + * walking this within an RCU read section. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success * path. execmem_free() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. @@ -3836,7 +3836,7 @@ void print_modules(void) printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ - preempt_disable(); + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -3844,7 +3844,6 @@ void print_modules(void) } print_unloaded_tainted_modules(); - preempt_enable(); if (last_unloaded_module.name[0]) pr_cont(" [last unloaded: %s%s]", last_unloaded_module.name, last_unloaded_module.taints); diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c index 277197977d438b..d3204c5c74eb7c 100644 --- a/kernel/module/tree_lookup.c +++ b/kernel/module/tree_lookup.c @@ -12,11 +12,11 @@ /* * Use a latched RB-tree for __module_address(); this allows us to use - * RCU-sched lookups of the address from any context. + * RCU lookups of the address from any context. * - * This is conditional on PERF_EVENTS || TRACING because those can really hit - * __module_address() hard by doing a lot of stack unwinding; potentially from - * NMI context. + * This is conditional on PERF_EVENTS || TRACING || CFI_CLANG because those can + * really hit __module_address() hard by doing a lot of stack unwinding; + * potentially from NMI context. */ static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) From 58f036498fde7791bf21e2d52ace25e725d65b95 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:32 +0100 Subject: [PATCH 03/39] module: Use proper RCU assignment in add_kallsyms(). add_kallsyms() assigns the RCU pointer module::kallsyms and setups the structures behind it which point to init-data. The module was not published yet, nothing can see the kallsyms pointer and the data behind it. Also module's init function was not yet invoked. There is no need to use rcu_dereference() here, it is just to keep checkers quiet. The whole RCU read section is also not needed. Use a local kallsyms pointer and setup the data structures. Assign that pointer to the data structure at the end via rcu_assign_pointer(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-4-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/kallsyms.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index bf65e0c3c86fc0..45846ae4042d12 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -177,19 +177,15 @@ void add_kallsyms(struct module *mod, const struct load_info *info) unsigned long strtab_size; void *data_base = mod->mem[MOD_DATA].base; void *init_data_base = mod->mem[MOD_INIT_DATA].base; + struct mod_kallsyms *kallsyms; - /* Set up to point into init section. */ - mod->kallsyms = (void __rcu *)init_data_base + - info->mod_kallsyms_init_off; + kallsyms = init_data_base + info->mod_kallsyms_init_off; - rcu_read_lock(); - /* The following is safe since this pointer cannot change */ - rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; - rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + kallsyms->symtab = (void *)symsec->sh_addr; + kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - rcu_dereference(mod->kallsyms)->strtab = - (void *)info->sechdrs[info->index.str].sh_addr; - rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs; + kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + kallsyms->typetab = init_data_base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init @@ -199,20 +195,19 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.strtab = s = data_base + info->stroffs; mod->core_kallsyms.typetab = data_base + info->core_typeoffs; strtab_size = info->core_typeoffs - info->stroffs; - src = rcu_dereference(mod->kallsyms)->symtab; - for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { - rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); + src = kallsyms->symtab; + for (ndst = i = 0; i < kallsyms->num_symtab; i++) { + kallsyms->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { ssize_t ret; mod->core_kallsyms.typetab[ndst] = - rcu_dereference(mod->kallsyms)->typetab[i]; + kallsyms->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; - ret = strscpy(s, - &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], + ret = strscpy(s, &kallsyms->strtab[src[i].st_name], strtab_size); if (ret < 0) break; @@ -220,7 +215,9 @@ void add_kallsyms(struct module *mod, const struct load_info *info) strtab_size -= ret + 1; } } - rcu_read_unlock(); + + /* Set up to point into init section. */ + rcu_assign_pointer(mod->kallsyms, kallsyms); mod->core_kallsyms.num_symtab = ndst; } From dd19935173ae2929727580c41ad58fbdd5a1fe11 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:33 +0100 Subject: [PATCH 04/39] module: Use RCU in find_kallsyms_symbol(). The modules list and module::kallsyms can be accessed under RCU assumption. Use rcu_dereference() to reference the kallsyms pointer in find_kallsyms_symbol(). Use a RCU section instead of preempt_disable in callers of find_kallsyms_symbol(). Keep the preempt-disable in module_address_lookup() due to __module_address(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-5-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/kallsyms.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 45846ae4042d12..3f59d047955728 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -257,7 +257,7 @@ static const char *find_kallsyms_symbol(struct module *mod, { unsigned int i, best = 0; unsigned long nextval, bestval; - struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); + struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms); struct module_memory *mod_mem; /* At worse, next value is at end of module */ @@ -329,6 +329,7 @@ int module_address_lookup(unsigned long addr, int ret = 0; struct module *mod; + guard(rcu)(); preempt_disable(); mod = __module_address(addr); if (mod) { @@ -356,7 +357,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; - preempt_disable(); + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -368,12 +369,10 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) goto out; strscpy(symname, sym, KSYM_NAME_LEN); - preempt_enable(); return 0; } } out: - preempt_enable(); return -ERANGE; } From ea3bb6864718dd8191248f2d4fd05e9bfe1b1f45 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:34 +0100 Subject: [PATCH 05/39] module: Use RCU in module_get_kallsym(). The modules list and module::kallsyms can be accessed under RCU assumption. Iterate the modules with RCU protection, use rcu_dereference() to access the kallsyms pointer. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-6-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/kallsyms.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 3f59d047955728..4eef518204eb54 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -381,13 +381,13 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, { struct module *mod; - preempt_disable(); + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; - kallsyms = rcu_dereference_sched(mod->kallsyms); + kallsyms = rcu_dereference(mod->kallsyms); if (symnum < kallsyms->num_symtab) { const Elf_Sym *sym = &kallsyms->symtab[symnum]; @@ -396,12 +396,10 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, strscpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); strscpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); - preempt_enable(); return 0; } symnum -= kallsyms->num_symtab; } - preempt_enable(); return -ERANGE; } From 3d8628d636f0a65332e5f91f4566c40c256e920c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:35 +0100 Subject: [PATCH 06/39] module: Use RCU in find_module_all(). The modules list and module::kallsyms can be accessed under RCU assumption. Remove module_assert_mutex_or_preempt() from find_module_all() so it can be used under RCU protection without warnings. Update its callers to use RCU protection instead of preempt_disable(). Cc: Jiri Kosina Cc: Joe Lawrence Cc: Josh Poimboeuf Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Miroslav Benes Cc: Petr Mladek Cc: Steven Rostedt Cc: linux-trace-kernel@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250108090457.512198-7-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- include/linux/module.h | 2 +- kernel/livepatch/core.c | 4 +--- kernel/module/kallsyms.c | 1 + kernel/module/main.c | 6 ++---- kernel/trace/trace_kprobe.c | 9 +++------ 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 30e5b19bafa983..0516f5ea9153b3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -666,7 +666,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) return within_module_init(addr, mod) || within_module_core(addr, mod); } -/* Search for module by name: must be in a RCU-sched critical section. */ +/* Search for module by name: must be in a RCU critical section. */ struct module *find_module(const char *name); extern void __noreturn __module_put_and_kthread_exit(struct module *mod, diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 0cd39954d5a10f..abea193977d219 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -59,7 +59,7 @@ static void klp_find_object_module(struct klp_object *obj) if (!klp_is_module(obj)) return; - rcu_read_lock_sched(); + guard(rcu)(); /* * We do not want to block removal of patched modules and therefore * we do not take a reference here. The patches are removed by @@ -75,8 +75,6 @@ static void klp_find_object_module(struct klp_object *obj) */ if (mod && mod->klp_alive) obj->mod = mod; - - rcu_read_unlock_sched(); } static bool klp_initialized(void) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 4eef518204eb54..3cba9f933b24fc 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -450,6 +450,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) unsigned long ret; /* Don't lock: we're in enough trouble already. */ + guard(rcu)(); preempt_disable(); ret = __module_kallsyms_lookup_name(name); preempt_enable(); diff --git a/kernel/module/main.c b/kernel/module/main.c index 5f661d5343ac7b..cdd403b940b0da 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -374,16 +374,14 @@ bool find_symbol(struct find_symbol_arg *fsa) } /* - * Search for module by name: must hold module_mutex (or preempt disabled - * for read-only access). + * Search for module by name: must hold module_mutex (or RCU for read-only + * access). */ struct module *find_module_all(const char *name, size_t len, bool even_unformed) { struct module *mod; - module_assert_mutex_or_preempt(); - list_for_each_entry_rcu(mod, &modules, list, lockdep_is_held(&module_mutex)) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d8d5f18a141adc..48057531ee4ebb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -124,9 +124,8 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) if (!p) return true; *p = '\0'; - rcu_read_lock_sched(); - ret = !!find_module(tk->symbol); - rcu_read_unlock_sched(); + scoped_guard(rcu) + ret = !!find_module(tk->symbol); *p = ':'; return ret; @@ -796,12 +795,10 @@ static struct module *try_module_get_by_name(const char *name) { struct module *mod; - rcu_read_lock_sched(); + guard(rcu)(); mod = find_module(name); if (mod && !try_module_get(mod)) mod = NULL; - rcu_read_unlock_sched(); - return mod; } #else From 4fa1035ef33ad9fbbc4c0e53d4383836a33305f8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:36 +0100 Subject: [PATCH 07/39] module: Use RCU in __find_kallsyms_symbol_value(). module::kallsyms can be accessed under RCU assumption. Use rcu_dereference() to access module::kallsyms. Update callers. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-8-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/kallsyms.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 3cba9f933b24fc..e3c55bc879c110 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -407,7 +407,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned int i; - struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); + struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; @@ -447,24 +447,15 @@ static unsigned long __module_kallsyms_lookup_name(const char *name) /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name) { - unsigned long ret; - /* Don't lock: we're in enough trouble already. */ guard(rcu)(); - preempt_disable(); - ret = __module_kallsyms_lookup_name(name); - preempt_enable(); - return ret; + return __module_kallsyms_lookup_name(name); } unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) { - unsigned long ret; - - preempt_disable(); - ret = __find_kallsyms_symbol_value(mod, name); - preempt_enable(); - return ret; + guard(rcu)(); + return __find_kallsyms_symbol_value(mod, name); } int module_kallsyms_on_each_symbol(const char *modname, From 0e2c77131c44beef876da3350089580812bc363f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:37 +0100 Subject: [PATCH 08/39] module: Use RCU in module_kallsyms_on_each_symbol(). module::kallsyms can be accessed under RCU assumption. Use rcu_dereference() to access module::kallsyms. Update callers. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-9-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/kallsyms.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index e3c55bc879c110..0e8ec6486d95c8 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -476,10 +476,8 @@ int module_kallsyms_on_each_symbol(const char *modname, if (modname && strcmp(modname, mod->name)) continue; - /* Use rcu_dereference_sched() to remain compliant with the sparse tool */ - preempt_disable(); - kallsyms = rcu_dereference_sched(mod->kallsyms); - preempt_enable(); + kallsyms = rcu_dereference_check(mod->kallsyms, + lockdep_is_held(&module_mutex)); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; From 69e938adb161f424937499d6c18784494e44a96e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:38 +0100 Subject: [PATCH 09/39] module: Remove module_assert_mutex_or_preempt() from try_add_tainted_module(). module_assert_mutex_or_preempt() is not needed in try_add_tainted_module(). The function checks for RCU-sched or the module_mutex to be acquired. The list_for_each_entry_rcu() below does the same check. Remove module_assert_mutex_or_preempt() from try_add_tainted_module(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-10-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/tracking.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/module/tracking.c b/kernel/module/tracking.c index 16742d1c630c67..4fefec5b683c60 100644 --- a/kernel/module/tracking.c +++ b/kernel/module/tracking.c @@ -21,8 +21,6 @@ int try_add_tainted_module(struct module *mod) { struct mod_unload_taint *mod_taint; - module_assert_mutex_or_preempt(); - if (!mod->taints) goto out; From 31a587aa5bf9ae9003d13c46e84d89b67a9b8165 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:39 +0100 Subject: [PATCH 10/39] module: Use RCU in find_symbol(). module_assert_mutex_or_preempt() is not needed in find_symbol(). The function checks for RCU-sched or the module_mutex to be acquired. The list_for_each_entry_rcu() below does the same check. Remove module_assert_mutex_or_preempt() from try_add_tainted_module(). Use RCU protection to invoke find_symbol() and update callers. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-11-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/main.c | 30 ++++++++++++------------------ kernel/module/version.c | 14 +++++++------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index cdd403b940b0da..af7ca713eff0a7 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -331,7 +331,7 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, /* * Find an exported symbol and return it, along with, (optional) crc and - * (optional) module which owns it. Needs preempt disabled or module_mutex. + * (optional) module which owns it. Needs RCU or module_mutex. */ bool find_symbol(struct find_symbol_arg *fsa) { @@ -345,8 +345,6 @@ bool find_symbol(struct find_symbol_arg *fsa) struct module *mod; unsigned int i; - module_assert_mutex_or_preempt(); - for (i = 0; i < ARRAY_SIZE(arr); i++) if (find_exported_symbol_in_section(&arr[i], NULL, fsa)) return true; @@ -812,10 +810,9 @@ void __symbol_put(const char *symbol) .gplok = true, }; - preempt_disable(); + guard(rcu)(); BUG_ON(!find_symbol(&fsa)); module_put(fsa.owner); - preempt_enable(); } EXPORT_SYMBOL(__symbol_put); @@ -1369,21 +1366,18 @@ void *__symbol_get(const char *symbol) .warn = true, }; - preempt_disable(); - if (!find_symbol(&fsa)) - goto fail; - if (fsa.license != GPL_ONLY) { - pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n", - symbol); - goto fail; + scoped_guard(rcu) { + if (!find_symbol(&fsa)) + return NULL; + if (fsa.license != GPL_ONLY) { + pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n", + symbol); + return NULL; + } + if (strong_try_module_get(fsa.owner)) + return NULL; } - if (strong_try_module_get(fsa.owner)) - goto fail; - preempt_enable(); return (void *)kernel_symbol_value(fsa.sym); -fail: - preempt_enable(); - return NULL; } EXPORT_SYMBOL_GPL(__symbol_get); diff --git a/kernel/module/version.c b/kernel/module/version.c index 3718a886832198..2beefeba82d94a 100644 --- a/kernel/module/version.c +++ b/kernel/module/version.c @@ -79,17 +79,17 @@ int check_modstruct_version(const struct load_info *info, .name = "module_layout", .gplok = true, }; + bool have_symbol; /* * Since this should be found in kernel (which can't be removed), no - * locking is necessary -- use preempt_disable() to placate lockdep. + * locking is necessary. Regardless use a RCU read section to keep + * lockdep happy. */ - preempt_disable(); - if (!find_symbol(&fsa)) { - preempt_enable(); - BUG(); - } - preempt_enable(); + scoped_guard(rcu) + have_symbol = find_symbol(&fsa); + BUG_ON(!have_symbol); + return check_version(info, "module_layout", mod, fsa.crc); } From 2f573d51a03da73df5647137f8fa0077e351de7f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:40 +0100 Subject: [PATCH 11/39] module: Use RCU in __is_module_percpu_address(). The modules list can be accessed under RCU assumption. Use RCU protection instead preempt_disable(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-12-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index af7ca713eff0a7..5d6f44c09bec25 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -450,8 +450,7 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) struct module *mod; unsigned int cpu; - preempt_disable(); - + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -468,13 +467,10 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) per_cpu_ptr(mod->percpu, get_boot_cpu_id()); } - preempt_enable(); return true; } } } - - preempt_enable(); return false; } From 27a85c57ace59d06f6bd73246c2d255703daad11 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:41 +0100 Subject: [PATCH 12/39] module: Allow __module_address() to be called from RCU section. mod_find() uses either the modules list to find a module or a tree lookup (CONFIG_MODULES_TREE_LOOKUP). The list and the tree can both be iterated under RCU assumption (as well as RCU-sched). Remove module_assert_mutex_or_preempt() from __module_address() and entirely since __module_address() is the last user. Update comments. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-13-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/internal.h | 11 ----------- kernel/module/main.c | 4 +--- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/kernel/module/internal.h b/kernel/module/internal.h index d09b46ef032f08..626cf8668a7eb9 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -124,17 +124,6 @@ char *module_next_tag_pair(char *string, unsigned long *secsize); #define for_each_modinfo_entry(entry, info, name) \ for (entry = get_modinfo(info, name); entry; entry = get_next_modinfo(info, name, entry)) -static inline void module_assert_mutex_or_preempt(void) -{ -#ifdef CONFIG_LOCKDEP - if (unlikely(!debug_locks)) - return; - - WARN_ON_ONCE(!rcu_read_lock_sched_held() && - !lockdep_is_held(&module_mutex)); -#endif -} - static inline unsigned long kernel_symbol_value(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS diff --git a/kernel/module/main.c b/kernel/module/main.c index 5d6f44c09bec25..2155814f35dd0e 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3749,7 +3749,7 @@ bool is_module_address(unsigned long addr) * __module_address() - get the module which contains an address. * @addr: the address. * - * Must be called with preempt disabled or module mutex held so that + * Must be called within RCU read section or module mutex held so that * module doesn't get freed during this. */ struct module *__module_address(unsigned long addr) @@ -3767,8 +3767,6 @@ struct module *__module_address(unsigned long addr) return NULL; lookup: - module_assert_mutex_or_preempt(); - mod = mod_find(addr, &mod_tree); if (mod) { BUG_ON(!within_module(addr, mod)); From f32fd85ec31cb3ac1f2079f5e5b14c593cf6113c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:42 +0100 Subject: [PATCH 13/39] module: Use RCU in search_module_extables(). search_module_extables() returns an exception_table_entry belonging to a module. The lookup via __module_address() can be performed with RCU protection. The returned exception_table_entry remains valid because the passed address usually belongs to a module that is currently executed. So the module can not be removed because "something else" holds a reference to it, ensuring that it can not be removed. Exceptions here are: - kprobe, acquires a reference on the module beforehand - MCE, invokes the function from within a timer and the RCU lifetime guarantees (of the timer) are sufficient. Therefore it is safe to return the exception_table_entry outside the RCU section which provided the module. Use RCU for the lookup in search_module_extables() and update the comment. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-14-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/main.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 2155814f35dd0e..aebb17e1bfb976 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3703,28 +3703,23 @@ char *module_flags(struct module *mod, char *buf, bool show_state) /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_extables(unsigned long addr) { - const struct exception_table_entry *e = NULL; struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address(addr); if (!mod) - goto out; + return NULL; if (!mod->num_exentries) - goto out; - - e = search_extable(mod->extable, - mod->num_exentries, - addr); -out: - preempt_enable(); - + return NULL; /* - * Now, if we found one, we are running inside it now, hence - * we cannot unload the module, hence no refcnt needed. + * The address passed here belongs to a module that is currently + * invoked (we are running inside it). Therefore its module::refcnt + * needs already be >0 to ensure that it is not removed at this stage. + * All other user need to invoke this function within a RCU read + * section. */ - return e; + return search_extable(mod->extable, mod->num_exentries, addr); } /** From ca62145db34fc7f75c87b7e15902ee492c55887a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:43 +0100 Subject: [PATCH 14/39] module: Use RCU in all users of __module_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_address() with RCU. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-15-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- include/linux/kallsyms.h | 3 +-- kernel/module/kallsyms.c | 5 +---- kernel/module/main.c | 9 ++------- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 1c6a6c1704d8d0..d5dd54c53ace61 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -55,12 +55,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) if (is_ksym_addr((unsigned long)ptr)) return ptr; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)ptr); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); - preempt_enable(); #endif return ptr; } diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 0e8ec6486d95c8..00a60796327c06 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -316,7 +316,7 @@ void * __weak dereference_module_function_descriptor(struct module *mod, /* * For kallsyms to ask for address resolution. NULL means not found. Careful - * not to lock to avoid deadlock on oopses, simply disable preemption. + * not to lock to avoid deadlock on oopses, RCU is enough. */ int module_address_lookup(unsigned long addr, unsigned long *size, @@ -330,7 +330,6 @@ int module_address_lookup(unsigned long addr, struct module *mod; guard(rcu)(); - preempt_disable(); mod = __module_address(addr); if (mod) { if (modname) @@ -348,8 +347,6 @@ int module_address_lookup(unsigned long addr, if (sym) ret = strscpy(namebuf, sym, KSYM_NAME_LEN); } - preempt_enable(); - return ret; } diff --git a/kernel/module/main.c b/kernel/module/main.c index aebb17e1bfb976..8a8b499730d865 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3731,13 +3731,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) */ bool is_module_address(unsigned long addr) { - bool ret; - - preempt_disable(); - ret = __module_address(addr) != NULL; - preempt_enable(); - - return ret; + guard(rcu)(); + return __module_address(addr) != NULL; } /** From 212c2c09bd6195691c8c3010352cd5d53d6edc86 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:44 +0100 Subject: [PATCH 15/39] module: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-16-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/module/main.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 8a8b499730d865..81ecb9d3a93530 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -823,13 +823,12 @@ void symbol_put_addr(void *addr) /* * Even though we hold a reference on the module; we still need to - * disable preemption in order to safely traverse the data structure. + * RCU read section in order to safely traverse the data structure. */ - preempt_disable(); + guard(rcu)(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); - preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); @@ -3776,20 +3775,15 @@ struct module *__module_address(unsigned long addr) */ bool is_module_text_address(unsigned long addr) { - bool ret; - - preempt_disable(); - ret = __module_text_address(addr) != NULL; - preempt_enable(); - - return ret; + guard(rcu)(); + return __module_text_address(addr) != NULL; } /** * __module_text_address() - get the module whose code contains an address. * @addr: the address. * - * Must be called with preempt disabled or module mutex held so that + * Must be called within RCU read section or module mutex held so that * module doesn't get freed during this. */ struct module *__module_text_address(unsigned long addr) From c4a4e40c8dd10f30e819c455013e43b7af39f9be Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:45 +0100 Subject: [PATCH 16/39] ARM: module: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-17-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/arm/kernel/module-plts.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index da2ee8d6ef1a7b..354ce16d83cb5a 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -285,11 +285,9 @@ bool in_module_plt(unsigned long loc) struct module *mod; bool ret; - preempt_disable(); + guard(rcu)(); mod = __module_text_address(loc); ret = mod && (loc - (u32)mod->arch.core.plt_ent < mod->arch.core.plt_count * PLT_ENT_SIZE || loc - (u32)mod->arch.init.plt_ent < mod->arch.init.plt_count * PLT_ENT_SIZE); - preempt_enable(); - return ret; } From 94df3a9a5fc158c289326b09957267497530b539 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:46 +0100 Subject: [PATCH 17/39] arm64: module: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: Catalin Marinas Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Steven Rostedt Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-18-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/arm64/kernel/ftrace.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index d7c0d023dfe5e6..5a890714ee2e98 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -320,14 +320,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * dealing with an out-of-range condition, we can assume it * is due to a module being loaded far away from the kernel. * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * NOTE: __module_text_address() must be called within a RCU read + * section, but we can rely on ftrace_lock to ensure that 'mod' * retains its validity throughout the remainder of this code. */ if (!mod) { - preempt_disable(); + guard(rcu)(); mod = __module_text_address(pc); - preempt_enable(); } if (WARN_ON(!mod)) From f09e234b290d0c1fc08277edf6661004b75563f8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:47 +0100 Subject: [PATCH 18/39] LoongArch/orc: Use RCU in all users of __module_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_address() with RCU. Cc: Huacai Chen Cc: WANG Xuerui Cc: loongarch@lists.linux.dev Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-19-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/loongarch/kernel/unwind_orc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index b2572287633179..d623935a75471c 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -399,7 +399,7 @@ bool unwind_next_frame(struct unwind_state *state) return false; /* Don't let modules unload while we're reading their ORC data. */ - preempt_disable(); + guard(rcu)(); if (is_entry_func(state->pc)) goto end; @@ -514,14 +514,12 @@ bool unwind_next_frame(struct unwind_state *state) if (!__kernel_text_address(state->pc)) goto err; - preempt_enable(); return true; err: state->error = true; end: - preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } From f6c70b3e7ed62cfca99f3dd462ea3a85852590a6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:48 +0100 Subject: [PATCH 19/39] LoongArch: ftrace: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: Huacai Chen Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Steven Rostedt Cc: WANG Xuerui Cc: linux-trace-kernel@vger.kernel.org Cc: loongarch@lists.linux.dev Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-20-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/loongarch/kernel/ftrace_dyn.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 25c9a4cfd5fa92..d5d81d74034c85 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -85,14 +85,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod * dealing with an out-of-range condition, we can assume it * is due to a module being loaded far away from the kernel. * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * NOTE: __module_text_address() must be called within a RCU read + * section, but we can rely on ftrace_lock to ensure that 'mod' * retains its validity throughout the remainder of this code. */ if (!mod) { - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); + scoped_guard(rcu) + mod = __module_text_address(pc); } if (WARN_ON(!mod)) From 8d287fd4f1dd4f32b6a2aa638fe5b65a48fccd8d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:49 +0100 Subject: [PATCH 20/39] powerpc/ftrace: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: Christophe Leroy Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Steven Rostedt Cc: linux-trace-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Tested-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20250108090457.512198-21-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/powerpc/kernel/trace/ftrace.c | 6 ++---- arch/powerpc/kernel/trace/ftrace_64_pg.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2f776f137a89ec..6dca92d5a6e822 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -115,10 +115,8 @@ static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long a { struct module *mod = NULL; - preempt_disable(); - mod = __module_text_address(ip); - preempt_enable(); - + scoped_guard(rcu) + mod = __module_text_address(ip); if (!mod) pr_err("No module loaded at addr=%lx\n", ip); diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index ac35015f04c6ad..5c6e545d1708ca 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -120,10 +120,8 @@ static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) { struct module *mod; - preempt_disable(); - mod = __module_text_address(rec->ip); - preempt_enable(); - + scoped_guard(rcu) + mod = __module_text_address(rec->ip); if (!mod) pr_err("No module loaded at addr=%lx\n", rec->ip); From 70798b93b7c47176625578ea81ae9d94b61ebccc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:50 +0100 Subject: [PATCH 21/39] cfi: Use RCU while invoking __module_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. The _notrace() variant was introduced in commit 14c4c8e41511a ("cfi: Use rcu_read_{un}lock_sched_notrace"). The recursive case where __cfi_slowpath_diag() could end up calling itself is no longer present, as all that logic is gone since commit 89245600941e ("cfi: Switch to -fsanitize=kcfi"). Sami Tolvanen said that KCFI checks don't perform function calls. Elliot Berman verified it with | modprobe -a dummy_stm stm_ftrace stm_p_basic | mkdir -p /sys/kernel/config/stp-policy/dummy_stm.0.my-policy/default | echo function > /sys/kernel/tracing/current_tracer | echo 1 > /sys/kernel/tracing/tracing_on | echo dummy_stm.0 > /sys/class/stm_source/ftrace/stm_source_link Replace the rcu_read_lock_sched_notrace() section around __module_address() with RCU. Cc: Elliot Berman Cc: Kees Cook Cc: Nathan Chancellor Cc: Sami Tolvanen Cc: Steven Rostedt Cc: llvm@lists.linux.dev Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Tested-by: Elliot Berman # sm8650-qrd [1] Link: https://lore.kernel.org/all/20241230185812429-0800.eberman@hu-eberman-lv.qualcomm.com [1] Link: https://lore.kernel.org/r/20250108090457.512198-22-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/cfi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/cfi.c b/kernel/cfi.c index 08caad7767176e..abcd4d1f98eab2 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -71,14 +71,11 @@ static bool is_module_cfi_trap(unsigned long addr) struct module *mod; bool found = false; - rcu_read_lock_sched_notrace(); - + guard(rcu)(); mod = __module_address(addr); if (mod) found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); - rcu_read_unlock_sched_notrace(); - return found; } #else /* CONFIG_MODULES */ From e9d25b42bde5a5e12aa58690087848b46838b0f6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:51 +0100 Subject: [PATCH 22/39] x86: Use RCU in all users of __module_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_address() with RCU. Cc: H. Peter Anvin Cc: Borislav Petkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-23-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- arch/x86/kernel/callthunks.c | 3 +-- arch/x86/kernel/unwind_orc.c | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 8418a892d195a9..251b65f2ab2168 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -98,11 +98,10 @@ static inline bool within_module_coretext(void *addr) #ifdef CONFIG_MODULES struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)addr); if (mod && within_module_core((unsigned long)addr, mod)) ret = true; - preempt_enable(); #endif return ret; } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index d4705a348a8045..977ee75e047c84 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -476,7 +476,7 @@ bool unwind_next_frame(struct unwind_state *state) return false; /* Don't let modules unload while we're reading their ORC data. */ - preempt_disable(); + guard(rcu)(); /* End-of-stack check for user tasks: */ if (state->regs && user_mode(state->regs)) @@ -669,14 +669,12 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } - preempt_enable(); return true; err: state->error = true; the_end: - preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } From 541895c6c7f4d57219dde2eeb383fab5b58569ea Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:52 +0100 Subject: [PATCH 23/39] jump_label: Use RCU in all users of __module_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_address() with RCU. Cc: Ard Biesheuvel Cc: Jason Baron Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-24-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/jump_label.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93a822d3c468ca..7fcf4017cb3838 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -746,9 +746,9 @@ static int jump_label_add_module(struct module *mod) kfree(jlm); return -ENOMEM; } - preempt_disable(); - jlm2->mod = __module_address((unsigned long)key); - preempt_enable(); + scoped_guard(rcu) + jlm2->mod = __module_address((unsigned long)key); + jlm2->entries = static_key_entries(key); jlm2->next = NULL; static_key_set_mod(key, jlm2); @@ -906,13 +906,13 @@ static void jump_label_update(struct static_key *key) return; } - preempt_disable(); - mod = __module_address((unsigned long)key); - if (mod) { - stop = mod->jump_entries + mod->num_jump_entries; - init = mod->state == MODULE_STATE_COMING; + scoped_guard(rcu) { + mod = __module_address((unsigned long)key); + if (mod) { + stop = mod->jump_entries + mod->num_jump_entries; + init = mod->state == MODULE_STATE_COMING; + } } - preempt_enable(); #endif entry = static_key_entries(key); /* if there are no users, entry can be NULL */ From 762d22f02ac45a90f0a9f41c350fddd852bfd524 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:53 +0100 Subject: [PATCH 24/39] jump_label: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: Ard Biesheuvel Cc: Jason Baron Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-25-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/jump_label.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7fcf4017cb3838..7cb19e6014266a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -653,13 +653,12 @@ static int __jump_label_mod_text_reserved(void *start, void *end) struct module *mod; int ret; - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - + scoped_guard(rcu) { + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + } if (!mod) return 0; From 6e263b4fdc9722fc7dabcb7b7277a5b136268ad9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 29 Jan 2025 09:47:51 +0100 Subject: [PATCH 25/39] bpf: Use RCU in all users of __module_text_address(). __module_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_address() with RCU. Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Matt Bobrowski Cc: Song Liu Cc: Stanislav Fomichev Cc: Steven Rostedt Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20250129084751.tH6iidUO@linutronix.de Signed-off-by: Petr Pavlu --- kernel/trace/bpf_trace.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index adc947587eb813..e6a17a60d8787c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2345,10 +2345,9 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)btp); module_put(mod); - preempt_enable(); } static __always_inline @@ -2932,18 +2931,21 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 u32 i, err = 0; for (i = 0; i < addrs_cnt; i++) { + bool skip_add = false; struct module *mod; - preempt_disable(); - mod = __module_address(addrs[i]); - /* Either no module or we it's already stored */ - if (!mod || has_module(&arr, mod)) { - preempt_enable(); - continue; + scoped_guard(rcu) { + mod = __module_address(addrs[i]); + /* Either no module or it's already stored */ + if (!mod || has_module(&arr, mod)) { + skip_add = true; + break; /* scoped_guard */ + } + if (!try_module_get(mod)) + err = -EINVAL; } - if (!try_module_get(mod)) - err = -EINVAL; - preempt_enable(); + if (skip_add) + continue; if (err) break; err = add_module(&arr, mod); From 9446f285eb6fcd526ae790e5b35350a561c2cce2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 29 Jan 2025 09:49:25 +0100 Subject: [PATCH 26/39] kprobes: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Cc: David S. Miller Cc: Anil S Keshavamurthy Cc: Masami Hiramatsu Cc: Naveen N Rao Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250129084925.9ppBjGLC@linutronix.de Signed-off-by: Petr Pavlu --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 88aeac84e4c057..ffe0c3d5230637 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1547,7 +1547,7 @@ static int check_kprobe_address_safe(struct kprobe *p, /* Ensure the address is in a text area, and find a module if exists. */ *probed_mod = NULL; if (!core_kernel_text((unsigned long) p->addr)) { - guard(preempt)(); + guard(rcu)(); *probed_mod = __module_text_address((unsigned long) p->addr); if (!(*probed_mod)) return -EINVAL; From f480f048e9e09de3f0741bfb88515f5551044f00 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:56 +0100 Subject: [PATCH 27/39] static_call: Use RCU in all users of __module_text_address(). __module_text_address() can be invoked within a RCU section, there is no requirement to have preemption disabled. Replace the preempt_disable() section around __module_text_address() with RCU. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-28-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- kernel/static_call_inline.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index bb7d066a7c3979..c2c59e6ef35d07 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -325,13 +325,12 @@ static int __static_call_mod_text_reserved(void *start, void *end) struct module *mod; int ret; - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - + scoped_guard(rcu) { + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + } if (!mod) return 0; From 3a2ef092de782d4165caa937fd703d3e05387c93 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 8 Jan 2025 10:04:57 +0100 Subject: [PATCH 28/39] bug: Use RCU instead RCU-sched to protect module_bug_list. The list module_bug_list relies on module_mutex for writer synchronisation. The list is already RCU style. The list removal is synchronized with modules' synchronize_rcu() in free_module(). Use RCU read lock protection instead of RCU-sched. Cc: Andrew Morton Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250108090457.512198-29-bigeasy@linutronix.de Signed-off-by: Petr Pavlu --- lib/bug.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/bug.c b/lib/bug.c index e0ff219899902f..b1f07459c2ee36 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -66,23 +66,19 @@ static LIST_HEAD(module_bug_list); static struct bug_entry *module_find_bug(unsigned long bugaddr) { + struct bug_entry *bug; struct module *mod; - struct bug_entry *bug = NULL; - rcu_read_lock_sched(); + guard(rcu)(); list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; bug = mod->bug_table; for (i = 0; i < mod->num_bugs; ++i, ++bug) if (bugaddr == bug_addr(bug)) - goto out; + return bug; } - bug = NULL; -out: - rcu_read_unlock_sched(); - - return bug; + return NULL; } void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, @@ -235,11 +231,11 @@ void generic_bug_clear_once(void) #ifdef CONFIG_MODULES struct module *mod; - rcu_read_lock_sched(); - list_for_each_entry_rcu(mod, &module_bug_list, bug_list) - clear_once_table(mod->bug_table, - mod->bug_table + mod->num_bugs); - rcu_read_unlock_sched(); + scoped_guard(rcu) { + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) + clear_once_table(mod->bug_table, + mod->bug_table + mod->num_bugs); + } #endif clear_once_table(__start___bug_table, __stop___bug_table); From afa92869776a1aff196d8a55b200da52a58f9d76 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 13 Feb 2025 23:13:52 +0100 Subject: [PATCH 29/39] params: Annotate struct module_param_attrs with __counted_by() Add the __counted_by compiler attribute to the flexible array member attrs to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Increment num before adding a new param_attribute to the attrs array and adjust the array index accordingly. Increment num immediately after the first reallocation such that the reallocation for the NULL terminator only needs to add 1 (instead of 2) to mk->mp->num. Use struct_size() instead of manually calculating the size for the reallocation. Use krealloc_array() for the additional NULL terminator. Cc: Andy Shevchenko Cc: Luis Chamberlain Cc: Nathan Chancellor Signed-off-by: Thorsten Blum Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20250213221352.2625-3-thorsten.blum@linux.dev Signed-off-by: Petr Pavlu --- kernel/params.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 0074d29c9b80ce..2509f216c9f3cf 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -551,7 +551,7 @@ struct module_param_attrs { unsigned int num; struct attribute_group grp; - struct param_attribute attrs[]; + struct param_attribute attrs[] __counted_by(num); }; #ifdef CONFIG_SYSFS @@ -651,35 +651,32 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, } /* Enlarge allocations. */ - new_mp = krealloc(mk->mp, - sizeof(*mk->mp) + - sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1), + new_mp = krealloc(mk->mp, struct_size(mk->mp, attrs, mk->mp->num + 1), GFP_KERNEL); if (!new_mp) return -ENOMEM; mk->mp = new_mp; + mk->mp->num++; /* Extra pointer for NULL terminator */ - new_attrs = krealloc(mk->mp->grp.attrs, - sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2), - GFP_KERNEL); + new_attrs = krealloc_array(mk->mp->grp.attrs, mk->mp->num + 1, + sizeof(mk->mp->grp.attrs[0]), GFP_KERNEL); if (!new_attrs) return -ENOMEM; mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ - memset(&mk->mp->attrs[mk->mp->num], 0, sizeof(mk->mp->attrs[0])); - sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); - mk->mp->attrs[mk->mp->num].param = kp; - mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; + memset(&mk->mp->attrs[mk->mp->num - 1], 0, sizeof(mk->mp->attrs[0])); + sysfs_attr_init(&mk->mp->attrs[mk->mp->num - 1].mattr.attr); + mk->mp->attrs[mk->mp->num - 1].param = kp; + mk->mp->attrs[mk->mp->num - 1].mattr.show = param_attr_show; /* Do not allow runtime DAC changes to make param writable. */ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) - mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + mk->mp->attrs[mk->mp->num - 1].mattr.store = param_attr_store; else - mk->mp->attrs[mk->mp->num].mattr.store = NULL; - mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; - mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; - mk->mp->num++; + mk->mp->attrs[mk->mp->num - 1].mattr.store = NULL; + mk->mp->attrs[mk->mp->num - 1].mattr.attr.name = (char *)name; + mk->mp->attrs[mk->mp->num - 1].mattr.attr.mode = kp->perm; /* Fix up all the pointers, since krealloc can move us */ for (i = 0; i < mk->mp->num; i++) From b5b7508b229cae1f6cdef54efb0a446eeced24d9 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 17 Feb 2025 10:01:53 +0000 Subject: [PATCH 30/39] adding ci files --- .github/workflows/kdevops-cleanup.yml | 60 +++++++++ .github/workflows/kdevops-generic.yml | 37 +++++ .github/workflows/kdevops-init.yml | 187 ++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 .github/workflows/kdevops-cleanup.yml create mode 100644 .github/workflows/kdevops-generic.yml create mode 100644 .github/workflows/kdevops-init.yml diff --git a/.github/workflows/kdevops-cleanup.yml b/.github/workflows/kdevops-cleanup.yml new file mode 100644 index 00000000000000..d7e69ac48e6c92 --- /dev/null +++ b/.github/workflows/kdevops-cleanup.yml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# This can be used towards the end of your action. All tasks here run even if +# any of the previous tasks failed. + +name: Kdevops cleanup workflow + +on: + workflow_call: # Makes this workflow reusable + +jobs: + cleanup: + name: Archive results and cleanup + runs-on: [self-hosted, Linux, X64] + steps: + - name: Set Linux kdevops development path + if: ${{ job.status != 'cancelled' }} + run: echo "LINUX_KDEVOPS_PATH=$GITHUB_WORKSPACE" >> $GITHUB_ENV + + - name: Get systemd journal files + if: ${{ job.status != 'cancelled' }} + run: | + if [[ ! -d kdevops ]]; then + exit 0 + fi + cd kdevops + make journal-dump + + - name: Start SSH Agent + if: ${{ job.status != 'cancelled' }} + uses: webfactory/ssh-agent@v0.9.0 + with: + ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} + + - name: Build our kdevops archive results + if: ${{ job.status != 'cancelled' }} + run: | + if [[ ! -d kdevops ]]; then + exit 0 + fi + cd kdevops + make ci-archive + + - name: Upload our kdevops results archive + if: ${{ job.status != 'cancelled' }} + uses: actions/upload-artifact@v4 + with: + name: kdevops-ci-results + path: ${{ env.LINUX_KDEVOPS_PATH }}/kdevops/archive/*.zip + + - name: Run kdevops make destroy + if: always() + run: | + if [[ ! -d kdevops ]]; then + exit 0 + fi + cd kdevops + make destroy + cd .. + rm -rf kdevops diff --git a/.github/workflows/kdevops-generic.yml b/.github/workflows/kdevops-generic.yml new file mode 100644 index 00000000000000..8ca2006c1e1d22 --- /dev/null +++ b/.github/workflows/kdevops-generic.yml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Most simple Linux kernel subsystems can be tested with this target +# test setup. For more elaborates tests look for a topic branch under the +# kdevops-ci tree. For example to test a filesystem look at the fstests +# branch. + +name: Run generic kdevops CI tests + +on: + push: + branches: ['**'] + pull_request: + branches: ['**'] + workflow_dispatch: # Allow manual triggering + +jobs: + setup: + uses: ./.github/workflows/kdevops-init.yml + secrets: inherit + + run-tests: + needs: setup + name: Run CI tests + runs-on: [self-hosted, Linux, X64] + steps: + - name: Run CI tests + run: | + cd kdevops + make ci-test + echo "ok" > ci.result + + cleanup: + needs: [run-tests, setup] # Add setup as a dependency to ensure proper ordering + if: always() # This ensures cleanup runs even if run-tests fails + uses: ./.github/workflows/kdevops-cleanup.yml + secrets: inherit diff --git a/.github/workflows/kdevops-init.yml b/.github/workflows/kdevops-init.yml new file mode 100644 index 00000000000000..9a265c2dbc1f84 --- /dev/null +++ b/.github/workflows/kdevops-init.yml @@ -0,0 +1,187 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# This can be used as a initialization workflow for most Linux kernel +# development environments. This takes care of: +# +# - Checks out and re-using a local mirror for your kernel tree +# - Looks for a defconfig in kdevops to use for your kernel tree +# - Sets up CI metadata for kdevops-results-archive +# - Ensures your kernel tree at least builds with defconfig +# - Brings up target DUTs nodes +# - Installs your Linux kernel tree on them +# - Builds all of your test requirements for your Linux kernel tree + +name: Base kdevops workflow + +on: + workflow_call: # Makes this workflow reusable + inputs: + kdevops_defconfig: + required: false + type: string + +jobs: + setup: + name: Setup kdevops environment + runs-on: [self-hosted, Linux, X64] + steps: + - name: Verify we won't expect user input interactions on the host key + run: | + mkdir -p ~/.ssh + if ! grep -q "StrictHostKeyChecking no" ~/.ssh/config 2>/dev/null; then + echo "StrictHostKeyChecking no" >> ~/.ssh/config + fi + + - name: Start SSH Agent for initial test + uses: webfactory/ssh-agent@v0.9.0 + with: + ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} + + # Modify the repo here if you have a custom or private URL for the archive + # This can also just be a repo variable later. + - name: Verify our ssh connection will work + run: | + if ! git ls-remote git@github.com:linux-kdevops/kdevops-results-archive.git HEAD; then + echo "Cannot access kdevops-results-archive repository" + exit 1 + fi + + - name: Configure git + run: | + git config --global --add safe.directory '*' + git config --global user.name "kdevops" + git config --global user.email "kdevops@lists.linux.dev" + + - name: Checkout kdevops + run: | + rm -rf kdevops + git clone /mirror/kdevops.git kdevops + + - name: Make sure our repo kdevops defconfig exists + run: | + cd kdevops + if [[ -z "${{ inputs.kdevops_defconfig }}" ]]; then + KDEVOPS_DEFCONFIG=$(basename ${{ github.repository }}) + else + KDEVOPS_DEFCONFIG="${{ inputs.kdevops_defconfig }}" + fi + + if [[ ! -f defconfigs/$KDEVOPS_DEFCONFIG ]]; then + echo "kdevops lacks a defconfig for this repository, expected to find: defconfigs/$KDEVOPS_DEFCONFIG" + exit 1 + fi + + echo "KDEVOPS_DEFCONFIG=$KDEVOPS_DEFCONFIG" >> $GITHUB_ENV + + - name: Checkout custom branch with delta on kdevops/linux + run: | + LINUX_TREE="https://github.com/${{ github.repository }}" + LINUX_TREE_REF="${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}" + cd kdevops + git clone $LINUX_TREE --reference /mirror/linux.git/ --depth=5 linux + cd linux + git fetch origin $LINUX_TREE_REF + git checkout $LINUX_TREE_REF + git log -1 + + - name: Initialize CI metadata for kdevops-results-archive for linux + run: | + cd kdevops/linux + echo "$(basename ${{ github.repository }})" > ../ci.trigger + + # This supports using kdevops github actions using two different + # approaches: + # + # 1) Commit the .github/ directory onto a Linux tree before your + # kernel changes. This approach is used for example for + # testing patches posted on the mailing list with patchwork, + # this is the strategy kernel-patch-deaemon uses. Since the + # patches are ephemeral there is not important git history to + # maintain. + # + # 2) Merge the .github/ directory at the end of your development + # tree. This is useful for kernel developers wishing to test + # existing trees. + # + # So this checks to see if the last commit (top of the tree) *added* + # the .github directory. If the last commit added it, then we assume + # the commit prior to it was the one we'd like to document as the main + # test point. + if git diff-tree --no-commit-id --name-only --diff-filter=A -r HEAD | grep -q "^\.github/"; then + git log -2 --skip=1 --pretty=format:"%s" -1 > ../ci.subject + git describe --exact-match --tags HEAD^ 2>/dev/null || git rev-parse --short HEAD^ > ../ci.ref + else + git log -1 --pretty=format:"%s" > ../ci.subject + git describe --exact-match --tags HEAD 2>/dev/null || git rev-parse --short HEAD > ../ci.ref + fi + + RELEVANT_GIT_TAG=$(cat ../ci.ref) + RELEVANT_GIT_REF=$(git rev-parse --short=12 $RELEVANT_GIT_TAG) + + echo "LINUX_GIT_REF=$RELEVANT_GIT_REF" >> $GITHUB_ENV + echo "LINUX_GIT_TAG=$RELEVANT_GIT_TAG" >> $GITHUB_ENV + + # Start out pessimistic + echo "unknown" > ../ci.result + echo "Nothing to write home about." > ../ci.commit_extra + + - name: Run a quick Linux kernel defconfig build test + run: | + cd kdevops/linux + git reset --hard ${{ env.LINUX_GIT_TAG }} + make defconfig + make -j$(nproc) + + - name: Run kdevops make defconfig-repo + run: | + LINUX_TREE="https://github.com/${{ github.repository }}" + LINUX_TREE_REF="${{ env.LINUX_GIT_TAG }}" + + # We make the compromise here to use a relevant git tag for the + # host prefix so that folks can easily tell what exact kernel tree + # is being tested by using the relevant git ref. That is, if you + # pushed a tree with the .github/ directory as the top of the tree, + # that commit will not be used, we'll use the last one as that is + # the relevant git ref we want to annotate a test for. + # + # The compromise here we use special KDEVOPS to separete the + # commit ID and github.run_id. Exotic things likes UTF characters + # and dots have problems. + KDEVOPS_HOSTS_PREFIX="${{ env.LINUX_GIT_REF }}KDEVOPS${{ github.run_id }}" + + echo "Going to use defconfig-${{ env.KDEVOPS_DEFCONFIG }}" + + echo "Linux tree: $LINUX_TREE" + echo "Linux trigger ref: $LINUX_TREE_REF" + echo "Linux tag: ${{ env.LINUX_GIT_TAG }}" + echo "Runner ID: ${{ github.run_id }}" + echo "kdevops host prefix: $KDEVOPS_HOSTS_PREFIX" + echo "kdevops defconfig: defconfig-${{ env.KDEVOPS_DEFCONFIG }}" + + KDEVOPS_ARGS="KDEVOPS_HOSTS_PREFIX=$KDEVOPS_HOSTS_PREFIX LINUX_TREE=$LINUX_TREE LINUX_TREE_REF=$LINUX_TREE_REF defconfig-${{ env.KDEVOPS_DEFCONFIG }}" + echo "Going to run:" + echo "make $KDEVOPS_ARGS" + + cd kdevops + make $KDEVOPS_ARGS + + - name: Run kdevops make + run: | + cd kdevops + make -j$(nproc) + + - name: Run kdevops make bringup + run: | + cd kdevops + ls -ld linux + make bringup + + - name: Build linux and boot test nodes on test kernel + run: | + cd kdevops + make linux + + - name: Build required ci tests + run: | + cd kdevops + make ci-build-test From f7d66d8441cb01bfa5f337675c7059167bd38101 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:25 +0200 Subject: [PATCH 31/39] x86/mm/pat: cpa-test: fix length for CPA_ARRAY test The CPA_ARRAY test always uses len[1] as numpages argument to change_page_attr_set() although the addresses array is different each iteration of the test loop. Replace len[1] with len[i] to have numpages matching the addresses array. Fixes: ecc729f1f471 ("x86/mm/cpa: Add ARRAY and PAGES_ARRAY selftests") Signed-off-by: Mike Rapoport (Microsoft) --- arch/x86/mm/pat/cpa-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 3d2f7f0a6ed142..ad3c1feec990db 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -183,7 +183,7 @@ static int pageattr_test(void) break; case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1); break; case 2: From 8bfdea77c14e5af16f96605ee6d028666cd90dc5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:26 +0200 Subject: [PATCH 32/39] x86/mm/pat: drop duplicate variable in cpa_flush() There is a 'struct cpa_data *data' parameter in cpa_flush() that is assigned to a local 'struct cpa_data *cpa' variable. Rename the parameter from 'data' to 'cpa' and drop declaration of the local 'cpa' variable. Signed-off-by: Mike Rapoport (Microsoft) --- arch/x86/mm/pat/set_memory.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index ef4514d64c0524..1f7698caa6f7b6 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -394,9 +394,8 @@ static void __cpa_flush_tlb(void *data) flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } -static void cpa_flush(struct cpa_data *data, int cache) +static void cpa_flush(struct cpa_data *cpa, int cache) { - struct cpa_data *cpa = data; unsigned int i; BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); From fd198515cb11b420def85b882461453d12710987 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sun, 26 Jan 2025 09:47:27 +0200 Subject: [PATCH 33/39] x86/mm/pat: restore large ROX pages after fragmentation Change of attributes of the pages may lead to fragmentation of direct mapping over time and performance degradation when these pages contain executable code. With current code it's one way road: kernel tries to avoid splitting large pages, but it doesn't restore them back even if page attributes got compatible again. Any change to the mapping may potentially allow to restore large page. Add a hook to cpa_flush() path that will check if the pages in the range that were just touched can be mapped at PMD level. If the collapse at the PMD level succeeded, also attempt to collapse PUD level. The collapse logic runs only when a set_memory_ method explicitly sets CPA_COLLAPSE flag, for now this is only enabled in set_memory_rox(). CPUs don't like[1] to have to have TLB entries of different size for the same memory, but looks like it's okay as long as these entries have matching attributes[2]. Therefore it's critical to flush TLB before any following changes to the mapping. Note that we already allow for multiple TLB entries of different sizes for the same memory now in split_large_page() path. It's not a new situation. set_memory_4k() provides a way to use 4k pages on purpose. Kernel must not remap such pages as large. Re-use one of software PTE bits to indicate such pages. [1] See Erratum 383 of AMD Family 10h Processors [2] https://lore.kernel.org/linux-mm/1da1b025-cabc-6f04-bde5-e50830d1ecf0@amd.com/ [rppt@kernel.org: * s/restore/collapse/ * update formatting per peterz * use 'struct ptdesc' instead of 'struct page' for list of page tables to be freed * try to collapse PMD first and if it succeeds move on to PUD as peterz suggested * flush TLB twice: for changes done in the original CPA call and after collapsing of large pages * update commit message ] Link: https://lore.kernel.org/all/20200416213229.19174-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) --- arch/x86/include/asm/pgtable_types.h | 2 + arch/x86/mm/pat/set_memory.c | 217 ++++++++++++++++++++++++++- include/linux/vm_event_item.h | 2 + mm/vmstat.c | 2 + 4 files changed, 219 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4b804531b03c3c..c90e9c51edb7ac 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -33,6 +33,7 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 @@ -64,6 +65,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1f7698caa6f7b6..7bd0f62ba48fdc 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -73,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ +#define CPA_COLLAPSE 16 /* try to collapse large pages */ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { @@ -105,6 +106,18 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } +static void collapse_page_count(int level) +{ + direct_pages_count[level]++; + if (system_state == SYSTEM_RUNNING) { + if (level == PG_LEVEL_2M) + count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE); + else if (level == PG_LEVEL_1G) + count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE); + } + direct_pages_count[level - 1] -= PTRS_PER_PTE; +} + void arch_report_meminfo(struct seq_file *m) { seq_printf(m, "DirectMap4k: %8lu kB\n", @@ -122,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m) } #else static inline void split_page_count(int level) { } +static inline void collapse_page_count(int level) { } #endif #ifdef CONFIG_X86_CPA_STATISTICS @@ -394,6 +408,40 @@ static void __cpa_flush_tlb(void *data) flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); + +static void cpa_collapse_large_pages(struct cpa_data *cpa) +{ + unsigned long start, addr, end; + struct ptdesc *ptdesc, *tmp; + LIST_HEAD(pgtables); + int collapsed = 0; + int i; + + if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { + for (i = 0; i < cpa->numpages; i++) + collapsed += collapse_large_pages(__cpa_addr(cpa, i), + &pgtables); + } else { + addr = __cpa_addr(cpa, 0); + start = addr & PMD_MASK; + end = addr + PAGE_SIZE * cpa->numpages; + + for (addr = start; within(addr, start, end); addr += PMD_SIZE) + collapsed += collapse_large_pages(addr, &pgtables); + } + + if (!collapsed) + return; + + flush_tlb_all(); + + list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) { + list_del(&ptdesc->pt_list); + __free_page(ptdesc_page(ptdesc)); + } +} + static void cpa_flush(struct cpa_data *cpa, int cache) { unsigned int i; @@ -402,7 +450,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { cpa_flush_all(cache); - return; + goto collapse_large_pages; } if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) @@ -411,7 +459,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) - return; + goto collapse_large_pages; mb(); for (i = 0; i < cpa->numpages; i++) { @@ -427,6 +475,10 @@ static void cpa_flush(struct cpa_data *cpa, int cache) clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); } mb(); + +collapse_large_pages: + if (cpa->flags & CPA_COLLAPSE) + cpa_collapse_large_pages(cpa); } static bool overlaps(unsigned long r1_start, unsigned long r1_end, @@ -1196,6 +1248,161 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, return 0; } +static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, + struct list_head *pgtables) +{ + pmd_t _pmd, old_pmd; + pte_t *pte, first; + unsigned long pfn; + pgprot_t pgprot; + int i = 0; + + addr &= PMD_MASK; + pte = pte_offset_kernel(pmd, addr); + first = *pte; + pfn = pte_pfn(first); + + /* Make sure alignment is suitable */ + if (PFN_PHYS(pfn) & ~PMD_MASK) + return 0; + + /* The page is 4k intentionally */ + if (pte_flags(first) & _PAGE_KERNEL_4K) + return 0; + + /* Check that the rest of PTEs are compatible with the first one */ + for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) { + pte_t entry = *pte; + + if (!pte_present(entry)) + return 0; + if (pte_flags(entry) != pte_flags(first)) + return 0; + if (pte_pfn(entry) != pte_pfn(first) + i) + return 0; + } + + old_pmd = *pmd; + + /* Success: set up a large page */ + pgprot = pgprot_4k_2_large(pte_pgprot(first)); + pgprot_val(pgprot) |= _PAGE_PSE; + _pmd = pfn_pmd(pfn, pgprot); + set_pmd(pmd, _pmd); + + /* Queue the page table to be freed after TLB flush */ + list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables); + + if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) { + struct page *page; + + /* Update all PGD tables to use the same large page */ + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd = pmd_offset(pud, addr); + /* Something is wrong if entries doesn't match */ + if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd))) + continue; + set_pmd(pmd, _pmd); + } + } + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_2M); + + return 1; +} + +static int collapse_pud_page(pud_t *pud, unsigned long addr, + struct list_head *pgtables) +{ + unsigned long pfn; + pmd_t *pmd, first; + int i; + + if (!direct_gbpages) + return 0; + + addr &= PUD_MASK; + pmd = pmd_offset(pud, addr); + first = *pmd; + + /* + * To restore PUD page all PMD entries must be large and + * have suitable alignment + */ + pfn = pmd_pfn(first); + if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK)) + return 0; + + /* + * To restore PUD page, all following PMDs must be compatible with the + * first one. + */ + for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) { + pmd_t entry = *pmd; + + if (!pmd_present(entry) || !pmd_leaf(entry)) + return 0; + if (pmd_flags(entry) != pmd_flags(first)) + return 0; + if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE) + return 0; + } + + /* Restore PUD page and queue page table to be freed after TLB flush */ + list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables); + set_pud(pud, pfn_pud(pfn, pmd_pgprot(first))); + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_1G); + + return 1; +} + +/* + * Collapse PMD and PUD pages in the kernel mapping around the address where + * possible. + * + * Caller must flush TLB and free page tables queued on the list before + * touching the new entries. CPU must not see TLB entries of different size + * with different attributes. + */ +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables) +{ + int collapsed = 0; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + addr &= PMD_MASK; + + spin_lock(&pgd_lock); + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + goto out; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + goto out; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) + goto out; + + collapsed = collapse_pmd_page(pmd, addr, pgtables); + if (collapsed) + collapsed += collapse_pud_page(pud, addr, pgtables); + +out: + spin_unlock(&pgd_lock); + return collapsed; +} + static bool try_to_free_pte_page(pte_t *pte) { int i; @@ -2119,7 +2326,8 @@ int set_memory_rox(unsigned long addr, int numpages) if (__supported_pte_mask & _PAGE_NX) clr.pgprot |= _PAGE_NX; - return change_page_attr_clear(&addr, numpages, clr, 0); + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0, + CPA_COLLAPSE, NULL); } int set_memory_rw(unsigned long addr, int numpages) @@ -2146,7 +2354,8 @@ int set_memory_p(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + return change_page_attr_set_clr(&addr, numpages, + __pgprot(_PAGE_KERNEL_4K), __pgprot(0), 1, 0, NULL); } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095cd9..5a37cb2b6f93b8 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/mm/vmstat.c b/mm/vmstat.c index 16bfe1c694dd4e..88998725f1c5af 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = { #ifdef CONFIG_X86 "direct_map_level2_splits", "direct_map_level3_splits", + "direct_map_level2_collapses", + "direct_map_level3_collapses", #endif #ifdef CONFIG_PER_VMA_LOCK_STATS "vma_lock_success", From 20d4655745b6422ca8380818ce97e6e78e14a062 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:28 +0200 Subject: [PATCH 34/39] execmem: don't remove ROX cache from the direct map The memory allocated for the ROX cache was removed from the direct map to reduce amount of direct map updates, however this cannot be tolerated by /proc/kcore that accesses module memory using vread_iter() and the latter does vmalloc_to_page() and copy_page_to_iter_nofault(). Instead of removing ROX cache memory from the direct map and mapping it as ROX in vmalloc space, simply call set_memory_rox() that will take care of proper permissions on both vmalloc and in the direct map. Signed-off-by: Mike Rapoport (Microsoft) --- mm/execmem.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/mm/execmem.c b/mm/execmem.c index 317b6a8d35be07..04b0bf1b5025a8 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -257,7 +257,6 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; - unsigned long start, end; struct vm_struct *vm; size_t alloc_size; int err = -ENOMEM; @@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - start = (unsigned long)p; - end = start + alloc_size; - - vunmap_range(start, end); - - err = execmem_set_direct_map_valid(vm, false); - if (err) - goto err_free_mem; - - err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages, - PMD_SHIFT); + err = set_memory_rox((unsigned long)p, vm->nr_pages); if (err) goto err_free_mem; err = execmem_cache_add(p, alloc_size); if (err) - goto err_free_mem; + goto err_reset_direct_map; return 0; +err_reset_direct_map: + execmem_set_direct_map_valid(vm, true); err_free_mem: vfree(p); return err; From 02de34d5a4fb0a6333a9c71b5bdabeb9cd314948 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:29 +0200 Subject: [PATCH 35/39] execmem: add API for temporal remapping as RW and restoring ROX afterwards Using a writable copy for ROX memory is cumbersome and error prone. Add API that allow temporarily remapping of ranges in the ROX cache as writable and then restoring their read-only-execute permissions. This API will be later used in modules code and will allow removing nasty games with writable copy in alternatives patching on x86. The restoring of the ROX permissions relies on the ability of architecture to reconstruct large pages in its set_memory_rox() method. Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/execmem.h | 31 +++++++++++++++++++++++++++++++ mm/execmem.c | 22 ++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 64130ae19690a9..65655a5d1be283 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -65,6 +65,37 @@ enum execmem_range_flags { * Architectures that use EXECMEM_ROX_CACHE must implement this. */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_make_temp_rw(void *ptr, size_t size); + +/** + * execmem_restore_rox - restore read-only-execute permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Restores read-only-execute permissions on a range [@ptr, @ptr + @size) + * after it was temporarily remapped as writable. Relies on architecture + * implementation of set_memory_rox() to restore mapping using large pages. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_restore_rox(void *ptr, size_t size); +#else +static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } +static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif /** diff --git a/mm/execmem.c b/mm/execmem.c index 04b0bf1b5025a8..e6c4f5076ca8d8 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -335,6 +335,28 @@ static bool execmem_cache_free(void *ptr) return true; } + +int execmem_make_temp_rw(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + int ret; + + ret = set_memory_nx(addr, nr); + if (ret) + return ret; + + return set_memory_rw(addr, nr); +} + +int execmem_restore_rox(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + + return set_memory_rox(addr, nr); +} + #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ static void *execmem_cache_alloc(struct execmem_range *range, size_t size) { From 89386f109ddd1dd06a3b591b4c3028b681f40a97 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:30 +0200 Subject: [PATCH 36/39] module: switch to execmem API for remapping as RW and restoring ROX Instead of using writable copy for module text sections, temporarily remap the memory allocated from execmem's ROX cache as writable and restore its ROX permissions after the module is formed. This will allow removing nasty games with writable copy in alternatives patching on x86. Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Petr Pavlu --- include/linux/module.h | 8 +--- include/linux/moduleloader.h | 4 -- kernel/module/main.c | 78 ++++++++++-------------------------- kernel/module/strict_rwx.c | 9 +++-- 4 files changed, 27 insertions(+), 72 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 0516f5ea9153b3..1c145938ddd6af 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -370,7 +370,6 @@ enum mod_mem_type { struct module_memory { void *base; - void *rw_copy; bool is_rox; unsigned int size; @@ -772,14 +771,9 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); -void *__module_writable_address(struct module *mod, void *loc); - static inline void *module_writable_address(struct module *mod, void *loc) { - if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod || - mod->state != MODULE_STATE_UNFORMED) - return loc; - return __module_writable_address(mod, loc); + return loc; } #else /* !CONFIG_MODULES... */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a1284..e395461d59e5e8 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *mod); - #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/kernel/module/main.c b/kernel/module/main.c index 81ecb9d3a93530..974821d6e85ab3 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1211,18 +1211,6 @@ void __weak module_arch_freeing_init(struct module *mod) { } -void *__module_writable_address(struct module *mod, void *loc) -{ - for_class_mod_mem_type(type, text) { - struct module_memory *mem = &mod->mem[type]; - - if (loc >= mem->base && loc < mem->base + mem->size) - return loc + (mem->rw_copy - mem->base); - } - - return loc; -} - static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); @@ -1240,21 +1228,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (!ptr) return -ENOMEM; - mod->mem[type].base = ptr; - if (execmem_is_rox(execmem_type)) { - ptr = vzalloc(size); + int err = execmem_make_temp_rw(ptr, size); - if (!ptr) { - execmem_free(mod->mem[type].base); + if (err) { + execmem_free(ptr); return -ENOMEM; } - mod->mem[type].rw_copy = ptr; mod->mem[type].is_rox = true; - } else { - mod->mem[type].rw_copy = mod->mem[type].base; - memset(mod->mem[type].base, 0, size); } /* @@ -1270,16 +1252,26 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) */ kmemleak_not_leak(ptr); + memset(ptr, 0, size); + mod->mem[type].base = ptr; + return 0; } +static void module_memory_restore_rox(struct module *mod) +{ + for_class_mod_mem_type(type, text) { + struct module_memory *mem = &mod->mem[type]; + + if (mem->is_rox) + execmem_restore_rox(mem->base, mem->size); + } +} + static void module_memory_free(struct module *mod, enum mod_mem_type type) { struct module_memory *mem = &mod->mem[type]; - if (mem->is_rox) - vfree(mem->rw_copy); - execmem_free(mem->base); } @@ -2629,7 +2621,6 @@ static int move_module(struct module *mod, struct load_info *info) for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL; - mod->mem[type].rw_copy = NULL; continue; } @@ -2646,7 +2637,6 @@ static int move_module(struct module *mod, struct load_info *info) void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; const char *sname; - unsigned long addr; if (!(shdr->sh_flags & SHF_ALLOC)) continue; @@ -2667,14 +2657,12 @@ static int move_module(struct module *mod, struct load_info *info) ret = PTR_ERR(dest); goto out_err; } - addr = (unsigned long)dest; codetag_section_found = true; } else { enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; - addr = (unsigned long)mod->mem[type].base + offset; - dest = mod->mem[type].rw_copy + offset; + dest = mod->mem[type].base + offset; } if (shdr->sh_type != SHT_NOBITS) { @@ -2697,13 +2685,14 @@ static int move_module(struct module *mod, struct load_info *info) * users of info can keep taking advantage and using the newly * minted official memory area. */ - shdr->sh_addr = addr; + shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, (long)shdr->sh_size, info->secstrings + shdr->sh_name); } return 0; out_err: + module_memory_restore_rox(mod); for (t--; t >= 0; t--) module_memory_free(mod, t); if (codetag_section_found) @@ -2850,17 +2839,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; } -int __weak module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - return 0; -} - static int post_relocation(struct module *mod, const struct load_info *info) { - int ret; - /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries); @@ -2872,24 +2852,7 @@ static int post_relocation(struct module *mod, const struct load_info *info) add_kallsyms(mod, info); /* Arch-specific module finalizing. */ - ret = module_finalize(info->hdr, info->sechdrs, mod); - if (ret) - return ret; - - for_each_mod_mem_type(type) { - struct module_memory *mem = &mod->mem[type]; - - if (mem->is_rox) { - if (!execmem_update_copy(mem->base, mem->rw_copy, - mem->size)) - return -ENOMEM; - - vfree(mem->rw_copy); - mem->rw_copy = NULL; - } - } - - return module_post_finalize(info->hdr, info->sechdrs, mod); + return module_finalize(info->hdr, info->sechdrs, mod); } /* Call module constructors. */ @@ -3486,6 +3449,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod->mem[type].size); } + module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy: /* diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 74834ba15615fa..03f4142cfbf4e3 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" static int module_set_memory(const struct module *mod, enum mod_mem_type type, @@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type, int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { + const struct module_memory *mem = &mod->mem[type]; int ret; - if (mod->mem[type].is_rox) - continue; - - if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + if (mem->is_rox) + ret = execmem_restore_rox(mem->base, mem->size); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox); else ret = module_set_memory(mod, type, set_memory_x); From cadc0034ef3b021517cddf00e1201484b044db3f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:31 +0200 Subject: [PATCH 37/39] Revert "x86/module: prepare module loading for ROX allocations of text" The module code does not create a writable copy of the executable memory anymore so there is no need to handle it in module relocation and alternatives patching. This reverts commit 9bfc4824fd4836c16bb44f922bfaffba5da3e4f3. Signed-off-by: Mike Rapoport (Microsoft) --- arch/um/kernel/um_arch.c | 11 +- arch/x86/entry/vdso/vma.c | 3 +- arch/x86/include/asm/alternative.h | 14 +-- arch/x86/kernel/alternative.c | 181 ++++++++++++----------------- arch/x86/kernel/ftrace.c | 30 +++-- arch/x86/kernel/module.c | 45 +++---- 6 files changed, 117 insertions(+), 167 deletions(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 79ea97d4797ecc..8be91974e786d4 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -440,25 +440,24 @@ void __init arch_cpu_finalize_init(void) os_check_bugs(); } -void apply_seal_endbr(s32 *start, s32 *end, struct module *mod) +void apply_seal_endbr(s32 *start, s32 *end) { } -void apply_retpolines(s32 *start, s32 *end, struct module *mod) +void apply_retpolines(s32 *start, s32 *end) { } -void apply_returns(s32 *start, s32 *end, struct module *mod) +void apply_returns(s32 *start, s32 *end) { } void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { } -void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod) +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 39e6efc1a9cab7..bfc7cabf4017ba 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -48,8 +48,7 @@ int __init init_vdso_image(const struct vdso_image *image) apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + - image->alt_len), - NULL); + image->alt_len)); return 0; } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e3903b731305c9..a2141665239b5a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -87,16 +87,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; * instructions were patched in already: */ extern int alternatives_patched; -struct module; extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod); -extern void apply_retpolines(s32 *start, s32 *end, struct module *mod); -extern void apply_returns(s32 *start, s32 *end, struct module *mod); -extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, - s32 *start_cfi, s32 *end_cfi, struct module *mod); + s32 *start_cfi, s32 *end_cfi); + +struct module; struct callthunk_sites { s32 *call_start, *call_end; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c71b575bf2292d..8b66a555d2f035 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -392,10 +392,8 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, - struct module *mod) +static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) { - u8 *wr_instr = module_writable_address(mod, instr); void *target, *bug = &BUG_func; s32 disp; @@ -405,14 +403,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, } if (a->instrlen != 6 || - wr_instr[0] != CALL_RIP_REL_OPCODE || - wr_instr[1] != CALL_RIP_REL_MODRM) { + instr[0] != CALL_RIP_REL_OPCODE || + instr[1] != CALL_RIP_REL_MODRM) { pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n"); BUG(); } /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */ - disp = *(s32 *)(wr_instr + 2); + disp = *(s32 *)(instr + 2); #ifdef CONFIG_X86_64 /* ff 15 00 00 00 00 call *0x0(%rip) */ /* target address is stored at "next instruction + disp". */ @@ -450,8 +448,7 @@ static inline u8 * instr_va(struct alt_instr *i) * to refetch changed I$ lines. */ void __init_or_module noinline apply_alternatives(struct alt_instr *start, - struct alt_instr *end, - struct module *mod) + struct alt_instr *end) { u8 insn_buff[MAX_PATCH_LEN]; u8 *instr, *replacement; @@ -480,7 +477,6 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; - u8 *wr_instr, *wr_replacement; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -494,11 +490,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, } instr = instr_va(a); - wr_instr = module_writable_address(mod, instr); - replacement = (u8 *)&a->repl_offset + a->repl_offset; - wr_replacement = module_writable_address(mod, replacement); - BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); @@ -509,9 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - memcpy(insn_buff, wr_instr, a->instrlen); + memcpy(insn_buff, instr, a->instrlen); optimize_nops(instr, insn_buff, a->instrlen); - text_poke_early(wr_instr, insn_buff, a->instrlen); + text_poke_early(instr, insn_buff, a->instrlen); continue; } @@ -521,12 +513,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, instr, instr, a->instrlen, replacement, a->replacementlen, a->flags); - memcpy(insn_buff, wr_replacement, a->replacementlen); + memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; if (a->flags & ALT_FLAG_DIRECT_CALL) { - insn_buff_sz = alt_replace_call(instr, insn_buff, a, - mod); + insn_buff_sz = alt_replace_call(instr, insn_buff, a); if (insn_buff_sz < 0) continue; } @@ -536,11 +527,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(wr_instr, insn_buff, insn_buff_sz); + text_poke_early(instr, insn_buff, insn_buff_sz); } kasan_enable_current(); @@ -731,20 +722,18 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * Generated by 'objtool --retpoline'. */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op1, op2; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -772,9 +761,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(addr, bytes, len); - DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } @@ -810,8 +799,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) return i; } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; @@ -820,13 +808,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -846,35 +833,32 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } #else -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_MITIGATION_RETHUNK */ #else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) { } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr); +static void poison_cfi(void *addr); -static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) +static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); - if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr))) + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) return; if (!is_endbr(endbr)) { @@ -889,7 +873,7 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) */ DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(wr_addr, &poison, 4); + text_poke_early(addr, &poison, 4); } /* @@ -898,23 +882,22 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) * Seal the functions for indirect calls by clobbering the ENDBR instructions * and the kCFI hash value. */ -void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr, wr_addr, true); + poison_endbr(addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16, wr_addr - 16); + poison_cfi(addr - 16); } } #else -void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1136,7 +1119,7 @@ static u32 decode_caller_hash(void *addr) } /* .retpoline_sites */ -static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_disable_callers(s32 *start, s32 *end) { /* * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate @@ -1148,23 +1131,20 @@ static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); - + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, jmp, 2); + text_poke_early(addr, jmp, 2); } return 0; } -static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_enable_callers(s32 *start, s32 *end) { /* * Re-enable kCFI, undo what cfi_disable_callers() did. @@ -1174,115 +1154,106 @@ static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, mov, 2); + text_poke_early(addr, mov, 2); } return 0; } /* .cfi_sites */ -static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(wr_addr); + hash = decode_preamble_hash(addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; hash = cfi_rehash(hash); - text_poke_early(wr_addr + 1, &hash, 4); + text_poke_early(addr + 1, &hash, 4); } return 0; } -static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod) +static int cfi_rewrite_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(wr_addr); + hash = decode_preamble_hash(addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; - text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4); + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); } return 0; } -static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod) +static void cfi_rewrite_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr + 16, wr_addr + 16, false); + poison_endbr(addr+16, false); } } /* .retpoline_sites */ -static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_callers(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (hash) { hash = -cfi_rehash(hash); - text_poke_early(wr_addr + 2, &hash, 4); + text_poke_early(addr + 2, &hash, 4); } } return 0; } -static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (hash) { - text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4); + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(addr + fineibt_caller_hash, &hash, 4); } /* rely on apply_retpolines() */ } @@ -1291,9 +1262,8 @@ static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) } static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { - bool builtin = mod ? false : true; int ret; if (WARN_ONCE(fineibt_preamble_size != 16, @@ -1311,7 +1281,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. */ - ret = cfi_disable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1322,11 +1292,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } - ret = cfi_rand_preamble(start_cfi, end_cfi, mod); + ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; - ret = cfi_rand_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err; } @@ -1338,7 +1308,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_KCFI: - ret = cfi_enable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1348,17 +1318,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, case CFI_FINEIBT: /* place the FineIBT preamble at func()-16 */ - ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod); + ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; /* rewrite the callers to target func()-16 */ - ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ - cfi_rewrite_endbr(start_cfi, end_cfi, mod); + cfi_rewrite_endbr(start_cfi, end_cfi); if (builtin) pr_info("Using FineIBT CFI\n"); @@ -1377,7 +1347,7 @@ static inline void poison_hash(void *addr) *(u32 *)addr = 0; } -static void poison_cfi(void *addr, void *wr_addr) +static void poison_cfi(void *addr) { switch (cfi_mode) { case CFI_FINEIBT: @@ -1389,8 +1359,8 @@ static void poison_cfi(void *addr, void *wr_addr) * ud2 * 1: nop */ - poison_endbr(addr, wr_addr, false); - poison_hash(wr_addr + fineibt_preamble_hash); + poison_endbr(addr, false); + poison_hash(addr + fineibt_preamble_hash); break; case CFI_KCFI: @@ -1399,7 +1369,7 @@ static void poison_cfi(void *addr, void *wr_addr) * movl $0, %eax * .skip 11, 0x90 */ - poison_hash(wr_addr + 1); + poison_hash(addr + 1); break; default: @@ -1410,21 +1380,22 @@ static void poison_cfi(void *addr, void *wr_addr) #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr) { } +static void poison_cfi(void *addr) { } #endif #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { return __apply_fineibt(start_retpoline, end_retpoline, - start_cfi, end_cfi, mod); + start_cfi, end_cfi, + /* .builtin = */ false); } #ifdef CONFIG_SMP @@ -1721,16 +1692,16 @@ void __init alternative_instructions(void) paravirt_set_cap(); __apply_fineibt(__retpoline_sites, __retpoline_sites_end, - __cfi_sites, __cfi_sites_end, NULL); + __cfi_sites, __cfi_sites_end, true); /* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks. */ - apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL); - apply_returns(__return_sites, __return_sites_end, NULL); + apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); - apply_alternatives(__alt_instructions, __alt_instructions_end, NULL); + apply_alternatives(__alt_instructions, __alt_instructions_end); /* * Now all calls are established. Apply the call thunks if @@ -1741,7 +1712,7 @@ void __init alternative_instructions(void) /* * Seal all functions that do not have their address taken. */ - apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL); + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 166bc0ea3bdff9..cace6e8d7cc77a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -118,13 +118,10 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code, return ret; /* replace the text with the new text */ - if (ftrace_poke_late) { + if (ftrace_poke_late) text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); - } else { - mutex_lock(&text_mutex); - text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE); - mutex_unlock(&text_mutex); - } + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } @@ -321,7 +318,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; - void *ret; + int ret; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { start_offset = (unsigned long)ftrace_regs_caller; @@ -352,15 +349,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = text_poke_copy(trampoline, (void *)start_offset, size); - if (WARN_ON(!ret)) + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); + if (WARN_ON(ret < 0)) goto fail; ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else - text_poke_copy(ip, retq, sizeof(retq)); + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { @@ -368,7 +365,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - if (!text_poke_copy(ip, x86_nops[2], 2)) + ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); + if (ret < 0) goto fail; } @@ -381,7 +379,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) */ ptr = (unsigned long *)(trampoline + size + RET_SIZE); - text_poke_copy(ptr, &ops, sizeof(unsigned long)); + *ptr = (unsigned long)ops; op_offset -= start_offset; memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); @@ -397,7 +395,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_ptr.offset = offset; /* put in the new offset to the ftrace_ops */ - text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); /* put in the call to the function */ mutex_lock(&text_mutex); @@ -407,9 +405,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) * the depth accounting before the call already. */ dest = ftrace_ops_get_func(ops); - text_poke_copy_locked(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), - CALL_INSN_SIZE, false); + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8984abd91c001f..837450b6e882f7 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -146,21 +146,18 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, } if (apply) { - void *wr_loc = module_writable_address(me, loc); - - if (memcmp(wr_loc, &zero, size)) { + if (memcmp(loc, &zero, size)) { pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - write(wr_loc, &val, size); + write(loc, &val, size); } else { if (memcmp(loc, &val, size)) { pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - /* FIXME: needs care for ROX module allocations */ write(loc, &zero, size); } } @@ -227,7 +224,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *alt = NULL, + const Elf_Shdr *s, *alt = NULL, *locks = NULL, *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, *calls = NULL, *cfi = NULL; @@ -236,6 +233,8 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks = s; if (!strcmp(".orc_unwind", secstrings + s->sh_name)) orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) @@ -266,20 +265,20 @@ int module_finalize(const Elf_Ehdr *hdr, csize = cfi->sh_size; } - apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me); + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; - apply_retpolines(rseg, rseg + retpolines->sh_size, me); + apply_retpolines(rseg, rseg + retpolines->sh_size); } if (returns) { void *rseg = (void *)returns->sh_addr; - apply_returns(rseg, rseg + returns->sh_size, me); + apply_returns(rseg, rseg + returns->sh_size); } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size, me); + apply_alternatives(aseg, aseg + alt->sh_size); } if (calls || alt) { struct callthunk_sites cs = {}; @@ -298,28 +297,8 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } - - if (orc && orc_ip) - unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, - (void *)orc->sh_addr, orc->sh_size); - - return 0; -} - -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *locks = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - } - if (locks) { void *lseg = (void *)locks->sh_addr; void *text = me->mem[MOD_TEXT].base; @@ -329,6 +308,10 @@ int module_post_finalize(const Elf_Ehdr *hdr, text, text_end); } + if (orc && orc_ip) + unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, + (void *)orc->sh_addr, orc->sh_size); + return 0; } From 7ab15109f9ae41322b7655192d9bcd4986045c7d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:32 +0200 Subject: [PATCH 38/39] module: drop unused module_writable_address() module_writable_address() is unused and can be removed. Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/module.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 1c145938ddd6af..d9a5183a9fe711 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -771,11 +771,6 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); -static inline void *module_writable_address(struct module *mod, void *loc) -{ - return loc; -} - #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) @@ -883,11 +878,6 @@ static inline bool module_is_coming(struct module *mod) { return false; } - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - return loc; -} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS From cd42e64dd5bafd0753e25a077df2b739568d83f8 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 26 Jan 2025 09:47:33 +0200 Subject: [PATCH 39/39] x86: re-enable EXECMEM_ROX support after rework of execmem ROX caches Signed-off-by: Mike Rapoport (Microsoft) --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118dc..c4175f4635eea1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_EXECMEM_ROX if X86_64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL