diff --git a/.github/workflows/kdevops-cleanup.yml b/.github/workflows/kdevops-cleanup.yml
new file mode 100644
index 00000000000000..d7e69ac48e6c92
--- /dev/null
+++ b/.github/workflows/kdevops-cleanup.yml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This can be used towards the end of your action. All tasks here run even if
+# any of the previous tasks failed.
+
+name: Kdevops cleanup workflow
+
+on:
+  workflow_call: # Makes this workflow reusable
+
+jobs:
+  cleanup:
+    name: Archive results and cleanup
+    runs-on: [self-hosted, Linux, X64]
+    steps:
+      - name: Set Linux kdevops development path
+        if: ${{ job.status != 'cancelled' }}
+        run: echo "LINUX_KDEVOPS_PATH=$GITHUB_WORKSPACE" >> $GITHUB_ENV
+
+      - name: Get systemd journal files
+        if: ${{ job.status != 'cancelled' }}
+        run: |
+          if [[ ! -d kdevops ]]; then
+            exit 0
+          fi
+          cd kdevops
+          make journal-dump
+
+      - name: Start SSH Agent
+        if: ${{ job.status != 'cancelled' }}
+        uses: webfactory/ssh-agent@v0.9.0
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      - name: Build our kdevops archive results
+        if: ${{ job.status != 'cancelled' }}
+        run: |
+          if [[ ! -d kdevops ]]; then
+            exit 0
+          fi
+          cd kdevops
+          make ci-archive
+
+      - name: Upload our kdevops results archive
+        if: ${{ job.status != 'cancelled' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: kdevops-ci-results
+          path: ${{ env.LINUX_KDEVOPS_PATH }}/kdevops/archive/*.zip
+
+      - name: Run kdevops make destroy
+        if: always()
+        run: |
+          if [[ ! -d kdevops ]]; then
+            exit 0
+          fi
+          cd kdevops
+          make destroy
+          cd ..
+          rm -rf kdevops
diff --git a/.github/workflows/kdevops-generic.yml b/.github/workflows/kdevops-generic.yml
new file mode 100644
index 00000000000000..8ca2006c1e1d22
--- /dev/null
+++ b/.github/workflows/kdevops-generic.yml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Most simple Linux kernel subsystems can be tested with this target
+# test setup. For more elaborate tests look for a topic branch under the
+# kdevops-ci tree. For example, to test a filesystem, look at the fstests
+# branch.
+
+name: Run generic kdevops CI tests
+
+on:
+  push:
+    branches: ['**']
+  pull_request:
+    branches: ['**']
+  workflow_dispatch: # Allow manual triggering
+
+jobs:
+  setup:
+    uses: ./.github/workflows/kdevops-init.yml
+    secrets: inherit
+
+  run-tests:
+    needs: setup
+    name: Run CI tests
+    runs-on: [self-hosted, Linux, X64]
+    steps:
+      - name: Run CI tests
+        run: |
+          cd kdevops
+          make ci-test
+          echo "ok" > ci.result
+
+  cleanup:
+    needs: [run-tests, setup] # Add setup as a dependency to ensure proper ordering
+    if: always() # This ensures cleanup runs even if run-tests fails
+    uses: ./.github/workflows/kdevops-cleanup.yml
+    secrets: inherit
diff --git a/.github/workflows/kdevops-init.yml b/.github/workflows/kdevops-init.yml
new file mode 100644
index 00000000000000..9a265c2dbc1f84
--- /dev/null
+++ b/.github/workflows/kdevops-init.yml
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This can be used as an initialization workflow for most Linux kernel
+# development environments. This takes care of:
+#
+# - Checks out your kernel tree, reusing a local mirror
+# - Looks for a defconfig in kdevops to use for your kernel tree
+# - Sets up CI metadata for kdevops-results-archive
+# - Ensures your kernel tree at least builds with defconfig
+# - Brings up target DUT nodes
+# - Installs your Linux kernel tree on them
+# - Builds all of your test requirements for your Linux kernel tree
+
+name: Base kdevops workflow
+
+on:
+  workflow_call: # Makes this workflow reusable
+    inputs:
+      kdevops_defconfig:
+        required: false
+        type: string
+
+jobs:
+  setup:
+    name: Set up kdevops environment
+    runs-on: [self-hosted, Linux, X64]
+    steps:
+      - name: Ensure the SSH host key check won't prompt for user input
+        run: |
+          mkdir -p ~/.ssh
+          if ! grep -q "StrictHostKeyChecking no" ~/.ssh/config 2>/dev/null; then
+            echo "StrictHostKeyChecking no" >> ~/.ssh/config
+          fi
+
+      - name: Start SSH Agent for initial test
+        uses: webfactory/ssh-agent@v0.9.0
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      # Modify the repo here if you have a custom or private URL for the archive.
+      # This can also just be a repo variable later.
+      - name: Verify our ssh connection will work
+        run: |
+          if ! git ls-remote git@github.com:linux-kdevops/kdevops-results-archive.git HEAD; then
+            echo "Cannot access kdevops-results-archive repository"
+            exit 1
+          fi
+
+      - name: Configure git
+        run: |
+          git config --global --add safe.directory '*'
+          git config --global user.name "kdevops"
+          git config --global user.email "kdevops@lists.linux.dev"
+
+      - name: Checkout kdevops
+        run: |
+          rm -rf kdevops
+          git clone /mirror/kdevops.git kdevops
+
+      - name: Make sure our repo kdevops defconfig exists
+        run: |
+          cd kdevops
+          if [[ -z "${{ inputs.kdevops_defconfig }}" ]]; then
+            KDEVOPS_DEFCONFIG=$(basename ${{ github.repository }})
+          else
+            KDEVOPS_DEFCONFIG="${{ inputs.kdevops_defconfig }}"
+          fi
+
+          if [[ ! -f defconfigs/$KDEVOPS_DEFCONFIG ]]; then
+            echo "kdevops lacks a defconfig for this repository, expected to find: defconfigs/$KDEVOPS_DEFCONFIG"
+            exit 1
+          fi
+
+          echo "KDEVOPS_DEFCONFIG=$KDEVOPS_DEFCONFIG" >> $GITHUB_ENV
+
+      - name: Checkout custom branch with delta on kdevops/linux
+        run: |
+          LINUX_TREE="https://github.com/${{ github.repository }}"
+          LINUX_TREE_REF="${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}"
+          cd kdevops
+          git clone $LINUX_TREE --reference /mirror/linux.git/ --depth=5 linux
+          cd linux
+          git fetch origin $LINUX_TREE_REF
+          git checkout $LINUX_TREE_REF
+          git log -1
+
+      - name: Initialize CI metadata for kdevops-results-archive for linux
+        run: |
+          cd kdevops/linux
+          echo "$(basename ${{ github.repository }})" > ../ci.trigger
+
+          # This supports using the kdevops github actions in two different
+          # ways:
+          #
+          # 1) Commit the .github/ directory onto a Linux tree before your
+          #    kernel changes. This approach is used, for example, for
+          #    testing patches posted on the mailing list with patchwork;
+          #    this is the strategy the kernel-patch-daemon uses. Since the
+          #    patches are ephemeral there is no important git history to
+          #    maintain.
+          #
+          # 2) Merge the .github/ directory at the end of your development
+          #    tree. This is useful for kernel developers wishing to test
+          #    existing trees.
+          #
+          # So this checks to see if the last commit (top of the tree) *added*
+          # the .github directory. If the last commit added it, then we assume
+          # the commit prior to it was the one we'd like to document as the
+          # main test point.
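+          #
+          # As an illustrative example (the subject lines here are made up),
+          # with approach 1) the top of the tree looks like:
+          #
+          #   HEAD    github: add kdevops CI files  <- added .github/, skipped
+          #   HEAD~1  fs/foo: fix bar accounting    <- becomes ci.subject/ci.ref
+          #
+          # With approach 2), HEAD itself is the commit we document.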
+          if git diff-tree --no-commit-id --name-only --diff-filter=A -r HEAD | grep -q "^\.github/"; then
+            git log -1 --skip=1 --pretty=format:"%s" > ../ci.subject
+            (git describe --exact-match --tags HEAD^ 2>/dev/null || git rev-parse --short HEAD^) > ../ci.ref
+          else
+            git log -1 --pretty=format:"%s" > ../ci.subject
+            (git describe --exact-match --tags HEAD 2>/dev/null || git rev-parse --short HEAD) > ../ci.ref
+          fi
+
+          RELEVANT_GIT_TAG=$(cat ../ci.ref)
+          RELEVANT_GIT_REF=$(git rev-parse --short=12 $RELEVANT_GIT_TAG)
+
+          echo "LINUX_GIT_REF=$RELEVANT_GIT_REF" >> $GITHUB_ENV
+          echo "LINUX_GIT_TAG=$RELEVANT_GIT_TAG" >> $GITHUB_ENV
+
+          # Start out pessimistic
+          echo "unknown" > ../ci.result
+          echo "Nothing to write home about." > ../ci.commit_extra
+
+      - name: Run a quick Linux kernel defconfig build test
+        run: |
+          cd kdevops/linux
+          git reset --hard ${{ env.LINUX_GIT_TAG }}
+          make defconfig
+          make -j$(nproc)
+
+      - name: Run kdevops make defconfig-repo
+        run: |
+          LINUX_TREE="https://github.com/${{ github.repository }}"
+          LINUX_TREE_REF="${{ env.LINUX_GIT_TAG }}"
+
+          # We compromise here and use the relevant git tag for the host
+          # prefix so that folks can easily tell which exact kernel tree is
+          # being tested just by looking at the git ref. That is, if you
+          # pushed a tree with the .github/ directory as the top of the tree,
+          # that commit will not be used; we'll use the one before it, as
+          # that is the relevant git ref we want to annotate the test with.
+          #
+          # The other compromise is that we use the special string KDEVOPS to
+          # separate the commit ID and github.run_id. Exotic things like UTF
+          # characters and dots cause problems.
+          KDEVOPS_HOSTS_PREFIX="${{ env.LINUX_GIT_REF }}KDEVOPS${{ github.run_id }}"
+
+          echo "Going to use defconfig-${{ env.KDEVOPS_DEFCONFIG }}"
+
+          echo "Linux tree: $LINUX_TREE"
+          echo "Linux trigger ref: $LINUX_TREE_REF"
+          echo "Linux tag: ${{ env.LINUX_GIT_TAG }}"
+          echo "Runner ID: ${{ github.run_id }}"
+          echo "kdevops host prefix: $KDEVOPS_HOSTS_PREFIX"
+          echo "kdevops defconfig: defconfig-${{ env.KDEVOPS_DEFCONFIG }}"
+
+          KDEVOPS_ARGS="KDEVOPS_HOSTS_PREFIX=$KDEVOPS_HOSTS_PREFIX LINUX_TREE=$LINUX_TREE LINUX_TREE_REF=$LINUX_TREE_REF defconfig-${{ env.KDEVOPS_DEFCONFIG }}"
+          echo "Going to run:"
+          echo "make $KDEVOPS_ARGS"
+
+          cd kdevops
+          make $KDEVOPS_ARGS
+
+      - name: Run kdevops make
+        run: |
+          cd kdevops
+          make -j$(nproc)
+
+      - name: Run kdevops make bringup
+        run: |
+          cd kdevops
+          ls -ld linux
+          make bringup
+
+      - name: Build Linux and boot test nodes on the test kernel
+        run: |
+          cd kdevops
+          make linux
+
+      - name: Build required CI tests
+        run: |
+          cd kdevops
+          make ci-build-test
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index da2ee8d6ef1a7b..354ce16d83cb5a 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -285,11 +285,9 @@ bool in_module_plt(unsigned long loc)
 	struct module *mod;
 	bool ret;
 
-	preempt_disable();
+	guard(rcu)();
 	mod = __module_text_address(loc);
 	ret = mod && (loc - (u32)mod->arch.core.plt_ent < mod->arch.core.plt_count * PLT_ENT_SIZE ||
 		      loc - (u32)mod->arch.init.plt_ent < mod->arch.init.plt_count * PLT_ENT_SIZE);
-	preempt_enable();
-
 	return ret;
 }
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index d7c0d023dfe5e6..5a890714ee2e98 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -320,14 +320,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
 	 * dealing with an out-of-range condition, we can assume it
 	 * is due to a module being loaded far away from the kernel.
 	 *
-	 * NOTE: __module_text_address() must be called with preemption
-	 * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+	 * NOTE: __module_text_address() must be called within an RCU read
+	 * section, but we can rely on ftrace_lock to ensure that 'mod'
 	 * retains its validity throughout the remainder of this code.
 	 */
 	if (!mod) {
-		preempt_disable();
+		guard(rcu)();
 		mod = __module_text_address(pc);
-		preempt_enable();
 	}
 
 	if (WARN_ON(!mod))
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 25c9a4cfd5fa92..d5d81d74034c85 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -85,14 +85,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod
 	 * dealing with an out-of-range condition, we can assume it
 	 * is due to a module being loaded far away from the kernel.
 	 *
-	 * NOTE: __module_text_address() must be called with preemption
-	 * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+	 * NOTE: __module_text_address() must be called within an RCU read
+	 * section, but we can rely on ftrace_lock to ensure that 'mod'
 	 * retains its validity throughout the remainder of this code.
 	 */
 	if (!mod) {
-		preempt_disable();
-		mod = __module_text_address(pc);
-		preempt_enable();
+		scoped_guard(rcu)
+			mod = __module_text_address(pc);
 	}
 
 	if (WARN_ON(!mod))
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index b2572287633179..d623935a75471c 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -399,7 +399,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	/* Don't let modules unload while we're reading their ORC data. */
-	preempt_disable();
+	guard(rcu)();
 
 	if (is_entry_func(state->pc))
 		goto end;
@@ -514,14 +514,12 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (!__kernel_text_address(state->pc))
 		goto err;
 
-	preempt_enable();
 	return true;
 
 err:
 	state->error = true;
 
 end:
-	preempt_enable();
 	state->stack_info.type = STACK_TYPE_UNKNOWN;
 	return false;
 }
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 2f776f137a89ec..6dca92d5a6e822 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -115,10 +115,8 @@ static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long a
 {
 	struct module *mod = NULL;
 
-	preempt_disable();
-	mod = __module_text_address(ip);
-	preempt_enable();
-
+	scoped_guard(rcu)
+		mod = __module_text_address(ip);
 	if (!mod)
 		pr_err("No module loaded at addr=%lx\n", ip);
 
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index ac35015f04c6ad..5c6e545d1708ca 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -120,10 +120,8 @@ static struct module *ftrace_lookup_module(struct dyn_ftrace *rec)
 {
 	struct module *mod;
 
-	preempt_disable();
-	mod = __module_text_address(rec->ip);
-	preempt_enable();
-
+	scoped_guard(rcu)
+		mod = __module_text_address(rec->ip);
 	if (!mod)
 		pr_err("No module loaded at addr=%lx\n", rec->ip);
 
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 79ea97d4797ecc..8be91974e786d4 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -440,25 +440,24 @@ void __init arch_cpu_finalize_init(void)
 	os_check_bugs();
 }
 
-void apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
+void apply_seal_endbr(s32 *start, s32 *end)
 {
 }
 
-void
apply_retpolines(s32 *start, s32 *end, struct module *mod) +void apply_retpolines(s32 *start, s32 *end) { } -void apply_returns(s32 *start, s32 *end, struct module *mod) +void apply_returns(s32 *start, s32 *end) { } void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { } -void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod) +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118dc..c4175f4635eea1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_EXECMEM_ROX if X86_64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 39e6efc1a9cab7..bfc7cabf4017ba 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -48,8 +48,7 @@ int __init init_vdso_image(const struct vdso_image *image) apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + - image->alt_len), - NULL); + image->alt_len)); return 0; } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e3903b731305c9..a2141665239b5a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -87,16 +87,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; * instructions were patched in already: */ extern int alternatives_patched; -struct module; extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end, - struct module *mod); -extern void apply_retpolines(s32 *start, s32 *end, struct module *mod); -extern void apply_returns(s32 *start, s32 *end, struct module *mod); -extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, - s32 *start_cfi, s32 *end_cfi, struct module *mod); + s32 *start_cfi, s32 *end_cfi); + +struct module; struct callthunk_sites { s32 *call_start, *call_end; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4b804531b03c3c..c90e9c51edb7ac 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -33,6 +33,7 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 @@ -64,6 +65,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << 
_PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c71b575bf2292d..8b66a555d2f035 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -392,10 +392,8 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, - struct module *mod) +static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) { - u8 *wr_instr = module_writable_address(mod, instr); void *target, *bug = &BUG_func; s32 disp; @@ -405,14 +403,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, } if (a->instrlen != 6 || - wr_instr[0] != CALL_RIP_REL_OPCODE || - wr_instr[1] != CALL_RIP_REL_MODRM) { + instr[0] != CALL_RIP_REL_OPCODE || + instr[1] != CALL_RIP_REL_MODRM) { pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n"); BUG(); } /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */ - disp = *(s32 *)(wr_instr + 2); + disp = *(s32 *)(instr + 2); #ifdef CONFIG_X86_64 /* ff 15 00 00 00 00 call *0x0(%rip) */ /* target address is stored at "next instruction + disp". */ @@ -450,8 +448,7 @@ static inline u8 * instr_va(struct alt_instr *i) * to refetch changed I$ lines. */ void __init_or_module noinline apply_alternatives(struct alt_instr *start, - struct alt_instr *end, - struct module *mod) + struct alt_instr *end) { u8 insn_buff[MAX_PATCH_LEN]; u8 *instr, *replacement; @@ -480,7 +477,6 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; - u8 *wr_instr, *wr_replacement; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -494,11 +490,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, } instr = instr_va(a); - wr_instr = module_writable_address(mod, instr); - replacement = (u8 *)&a->repl_offset + a->repl_offset; - wr_replacement = module_writable_address(mod, replacement); - BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); @@ -509,9 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. 
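 * As a worked example of the check below: with ALT_FLAG_NOT clear, a CPU
 * that lacks the feature keeps the original instruction (only its padding
 * NOPs get optimized), while with ALT_FLAG_NOT set that same skip path is
 * taken when the feature *is* present.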
*/ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - memcpy(insn_buff, wr_instr, a->instrlen); + memcpy(insn_buff, instr, a->instrlen); optimize_nops(instr, insn_buff, a->instrlen); - text_poke_early(wr_instr, insn_buff, a->instrlen); + text_poke_early(instr, insn_buff, a->instrlen); continue; } @@ -521,12 +513,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, instr, instr, a->instrlen, replacement, a->replacementlen, a->flags); - memcpy(insn_buff, wr_replacement, a->replacementlen); + memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; if (a->flags & ALT_FLAG_DIRECT_CALL) { - insn_buff_sz = alt_replace_call(instr, insn_buff, a, - mod); + insn_buff_sz = alt_replace_call(instr, insn_buff, a); if (insn_buff_sz < 0) continue; } @@ -536,11 +527,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(wr_instr, insn_buff, insn_buff_sz); + text_poke_early(instr, insn_buff, insn_buff_sz); } kasan_enable_current(); @@ -731,20 +722,18 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * Generated by 'objtool --retpoline'. */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op1, op2; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -772,9 +761,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(addr, bytes, len); - DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } @@ -810,8 +799,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) return i; } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; @@ -820,13 +808,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op; - ret = insn_decode_kernel(&insn, wr_addr); + ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -846,35 +833,32 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end, len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(wr_addr, bytes, len); + text_poke_early(addr, bytes, len); } } } 
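/*
 * Note what the patching loops above no longer need: a separate
 * module_writable_address() shadow address to decode from and poke.
 * Module text is instead made temporarily writable in place via
 * execmem_make_temp_rw()/execmem_restore_rox() (declared later in this
 * patch; the loader-side calls are outside this diff), so each site
 * reduces to the same in-place pattern:
 *
 *	ret = insn_decode_kernel(&insn, addr);
 *	...
 *	text_poke_early(addr, bytes, len);
 */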
#else -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_MITIGATION_RETHUNK */ #else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, - struct module *mod) { } -void __init_or_module noinline apply_returns(s32 *start, s32 *end, - struct module *mod) { } +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr); +static void poison_cfi(void *addr); -static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) +static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); - if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr))) + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) return; if (!is_endbr(endbr)) { @@ -889,7 +873,7 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) */ DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(wr_addr, &poison, 4); + text_poke_early(addr, &poison, 4); } /* @@ -898,23 +882,22 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) * Seal the functions for indirect calls by clobbering the ENDBR instructions * and the kCFI hash value. */ -void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr, wr_addr, true); + poison_endbr(addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16, wr_addr - 16); + poison_cfi(addr - 16); } } #else -void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1136,7 +1119,7 @@ static u32 decode_caller_hash(void *addr) } /* .retpoline_sites */ -static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_disable_callers(s32 *start, s32 *end) { /* * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate @@ -1148,23 +1131,20 @@ static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); - + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, jmp, 2); + text_poke_early(addr, jmp, 2); } return 0; } -static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_enable_callers(s32 *start, s32 *end) { /* * Re-enable kCFI, undo what cfi_disable_callers() did. 
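 * That is, put back the two MOV opcode bytes that cfi_disable_callers()
 * replaced with a JMP.d8, so the caller-side hash load and check are
 * executed again.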
@@ -1174,115 +1154,106 @@ static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(wr_addr, mov, 2); + text_poke_early(addr, mov, 2); } return 0; } /* .cfi_sites */ -static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(wr_addr); + hash = decode_preamble_hash(addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; hash = cfi_rehash(hash); - text_poke_early(wr_addr + 1, &hash, 4); + text_poke_early(addr + 1, &hash, 4); } return 0; } -static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod) +static int cfi_rewrite_preamble(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(wr_addr); + hash = decode_preamble_hash(addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; - text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4); + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); } return 0; } -static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod) +static void cfi_rewrite_endbr(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr + 16, wr_addr + 16, false); + poison_endbr(addr+16, false); } } /* .retpoline_sites */ -static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rand_callers(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (hash) { hash = -cfi_rehash(hash); - text_poke_early(wr_addr + 2, &hash, 4); + text_poke_early(addr + 2, &hash, 4); } } return 0; } -static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) +static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; - void *wr_addr; u32 hash; addr -= fineibt_caller_size; - wr_addr = module_writable_address(mod, addr); - hash = decode_caller_hash(wr_addr); + hash = decode_caller_hash(addr); if (hash) { - text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size); - WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678); - text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4); + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(addr + fineibt_caller_hash, &hash, 4); } /* rely on apply_retpolines() */ } @@ 
-1291,9 +1262,8 @@ static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) } static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { - bool builtin = mod ? false : true; int ret; if (WARN_ONCE(fineibt_preamble_size != 16, @@ -1311,7 +1281,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. */ - ret = cfi_disable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1322,11 +1292,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } - ret = cfi_rand_preamble(start_cfi, end_cfi, mod); + ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; - ret = cfi_rand_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err; } @@ -1338,7 +1308,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_KCFI: - ret = cfi_enable_callers(start_retpoline, end_retpoline, mod); + ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1348,17 +1318,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, case CFI_FINEIBT: /* place the FineIBT preamble at func()-16 */ - ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod); + ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; /* rewrite the callers to target func()-16 */ - ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod); + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ - cfi_rewrite_endbr(start_cfi, end_cfi, mod); + cfi_rewrite_endbr(start_cfi, end_cfi); if (builtin) pr_info("Using FineIBT CFI\n"); @@ -1377,7 +1347,7 @@ static inline void poison_hash(void *addr) *(u32 *)addr = 0; } -static void poison_cfi(void *addr, void *wr_addr) +static void poison_cfi(void *addr) { switch (cfi_mode) { case CFI_FINEIBT: @@ -1389,8 +1359,8 @@ static void poison_cfi(void *addr, void *wr_addr) * ud2 * 1: nop */ - poison_endbr(addr, wr_addr, false); - poison_hash(wr_addr + fineibt_preamble_hash); + poison_endbr(addr, false); + poison_hash(addr + fineibt_preamble_hash); break; case CFI_KCFI: @@ -1399,7 +1369,7 @@ static void poison_cfi(void *addr, void *wr_addr) * movl $0, %eax * .skip 11, 0x90 */ - poison_hash(wr_addr + 1); + poison_hash(addr + 1); break; default: @@ -1410,21 +1380,22 @@ static void poison_cfi(void *addr, void *wr_addr) #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi, bool builtin) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr, void *wr_addr) { } +static void poison_cfi(void *addr) { } #endif #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, struct module *mod) + s32 *start_cfi, s32 *end_cfi) { return __apply_fineibt(start_retpoline, end_retpoline, - start_cfi, end_cfi, mod); + start_cfi, end_cfi, + /* .builtin = */ false); } #ifdef CONFIG_SMP @@ -1721,16 +1692,16 @@ void __init alternative_instructions(void) paravirt_set_cap(); __apply_fineibt(__retpoline_sites, __retpoline_sites_end, - __cfi_sites, 
__cfi_sites_end, NULL); + __cfi_sites, __cfi_sites_end, true); /* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks. */ - apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL); - apply_returns(__return_sites, __return_sites_end, NULL); + apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); - apply_alternatives(__alt_instructions, __alt_instructions_end, NULL); + apply_alternatives(__alt_instructions, __alt_instructions_end); /* * Now all calls are established. Apply the call thunks if @@ -1741,7 +1712,7 @@ void __init alternative_instructions(void) /* * Seal all functions that do not have their address taken. */ - apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL); + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 8418a892d195a9..251b65f2ab2168 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -98,11 +98,10 @@ static inline bool within_module_coretext(void *addr) #ifdef CONFIG_MODULES struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)addr); if (mod && within_module_core((unsigned long)addr, mod)) ret = true; - preempt_enable(); #endif return ret; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 166bc0ea3bdff9..cace6e8d7cc77a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -118,13 +118,10 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code, return ret; /* replace the text with the new text */ - if (ftrace_poke_late) { + if (ftrace_poke_late) text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); - } else { - mutex_lock(&text_mutex); - text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE); - mutex_unlock(&text_mutex); - } + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } @@ -321,7 +318,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; - void *ret; + int ret; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { start_offset = (unsigned long)ftrace_regs_caller; @@ -352,15 +349,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = text_poke_copy(trampoline, (void *)start_offset, size); - if (WARN_ON(!ret)) + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); + if (WARN_ON(ret < 0)) goto fail; ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else - text_poke_copy(ip, retq, sizeof(retq)); + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { @@ -368,7 +365,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - if (!text_poke_copy(ip, x86_nops[2], 2)) + ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); + if (ret < 0) goto fail; } @@ -381,7 +379,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) */ ptr = (unsigned long 
*)(trampoline + size + RET_SIZE); - text_poke_copy(ptr, &ops, sizeof(unsigned long)); + *ptr = (unsigned long)ops; op_offset -= start_offset; memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); @@ -397,7 +395,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_ptr.offset = offset; /* put in the new offset to the ftrace_ops */ - text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); /* put in the call to the function */ mutex_lock(&text_mutex); @@ -407,9 +405,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) * the depth accounting before the call already. */ dest = ftrace_ops_get_func(ops); - text_poke_copy_locked(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), - CALL_INSN_SIZE, false); + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8984abd91c001f..837450b6e882f7 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -146,21 +146,18 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, } if (apply) { - void *wr_loc = module_writable_address(me, loc); - - if (memcmp(wr_loc, &zero, size)) { + if (memcmp(loc, &zero, size)) { pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - write(wr_loc, &val, size); + write(loc, &val, size); } else { if (memcmp(loc, &val, size)) { pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - /* FIXME: needs care for ROX module allocations */ write(loc, &zero, size); } } @@ -227,7 +224,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *alt = NULL, + const Elf_Shdr *s, *alt = NULL, *locks = NULL, *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, *calls = NULL, *cfi = NULL; @@ -236,6 +233,8 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks = s; if (!strcmp(".orc_unwind", secstrings + s->sh_name)) orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) @@ -266,20 +265,20 @@ int module_finalize(const Elf_Ehdr *hdr, csize = cfi->sh_size; } - apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me); + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; - apply_retpolines(rseg, rseg + retpolines->sh_size, me); + apply_retpolines(rseg, rseg + retpolines->sh_size); } if (returns) { void *rseg = (void *)returns->sh_addr; - apply_returns(rseg, rseg + returns->sh_size, me); + apply_returns(rseg, rseg + returns->sh_size); } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size, me); + apply_alternatives(aseg, aseg + alt->sh_size); } if (calls || alt) { struct callthunk_sites cs = {}; @@ -298,28 +297,8 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - 
apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } - - if (orc && orc_ip) - unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, - (void *)orc->sh_addr, orc->sh_size); - - return 0; -} - -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *locks = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - } - if (locks) { void *lseg = (void *)locks->sh_addr; void *text = me->mem[MOD_TEXT].base; @@ -329,6 +308,10 @@ int module_post_finalize(const Elf_Ehdr *hdr, text, text_end); } + if (orc && orc_ip) + unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, + (void *)orc->sh_addr, orc->sh_size); + return 0; } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index d4705a348a8045..977ee75e047c84 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -476,7 +476,7 @@ bool unwind_next_frame(struct unwind_state *state) return false; /* Don't let modules unload while we're reading their ORC data. */ - preempt_disable(); + guard(rcu)(); /* End-of-stack check for user tasks: */ if (state->regs && user_mode(state->regs)) @@ -669,14 +669,12 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } - preempt_enable(); return true; err: state->error = true; the_end: - preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 3d2f7f0a6ed142..ad3c1feec990db 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -183,7 +183,7 @@ static int pageattr_test(void) break; case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1); break; case 2: diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index ef4514d64c0524..7bd0f62ba48fdc 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -73,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ +#define CPA_COLLAPSE 16 /* try to collapse large pages */ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { @@ -105,6 +106,18 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } +static void collapse_page_count(int level) +{ + direct_pages_count[level]++; + if (system_state == SYSTEM_RUNNING) { + if (level == PG_LEVEL_2M) + count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE); + else if (level == PG_LEVEL_1G) + count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE); + } + direct_pages_count[level - 1] -= PTRS_PER_PTE; +} + void arch_report_meminfo(struct seq_file *m) { seq_printf(m, "DirectMap4k: %8lu kB\n", @@ -122,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m) } #else static inline void split_page_count(int level) { } +static inline void collapse_page_count(int level) { } #endif #ifdef CONFIG_X86_CPA_STATISTICS @@ -394,16 +408,49 @@ static void __cpa_flush_tlb(void *data) flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } -static void cpa_flush(struct cpa_data *data, int cache) +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); + +static void cpa_collapse_large_pages(struct cpa_data *cpa) +{ + unsigned long start, addr, 
end;
+	struct ptdesc *ptdesc, *tmp;
+	LIST_HEAD(pgtables);
+	int collapsed = 0;
+	int i;
+
+	if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+		for (i = 0; i < cpa->numpages; i++)
+			collapsed += collapse_large_pages(__cpa_addr(cpa, i),
+							  &pgtables);
+	} else {
+		addr = __cpa_addr(cpa, 0);
+		start = addr & PMD_MASK;
+		end = addr + PAGE_SIZE * cpa->numpages;
+
+		for (addr = start; within(addr, start, end); addr += PMD_SIZE)
+			collapsed += collapse_large_pages(addr, &pgtables);
+	}
+
+	if (!collapsed)
+		return;
+
+	flush_tlb_all();
+
+	list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) {
+		list_del(&ptdesc->pt_list);
+		__free_page(ptdesc_page(ptdesc));
+	}
+}
+
+static void cpa_flush(struct cpa_data *cpa, int cache)
 {
-	struct cpa_data *cpa = data;
 	unsigned int i;
 
 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
 	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		cpa_flush_all(cache);
-		return;
+		goto collapse_large_pages;
 	}
 
 	if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
@@ -412,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
 	on_each_cpu(__cpa_flush_tlb, cpa, 1);
 
 	if (!cache)
-		return;
+		goto collapse_large_pages;
 
 	mb();
 	for (i = 0; i < cpa->numpages; i++) {
@@ -428,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
 		clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
 	}
 	mb();
+
+collapse_large_pages:
+	if (cpa->flags & CPA_COLLAPSE)
+		cpa_collapse_large_pages(cpa);
 }
 
 static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@@ -1197,6 +1248,161 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
 	return 0;
 }
 
+static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
+			     struct list_head *pgtables)
+{
+	pmd_t _pmd, old_pmd;
+	pte_t *pte, first;
+	unsigned long pfn;
+	pgprot_t pgprot;
+	int i = 0;
+
+	addr &= PMD_MASK;
+	pte = pte_offset_kernel(pmd, addr);
+	first = *pte;
+	pfn = pte_pfn(first);
+
+	/* Make sure alignment is suitable */
+	if (PFN_PHYS(pfn) & ~PMD_MASK)
+		return 0;
+
+	/* The page is 4k intentionally */
+	if (pte_flags(first) & _PAGE_KERNEL_4K)
+		return 0;
+
+	/* Check that the rest of the PTEs are compatible with the first one */
+	for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) {
+		pte_t entry = *pte;
+
+		if (!pte_present(entry))
+			return 0;
+		if (pte_flags(entry) != pte_flags(first))
+			return 0;
+		if (pte_pfn(entry) != pte_pfn(first) + i)
+			return 0;
+	}
+
+	old_pmd = *pmd;
+
+	/* Success: set up a large page */
+	pgprot = pgprot_4k_2_large(pte_pgprot(first));
+	pgprot_val(pgprot) |= _PAGE_PSE;
+	_pmd = pfn_pmd(pfn, pgprot);
+	set_pmd(pmd, _pmd);
+
+	/* Queue the page table to be freed after TLB flush */
+	list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
+
+	if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
+		struct page *page;
+
+		/* Update all PGD tables to use the same large page */
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d_t *p4d = p4d_offset(pgd, addr);
+			pud_t *pud = pud_offset(p4d, addr);
+			pmd_t *pmd = pmd_offset(pud, addr);
+			/* Something is wrong if entries don't match */
+			if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd)))
+				continue;
+			set_pmd(pmd, _pmd);
+		}
+	}
+
+	if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+		collapse_page_count(PG_LEVEL_2M);
+
+	return 1;
+}
+
+static int collapse_pud_page(pud_t *pud, unsigned long addr,
+			     struct list_head *pgtables)
+{
+	unsigned long pfn;
+	pmd_t *pmd, first;
+	int i;
+
+	if (!direct_gbpages)
+		return 0;
+
+	addr
&= PUD_MASK; + pmd = pmd_offset(pud, addr); + first = *pmd; + + /* + * To restore PUD page all PMD entries must be large and + * have suitable alignment + */ + pfn = pmd_pfn(first); + if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK)) + return 0; + + /* + * To restore PUD page, all following PMDs must be compatible with the + * first one. + */ + for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) { + pmd_t entry = *pmd; + + if (!pmd_present(entry) || !pmd_leaf(entry)) + return 0; + if (pmd_flags(entry) != pmd_flags(first)) + return 0; + if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE) + return 0; + } + + /* Restore PUD page and queue page table to be freed after TLB flush */ + list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables); + set_pud(pud, pfn_pud(pfn, pmd_pgprot(first))); + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_1G); + + return 1; +} + +/* + * Collapse PMD and PUD pages in the kernel mapping around the address where + * possible. + * + * Caller must flush TLB and free page tables queued on the list before + * touching the new entries. CPU must not see TLB entries of different size + * with different attributes. + */ +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables) +{ + int collapsed = 0; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + addr &= PMD_MASK; + + spin_lock(&pgd_lock); + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + goto out; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + goto out; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) + goto out; + + collapsed = collapse_pmd_page(pmd, addr, pgtables); + if (collapsed) + collapsed += collapse_pud_page(pud, addr, pgtables); + +out: + spin_unlock(&pgd_lock); + return collapsed; +} + static bool try_to_free_pte_page(pte_t *pte) { int i; @@ -2120,7 +2326,8 @@ int set_memory_rox(unsigned long addr, int numpages) if (__supported_pte_mask & _PAGE_NX) clr.pgprot |= _PAGE_NX; - return change_page_attr_clear(&addr, numpages, clr, 0); + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0, + CPA_COLLAPSE, NULL); } int set_memory_rw(unsigned long addr, int numpages) @@ -2147,7 +2354,8 @@ int set_memory_p(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + return change_page_attr_set_clr(&addr, numpages, + __pgprot(_PAGE_KERNEL_4K), __pgprot(0), 1, 0, NULL); } diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 64130ae19690a9..65655a5d1be283 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -65,6 +65,37 @@ enum execmem_range_flags { * Architectures that use EXECMEM_ROX_CACHE must implement this. */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. 
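+ *
+ * A minimal usage sketch (hypothetical caller, not part of this patch);
+ * plain stores are fine between the two calls:
+ *
+ *	err = execmem_make_temp_rw(ptr, size);
+ *	if (err)
+ *		return err;
+ *	memcpy(ptr, insns, size);
+ *	err = execmem_restore_rox(ptr, size);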
+ */
+int execmem_make_temp_rw(void *ptr, size_t size);
+
+/**
+ * execmem_restore_rox - restore read-only-execute permissions
+ * @ptr: address of the region to remap
+ * @size: size of the region to remap
+ *
+ * Restores read-only-execute permissions on a range [@ptr, @ptr + @size)
+ * after it was temporarily remapped as writable. Relies on architecture
+ * implementation of set_memory_rox() to restore mapping using large pages.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int execmem_restore_rox(void *ptr, size_t size);
+#else
+static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
+static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
 #endif
 
 /**
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 1c6a6c1704d8d0..d5dd54c53ace61 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -55,12 +55,11 @@ static inline void *dereference_symbol_descriptor(void *ptr)
 	if (is_ksym_addr((unsigned long)ptr))
 		return ptr;
 
-	preempt_disable();
+	guard(rcu)();
 	mod = __module_address((unsigned long)ptr);
 	if (mod)
 		ptr = dereference_module_function_descriptor(mod, ptr);
-	preempt_enable();
 #endif
 	return ptr;
 }
diff --git a/include/linux/module.h b/include/linux/module.h
index 30e5b19bafa983..d9a5183a9fe711 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -370,7 +370,6 @@ enum mod_mem_type {
 
 struct module_memory {
 	void *base;
-	void *rw_copy;
 	bool is_rox;
 	unsigned int size;
 
@@ -666,7 +665,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
 	return within_module_init(addr, mod) || within_module_core(addr, mod);
 }
 
-/* Search for module by name: must be in a RCU-sched critical section. */
+/* Search for module by name: must be in an RCU critical section. */
struct module *find_module(const char *name);
 
 extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
@@ -772,16 +771,6 @@ static inline bool is_livepatch_module(struct module *mod)
 
 void set_module_sig_enforced(void);
 
-void *__module_writable_address(struct module *mod, void *loc);
-
-static inline void *module_writable_address(struct module *mod, void *loc)
-{
-	if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod ||
-	    mod->state != MODULE_STATE_UNFORMED)
-		return loc;
-	return __module_writable_address(mod, loc);
-}
-
 #else /* !CONFIG_MODULES...
*/ static inline struct module *__module_address(unsigned long addr) @@ -889,11 +878,6 @@ static inline bool module_is_coming(struct module *mod) { return false; } - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - return loc; -} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a1284..e395461d59e5e8 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *mod); - #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095cd9..5a37cb2b6f93b8 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/kernel/cfi.c b/kernel/cfi.c index 08caad7767176e..abcd4d1f98eab2 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -71,14 +71,11 @@ static bool is_module_cfi_trap(unsigned long addr) struct module *mod; bool found = false; - rcu_read_lock_sched_notrace(); - + guard(rcu)(); mod = __module_address(addr); if (mod) found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); - rcu_read_unlock_sched_notrace(); - return found; } #else /* CONFIG_MODULES */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93a822d3c468ca..7cb19e6014266a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -653,13 +653,12 @@ static int __jump_label_mod_text_reserved(void *start, void *end) struct module *mod; int ret; - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - + scoped_guard(rcu) { + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + } if (!mod) return 0; @@ -746,9 +745,9 @@ static int jump_label_add_module(struct module *mod) kfree(jlm); return -ENOMEM; } - preempt_disable(); - jlm2->mod = __module_address((unsigned long)key); - preempt_enable(); + scoped_guard(rcu) + jlm2->mod = __module_address((unsigned long)key); + jlm2->entries = static_key_entries(key); jlm2->next = NULL; static_key_set_mod(key, jlm2); @@ -906,13 +905,13 @@ static void jump_label_update(struct static_key *key) return; } - preempt_disable(); - mod = __module_address((unsigned long)key); - if (mod) { - stop = mod->jump_entries + mod->num_jump_entries; - init = mod->state == MODULE_STATE_COMING; + scoped_guard(rcu) { + mod = __module_address((unsigned long)key); + if (mod) { + stop = mod->jump_entries + mod->num_jump_entries; + init = mod->state == MODULE_STATE_COMING; + } } - preempt_enable(); #endif entry = static_key_entries(key); /* if there are no users, entry can be NULL */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 88aeac84e4c057..ffe0c3d5230637 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1547,7 +1547,7 @@ static int check_kprobe_address_safe(struct kprobe *p, /* Ensure the address is in a text area, and find a 
module if exists. */ *probed_mod = NULL; if (!core_kernel_text((unsigned long) p->addr)) { - guard(preempt)(); + guard(rcu)(); *probed_mod = __module_text_address((unsigned long) p->addr); if (!(*probed_mod)) return -EINVAL; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 0cd39954d5a10f..abea193977d219 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -59,7 +59,7 @@ static void klp_find_object_module(struct klp_object *obj) if (!klp_is_module(obj)) return; - rcu_read_lock_sched(); + guard(rcu)(); /* * We do not want to block removal of patched modules and therefore * we do not take a reference here. The patches are removed by @@ -75,8 +75,6 @@ static void klp_find_object_module(struct klp_object *obj) */ if (mod && mod->klp_alive) obj->mod = mod; - - rcu_read_unlock_sched(); } static bool klp_initialized(void) diff --git a/kernel/module/internal.h b/kernel/module/internal.h index d09b46ef032f08..626cf8668a7eb9 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -124,17 +124,6 @@ char *module_next_tag_pair(char *string, unsigned long *secsize); #define for_each_modinfo_entry(entry, info, name) \ for (entry = get_modinfo(info, name); entry; entry = get_next_modinfo(info, name, entry)) -static inline void module_assert_mutex_or_preempt(void) -{ -#ifdef CONFIG_LOCKDEP - if (unlikely(!debug_locks)) - return; - - WARN_ON_ONCE(!rcu_read_lock_sched_held() && - !lockdep_is_held(&module_mutex)); -#endif -} - static inline unsigned long kernel_symbol_value(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index bf65e0c3c86fc0..00a60796327c06 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -177,19 +177,15 @@ void add_kallsyms(struct module *mod, const struct load_info *info) unsigned long strtab_size; void *data_base = mod->mem[MOD_DATA].base; void *init_data_base = mod->mem[MOD_INIT_DATA].base; + struct mod_kallsyms *kallsyms; - /* Set up to point into init section. */ - mod->kallsyms = (void __rcu *)init_data_base + - info->mod_kallsyms_init_off; + kallsyms = init_data_base + info->mod_kallsyms_init_off; - rcu_read_lock(); - /* The following is safe since this pointer cannot change */ - rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; - rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + kallsyms->symtab = (void *)symsec->sh_addr; + kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. 
*/ - rcu_dereference(mod->kallsyms)->strtab = - (void *)info->sechdrs[info->index.str].sh_addr; - rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs; + kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + kallsyms->typetab = init_data_base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init @@ -199,20 +195,19 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.strtab = s = data_base + info->stroffs; mod->core_kallsyms.typetab = data_base + info->core_typeoffs; strtab_size = info->core_typeoffs - info->stroffs; - src = rcu_dereference(mod->kallsyms)->symtab; - for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { - rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); + src = kallsyms->symtab; + for (ndst = i = 0; i < kallsyms->num_symtab; i++) { + kallsyms->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { ssize_t ret; mod->core_kallsyms.typetab[ndst] = - rcu_dereference(mod->kallsyms)->typetab[i]; + kallsyms->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; - ret = strscpy(s, - &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], + ret = strscpy(s, &kallsyms->strtab[src[i].st_name], strtab_size); if (ret < 0) break; @@ -220,7 +215,9 @@ void add_kallsyms(struct module *mod, const struct load_info *info) strtab_size -= ret + 1; } } - rcu_read_unlock(); + + /* Set up to point into init section. */ + rcu_assign_pointer(mod->kallsyms, kallsyms); mod->core_kallsyms.num_symtab = ndst; } @@ -260,7 +257,7 @@ static const char *find_kallsyms_symbol(struct module *mod, { unsigned int i, best = 0; unsigned long nextval, bestval; - struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); + struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms); struct module_memory *mod_mem; /* At worse, next value is at end of module */ @@ -319,7 +316,7 @@ void * __weak dereference_module_function_descriptor(struct module *mod, /* * For kallsyms to ask for address resolution. NULL means not found. Careful - * not to lock to avoid deadlock on oopses, simply disable preemption. + * not to lock to avoid deadlock on oopses, RCU is enough. 
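The conversions above all follow one idiom: resolve an address to its owning module inside an RCU read section, and take a reference with try_module_get() only if the module must outlive that section. A minimal sketch of the idiom in isolation (illustrative only, not part of the patch; pin_text_owner() is a made-up name):

#include <linux/module.h>
#include <linux/rcupdate.h>

/* Resolve addr to its module and pin it; caller drops it with module_put(). */
static struct module *pin_text_owner(unsigned long addr)
{
	struct module *mod;

	scoped_guard(rcu) {
		mod = __module_text_address(addr);
		/* try_module_get() accepts NULL and then leaves mod NULL */
		if (!try_module_get(mod))
			mod = NULL;
	}
	return mod;
}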
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index bf65e0c3c86fc0..00a60796327c06 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -177,19 +177,15 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
 	unsigned long strtab_size;
 	void *data_base = mod->mem[MOD_DATA].base;
 	void *init_data_base = mod->mem[MOD_INIT_DATA].base;
+	struct mod_kallsyms *kallsyms;
 
-	/* Set up to point into init section. */
-	mod->kallsyms = (void __rcu *)init_data_base +
-		info->mod_kallsyms_init_off;
+	kallsyms = init_data_base + info->mod_kallsyms_init_off;
 
-	rcu_read_lock();
-	/* The following is safe since this pointer cannot change */
-	rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
-	rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+	kallsyms->symtab = (void *)symsec->sh_addr;
+	kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
 	/* Make sure we get permanent strtab: don't use info->strtab. */
-	rcu_dereference(mod->kallsyms)->strtab =
-		(void *)info->sechdrs[info->index.str].sh_addr;
-	rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs;
+	kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+	kallsyms->typetab = init_data_base + info->init_typeoffs;
 
 	/*
 	 * Now populate the cut down core kallsyms for after init
@@ -199,20 +195,19 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
 	mod->core_kallsyms.strtab = s = data_base + info->stroffs;
 	mod->core_kallsyms.typetab = data_base + info->core_typeoffs;
 	strtab_size = info->core_typeoffs - info->stroffs;
-	src = rcu_dereference(mod->kallsyms)->symtab;
-	for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) {
-		rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
+	src = kallsyms->symtab;
+	for (ndst = i = 0; i < kallsyms->num_symtab; i++) {
+		kallsyms->typetab[i] = elf_type(src + i, info);
 		if (i == 0 || is_livepatch_module(mod) ||
 		    is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
 				   info->index.pcpu)) {
 			ssize_t ret;
 
 			mod->core_kallsyms.typetab[ndst] =
-				rcu_dereference(mod->kallsyms)->typetab[i];
+				kallsyms->typetab[i];
 			dst[ndst] = src[i];
 			dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
-			ret = strscpy(s,
-				      &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name],
+			ret = strscpy(s, &kallsyms->strtab[src[i].st_name],
 				      strtab_size);
 			if (ret < 0)
 				break;
@@ -220,7 +215,9 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
 			strtab_size -= ret + 1;
 		}
 	}
-	rcu_read_unlock();
+
+	/* Set up to point into init section. */
+	rcu_assign_pointer(mod->kallsyms, kallsyms);
 	mod->core_kallsyms.num_symtab = ndst;
 }
 
@@ -260,7 +257,7 @@ static const char *find_kallsyms_symbol(struct module *mod,
 {
 	unsigned int i, best = 0;
 	unsigned long nextval, bestval;
-	struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+	struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms);
 	struct module_memory *mod_mem;
 
 	/* At worse, next value is at end of module */
@@ -319,7 +316,7 @@ void * __weak dereference_module_function_descriptor(struct module *mod,
 
 /*
  * For kallsyms to ask for address resolution. NULL means not found. Careful
- * not to lock to avoid deadlock on oopses, simply disable preemption.
+ * not to lock to avoid deadlock on oopses; RCU is enough.
 */
 int module_address_lookup(unsigned long addr,
			  unsigned long *size,
@@ -332,7 +329,7 @@ int module_address_lookup(unsigned long addr,
 	int ret = 0;
 	struct module *mod;
 
-	preempt_disable();
+	guard(rcu)();
 	mod = __module_address(addr);
 	if (mod) {
 		if (modname)
@@ -350,8 +347,6 @@ int module_address_lookup(unsigned long addr,
 		if (sym)
 			ret = strscpy(namebuf, sym, KSYM_NAME_LEN);
 	}
-	preempt_enable();
-
 	return ret;
 }
 
@@ -359,7 +354,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 {
 	struct module *mod;
 
-	preempt_disable();
+	guard(rcu)();
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
@@ -371,12 +366,10 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 				goto out;
 
 			strscpy(symname, sym, KSYM_NAME_LEN);
-			preempt_enable();
 			return 0;
 		}
 	}
 out:
-	preempt_enable();
 	return -ERANGE;
 }
 
@@ -385,13 +378,13 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 {
 	struct module *mod;
 
-	preempt_disable();
+	guard(rcu)();
 	list_for_each_entry_rcu(mod, &modules, list) {
 		struct mod_kallsyms *kallsyms;
 
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		kallsyms = rcu_dereference_sched(mod->kallsyms);
+		kallsyms = rcu_dereference(mod->kallsyms);
 		if (symnum < kallsyms->num_symtab) {
 			const Elf_Sym *sym = &kallsyms->symtab[symnum];
 
@@ -400,12 +393,10 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 			strscpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN);
 			strscpy(module_name, mod->name, MODULE_NAME_LEN);
 			*exported = is_exported(name, *value, mod);
-			preempt_enable();
 			return 0;
 		}
 		symnum -= kallsyms->num_symtab;
 	}
-	preempt_enable();
 	return -ERANGE;
 }
 
@@ -413,7 +404,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name)
 {
 	unsigned int i;
-	struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+	struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms);
 
 	for (i = 0; i < kallsyms->num_symtab; i++) {
 		const Elf_Sym *sym = &kallsyms->symtab[i];
@@ -453,23 +444,15 @@ static unsigned long __module_kallsyms_lookup_name(const char *name)
 /* Look for this name: can be of form module:name. */
 unsigned long module_kallsyms_lookup_name(const char *name)
 {
-	unsigned long ret;
-
 	/* Don't lock: we're in enough trouble already. */
-	preempt_disable();
-	ret = __module_kallsyms_lookup_name(name);
-	preempt_enable();
-	return ret;
+	guard(rcu)();
+	return __module_kallsyms_lookup_name(name);
 }
 
 unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
 {
-	unsigned long ret;
-
-	preempt_disable();
-	ret = __find_kallsyms_symbol_value(mod, name);
-	preempt_enable();
-	return ret;
+	guard(rcu)();
+	return __find_kallsyms_symbol_value(mod, name);
 }
 
 int module_kallsyms_on_each_symbol(const char *modname,
@@ -490,10 +473,8 @@ int module_kallsyms_on_each_symbol(const char *modname,
 		if (modname && strcmp(modname, mod->name))
 			continue;
 
-		/* Use rcu_dereference_sched() to remain compliant with the sparse tool */
-		preempt_disable();
-		kallsyms = rcu_dereference_sched(mod->kallsyms);
-		preempt_enable();
+		kallsyms = rcu_dereference_check(mod->kallsyms,
						 lockdep_is_held(&module_mutex));
 
 		for (i = 0; i < kallsyms->num_symtab; i++) {
			const Elf_Sym *sym = &kallsyms->symtab[i];
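add_kallsyms() above shows the writer side of the same protocol: while mod->kallsyms is not yet visible to readers it can be filled with plain stores, and a single rcu_assign_pointer() then publishes the fully initialized structure. The same pattern in miniature (illustrative only; struct kinfo and both helpers are invented names):

#include <linux/rcupdate.h>

struct kinfo {
	unsigned int num;
	const char *strtab;
};

static struct kinfo __rcu *live_kinfo;

static void kinfo_publish(struct kinfo *ki, unsigned int num, const char *tab)
{
	ki->num = num;		/* plain stores: the object is still private */
	ki->strtab = tab;
	rcu_assign_pointer(live_kinfo, ki);	/* ordered publication */
}

static unsigned int kinfo_num(void)
{
	struct kinfo *ki;

	guard(rcu)();
	ki = rcu_dereference(live_kinfo);
	return ki ? ki->num : 0;
}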
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 1fb9ad289a6f8f..974821d6e85ab3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -67,7 +67,7 @@
 
 /*
  * Mutex protects:
- * 1) List of modules (also safely readable with preempt_disable),
+ * 1) List of modules (also safely readable within an RCU read section),
 * 2) module_use links,
 * 3) mod_tree.addr_min/mod_tree.addr_max.
 * (delete and add uses RCU list operations).
@@ -331,7 +331,7 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms,
 
 /*
  * Find an exported symbol and return it, along with, (optional) crc and
- * (optional) module which owns it. Needs preempt disabled or module_mutex.
+ * (optional) module which owns it. Needs RCU or module_mutex.
 */
 bool find_symbol(struct find_symbol_arg *fsa)
 {
@@ -345,8 +345,6 @@ bool find_symbol(struct find_symbol_arg *fsa)
 	struct module *mod;
 	unsigned int i;
 
-	module_assert_mutex_or_preempt();
-
 	for (i = 0; i < ARRAY_SIZE(arr); i++)
 		if (find_exported_symbol_in_section(&arr[i], NULL, fsa))
 			return true;
@@ -374,16 +372,14 @@ bool find_symbol(struct find_symbol_arg *fsa)
 }
 
 /*
- * Search for module by name: must hold module_mutex (or preempt disabled
- * for read-only access).
+ * Search for module by name: must hold module_mutex (or RCU for read-only
+ * access).
 */
 struct module *find_module_all(const char *name, size_t len,
			       bool even_unformed)
 {
 	struct module *mod;
 
-	module_assert_mutex_or_preempt();
-
 	list_for_each_entry_rcu(mod, &modules, list,
				lockdep_is_held(&module_mutex)) {
 		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
@@ -454,8 +450,7 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
 	struct module *mod;
 	unsigned int cpu;
 
-	preempt_disable();
-
+	guard(rcu)();
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
@@ -472,13 +467,10 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
						per_cpu_ptr(mod->percpu, get_boot_cpu_id());
				}
-				preempt_enable();
				return true;
			}
		}
	}
-
-	preempt_enable();
 	return false;
 }
 
@@ -814,10 +806,9 @@ void __symbol_put(const char *symbol)
		.gplok = true,
	};
 
-	preempt_disable();
+	guard(rcu)();
	BUG_ON(!find_symbol(&fsa));
	module_put(fsa.owner);
-	preempt_enable();
 }
 EXPORT_SYMBOL(__symbol_put);
 
@@ -832,13 +823,12 @@ void symbol_put_addr(void *addr)
 
	/*
	 * Even though we hold a reference on the module; we still need to
-	 * disable preemption in order to safely traverse the data structure.
+	 * be in an RCU read section to safely traverse the data structure.
	 */
-	preempt_disable();
+	guard(rcu)();
	modaddr = __module_text_address(a);
	BUG_ON(!modaddr);
	module_put(modaddr);
-	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(symbol_put_addr);
 
@@ -1221,18 +1211,6 @@ void __weak module_arch_freeing_init(struct module *mod)
 {
 }
 
-void *__module_writable_address(struct module *mod, void *loc)
-{
-	for_class_mod_mem_type(type, text) {
-		struct module_memory *mem = &mod->mem[type];
-
-		if (loc >= mem->base && loc < mem->base + mem->size)
-			return loc + (mem->rw_copy - mem->base);
-	}
-
-	return loc;
-}
-
 static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 {
 	unsigned int size = PAGE_ALIGN(mod->mem[type].size);
@@ -1250,21 +1228,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 	if (!ptr)
 		return -ENOMEM;
 
-	mod->mem[type].base = ptr;
-
 	if (execmem_is_rox(execmem_type)) {
-		ptr = vzalloc(size);
+		int err = execmem_make_temp_rw(ptr, size);
 
-		if (!ptr) {
-			execmem_free(mod->mem[type].base);
+		if (err) {
+			execmem_free(ptr);
 			return -ENOMEM;
 		}
 
-		mod->mem[type].rw_copy = ptr;
 		mod->mem[type].is_rox = true;
-	} else {
-		mod->mem[type].rw_copy = mod->mem[type].base;
-		memset(mod->mem[type].base, 0, size);
 	}
 
 	/*
@@ -1280,16 +1252,26 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
	 */
 	kmemleak_not_leak(ptr);
 
+	memset(ptr, 0, size);
+	mod->mem[type].base = ptr;
+
 	return 0;
 }
 
+static void module_memory_restore_rox(struct module *mod)
+{
+	for_class_mod_mem_type(type, text) {
+		struct module_memory *mem = &mod->mem[type];
+
+		if (mem->is_rox)
+			execmem_restore_rox(mem->base, mem->size);
+	}
+}
+
 static void module_memory_free(struct module *mod, enum mod_mem_type type)
 {
 	struct module_memory *mem = &mod->mem[type];
 
-	if (mem->is_rox)
-		vfree(mem->rw_copy);
-
 	execmem_free(mem->base);
 }
 
@@ -1348,7 +1330,7 @@ static void free_module(struct module *mod)
 	mod_tree_remove(mod);
 	/* Remove this module from bug list, this uses list_del_rcu */
 	module_bug_cleanup(mod);
-	/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
+	/* Wait for RCU synchronizing before releasing mod->list and buglist. */
 	synchronize_rcu();
 	if (try_add_tainted_module(mod))
 		pr_err("%s: adding tainted module to the unloaded tainted modules list failed.\n",
@@ -1371,21 +1353,18 @@ void *__symbol_get(const char *symbol)
		.warn = true,
	};
 
-	preempt_disable();
-	if (!find_symbol(&fsa))
-		goto fail;
-	if (fsa.license != GPL_ONLY) {
-		pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n",
-			symbol);
-		goto fail;
+	scoped_guard(rcu) {
+		if (!find_symbol(&fsa))
+			return NULL;
+		if (fsa.license != GPL_ONLY) {
+			pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n",
+				symbol);
+			return NULL;
+		}
+		if (strong_try_module_get(fsa.owner))
+			return NULL;
	}
-	if (strong_try_module_get(fsa.owner))
-		goto fail;
-	preempt_enable();
 	return (void *)kernel_symbol_value(fsa.sym);
-fail:
-	preempt_enable();
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(__symbol_get);
 
@@ -2642,7 +2621,6 @@ static int move_module(struct module *mod, struct load_info *info)
 	for_each_mod_mem_type(type) {
 		if (!mod->mem[type].size) {
 			mod->mem[type].base = NULL;
-			mod->mem[type].rw_copy = NULL;
			continue;
		}
 
@@ -2659,7 +2637,6 @@ static int move_module(struct module *mod, struct load_info *info)
		void *dest;
		Elf_Shdr *shdr = &info->sechdrs[i];
		const char *sname;
-		unsigned long addr;
 
		if (!(shdr->sh_flags & SHF_ALLOC))
			continue;
@@ -2680,14 +2657,12 @@ static int move_module(struct module *mod, struct load_info *info)
				ret = PTR_ERR(dest);
				goto out_err;
			}
-			addr = (unsigned long)dest;
			codetag_section_found = true;
		} else {
			enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT;
			unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;
 
-			addr = (unsigned long)mod->mem[type].base + offset;
-			dest = mod->mem[type].rw_copy + offset;
+			dest = mod->mem[type].base + offset;
		}
 
		if (shdr->sh_type != SHT_NOBITS) {
@@ -2710,13 +2685,14 @@ static int move_module(struct module *mod, struct load_info *info)
		 * users of info can keep taking advantage and using the newly
		 * minted official memory area.
		 */
-		shdr->sh_addr = addr;
+		shdr->sh_addr = (unsigned long)dest;
		pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr,
			 (long)shdr->sh_size, info->secstrings + shdr->sh_name);
	}
 
	return 0;
 out_err:
+	module_memory_restore_rox(mod);
	for (t--; t >= 0; t--)
		module_memory_free(mod, t);
	if (codetag_section_found)
@@ -2863,17 +2839,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr,
 	return 0;
 }
 
-int __weak module_post_finalize(const Elf_Ehdr *hdr,
-				const Elf_Shdr *sechdrs,
-				struct module *me)
-{
-	return 0;
-}
-
 static int post_relocation(struct module *mod, const struct load_info *info)
 {
-	int ret;
-
 	/* Sort exception table now relocations are done. */
 	sort_extable(mod->extable, mod->extable + mod->num_exentries);
 
@@ -2885,24 +2852,7 @@ static int post_relocation(struct module *mod, const struct load_info *info)
 	add_kallsyms(mod, info);
 
 	/* Arch-specific module finalizing. */
-	ret = module_finalize(info->hdr, info->sechdrs, mod);
-	if (ret)
-		return ret;
-
-	for_each_mod_mem_type(type) {
-		struct module_memory *mem = &mod->mem[type];
-
-		if (mem->is_rox) {
-			if (!execmem_update_copy(mem->base, mem->rw_copy,
-						 mem->size))
-				return -ENOMEM;
-
-			vfree(mem->rw_copy);
-			mem->rw_copy = NULL;
-		}
-	}
-
-	return module_post_finalize(info->hdr, info->sechdrs, mod);
+	return module_finalize(info->hdr, info->sechdrs, mod);
 }
 
 /* Call module constructors. */
@@ -3049,7 +2999,7 @@ static noinline int do_init_module(struct module *mod)
 #endif
	/*
	 * We want to free module_init, but be aware that kallsyms may be
-	 * walking this with preempt disabled.  In all the failure paths, we
+	 * walking this within an RCU read section. In all the failure paths, we
	 * call synchronize_rcu(), but we don't want to slow down the success
	 * path. execmem_free() cannot be called in an interrupt, so do the
	 * work and call synchronize_rcu() in a work queue.
@@ -3499,6 +3449,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
			mod->mem[type].size);
	}
 
+	module_memory_restore_rox(mod);
	module_deallocate(mod, info);
 free_copy:
	/*
@@ -3715,28 +3666,23 @@ char *module_flags(struct module *mod, char *buf, bool show_state)
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_extables(unsigned long addr)
 {
-	const struct exception_table_entry *e = NULL;
 	struct module *mod;
 
-	preempt_disable();
+	guard(rcu)();
	mod = __module_address(addr);
	if (!mod)
-		goto out;
+		return NULL;
 
	if (!mod->num_exentries)
-		goto out;
-
-	e = search_extable(mod->extable,
-			   mod->num_exentries,
-			   addr);
-out:
-	preempt_enable();
-
	/*
-	 * Now, if we found one, we are running inside it now, hence
-	 * we cannot unload the module, hence no refcnt needed.
+	 * The address passed here belongs to a module that is currently
+	 * invoked (we are running inside it). Therefore its module::refcnt
+	 * must already be >0 to ensure that it is not removed at this stage.
+	 * All other users need to invoke this function within an RCU read
+	 * section.
	 */
-	return e;
+	return search_extable(mod->extable, mod->num_exentries, addr);
 }
 
 /**
@@ -3748,20 +3694,15 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 */
 bool is_module_address(unsigned long addr)
 {
-	bool ret;
-
-	preempt_disable();
-	ret = __module_address(addr) != NULL;
-	preempt_enable();
-
-	return ret;
+	guard(rcu)();
+	return __module_address(addr) != NULL;
 }
 
 /**
 * __module_address() - get the module which contains an address.
 * @addr: the address.
 *
- * Must be called with preempt disabled or module mutex held so that
+ * Must be called within an RCU read section or with module_mutex held so that
 * module doesn't get freed during this.
 */
 struct module *__module_address(unsigned long addr)
@@ -3779,8 +3720,6 @@ struct module *__module_address(unsigned long addr)
		return NULL;
 
 lookup:
-	module_assert_mutex_or_preempt();
-
	mod = mod_find(addr, &mod_tree);
	if (mod) {
		BUG_ON(!within_module(addr, mod));
@@ -3800,20 +3739,15 @@ struct module *__module_address(unsigned long addr)
 */
 bool is_module_text_address(unsigned long addr)
 {
-	bool ret;
-
-	preempt_disable();
-	ret = __module_text_address(addr) != NULL;
-	preempt_enable();
-
-	return ret;
+	guard(rcu)();
+	return __module_text_address(addr) != NULL;
 }
 
 /**
 * __module_text_address() - get the module whose code contains an address.
 * @addr: the address.
 *
- * Must be called with preempt disabled or module mutex held so that
+ * Must be called within an RCU read section or with module_mutex held so that
 * module doesn't get freed during this.
 */
 struct module *__module_text_address(unsigned long addr)
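Taken together, module_memory_alloc(), move_module() and module_memory_restore_rox() above replace the old rw_copy shadow buffer with in-place writes to temporarily writable memory. A condensed sketch of the new lifecycle (illustrative only: error reporting is trimmed and the caller shape is invented):

#include <linux/execmem.h>
#include <linux/string.h>

static void *text_alloc_in_place(const void *image, size_t size)
{
	bool rox = execmem_is_rox(EXECMEM_MODULE_TEXT);
	void *p = execmem_alloc(EXECMEM_MODULE_TEXT, size);

	if (!p)
		return NULL;
	/* ROX-cached memory starts read-only: make it temporarily writable */
	if (rox && execmem_make_temp_rw(p, size))
		goto free;
	memcpy(p, image, size);		/* copy/relocate directly, no shadow */
	if (rox && execmem_restore_rox(p, size))
		goto free;
	return p;
free:
	execmem_free(p);
	return NULL;
}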
@@ -3836,7 +3770,7 @@ void print_modules(void)
 
	printk(KERN_DEFAULT "Modules linked in:");
	/* Most callers should already have preempt disabled, but make sure */
-	preempt_disable();
+	guard(rcu)();
	list_for_each_entry_rcu(mod, &modules, list) {
		if (mod->state == MODULE_STATE_UNFORMED)
			continue;
@@ -3844,7 +3778,6 @@ void print_modules(void)
	}
 
	print_unloaded_tainted_modules();
-	preempt_enable();
	if (last_unloaded_module.name[0])
		pr_cont(" [last unloaded: %s%s]", last_unloaded_module.name,
			last_unloaded_module.taints);
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 74834ba15615fa..03f4142cfbf4e3 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/execmem.h>
 #include "internal.h"
 
 static int module_set_memory(const struct module *mod, enum mod_mem_type type,
@@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type,
 int module_enable_text_rox(const struct module *mod)
 {
	for_class_mod_mem_type(type, text) {
+		const struct module_memory *mem = &mod->mem[type];
		int ret;
 
-		if (mod->mem[type].is_rox)
-			continue;
-
-		if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+		if (mem->is_rox)
+			ret = execmem_restore_rox(mem->base, mem->size);
+		else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
			ret = module_set_memory(mod, type, set_memory_rox);
		else
			ret = module_set_memory(mod, type, set_memory_x);
diff --git a/kernel/module/tracking.c b/kernel/module/tracking.c
index 16742d1c630c67..4fefec5b683c60 100644
--- a/kernel/module/tracking.c
+++ b/kernel/module/tracking.c
@@ -21,8 +21,6 @@ int try_add_tainted_module(struct module *mod)
 {
	struct mod_unload_taint *mod_taint;
 
-	module_assert_mutex_or_preempt();
-
	if (!mod->taints)
		goto out;
 
diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c
index 277197977d438b..d3204c5c74eb7c 100644
--- a/kernel/module/tree_lookup.c
+++ b/kernel/module/tree_lookup.c
@@ -12,11 +12,11 @@
 
 /*
 * Use a latched RB-tree for __module_address(); this allows us to use
- * RCU-sched lookups of the address from any context.
+ * RCU lookups of the address from any context.
 *
- * This is conditional on PERF_EVENTS || TRACING because those can really hit
- * __module_address() hard by doing a lot of stack unwinding; potentially from
- * NMI context.
+ * This is conditional on PERF_EVENTS || TRACING || CFI_CLANG because those can
+ * really hit __module_address() hard by doing a lot of stack unwinding;
+ * potentially from NMI context.
 */
 
 static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
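module_enable_text_rox() above now distinguishes ROX-cached text, which only needs its large-page permissions restored, from conventionally allocated text. A condensed sketch of that decision (illustrative only: the per-type iteration and module_set_memory() wrapper are omitted):

#include <linux/execmem.h>
#include <linux/mm.h>
#include <linux/set_memory.h>

static int text_make_exec(void *base, size_t size, bool is_rox)
{
	unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;

	if (is_rox)	/* large-page mapping: restore it, don't split it */
		return execmem_restore_rox(base, size);
	if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
		return set_memory_rox((unsigned long)base, nr);
	return set_memory_x((unsigned long)base, nr);
}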
*/ - preempt_disable(); - if (!find_symbol(&fsa)) { - preempt_enable(); - BUG(); - } - preempt_enable(); + scoped_guard(rcu) + have_symbol = find_symbol(&fsa); + BUG_ON(!have_symbol); + return check_version(info, "module_layout", mod, fsa.crc); } diff --git a/kernel/params.c b/kernel/params.c index 0074d29c9b80ce..2509f216c9f3cf 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -551,7 +551,7 @@ struct module_param_attrs { unsigned int num; struct attribute_group grp; - struct param_attribute attrs[]; + struct param_attribute attrs[] __counted_by(num); }; #ifdef CONFIG_SYSFS @@ -651,35 +651,32 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, } /* Enlarge allocations. */ - new_mp = krealloc(mk->mp, - sizeof(*mk->mp) + - sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1), + new_mp = krealloc(mk->mp, struct_size(mk->mp, attrs, mk->mp->num + 1), GFP_KERNEL); if (!new_mp) return -ENOMEM; mk->mp = new_mp; + mk->mp->num++; /* Extra pointer for NULL terminator */ - new_attrs = krealloc(mk->mp->grp.attrs, - sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2), - GFP_KERNEL); + new_attrs = krealloc_array(mk->mp->grp.attrs, mk->mp->num + 1, + sizeof(mk->mp->grp.attrs[0]), GFP_KERNEL); if (!new_attrs) return -ENOMEM; mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ - memset(&mk->mp->attrs[mk->mp->num], 0, sizeof(mk->mp->attrs[0])); - sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); - mk->mp->attrs[mk->mp->num].param = kp; - mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; + memset(&mk->mp->attrs[mk->mp->num - 1], 0, sizeof(mk->mp->attrs[0])); + sysfs_attr_init(&mk->mp->attrs[mk->mp->num - 1].mattr.attr); + mk->mp->attrs[mk->mp->num - 1].param = kp; + mk->mp->attrs[mk->mp->num - 1].mattr.show = param_attr_show; /* Do not allow runtime DAC changes to make param writable. 
*/ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) - mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + mk->mp->attrs[mk->mp->num - 1].mattr.store = param_attr_store; else - mk->mp->attrs[mk->mp->num].mattr.store = NULL; - mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; - mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; - mk->mp->num++; + mk->mp->attrs[mk->mp->num - 1].mattr.store = NULL; + mk->mp->attrs[mk->mp->num - 1].mattr.attr.name = (char *)name; + mk->mp->attrs[mk->mp->num - 1].mattr.attr.mode = kp->perm; /* Fix up all the pointers, since krealloc can move us */ for (i = 0; i < mk->mp->num; i++) diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index bb7d066a7c3979..c2c59e6ef35d07 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -325,13 +325,12 @@ static int __static_call_mod_text_reserved(void *start, void *end) struct module *mod; int ret; - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - + scoped_guard(rcu) { + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + } if (!mod) return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index adc947587eb813..e6a17a60d8787c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2345,10 +2345,9 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)btp); module_put(mod); - preempt_enable(); } static __always_inline @@ -2932,18 +2931,21 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 u32 i, err = 0; for (i = 0; i < addrs_cnt; i++) { + bool skip_add = false; struct module *mod; - preempt_disable(); - mod = __module_address(addrs[i]); - /* Either no module or we it's already stored */ - if (!mod || has_module(&arr, mod)) { - preempt_enable(); - continue; + scoped_guard(rcu) { + mod = __module_address(addrs[i]); + /* Either no module or it's already stored */ + if (!mod || has_module(&arr, mod)) { + skip_add = true; + break; /* scoped_guard */ + } + if (!try_module_get(mod)) + err = -EINVAL; } - if (!try_module_get(mod)) - err = -EINVAL; - preempt_enable(); + if (skip_add) + continue; if (err) break; err = add_module(&arr, mod); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d8d5f18a141adc..48057531ee4ebb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -124,9 +124,8 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) if (!p) return true; *p = '\0'; - rcu_read_lock_sched(); - ret = !!find_module(tk->symbol); - rcu_read_unlock_sched(); + scoped_guard(rcu) + ret = !!find_module(tk->symbol); *p = ':'; return ret; @@ -796,12 +795,10 @@ static struct module *try_module_get_by_name(const char *name) { struct module *mod; - rcu_read_lock_sched(); + guard(rcu)(); mod = find_module(name); if (mod && !try_module_get(mod)) mod = NULL; - rcu_read_unlock_sched(); - return mod; } #else diff --git a/lib/bug.c b/lib/bug.c index e0ff219899902f..b1f07459c2ee36 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -66,23 +66,19 @@ static LIST_HEAD(module_bug_list); static struct bug_entry *module_find_bug(unsigned long bugaddr) { + struct bug_entry *bug; struct module *mod; 
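The params.c hunk above pairs __counted_by(num) with a changed update order: the counter must be raised before the new slot is written, otherwise the compiler-emitted bounds check (FORTIFY/UBSAN) would flag attrs[num] as out of range. The same pattern on a toy structure (hypothetical names, assumes the table was already allocated):

#include <linux/overflow.h>
#include <linux/slab.h>

struct table {
	unsigned int num;
	int vals[] __counted_by(num);
};

/* Grow the flexible array by one element. */
static struct table *table_grow(struct table *t, int newval)
{
	struct table *n = krealloc(t, struct_size(t, vals, t->num + 1),
				   GFP_KERNEL);

	if (!n)
		return NULL;			/* old allocation still valid */
	n->num++;				/* raise the bound first ... */
	n->vals[n->num - 1] = newval;		/* ... then index inside it */
	return n;
}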
diff --git a/lib/bug.c b/lib/bug.c
index e0ff219899902f..b1f07459c2ee36 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -66,23 +66,19 @@ static LIST_HEAD(module_bug_list);
 
 static struct bug_entry *module_find_bug(unsigned long bugaddr)
 {
+	struct bug_entry *bug;
	struct module *mod;
-	struct bug_entry *bug = NULL;
 
-	rcu_read_lock_sched();
+	guard(rcu)();
	list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
		unsigned i;
 
		bug = mod->bug_table;
		for (i = 0; i < mod->num_bugs; ++i, ++bug)
			if (bugaddr == bug_addr(bug))
-				goto out;
+				return bug;
	}
-	bug = NULL;
-out:
-	rcu_read_unlock_sched();
-
-	return bug;
+	return NULL;
 }
 
 void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
@@ -235,11 +231,11 @@ void generic_bug_clear_once(void)
 #ifdef CONFIG_MODULES
	struct module *mod;
 
-	rcu_read_lock_sched();
-	list_for_each_entry_rcu(mod, &module_bug_list, bug_list)
-		clear_once_table(mod->bug_table,
-				 mod->bug_table + mod->num_bugs);
-	rcu_read_unlock_sched();
+	scoped_guard(rcu) {
+		list_for_each_entry_rcu(mod, &module_bug_list, bug_list)
+			clear_once_table(mod->bug_table,
+					 mod->bug_table + mod->num_bugs);
+	}
 #endif
 
	clear_once_table(__start___bug_table, __stop___bug_table);
diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh
index 561dcac0f359c1..31fe4ed63de83e 100755
--- a/lib/tests/module/gen_test_kallsyms.sh
+++ b/lib/tests/module/gen_test_kallsyms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 TARGET=$(basename $1)
 DIR=lib/tests/module
diff --git a/mm/execmem.c b/mm/execmem.c
index 317b6a8d35be07..e6c4f5076ca8d8 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -257,7 +257,6 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
 static int execmem_cache_populate(struct execmem_range *range, size_t size)
 {
	unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
-	unsigned long start, end;
	struct vm_struct *vm;
	size_t alloc_size;
	int err = -ENOMEM;
@@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
	/* fill memory with instructions that will trap */
	execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
 
-	start = (unsigned long)p;
-	end = start + alloc_size;
-
-	vunmap_range(start, end);
-
-	err = execmem_set_direct_map_valid(vm, false);
-	if (err)
-		goto err_free_mem;
-
-	err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages,
-				       PMD_SHIFT);
+	err = set_memory_rox((unsigned long)p, vm->nr_pages);
	if (err)
		goto err_free_mem;
 
	err = execmem_cache_add(p, alloc_size);
	if (err)
-		goto err_free_mem;
+		goto err_reset_direct_map;
 
	return 0;
 
+err_reset_direct_map:
+	execmem_set_direct_map_valid(vm, true);
 err_free_mem:
	vfree(p);
	return err;
@@ -344,6 +335,28 @@ static bool execmem_cache_free(void *ptr)
 
	return true;
 }
+
+int execmem_make_temp_rw(void *ptr, size_t size)
+{
+	unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long addr = (unsigned long)ptr;
+	int ret;
+
+	ret = set_memory_nx(addr, nr);
+	if (ret)
+		return ret;
+
+	return set_memory_rw(addr, nr);
+}
+
+int execmem_restore_rox(void *ptr, size_t size)
+{
+	unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long addr = (unsigned long)ptr;
+
+	return set_memory_rox(addr, nr);
+}
+
 #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
 static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
 {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 16bfe1c694dd4e..88998725f1c5af 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_X86
	"direct_map_level2_splits",
	"direct_map_level3_splits",
+	"direct_map_level2_collapses",
+	"direct_map_level3_collapses",
 #endif
 #ifdef CONFIG_PER_VMA_LOCK_STATS
	"vma_lock_success",
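The two vmstat hunks above only wire up the counter names; the accounting itself sits in the x86 direct-map code that merges split mappings back into large pages, which is not part of this diff. Presumably the events are bumped with the standard helper, along these lines (account_direct_map_collapse() is an invented wrapper):

#include <linux/vmstat.h>

static void account_direct_map_collapse(int level)
{
	if (level == 2)			/* 4k -> 2M collapse */
		count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);
	else if (level == 3)		/* 2M -> 1G collapse */
		count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);
}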